From a8fefddcae86f8c9691db54fe50938219f1eb930 Mon Sep 17 00:00:00 2001 From: Vincent Date: Tue, 24 Feb 2026 19:40:31 +0000 Subject: [PATCH 1/8] [AIROCMLIR-445] Lower 'linalg.generic' convolution into rock --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 331 +++++++++++++++++- .../LinalgToRock/LinalgToRockPass.cpp | 13 +- .../mixr-to-linalg-conv.cpu.mlir | 7 +- .../mixr-to-linalg-conv1d-group.cpu.mlir | 2 + .../mixr-to-linalg-conv3d-group.cpu.mlir | 2 + .../linalg-to-rock-conv3d-no-pad.e2d.mlir | 7 + .../mixr-to-linalg-conv2d-group.cpu.mlir | 2 + 7 files changed, 359 insertions(+), 5 deletions(-) create mode 100644 mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index d0fb43bd5cf1..8f5d7ac5f3ae 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -13,10 +13,13 @@ #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/Rock/IR/Rock.h" +#include "mlir/Dialect/Rock/IR/TransformMapBuilder.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/StringMap.h" using namespace mlir; @@ -139,8 +142,334 @@ LogicalResult MatmulConverter::matchAndRewrite( return success(); } +namespace { +enum class ConvType { + Conv1D_NGCH_FGCH, + Conv2D_NGCHW_GFCHW, + Conv3D_NGCHWD_GFCHWD +}; + +struct ConvFields { + ConvType type; + ArrayAttr padding, stride, dilation; + StringAttr perfConfig; +}; + +struct ConvLinalgConverter final + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::getTypeConverter; + using OpAdaptor = typename OpConversionPattern::OpAdaptor; + + LogicalResult + 
matchAndRewrite(linalg::GenericOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override; + +private: + /// Returns strides, dilation, and padding if any + FailureOr isConv(ConversionPatternRewriter &rewriter, + linalg::GenericOp op) const; +}; +} // namespace + +FailureOr +ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, + linalg::GenericOp op) const { + // FIXME: In the future, it is possible to extract strides, dilation, and + // padding by matching the AffineExpr syntax tree. We can also infer the + // dimension and layout of the convolution from the affine_map. + llvm::StringMap opNameMapper{ + {"conv3d_ngchwd_gfchwd", ConvType::Conv3D_NGCHWD_GFCHWD}, + {"conv2d_ngchw_gfchw", ConvType::Conv2D_NGCHW_GFCHW}, + {"conv1d_ngch_gfch", ConvType::Conv1D_NGCH_FGCH}}; + + StringAttr name = op->getAttrOfType("conv_op"); + if (!opNameMapper.contains(name.getValue())) { + return failure(); + } + ConvType convType = opNameMapper[name.getValue()]; + + auto convertToArrayAttr = + [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr { + DenseIntElementsAttr casted = dyn_cast(arr); + if (!casted) { + return nullptr; + } + + SmallVector values; + llvm::transform(casted.getValues(), std::back_inserter(values), + [&](int64_t val) { return val; }); + if (convType == ConvType::Conv1D_NGCH_FGCH) { + values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); + } + return rewriter.getIndexArrayAttr(values); + }; + + auto dilation = + convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/1); + auto stride = convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/1); + + // We are given padding in format [dim0low, dim1low, ..., dim1high, + // dim2high,...] but rock expects [dim0low, dim1low, dim2low, ...] 
+ SmallVector newPaddingOrder; + auto originalPaddingOrder = convertToArrayAttr(op->getAttr("pad")).getValue(); + int64_t dim = originalPaddingOrder.size() / 2; + for (int64_t i = 0; i < dim; ++i) { + newPaddingOrder.push_back(originalPaddingOrder[i]); + newPaddingOrder.push_back(originalPaddingOrder[i]); + } + if (convType == ConvType::Conv1D_NGCH_FGCH) { + newPaddingOrder.push_back(rewriter.getIndexAttr(0)); + newPaddingOrder.push_back(rewriter.getIndexAttr(0)); + } + auto padding = rewriter.getArrayAttr(newPaddingOrder); + if (!padding || !dilation || !stride) { + return failure(); + } + + StringAttr perfConfig = op->getAttrOfType("perf_config"); + return ConvFields{convType, padding, stride, dilation, perfConfig}; +} + +LogicalResult ConvLinalgConverter::matchAndRewrite( + linalg::GenericOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const { + FailureOr maybeConvParams = isConv(rewriter, op); + if (failed(maybeConvParams)) + return failure(); + + ConvFields convParams = maybeConvParams.value(); + Location loc = op.getLoc(); + + // We have layout filter = GFC* but we need GF*C + auto getFilter = [&](Value startFilter) -> Value { + ArrayRef startFilterShape = + cast(startFilter.getType()).getShape(); + int64_t dim = startFilterShape.size() - 3; + switch (dim) { + case 3: { + rock::BottomUpTMBuilder filterBuilder( + rewriter, {"g", "f", "c", "h", "w", "d"}, startFilterShape, loc); + filterBuilder.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); + filterBuilder.passThrough({"hk", "wk", "dk"}, {2, 3, 4}, {"h", "w", "d"}); + filterBuilder.passThrough({"ck"}, {5}, {"c"}); + auto attr = filterBuilder.get(); + auto filter = rock::TransformOp::create(rewriter, loc, startFilter, attr); + return filter; + } + case 2: { + return startFilter; + } + case 1: { + rock::BottomUpTMBuilder filterBuilder(rewriter, {"g", "f", "c", "h"}, + startFilterShape, loc); + filterBuilder.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); + filterBuilder.unmerge({"hk", "wk"}, {2, 
3}, {"h"}, + {startFilterShape[3], 1}); + filterBuilder.passThrough({"ck"}, {4}, {"c"}); + auto attr = filterBuilder.get(); + auto filter = rock::TransformOp::create(rewriter, loc, startFilter, attr); + return filter; + } + default: + llvm_unreachable("seen unsupported cases"); + } + }; + + // We have input filter = NGC* but we need N*GC + auto getInput = [&](Value in) -> FailureOr { + // dealing with padding + if (llvm::any_of(convParams.padding.getValue(), [](Attribute attr) { + return cast(attr).getInt() != 0; + })) { + // clang-format off + // Here we are essentially removing the padding while keeping the group + // dimension expansion. We remove the padding because the rock.conv handles + // padding for us. This code structure comes from what migraphx-to-linalg + // emits. In theory, there can be other code structures that are emitted in + // linalg pipeline to handle padding. + // Original: + // %padded = tensor.pad %original ... + // %group_expansion = tensor.expand_shape %padded ... + // New: + // %group_expansion = tensor.expand_shape %original + // clang-format on + if (auto expanded = in.getDefiningOp(); + auto padded = + expanded->getOperand(0).getDefiningOp()) { + SmallVector resultShape( + expanded.getResultType().getShape()); + auto lowPad = padded.getStaticLow(); + auto highPad = padded.getStaticHigh(); + int64_t numPadDims = lowPad.size(); + int64_t numExpandedDims = resultShape.size(); + + // Padding is defined in pre-expand space. The spatial dims are at the + // tail of both the pre-expand and post-expand tensors (expand_shape + // only splits an earlier dim), so align from the end. 
+ for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; + i >= 0 && j >= 0; --i, --j) { + resultShape[j] -= (lowPad[i] + highPad[i]); + } + + RankedTensorType newResultType = RankedTensorType::get( + resultShape, padded.getResultType().getElementType()); + auto temp = padded.getOperand(0); + in = tensor::ExpandShapeOp::create(rewriter, expanded.getLoc(), + newResultType, temp, + expanded.getReassociationIndices()); + rewriter.replaceOp(expanded, in); + rewriter.eraseOp(padded); + } else { + op.emitError("unexpected padding code structure"); + return failure(); + } + } + + ArrayRef startInputShape = + cast(in.getType()).getShape(); + int64_t dim = startInputShape.size() - 3; + switch (dim) { + case 3: { + rock::BottomUpTMBuilder inputBuilder( + rewriter, {"n", "g", "c", "h", "w", "d"}, startInputShape, loc); + inputBuilder.passThrough({"ni"}, {0}, {"n"}); + inputBuilder.passThrough({"hi", "wi", "di"}, {1, 2, 3}, {"h", "w", "d"}); + inputBuilder.passThrough({"gi", "ci"}, {4, 5}, {"g", "c"}); + auto inputAttr = inputBuilder.get(); + auto input = rock::TransformOp::create(rewriter, loc, in, inputAttr); + return input.getResult(); + } + case 2: { + return in; + } + case 1: { + // migraphx-to-tosa pipeline handles 1d convolution by converting + // 1 dimensional input into 2 dimensional. 1x1x3x10 (NGCH) becomes + // 1x1x3x1x10 (NHWGC). 
We are reproducing that here + int64_t h = startInputShape[3]; + rock::BottomUpTMBuilder filterBuilder(rewriter, {"n", "g", "c", "h"}, + startInputShape, loc); + filterBuilder.passThrough({"ni"}, {0}, {"n"}); + filterBuilder.unmerge({"hi", "wi"}, {1, 2}, {"h"}, {h, 1}); + filterBuilder.passThrough({"gi", "ci"}, {3, 4}, {"g", "c"}); + auto attr = filterBuilder.get(); + return rock::TransformOp::create(rewriter, loc, in, attr).getResult(); + } + default: + llvm_unreachable("unsupported cases"); + } + }; + + // Creating the final result shape + RankedTensorType linalgResultType = + cast(op.getResult(0).getType()); + ArrayRef linalgOutputShape = linalgResultType.getShape(); + SmallVector rockOutputShape(linalgOutputShape); + if (linalgOutputShape.size() - 3 == 3 || linalgOutputShape.size() - 3 == 1) { + rockOutputShape.clear(); + rockOutputShape.push_back(linalgOutputShape[0]); + rockOutputShape.insert(rockOutputShape.end(), + std::next(linalgOutputShape.begin(), 3), + linalgOutputShape.end()); + if (linalgOutputShape.size() - 3 == 1) + rockOutputShape.push_back(1); + rockOutputShape.push_back(linalgOutputShape[1]); + rockOutputShape.push_back(linalgOutputShape[2]); + } + RankedTensorType rockResultType = + RankedTensorType::get(rockOutputShape, linalgResultType.getElementType()); + Value output = bufferization::AllocTensorOp::create(rewriter, op.getLoc(), + rockResultType, {}); + + auto maybeInput = getInput(op.getOperand(0)); + if (failed(maybeInput)) { + return failure(); + } + auto input = *maybeInput; + auto filter = getFilter(op.getOperand(1)); + auto cop = rock::ConvOp::create(rewriter, loc, rockResultType, filter, input, + output, /*features=*/nullptr, + /*blockSize=*/nullptr, /*gridSize=*/nullptr, + convParams.padding, convParams.stride, + convParams.dilation, /*params=*/nullptr); + // TODO: add splitk + if (convParams.perfConfig) { + cop->setAttr("perf_config", convParams.perfConfig); + } + + // Here we are going to emit layouts + switch (convParams.type) { + 
case ConvType::Conv3D_NGCHWD_GFCHWD: + cop->setAttr("filter_layout", + rewriter.getStrArrayAttr({"g", "k", "0", "1", "2", "c"})); + cop->setAttr("input_layout", rewriter.getStrArrayAttr( + {"ni", "0i", "1i", "2i", "gi", "ci"})); + cop->setAttr("output_layout", rewriter.getStrArrayAttr( + {"no", "0o", "1o", "2o", "go", "ko"})); + break; + case ConvType::Conv2D_NGCHW_GFCHW: + cop->setAttr("filter_layout", + rewriter.getStrArrayAttr({"g", "k", "c", "y", "x"})); + cop->setAttr("input_layout", + rewriter.getStrArrayAttr({"ni", "gi", "ci", "hi", "wi"})); + cop->setAttr("output_layout", + rewriter.getStrArrayAttr({"no", "go", "ko", "ho", "wo"})); + break; + case ConvType::Conv1D_NGCH_FGCH: + cop->setAttr("filter_layout", + rewriter.getStrArrayAttr({"g", "k", "y", "x", "c"})); + cop->setAttr("input_layout", + rewriter.getStrArrayAttr({"ni", "hi", "wi", "gi", "ci"})); + cop->setAttr("output_layout", + rewriter.getStrArrayAttr({"no", "ho", "wo", "go", "ko"})); + break; + default: + llvm_unreachable("edge case one"); + } + + // output has type ["no", "0o", "1o", "2o", "go", "ko"] + // We need to reshape to ngfhwd + ArrayRef startResultShape = rockResultType.getShape(); + Value finalReshaped; + switch (convParams.type) { + case ConvType::Conv3D_NGCHWD_GFCHWD: { + rock::BottomUpTMBuilder resultBuilder( + rewriter, {"n", "h", "w", "d", "g", "f"}, startResultShape, loc); + resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); + resultBuilder.passThrough({"no"}, {0}, {"n"}); + resultBuilder.passThrough({"ho", "wo", "do"}, {3, 4, 5}, {"h", "w", "d"}); + auto resultAttr = resultBuilder.get(); + finalReshaped = + rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); + break; + } + case ConvType::Conv2D_NGCHW_GFCHW: { + finalReshaped = cop.getResult(); + break; + } + case ConvType::Conv1D_NGCH_FGCH: { + rock::BottomUpTMBuilder resultBuilder(rewriter, {"n", "h", "w", "g", "f"}, + startResultShape, loc); + resultBuilder.passThrough({"no"}, {0}, {"n"}); + 
resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); + resultBuilder.merge("ho", 3, {"h", "w"}); + auto resultAttr = resultBuilder.get(); + finalReshaped = + rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); + break; + } + default: { + return op.emitError("unimplemented final reshape"); + } + } + + rewriter.replaceOp(op, finalReshaped); + return success(); +} + void mlir::rock::populateLinalgToRockConversionPattern( RewritePatternSet &pattern, MLIRContext *context) { pattern.add, - MatmulConverter>(context); + MatmulConverter, ConvLinalgConverter>(context); } diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp index f7148fe81d01..11ed67331c0b 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp @@ -47,8 +47,17 @@ static void populateLinalgToRockDialectConversion(ConversionTarget &target) { if (!linalgOp) { return std::nullopt; } - return linalg::isElementwise(linalgOp) || isa(op) || - isa(op); + + linalg::GenericOp castedOp = dyn_cast(op); + if (castedOp && + llvm::any_of(castedOp.getIteratorTypesArray(), [](auto type) { + return linalg::isReductionIterator(type); + })) { + return false; + } + + return linalg::isElementwise(linalgOp) || isa(op) || + castedOp; }); } diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir index 849a2aa7bee4..5ba4fecae0fa 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir @@ -1,12 +1,14 @@ // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand_type float -rand_min 0 -rand_max 0 -fut conv_wrapper --verifier clone - | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=GOLD // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float 
-fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL -/// README - There are essentially two tests (BOTH, and GOLD). +/// README - There are essentially three tests (BOTH, GOLD, and FINAL). /// BOTH checks if the tosa pipeline gives the same value (given the /// same seed) as the linalg pipeline. They will pass if both of them /// returns the same value. GOLD checks if the output for the linalg pipeline -/// matches an equivalent pytorch implementation. +/// matches an equivalent pytorch implementation. FINAL verifies if the linalg +/// pipeline can be converted to rock /// Gold value computed as the following: /// @@ -35,6 +37,7 @@ module{ + // FINAL: [1 1 1] // BOTH: [6.09101, 7.06269, 5.96599, 7.63177, 5.83172, 5.96893, 5.16868, 6.0204, 6.80761, 6.78844, 5.75672, 7.33505, 5.417{{.*}}, 6.04153, 5.14715, 6.728{{.*}}, 7.30343, 7.90745, 6.73162, 8.21738, 5.65554, 7.37453, 6.6329, 6.6093, 5.2816, 6.17693, 5.19904, 6.38292, 4.55713, 4.62921, 4.72307, 5.47466, 4.551, 6.15787, 4.97358, 5.89798, 5.10684, 6.01542, 5.18933, 5.58596, 5.22862, 7.13881, 4.88134, 5.56315, 5.52007, 6.27824, 4.93779, 5.71044, 6.27934, 7.51976, 5.23159, 7.17014, 6.74235, 5.59631, 5.33666, 6.20902, 4.95302, 5.26817, 4.50571, 5.17464, 4.49137, 4.80133, 3.39298, 4.92709] // GOLD: [1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 
1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625] func.func @conv(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir index a9658468d92f..1e5cdab8f085 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir @@ -1,8 +1,10 @@ // RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // Only a small subset of the array is checked because it is quite huge +// FINAL: [1 1 1] // BOTH: [2.94651, 3.09122, 3.86495, 4.54138, 3.18018, 4.06578, 2.97265, 4.05155, 2.35716, 4.26762, 3.49153, 4.14329, 3.82529, 5.43395, 4.66598, 4.98826, 4.41554, 5.15631, 3.91766, 4.79236, 4.52993, 4.25152, 4.87812, 5.10546, 4.19679, 5.1306, 4.2836, 3.7857, 5.21429, 4.6504, 4.83997, 3.91648, 5.86651, 4.76546, 5.00734, 5.18668, 5.38386, 4.1707, 5.43972, 5.57541, 5.33734, 5.14293, 4.10719, 5.32505, 4.39825 func.func @conv_1d_group(%in: !migraphx.shaped<10x8x123xf32, 984x123x1>, %fil: !migraphx.shaped<12x2x7xf32, 14x7x1>) -> !migraphx.shaped<10x12x53xf32, 636x53x1> { %out = migraphx.convolution %in, %fil {dilation = [4], group = 4 : i64, padding = [3,3], padding_mode = 0 : i64, stride = [2]} : diff --git 
a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir index 83630aa811d4..84e14f7ef9d0 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir @@ -1,7 +1,9 @@ // RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_3d_wrapper 
--verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // BOTH: [9.78569, 8.8887, 12.9401, 10.9686, 8.19386, 11.9315, 17.5043, 11.5946, 18.9063 +// FINAL: [1 1 1] func.func @conv_3d(%in: !migraphx.shaped<10x8x12x13x14xf32, 17472x2184x182x14x1>, %fil: !migraphx.shaped<12x8x2x3x4xf32, 192x24x12x4x1>) -> !migraphx.shaped<10x12x13x6x3xf32, 2808x234x18x3x1> { %out = migraphx.convolution %in, %fil {dilation = [3, 4, 5], group = 1 : i64, padding = [2, 3, 4, 2, 3, 4], padding_mode = 0 : i64, stride = [1, 2, 3]} : <10x8x12x13x14xf32, 17472x2184x182x14x1>, <12x8x2x3x4xf32, 192x24x12x4x1> -> <10x12x13x6x3xf32, 2808x234x18x3x1> diff --git a/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir b/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir new file mode 100644 index 000000000000..a70f9d19a8bb --- /dev/null +++ b/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir @@ -0,0 +1,7 @@ +// RUN: rocmlir-gen -fut conv3d_add -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx-linalg,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv3d_add_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s + +// CHECK: [1 1 1] +func.func @conv3d_add(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<9x1x2x2x2xf32, 8x8x4x2x1>) -> !migraphx.shaped<2x9x2x2x2xf32, 72x8x4x2x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [2, 2, 2], group = 3 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <9x1x2x2x2xf32, 8x8x4x2x1> -> <2x9x2x2x2xf32, 72x8x4x2x1> + return %0: !migraphx.shaped<2x9x2x2x2xf32, 72x8x4x2x1> +} diff --git a/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir b/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir index 5f019521aeac..64f838c35cdf 100644 --- a/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir +++ b/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir @@ -1,10 +1,12 @@ // RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver 
--host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // Here we are checking to see if conv_2d with non standard stride, dilation, and a group parameter matches the existing tosa pipeline // Note - this array is quite large, so we are only checking a small subset // BOTH: [5.83007, 7.83374, 8.46274, 9.03237, 6.51391, 7.75809, 9.73003, 8.48013, 8.15419, 9.9975, 7.50244, 7.11982, 6.58057 +// FINAL: [1 1 1] func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { %out = migraphx.convolution %in, %fil 
{dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> From a6c02224245b808765242150d6caf81f4ec04a65 Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 25 Feb 2026 14:25:48 +0000 Subject: [PATCH 2/8] Rebase from previous branch to use enum instead of string --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 45 +++++++------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 8f5d7ac5f3ae..3276256ee82d 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -143,14 +143,8 @@ LogicalResult MatmulConverter::matchAndRewrite( } namespace { -enum class ConvType { - Conv1D_NGCH_FGCH, - Conv2D_NGCHW_GFCHW, - Conv3D_NGCHWD_GFCHWD -}; - struct ConvFields { - ConvType type; + rock::LinalgConvType type; ArrayAttr padding, stride, dilation; StringAttr perfConfig; }; @@ -178,28 +172,19 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, // FIXME: In the future, it is possible to extract strides, dilation, and // padding by matching the AffineExpr syntax tree. We can also infer the // dimension and layout of the convolution from the affine_map. 
- llvm::StringMap opNameMapper{ - {"conv3d_ngchwd_gfchwd", ConvType::Conv3D_NGCHWD_GFCHWD}, - {"conv2d_ngchw_gfchw", ConvType::Conv2D_NGCHW_GFCHW}, - {"conv1d_ngch_gfch", ConvType::Conv1D_NGCH_FGCH}}; - - StringAttr name = op->getAttrOfType("conv_op"); - if (!opNameMapper.contains(name.getValue())) { + rock::LinalgConvTypeAttr name = op->getAttrOfType("conv_op"); + if (!name) { return failure(); } - ConvType convType = opNameMapper[name.getValue()]; + rock::LinalgConvType convType = name.getValue(); auto convertToArrayAttr = [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr { - DenseIntElementsAttr casted = dyn_cast(arr); - if (!casted) { - return nullptr; - } - + ArrayAttr casted = dyn_cast(arr); SmallVector values; - llvm::transform(casted.getValues(), std::back_inserter(values), - [&](int64_t val) { return val; }); - if (convType == ConvType::Conv1D_NGCH_FGCH) { + llvm::transform(casted.getValue(), std::back_inserter(values), + [&](Attribute val) { return cast(val).getInt(); }); + if (convType == rock::LinalgConvType::Conv1dNgchGfch) { values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); } return rewriter.getIndexArrayAttr(values); @@ -218,7 +203,7 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, newPaddingOrder.push_back(originalPaddingOrder[i]); newPaddingOrder.push_back(originalPaddingOrder[i]); } - if (convType == ConvType::Conv1D_NGCH_FGCH) { + if (convType == rock::LinalgConvType::Conv1dNgchGfch) { newPaddingOrder.push_back(rewriter.getIndexAttr(0)); newPaddingOrder.push_back(rewriter.getIndexAttr(0)); } @@ -400,7 +385,7 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( // Here we are going to emit layouts switch (convParams.type) { - case ConvType::Conv3D_NGCHWD_GFCHWD: + case rock::LinalgConvType::Conv3dNgchwdGfchwd: cop->setAttr("filter_layout", rewriter.getStrArrayAttr({"g", "k", "0", "1", "2", "c"})); cop->setAttr("input_layout", rewriter.getStrArrayAttr( @@ -408,7 +393,7 @@ LogicalResult 
ConvLinalgConverter::matchAndRewrite( cop->setAttr("output_layout", rewriter.getStrArrayAttr( {"no", "0o", "1o", "2o", "go", "ko"})); break; - case ConvType::Conv2D_NGCHW_GFCHW: + case rock::LinalgConvType::Conv2dNgchwGfchw: cop->setAttr("filter_layout", rewriter.getStrArrayAttr({"g", "k", "c", "y", "x"})); cop->setAttr("input_layout", @@ -416,7 +401,7 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( cop->setAttr("output_layout", rewriter.getStrArrayAttr({"no", "go", "ko", "ho", "wo"})); break; - case ConvType::Conv1D_NGCH_FGCH: + case rock::LinalgConvType::Conv1dNgchGfch: cop->setAttr("filter_layout", rewriter.getStrArrayAttr({"g", "k", "y", "x", "c"})); cop->setAttr("input_layout", @@ -433,7 +418,7 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( ArrayRef startResultShape = rockResultType.getShape(); Value finalReshaped; switch (convParams.type) { - case ConvType::Conv3D_NGCHWD_GFCHWD: { + case rock::LinalgConvType::Conv3dNgchwdGfchwd: { rock::BottomUpTMBuilder resultBuilder( rewriter, {"n", "h", "w", "d", "g", "f"}, startResultShape, loc); resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); @@ -444,11 +429,11 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); break; } - case ConvType::Conv2D_NGCHW_GFCHW: { + case rock::LinalgConvType::Conv2dNgchwGfchw: { finalReshaped = cop.getResult(); break; } - case ConvType::Conv1D_NGCH_FGCH: { + case rock::LinalgConvType::Conv1dNgchGfch: { rock::BottomUpTMBuilder resultBuilder(rewriter, {"n", "h", "w", "g", "f"}, startResultShape, loc); resultBuilder.passThrough({"no"}, {0}, {"n"}); From 6c7dabc725501bbb40c75c00bc8dc8919d7ee1ac Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 25 Feb 2026 15:53:53 +0000 Subject: [PATCH 3/8] Added lit test --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 512 +++++++++--------- .../LinalgToRock/linalg-to-rock-conv.mlir | 102 ++++ 2 files changed, 367 insertions(+), 247 deletions(-) create mode 
100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 3276256ee82d..51a1a220d2e0 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -13,13 +13,11 @@ #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/Rock/IR/Rock.h" #include "mlir/Dialect/Rock/IR/TransformMapBuilder.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/PatternMatch.h" -#include "llvm/ADT/StringMap.h" using namespace mlir; @@ -142,13 +140,220 @@ LogicalResult MatmulConverter::matchAndRewrite( return success(); } +//===----------------------------------------------------------------------===// +// ConvLinalgConverter: linalg.generic (conv) -> rock.conv +//===----------------------------------------------------------------------===// + namespace { struct ConvFields { rock::LinalgConvType type; + int64_t spatialDim; ArrayAttr padding, stride, dilation; StringAttr perfConfig; }; +} // namespace + +static int64_t getSpatialDim(rock::LinalgConvType type) { + switch (type) { + case rock::LinalgConvType::Conv1dNgchGfch: + return 1; + case rock::LinalgConvType::Conv2dNgchwGfchw: + return 2; + case rock::LinalgConvType::Conv3dNgchwdGfchwd: + return 3; + } + llvm_unreachable("unknown LinalgConvType"); +} + +/// Set filter_layout, input_layout, and output_layout on a rock.conv op. 
+static void setConvLayoutAttrs(OpBuilder &builder, rock::ConvOp cop, + rock::LinalgConvType type) { + auto set = [&](StringRef name, ArrayRef layout) { + cop->setAttr(name, builder.getStrArrayAttr(layout)); + }; + switch (type) { + case rock::LinalgConvType::Conv3dNgchwdGfchwd: + set("filter_layout", {"g", "k", "0", "1", "2", "c"}); + set("input_layout", {"ni", "0i", "1i", "2i", "gi", "ci"}); + set("output_layout", {"no", "0o", "1o", "2o", "go", "ko"}); + break; + case rock::LinalgConvType::Conv2dNgchwGfchw: + set("filter_layout", {"g", "k", "c", "y", "x"}); + set("input_layout", {"ni", "gi", "ci", "hi", "wi"}); + set("output_layout", {"no", "go", "ko", "ho", "wo"}); + break; + case rock::LinalgConvType::Conv1dNgchGfch: + set("filter_layout", {"g", "k", "y", "x", "c"}); + set("input_layout", {"ni", "hi", "wi", "gi", "ci"}); + set("output_layout", {"no", "ho", "wo", "go", "ko"}); + break; + } +} + +/// Transform filter from GFC* layout to GF*C layout for rock.conv. +/// 2D is already in the correct layout. +static Value transformFilter(OpBuilder &builder, Location loc, Value filter, + int64_t spatialDim) { + ArrayRef shape = + cast(filter.getType()).getShape(); + switch (spatialDim) { + case 3: { + rock::BottomUpTMBuilder b(builder, {"g", "f", "c", "h", "w", "d"}, shape, + loc); + b.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); + b.passThrough({"hk", "wk", "dk"}, {2, 3, 4}, {"h", "w", "d"}); + b.passThrough({"ck"}, {5}, {"c"}); + return rock::TransformOp::create(builder, loc, filter, b.get()); + } + case 2: + return filter; + case 1: { + // Conv1D is expanded into Conv2D (matching migraphx-to-tosa): unmerge + // H into (H, W=1). 
+ rock::BottomUpTMBuilder b(builder, {"g", "f", "c", "h"}, shape, loc); + b.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); + b.unmerge({"hk", "wk"}, {2, 3}, {"h"}, {shape[3], 1}); + b.passThrough({"ck"}, {4}, {"c"}); + return rock::TransformOp::create(builder, loc, filter, b.get()); + } + default: + llvm_unreachable("unsupported spatial dim for filter transform"); + } +} + +/// Remove the tensor.pad + tensor.expand_shape pattern emitted by +/// migraphx-to-linalg, replacing it with just tensor.expand_shape on the +/// unpadded source. rock.conv handles padding internally. +/// +/// Expected IR structure: +/// %padded = tensor.pad %original ... +/// %expanded = tensor.expand_shape %padded ... +/// Replaced with: +/// %expanded = tensor.expand_shape %original ... +static FailureOr +removePaddingFromInput(ConversionPatternRewriter &rewriter, + linalg::GenericOp op, Value in, ArrayAttr padding) { + bool hasPadding = llvm::any_of(padding.getValue(), [](Attribute attr) { + return cast(attr).getInt() != 0; + }); + if (!hasPadding) + return in; + + auto expanded = in.getDefiningOp(); + if (!expanded) { + op.emitError("unexpected padding code structure"); + return failure(); + } + auto padded = expanded->getOperand(0).getDefiningOp(); + if (!padded) { + op.emitError("unexpected padding code structure"); + return failure(); + } + + SmallVector resultShape(expanded.getResultType().getShape()); + auto lowPad = padded.getStaticLow(); + auto highPad = padded.getStaticHigh(); + int64_t numPadDims = lowPad.size(); + int64_t numExpandedDims = resultShape.size(); + + // Padding is defined in pre-expand space. The spatial dims are at the + // tail of both tensors (expand_shape only splits an earlier dim), so + // align from the end. 
+ for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; + i >= 0 && j >= 0; --i, --j) { + resultShape[j] -= (lowPad[i] + highPad[i]); + } + + RankedTensorType newResultType = RankedTensorType::get( + resultShape, padded.getResultType().getElementType()); + Value result = tensor::ExpandShapeOp::create( + rewriter, expanded.getLoc(), newResultType, padded.getOperand(0), + expanded.getReassociationIndices()); + rewriter.replaceOp(expanded, result); + rewriter.eraseOp(padded); + return result; +} + +/// Transform input from NGC* layout to N*GC layout for rock.conv. +/// 2D is already in the correct layout. +static Value transformInput(OpBuilder &builder, Location loc, Value input, + int64_t spatialDim) { + ArrayRef shape = + cast(input.getType()).getShape(); + switch (spatialDim) { + case 3: { + rock::BottomUpTMBuilder b(builder, {"n", "g", "c", "h", "w", "d"}, shape, + loc); + b.passThrough({"ni"}, {0}, {"n"}); + b.passThrough({"hi", "wi", "di"}, {1, 2, 3}, {"h", "w", "d"}); + b.passThrough({"gi", "ci"}, {4, 5}, {"g", "c"}); + return rock::TransformOp::create(builder, loc, input, b.get()); + } + case 2: + return input; + case 1: { + // Conv1D is expanded into Conv2D (matching migraphx-to-tosa): unmerge + // H into (H, W=1). + int64_t h = shape[3]; + rock::BottomUpTMBuilder b(builder, {"n", "g", "c", "h"}, shape, loc); + b.passThrough({"ni"}, {0}, {"n"}); + b.unmerge({"hi", "wi"}, {1, 2}, {"h"}, {h, 1}); + b.passThrough({"gi", "ci"}, {3, 4}, {"g", "c"}); + return rock::TransformOp::create(builder, loc, input, b.get()); + } + default: + llvm_unreachable("unsupported spatial dim for input transform"); + } +} + +/// Compute the rock output shape from the linalg output shape. +/// Linalg layout is NGF* while rock needs N*GF (with extra W=1 for 1D). 
+static SmallVector +computeRockOutputShape(ArrayRef linalgShape, int64_t spatialDim) { + if (spatialDim == 2) + return SmallVector(linalgShape); + SmallVector shape; + shape.push_back(linalgShape[0]); + shape.insert(shape.end(), std::next(linalgShape.begin(), 3), + linalgShape.end()); + if (spatialDim == 1) + shape.push_back(1); // Conv1D expanded to Conv2D: extra W=1 + shape.push_back(linalgShape[1]); + shape.push_back(linalgShape[2]); + return shape; +} + +/// Transform rock.conv output back to the linalg output layout. +/// 2D needs no transform. +static Value transformOutput(OpBuilder &builder, Location loc, Value convResult, + int64_t spatialDim) { + if (spatialDim == 2) + return convResult; + ArrayRef shape = + cast(convResult.getType()).getShape(); + switch (spatialDim) { + case 3: { + rock::BottomUpTMBuilder b(builder, {"n", "h", "w", "d", "g", "f"}, shape, + loc); + b.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); + b.passThrough({"no"}, {0}, {"n"}); + b.passThrough({"ho", "wo", "do"}, {3, 4, 5}, {"h", "w", "d"}); + return rock::TransformOp::create(builder, loc, convResult, b.get()); + } + case 1: { + // Conv1D was expanded into Conv2D: merge (H, W=1) back into H. 
+ rock::BottomUpTMBuilder b(builder, {"n", "h", "w", "g", "f"}, shape, loc); + b.passThrough({"no"}, {0}, {"n"}); + b.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); + b.merge("ho", 3, {"h", "w"}); + return rock::TransformOp::create(builder, loc, convResult, b.get()); + } + default: + llvm_unreachable("unsupported spatial dim for output transform"); + } +} +namespace { struct ConvLinalgConverter final : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -160,7 +365,6 @@ struct ConvLinalgConverter final ConversionPatternRewriter &rewriter) const override; private: - /// Returns strides, dilation, and padding if any FailureOr isConv(ConversionPatternRewriter &rewriter, linalg::GenericOp op) const; }; @@ -169,287 +373,101 @@ struct ConvLinalgConverter final FailureOr ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, linalg::GenericOp op) const { - // FIXME: In the future, it is possible to extract strides, dilation, and - // padding by matching the AffineExpr syntax tree. We can also infer the - // dimension and layout of the convolution from the affine_map. - rock::LinalgConvTypeAttr name = op->getAttrOfType("conv_op"); - if (!name) { + // FIXME: In the future, strides, dilation, and padding can be extracted + // by matching the AffineExpr syntax tree. The convolution dimension and + // layout could also be inferred from the affine_map. 
+ auto name = op->getAttrOfType("conv_op"); + if (!name) return failure(); - } rock::LinalgConvType convType = name.getValue(); + int64_t spatialDim = getSpatialDim(convType); auto convertToArrayAttr = [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr { - ArrayAttr casted = dyn_cast(arr); SmallVector values; - llvm::transform(casted.getValue(), std::back_inserter(values), - [&](Attribute val) { return cast(val).getInt(); }); - if (convType == rock::LinalgConvType::Conv1dNgchGfch) { - values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); - } + llvm::transform( + cast(arr).getValue(), std::back_inserter(values), + [](Attribute val) { return cast(val).getInt(); }); + // Conv1D is expanded into Conv2D to match the migraphx-to-tosa pipeline. + // Append identity defaults (stride=1, dilation=1, pad=0) for the extra + // spatial dimension. + if (spatialDim == 1) + values.insert(values.end(), dimOneDefaults.begin(), + dimOneDefaults.end()); return rewriter.getIndexArrayAttr(values); }; auto dilation = convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/1); - auto stride = convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/1); - - // We are given padding in format [dim0low, dim1low, ..., dim1high, - // dim2high,...] but rock expects [dim0low, dim1low, dim2low, ...] - SmallVector newPaddingOrder; - auto originalPaddingOrder = convertToArrayAttr(op->getAttr("pad")).getValue(); - int64_t dim = originalPaddingOrder.size() / 2; - for (int64_t i = 0; i < dim; ++i) { - newPaddingOrder.push_back(originalPaddingOrder[i]); - newPaddingOrder.push_back(originalPaddingOrder[i]); + auto stride = + convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/1); + + // Input format: [dim0_low, dim1_low, ..., dim0_high, dim1_high, ...] + // Rock format: [dim0_low, dim0_high, dim1_low, dim1_high, ...] 
+ auto originalPadding = convertToArrayAttr(op->getAttr("pad")).getValue(); + int64_t numSpatial = originalPadding.size() / 2; + SmallVector interleavedPad; + for (int64_t i = 0; i < numSpatial; ++i) { + interleavedPad.push_back(originalPadding[i]); + interleavedPad.push_back(originalPadding[numSpatial + i]); } - if (convType == rock::LinalgConvType::Conv1dNgchGfch) { - newPaddingOrder.push_back(rewriter.getIndexAttr(0)); - newPaddingOrder.push_back(rewriter.getIndexAttr(0)); + // For Conv1D is expanded into Conv2D like the tosa pipeline, so + // we set the last dimension have 0 padding to stay consistent. + if (spatialDim == 1) { + interleavedPad.push_back(rewriter.getIndexAttr(0)); + interleavedPad.push_back(rewriter.getIndexAttr(0)); } - auto padding = rewriter.getArrayAttr(newPaddingOrder); - if (!padding || !dilation || !stride) { + auto padding = rewriter.getArrayAttr(interleavedPad); + if (!padding || !dilation || !stride) return failure(); - } StringAttr perfConfig = op->getAttrOfType("perf_config"); - return ConvFields{convType, padding, stride, dilation, perfConfig}; + return ConvFields{convType, spatialDim, padding, stride, dilation, + perfConfig}; } LogicalResult ConvLinalgConverter::matchAndRewrite( linalg::GenericOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { - FailureOr maybeConvParams = isConv(rewriter, op); - if (failed(maybeConvParams)) + FailureOr maybeConv = isConv(rewriter, op); + if (failed(maybeConv)) return failure(); - ConvFields convParams = maybeConvParams.value(); + ConvFields conv = *maybeConv; Location loc = op.getLoc(); - // We have layout filter = GFC* but we need GF*C - auto getFilter = [&](Value startFilter) -> Value { - ArrayRef startFilterShape = - cast(startFilter.getType()).getShape(); - int64_t dim = startFilterShape.size() - 3; - switch (dim) { - case 3: { - rock::BottomUpTMBuilder filterBuilder( - rewriter, {"g", "f", "c", "h", "w", "d"}, startFilterShape, loc); - filterBuilder.passThrough({"gk", 
"fk"}, {0, 1}, {"g", "f"}); - filterBuilder.passThrough({"hk", "wk", "dk"}, {2, 3, 4}, {"h", "w", "d"}); - filterBuilder.passThrough({"ck"}, {5}, {"c"}); - auto attr = filterBuilder.get(); - auto filter = rock::TransformOp::create(rewriter, loc, startFilter, attr); - return filter; - } - case 2: { - return startFilter; - } - case 1: { - rock::BottomUpTMBuilder filterBuilder(rewriter, {"g", "f", "c", "h"}, - startFilterShape, loc); - filterBuilder.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); - filterBuilder.unmerge({"hk", "wk"}, {2, 3}, {"h"}, - {startFilterShape[3], 1}); - filterBuilder.passThrough({"ck"}, {4}, {"c"}); - auto attr = filterBuilder.get(); - auto filter = rock::TransformOp::create(rewriter, loc, startFilter, attr); - return filter; - } - default: - llvm_unreachable("seen unsupported cases"); - } - }; + auto maybeInput = + removePaddingFromInput(rewriter, op, op.getOperand(0), conv.padding); + if (failed(maybeInput)) + return failure(); - // We have input filter = NGC* but we need N*GC - auto getInput = [&](Value in) -> FailureOr { - // dealing with padding - if (llvm::any_of(convParams.padding.getValue(), [](Attribute attr) { - return cast(attr).getInt() != 0; - })) { - // clang-format off - // Here we are essentially removing the padding while keeping the group - // dimension expansion. We remove the padding because the rock.conv handles - // padding for us This code structure comes from what migraphx-to-linalg - // emits. In theory, there can be other code structure that are emitted in - // linalg pipeline to handle padding. - // Original: - // %padded = tensor.pad %original ... - // %group_expansion = tensor.expand_shape %padded ... 
- // New: - // %group_expansion = tensor.expand_shape %original - // clang-format on - if (auto expanded = in.getDefiningOp(); - auto padded = - expanded->getOperand(0).getDefiningOp()) { - SmallVector resultShape( - expanded.getResultType().getShape()); - auto lowPad = padded.getStaticLow(); - auto highPad = padded.getStaticHigh(); - int64_t numPadDims = lowPad.size(); - int64_t numExpandedDims = resultShape.size(); - - // Padding is defined in pre-expand space. The spatial dims are at the - // tail of both the pre-expand and post-expand tensors (expand_shape - // only splits an earlier dim), so align from the end. - for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; - i >= 0 && j >= 0; --i, --j) { - resultShape[j] -= (lowPad[i] + highPad[i]); - } - - RankedTensorType newResultType = RankedTensorType::get( - resultShape, padded.getResultType().getElementType()); - auto temp = padded.getOperand(0); - in = tensor::ExpandShapeOp::create(rewriter, expanded.getLoc(), - newResultType, temp, - expanded.getReassociationIndices()); - rewriter.replaceOp(expanded, in); - rewriter.eraseOp(padded); - } else { - op.emitError("unexpected padding code structure"); - return failure(); - } - } - - ArrayRef startInputShape = - cast(in.getType()).getShape(); - int64_t dim = startInputShape.size() - 3; - switch (dim) { - case 3: { - rock::BottomUpTMBuilder inputBuilder( - rewriter, {"n", "g", "c", "h", "w", "d"}, startInputShape, loc); - inputBuilder.passThrough({"ni"}, {0}, {"n"}); - inputBuilder.passThrough({"hi", "wi", "di"}, {1, 2, 3}, {"h", "w", "d"}); - inputBuilder.passThrough({"gi", "ci"}, {4, 5}, {"g", "c"}); - auto inputAttr = inputBuilder.get(); - auto input = rock::TransformOp::create(rewriter, loc, in, inputAttr); - return input.getResult(); - } - case 2: { - return in; - } - case 1: { - // migraphx-to-tosa pipeline handles 1d convolution by converting - // 1 dimensional input into 2 dimensional. 1x1x3x10 (NGCH) becomes - // 1x1x3x1x10 (NHWGC). 
We are reproducing that here - int64_t h = startInputShape[3]; - rock::BottomUpTMBuilder filterBuilder(rewriter, {"n", "g", "c", "h"}, - startInputShape, loc); - filterBuilder.passThrough({"ni"}, {0}, {"n"}); - filterBuilder.unmerge({"hi", "wi"}, {1, 2}, {"h"}, {h, 1}); - filterBuilder.passThrough({"gi", "ci"}, {3, 4}, {"g", "c"}); - auto attr = filterBuilder.get(); - return rock::TransformOp::create(rewriter, loc, in, attr).getResult(); - } - default: - llvm_unreachable("unsupported cases"); - } - }; + Value input = transformInput(rewriter, loc, *maybeInput, conv.spatialDim); + Value filter = + transformFilter(rewriter, loc, op.getOperand(1), conv.spatialDim); - // Creating the final result shape RankedTensorType linalgResultType = cast(op.getResult(0).getType()); - ArrayRef linalgOutputShape = linalgResultType.getShape(); - SmallVector rockOutputShape(linalgOutputShape); - if (linalgOutputShape.size() - 3 == 3 || linalgOutputShape.size() - 3 == 1) { - rockOutputShape.clear(); - rockOutputShape.push_back(linalgOutputShape[0]); - rockOutputShape.insert(rockOutputShape.end(), - std::next(linalgOutputShape.begin(), 3), - linalgOutputShape.end()); - if (linalgOutputShape.size() - 3 == 1) - rockOutputShape.push_back(1); - rockOutputShape.push_back(linalgOutputShape[1]); - rockOutputShape.push_back(linalgOutputShape[2]); - } + SmallVector rockShape = + computeRockOutputShape(linalgResultType.getShape(), conv.spatialDim); RankedTensorType rockResultType = - RankedTensorType::get(rockOutputShape, linalgResultType.getElementType()); - Value output = bufferization::AllocTensorOp::create(rewriter, op.getLoc(), - rockResultType, {}); + RankedTensorType::get(rockShape, linalgResultType.getElementType()); + Value output = + bufferization::AllocTensorOp::create(rewriter, loc, rockResultType, {}); - auto maybeInput = getInput(op.getOperand(0)); - if (failed(maybeInput)) { - return failure(); - } - auto input = *maybeInput; - auto filter = getFilter(op.getOperand(1)); auto cop = 
rock::ConvOp::create(rewriter, loc, rockResultType, filter, input, output, /*features=*/nullptr, /*blockSize=*/nullptr, /*gridSize=*/nullptr, - convParams.padding, convParams.stride, - convParams.dilation, /*params=*/nullptr); + conv.padding, conv.stride, conv.dilation, + /*params=*/nullptr); // TODO: add splitk - if (convParams.perfConfig) { - cop->setAttr("perf_config", convParams.perfConfig); - } + if (conv.perfConfig) + cop->setAttr("perf_config", conv.perfConfig); - // Here we are going to emit layouts - switch (convParams.type) { - case rock::LinalgConvType::Conv3dNgchwdGfchwd: - cop->setAttr("filter_layout", - rewriter.getStrArrayAttr({"g", "k", "0", "1", "2", "c"})); - cop->setAttr("input_layout", rewriter.getStrArrayAttr( - {"ni", "0i", "1i", "2i", "gi", "ci"})); - cop->setAttr("output_layout", rewriter.getStrArrayAttr( - {"no", "0o", "1o", "2o", "go", "ko"})); - break; - case rock::LinalgConvType::Conv2dNgchwGfchw: - cop->setAttr("filter_layout", - rewriter.getStrArrayAttr({"g", "k", "c", "y", "x"})); - cop->setAttr("input_layout", - rewriter.getStrArrayAttr({"ni", "gi", "ci", "hi", "wi"})); - cop->setAttr("output_layout", - rewriter.getStrArrayAttr({"no", "go", "ko", "ho", "wo"})); - break; - case rock::LinalgConvType::Conv1dNgchGfch: - cop->setAttr("filter_layout", - rewriter.getStrArrayAttr({"g", "k", "y", "x", "c"})); - cop->setAttr("input_layout", - rewriter.getStrArrayAttr({"ni", "hi", "wi", "gi", "ci"})); - cop->setAttr("output_layout", - rewriter.getStrArrayAttr({"no", "ho", "wo", "go", "ko"})); - break; - default: - llvm_unreachable("edge case one"); - } + setConvLayoutAttrs(rewriter, cop, conv.type); - // output has type ["no", "0o", "1o", "2o", "go", "ko"] - // We need to reshape to ngfhwd - ArrayRef startResultShape = rockResultType.getShape(); - Value finalReshaped; - switch (convParams.type) { - case rock::LinalgConvType::Conv3dNgchwdGfchwd: { - rock::BottomUpTMBuilder resultBuilder( - rewriter, {"n", "h", "w", "d", "g", "f"}, 
startResultShape, loc); - resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); - resultBuilder.passThrough({"no"}, {0}, {"n"}); - resultBuilder.passThrough({"ho", "wo", "do"}, {3, 4, 5}, {"h", "w", "d"}); - auto resultAttr = resultBuilder.get(); - finalReshaped = - rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); - break; - } - case rock::LinalgConvType::Conv2dNgchwGfchw: { - finalReshaped = cop.getResult(); - break; - } - case rock::LinalgConvType::Conv1dNgchGfch: { - rock::BottomUpTMBuilder resultBuilder(rewriter, {"n", "h", "w", "g", "f"}, - startResultShape, loc); - resultBuilder.passThrough({"no"}, {0}, {"n"}); - resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); - resultBuilder.merge("ho", 3, {"h", "w"}); - auto resultAttr = resultBuilder.get(); - finalReshaped = - rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); - break; - } - default: { - return op.emitError("unimplemented final reshape"); - } - } - - rewriter.replaceOp(op, finalReshaped); + Value result = + transformOutput(rewriter, loc, cop.getResult(), conv.spatialDim); + rewriter.replaceOp(op, result); return success(); } diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir new file mode 100644 index 000000000000..8ea0df3923f3 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir @@ -0,0 +1,102 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -rock-view-to-transform -verify-diagnostics --split-input-file | FileCheck %s + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> + +// CHECK-LABEL: func.func @conv_3d( +// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 
2 : index, 2 : index], filter_layout = ["g", "k", "0", "1", "2", "c"], input_layout = ["ni", "0i", "1i", "2i", "gi", "ci"], output_layout = ["no", "0o", "1o", "2o", "go", "ko"], padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index], strides = [2 : index, 2 : index, 2 : index]} +func.func @conv_3d(%arg0: tensor<64xf32>, %arg1: tensor<750xf32>, %arg2: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> + %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [2, 4, 2, 2, 2] : tensor<64xf32> into tensor<2x4x2x2x2xf32> + %expanded_0 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4, 5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x1x3x5x5x5xf32> + %expanded_1 = tensor.expand_shape %arg2 [[0, 1, 2, 3, 4, 5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<96xf32> into tensor<1x4x3x2x2x2xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_0, %expanded_1 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_3: f32, %out: f32): + %3 = arith.mulf %in, %in_3 : f32 + %4 = arith.addf %out, %3 : f32 + linalg.yield %4 : f32 + } -> tensor<2x1x4x2x2x2xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<2x1x4x2x2x2xf32> into tensor<2x4x2x2x2xf32> + %1 = tensor.empty() : tensor<2x4x2x2x2xf32> + %2 = linalg.add ins(%collapsed, %expanded : tensor<2x4x2x2x2xf32>, tensor<2x4x2x2x2xf32>) outs(%1 : tensor<2x4x2x2x2xf32>) -> tensor<2x4x2x2x2xf32> + %collapsed_2 = tensor.collapse_shape %2 [[0, 1, 2, 3, 4]] : tensor<2x4x2x2x2xf32> into tensor<64xf32> + return %collapsed_2 : tensor<64xf32> 
+} + +// ----- + +#map3 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 4 + d6 * 2, d4 * 5 + d7 * 3)> +#map4 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map5 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> + +// CHECK-LABEL: func.func @conv_2d +// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 3 : index], filter_layout = ["g", "k", "c", "y", "x"], input_layout = ["ni", "gi", "ci", "hi", "wi"], output_layout = ["no", "go", "ko", "ho", "wo"], padding = [2 : index, 2 : index, 2 : index, 2 : index], strides = [4 : index, 5 : index]} +func.func @conv_2d(%arg0: tensor<122016xf32>, %arg1: tensor<320xf32>) -> tensor<8208xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %cst = arith.constant dense<0.000000e+00> : tensor<2x2x4x27x19xf32> + %cst_0 = arith.constant 0.000000e+00 : f32 + %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [2, 4, 123, 124] : tensor<122016xf32> into tensor<2x4x123x124xf32> + %padded = tensor.pad %expanded low[0, 0, 2, 2] high[0, 0, 2, 2] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): + tensor.yield %cst_0 : f32 + } : tensor<2x4x123x124xf32> to tensor<2x4x127x128xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [2, 2, 2, 127, 128] : tensor<2x4x127x128xf32> into tensor<2x2x2x127x128xf32> + %expanded_2 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [2, 4, 2, 4, 5] : tensor<320xf32> into tensor<2x4x2x4x5xf32> + %0 = linalg.generic {indexing_maps = [#map3, #map4, #map5], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x2x2x127x128xf32>, tensor<2x4x2x4x5xf32>) outs(%cst : tensor<2x2x4x27x19xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 2 : i64, pad = [2, 2, 2, 2], stride = [4, 5]} { + ^bb0(%in: f32, %in_3: f32, %out: f32): + %1 = arith.mulf %in, %in_3 : f32 + %2 = arith.addf 
%out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<2x2x4x27x19xf32> + %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3, 4]] : tensor<2x2x4x27x19xf32> into tensor<8208xf32> + return %collapsed : tensor<8208xf32> +} + +// ----- + +#map6 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map7 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map8 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d +// CHECK: rock.conv({{.*}}) {dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "y", "x", "c"], input_layout = ["ni", "hi", "wi", "gi", "ci"], output_layout = ["no", "ho", "wo", "go", "ko"], padding = [3 : index, 3 : index, 0 : index, 0 : index], strides = [1 : index, 1 : index]} +func.func @conv_1d(%arg0: tensor<14336xf32>, %arg1: tensor<672xf32>, %arg2: tensor<1344xf32>) -> tensor<14336xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x64x224xf32> + %cst_0 = arith.constant 0.000000e+00 : f32 + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [1, 3, 224] : tensor<672xf32> into tensor<1x3x224xf32> + %padded = tensor.pad %expanded low[0, 0, 3] high[0, 0, 3] { + ^bb0(%arg3: index, %arg4: index, %arg5: index): + tensor.yield %cst_0 : f32 + } : tensor<1x3x224xf32> to tensor<1x3x230xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 230] : tensor<1x3x230xf32> into tensor<1x1x3x230xf32> + %expanded_2 = tensor.expand_shape %arg2 [[0, 1, 2, 3]] output_shape [1, 64, 3, 7] : tensor<1344xf32> into tensor<1x64x3x7xf32> + %0 = linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x230xf32>, tensor<1x64x3x7xf32>) outs(%cst : tensor<1x1x64x224xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [3, 3], stride = [1]} { + ^bb0(%in: f32, %in_3: 
f32, %out: f32): + %1 = arith.mulf %in, %in_3 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x64x224xf32> + %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3]] : tensor<1x1x64x224xf32> into tensor<14336xf32> + return %collapsed : tensor<14336xf32> +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> + +// CHECK-LABEL: func.func @mlir_perf_config +// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 2 : index, 2 : index], filter_layout = ["g", "k", "0", "1", "2", "c"], input_layout = ["ni", "0i", "1i", "2i", "gi", "ci"], output_layout = ["no", "0o", "1o", "2o", "go", "ko"], padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", strides = [2 : index, 2 : index, 2 : index]} +func.func @mlir_perf_config(%arg0: tensor<750xf32>, %arg1: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> + %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4, 5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x1x3x5x5x5xf32> + %expanded_0 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4, 5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<96xf32> into tensor<1x4x3x2x2x2xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded, %expanded_0 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = 
"v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_1: f32, %out: f32): + %1 = arith.mulf %in, %in_1 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<2x1x4x2x2x2xf32> + %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3, 4, 5]] : tensor<2x1x4x2x2x2xf32> into tensor<64xf32> + return %collapsed : tensor<64xf32> +} From 7abde9b04fab61db917d7a7048de3559bb5e5f5d Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 25 Feb 2026 15:58:12 +0000 Subject: [PATCH 4/8] Added comments --- mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp index 11ed67331c0b..e4c4aded48a3 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp @@ -48,6 +48,8 @@ static void populateLinalgToRockDialectConversion(ConversionTarget &target) { return std::nullopt; } + // Convolution linalg.generic has reduction iteration type. It is not + // a legal operation in that case linalg::GenericOp castedOp = dyn_cast(op); if (castedOp && llvm::any_of(castedOp.getIteratorTypesArray(), [](auto type) { From f58acdda833f1fc86de28d43c38fc8c1861de288 Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 4 Mar 2026 04:24:50 +0000 Subject: [PATCH 5/8] Simplify changes and removed all the rock.transforms --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 198 +++++------------- 1 file changed, 50 insertions(+), 148 deletions(-) diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 51a1a220d2e0..8b8a1c133815 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -166,59 +166,22 @@ static int64_t getSpatialDim(rock::LinalgConvType type) { } /// Set filter_layout, input_layout, and output_layout on a rock.conv op. 
+/// Layouts match the linalg convention: GKC*, NGC*, NGK*. static void setConvLayoutAttrs(OpBuilder &builder, rock::ConvOp cop, - rock::LinalgConvType type) { - auto set = [&](StringRef name, ArrayRef layout) { - cop->setAttr(name, builder.getStrArrayAttr(layout)); + int64_t spatialDim) { + auto *ctx = builder.getContext(); + auto setLayout = [&](StringRef attrName, ArrayRef prefix, + StringRef suffix) { + SmallVector layout; + for (StringRef dim : prefix) + layout.push_back(StringAttr::get(ctx, dim)); + for (int64_t i = 0; i < spatialDim; ++i) + layout.push_back(StringAttr::get(ctx, Twine(i) + suffix)); + cop->setAttr(attrName, builder.getArrayAttr(layout)); }; - switch (type) { - case rock::LinalgConvType::Conv3dNgchwdGfchwd: - set("filter_layout", {"g", "k", "0", "1", "2", "c"}); - set("input_layout", {"ni", "0i", "1i", "2i", "gi", "ci"}); - set("output_layout", {"no", "0o", "1o", "2o", "go", "ko"}); - break; - case rock::LinalgConvType::Conv2dNgchwGfchw: - set("filter_layout", {"g", "k", "c", "y", "x"}); - set("input_layout", {"ni", "gi", "ci", "hi", "wi"}); - set("output_layout", {"no", "go", "ko", "ho", "wo"}); - break; - case rock::LinalgConvType::Conv1dNgchGfch: - set("filter_layout", {"g", "k", "y", "x", "c"}); - set("input_layout", {"ni", "hi", "wi", "gi", "ci"}); - set("output_layout", {"no", "ho", "wo", "go", "ko"}); - break; - } -} - -/// Transform filter from GFC* layout to GF*C layout for rock.conv. -/// 2D is already in the correct layout. 
-static Value transformFilter(OpBuilder &builder, Location loc, Value filter, - int64_t spatialDim) { - ArrayRef shape = - cast(filter.getType()).getShape(); - switch (spatialDim) { - case 3: { - rock::BottomUpTMBuilder b(builder, {"g", "f", "c", "h", "w", "d"}, shape, - loc); - b.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); - b.passThrough({"hk", "wk", "dk"}, {2, 3, 4}, {"h", "w", "d"}); - b.passThrough({"ck"}, {5}, {"c"}); - return rock::TransformOp::create(builder, loc, filter, b.get()); - } - case 2: - return filter; - case 1: { - // Conv1D is expanded into Conv2D (matching migraphx-to-tosa): unmerge - // H into (H, W=1). - rock::BottomUpTMBuilder b(builder, {"g", "f", "c", "h"}, shape, loc); - b.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); - b.unmerge({"hk", "wk"}, {2, 3}, {"h"}, {shape[3], 1}); - b.passThrough({"ck"}, {4}, {"c"}); - return rock::TransformOp::create(builder, loc, filter, b.get()); - } - default: - llvm_unreachable("unsupported spatial dim for filter transform"); - } + setLayout("filter_layout", {"g", "k", "c"}, ""); + setLayout("input_layout", {"ni", "gi", "ci"}, "i"); + setLayout("output_layout", {"no", "go", "ko"}, "o"); } /// Remove the tensor.pad + tensor.expand_shape pattern emitted by @@ -274,85 +237,6 @@ removePaddingFromInput(ConversionPatternRewriter &rewriter, return result; } -/// Transform input from NGC* layout to N*GC layout for rock.conv. -/// 2D is already in the correct layout. 
-static Value transformInput(OpBuilder &builder, Location loc, Value input, - int64_t spatialDim) { - ArrayRef shape = - cast(input.getType()).getShape(); - switch (spatialDim) { - case 3: { - rock::BottomUpTMBuilder b(builder, {"n", "g", "c", "h", "w", "d"}, shape, - loc); - b.passThrough({"ni"}, {0}, {"n"}); - b.passThrough({"hi", "wi", "di"}, {1, 2, 3}, {"h", "w", "d"}); - b.passThrough({"gi", "ci"}, {4, 5}, {"g", "c"}); - return rock::TransformOp::create(builder, loc, input, b.get()); - } - case 2: - return input; - case 1: { - // Conv1D is expanded into Conv2D (matching migraphx-to-tosa): unmerge - // H into (H, W=1). - int64_t h = shape[3]; - rock::BottomUpTMBuilder b(builder, {"n", "g", "c", "h"}, shape, loc); - b.passThrough({"ni"}, {0}, {"n"}); - b.unmerge({"hi", "wi"}, {1, 2}, {"h"}, {h, 1}); - b.passThrough({"gi", "ci"}, {3, 4}, {"g", "c"}); - return rock::TransformOp::create(builder, loc, input, b.get()); - } - default: - llvm_unreachable("unsupported spatial dim for input transform"); - } -} - -/// Compute the rock output shape from the linalg output shape. -/// Linalg layout is NGF* while rock needs N*GF (with extra W=1 for 1D). -static SmallVector -computeRockOutputShape(ArrayRef linalgShape, int64_t spatialDim) { - if (spatialDim == 2) - return SmallVector(linalgShape); - SmallVector shape; - shape.push_back(linalgShape[0]); - shape.insert(shape.end(), std::next(linalgShape.begin(), 3), - linalgShape.end()); - if (spatialDim == 1) - shape.push_back(1); // Conv1D expanded to Conv2D: extra W=1 - shape.push_back(linalgShape[1]); - shape.push_back(linalgShape[2]); - return shape; -} - -/// Transform rock.conv output back to the linalg output layout. -/// 2D needs no transform. 
-static Value transformOutput(OpBuilder &builder, Location loc, Value convResult, - int64_t spatialDim) { - if (spatialDim == 2) - return convResult; - ArrayRef shape = - cast(convResult.getType()).getShape(); - switch (spatialDim) { - case 3: { - rock::BottomUpTMBuilder b(builder, {"n", "h", "w", "d", "g", "f"}, shape, - loc); - b.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); - b.passThrough({"no"}, {0}, {"n"}); - b.passThrough({"ho", "wo", "do"}, {3, 4, 5}, {"h", "w", "d"}); - return rock::TransformOp::create(builder, loc, convResult, b.get()); - } - case 1: { - // Conv1D was expanded into Conv2D: merge (H, W=1) back into H. - rock::BottomUpTMBuilder b(builder, {"n", "h", "w", "g", "f"}, shape, loc); - b.passThrough({"no"}, {0}, {"n"}); - b.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); - b.merge("ho", 3, {"h", "w"}); - return rock::TransformOp::create(builder, loc, convResult, b.get()); - } - default: - llvm_unreachable("unsupported spatial dim for output transform"); - } -} - namespace { struct ConvLinalgConverter final : public OpConversionPattern { @@ -373,9 +257,6 @@ struct ConvLinalgConverter final FailureOr ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, linalg::GenericOp op) const { - // FIXME: In the future, strides, dilation, and padding can be extracted - // by matching the AffineExpr syntax tree. The convolution dimension and - // layout could also be inferred from the affine_map. auto name = op->getAttrOfType("conv_op"); if (!name) return failure(); @@ -388,9 +269,8 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, llvm::transform( cast(arr).getValue(), std::back_inserter(values), [](Attribute val) { return cast(val).getInt(); }); - // Conv1D is expanded into Conv2D to match the migraphx-to-tosa pipeline. - // Append identity defaults (stride=1, dilation=1, pad=0) for the extra - // spatial dimension. 
+ // Conv1D is expanded into Conv2D: append identity defaults for the + // extra spatial dimension (stride=1, dilation=1, pad=0). if (spatialDim == 1) values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); @@ -411,8 +291,7 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, interleavedPad.push_back(originalPadding[i]); interleavedPad.push_back(originalPadding[numSpatial + i]); } - // For Conv1D is expanded into Conv2D like the tosa pipeline, so - // we set the last dimension have 0 padding to stay consistent. + // Conv1D is expanded into Conv2D if (spatialDim == 1) { interleavedPad.push_back(rewriter.getIndexAttr(0)); interleavedPad.push_back(rewriter.getIndexAttr(0)); @@ -441,19 +320,36 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( if (failed(maybeInput)) return failure(); - Value input = transformInput(rewriter, loc, *maybeInput, conv.spatialDim); - Value filter = - transformFilter(rewriter, loc, op.getOperand(1), conv.spatialDim); + Value input = *maybeInput; + Value filter = op.getOperand(1); + + // Conv1D is expanded into Conv2D: unmerge the single spatial dim + // into (spatial, W=1) for filter and input. 
+ int64_t effectiveSpatialDim = conv.spatialDim; + if (conv.spatialDim == 1) { + effectiveSpatialDim = 2; + auto filterShape = cast(filter.getType()).getShape(); + rock::BottomUpTMBuilder builder(rewriter, {"g", "k", "c", "0"}, filterShape, loc); + builder.passThrough({"gf", "kf", "cf"}, {0, 1, 2}, {"g", "k", "c"}); + builder.unmerge({"0f", "1f"}, {3, 4}, "0", {filterShape[3], 1}); + filter = rock::TransformOp::create(rewriter, loc, filter, builder.get()); + + auto inputShape = cast(input.getType()).getShape(); + rock::BottomUpTMBuilder b(rewriter, {"n", "g", "c", "0"}, inputShape, loc); + b.passThrough({"nu", "gu", "cu"}, {0, 1, 2}, {"n", "g", "c"}); + b.unmerge({"0u", "1u"}, {3, 4}, "0", {inputShape[3], 1}); + input = rock::TransformOp::create(rewriter, loc, input, b.get()); + } RankedTensorType linalgResultType = cast(op.getResult(0).getType()); - SmallVector rockShape = - computeRockOutputShape(linalgResultType.getShape(), conv.spatialDim); + SmallVector rockShape(linalgResultType.getShape()); + if (conv.spatialDim == 1) + rockShape.push_back(1); RankedTensorType rockResultType = RankedTensorType::get(rockShape, linalgResultType.getElementType()); Value output = bufferization::AllocTensorOp::create(rewriter, loc, rockResultType, {}); - auto cop = rock::ConvOp::create(rewriter, loc, rockResultType, filter, input, output, /*features=*/nullptr, /*blockSize=*/nullptr, /*gridSize=*/nullptr, @@ -462,11 +358,17 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( // TODO: add splitk if (conv.perfConfig) cop->setAttr("perf_config", conv.perfConfig); + setConvLayoutAttrs(rewriter, cop, effectiveSpatialDim); + + Value result = cop.getResult(); + if (conv.spatialDim == 1) { + auto shape = cast(result.getType()).getShape(); + rock::BottomUpTMBuilder b(rewriter, {"n", "g", "k", "0", "1"}, shape, loc); + b.passThrough({"no", "go", "ko"}, {0, 1, 2}, {"n", "g", "k"}); + b.merge("0o", 3, {"0", "1"}); + result = rock::TransformOp::create(rewriter, loc, result, b.get()); + } - 
setConvLayoutAttrs(rewriter, cop, conv.type); - - Value result = - transformOutput(rewriter, loc, cop.getResult(), conv.spatialDim); rewriter.replaceOp(op, result); return success(); } From 7de89e5948e8a421c1b17507814bb40a56d02751 Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 4 Mar 2026 16:44:10 +0000 Subject: [PATCH 6/8] Address comments and added more testcase --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 43 ++- .../LinalgToRock/LinalgToRockPass.cpp | 8 +- .../LinalgToRock/linalg-to-rock-conv-1d.mlir | 235 +++++++++++++++++ .../LinalgToRock/linalg-to-rock-conv-2d.mlir | 209 +++++++++++++++ .../LinalgToRock/linalg-to-rock-conv-3d.mlir | 247 ++++++++++++++++++ .../LinalgToRock/linalg-to-rock-conv.mlir | 102 -------- .../LinalgToRock/linalg-to-rock-invalid.mlir | 104 ++++++++ 7 files changed, 828 insertions(+), 120 deletions(-) create mode 100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir create mode 100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir create mode 100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir delete mode 100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 8b8a1c133815..926ec53f47d3 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -208,7 +208,7 @@ removePaddingFromInput(ConversionPatternRewriter &rewriter, return failure(); } auto padded = expanded->getOperand(0).getDefiningOp(); - if (!padded) { + if (!padded || !padded->hasOneUse()) { op.emitError("unexpected padding code structure"); return failure(); } @@ -222,8 +222,8 @@ removePaddingFromInput(ConversionPatternRewriter &rewriter, // Padding is defined in pre-expand space. The spatial dims are at the // tail of both tensors (expand_shape only splits an earlier dim), so // align from the end. 
-    for (int64_t i = numPadDims - 1, j = numExpandedDims - 1;
-         i >= 0 && j >= 0; --i, --j) {
+    for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; i >= 0 && j >= 0;
+         --i, --j) {
       resultShape[j] -= (lowPad[i] + highPad[i]);
     }
 
@@ -262,9 +262,17 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter,
     return failure();
   rock::LinalgConvType convType = name.getValue();
   int64_t spatialDim = getSpatialDim(convType);
+  // Conv1D is expanded into Conv2D. For validation we use effectiveDim
+  // instead, because it has one more stride/dilation entry for the
+  // expanded dimension.
+  int64_t effectiveDim = (spatialDim == 1) ? spatialDim + 1 : spatialDim;
 
   auto convertToArrayAttr =
       [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr {
+    if(!arr || !isa(arr)){
+      return ArrayAttr {};
+    }
+
     SmallVector values;
     llvm::transform(
         cast(arr).getValue(), std::back_inserter(values),
@@ -272,19 +280,26 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter,
     // Conv1D is expanded into Conv2D: append identity defaults for the
     // extra spatial dimension (stride=1, dilation=1, pad=0).
     if (spatialDim == 1)
-      values.insert(values.end(), dimOneDefaults.begin(),
-                    dimOneDefaults.end());
+      values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end());
     return rewriter.getIndexArrayAttr(values);
   };
 
   auto dilation =
-      convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/1);
+      convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/{1});
   auto stride =
-      convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/1);
+      convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/{1});
+  if (!dilation || !stride || (int64_t)dilation.size() != effectiveDim || (int64_t)stride.size() != effectiveDim){
+    op.emitError("invalid dilation or stride");
+    return failure();
+  }
 
   // Input format: [dim0_low, dim1_low, ..., dim0_high, dim1_high, ...]
   // Rock format: [dim0_low, dim0_high, dim1_low, dim1_high, ...] 
- auto originalPadding = convertToArrayAttr(op->getAttr("pad")).getValue(); + auto originalPadding = convertToArrayAttr(op->getAttr("pad")); + if(!originalPadding){ + op.emitError("no padding found"); + return failure(); + } int64_t numSpatial = originalPadding.size() / 2; SmallVector interleavedPad; for (int64_t i = 0; i < numSpatial; ++i) { @@ -297,12 +312,15 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, interleavedPad.push_back(rewriter.getIndexAttr(0)); } auto padding = rewriter.getArrayAttr(interleavedPad); - if (!padding || !dilation || !stride) + // note that Conv1D is expanded into Conv2D + if(effectiveDim*2 != (int64_t)padding.size()){ + op.emitError("invalid number of padding"); return failure(); + } StringAttr perfConfig = op->getAttrOfType("perf_config"); - return ConvFields{convType, spatialDim, padding, stride, dilation, - perfConfig}; + return ConvFields{convType, spatialDim, padding, + stride, dilation, perfConfig}; } LogicalResult ConvLinalgConverter::matchAndRewrite( @@ -329,7 +347,8 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( if (conv.spatialDim == 1) { effectiveSpatialDim = 2; auto filterShape = cast(filter.getType()).getShape(); - rock::BottomUpTMBuilder builder(rewriter, {"g", "k", "c", "0"}, filterShape, loc); + rock::BottomUpTMBuilder builder(rewriter, {"g", "k", "c", "0"}, filterShape, + loc); builder.passThrough({"gf", "kf", "cf"}, {0, 1, 2}, {"g", "k", "c"}); builder.unmerge({"0f", "1f"}, {3, 4}, "0", {filterShape[3], 1}); filter = rock::TransformOp::create(rewriter, loc, filter, builder.get()); diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp index e4c4aded48a3..7f3fb294abf7 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp @@ -48,13 +48,9 @@ static void populateLinalgToRockDialectConversion(ConversionTarget &target) { return std::nullopt; } - // 
Convolution linalg.generic has reduction iteration type. It is not - // a legal operation in that case + // Convolution has attributes. linalg::GenericOp castedOp = dyn_cast(op); - if (castedOp && - llvm::any_of(castedOp.getIteratorTypesArray(), [](auto type) { - return linalg::isReductionIterator(type); - })) { + if (castedOp && castedOp->hasAttr("conv_op")) { return false; } diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir new file mode 100644 index 000000000000..09ce6d9a36a7 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir @@ -0,0 +1,235 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x3 +// stride=1, dilation=1, padding=0, group=1 + +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : 
index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_basic(%arg0: tensor<30xf32>, %arg1: tensor<54xf32>) -> tensor<48xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x8xf32> into tensor<1x6x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x8xf32> into tensor<48xf32> + return %collapsed_3 : tensor<48xf32> + } +} + +// ----- + +// Input: NCL = 1x3x20, Filter: FCL = 6x3x3 +// stride=1, dilation=3, padding=0, group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5 * 3)> +#map1 = affine_map<(d0, 
d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [3 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_dilation(%arg0: tensor<60xf32>, %arg1: tensor<54xf32>) -> tensor<84xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 20] : tensor<60xf32> into tensor<1x3x20xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 20] : tensor<1x3x20xf32> into tensor<1x1x3x20xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = 
arith.constant dense<0.000000e+00> : tensor<1x1x6x14xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x14xf32>) attrs = {conv_op = #rock, dilation = [3], group = 1 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x14xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x14xf32> into tensor<1x6x14xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x14xf32> into tensor<84xf32> + return %collapsed_3 : tensor<84xf32> + } +} + +// ----- + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x5 +// stride=1, dilation=1, padding=[2,2], group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_3]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] 
+// CHECK-SAME: padding = [2 : index, 2 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +module { + func.func @conv_1d_padding(%arg0: tensor<30xf32>, %arg1: tensor<90xf32>) -> tensor<60xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 5] : tensor<90xf32> into tensor<6x3x5xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 2] high[0, 0, 2] { + ^bb0(%arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst : f32 + } : tensor<1x3x10xf32> to tensor<1x3x14xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 14] : tensor<1x3x14xf32> into tensor<1x1x3x14xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 5] : tensor<6x3x5xf32> into tensor<1x6x3x5xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x14xf32>, tensor<1x6x3x5xf32>) outs(%cst_3 : tensor<1x1x6x10xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [2, 2], stride = [1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x10xf32> into tensor<1x6x10xf32> + %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2]] : 
tensor<1x6x10xf32> into tensor<60xf32> + return %collapsed_4 : tensor<60xf32> + } +} + +// ----- + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x3 +// stride=2, dilation=1, padding=0, group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 * 2 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_stride( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_stride(%arg0: tensor<30xf32>, %arg1: tensor<54xf32>) -> tensor<24xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + 
%expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x4xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x4xf32> into tensor<1x6x4xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x4xf32> into tensor<24xf32> + return %collapsed_3 : tensor<24xf32> + } +} + +// ----- + +// Input: NCL = 1x6x10, Filter: F(C/G)L = 9x2x3 (group=3, C_per_group=2, F_per_group=3) +// stride=1, dilation=1, padding=0, group=3 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = 
bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_groups(%arg0: tensor<60xf32>, %arg1: tensor<54xf32>) -> tensor<72xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [9, 2, 3] : tensor<54xf32> into tensor<9x2x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 6, 10] : tensor<60xf32> into tensor<1x6x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 3, 2, 10] : tensor<1x6x10xf32> into tensor<1x3x2x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [3, 3, 2, 3] : tensor<9x2x3xf32> into tensor<3x3x2x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10xf32>, tensor<3x3x2x3xf32>) outs(%cst : tensor<1x3x3x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 3 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x3x3x8xf32> 
into tensor<1x9x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x9x8xf32> into tensor<72xf32> + return %collapsed_3 : tensor<72xf32> + } +} diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir new file mode 100644 index 000000000000..174f877e8a04 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir @@ -0,0 +1,209 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +// CHECK-LABEL: func.func @conv_2d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_basic(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<384xf32> 
attributes {kernel, arch="##TOKEN_ARCH##"}{ + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x8x8xf32> into tensor<1x6x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x8x8xf32> into tensor<384xf32> + return %collapsed_3 : tensor<384xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = 
rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 3 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6 * 2, d4 + d7 * 3)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_dilation(%arg0: tensor<1200xf32>, %arg1: tensor<162xf32>) -> tensor<1344xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 20, 20] : tensor<1200xf32> into tensor<1x3x20x20xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 20, 20] : tensor<1x3x20x20xf32> into tensor<1x1x3x20x20xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x16x14xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20x20xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x16x14xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 1 : i64, pad = [0, 0, 0, 
0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x16x14xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x16x14xf32> into tensor<1x6x16x14xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x16x14xf32> into tensor<1344xf32> + return %collapsed_3 : tensor<1344xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_1]], %[[expanded_3]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [1 : index, 1 : index, 1 : index, 1 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_padding(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<600xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape 
[6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): + tensor.yield %cst : f32 + } : tensor<1x3x10x10xf32> to tensor<1x3x12x12xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 12, 12] : tensor<1x3x12x12xf32> into tensor<1x1x3x12x12xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12xf32>, tensor<1x6x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [1, 1, 1, 1], stride = [1, 1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x10x10xf32> into tensor<1x6x10x10xf32> + %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x10x10xf32> into tensor<600xf32> + return %collapsed_4 : tensor<600xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_stride( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: 
%[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 3 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 2 + d6, d4 * 3 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_stride(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<72xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x3xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, 
tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x3xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [2, 3]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4x3xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x4x3xf32> into tensor<1x6x4x3xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x4x3xf32> into tensor<72xf32> + return %collapsed_3 : tensor<72xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_groups(%arg0: tensor<600xf32>, %arg1: tensor<162xf32>) -> 
tensor<576xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [9, 2, 3, 3] : tensor<162xf32> into tensor<9x2x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 6, 10, 10] : tensor<600xf32> into tensor<1x6x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 3, 2, 10, 10] : tensor<1x6x10x10xf32> into tensor<1x3x2x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [3, 3, 2, 3, 3] : tensor<9x2x3x3xf32> into tensor<3x3x2x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10xf32>, tensor<3x3x2x3x3xf32>) outs(%cst : tensor<1x3x3x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 3 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x3x3x8x8xf32> into tensor<1x9x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x9x8x8xf32> into tensor<576xf32> + return %collapsed_3 : tensor<576xf32> + } +} diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir new file mode 100644 index 000000000000..d6a0c6773963 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir @@ -0,0 +1,247 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, 
d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_basic(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], 
[4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7 * 2, d4 + d8 * 2, d5 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 2 : index, 2 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", 
"0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_dilation(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<1296xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x6x6x6xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x6x6x6xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x6x6x6xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : 
tensor<1x1x6x6x6x6xf32> into tensor<1x6x6x6x6xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x6x6x6xf32> into tensor<1296xf32> + return %collapsed_3 : tensor<1296xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_1]], %[[expanded_3]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [1 : index, 1 : index, 1 : index, 1 : index, 1 : index, 1 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +module { + func.func @conv_3d_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<6000xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 
10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 1, 1, 1] high[0, 0, 1, 1, 1] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x3x10x10x10xf32> to tensor<1x3x12x12x12xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 12, 12, 12] : tensor<1x3x12x12x12xf32> into tensor<1x1x3x12x12x12xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12x12xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [1, 1, 1, 1, 1, 1], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10x10x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x10x10x10xf32> into tensor<1x6x10x10x10xf32> + %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x10x10x10xf32> into tensor<6000xf32> + return %collapsed_4 : tensor<6000xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7, d4 * 2 + d8, d5 * 2 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_stride( 
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 2 : index, 2 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<384xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x4x4xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", 
"parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x4x4xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4x4x4xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x4x4x4xf32> into tensor<1x6x4x4x4xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x4x4x4xf32> into tensor<384xf32> + return %collapsed_3 : tensor<384xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : 
index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_groups(%arg0: tensor<6000xf32>, %arg1: tensor<486xf32>) -> tensor<4608xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [9, 2, 3, 3, 3] : tensor<486xf32> into tensor<9x2x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 6, 10, 10, 10] : tensor<6000xf32> into tensor<1x6x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 3, 2, 10, 10, 10] : tensor<1x6x10x10x10xf32> into tensor<1x3x2x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [3, 3, 2, 3, 3, 3] : tensor<9x2x3x3x3xf32> into tensor<3x3x2x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10x10xf32>, tensor<3x3x2x3x3x3xf32>) outs(%cst : tensor<1x3x3x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 3 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x3x3x8x8x8xf32> into tensor<1x9x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x9x8x8x8xf32> into tensor<4608xf32> + return %collapsed_3 : tensor<4608xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> 
(d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_perf_config( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 2 : index, 2 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1" +// CHECK-SAME: strides = [2 : index, 2 : index, 2 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_perf_config(%arg0: tensor<750xf32>, %arg1: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [4, 3, 2, 2, 2] : tensor<96xf32> into tensor<4x3x2x2x2xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [2, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x3x5x5x5xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<2x3x5x5x5xf32> into 
tensor<2x1x3x5x5x5xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<4x3x2x2x2xf32> into tensor<1x4x3x2x2x2xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<2x1x4x2x2x2xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<2x1x4x2x2x2xf32> into tensor<2x4x2x2x2xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<2x4x2x2x2xf32> into tensor<64xf32> + return %collapsed_3 : tensor<64xf32> + } +} + diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir deleted file mode 100644 index 8ea0df3923f3..000000000000 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir +++ /dev/null @@ -1,102 +0,0 @@ -// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -rock-view-to-transform -verify-diagnostics --split-input-file | FileCheck %s - -#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> -#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> -#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> - -// CHECK-LABEL: func.func @conv_3d( -// CHECK: rock.conv({{.*}}) 
{dilations = [2 : index, 2 : index, 2 : index], filter_layout = ["g", "k", "0", "1", "2", "c"], input_layout = ["ni", "0i", "1i", "2i", "gi", "ci"], output_layout = ["no", "0o", "1o", "2o", "go", "ko"], padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index], strides = [2 : index, 2 : index, 2 : index]} -func.func @conv_3d(%arg0: tensor<64xf32>, %arg1: tensor<750xf32>, %arg2: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { - %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> - %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [2, 4, 2, 2, 2] : tensor<64xf32> into tensor<2x4x2x2x2xf32> - %expanded_0 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4, 5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x1x3x5x5x5xf32> - %expanded_1 = tensor.expand_shape %arg2 [[0, 1, 2, 3, 4, 5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<96xf32> into tensor<1x4x3x2x2x2xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_0, %expanded_1 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { - ^bb0(%in: f32, %in_3: f32, %out: f32): - %3 = arith.mulf %in, %in_3 : f32 - %4 = arith.addf %out, %3 : f32 - linalg.yield %4 : f32 - } -> tensor<2x1x4x2x2x2xf32> - %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<2x1x4x2x2x2xf32> into tensor<2x4x2x2x2xf32> - %1 = tensor.empty() : tensor<2x4x2x2x2xf32> - %2 = linalg.add ins(%collapsed, %expanded : tensor<2x4x2x2x2xf32>, tensor<2x4x2x2x2xf32>) outs(%1 : tensor<2x4x2x2x2xf32>) -> tensor<2x4x2x2x2xf32> - %collapsed_2 = tensor.collapse_shape %2 [[0, 1, 2, 3, 4]] : tensor<2x4x2x2x2xf32> into tensor<64xf32> - return 
%collapsed_2 : tensor<64xf32> -} - -// ----- - -#map3 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 4 + d6 * 2, d4 * 5 + d7 * 3)> -#map4 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> -#map5 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> - -// CHECK-LABEL: func.func @conv_2d -// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 3 : index], filter_layout = ["g", "k", "c", "y", "x"], input_layout = ["ni", "gi", "ci", "hi", "wi"], output_layout = ["no", "go", "ko", "ho", "wo"], padding = [2 : index, 2 : index, 2 : index, 2 : index], strides = [4 : index, 5 : index]} -func.func @conv_2d(%arg0: tensor<122016xf32>, %arg1: tensor<320xf32>) -> tensor<8208xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { - %cst = arith.constant dense<0.000000e+00> : tensor<2x2x4x27x19xf32> - %cst_0 = arith.constant 0.000000e+00 : f32 - %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [2, 4, 123, 124] : tensor<122016xf32> into tensor<2x4x123x124xf32> - %padded = tensor.pad %expanded low[0, 0, 2, 2] high[0, 0, 2, 2] { - ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): - tensor.yield %cst_0 : f32 - } : tensor<2x4x123x124xf32> to tensor<2x4x127x128xf32> - %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [2, 2, 2, 127, 128] : tensor<2x4x127x128xf32> into tensor<2x2x2x127x128xf32> - %expanded_2 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [2, 4, 2, 4, 5] : tensor<320xf32> into tensor<2x4x2x4x5xf32> - %0 = linalg.generic {indexing_maps = [#map3, #map4, #map5], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x2x2x127x128xf32>, tensor<2x4x2x4x5xf32>) outs(%cst : tensor<2x2x4x27x19xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 2 : i64, pad = [2, 2, 2, 2], stride = [4, 5]} { - ^bb0(%in: f32, %in_3: f32, %out: f32): - %1 = arith.mulf %in, 
%in_3 : f32 - %2 = arith.addf %out, %1 : f32 - linalg.yield %2 : f32 - } -> tensor<2x2x4x27x19xf32> - %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3, 4]] : tensor<2x2x4x27x19xf32> into tensor<8208xf32> - return %collapsed : tensor<8208xf32> -} - -// ----- - -#map6 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> -#map7 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> -#map8 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> -// CHECK-LABEL: func.func @conv_1d -// CHECK: rock.conv({{.*}}) {dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "y", "x", "c"], input_layout = ["ni", "hi", "wi", "gi", "ci"], output_layout = ["no", "ho", "wo", "go", "ko"], padding = [3 : index, 3 : index, 0 : index, 0 : index], strides = [1 : index, 1 : index]} -func.func @conv_1d(%arg0: tensor<14336xf32>, %arg1: tensor<672xf32>, %arg2: tensor<1344xf32>) -> tensor<14336xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { - %cst = arith.constant dense<0.000000e+00> : tensor<1x1x64x224xf32> - %cst_0 = arith.constant 0.000000e+00 : f32 - %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [1, 3, 224] : tensor<672xf32> into tensor<1x3x224xf32> - %padded = tensor.pad %expanded low[0, 0, 3] high[0, 0, 3] { - ^bb0(%arg3: index, %arg4: index, %arg5: index): - tensor.yield %cst_0 : f32 - } : tensor<1x3x224xf32> to tensor<1x3x230xf32> - %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 230] : tensor<1x3x230xf32> into tensor<1x1x3x230xf32> - %expanded_2 = tensor.expand_shape %arg2 [[0, 1, 2, 3]] output_shape [1, 64, 3, 7] : tensor<1344xf32> into tensor<1x64x3x7xf32> - %0 = linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x230xf32>, tensor<1x64x3x7xf32>) outs(%cst : tensor<1x1x64x224xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [3, 3], stride = 
[1]} { - ^bb0(%in: f32, %in_3: f32, %out: f32): - %1 = arith.mulf %in, %in_3 : f32 - %2 = arith.addf %out, %1 : f32 - linalg.yield %2 : f32 - } -> tensor<1x1x64x224xf32> - %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3]] : tensor<1x1x64x224xf32> into tensor<14336xf32> - return %collapsed : tensor<14336xf32> -} - -// ----- - -#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> -#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> -#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> - -// CHECK-LABEL: func.func @mlir_perf_config -// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 2 : index, 2 : index], filter_layout = ["g", "k", "0", "1", "2", "c"], input_layout = ["ni", "0i", "1i", "2i", "gi", "ci"], output_layout = ["no", "0o", "1o", "2o", "go", "ko"], padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", strides = [2 : index, 2 : index, 2 : index]} -func.func @mlir_perf_config(%arg0: tensor<750xf32>, %arg1: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { - %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> - %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4, 5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x1x3x5x5x5xf32> - %expanded_0 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4, 5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<96xf32> into tensor<1x4x3x2x2x2xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded, %expanded_0 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], 
perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { - ^bb0(%in: f32, %in_1: f32, %out: f32): - %1 = arith.mulf %in, %in_1 : f32 - %2 = arith.addf %out, %1 : f32 - linalg.yield %2 : f32 - } -> tensor<2x1x4x2x2x2xf32> - %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3, 4, 5]] : tensor<2x1x4x2x2x2xf32> into tensor<64xf32> - return %collapsed : tensor<64xf32> -} diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir index 1c4d30fef269..f291a4ac97bc 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir @@ -4,3 +4,107 @@ func.func @no_kernel_attribute_test() { func.return } + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_no_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{no padding found}} + // expected-error @+1 {{failed to legalize operation}} + 
%0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_no_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // 
expected-error @+2 {{invalid dilation or stride}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_invalid_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : 
tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{invalid number of padding}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_invalid_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into 
tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{invalid dilation or stride}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} From ebb7a41667e32e4105c2ae4830472e37e7386ba2 Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 16 Mar 2026 13:14:55 +0000 Subject: [PATCH 7/8] Fixed rebase --- mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 926ec53f47d3..70277301bdc7 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -155,11 +155,11 @@ struct ConvFields { static int64_t getSpatialDim(rock::LinalgConvType type) { switch (type) { - case rock::LinalgConvType::Conv1dNgchGfch: + case rock::LinalgConvType::Conv1dNgchGkch: 
return 1; - case rock::LinalgConvType::Conv2dNgchwGfchw: + case rock::LinalgConvType::Conv2dNgchwGkchw: return 2; - case rock::LinalgConvType::Conv3dNgchwdGfchwd: + case rock::LinalgConvType::Conv3dNgchwdGkchwd: return 3; } llvm_unreachable("unknown LinalgConvType"); From 6225bdf4a9ec131b4f3a3d298fa49b36abeb4d8c Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 16 Mar 2026 13:49:05 +0000 Subject: [PATCH 8/8] Fixed testcase after changing attributes to gf to gk --- .../LinalgToRock/linalg-to-rock-conv-1d.mlir | 10 +++++----- .../LinalgToRock/linalg-to-rock-conv-2d.mlir | 10 +++++----- .../LinalgToRock/linalg-to-rock-conv-3d.mlir | 12 ++++++------ .../LinalgToRock/linalg-to-rock-invalid.mlir | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir index 09ce6d9a36a7..7354f97fc676 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir @@ -33,7 +33,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, 
tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -79,7 +79,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 20] : tensor<1x3x20xf32> into tensor<1x1x3x20xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x14xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x14xf32>) attrs = {conv_op = #rock, dilation = [3], group = 1 : i64, pad = [0, 0], stride = [1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x14xf32>) attrs = {conv_op = #rock, dilation = [3], group = 1 : i64, pad = [0, 0], stride = [1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -130,7 +130,7 @@ module { %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 14] : tensor<1x3x14xf32> into tensor<1x1x3x14xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 5] : tensor<6x3x5xf32> into tensor<1x6x3x5xf32> %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x14xf32>, 
tensor<1x6x3x5xf32>) outs(%cst_3 : tensor<1x1x6x10xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [2, 2], stride = [1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x14xf32>, tensor<1x6x3x5xf32>) outs(%cst_3 : tensor<1x1x6x10xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [2, 2], stride = [1]} { ^bb0(%in: f32, %in_5: f32, %out: f32): %1 = arith.mulf %in, %in_5 : f32 %2 = arith.addf %out, %1 : f32 @@ -176,7 +176,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x4xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [2]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x4xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [2]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -222,7 +222,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 3, 2, 10] : tensor<1x6x10xf32> into tensor<1x3x2x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], 
[3]] output_shape [3, 3, 2, 3] : tensor<9x2x3xf32> into tensor<3x3x2x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10xf32>, tensor<3x3x2x3xf32>) outs(%cst : tensor<1x3x3x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 3 : i64, pad = [0, 0], stride = [1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10xf32>, tensor<3x3x2x3xf32>) outs(%cst : tensor<1x3x3x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 3 : i64, pad = [0, 0], stride = [1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir index 174f877e8a04..c6e301ebc876 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir @@ -27,7 +27,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 
: i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -68,7 +68,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 20, 20] : tensor<1x3x20x20xf32> into tensor<1x1x3x20x20xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x16x14xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20x20xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x16x14xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20x20xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x16x14xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -114,7 +114,7 @@ module { %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 12, 12] : tensor<1x3x12x12xf32> into 
tensor<1x1x3x12x12xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12xf32>, tensor<1x6x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [1, 1, 1, 1], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12xf32>, tensor<1x6x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [1, 1, 1, 1], stride = [1, 1]} { ^bb0(%in: f32, %in_5: f32, %out: f32): %1 = arith.mulf %in, %in_5 : f32 %2 = arith.addf %out, %1 : f32 @@ -155,7 +155,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x3xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x3xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [2, 3]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], 
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x3xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [2, 3]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -196,7 +196,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 3, 2, 10, 10] : tensor<1x6x10x10xf32> into tensor<1x3x2x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [3, 3, 2, 3, 3] : tensor<9x2x3x3xf32> into tensor<3x3x2x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10xf32>, tensor<3x3x2x3x3xf32>) outs(%cst : tensor<1x3x3x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 3 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10xf32>, tensor<3x3x2x3x3xf32>) outs(%cst : tensor<1x3x3x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 3 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir index d6a0c6773963..83deae7fa892 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir +++ 
b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir @@ -27,7 +27,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -67,7 +67,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x6x6x6xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", 
"parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x6x6x6xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x6x6x6xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -112,7 +112,7 @@ module { %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 12, 12, 12] : tensor<1x3x12x12x12xf32> into tensor<1x1x3x12x12x12xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10x10xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12x12xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [1, 1, 1, 1, 1, 1], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : 
tensor<1x1x3x12x12x12xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [1, 1, 1, 1, 1, 1], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_5: f32, %out: f32): %1 = arith.mulf %in, %in_5 : f32 %2 = arith.addf %out, %1 : f32 @@ -152,7 +152,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x4x4xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x4x4xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x4x4xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -192,7 +192,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 3, 2, 10, 10, 10] : tensor<1x6x10x10x10xf32> into tensor<1x3x2x10x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape 
[3, 3, 2, 3, 3, 3] : tensor<9x2x3x3x3xf32> into tensor<3x3x2x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10x10xf32>, tensor<3x3x2x3x3x3xf32>) outs(%cst : tensor<1x3x3x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 3 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10x10xf32>, tensor<3x3x2x3x3x3xf32>) outs(%cst : tensor<1x3x3x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 3 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -233,7 +233,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<2x3x5x5x5xf32> into tensor<2x1x3x5x5x5xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<4x3x2x2x2xf32> into tensor<1x4x3x2x2x2xf32> %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride 
= [2, 2, 2]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir index f291a4ac97bc..2f009cb40031 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir @@ -19,7 +19,7 @@ module { %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> // expected-error @+2 {{no padding found}} // expected-error @+1 {{failed to legalize operation}} - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : 
f32 %2 = arith.addf %out, %1 : f32 @@ -45,7 +45,7 @@ module { %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> // expected-error @+2 {{invalid dilation or stride}} // expected-error @+1 {{failed to legalize operation}} - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -71,7 +71,7 @@ module { %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> // expected-error @+2 {{invalid number of padding}} // expected-error @+1 {{failed to legalize operation}} - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", 
"reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -97,7 +97,7 @@ module { %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> // expected-error @+2 {{invalid dilation or stride}} // expected-error @+1 {{failed to legalize operation}} - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32