diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index d0fb43bd5cf1..70277301bdc7 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Rock/IR/Rock.h" +#include "mlir/Dialect/Rock/IR/TransformMapBuilder.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/PatternMatch.h" @@ -139,8 +140,260 @@ LogicalResult MatmulConverter::matchAndRewrite( return success(); } +//===----------------------------------------------------------------------===// +// ConvLinalgConverter: linalg.generic (conv) -> rock.conv +//===----------------------------------------------------------------------===// + +namespace { +struct ConvFields { // Conv parameters that isConv() extracts from a linalg.generic. + rock::LinalgConvType type; + int64_t spatialDim; // getSpatialDim(type): the rank before the Conv1D -> Conv2D expansion. + ArrayAttr padding, stride, dilation; // Index arrays; padding is interleaved as [low, high] per spatial dim. + StringAttr perfConfig; // "perf_config" attribute of the op; null when the op has none. +}; +} // namespace + +static int64_t getSpatialDim(rock::LinalgConvType type) { // Returns the number of spatial dims (1, 2 or 3) for `type`. + switch (type) { + case rock::LinalgConvType::Conv1dNgchGkch: + return 1; + case rock::LinalgConvType::Conv2dNgchwGkchw: + return 2; + case rock::LinalgConvType::Conv3dNgchwdGkchwd: + return 3; + } + llvm_unreachable("unknown LinalgConvType"); +} + +/// Set filter_layout, input_layout, and output_layout on a rock.conv op. +/// Layouts match the linalg convention: GKC*, NGC*, NGK*. 
+static void setConvLayoutAttrs(OpBuilder &builder, rock::ConvOp cop, + int64_t spatialDim) { + auto *ctx = builder.getContext(); + auto setLayout = [&](StringRef attrName, ArrayRef prefix, + StringRef suffix) { + SmallVector layout; + for (StringRef dim : prefix) + layout.push_back(StringAttr::get(ctx, dim)); + for (int64_t i = 0; i < spatialDim; ++i) + layout.push_back(StringAttr::get(ctx, Twine(i) + suffix)); + cop->setAttr(attrName, builder.getArrayAttr(layout)); + }; + setLayout("filter_layout", {"g", "k", "c"}, ""); + setLayout("input_layout", {"ni", "gi", "ci"}, "i"); + setLayout("output_layout", {"no", "go", "ko"}, "o"); +} + +/// Remove the tensor.pad + tensor.expand_shape pattern emitted by +/// migraphx-to-linalg, replacing it with just tensor.expand_shape on the +/// unpadded source. rock.conv handles padding internally. Returns `in` unchanged when `padding` is all zeros, the new expand_shape result otherwise, and failure (after emitting an error on `op`) when the expected structure is not found. +/// +/// Expected IR structure: +/// %padded = tensor.pad %original ... +/// %expanded = tensor.expand_shape %padded ... +/// Replaced with: +/// %expanded = tensor.expand_shape %original ... +static FailureOr +removePaddingFromInput(ConversionPatternRewriter &rewriter, + linalg::GenericOp op, Value in, ArrayAttr padding) { + bool hasPadding = llvm::any_of(padding.getValue(), [](Attribute attr) { + return cast(attr).getInt() != 0; + }); + if (!hasPadding) + return in; // All-zero padding: nothing to strip, use the input as-is. + + auto expanded = in.getDefiningOp(); + if (!expanded) { + op.emitError("unexpected padding code structure"); + return failure(); + } + auto padded = expanded->getOperand(0).getDefiningOp(); + if (!padded || !padded->hasOneUse()) { // Single use required, presumably because the pad is erased after the rewrite. + op.emitError("unexpected padding code structure"); + return failure(); + } + + SmallVector resultShape(expanded.getResultType().getShape()); + auto lowPad = padded.getStaticLow(); // NOTE(review): the amounts subtracted come from the tensor.pad's static low/high, not from `padding` (only used for the any-nonzero test above); presumably they agree and are never dynamic - confirm. + auto highPad = padded.getStaticHigh(); + int64_t numPadDims = lowPad.size(); + int64_t numExpandedDims = resultShape.size(); + + // Padding is defined in pre-expand space. 
The spatial dims are at the + // tail of both tensors (expand_shape only splits an earlier dim), so + // align from the end. + for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; i >= 0 && j >= 0; + --i, --j) { + resultShape[j] -= (lowPad[i] + highPad[i]); + } + + RankedTensorType newResultType = RankedTensorType::get( + resultShape, padded.getResultType().getElementType()); + Value result = tensor::ExpandShapeOp::create( + rewriter, expanded.getLoc(), newResultType, padded.getOperand(0), + expanded.getReassociationIndices()); + rewriter.replaceOp(expanded, result); + rewriter.eraseOp(padded); + return result; +} + +namespace { +struct ConvLinalgConverter final // Rewrites a linalg.generic carrying a "conv_op" attribute into rock.conv. + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::getTypeConverter; + using OpAdaptor = typename OpConversionPattern::OpAdaptor; + + LogicalResult + matchAndRewrite(linalg::GenericOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override; + +private: // isConv parses the conv attributes of `op`; it fails when "conv_op" is missing or an attribute is malformed. + FailureOr isConv(ConversionPatternRewriter &rewriter, + linalg::GenericOp op) const; +}; +} // namespace + +FailureOr +ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, + linalg::GenericOp op) const { + auto name = op->getAttrOfType("conv_op"); + if (!name) + return failure(); // Not tagged as a convolution: no diagnostic, just no match. + rock::LinalgConvType convType = name.getValue(); + int64_t spatialDim = getSpatialDim(convType); + // Conv1D is expanded into Conv2D. To validate the attributes, we + // use effectiveDim instead because it accounts for the one extra stride/dilation + // entry appended for the expanded dimension. + int64_t effectiveDim = (spatialDim == 1) ? 
spatialDim + 1 : spatialDim; + + auto convertToArrayAttr = + [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr { + if(!arr || !isa(arr)){ + return ArrayAttr {}; + } + + SmallVector values; + llvm::transform( + cast(arr).getValue(), std::back_inserter(values), + [](Attribute val) { return cast(val).getInt(); }); + // Conv1D is expanded into Conv2D: append identity defaults for the + // extra spatial dimension (stride=1, dilation=1). The `pad` call passes no defaults, so nothing is appended for it here. + if (spatialDim == 1) + values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); + return rewriter.getIndexArrayAttr(values); + }; + + auto dilation = + convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/{1}); + auto stride = + convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/{1}); + if (!dilation || !stride || (int64_t)dilation.size() != effectiveDim || (int64_t)stride.size() != effectiveDim){ + op.emitError("invalid dilation or stride"); + return failure(); + } + + // Input format: [dim0_low, dim1_low, ..., dim0_high, dim1_high, ...] + // Rock format: [dim0_low, dim0_high, dim1_low, dim1_high, ...] 
+ auto originalPadding = convertToArrayAttr(op->getAttr("pad")); + if(!originalPadding){ + op.emitError("no padding found"); + return failure(); + } + int64_t numSpatial = originalPadding.size() / 2; + SmallVector interleavedPad; + for (int64_t i = 0; i < numSpatial; ++i) { + interleavedPad.push_back(originalPadding[i]); + interleavedPad.push_back(originalPadding[numSpatial + i]); + } + // Conv1D is expanded into Conv2D: zero low/high padding for the added dimension. + if (spatialDim == 1) { + interleavedPad.push_back(rewriter.getIndexAttr(0)); + interleavedPad.push_back(rewriter.getIndexAttr(0)); + } + auto padding = rewriter.getArrayAttr(interleavedPad); + // Expect one [low, high] pair per effective spatial dim (Conv1D is expanded into Conv2D). NOTE(review): an odd-length `pad` loses its last element in the `/ 2` above and can still pass this check - confirm whether the raw length should be validated too. + if(effectiveDim*2 != (int64_t)padding.size()){ + op.emitError("invalid number of padding"); + return failure(); + } + + StringAttr perfConfig = op->getAttrOfType("perf_config"); + return ConvFields{convType, spatialDim, padding, + stride, dilation, perfConfig}; +} + +LogicalResult ConvLinalgConverter::matchAndRewrite( + linalg::GenericOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const { + FailureOr maybeConv = isConv(rewriter, op); + if (failed(maybeConv)) + return failure(); + + ConvFields conv = *maybeConv; + Location loc = op.getLoc(); + + auto maybeInput = + removePaddingFromInput(rewriter, op, op.getOperand(0), conv.padding); + if (failed(maybeInput)) + return failure(); + + Value input = *maybeInput; // Operand 0 with any tensor.pad stripped; the padding is passed to rock.conv as an attribute instead. + Value filter = op.getOperand(1); + + // Conv1D is expanded into Conv2D: unmerge the single spatial dim + // into (spatial, W=1) for filter and input. NOTE(review): the code indexes shape[3], so it presumably relies on rank-4 operands here - confirm. 
+ int64_t effectiveSpatialDim = conv.spatialDim; + if (conv.spatialDim == 1) { + effectiveSpatialDim = 2; + auto filterShape = cast(filter.getType()).getShape(); + rock::BottomUpTMBuilder builder(rewriter, {"g", "k", "c", "0"}, filterShape, + loc); + builder.passThrough({"gf", "kf", "cf"}, {0, 1, 2}, {"g", "k", "c"}); + builder.unmerge({"0f", "1f"}, {3, 4}, "0", {filterShape[3], 1}); + filter = rock::TransformOp::create(rewriter, loc, filter, builder.get()); + + auto inputShape = cast(input.getType()).getShape(); + rock::BottomUpTMBuilder b(rewriter, {"n", "g", "c", "0"}, inputShape, loc); + b.passThrough({"nu", "gu", "cu"}, {0, 1, 2}, {"n", "g", "c"}); + b.unmerge({"0u", "1u"}, {3, 4}, "0", {inputShape[3], 1}); + input = rock::TransformOp::create(rewriter, loc, input, b.get()); + } + + RankedTensorType linalgResultType = + cast(op.getResult(0).getType()); + SmallVector rockShape(linalgResultType.getShape()); + if (conv.spatialDim == 1) + rockShape.push_back(1); + RankedTensorType rockResultType = + RankedTensorType::get(rockShape, linalgResultType.getElementType()); + Value output = + bufferization::AllocTensorOp::create(rewriter, loc, rockResultType, {}); + auto cop = rock::ConvOp::create(rewriter, loc, rockResultType, filter, input, + output, /*features=*/nullptr, + /*blockSize=*/nullptr, /*gridSize=*/nullptr, + conv.padding, conv.stride, conv.dilation, + /*params=*/nullptr); + // TODO: add splitk + if (conv.perfConfig) + cop->setAttr("perf_config", conv.perfConfig); + setConvLayoutAttrs(rewriter, cop, effectiveSpatialDim); + + Value result = cop.getResult(); + if (conv.spatialDim == 1) { // Undo the Conv1D -> Conv2D expansion: merge the two spatial dims of the result back into one. + auto shape = cast(result.getType()).getShape(); + rock::BottomUpTMBuilder b(rewriter, {"n", "g", "k", "0", "1"}, shape, loc); + b.passThrough({"no", "go", "ko"}, {0, 1, 2}, {"n", "g", "k"}); + b.merge("0o", 3, {"0", "1"}); + result = rock::TransformOp::create(rewriter, loc, result, b.get()); + } + + rewriter.replaceOp(op, result); + return success(); +} + void 
mlir::rock::populateLinalgToRockConversionPattern( RewritePatternSet &pattern, MLIRContext *context) { pattern.add, - MatmulConverter>(context); + MatmulConverter, ConvLinalgConverter>(context); } diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp index f7148fe81d01..7f3fb294abf7 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp @@ -47,8 +47,15 @@ static void populateLinalgToRockDialectConversion(ConversionTarget &target) { if (!linalgOp) { return std::nullopt; } - return linalg::isElementwise(linalgOp) || isa(op) || - isa(op); + + // Convolution has attributes. + linalg::GenericOp castedOp = dyn_cast(op); + if (castedOp && castedOp->hasAttr("conv_op")) { + return false; + } + + return linalg::isElementwise(linalgOp) || isa(op) || + castedOp; }); } diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir new file mode 100644 index 000000000000..7354f97fc676 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir @@ -0,0 +1,235 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x3 +// stride=1, dilation=1, padding=0, group=1 + +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape 
%[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_basic(%arg0: tensor<30xf32>, %arg1: tensor<54xf32>) -> tensor<48xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 
= arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x8xf32> into tensor<1x6x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x8xf32> into tensor<48xf32> + return %collapsed_3 : tensor<48xf32> + } +} + +// ----- + +// Input: NCL = 1x3x20, Filter: FCL = 6x3x3 +// stride=1, dilation=3, padding=0, group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5 * 3)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [3 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_dilation(%arg0: tensor<60xf32>, %arg1: tensor<54xf32>) -> tensor<84xf32> attributes {kernel, 
arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 20] : tensor<60xf32> into tensor<1x3x20xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 20] : tensor<1x3x20xf32> into tensor<1x1x3x20xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x14xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x14xf32>) attrs = {conv_op = #rock, dilation = [3], group = 1 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x14xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x14xf32> into tensor<1x6x14xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x14xf32> into tensor<84xf32> + return %collapsed_3 : tensor<84xf32> + } +} + +// ----- + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x5 +// stride=1, dilation=1, padding=[2,2], group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// 
CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_3]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [2 : index, 2 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +module { + func.func @conv_1d_padding(%arg0: tensor<30xf32>, %arg1: tensor<90xf32>) -> tensor<60xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 5] : tensor<90xf32> into tensor<6x3x5xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 2] high[0, 0, 2] { + ^bb0(%arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst : f32 + } : tensor<1x3x10xf32> to tensor<1x3x14xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 14] : tensor<1x3x14xf32> into tensor<1x1x3x14xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 5] : tensor<6x3x5xf32> into tensor<1x6x3x5xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", 
"reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x14xf32>, tensor<1x6x3x5xf32>) outs(%cst_3 : tensor<1x1x6x10xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [2, 2], stride = [1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x10xf32> into tensor<1x6x10xf32> + %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x10xf32> into tensor<60xf32> + return %collapsed_4 : tensor<60xf32> + } +} + +// ----- + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x3 +// stride=2, dilation=1, padding=0, group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 * 2 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_stride( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: 
%[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_stride(%arg0: tensor<30xf32>, %arg1: tensor<54xf32>) -> tensor<24xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x4xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x4xf32> into tensor<1x6x4xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x4xf32> into tensor<24xf32> + return %collapsed_3 : tensor<24xf32> + } +} + +// ----- + +// Input: NCL = 1x6x10, Filter: F(C/G)L = 9x2x3 (group=3, C_per_group=2, F_per_group=3) +// stride=1, dilation=1, padding=0, group=3 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> 
+// CHECK-LABEL: func.func @conv_1d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_groups(%arg0: tensor<60xf32>, %arg1: tensor<54xf32>) -> tensor<72xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [9, 2, 3] : tensor<54xf32> into tensor<9x2x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 6, 10] : tensor<60xf32> into tensor<1x6x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 3, 2, 10] : tensor<1x6x10xf32> into tensor<1x3x2x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [3, 3, 2, 3] : tensor<9x2x3xf32> into tensor<3x3x2x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], 
iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10xf32>, tensor<3x3x2x3xf32>) outs(%cst : tensor<1x3x3x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 3 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x3x3x8xf32> into tensor<1x9x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x9x8xf32> into tensor<72xf32> + return %collapsed_3 : tensor<72xf32> + } +} diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir new file mode 100644 index 000000000000..c6e301ebc876 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir @@ -0,0 +1,209 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +// CHECK-LABEL: func.func @conv_2d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] 
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_basic(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<384xf32> attributes {kernel, arch="##TOKEN_ARCH##"}{ + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x8x8xf32> into tensor<1x6x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x8x8xf32> into tensor<384xf32> + return %collapsed_3 : 
tensor<384xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 3 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6 * 2, d4 + d7 * 3)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_dilation(%arg0: tensor<1200xf32>, %arg1: tensor<162xf32>) -> tensor<1344xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 20, 20] : tensor<1200xf32> into tensor<1x3x20x20xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 20, 20] : tensor<1x3x20x20xf32> into tensor<1x1x3x20x20xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape 
[1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x16x14xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20x20xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x16x14xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x16x14xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x16x14xf32> into tensor<1x6x16x14xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x16x14xf32> into tensor<1344xf32> + return %collapsed_3 : tensor<1344xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_1]], %[[expanded_3]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [1 : index, 1 : index, 1 : index, 1 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape 
%[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_padding(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<600xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): + tensor.yield %cst : f32 + } : tensor<1x3x10x10xf32> to tensor<1x3x12x12xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 12, 12] : tensor<1x3x12x12xf32> into tensor<1x1x3x12x12xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12xf32>, tensor<1x6x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [1, 1, 1, 1], stride = [1, 1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x10x10xf32> into 
tensor<1x6x10x10xf32> + %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x10x10xf32> into tensor<600xf32> + return %collapsed_4 : tensor<600xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_stride( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 3 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 2 + d6, d4 * 3 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_stride(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<72xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 
3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x3xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x3xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [2, 3]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4x3xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x4x3xf32> into tensor<1x6x4x3xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x4x3xf32> into tensor<72xf32> + return %collapsed_3 : tensor<72xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// 
CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_groups(%arg0: tensor<600xf32>, %arg1: tensor<162xf32>) -> tensor<576xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [9, 2, 3, 3] : tensor<162xf32> into tensor<9x2x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 6, 10, 10] : tensor<600xf32> into tensor<1x6x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 3, 2, 10, 10] : tensor<1x6x10x10xf32> into tensor<1x3x2x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [3, 3, 2, 3, 3] : tensor<9x2x3x3xf32> into tensor<3x3x2x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10xf32>, tensor<3x3x2x3x3xf32>) outs(%cst : tensor<1x3x3x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 3 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x3x3x8x8xf32> into tensor<1x9x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x9x8x8xf32> into tensor<576xf32> + return %collapsed_3 : 
tensor<576xf32> + } +} diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir new file mode 100644 index 000000000000..83deae7fa892 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir @@ -0,0 +1,247 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_basic(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { 
+ %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7 * 2, d4 + d8 * 2, d5 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: 
%[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 2 : index, 2 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_dilation(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<1296xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x6x6x6xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", 
"reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x6x6x6xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x6x6x6xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x6x6x6xf32> into tensor<1x6x6x6x6xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x6x6x6xf32> into tensor<1296xf32> + return %collapsed_3 : tensor<1296xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_1]], %[[expanded_3]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [1 : index, 1 : index, 1 : index, 1 : index, 1 : index, 1 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = 
tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +module { + func.func @conv_3d_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<6000xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 1, 1, 1] high[0, 0, 1, 1, 1] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x3x10x10x10xf32> to tensor<1x3x12x12x12xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 12, 12, 12] : tensor<1x3x12x12x12xf32> into tensor<1x1x3x12x12x12xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12x12xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [1, 1, 1, 1, 1, 1], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10x10x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x10x10x10xf32> into tensor<1x6x10x10x10xf32> + 
%collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x10x10x10xf32> into tensor<6000xf32> + return %collapsed_4 : tensor<6000xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7, d4 * 2 + d8, d5 * 2 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_stride( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 2 : index, 2 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<384xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into 
tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x4x4xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x4x4xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4x4x4xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x4x4x4xf32> into tensor<1x6x4x4x4xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x4x4x4xf32> into tensor<384xf32> + return %collapsed_3 : tensor<384xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// 
CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_groups(%arg0: tensor<6000xf32>, %arg1: tensor<486xf32>) -> tensor<4608xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [9, 2, 3, 3, 3] : tensor<486xf32> into tensor<9x2x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 6, 10, 10, 10] : tensor<6000xf32> into tensor<1x6x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 3, 2, 10, 10, 10] : tensor<1x6x10x10x10xf32> into tensor<1x3x2x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [3, 3, 2, 3, 3, 3] : tensor<9x2x3x3x3xf32> into tensor<3x3x2x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10x10xf32>, tensor<3x3x2x3x3x3xf32>) outs(%cst : tensor<1x3x3x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 3 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} 
{ + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x3x3x8x8x8xf32> into tensor<1x9x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x9x8x8x8xf32> into tensor<4608xf32> + return %collapsed_3 : tensor<4608xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_perf_config( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 2 : index, 2 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1" +// CHECK-SAME: strides = [2 : index, 2 : index, 2 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func 
@conv_3d_perf_config(%arg0: tensor<750xf32>, %arg1: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [4, 3, 2, 2, 2] : tensor<96xf32> into tensor<4x3x2x2x2xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [2, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x3x5x5x5xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<2x3x5x5x5xf32> into tensor<2x1x3x5x5x5xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<4x3x2x2x2xf32> into tensor<1x4x3x2x2x2xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<2x1x4x2x2x2xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<2x1x4x2x2x2xf32> into tensor<2x4x2x2x2xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<2x4x2x2x2xf32> into tensor<64xf32> + return %collapsed_3 : tensor<64xf32> + } +} + diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir index 1c4d30fef269..2f009cb40031 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir +++ 
b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir @@ -4,3 +4,107 @@ func.func @no_kernel_attribute_test() { func.return } + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_no_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{no padding found}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], 
[3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_no_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{invalid dilation or stride}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + 
linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_invalid_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{invalid number of padding}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride 
= [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_invalid_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{invalid dilation or stride}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) 
outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir index 849a2aa7bee4..5ba4fecae0fa 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir @@ -1,12 +1,14 @@ // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand_type float -rand_min 0 -rand_max 0 -fut conv_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=GOLD // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL -/// README - There are essentially two tests (BOTH, and GOLD). 
+/// README - There are essentially three tests (BOTH, GOLD, and FINAL). /// BOTH checks if the tosa pipeline gives the same value (given the /// same seed) as the linalg pipeline. They will pass if both of them /// returns the same value. GOLD checks if the output for the linalg pipeline -/// matches an equivalent pytorch implementation. +/// matches an equivalent pytorch implementation. FINAL verifies that the linalg +/// pipeline can be converted to rock. /// Gold value computed as the following: /// @@ -35,6 +37,7 @@ module{ + // FINAL: [1 1 1] // BOTH: [6.09101, 7.06269, 5.96599, 7.63177, 5.83172, 5.96893, 5.16868, 6.0204, 6.80761, 6.78844, 5.75672, 7.33505, 5.417{{.*}}, 6.04153, 5.14715, 6.728{{.*}}, 7.30343, 7.90745, 6.73162, 8.21738, 5.65554, 7.37453, 6.6329, 6.6093, 5.2816, 6.17693, 5.19904, 6.38292, 4.55713, 4.62921, 4.72307, 5.47466, 4.551, 6.15787, 4.97358, 5.89798, 5.10684, 6.01542, 5.18933, 5.58596, 5.22862, 7.13881, 4.88134, 5.56315, 5.52007, 6.27824, 4.93779, 5.71044, 6.27934, 7.51976, 5.23159, 7.17014, 6.74235, 5.59631, 5.33666, 6.20902, 4.95302, 5.26817, 4.50571, 5.17464, 4.49137, 4.80133, 3.39298, 4.92709] // GOLD: [1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625] func.func @conv(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir
index a9658468d92f..1e5cdab8f085 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir @@ -1,8 +1,10 @@ // RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // Only a small subset of the array is checked because it is quite huge +// FINAL: [1 1 1] // BOTH: [2.94651, 3.09122, 3.86495, 4.54138, 3.18018, 4.06578, 2.97265, 4.05155, 2.35716, 4.26762, 3.49153, 4.14329, 3.82529, 5.43395, 4.66598, 4.98826, 4.41554, 5.15631, 3.91766, 4.79236, 4.52993, 4.25152, 4.87812, 5.10546, 4.19679, 5.1306, 4.2836, 3.7857, 5.21429, 4.6504, 4.83997, 3.91648, 5.86651, 4.76546, 5.00734, 5.18668, 5.38386, 4.1707, 5.43972, 5.57541, 5.33734, 5.14293, 4.10719, 5.32505, 4.39825 func.func @conv_1d_group(%in: !migraphx.shaped<10x8x123xf32, 984x123x1>, %fil: !migraphx.shaped<12x2x7xf32, 14x7x1>) -> !migraphx.shaped<10x12x53xf32, 636x53x1> { %out = migraphx.convolution %in, %fil {dilation = [4], group = 4 : i64, padding = [3,3], padding_mode = 0 : i64, stride = [2]} : diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir index 83630aa811d4..84e14f7ef9d0 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir @@ -1,7 +1,9 @@ // RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_3d_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // BOTH: [9.78569, 8.8887, 12.9401, 10.9686, 8.19386, 11.9315, 17.5043, 11.5946, 18.9063 +// FINAL: [1 1 
1] func.func @conv_3d(%in: !migraphx.shaped<10x8x12x13x14xf32, 17472x2184x182x14x1>, %fil: !migraphx.shaped<12x8x2x3x4xf32, 192x24x12x4x1>) -> !migraphx.shaped<10x12x13x6x3xf32, 2808x234x18x3x1> { %out = migraphx.convolution %in, %fil {dilation = [3, 4, 5], group = 1 : i64, padding = [2, 3, 4, 2, 3, 4], padding_mode = 0 : i64, stride = [1, 2, 3]} : <10x8x12x13x14xf32, 17472x2184x182x14x1>, <12x8x2x3x4xf32, 192x24x12x4x1> -> <10x12x13x6x3xf32, 2808x234x18x3x1> diff --git a/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir b/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir new file mode 100644 index 000000000000..a70f9d19a8bb --- /dev/null +++ b/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir @@ -0,0 +1,7 @@ +// RUN: rocmlir-gen -fut conv3d_add -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx-linalg,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv3d_add_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s + +// CHECK: [1 1 1] +func.func @conv3d_add(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<9x1x2x2x2xf32, 8x8x4x2x1>) -> !migraphx.shaped<2x9x2x2x2xf32, 72x8x4x2x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [2, 2, 2], group = 3 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <9x1x2x2x2xf32, 8x8x4x2x1> -> <2x9x2x2x2xf32, 72x8x4x2x1> + return %0: !migraphx.shaped<2x9x2x2x2xf32, 
72x8x4x2x1> +} diff --git a/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir b/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir index 5f019521aeac..64f838c35cdf 100644 --- a/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir +++ b/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir @@ -1,10 +1,12 @@ // RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut 
conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // Here we are checking to see if conv_2d with non standard stride, dilation, and a group parameter matches the existing tosa pipeline // Note - this array is quite large, so we are only checking a small subset // BOTH: [5.83007, 7.83374, 8.46274, 9.03237, 6.51391, 7.75809, 9.73003, 8.48013, 8.15419, 9.9975, 7.50244, 7.11982, 6.58057 +// FINAL: [1 1 1] func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1>