From a8fefddcae86f8c9691db54fe50938219f1eb930 Mon Sep 17 00:00:00 2001 From: Vincent Date: Tue, 24 Feb 2026 19:40:31 +0000 Subject: [PATCH 1/8] [AIROCMLIR-445] Lower 'linalg.generic' convolution into rock --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 331 +++++++++++++++++- .../LinalgToRock/LinalgToRockPass.cpp | 13 +- .../mixr-to-linalg-conv.cpu.mlir | 7 +- .../mixr-to-linalg-conv1d-group.cpu.mlir | 2 + .../mixr-to-linalg-conv3d-group.cpu.mlir | 2 + .../linalg-to-rock-conv3d-no-pad.e2d.mlir | 7 + .../mixr-to-linalg-conv2d-group.cpu.mlir | 2 + 7 files changed, 359 insertions(+), 5 deletions(-) create mode 100644 mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index d0fb43bd5cf1..8f5d7ac5f3ae 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -13,10 +13,13 @@ #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/Rock/IR/Rock.h" +#include "mlir/Dialect/Rock/IR/TransformMapBuilder.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/StringMap.h" using namespace mlir; @@ -139,8 +142,334 @@ LogicalResult MatmulConverter::matchAndRewrite( return success(); } +namespace { +enum class ConvType { + Conv1D_NGCH_FGCH, + Conv2D_NGCHW_GFCHW, + Conv3D_NGCHWD_GFCHWD +}; + +struct ConvFields { + ConvType type; + ArrayAttr padding, stride, dilation; + StringAttr perfConfig; +}; + +struct ConvLinalgConverter final + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::getTypeConverter; + using OpAdaptor = typename OpConversionPattern::OpAdaptor; + + LogicalResult + 
matchAndRewrite(linalg::GenericOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override; + +private: + /// Returns strides, dilation, and padding if any + FailureOr isConv(ConversionPatternRewriter &rewriter, + linalg::GenericOp op) const; +}; +} // namespace + +FailureOr +ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, + linalg::GenericOp op) const { + // FIXME: In the future, it is possible to extract strides, dilation, and + // padding by matching the AffineExpr syntax tree. We can also infer the + // dimension and layout of the convolution from the affine_map. + llvm::StringMap opNameMapper{ + {"conv3d_ngchwd_gfchwd", ConvType::Conv3D_NGCHWD_GFCHWD}, + {"conv2d_ngchw_gfchw", ConvType::Conv2D_NGCHW_GFCHW}, + {"conv1d_ngch_gfch", ConvType::Conv1D_NGCH_FGCH}}; + + StringAttr name = op->getAttrOfType("conv_op"); + if (!opNameMapper.contains(name.getValue())) { + return failure(); + } + ConvType convType = opNameMapper[name.getValue()]; + + auto convertToArrayAttr = + [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr { + DenseIntElementsAttr casted = dyn_cast(arr); + if (!casted) { + return nullptr; + } + + SmallVector values; + llvm::transform(casted.getValues(), std::back_inserter(values), + [&](int64_t val) { return val; }); + if (convType == ConvType::Conv1D_NGCH_FGCH) { + values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); + } + return rewriter.getIndexArrayAttr(values); + }; + + auto dilation = + convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/1); + auto stride = convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/1); + + // We are given padding in format [dim0low, dim1low, ..., dim1high, + // dim2high,...] but rock expects [dim0low, dim1low, dim2low, ...] 
+ SmallVector newPaddingOrder; + auto originalPaddingOrder = convertToArrayAttr(op->getAttr("pad")).getValue(); + int64_t dim = originalPaddingOrder.size() / 2; + for (int64_t i = 0; i < dim; ++i) { + newPaddingOrder.push_back(originalPaddingOrder[i]); + newPaddingOrder.push_back(originalPaddingOrder[i]); + } + if (convType == ConvType::Conv1D_NGCH_FGCH) { + newPaddingOrder.push_back(rewriter.getIndexAttr(0)); + newPaddingOrder.push_back(rewriter.getIndexAttr(0)); + } + auto padding = rewriter.getArrayAttr(newPaddingOrder); + if (!padding || !dilation || !stride) { + return failure(); + } + + StringAttr perfConfig = op->getAttrOfType("perf_config"); + return ConvFields{convType, padding, stride, dilation, perfConfig}; +} + +LogicalResult ConvLinalgConverter::matchAndRewrite( + linalg::GenericOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const { + FailureOr maybeConvParams = isConv(rewriter, op); + if (failed(maybeConvParams)) + return failure(); + + ConvFields convParams = maybeConvParams.value(); + Location loc = op.getLoc(); + + // We have layout filter = GFC* but we need GF*C + auto getFilter = [&](Value startFilter) -> Value { + ArrayRef startFilterShape = + cast(startFilter.getType()).getShape(); + int64_t dim = startFilterShape.size() - 3; + switch (dim) { + case 3: { + rock::BottomUpTMBuilder filterBuilder( + rewriter, {"g", "f", "c", "h", "w", "d"}, startFilterShape, loc); + filterBuilder.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); + filterBuilder.passThrough({"hk", "wk", "dk"}, {2, 3, 4}, {"h", "w", "d"}); + filterBuilder.passThrough({"ck"}, {5}, {"c"}); + auto attr = filterBuilder.get(); + auto filter = rock::TransformOp::create(rewriter, loc, startFilter, attr); + return filter; + } + case 2: { + return startFilter; + } + case 1: { + rock::BottomUpTMBuilder filterBuilder(rewriter, {"g", "f", "c", "h"}, + startFilterShape, loc); + filterBuilder.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); + filterBuilder.unmerge({"hk", "wk"}, {2, 
3}, {"h"}, + {startFilterShape[3], 1}); + filterBuilder.passThrough({"ck"}, {4}, {"c"}); + auto attr = filterBuilder.get(); + auto filter = rock::TransformOp::create(rewriter, loc, startFilter, attr); + return filter; + } + default: + llvm_unreachable("seen unsupported cases"); + } + }; + + // We have input filter = NGC* but we need N*GC + auto getInput = [&](Value in) -> FailureOr { + // dealing with padding + if (llvm::any_of(convParams.padding.getValue(), [](Attribute attr) { + return cast(attr).getInt() != 0; + })) { + // clang-format off + // Here we are essentially removing the padding while keeping the group + // dimension expansion. We remove the padding because the rock.conv handles + // padding for us. This code structure comes from what migraphx-to-linalg + // emits. In theory, there can be other code structures that are emitted in + // linalg pipeline to handle padding. + // Original: + // %padded = tensor.pad %original ... + // %group_expansion = tensor.expand_shape %padded ... + // New: + // %group_expansion = tensor.expand_shape %original + // clang-format on + if (auto expanded = in.getDefiningOp(); + auto padded = + expanded->getOperand(0).getDefiningOp()) { + SmallVector resultShape( + expanded.getResultType().getShape()); + auto lowPad = padded.getStaticLow(); + auto highPad = padded.getStaticHigh(); + int64_t numPadDims = lowPad.size(); + int64_t numExpandedDims = resultShape.size(); + + // Padding is defined in pre-expand space. The spatial dims are at the + // tail of both the pre-expand and post-expand tensors (expand_shape + // only splits an earlier dim), so align from the end. 
+ for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; + i >= 0 && j >= 0; --i, --j) { + resultShape[j] -= (lowPad[i] + highPad[i]); + } + + RankedTensorType newResultType = RankedTensorType::get( + resultShape, padded.getResultType().getElementType()); + auto temp = padded.getOperand(0); + in = tensor::ExpandShapeOp::create(rewriter, expanded.getLoc(), + newResultType, temp, + expanded.getReassociationIndices()); + rewriter.replaceOp(expanded, in); + rewriter.eraseOp(padded); + } else { + op.emitError("unexpected padding code structure"); + return failure(); + } + } + + ArrayRef startInputShape = + cast(in.getType()).getShape(); + int64_t dim = startInputShape.size() - 3; + switch (dim) { + case 3: { + rock::BottomUpTMBuilder inputBuilder( + rewriter, {"n", "g", "c", "h", "w", "d"}, startInputShape, loc); + inputBuilder.passThrough({"ni"}, {0}, {"n"}); + inputBuilder.passThrough({"hi", "wi", "di"}, {1, 2, 3}, {"h", "w", "d"}); + inputBuilder.passThrough({"gi", "ci"}, {4, 5}, {"g", "c"}); + auto inputAttr = inputBuilder.get(); + auto input = rock::TransformOp::create(rewriter, loc, in, inputAttr); + return input.getResult(); + } + case 2: { + return in; + } + case 1: { + // migraphx-to-tosa pipeline handles 1d convolution by converting + // 1 dimensional input into 2 dimensional. 1x1x3x10 (NGCH) becomes + // 1x1x3x1x10 (NHWGC). 
We are reproducing that here + int64_t h = startInputShape[3]; + rock::BottomUpTMBuilder filterBuilder(rewriter, {"n", "g", "c", "h"}, + startInputShape, loc); + filterBuilder.passThrough({"ni"}, {0}, {"n"}); + filterBuilder.unmerge({"hi", "wi"}, {1, 2}, {"h"}, {h, 1}); + filterBuilder.passThrough({"gi", "ci"}, {3, 4}, {"g", "c"}); + auto attr = filterBuilder.get(); + return rock::TransformOp::create(rewriter, loc, in, attr).getResult(); + } + default: + llvm_unreachable("unsupported cases"); + } + }; + + // Creating the final result shape + RankedTensorType linalgResultType = + cast(op.getResult(0).getType()); + ArrayRef linalgOutputShape = linalgResultType.getShape(); + SmallVector rockOutputShape(linalgOutputShape); + if (linalgOutputShape.size() - 3 == 3 || linalgOutputShape.size() - 3 == 1) { + rockOutputShape.clear(); + rockOutputShape.push_back(linalgOutputShape[0]); + rockOutputShape.insert(rockOutputShape.end(), + std::next(linalgOutputShape.begin(), 3), + linalgOutputShape.end()); + if (linalgOutputShape.size() - 3 == 1) + rockOutputShape.push_back(1); + rockOutputShape.push_back(linalgOutputShape[1]); + rockOutputShape.push_back(linalgOutputShape[2]); + } + RankedTensorType rockResultType = + RankedTensorType::get(rockOutputShape, linalgResultType.getElementType()); + Value output = bufferization::AllocTensorOp::create(rewriter, op.getLoc(), + rockResultType, {}); + + auto maybeInput = getInput(op.getOperand(0)); + if (failed(maybeInput)) { + return failure(); + } + auto input = *maybeInput; + auto filter = getFilter(op.getOperand(1)); + auto cop = rock::ConvOp::create(rewriter, loc, rockResultType, filter, input, + output, /*features=*/nullptr, + /*blockSize=*/nullptr, /*gridSize=*/nullptr, + convParams.padding, convParams.stride, + convParams.dilation, /*params=*/nullptr); + // TODO: add splitk + if (convParams.perfConfig) { + cop->setAttr("perf_config", convParams.perfConfig); + } + + // Here we are going to emit layouts + switch (convParams.type) { + 
case ConvType::Conv3D_NGCHWD_GFCHWD: + cop->setAttr("filter_layout", + rewriter.getStrArrayAttr({"g", "k", "0", "1", "2", "c"})); + cop->setAttr("input_layout", rewriter.getStrArrayAttr( + {"ni", "0i", "1i", "2i", "gi", "ci"})); + cop->setAttr("output_layout", rewriter.getStrArrayAttr( + {"no", "0o", "1o", "2o", "go", "ko"})); + break; + case ConvType::Conv2D_NGCHW_GFCHW: + cop->setAttr("filter_layout", + rewriter.getStrArrayAttr({"g", "k", "c", "y", "x"})); + cop->setAttr("input_layout", + rewriter.getStrArrayAttr({"ni", "gi", "ci", "hi", "wi"})); + cop->setAttr("output_layout", + rewriter.getStrArrayAttr({"no", "go", "ko", "ho", "wo"})); + break; + case ConvType::Conv1D_NGCH_FGCH: + cop->setAttr("filter_layout", + rewriter.getStrArrayAttr({"g", "k", "y", "x", "c"})); + cop->setAttr("input_layout", + rewriter.getStrArrayAttr({"ni", "hi", "wi", "gi", "ci"})); + cop->setAttr("output_layout", + rewriter.getStrArrayAttr({"no", "ho", "wo", "go", "ko"})); + break; + default: + llvm_unreachable("edge case one"); + } + + // output has type ["no", "0o", "1o", "2o", "go", "ko"] + // We need to reshape to ngfhwd + ArrayRef startResultShape = rockResultType.getShape(); + Value finalReshaped; + switch (convParams.type) { + case ConvType::Conv3D_NGCHWD_GFCHWD: { + rock::BottomUpTMBuilder resultBuilder( + rewriter, {"n", "h", "w", "d", "g", "f"}, startResultShape, loc); + resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); + resultBuilder.passThrough({"no"}, {0}, {"n"}); + resultBuilder.passThrough({"ho", "wo", "do"}, {3, 4, 5}, {"h", "w", "d"}); + auto resultAttr = resultBuilder.get(); + finalReshaped = + rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); + break; + } + case ConvType::Conv2D_NGCHW_GFCHW: { + finalReshaped = cop.getResult(); + break; + } + case ConvType::Conv1D_NGCH_FGCH: { + rock::BottomUpTMBuilder resultBuilder(rewriter, {"n", "h", "w", "g", "f"}, + startResultShape, loc); + resultBuilder.passThrough({"no"}, {0}, {"n"}); + 
resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); + resultBuilder.merge("ho", 3, {"h", "w"}); + auto resultAttr = resultBuilder.get(); + finalReshaped = + rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); + break; + } + default: { + return op.emitError("unimplemented final reshape"); + } + } + + rewriter.replaceOp(op, finalReshaped); + return success(); +} + void mlir::rock::populateLinalgToRockConversionPattern( RewritePatternSet &pattern, MLIRContext *context) { pattern.add, - MatmulConverter>(context); + MatmulConverter, ConvLinalgConverter>(context); } diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp index f7148fe81d01..11ed67331c0b 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp @@ -47,8 +47,17 @@ static void populateLinalgToRockDialectConversion(ConversionTarget &target) { if (!linalgOp) { return std::nullopt; } - return linalg::isElementwise(linalgOp) || isa(op) || - isa(op); + + linalg::GenericOp castedOp = dyn_cast(op); + if (castedOp && + llvm::any_of(castedOp.getIteratorTypesArray(), [](auto type) { + return linalg::isReductionIterator(type); + })) { + return false; + } + + return linalg::isElementwise(linalgOp) || isa(op) || + castedOp; }); } diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir index 849a2aa7bee4..5ba4fecae0fa 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir @@ -1,12 +1,14 @@ // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand_type float -rand_min 0 -rand_max 0 -fut conv_wrapper --verifier clone - | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=GOLD // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float 
-fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL -/// README - There are essentially two tests (BOTH, and GOLD). +/// README - There are essentially three tests (BOTH, GOLD, and FINAL). /// BOTH checks if the tosa pipeline gives the same value (given the /// same seed) as the linalg pipeline. They will pass if both of them /// returns the same value. GOLD checks if the output for the linalg pipeline -/// matches an equivalent pytorch implementation. +/// matches an equivalent pytorch implementation. FINAL verifies if the linalg +/// pipeline can be converted to rock /// Gold value computed as the following: /// @@ -35,6 +37,7 @@ module{ + // FINAL: [1 1 1] // BOTH: [6.09101, 7.06269, 5.96599, 7.63177, 5.83172, 5.96893, 5.16868, 6.0204, 6.80761, 6.78844, 5.75672, 7.33505, 5.417{{.*}}, 6.04153, 5.14715, 6.728{{.*}}, 7.30343, 7.90745, 6.73162, 8.21738, 5.65554, 7.37453, 6.6329, 6.6093, 5.2816, 6.17693, 5.19904, 6.38292, 4.55713, 4.62921, 4.72307, 5.47466, 4.551, 6.15787, 4.97358, 5.89798, 5.10684, 6.01542, 5.18933, 5.58596, 5.22862, 7.13881, 4.88134, 5.56315, 5.52007, 6.27824, 4.93779, 5.71044, 6.27934, 7.51976, 5.23159, 7.17014, 6.74235, 5.59631, 5.33666, 6.20902, 4.95302, 5.26817, 4.50571, 5.17464, 4.49137, 4.80133, 3.39298, 4.92709] // GOLD: [1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 
1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625] func.func @conv(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir index a9658468d92f..1e5cdab8f085 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir @@ -1,8 +1,10 @@ // RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // Only a small subset of the array is checked because it is quite huge +// FINAL: [1 1 1] // BOTH: [2.94651, 3.09122, 3.86495, 4.54138, 3.18018, 4.06578, 2.97265, 4.05155, 2.35716, 4.26762, 3.49153, 4.14329, 3.82529, 5.43395, 4.66598, 4.98826, 4.41554, 5.15631, 3.91766, 4.79236, 4.52993, 4.25152, 4.87812, 5.10546, 4.19679, 5.1306, 4.2836, 3.7857, 5.21429, 4.6504, 4.83997, 3.91648, 5.86651, 4.76546, 5.00734, 5.18668, 5.38386, 4.1707, 5.43972, 5.57541, 5.33734, 5.14293, 4.10719, 5.32505, 4.39825 func.func @conv_1d_group(%in: !migraphx.shaped<10x8x123xf32, 984x123x1>, %fil: !migraphx.shaped<12x2x7xf32, 14x7x1>) -> !migraphx.shaped<10x12x53xf32, 636x53x1> { %out = migraphx.convolution %in, %fil {dilation = [4], group = 4 : i64, padding = [3,3], padding_mode = 0 : i64, stride = [2]} : diff --git 
a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir index 83630aa811d4..84e14f7ef9d0 100644 --- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir +++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir @@ -1,7 +1,9 @@ // RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_3d_wrapper 
--verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // BOTH: [9.78569, 8.8887, 12.9401, 10.9686, 8.19386, 11.9315, 17.5043, 11.5946, 18.9063 +// FINAL: [1 1 1] func.func @conv_3d(%in: !migraphx.shaped<10x8x12x13x14xf32, 17472x2184x182x14x1>, %fil: !migraphx.shaped<12x8x2x3x4xf32, 192x24x12x4x1>) -> !migraphx.shaped<10x12x13x6x3xf32, 2808x234x18x3x1> { %out = migraphx.convolution %in, %fil {dilation = [3, 4, 5], group = 1 : i64, padding = [2, 3, 4, 2, 3, 4], padding_mode = 0 : i64, stride = [1, 2, 3]} : <10x8x12x13x14xf32, 17472x2184x182x14x1>, <12x8x2x3x4xf32, 192x24x12x4x1> -> <10x12x13x6x3xf32, 2808x234x18x3x1> diff --git a/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir b/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir new file mode 100644 index 000000000000..a70f9d19a8bb --- /dev/null +++ b/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir @@ -0,0 +1,7 @@ +// RUN: rocmlir-gen -fut conv3d_add -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx-linalg,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv3d_add_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner 
--shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s + +// CHECK: [1 1 1] +func.func @conv3d_add(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<9x1x2x2x2xf32, 8x8x4x2x1>) -> !migraphx.shaped<2x9x2x2x2xf32, 72x8x4x2x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [2, 2, 2], group = 3 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <9x1x2x2x2xf32, 8x8x4x2x1> -> <2x9x2x2x2xf32, 72x8x4x2x1> + return %0: !migraphx.shaped<2x9x2x2x2xf32, 72x8x4x2x1> +} diff --git a/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir b/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir index 5f019521aeac..64f838c35cdf 100644 --- a/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir +++ b/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir @@ -1,10 +1,12 @@ // RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH // RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver 
--host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH +// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL // Here we are checking to see if conv_2d with non standard stride, dilation, and a group parameter matches the existing tosa pipeline // Note - this array is quite large, so we are only checking a small subset // BOTH: [5.83007, 7.83374, 8.46274, 9.03237, 6.51391, 7.75809, 9.73003, 8.48013, 8.15419, 9.9975, 7.50244, 7.11982, 6.58057 +// FINAL: [1 1 1] func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> { %out = migraphx.convolution %in, %fil 
{dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1> From a6c02224245b808765242150d6caf81f4ec04a65 Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 25 Feb 2026 14:25:48 +0000 Subject: [PATCH 2/8] Rebase from previous branch to use enum instead of string --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 45 +++++++------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 8f5d7ac5f3ae..3276256ee82d 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -143,14 +143,8 @@ LogicalResult MatmulConverter::matchAndRewrite( } namespace { -enum class ConvType { - Conv1D_NGCH_FGCH, - Conv2D_NGCHW_GFCHW, - Conv3D_NGCHWD_GFCHWD -}; - struct ConvFields { - ConvType type; + rock::LinalgConvType type; ArrayAttr padding, stride, dilation; StringAttr perfConfig; }; @@ -178,28 +172,19 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, // FIXME: In the future, it is possible to extract strides, dilation, and // padding by matching the AffineExpr syntax tree. We can also infer the // dimension and layout of the convolution from the affine_map. 
- llvm::StringMap opNameMapper{ - {"conv3d_ngchwd_gfchwd", ConvType::Conv3D_NGCHWD_GFCHWD}, - {"conv2d_ngchw_gfchw", ConvType::Conv2D_NGCHW_GFCHW}, - {"conv1d_ngch_gfch", ConvType::Conv1D_NGCH_FGCH}}; - - StringAttr name = op->getAttrOfType("conv_op"); - if (!opNameMapper.contains(name.getValue())) { + rock::LinalgConvTypeAttr name = op->getAttrOfType("conv_op"); + if (!name) { return failure(); } - ConvType convType = opNameMapper[name.getValue()]; + rock::LinalgConvType convType = name.getValue(); auto convertToArrayAttr = [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr { - DenseIntElementsAttr casted = dyn_cast(arr); - if (!casted) { - return nullptr; - } - + ArrayAttr casted = dyn_cast(arr); SmallVector values; - llvm::transform(casted.getValues(), std::back_inserter(values), - [&](int64_t val) { return val; }); - if (convType == ConvType::Conv1D_NGCH_FGCH) { + llvm::transform(casted.getValue(), std::back_inserter(values), + [&](Attribute val) { return cast(val).getInt(); }); + if (convType == rock::LinalgConvType::Conv1dNgchGfch) { values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); } return rewriter.getIndexArrayAttr(values); @@ -218,7 +203,7 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, newPaddingOrder.push_back(originalPaddingOrder[i]); newPaddingOrder.push_back(originalPaddingOrder[i]); } - if (convType == ConvType::Conv1D_NGCH_FGCH) { + if (convType == rock::LinalgConvType::Conv1dNgchGfch) { newPaddingOrder.push_back(rewriter.getIndexAttr(0)); newPaddingOrder.push_back(rewriter.getIndexAttr(0)); } @@ -400,7 +385,7 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( // Here we are going to emit layouts switch (convParams.type) { - case ConvType::Conv3D_NGCHWD_GFCHWD: + case rock::LinalgConvType::Conv3dNgchwdGfchwd: cop->setAttr("filter_layout", rewriter.getStrArrayAttr({"g", "k", "0", "1", "2", "c"})); cop->setAttr("input_layout", rewriter.getStrArrayAttr( @@ -408,7 +393,7 @@ LogicalResult 
ConvLinalgConverter::matchAndRewrite( cop->setAttr("output_layout", rewriter.getStrArrayAttr( {"no", "0o", "1o", "2o", "go", "ko"})); break; - case ConvType::Conv2D_NGCHW_GFCHW: + case rock::LinalgConvType::Conv2dNgchwGfchw: cop->setAttr("filter_layout", rewriter.getStrArrayAttr({"g", "k", "c", "y", "x"})); cop->setAttr("input_layout", @@ -416,7 +401,7 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( cop->setAttr("output_layout", rewriter.getStrArrayAttr({"no", "go", "ko", "ho", "wo"})); break; - case ConvType::Conv1D_NGCH_FGCH: + case rock::LinalgConvType::Conv1dNgchGfch: cop->setAttr("filter_layout", rewriter.getStrArrayAttr({"g", "k", "y", "x", "c"})); cop->setAttr("input_layout", @@ -433,7 +418,7 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( ArrayRef startResultShape = rockResultType.getShape(); Value finalReshaped; switch (convParams.type) { - case ConvType::Conv3D_NGCHWD_GFCHWD: { + case rock::LinalgConvType::Conv3dNgchwdGfchwd: { rock::BottomUpTMBuilder resultBuilder( rewriter, {"n", "h", "w", "d", "g", "f"}, startResultShape, loc); resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); @@ -444,11 +429,11 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); break; } - case ConvType::Conv2D_NGCHW_GFCHW: { + case rock::LinalgConvType::Conv2dNgchwGfchw: { finalReshaped = cop.getResult(); break; } - case ConvType::Conv1D_NGCH_FGCH: { + case rock::LinalgConvType::Conv1dNgchGfch: { rock::BottomUpTMBuilder resultBuilder(rewriter, {"n", "h", "w", "g", "f"}, startResultShape, loc); resultBuilder.passThrough({"no"}, {0}, {"n"}); From 6c7dabc725501bbb40c75c00bc8dc8919d7ee1ac Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 25 Feb 2026 15:53:53 +0000 Subject: [PATCH 3/8] Added lit test --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 512 +++++++++--------- .../LinalgToRock/linalg-to-rock-conv.mlir | 102 ++++ 2 files changed, 367 insertions(+), 247 deletions(-) create mode 
100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 3276256ee82d..51a1a220d2e0 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -13,13 +13,11 @@ #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/Rock/IR/Rock.h" #include "mlir/Dialect/Rock/IR/TransformMapBuilder.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/PatternMatch.h" -#include "llvm/ADT/StringMap.h" using namespace mlir; @@ -142,13 +140,220 @@ LogicalResult MatmulConverter::matchAndRewrite( return success(); } +//===----------------------------------------------------------------------===// +// ConvLinalgConverter: linalg.generic (conv) -> rock.conv +//===----------------------------------------------------------------------===// + namespace { struct ConvFields { rock::LinalgConvType type; + int64_t spatialDim; ArrayAttr padding, stride, dilation; StringAttr perfConfig; }; +} // namespace + +static int64_t getSpatialDim(rock::LinalgConvType type) { + switch (type) { + case rock::LinalgConvType::Conv1dNgchGfch: + return 1; + case rock::LinalgConvType::Conv2dNgchwGfchw: + return 2; + case rock::LinalgConvType::Conv3dNgchwdGfchwd: + return 3; + } + llvm_unreachable("unknown LinalgConvType"); +} + +/// Set filter_layout, input_layout, and output_layout on a rock.conv op. 
+static void setConvLayoutAttrs(OpBuilder &builder, rock::ConvOp cop, + rock::LinalgConvType type) { + auto set = [&](StringRef name, ArrayRef layout) { + cop->setAttr(name, builder.getStrArrayAttr(layout)); + }; + switch (type) { + case rock::LinalgConvType::Conv3dNgchwdGfchwd: + set("filter_layout", {"g", "k", "0", "1", "2", "c"}); + set("input_layout", {"ni", "0i", "1i", "2i", "gi", "ci"}); + set("output_layout", {"no", "0o", "1o", "2o", "go", "ko"}); + break; + case rock::LinalgConvType::Conv2dNgchwGfchw: + set("filter_layout", {"g", "k", "c", "y", "x"}); + set("input_layout", {"ni", "gi", "ci", "hi", "wi"}); + set("output_layout", {"no", "go", "ko", "ho", "wo"}); + break; + case rock::LinalgConvType::Conv1dNgchGfch: + set("filter_layout", {"g", "k", "y", "x", "c"}); + set("input_layout", {"ni", "hi", "wi", "gi", "ci"}); + set("output_layout", {"no", "ho", "wo", "go", "ko"}); + break; + } +} + +/// Transform filter from GFC* layout to GF*C layout for rock.conv. +/// 2D is already in the correct layout. +static Value transformFilter(OpBuilder &builder, Location loc, Value filter, + int64_t spatialDim) { + ArrayRef shape = + cast(filter.getType()).getShape(); + switch (spatialDim) { + case 3: { + rock::BottomUpTMBuilder b(builder, {"g", "f", "c", "h", "w", "d"}, shape, + loc); + b.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); + b.passThrough({"hk", "wk", "dk"}, {2, 3, 4}, {"h", "w", "d"}); + b.passThrough({"ck"}, {5}, {"c"}); + return rock::TransformOp::create(builder, loc, filter, b.get()); + } + case 2: + return filter; + case 1: { + // Conv1D is expanded into Conv2D (matching migraphx-to-tosa): unmerge + // H into (H, W=1). 
+ rock::BottomUpTMBuilder b(builder, {"g", "f", "c", "h"}, shape, loc); + b.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); + b.unmerge({"hk", "wk"}, {2, 3}, {"h"}, {shape[3], 1}); + b.passThrough({"ck"}, {4}, {"c"}); + return rock::TransformOp::create(builder, loc, filter, b.get()); + } + default: + llvm_unreachable("unsupported spatial dim for filter transform"); + } +} + +/// Remove the tensor.pad + tensor.expand_shape pattern emitted by +/// migraphx-to-linalg, replacing it with just tensor.expand_shape on the +/// unpadded source. rock.conv handles padding internally. +/// +/// Expected IR structure: +/// %padded = tensor.pad %original ... +/// %expanded = tensor.expand_shape %padded ... +/// Replaced with: +/// %expanded = tensor.expand_shape %original ... +static FailureOr +removePaddingFromInput(ConversionPatternRewriter &rewriter, + linalg::GenericOp op, Value in, ArrayAttr padding) { + bool hasPadding = llvm::any_of(padding.getValue(), [](Attribute attr) { + return cast(attr).getInt() != 0; + }); + if (!hasPadding) + return in; + + auto expanded = in.getDefiningOp(); + if (!expanded) { + op.emitError("unexpected padding code structure"); + return failure(); + } + auto padded = expanded->getOperand(0).getDefiningOp(); + if (!padded) { + op.emitError("unexpected padding code structure"); + return failure(); + } + + SmallVector resultShape(expanded.getResultType().getShape()); + auto lowPad = padded.getStaticLow(); + auto highPad = padded.getStaticHigh(); + int64_t numPadDims = lowPad.size(); + int64_t numExpandedDims = resultShape.size(); + + // Padding is defined in pre-expand space. The spatial dims are at the + // tail of both tensors (expand_shape only splits an earlier dim), so + // align from the end. 
+ for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; + i >= 0 && j >= 0; --i, --j) { + resultShape[j] -= (lowPad[i] + highPad[i]); + } + + RankedTensorType newResultType = RankedTensorType::get( + resultShape, padded.getResultType().getElementType()); + Value result = tensor::ExpandShapeOp::create( + rewriter, expanded.getLoc(), newResultType, padded.getOperand(0), + expanded.getReassociationIndices()); + rewriter.replaceOp(expanded, result); + rewriter.eraseOp(padded); + return result; +} + +/// Transform input from NGC* layout to N*GC layout for rock.conv. +/// 2D is already in the correct layout. +static Value transformInput(OpBuilder &builder, Location loc, Value input, + int64_t spatialDim) { + ArrayRef shape = + cast(input.getType()).getShape(); + switch (spatialDim) { + case 3: { + rock::BottomUpTMBuilder b(builder, {"n", "g", "c", "h", "w", "d"}, shape, + loc); + b.passThrough({"ni"}, {0}, {"n"}); + b.passThrough({"hi", "wi", "di"}, {1, 2, 3}, {"h", "w", "d"}); + b.passThrough({"gi", "ci"}, {4, 5}, {"g", "c"}); + return rock::TransformOp::create(builder, loc, input, b.get()); + } + case 2: + return input; + case 1: { + // Conv1D is expanded into Conv2D (matching migraphx-to-tosa): unmerge + // H into (H, W=1). + int64_t h = shape[3]; + rock::BottomUpTMBuilder b(builder, {"n", "g", "c", "h"}, shape, loc); + b.passThrough({"ni"}, {0}, {"n"}); + b.unmerge({"hi", "wi"}, {1, 2}, {"h"}, {h, 1}); + b.passThrough({"gi", "ci"}, {3, 4}, {"g", "c"}); + return rock::TransformOp::create(builder, loc, input, b.get()); + } + default: + llvm_unreachable("unsupported spatial dim for input transform"); + } +} + +/// Compute the rock output shape from the linalg output shape. +/// Linalg layout is NGF* while rock needs N*GF (with extra W=1 for 1D). 
+static SmallVector +computeRockOutputShape(ArrayRef linalgShape, int64_t spatialDim) { + if (spatialDim == 2) + return SmallVector(linalgShape); + SmallVector shape; + shape.push_back(linalgShape[0]); + shape.insert(shape.end(), std::next(linalgShape.begin(), 3), + linalgShape.end()); + if (spatialDim == 1) + shape.push_back(1); // Conv1D expanded to Conv2D: extra W=1 + shape.push_back(linalgShape[1]); + shape.push_back(linalgShape[2]); + return shape; +} + +/// Transform rock.conv output back to the linalg output layout. +/// 2D needs no transform. +static Value transformOutput(OpBuilder &builder, Location loc, Value convResult, + int64_t spatialDim) { + if (spatialDim == 2) + return convResult; + ArrayRef shape = + cast(convResult.getType()).getShape(); + switch (spatialDim) { + case 3: { + rock::BottomUpTMBuilder b(builder, {"n", "h", "w", "d", "g", "f"}, shape, + loc); + b.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); + b.passThrough({"no"}, {0}, {"n"}); + b.passThrough({"ho", "wo", "do"}, {3, 4, 5}, {"h", "w", "d"}); + return rock::TransformOp::create(builder, loc, convResult, b.get()); + } + case 1: { + // Conv1D was expanded into Conv2D: merge (H, W=1) back into H. 
+ rock::BottomUpTMBuilder b(builder, {"n", "h", "w", "g", "f"}, shape, loc); + b.passThrough({"no"}, {0}, {"n"}); + b.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); + b.merge("ho", 3, {"h", "w"}); + return rock::TransformOp::create(builder, loc, convResult, b.get()); + } + default: + llvm_unreachable("unsupported spatial dim for output transform"); + } +} +namespace { struct ConvLinalgConverter final : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -160,7 +365,6 @@ struct ConvLinalgConverter final ConversionPatternRewriter &rewriter) const override; private: - /// Returns strides, dilation, and padding if any FailureOr isConv(ConversionPatternRewriter &rewriter, linalg::GenericOp op) const; }; @@ -169,287 +373,101 @@ struct ConvLinalgConverter final FailureOr ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, linalg::GenericOp op) const { - // FIXME: In the future, it is possible to extract strides, dilation, and - // padding by matching the AffineExpr syntax tree. We can also infer the - // dimension and layout of the convolution from the affine_map. - rock::LinalgConvTypeAttr name = op->getAttrOfType("conv_op"); - if (!name) { + // FIXME: In the future, strides, dilation, and padding can be extracted + // by matching the AffineExpr syntax tree. The convolution dimension and + // layout could also be inferred from the affine_map. 
+ auto name = op->getAttrOfType("conv_op"); + if (!name) return failure(); - } rock::LinalgConvType convType = name.getValue(); + int64_t spatialDim = getSpatialDim(convType); auto convertToArrayAttr = [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr { - ArrayAttr casted = dyn_cast(arr); SmallVector values; - llvm::transform(casted.getValue(), std::back_inserter(values), - [&](Attribute val) { return cast(val).getInt(); }); - if (convType == rock::LinalgConvType::Conv1dNgchGfch) { - values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); - } + llvm::transform( + cast(arr).getValue(), std::back_inserter(values), + [](Attribute val) { return cast(val).getInt(); }); + // Conv1D is expanded into Conv2D to match the migraphx-to-tosa pipeline. + // Append identity defaults (stride=1, dilation=1, pad=0) for the extra + // spatial dimension. + if (spatialDim == 1) + values.insert(values.end(), dimOneDefaults.begin(), + dimOneDefaults.end()); return rewriter.getIndexArrayAttr(values); }; auto dilation = convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/1); - auto stride = convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/1); - - // We are given padding in format [dim0low, dim1low, ..., dim1high, - // dim2high,...] but rock expects [dim0low, dim1low, dim2low, ...] - SmallVector newPaddingOrder; - auto originalPaddingOrder = convertToArrayAttr(op->getAttr("pad")).getValue(); - int64_t dim = originalPaddingOrder.size() / 2; - for (int64_t i = 0; i < dim; ++i) { - newPaddingOrder.push_back(originalPaddingOrder[i]); - newPaddingOrder.push_back(originalPaddingOrder[i]); + auto stride = + convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/1); + + // Input format: [dim0_low, dim1_low, ..., dim0_high, dim1_high, ...] + // Rock format: [dim0_low, dim0_high, dim1_low, dim1_high, ...] 
+ auto originalPadding = convertToArrayAttr(op->getAttr("pad")).getValue(); + int64_t numSpatial = originalPadding.size() / 2; + SmallVector interleavedPad; + for (int64_t i = 0; i < numSpatial; ++i) { + interleavedPad.push_back(originalPadding[i]); + interleavedPad.push_back(originalPadding[numSpatial + i]); } - if (convType == rock::LinalgConvType::Conv1dNgchGfch) { - newPaddingOrder.push_back(rewriter.getIndexAttr(0)); - newPaddingOrder.push_back(rewriter.getIndexAttr(0)); + // For Conv1D is expanded into Conv2D like the tosa pipeline, so + // we set the last dimension have 0 padding to stay consistent. + if (spatialDim == 1) { + interleavedPad.push_back(rewriter.getIndexAttr(0)); + interleavedPad.push_back(rewriter.getIndexAttr(0)); } - auto padding = rewriter.getArrayAttr(newPaddingOrder); - if (!padding || !dilation || !stride) { + auto padding = rewriter.getArrayAttr(interleavedPad); + if (!padding || !dilation || !stride) return failure(); - } StringAttr perfConfig = op->getAttrOfType("perf_config"); - return ConvFields{convType, padding, stride, dilation, perfConfig}; + return ConvFields{convType, spatialDim, padding, stride, dilation, + perfConfig}; } LogicalResult ConvLinalgConverter::matchAndRewrite( linalg::GenericOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { - FailureOr maybeConvParams = isConv(rewriter, op); - if (failed(maybeConvParams)) + FailureOr maybeConv = isConv(rewriter, op); + if (failed(maybeConv)) return failure(); - ConvFields convParams = maybeConvParams.value(); + ConvFields conv = *maybeConv; Location loc = op.getLoc(); - // We have layout filter = GFC* but we need GF*C - auto getFilter = [&](Value startFilter) -> Value { - ArrayRef startFilterShape = - cast(startFilter.getType()).getShape(); - int64_t dim = startFilterShape.size() - 3; - switch (dim) { - case 3: { - rock::BottomUpTMBuilder filterBuilder( - rewriter, {"g", "f", "c", "h", "w", "d"}, startFilterShape, loc); - filterBuilder.passThrough({"gk", 
"fk"}, {0, 1}, {"g", "f"}); - filterBuilder.passThrough({"hk", "wk", "dk"}, {2, 3, 4}, {"h", "w", "d"}); - filterBuilder.passThrough({"ck"}, {5}, {"c"}); - auto attr = filterBuilder.get(); - auto filter = rock::TransformOp::create(rewriter, loc, startFilter, attr); - return filter; - } - case 2: { - return startFilter; - } - case 1: { - rock::BottomUpTMBuilder filterBuilder(rewriter, {"g", "f", "c", "h"}, - startFilterShape, loc); - filterBuilder.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); - filterBuilder.unmerge({"hk", "wk"}, {2, 3}, {"h"}, - {startFilterShape[3], 1}); - filterBuilder.passThrough({"ck"}, {4}, {"c"}); - auto attr = filterBuilder.get(); - auto filter = rock::TransformOp::create(rewriter, loc, startFilter, attr); - return filter; - } - default: - llvm_unreachable("seen unsupported cases"); - } - }; + auto maybeInput = + removePaddingFromInput(rewriter, op, op.getOperand(0), conv.padding); + if (failed(maybeInput)) + return failure(); - // We have input filter = NGC* but we need N*GC - auto getInput = [&](Value in) -> FailureOr { - // dealing with padding - if (llvm::any_of(convParams.padding.getValue(), [](Attribute attr) { - return cast(attr).getInt() != 0; - })) { - // clang-format off - // Here we are essentially removing the padding while keeping the group - // dimension expansion. We remove the padding because the rock.conv handles - // padding for us This code structure comes from what migraphx-to-linalg - // emits. In theory, there can be other code structure that are emitted in - // linalg pipeline to handle padding. - // Original: - // %padded = tensor.pad %original ... - // %group_expansion = tensor.expand_shape %padded ... 
- // New: - // %group_expansion = tensor.expand_shape %original - // clang-format on - if (auto expanded = in.getDefiningOp(); - auto padded = - expanded->getOperand(0).getDefiningOp()) { - SmallVector resultShape( - expanded.getResultType().getShape()); - auto lowPad = padded.getStaticLow(); - auto highPad = padded.getStaticHigh(); - int64_t numPadDims = lowPad.size(); - int64_t numExpandedDims = resultShape.size(); - - // Padding is defined in pre-expand space. The spatial dims are at the - // tail of both the pre-expand and post-expand tensors (expand_shape - // only splits an earlier dim), so align from the end. - for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; - i >= 0 && j >= 0; --i, --j) { - resultShape[j] -= (lowPad[i] + highPad[i]); - } - - RankedTensorType newResultType = RankedTensorType::get( - resultShape, padded.getResultType().getElementType()); - auto temp = padded.getOperand(0); - in = tensor::ExpandShapeOp::create(rewriter, expanded.getLoc(), - newResultType, temp, - expanded.getReassociationIndices()); - rewriter.replaceOp(expanded, in); - rewriter.eraseOp(padded); - } else { - op.emitError("unexpected padding code structure"); - return failure(); - } - } - - ArrayRef startInputShape = - cast(in.getType()).getShape(); - int64_t dim = startInputShape.size() - 3; - switch (dim) { - case 3: { - rock::BottomUpTMBuilder inputBuilder( - rewriter, {"n", "g", "c", "h", "w", "d"}, startInputShape, loc); - inputBuilder.passThrough({"ni"}, {0}, {"n"}); - inputBuilder.passThrough({"hi", "wi", "di"}, {1, 2, 3}, {"h", "w", "d"}); - inputBuilder.passThrough({"gi", "ci"}, {4, 5}, {"g", "c"}); - auto inputAttr = inputBuilder.get(); - auto input = rock::TransformOp::create(rewriter, loc, in, inputAttr); - return input.getResult(); - } - case 2: { - return in; - } - case 1: { - // migraphx-to-tosa pipeline handles 1d convolution by converting - // 1 dimensional input into 2 dimensional. 1x1x3x10 (NGCH) becomes - // 1x1x3x1x10 (NHWGC). 
We are reproducing that here - int64_t h = startInputShape[3]; - rock::BottomUpTMBuilder filterBuilder(rewriter, {"n", "g", "c", "h"}, - startInputShape, loc); - filterBuilder.passThrough({"ni"}, {0}, {"n"}); - filterBuilder.unmerge({"hi", "wi"}, {1, 2}, {"h"}, {h, 1}); - filterBuilder.passThrough({"gi", "ci"}, {3, 4}, {"g", "c"}); - auto attr = filterBuilder.get(); - return rock::TransformOp::create(rewriter, loc, in, attr).getResult(); - } - default: - llvm_unreachable("unsupported cases"); - } - }; + Value input = transformInput(rewriter, loc, *maybeInput, conv.spatialDim); + Value filter = + transformFilter(rewriter, loc, op.getOperand(1), conv.spatialDim); - // Creating the final result shape RankedTensorType linalgResultType = cast(op.getResult(0).getType()); - ArrayRef linalgOutputShape = linalgResultType.getShape(); - SmallVector rockOutputShape(linalgOutputShape); - if (linalgOutputShape.size() - 3 == 3 || linalgOutputShape.size() - 3 == 1) { - rockOutputShape.clear(); - rockOutputShape.push_back(linalgOutputShape[0]); - rockOutputShape.insert(rockOutputShape.end(), - std::next(linalgOutputShape.begin(), 3), - linalgOutputShape.end()); - if (linalgOutputShape.size() - 3 == 1) - rockOutputShape.push_back(1); - rockOutputShape.push_back(linalgOutputShape[1]); - rockOutputShape.push_back(linalgOutputShape[2]); - } + SmallVector rockShape = + computeRockOutputShape(linalgResultType.getShape(), conv.spatialDim); RankedTensorType rockResultType = - RankedTensorType::get(rockOutputShape, linalgResultType.getElementType()); - Value output = bufferization::AllocTensorOp::create(rewriter, op.getLoc(), - rockResultType, {}); + RankedTensorType::get(rockShape, linalgResultType.getElementType()); + Value output = + bufferization::AllocTensorOp::create(rewriter, loc, rockResultType, {}); - auto maybeInput = getInput(op.getOperand(0)); - if (failed(maybeInput)) { - return failure(); - } - auto input = *maybeInput; - auto filter = getFilter(op.getOperand(1)); auto cop = 
rock::ConvOp::create(rewriter, loc, rockResultType, filter, input, output, /*features=*/nullptr, /*blockSize=*/nullptr, /*gridSize=*/nullptr, - convParams.padding, convParams.stride, - convParams.dilation, /*params=*/nullptr); + conv.padding, conv.stride, conv.dilation, + /*params=*/nullptr); // TODO: add splitk - if (convParams.perfConfig) { - cop->setAttr("perf_config", convParams.perfConfig); - } + if (conv.perfConfig) + cop->setAttr("perf_config", conv.perfConfig); - // Here we are going to emit layouts - switch (convParams.type) { - case rock::LinalgConvType::Conv3dNgchwdGfchwd: - cop->setAttr("filter_layout", - rewriter.getStrArrayAttr({"g", "k", "0", "1", "2", "c"})); - cop->setAttr("input_layout", rewriter.getStrArrayAttr( - {"ni", "0i", "1i", "2i", "gi", "ci"})); - cop->setAttr("output_layout", rewriter.getStrArrayAttr( - {"no", "0o", "1o", "2o", "go", "ko"})); - break; - case rock::LinalgConvType::Conv2dNgchwGfchw: - cop->setAttr("filter_layout", - rewriter.getStrArrayAttr({"g", "k", "c", "y", "x"})); - cop->setAttr("input_layout", - rewriter.getStrArrayAttr({"ni", "gi", "ci", "hi", "wi"})); - cop->setAttr("output_layout", - rewriter.getStrArrayAttr({"no", "go", "ko", "ho", "wo"})); - break; - case rock::LinalgConvType::Conv1dNgchGfch: - cop->setAttr("filter_layout", - rewriter.getStrArrayAttr({"g", "k", "y", "x", "c"})); - cop->setAttr("input_layout", - rewriter.getStrArrayAttr({"ni", "hi", "wi", "gi", "ci"})); - cop->setAttr("output_layout", - rewriter.getStrArrayAttr({"no", "ho", "wo", "go", "ko"})); - break; - default: - llvm_unreachable("edge case one"); - } + setConvLayoutAttrs(rewriter, cop, conv.type); - // output has type ["no", "0o", "1o", "2o", "go", "ko"] - // We need to reshape to ngfhwd - ArrayRef startResultShape = rockResultType.getShape(); - Value finalReshaped; - switch (convParams.type) { - case rock::LinalgConvType::Conv3dNgchwdGfchwd: { - rock::BottomUpTMBuilder resultBuilder( - rewriter, {"n", "h", "w", "d", "g", "f"}, 
startResultShape, loc); - resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); - resultBuilder.passThrough({"no"}, {0}, {"n"}); - resultBuilder.passThrough({"ho", "wo", "do"}, {3, 4, 5}, {"h", "w", "d"}); - auto resultAttr = resultBuilder.get(); - finalReshaped = - rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); - break; - } - case rock::LinalgConvType::Conv2dNgchwGfchw: { - finalReshaped = cop.getResult(); - break; - } - case rock::LinalgConvType::Conv1dNgchGfch: { - rock::BottomUpTMBuilder resultBuilder(rewriter, {"n", "h", "w", "g", "f"}, - startResultShape, loc); - resultBuilder.passThrough({"no"}, {0}, {"n"}); - resultBuilder.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); - resultBuilder.merge("ho", 3, {"h", "w"}); - auto resultAttr = resultBuilder.get(); - finalReshaped = - rock::TransformOp::create(rewriter, loc, cop.getResult(), resultAttr); - break; - } - default: { - return op.emitError("unimplemented final reshape"); - } - } - - rewriter.replaceOp(op, finalReshaped); + Value result = + transformOutput(rewriter, loc, cop.getResult(), conv.spatialDim); + rewriter.replaceOp(op, result); return success(); } diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir new file mode 100644 index 000000000000..8ea0df3923f3 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir @@ -0,0 +1,102 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -rock-view-to-transform -verify-diagnostics --split-input-file | FileCheck %s + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> + +// CHECK-LABEL: func.func @conv_3d( +// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 
2 : index, 2 : index], filter_layout = ["g", "k", "0", "1", "2", "c"], input_layout = ["ni", "0i", "1i", "2i", "gi", "ci"], output_layout = ["no", "0o", "1o", "2o", "go", "ko"], padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index], strides = [2 : index, 2 : index, 2 : index]} +func.func @conv_3d(%arg0: tensor<64xf32>, %arg1: tensor<750xf32>, %arg2: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> + %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [2, 4, 2, 2, 2] : tensor<64xf32> into tensor<2x4x2x2x2xf32> + %expanded_0 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4, 5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x1x3x5x5x5xf32> + %expanded_1 = tensor.expand_shape %arg2 [[0, 1, 2, 3, 4, 5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<96xf32> into tensor<1x4x3x2x2x2xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_0, %expanded_1 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_3: f32, %out: f32): + %3 = arith.mulf %in, %in_3 : f32 + %4 = arith.addf %out, %3 : f32 + linalg.yield %4 : f32 + } -> tensor<2x1x4x2x2x2xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<2x1x4x2x2x2xf32> into tensor<2x4x2x2x2xf32> + %1 = tensor.empty() : tensor<2x4x2x2x2xf32> + %2 = linalg.add ins(%collapsed, %expanded : tensor<2x4x2x2x2xf32>, tensor<2x4x2x2x2xf32>) outs(%1 : tensor<2x4x2x2x2xf32>) -> tensor<2x4x2x2x2xf32> + %collapsed_2 = tensor.collapse_shape %2 [[0, 1, 2, 3, 4]] : tensor<2x4x2x2x2xf32> into tensor<64xf32> + return %collapsed_2 : tensor<64xf32> 
+} + +// ----- + +#map3 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 4 + d6 * 2, d4 * 5 + d7 * 3)> +#map4 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map5 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> + +// CHECK-LABEL: func.func @conv_2d +// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 3 : index], filter_layout = ["g", "k", "c", "y", "x"], input_layout = ["ni", "gi", "ci", "hi", "wi"], output_layout = ["no", "go", "ko", "ho", "wo"], padding = [2 : index, 2 : index, 2 : index, 2 : index], strides = [4 : index, 5 : index]} +func.func @conv_2d(%arg0: tensor<122016xf32>, %arg1: tensor<320xf32>) -> tensor<8208xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %cst = arith.constant dense<0.000000e+00> : tensor<2x2x4x27x19xf32> + %cst_0 = arith.constant 0.000000e+00 : f32 + %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [2, 4, 123, 124] : tensor<122016xf32> into tensor<2x4x123x124xf32> + %padded = tensor.pad %expanded low[0, 0, 2, 2] high[0, 0, 2, 2] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): + tensor.yield %cst_0 : f32 + } : tensor<2x4x123x124xf32> to tensor<2x4x127x128xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [2, 2, 2, 127, 128] : tensor<2x4x127x128xf32> into tensor<2x2x2x127x128xf32> + %expanded_2 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [2, 4, 2, 4, 5] : tensor<320xf32> into tensor<2x4x2x4x5xf32> + %0 = linalg.generic {indexing_maps = [#map3, #map4, #map5], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x2x2x127x128xf32>, tensor<2x4x2x4x5xf32>) outs(%cst : tensor<2x2x4x27x19xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 2 : i64, pad = [2, 2, 2, 2], stride = [4, 5]} { + ^bb0(%in: f32, %in_3: f32, %out: f32): + %1 = arith.mulf %in, %in_3 : f32 + %2 = arith.addf 
%out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<2x2x4x27x19xf32> + %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3, 4]] : tensor<2x2x4x27x19xf32> into tensor<8208xf32> + return %collapsed : tensor<8208xf32> +} + +// ----- + +#map6 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map7 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map8 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d +// CHECK: rock.conv({{.*}}) {dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "y", "x", "c"], input_layout = ["ni", "hi", "wi", "gi", "ci"], output_layout = ["no", "ho", "wo", "go", "ko"], padding = [3 : index, 3 : index, 0 : index, 0 : index], strides = [1 : index, 1 : index]} +func.func @conv_1d(%arg0: tensor<14336xf32>, %arg1: tensor<672xf32>, %arg2: tensor<1344xf32>) -> tensor<14336xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x64x224xf32> + %cst_0 = arith.constant 0.000000e+00 : f32 + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [1, 3, 224] : tensor<672xf32> into tensor<1x3x224xf32> + %padded = tensor.pad %expanded low[0, 0, 3] high[0, 0, 3] { + ^bb0(%arg3: index, %arg4: index, %arg5: index): + tensor.yield %cst_0 : f32 + } : tensor<1x3x224xf32> to tensor<1x3x230xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 230] : tensor<1x3x230xf32> into tensor<1x1x3x230xf32> + %expanded_2 = tensor.expand_shape %arg2 [[0, 1, 2, 3]] output_shape [1, 64, 3, 7] : tensor<1344xf32> into tensor<1x64x3x7xf32> + %0 = linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x230xf32>, tensor<1x64x3x7xf32>) outs(%cst : tensor<1x1x64x224xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [3, 3], stride = [1]} { + ^bb0(%in: f32, %in_3: 
f32, %out: f32): + %1 = arith.mulf %in, %in_3 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x64x224xf32> + %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3]] : tensor<1x1x64x224xf32> into tensor<14336xf32> + return %collapsed : tensor<14336xf32> +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> + +// CHECK-LABEL: func.func @mlir_perf_config +// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 2 : index, 2 : index], filter_layout = ["g", "k", "0", "1", "2", "c"], input_layout = ["ni", "0i", "1i", "2i", "gi", "ci"], output_layout = ["no", "0o", "1o", "2o", "go", "ko"], padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", strides = [2 : index, 2 : index, 2 : index]} +func.func @mlir_perf_config(%arg0: tensor<750xf32>, %arg1: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> + %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4, 5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x1x3x5x5x5xf32> + %expanded_0 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4, 5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<96xf32> into tensor<1x4x3x2x2x2xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded, %expanded_0 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = 
"v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_1: f32, %out: f32): + %1 = arith.mulf %in, %in_1 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<2x1x4x2x2x2xf32> + %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3, 4, 5]] : tensor<2x1x4x2x2x2xf32> into tensor<64xf32> + return %collapsed : tensor<64xf32> +} From 7abde9b04fab61db917d7a7048de3559bb5e5f5d Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 25 Feb 2026 15:58:12 +0000 Subject: [PATCH 4/8] Added comments --- mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp index 11ed67331c0b..e4c4aded48a3 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp @@ -48,6 +48,8 @@ static void populateLinalgToRockDialectConversion(ConversionTarget &target) { return std::nullopt; } + // Convolution linalg.generic has reduction iteration type. It is not + // a legal operation in that case linalg::GenericOp castedOp = dyn_cast(op); if (castedOp && llvm::any_of(castedOp.getIteratorTypesArray(), [](auto type) { From f58acdda833f1fc86de28d43c38fc8c1861de288 Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 4 Mar 2026 04:24:50 +0000 Subject: [PATCH 5/8] Simplify changes and removed all the rock.transforms --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 198 +++++------------- 1 file changed, 50 insertions(+), 148 deletions(-) diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 51a1a220d2e0..8b8a1c133815 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -166,59 +166,22 @@ static int64_t getSpatialDim(rock::LinalgConvType type) { } /// Set filter_layout, input_layout, and output_layout on a rock.conv op. 
+/// Layouts match the linalg convention: GKC*, NGC*, NGK*. static void setConvLayoutAttrs(OpBuilder &builder, rock::ConvOp cop, - rock::LinalgConvType type) { - auto set = [&](StringRef name, ArrayRef layout) { - cop->setAttr(name, builder.getStrArrayAttr(layout)); + int64_t spatialDim) { + auto *ctx = builder.getContext(); + auto setLayout = [&](StringRef attrName, ArrayRef prefix, + StringRef suffix) { + SmallVector layout; + for (StringRef dim : prefix) + layout.push_back(StringAttr::get(ctx, dim)); + for (int64_t i = 0; i < spatialDim; ++i) + layout.push_back(StringAttr::get(ctx, Twine(i) + suffix)); + cop->setAttr(attrName, builder.getArrayAttr(layout)); }; - switch (type) { - case rock::LinalgConvType::Conv3dNgchwdGfchwd: - set("filter_layout", {"g", "k", "0", "1", "2", "c"}); - set("input_layout", {"ni", "0i", "1i", "2i", "gi", "ci"}); - set("output_layout", {"no", "0o", "1o", "2o", "go", "ko"}); - break; - case rock::LinalgConvType::Conv2dNgchwGfchw: - set("filter_layout", {"g", "k", "c", "y", "x"}); - set("input_layout", {"ni", "gi", "ci", "hi", "wi"}); - set("output_layout", {"no", "go", "ko", "ho", "wo"}); - break; - case rock::LinalgConvType::Conv1dNgchGfch: - set("filter_layout", {"g", "k", "y", "x", "c"}); - set("input_layout", {"ni", "hi", "wi", "gi", "ci"}); - set("output_layout", {"no", "ho", "wo", "go", "ko"}); - break; - } -} - -/// Transform filter from GFC* layout to GF*C layout for rock.conv. -/// 2D is already in the correct layout. 
-static Value transformFilter(OpBuilder &builder, Location loc, Value filter, - int64_t spatialDim) { - ArrayRef shape = - cast(filter.getType()).getShape(); - switch (spatialDim) { - case 3: { - rock::BottomUpTMBuilder b(builder, {"g", "f", "c", "h", "w", "d"}, shape, - loc); - b.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); - b.passThrough({"hk", "wk", "dk"}, {2, 3, 4}, {"h", "w", "d"}); - b.passThrough({"ck"}, {5}, {"c"}); - return rock::TransformOp::create(builder, loc, filter, b.get()); - } - case 2: - return filter; - case 1: { - // Conv1D is expanded into Conv2D (matching migraphx-to-tosa): unmerge - // H into (H, W=1). - rock::BottomUpTMBuilder b(builder, {"g", "f", "c", "h"}, shape, loc); - b.passThrough({"gk", "fk"}, {0, 1}, {"g", "f"}); - b.unmerge({"hk", "wk"}, {2, 3}, {"h"}, {shape[3], 1}); - b.passThrough({"ck"}, {4}, {"c"}); - return rock::TransformOp::create(builder, loc, filter, b.get()); - } - default: - llvm_unreachable("unsupported spatial dim for filter transform"); - } + setLayout("filter_layout", {"g", "k", "c"}, ""); + setLayout("input_layout", {"ni", "gi", "ci"}, "i"); + setLayout("output_layout", {"no", "go", "ko"}, "o"); } /// Remove the tensor.pad + tensor.expand_shape pattern emitted by @@ -274,85 +237,6 @@ removePaddingFromInput(ConversionPatternRewriter &rewriter, return result; } -/// Transform input from NGC* layout to N*GC layout for rock.conv. -/// 2D is already in the correct layout. 
-static Value transformInput(OpBuilder &builder, Location loc, Value input, - int64_t spatialDim) { - ArrayRef shape = - cast(input.getType()).getShape(); - switch (spatialDim) { - case 3: { - rock::BottomUpTMBuilder b(builder, {"n", "g", "c", "h", "w", "d"}, shape, - loc); - b.passThrough({"ni"}, {0}, {"n"}); - b.passThrough({"hi", "wi", "di"}, {1, 2, 3}, {"h", "w", "d"}); - b.passThrough({"gi", "ci"}, {4, 5}, {"g", "c"}); - return rock::TransformOp::create(builder, loc, input, b.get()); - } - case 2: - return input; - case 1: { - // Conv1D is expanded into Conv2D (matching migraphx-to-tosa): unmerge - // H into (H, W=1). - int64_t h = shape[3]; - rock::BottomUpTMBuilder b(builder, {"n", "g", "c", "h"}, shape, loc); - b.passThrough({"ni"}, {0}, {"n"}); - b.unmerge({"hi", "wi"}, {1, 2}, {"h"}, {h, 1}); - b.passThrough({"gi", "ci"}, {3, 4}, {"g", "c"}); - return rock::TransformOp::create(builder, loc, input, b.get()); - } - default: - llvm_unreachable("unsupported spatial dim for input transform"); - } -} - -/// Compute the rock output shape from the linalg output shape. -/// Linalg layout is NGF* while rock needs N*GF (with extra W=1 for 1D). -static SmallVector -computeRockOutputShape(ArrayRef linalgShape, int64_t spatialDim) { - if (spatialDim == 2) - return SmallVector(linalgShape); - SmallVector shape; - shape.push_back(linalgShape[0]); - shape.insert(shape.end(), std::next(linalgShape.begin(), 3), - linalgShape.end()); - if (spatialDim == 1) - shape.push_back(1); // Conv1D expanded to Conv2D: extra W=1 - shape.push_back(linalgShape[1]); - shape.push_back(linalgShape[2]); - return shape; -} - -/// Transform rock.conv output back to the linalg output layout. -/// 2D needs no transform. 
-static Value transformOutput(OpBuilder &builder, Location loc, Value convResult, - int64_t spatialDim) { - if (spatialDim == 2) - return convResult; - ArrayRef shape = - cast(convResult.getType()).getShape(); - switch (spatialDim) { - case 3: { - rock::BottomUpTMBuilder b(builder, {"n", "h", "w", "d", "g", "f"}, shape, - loc); - b.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); - b.passThrough({"no"}, {0}, {"n"}); - b.passThrough({"ho", "wo", "do"}, {3, 4, 5}, {"h", "w", "d"}); - return rock::TransformOp::create(builder, loc, convResult, b.get()); - } - case 1: { - // Conv1D was expanded into Conv2D: merge (H, W=1) back into H. - rock::BottomUpTMBuilder b(builder, {"n", "h", "w", "g", "f"}, shape, loc); - b.passThrough({"no"}, {0}, {"n"}); - b.passThrough({"go", "fo"}, {1, 2}, {"g", "f"}); - b.merge("ho", 3, {"h", "w"}); - return rock::TransformOp::create(builder, loc, convResult, b.get()); - } - default: - llvm_unreachable("unsupported spatial dim for output transform"); - } -} - namespace { struct ConvLinalgConverter final : public OpConversionPattern { @@ -373,9 +257,6 @@ struct ConvLinalgConverter final FailureOr ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, linalg::GenericOp op) const { - // FIXME: In the future, strides, dilation, and padding can be extracted - // by matching the AffineExpr syntax tree. The convolution dimension and - // layout could also be inferred from the affine_map. auto name = op->getAttrOfType("conv_op"); if (!name) return failure(); @@ -388,9 +269,8 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, llvm::transform( cast(arr).getValue(), std::back_inserter(values), [](Attribute val) { return cast(val).getInt(); }); - // Conv1D is expanded into Conv2D to match the migraphx-to-tosa pipeline. - // Append identity defaults (stride=1, dilation=1, pad=0) for the extra - // spatial dimension. 
+ // Conv1D is expanded into Conv2D: append identity defaults for the + // extra spatial dimension (stride=1, dilation=1, pad=0). if (spatialDim == 1) values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end()); @@ -411,8 +291,7 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, interleavedPad.push_back(originalPadding[i]); interleavedPad.push_back(originalPadding[numSpatial + i]); } - // For Conv1D is expanded into Conv2D like the tosa pipeline, so - // we set the last dimension have 0 padding to stay consistent. + // Conv1D is expanded into Conv2D if (spatialDim == 1) { interleavedPad.push_back(rewriter.getIndexAttr(0)); interleavedPad.push_back(rewriter.getIndexAttr(0)); @@ -441,19 +320,36 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( if (failed(maybeInput)) return failure(); - Value input = transformInput(rewriter, loc, *maybeInput, conv.spatialDim); - Value filter = - transformFilter(rewriter, loc, op.getOperand(1), conv.spatialDim); + Value input = *maybeInput; + Value filter = op.getOperand(1); + + // Conv1D is expanded into Conv2D: unmerge the single spatial dim + // into (spatial, W=1) for filter and input. 
+ int64_t effectiveSpatialDim = conv.spatialDim; + if (conv.spatialDim == 1) { + effectiveSpatialDim = 2; + auto filterShape = cast(filter.getType()).getShape(); + rock::BottomUpTMBuilder builder(rewriter, {"g", "k", "c", "0"}, filterShape, loc); + builder.passThrough({"gf", "kf", "cf"}, {0, 1, 2}, {"g", "k", "c"}); + builder.unmerge({"0f", "1f"}, {3, 4}, "0", {filterShape[3], 1}); + filter = rock::TransformOp::create(rewriter, loc, filter, builder.get()); + + auto inputShape = cast(input.getType()).getShape(); + rock::BottomUpTMBuilder b(rewriter, {"n", "g", "c", "0"}, inputShape, loc); + b.passThrough({"nu", "gu", "cu"}, {0, 1, 2}, {"n", "g", "c"}); + b.unmerge({"0u", "1u"}, {3, 4}, "0", {inputShape[3], 1}); + input = rock::TransformOp::create(rewriter, loc, input, b.get()); + } RankedTensorType linalgResultType = cast(op.getResult(0).getType()); - SmallVector rockShape = - computeRockOutputShape(linalgResultType.getShape(), conv.spatialDim); + SmallVector rockShape(linalgResultType.getShape()); + if (conv.spatialDim == 1) + rockShape.push_back(1); RankedTensorType rockResultType = RankedTensorType::get(rockShape, linalgResultType.getElementType()); Value output = bufferization::AllocTensorOp::create(rewriter, loc, rockResultType, {}); - auto cop = rock::ConvOp::create(rewriter, loc, rockResultType, filter, input, output, /*features=*/nullptr, /*blockSize=*/nullptr, /*gridSize=*/nullptr, @@ -462,11 +358,17 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( // TODO: add splitk if (conv.perfConfig) cop->setAttr("perf_config", conv.perfConfig); + setConvLayoutAttrs(rewriter, cop, effectiveSpatialDim); + + Value result = cop.getResult(); + if (conv.spatialDim == 1) { + auto shape = cast(result.getType()).getShape(); + rock::BottomUpTMBuilder b(rewriter, {"n", "g", "k", "0", "1"}, shape, loc); + b.passThrough({"no", "go", "ko"}, {0, 1, 2}, {"n", "g", "k"}); + b.merge("0o", 3, {"0", "1"}); + result = rock::TransformOp::create(rewriter, loc, result, b.get()); + } - 
setConvLayoutAttrs(rewriter, cop, conv.type); - - Value result = - transformOutput(rewriter, loc, cop.getResult(), conv.spatialDim); rewriter.replaceOp(op, result); return success(); } From 7de89e5948e8a421c1b17507814bb40a56d02751 Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 4 Mar 2026 16:44:10 +0000 Subject: [PATCH 6/8] Address comments and added more testcase --- .../Conversion/LinalgToRock/LinalgToRock.cpp | 43 ++- .../LinalgToRock/LinalgToRockPass.cpp | 8 +- .../LinalgToRock/linalg-to-rock-conv-1d.mlir | 235 +++++++++++++++++ .../LinalgToRock/linalg-to-rock-conv-2d.mlir | 209 +++++++++++++++ .../LinalgToRock/linalg-to-rock-conv-3d.mlir | 247 ++++++++++++++++++ .../LinalgToRock/linalg-to-rock-conv.mlir | 102 -------- .../LinalgToRock/linalg-to-rock-invalid.mlir | 104 ++++++++ 7 files changed, 828 insertions(+), 120 deletions(-) create mode 100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir create mode 100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir create mode 100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir delete mode 100644 mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 8b8a1c133815..926ec53f47d3 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -208,7 +208,7 @@ removePaddingFromInput(ConversionPatternRewriter &rewriter, return failure(); } auto padded = expanded->getOperand(0).getDefiningOp(); - if (!padded) { + if (!padded || !padded->hasOneUse()) { op.emitError("unexpected padding code structure"); return failure(); } @@ -222,8 +222,8 @@ removePaddingFromInput(ConversionPatternRewriter &rewriter, // Padding is defined in pre-expand space. The spatial dims are at the // tail of both tensors (expand_shape only splits an earlier dim), so // align from the end. 
-    for (int64_t i = numPadDims - 1, j = numExpandedDims - 1;
-         i >= 0 && j >= 0; --i, --j) {
+    for (int64_t i = numPadDims - 1, j = numExpandedDims - 1; i >= 0 && j >= 0;
+         --i, --j) {
       resultShape[j] -= (lowPad[i] + highPad[i]);
     }
 
@@ -262,9 +262,17 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter,
     return failure();
   rock::LinalgConvType convType = name.getValue();
   int64_t spatialDim = getSpatialDim(convType);
+  // Conv1D is expanded into Conv2D. For validation we use effectiveDim
+  // instead, because it has one more stride/dilation entry for the
+  // expanded dimension.
+  int64_t effectiveDim = (spatialDim == 1) ? spatialDim + 1 : spatialDim;
 
   auto convertToArrayAttr =
       [&](Attribute arr, ArrayRef dimOneDefaults = {}) -> ArrayAttr {
+    if(!arr || !isa(arr)){
+      return ArrayAttr {};
+    }
+
     SmallVector values;
     llvm::transform(
         cast(arr).getValue(), std::back_inserter(values),
@@ -272,19 +280,26 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter,
     // Conv1D is expanded into Conv2D: append identity defaults for the
     // extra spatial dimension (stride=1, dilation=1, pad=0).
     if (spatialDim == 1)
-      values.insert(values.end(), dimOneDefaults.begin(),
-                    dimOneDefaults.end());
+      values.insert(values.end(), dimOneDefaults.begin(), dimOneDefaults.end());
     return rewriter.getIndexArrayAttr(values);
   };
 
   auto dilation =
-      convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/1);
+      convertToArrayAttr(op->getAttr("dilation"), /*dimOneDefaults=*/{1});
   auto stride =
-      convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/1);
+      convertToArrayAttr(op->getAttr("stride"), /*dimOneDefaults=*/{1});
+  if (!dilation || !stride || (int64_t)dilation.size() != effectiveDim || (int64_t)stride.size() != effectiveDim){
+    op.emitError("invalid dilation or stride");
+    return failure();
+  }
 
   // Input format: [dim0_low, dim1_low, ..., dim0_high, dim1_high, ...]
   // Rock format: [dim0_low, dim0_high, dim1_low, dim1_high, ...] 
- auto originalPadding = convertToArrayAttr(op->getAttr("pad")).getValue(); + auto originalPadding = convertToArrayAttr(op->getAttr("pad")); + if(!originalPadding){ + op.emitError("no padding found"); + return failure(); + } int64_t numSpatial = originalPadding.size() / 2; SmallVector interleavedPad; for (int64_t i = 0; i < numSpatial; ++i) { @@ -297,12 +312,15 @@ ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter, interleavedPad.push_back(rewriter.getIndexAttr(0)); } auto padding = rewriter.getArrayAttr(interleavedPad); - if (!padding || !dilation || !stride) + // note that Conv1D is expanded into Conv2D + if(effectiveDim*2 != (int64_t)padding.size()){ + op.emitError("invalid number of padding"); return failure(); + } StringAttr perfConfig = op->getAttrOfType("perf_config"); - return ConvFields{convType, spatialDim, padding, stride, dilation, - perfConfig}; + return ConvFields{convType, spatialDim, padding, + stride, dilation, perfConfig}; } LogicalResult ConvLinalgConverter::matchAndRewrite( @@ -329,7 +347,8 @@ LogicalResult ConvLinalgConverter::matchAndRewrite( if (conv.spatialDim == 1) { effectiveSpatialDim = 2; auto filterShape = cast(filter.getType()).getShape(); - rock::BottomUpTMBuilder builder(rewriter, {"g", "k", "c", "0"}, filterShape, loc); + rock::BottomUpTMBuilder builder(rewriter, {"g", "k", "c", "0"}, filterShape, + loc); builder.passThrough({"gf", "kf", "cf"}, {0, 1, 2}, {"g", "k", "c"}); builder.unmerge({"0f", "1f"}, {3, 4}, "0", {filterShape[3], 1}); filter = rock::TransformOp::create(rewriter, loc, filter, builder.get()); diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp index e4c4aded48a3..7f3fb294abf7 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp @@ -48,13 +48,9 @@ static void populateLinalgToRockDialectConversion(ConversionTarget &target) { return std::nullopt; } - // 
Convolution linalg.generic has reduction iteration type. It is not - // a legal operation in that case + // Convolution has attributes. linalg::GenericOp castedOp = dyn_cast(op); - if (castedOp && - llvm::any_of(castedOp.getIteratorTypesArray(), [](auto type) { - return linalg::isReductionIterator(type); - })) { + if (castedOp && castedOp->hasAttr("conv_op")) { return false; } diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir new file mode 100644 index 000000000000..09ce6d9a36a7 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir @@ -0,0 +1,235 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x3 +// stride=1, dilation=1, padding=0, group=1 + +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : 
index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_basic(%arg0: tensor<30xf32>, %arg1: tensor<54xf32>) -> tensor<48xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x8xf32> into tensor<1x6x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x8xf32> into tensor<48xf32> + return %collapsed_3 : tensor<48xf32> + } +} + +// ----- + +// Input: NCL = 1x3x20, Filter: FCL = 6x3x3 +// stride=1, dilation=3, padding=0, group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5 * 3)> +#map1 = affine_map<(d0, 
d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [3 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_dilation(%arg0: tensor<60xf32>, %arg1: tensor<54xf32>) -> tensor<84xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 20] : tensor<60xf32> into tensor<1x3x20xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 20] : tensor<1x3x20xf32> into tensor<1x1x3x20xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = 
arith.constant dense<0.000000e+00> : tensor<1x1x6x14xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x14xf32>) attrs = {conv_op = #rock, dilation = [3], group = 1 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x14xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x14xf32> into tensor<1x6x14xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x14xf32> into tensor<84xf32> + return %collapsed_3 : tensor<84xf32> + } +} + +// ----- + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x5 +// stride=1, dilation=1, padding=[2,2], group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_3]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] 
+// CHECK-SAME: padding = [2 : index, 2 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +module { + func.func @conv_1d_padding(%arg0: tensor<30xf32>, %arg1: tensor<90xf32>) -> tensor<60xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 5] : tensor<90xf32> into tensor<6x3x5xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 2] high[0, 0, 2] { + ^bb0(%arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst : f32 + } : tensor<1x3x10xf32> to tensor<1x3x14xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 14] : tensor<1x3x14xf32> into tensor<1x1x3x14xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 5] : tensor<6x3x5xf32> into tensor<1x6x3x5xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x14xf32>, tensor<1x6x3x5xf32>) outs(%cst_3 : tensor<1x1x6x10xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [2, 2], stride = [1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x10xf32> into tensor<1x6x10xf32> + %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2]] : 
tensor<1x6x10xf32> into tensor<60xf32> + return %collapsed_4 : tensor<60xf32> + } +} + +// ----- + +// Input: NCL = 1x3x10, Filter: FCL = 6x3x3 +// stride=2, dilation=1, padding=0, group=1 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 * 2 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_stride( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_stride(%arg0: tensor<30xf32>, %arg1: tensor<54xf32>) -> tensor<24xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32> + 
%expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x4xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x4xf32> into tensor<1x6x4xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x4xf32> into tensor<24xf32> + return %collapsed_3 : tensor<24xf32> + } +} + +// ----- + +// Input: NCL = 1x6x10, Filter: F(C/G)L = 9x2x3 (group=3, C_per_group=2, F_per_group=3) +// stride=1, dilation=1, padding=0, group=3 +#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> +// CHECK-LABEL: func.func @conv_1d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]] +// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]] +// CHECK-DAG: %[[alloc:.*]] = 
bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_1d_groups(%arg0: tensor<60xf32>, %arg1: tensor<54xf32>) -> tensor<72xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [9, 2, 3] : tensor<54xf32> into tensor<9x2x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 6, 10] : tensor<60xf32> into tensor<1x6x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 3, 2, 10] : tensor<1x6x10xf32> into tensor<1x3x2x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [3, 3, 2, 3] : tensor<9x2x3xf32> into tensor<3x3x2x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10xf32>, tensor<3x3x2x3xf32>) outs(%cst : tensor<1x3x3x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 3 : i64, pad = [0, 0], stride = [1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x3x3x8xf32> 
into tensor<1x9x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x9x8xf32> into tensor<72xf32> + return %collapsed_3 : tensor<72xf32> + } +} diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir new file mode 100644 index 000000000000..174f877e8a04 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir @@ -0,0 +1,209 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +// CHECK-LABEL: func.func @conv_2d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_basic(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<384xf32> 
attributes {kernel, arch="##TOKEN_ARCH##"}{ + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x8x8xf32> into tensor<1x6x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x8x8xf32> into tensor<384xf32> + return %collapsed_3 : tensor<384xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = 
rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 3 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6 * 2, d4 + d7 * 3)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_dilation(%arg0: tensor<1200xf32>, %arg1: tensor<162xf32>) -> tensor<1344xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 20, 20] : tensor<1200xf32> into tensor<1x3x20x20xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 20, 20] : tensor<1x3x20x20xf32> into tensor<1x1x3x20x20xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x16x14xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20x20xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x16x14xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 1 : i64, pad = [0, 0, 0, 
0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x16x14xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x16x14xf32> into tensor<1x6x16x14xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x16x14xf32> into tensor<1344xf32> + return %collapsed_3 : tensor<1344xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_1]], %[[expanded_3]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [1 : index, 1 : index, 1 : index, 1 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_padding(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<600xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape 
[6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): + tensor.yield %cst : f32 + } : tensor<1x3x10x10xf32> to tensor<1x3x12x12xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 12, 12] : tensor<1x3x12x12xf32> into tensor<1x1x3x12x12xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12xf32>, tensor<1x6x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [1, 1, 1, 1], stride = [1, 1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x10x10xf32> into tensor<1x6x10x10xf32> + %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x10x10xf32> into tensor<600xf32> + return %collapsed_4 : tensor<600xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_stride( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: 
%[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 3 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 2 + d6, d4 * 3 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_stride(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<72xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x3xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, 
tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x3xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [2, 3]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4x3xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x4x3xf32> into tensor<1x6x4x3xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x4x3xf32> into tensor<72xf32> + return %collapsed_3 : tensor<72xf32> + } +} + +// ----- + +// CHECK-LABEL: func.func @conv_2d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> +module { + func.func @conv_2d_groups(%arg0: tensor<600xf32>, %arg1: tensor<162xf32>) -> 
tensor<576xf32> attributes {kernel, arch="##TOKEN_ARCH##"} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [9, 2, 3, 3] : tensor<162xf32> into tensor<9x2x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 6, 10, 10] : tensor<600xf32> into tensor<1x6x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 3, 2, 10, 10] : tensor<1x6x10x10xf32> into tensor<1x3x2x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [3, 3, 2, 3, 3] : tensor<9x2x3x3xf32> into tensor<3x3x2x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10xf32>, tensor<3x3x2x3x3xf32>) outs(%cst : tensor<1x3x3x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 3 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x3x3x8x8xf32> into tensor<1x9x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x9x8x8xf32> into tensor<576xf32> + return %collapsed_3 : tensor<576xf32> + } +} diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir new file mode 100644 index 000000000000..d6a0c6773963 --- /dev/null +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir @@ -0,0 +1,247 @@ +// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, 
d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_basic( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_basic(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], 
[4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7 * 2, d4 + d8 * 2, d5 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_dilation( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 2 : index, 2 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", 
"0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_dilation(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<1296xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x6x6x6xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x6x6x6xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x6x6x6xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : 
tensor<1x1x6x6x6x6xf32> into tensor<1x6x6x6x6xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x6x6x6xf32> into tensor<1296xf32> + return %collapsed_3 : tensor<1296xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_padding( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_1]], %[[expanded_3]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [1 : index, 1 : index, 1 : index, 1 : index, 1 : index, 1 : index] +// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_4]] +module { + func.func @conv_3d_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<6000xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 
10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %cst = arith.constant 0.000000e+00 : f32 + %padded = tensor.pad %expanded_0 low[0, 0, 1, 1, 1] high[0, 0, 1, 1, 1] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index): + tensor.yield %cst : f32 + } : tensor<1x3x10x10x10xf32> to tensor<1x3x12x12x12xf32> + %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 12, 12, 12] : tensor<1x3x12x12x12xf32> into tensor<1x1x3x12x12x12xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10x10xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12x12xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [1, 1, 1, 1, 1, 1], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_5: f32, %out: f32): + %1 = arith.mulf %in, %in_5 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x10x10x10xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x10x10x10xf32> into tensor<1x6x10x10x10xf32> + %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x10x10x10xf32> into tensor<6000xf32> + return %collapsed_4 : tensor<6000xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7, d4 * 2 + d8, d5 * 2 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_stride( 
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [2 : index, 2 : index, 2 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<384xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x4x4xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", 
"parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x4x4xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x4x4x4xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x4x4x4xf32> into tensor<1x6x4x4x4xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x4x4x4xf32> into tensor<384xf32> + return %collapsed_3 : tensor<384xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_groups( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: strides = [1 : 
index, 1 : index, 1 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_groups(%arg0: tensor<6000xf32>, %arg1: tensor<486xf32>) -> tensor<4608xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [9, 2, 3, 3, 3] : tensor<486xf32> into tensor<9x2x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 6, 10, 10, 10] : tensor<6000xf32> into tensor<1x6x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 3, 2, 10, 10, 10] : tensor<1x6x10x10x10xf32> into tensor<1x3x2x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [3, 3, 2, 3, 3, 3] : tensor<9x2x3x3x3xf32> into tensor<3x3x2x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8x8xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10x10xf32>, tensor<3x3x2x3x3x3xf32>) outs(%cst : tensor<1x3x3x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 3 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x3x3x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x3x3x8x8x8xf32> into tensor<1x9x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x9x8x8x8xf32> into tensor<4608xf32> + return %collapsed_3 : tensor<4608xf32> + } +} + +// ----- +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> 
(d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-LABEL: func.func @conv_3d_perf_config( +// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor +// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]] +// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]] +// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]] +// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]] +// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor +// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]]) +// CHECK-SAME: dilations = [2 : index, 2 : index, 2 : index] +// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"] +// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"] +// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"] +// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index] +// CHECK-SAME: perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1" +// CHECK-SAME: strides = [2 : index, 2 : index, 2 : index] +// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]] +// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]] +// CHECK-DAG: return %[[collapsed_3]] +module { + func.func @conv_3d_perf_config(%arg0: tensor<750xf32>, %arg1: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [4, 3, 2, 2, 2] : tensor<96xf32> into tensor<4x3x2x2x2xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [2, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x3x5x5x5xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<2x3x5x5x5xf32> into 
tensor<2x1x3x5x5x5xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<4x3x2x2x2xf32> into tensor<1x4x3x2x2x2xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<2x1x4x2x2x2xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<2x1x4x2x2x2xf32> into tensor<2x4x2x2x2xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<2x4x2x2x2xf32> into tensor<64xf32> + return %collapsed_3 : tensor<64xf32> + } +} + diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir deleted file mode 100644 index 8ea0df3923f3..000000000000 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv.mlir +++ /dev/null @@ -1,102 +0,0 @@ -// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -rock-view-to-transform -verify-diagnostics --split-input-file | FileCheck %s - -#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> -#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> -#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> - -// CHECK-LABEL: func.func @conv_3d( -// CHECK: rock.conv({{.*}}) 
{dilations = [2 : index, 2 : index, 2 : index], filter_layout = ["g", "k", "0", "1", "2", "c"], input_layout = ["ni", "0i", "1i", "2i", "gi", "ci"], output_layout = ["no", "0o", "1o", "2o", "go", "ko"], padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index], strides = [2 : index, 2 : index, 2 : index]} -func.func @conv_3d(%arg0: tensor<64xf32>, %arg1: tensor<750xf32>, %arg2: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { - %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> - %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [2, 4, 2, 2, 2] : tensor<64xf32> into tensor<2x4x2x2x2xf32> - %expanded_0 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4, 5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x1x3x5x5x5xf32> - %expanded_1 = tensor.expand_shape %arg2 [[0, 1, 2, 3, 4, 5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<96xf32> into tensor<1x4x3x2x2x2xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_0, %expanded_1 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { - ^bb0(%in: f32, %in_3: f32, %out: f32): - %3 = arith.mulf %in, %in_3 : f32 - %4 = arith.addf %out, %3 : f32 - linalg.yield %4 : f32 - } -> tensor<2x1x4x2x2x2xf32> - %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<2x1x4x2x2x2xf32> into tensor<2x4x2x2x2xf32> - %1 = tensor.empty() : tensor<2x4x2x2x2xf32> - %2 = linalg.add ins(%collapsed, %expanded : tensor<2x4x2x2x2xf32>, tensor<2x4x2x2x2xf32>) outs(%1 : tensor<2x4x2x2x2xf32>) -> tensor<2x4x2x2x2xf32> - %collapsed_2 = tensor.collapse_shape %2 [[0, 1, 2, 3, 4]] : tensor<2x4x2x2x2xf32> into tensor<64xf32> - return 
%collapsed_2 : tensor<64xf32> -} - -// ----- - -#map3 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 4 + d6 * 2, d4 * 5 + d7 * 3)> -#map4 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)> -#map5 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)> - -// CHECK-LABEL: func.func @conv_2d -// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 3 : index], filter_layout = ["g", "k", "c", "y", "x"], input_layout = ["ni", "gi", "ci", "hi", "wi"], output_layout = ["no", "go", "ko", "ho", "wo"], padding = [2 : index, 2 : index, 2 : index, 2 : index], strides = [4 : index, 5 : index]} -func.func @conv_2d(%arg0: tensor<122016xf32>, %arg1: tensor<320xf32>) -> tensor<8208xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { - %cst = arith.constant dense<0.000000e+00> : tensor<2x2x4x27x19xf32> - %cst_0 = arith.constant 0.000000e+00 : f32 - %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [2, 4, 123, 124] : tensor<122016xf32> into tensor<2x4x123x124xf32> - %padded = tensor.pad %expanded low[0, 0, 2, 2] high[0, 0, 2, 2] { - ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): - tensor.yield %cst_0 : f32 - } : tensor<2x4x123x124xf32> to tensor<2x4x127x128xf32> - %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [2, 2, 2, 127, 128] : tensor<2x4x127x128xf32> into tensor<2x2x2x127x128xf32> - %expanded_2 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [2, 4, 2, 4, 5] : tensor<320xf32> into tensor<2x4x2x4x5xf32> - %0 = linalg.generic {indexing_maps = [#map3, #map4, #map5], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x2x2x127x128xf32>, tensor<2x4x2x4x5xf32>) outs(%cst : tensor<2x2x4x27x19xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 2 : i64, pad = [2, 2, 2, 2], stride = [4, 5]} { - ^bb0(%in: f32, %in_3: f32, %out: f32): - %1 = arith.mulf %in, 
%in_3 : f32 - %2 = arith.addf %out, %1 : f32 - linalg.yield %2 : f32 - } -> tensor<2x2x4x27x19xf32> - %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3, 4]] : tensor<2x2x4x27x19xf32> into tensor<8208xf32> - return %collapsed : tensor<8208xf32> -} - -// ----- - -#map6 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)> -#map7 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> -#map8 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> -// CHECK-LABEL: func.func @conv_1d -// CHECK: rock.conv({{.*}}) {dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "y", "x", "c"], input_layout = ["ni", "hi", "wi", "gi", "ci"], output_layout = ["no", "ho", "wo", "go", "ko"], padding = [3 : index, 3 : index, 0 : index, 0 : index], strides = [1 : index, 1 : index]} -func.func @conv_1d(%arg0: tensor<14336xf32>, %arg1: tensor<672xf32>, %arg2: tensor<1344xf32>) -> tensor<14336xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { - %cst = arith.constant dense<0.000000e+00> : tensor<1x1x64x224xf32> - %cst_0 = arith.constant 0.000000e+00 : f32 - %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [1, 3, 224] : tensor<672xf32> into tensor<1x3x224xf32> - %padded = tensor.pad %expanded low[0, 0, 3] high[0, 0, 3] { - ^bb0(%arg3: index, %arg4: index, %arg5: index): - tensor.yield %cst_0 : f32 - } : tensor<1x3x224xf32> to tensor<1x3x230xf32> - %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 230] : tensor<1x3x230xf32> into tensor<1x1x3x230xf32> - %expanded_2 = tensor.expand_shape %arg2 [[0, 1, 2, 3]] output_shape [1, 64, 3, 7] : tensor<1344xf32> into tensor<1x64x3x7xf32> - %0 = linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x230xf32>, tensor<1x64x3x7xf32>) outs(%cst : tensor<1x1x64x224xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [3, 3], stride = 
[1]} { - ^bb0(%in: f32, %in_3: f32, %out: f32): - %1 = arith.mulf %in, %in_3 : f32 - %2 = arith.addf %out, %1 : f32 - linalg.yield %2 : f32 - } -> tensor<1x1x64x224xf32> - %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3]] : tensor<1x1x64x224xf32> into tensor<14336xf32> - return %collapsed : tensor<14336xf32> -} - -// ----- - -#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)> -#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> -#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> - -// CHECK-LABEL: func.func @mlir_perf_config -// CHECK: rock.conv({{.*}}) {dilations = [2 : index, 2 : index, 2 : index], filter_layout = ["g", "k", "0", "1", "2", "c"], input_layout = ["ni", "0i", "1i", "2i", "gi", "ci"], output_layout = ["no", "0o", "1o", "2o", "go", "ko"], padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", strides = [2 : index, 2 : index, 2 : index]} -func.func @mlir_perf_config(%arg0: tensor<750xf32>, %arg1: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { - %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> - %expanded = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4, 5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x1x3x5x5x5xf32> - %expanded_0 = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4, 5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<96xf32> into tensor<1x4x3x2x2x2xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded, %expanded_0 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], 
perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { - ^bb0(%in: f32, %in_1: f32, %out: f32): - %1 = arith.mulf %in, %in_1 : f32 - %2 = arith.addf %out, %1 : f32 - linalg.yield %2 : f32 - } -> tensor<2x1x4x2x2x2xf32> - %collapsed = tensor.collapse_shape %0 [[0, 1, 2, 3, 4, 5]] : tensor<2x1x4x2x2x2xf32> into tensor<64xf32> - return %collapsed : tensor<64xf32> -} diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir index 1c4d30fef269..f291a4ac97bc 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir @@ -4,3 +4,107 @@ func.func @no_kernel_attribute_test() { func.return } + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_no_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{no padding found}} + // expected-error @+1 {{failed to legalize operation}} + 
%0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_no_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // 
expected-error @+2 {{invalid dilation or stride}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_invalid_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : 
tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{invalid number of padding}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} + +// ----- + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)> +module { + func.func @conv_3d_invalid_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { + %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> + %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> + %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into 
tensor<1x1x3x10x10x10xf32> + %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> + %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> + // expected-error @+2 {{invalid dilation or stride}} + // expected-error @+1 {{failed to legalize operation}} + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1]} { + ^bb0(%in: f32, %in_4: f32, %out: f32): + %1 = arith.mulf %in, %in_4 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor<1x1x6x8x8x8xf32> + %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32> + %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32> + return %collapsed_3 : tensor<3072xf32> + } +} From ebb7a41667e32e4105c2ae4830472e37e7386ba2 Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 16 Mar 2026 13:14:55 +0000 Subject: [PATCH 7/8] Fixed rebase --- mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp index 926ec53f47d3..70277301bdc7 100644 --- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp +++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp @@ -155,11 +155,11 @@ struct ConvFields { static int64_t getSpatialDim(rock::LinalgConvType type) { switch (type) { - case rock::LinalgConvType::Conv1dNgchGfch: + case rock::LinalgConvType::Conv1dNgchGkch: 
return 1; - case rock::LinalgConvType::Conv2dNgchwGfchw: + case rock::LinalgConvType::Conv2dNgchwGkchw: return 2; - case rock::LinalgConvType::Conv3dNgchwdGfchwd: + case rock::LinalgConvType::Conv3dNgchwdGkchwd: return 3; } llvm_unreachable("unknown LinalgConvType"); From 6225bdf4a9ec131b4f3a3d298fa49b36abeb4d8c Mon Sep 17 00:00:00 2001 From: Vincent Date: Mon, 16 Mar 2026 13:49:05 +0000 Subject: [PATCH 8/8] Fixed testcase after changing attributes to gf to gk --- .../LinalgToRock/linalg-to-rock-conv-1d.mlir | 10 +++++----- .../LinalgToRock/linalg-to-rock-conv-2d.mlir | 10 +++++----- .../LinalgToRock/linalg-to-rock-conv-3d.mlir | 12 ++++++------ .../LinalgToRock/linalg-to-rock-invalid.mlir | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir index 09ce6d9a36a7..7354f97fc676 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir @@ -33,7 +33,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, 
tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -79,7 +79,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 20] : tensor<1x3x20xf32> into tensor<1x1x3x20xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x14xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x14xf32>) attrs = {conv_op = #rock, dilation = [3], group = 1 : i64, pad = [0, 0], stride = [1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x14xf32>) attrs = {conv_op = #rock, dilation = [3], group = 1 : i64, pad = [0, 0], stride = [1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -130,7 +130,7 @@ module { %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 14] : tensor<1x3x14xf32> into tensor<1x1x3x14xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 5] : tensor<6x3x5xf32> into tensor<1x6x3x5xf32> %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x14xf32>, 
tensor<1x6x3x5xf32>) outs(%cst_3 : tensor<1x1x6x10xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [2, 2], stride = [1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x14xf32>, tensor<1x6x3x5xf32>) outs(%cst_3 : tensor<1x1x6x10xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [2, 2], stride = [1]} { ^bb0(%in: f32, %in_5: f32, %out: f32): %1 = arith.mulf %in, %in_5 : f32 %2 = arith.addf %out, %1 : f32 @@ -176,7 +176,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x4xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [2]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x4xf32>) attrs = {conv_op = #rock, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [2]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -222,7 +222,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 3, 2, 10] : tensor<1x6x10xf32> into tensor<1x3x2x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], 
[3]] output_shape [3, 3, 2, 3] : tensor<9x2x3xf32> into tensor<3x3x2x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10xf32>, tensor<3x3x2x3xf32>) outs(%cst : tensor<1x3x3x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 3 : i64, pad = [0, 0], stride = [1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10xf32>, tensor<3x3x2x3xf32>) outs(%cst : tensor<1x3x3x8xf32>) attrs = {conv_op = #rock, dilation = [1], group = 3 : i64, pad = [0, 0], stride = [1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir index 174f877e8a04..c6e301ebc876 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir @@ -27,7 +27,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 
: i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -68,7 +68,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 20, 20] : tensor<1x3x20x20xf32> into tensor<1x1x3x20x20xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x16x14xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20x20xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x16x14xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20x20xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x16x14xf32>) attrs = {conv_op = #rock, dilation = [2, 3], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -114,7 +114,7 @@ module { %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 12, 12] : tensor<1x3x12x12xf32> into 
tensor<1x1x3x12x12xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12xf32>, tensor<1x6x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [1, 1, 1, 1], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12xf32>, tensor<1x6x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [1, 1, 1, 1], stride = [1, 1]} { ^bb0(%in: f32, %in_5: f32, %out: f32): %1 = arith.mulf %in, %in_5 : f32 %2 = arith.addf %out, %1 : f32 @@ -155,7 +155,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x3xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x3xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [2, 3]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], 
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x3xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [2, 3]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -196,7 +196,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 3, 2, 10, 10] : tensor<1x6x10x10xf32> into tensor<1x3x2x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [3, 3, 2, 3, 3] : tensor<9x2x3x3xf32> into tensor<3x3x2x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10xf32>, tensor<3x3x2x3x3xf32>) outs(%cst : tensor<1x3x3x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 3 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10xf32>, tensor<3x3x2x3x3xf32>) outs(%cst : tensor<1x3x3x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1], group = 3 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir index d6a0c6773963..83deae7fa892 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir +++ 
b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir @@ -27,7 +27,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -67,7 +67,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x6x6x6xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", 
"parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x6x6x6xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x6x6x6xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -112,7 +112,7 @@ module { %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 12, 12, 12] : tensor<1x3x12x12x12xf32> into tensor<1x1x3x12x12x12xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10x10xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12x12xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [1, 1, 1, 1, 1, 1], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : 
tensor<1x1x3x12x12x12xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10x10xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [1, 1, 1, 1, 1, 1], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_5: f32, %out: f32): %1 = arith.mulf %in, %in_5 : f32 %2 = arith.addf %out, %1 : f32 @@ -152,7 +152,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x4x4xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x4x4xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x4x4xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -192,7 +192,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 3, 2, 10, 10, 10] : tensor<1x6x10x10x10xf32> into tensor<1x3x2x10x10x10xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape 
[3, 3, 2, 3, 3, 3] : tensor<9x2x3x3x3xf32> into tensor<3x3x2x3x3x3xf32> %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8x8xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10x10xf32>, tensor<3x3x2x3x3x3xf32>) outs(%cst : tensor<1x3x3x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 3 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10x10xf32>, tensor<3x3x2x3x3x3xf32>) outs(%cst : tensor<1x3x3x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 3 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -233,7 +233,7 @@ module { %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<2x3x5x5x5xf32> into tensor<2x1x3x5x5x5xf32> %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<4x3x2x2x2xf32> into tensor<1x4x3x2x2x2xf32> %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32> - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride 
= [2, 2, 2]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs = {conv_op = #rock, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir index f291a4ac97bc..2f009cb40031 100644 --- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir +++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir @@ -19,7 +19,7 @@ module { %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> // expected-error @+2 {{no padding found}} // expected-error @+1 {{failed to legalize operation}} - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : 
f32 %2 = arith.addf %out, %1 : f32 @@ -45,7 +45,7 @@ module { %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> // expected-error @+2 {{invalid dilation or stride}} // expected-error @+1 {{failed to legalize operation}} - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -71,7 +71,7 @@ module { %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> // expected-error @+2 {{invalid number of padding}} // expected-error @+1 {{failed to legalize operation}} - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", 
"reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32 @@ -97,7 +97,7 @@ module { %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32> // expected-error @+2 {{invalid dilation or stride}} // expected-error @+1 {{failed to legalize operation}} - %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1]} { + %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs = {conv_op = #rock, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1]} { ^bb0(%in: f32, %in_4: f32, %out: f32): %1 = arith.mulf %in, %in_4 : f32 %2 = arith.addf %out, %1 : f32