diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp
index d0fb43bd5cf1..70277301bdc7 100644
--- a/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp
+++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRock.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Rock/IR/Rock.h"
+#include "mlir/Dialect/Rock/IR/TransformMapBuilder.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/PatternMatch.h"
@@ -139,8 +140,260 @@ LogicalResult MatmulConverter<LinalgMatOp>::matchAndRewrite(
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// ConvLinalgConverter: linalg.generic (conv) -> rock.conv
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct ConvFields { // Conv attributes decoded from a tagged linalg.generic.
+  rock::LinalgConvType type; // Value of the op's `conv_op` attribute.
+  int64_t spatialDim;        // Spatial dims of `type`: 1, 2 or 3.
+  ArrayAttr padding, stride, dilation; // Index arrays passed to rock.conv.
+  StringAttr perfConfig;     // `perf_config` attribute; null when absent.
+};
+} // namespace
+
+/// Number of spatial dimensions convolved by the given conv variant.
+static int64_t getSpatialDim(rock::LinalgConvType type) {
+  using ConvType = rock::LinalgConvType;
+  if (type == ConvType::Conv1dNgchGkch)
+    return 1;
+  if (type == ConvType::Conv2dNgchwGkchw)
+    return 2;
+  if (type == ConvType::Conv3dNgchwdGkchwd)
+    return 3;
+  llvm_unreachable("unknown LinalgConvType");
+}
+
+/// Attach filter_layout, input_layout and output_layout to a rock.conv op.
+/// Each layout is three fixed names followed by "<i><suffix>" for every
+/// spatial dimension i, in the linalg order GKC*, NGC*, NGK*.
+static void setConvLayoutAttrs(OpBuilder &builder, rock::ConvOp cop,
+                               int64_t spatialDim) {
+  MLIRContext *context = builder.getContext();
+  auto makeLayout = [&](ArrayRef<StringRef> fixedNames, StringRef suffix) {
+    SmallVector<Attribute> names;
+    for (StringRef name : fixedNames)
+      names.push_back(StringAttr::get(context, name));
+    for (int64_t axis = 0; axis < spatialDim; ++axis)
+      names.push_back(StringAttr::get(context, Twine(axis) + suffix));
+    return builder.getArrayAttr(names);
+  };
+  cop->setAttr("filter_layout", makeLayout({"g", "k", "c"}, ""));
+  cop->setAttr("input_layout", makeLayout({"ni", "gi", "ci"}, "i"));
+  cop->setAttr("output_layout", makeLayout({"no", "go", "ko"}, "o"));
+}
+
+/// Strip the tensor.pad feeding a convolution input, since rock.conv applies
+/// padding itself. Returns `in` unchanged when `padding` is all zeros.
+///
+/// Expected IR structure (emitted by migraphx-to-linalg):
+///   %padded = tensor.pad %original ...
+///   %expanded = tensor.expand_shape %padded ...
+/// Replaced with:
+///   %expanded = tensor.expand_shape %original ...
+///
+/// Emits an error on `op` and fails unless `in` comes from that chain, each
+/// of the two ops has a single user, and the pad amounts are static.
+static FailureOr<Value>
+removePaddingFromInput(ConversionPatternRewriter &rewriter,
+                       linalg::GenericOp op, Value in, ArrayAttr padding) {
+  bool hasPadding = llvm::any_of(padding.getValue(), [](Attribute attr) {
+    return cast<IntegerAttr>(attr).getInt() != 0;
+  });
+  if (!hasPadding)
+    return in;
+
+  // Both ops are rewritten below, so any other user would silently see the
+  // unpadded value; dynamic pad amounts cannot be folded into the shape.
+  auto expanded = in.getDefiningOp<tensor::ExpandShapeOp>();
+  tensor::PadOp padded;
+  if (expanded && expanded->hasOneUse())
+    padded = expanded->getOperand(0).getDefiningOp<tensor::PadOp>();
+  if (!padded || !padded->hasOneUse() ||
+      ShapedType::isDynamicShape(padded.getStaticLow()) ||
+      ShapedType::isDynamicShape(padded.getStaticHigh())) {
+    op.emitError("unexpected padding code structure");
+    return failure();
+  }
+
+  // Padding is defined in pre-expand space. The spatial dims are at the
+  // tail of both tensors (expand_shape only splits an earlier dim), so
+  // align from the end.
+  ArrayRef<int64_t> lowPad = padded.getStaticLow();
+  ArrayRef<int64_t> highPad = padded.getStaticHigh();
+  SmallVector<int64_t, 6> resultShape(expanded.getResultType().getShape());
+  int64_t i = static_cast<int64_t>(lowPad.size()) - 1;
+  int64_t j = static_cast<int64_t>(resultShape.size()) - 1;
+  for (; i >= 0 && j >= 0; --i, --j)
+    resultShape[j] -= lowPad[i] + highPad[i];
+  RankedTensorType newResultType = RankedTensorType::get(
+      resultShape, padded.getResultType().getElementType());
+  Value result = tensor::ExpandShapeOp::create(
+      rewriter, expanded.getLoc(), newResultType, padded.getOperand(0),
+      expanded.getReassociationIndices());
+  rewriter.replaceOp(expanded, result);
+  rewriter.eraseOp(padded);
+  return result;
+}
+
+namespace {
+/// Lowers a linalg.generic carrying a `conv_op` attribute to rock.conv.
+struct ConvLinalgConverter final : OpConversionPattern<linalg::GenericOp> {
+  using OpConversionPattern<linalg::GenericOp>::OpConversionPattern;
+  using OpAdaptor = OpConversionPattern<linalg::GenericOp>::OpAdaptor;
+
+  LogicalResult
+  matchAndRewrite(linalg::GenericOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override;
+
+private:
+  /// Decodes the conv attributes on `op`; fails when absent or invalid.
+  FailureOr<ConvFields> isConv(ConversionPatternRewriter &rewriter,
+                               linalg::GenericOp op) const;
+};
+} // namespace
+
+/// Decode the convolution description attached to `op`.
+///
+/// Fails silently when `op` has no `conv_op` attribute. Emits an error and
+/// fails when `dilation`, `stride` or `pad` is missing, holds non-integer
+/// elements, or has the wrong length. Conv1D is lowered as Conv2D, so
+/// identity entries (stride=1, dilation=1, pad=0) are appended for it.
+FailureOr<ConvFields>
+ConvLinalgConverter::isConv(ConversionPatternRewriter &rewriter,
+                            linalg::GenericOp op) const {
+  auto name = op->getAttrOfType<rock::LinalgConvTypeAttr>("conv_op");
+  if (!name)
+    return failure();
+  rock::LinalgConvType convType = name.getValue();
+  int64_t spatialDim = getSpatialDim(convType);
+
+  // Read an attribute of `op` as a list of integers. Fails if it is absent
+  // or is not an array of IntegerAttr (an unchecked cast<> would assert).
+  auto readInts = [&](StringRef attrName) -> FailureOr<SmallVector<int64_t>> {
+    auto array = op->getAttrOfType<ArrayAttr>(attrName);
+    if (!array)
+      return failure();
+    SmallVector<int64_t> values;
+    for (Attribute element : array.getValue()) {
+      auto intAttr = dyn_cast<IntegerAttr>(element);
+      if (!intAttr)
+        return failure();
+      values.push_back(intAttr.getInt());
+    }
+    return values;
+  };
+  auto dilation = readInts("dilation");
+  auto stride = readInts("stride");
+  if (failed(dilation) || failed(stride) ||
+      static_cast<int64_t>(dilation->size()) != spatialDim ||
+      static_cast<int64_t>(stride->size()) != spatialDim) {
+    op.emitError("invalid dilation or stride");
+    return failure();
+  }
+  auto pad = readInts("pad");
+  if (failed(pad)) {
+    op.emitError("no padding found");
+    return failure();
+  }
+  // Require one low and one high entry per spatial dim before interleaving;
+  // an odd-length list would otherwise silently lose its last element.
+  if (static_cast<int64_t>(pad->size()) != 2 * spatialDim) {
+    op.emitError("invalid number of padding");
+    return failure();
+  }
+
+  // Input format:  [dim0_low, dim1_low, ..., dim0_high, dim1_high, ...]
+  // Rock  format:  [dim0_low, dim0_high, dim1_low, dim1_high, ...]
+  SmallVector<int64_t, 8> interleavedPad;
+  for (int64_t i = 0; i < spatialDim; ++i)
+    interleavedPad.append({(*pad)[i], (*pad)[spatialDim + i]});
+  // Conv1D is expanded into Conv2D: add entries for the extra unit dim.
+  if (spatialDim == 1) {
+    dilation->push_back(1);
+    stride->push_back(1);
+    interleavedPad.append({0, 0});
+  }
+  return ConvFields{convType,
+                    spatialDim,
+                    rewriter.getIndexArrayAttr(interleavedPad),
+                    rewriter.getIndexArrayAttr(*stride),
+                    rewriter.getIndexArrayAttr(*dilation),
+                    op->getAttrOfType<StringAttr>("perf_config")};
+}
+
+/// Rewrite a linalg.generic tagged as a convolution into rock.conv. Explicit
+/// input padding is stripped first. Conv1D runs as Conv2D: a unit spatial
+/// dim is unmerged onto filter and input and merged away on the result.
+LogicalResult ConvLinalgConverter::matchAndRewrite(
+    linalg::GenericOp op, OpAdaptor adaptor,
+    ConversionPatternRewriter &rewriter) const {
+  FailureOr<ConvFields> maybeConv = isConv(rewriter, op);
+  if (failed(maybeConv))
+    return failure();
+  ConvFields conv = *maybeConv;
+  Location loc = op.getLoc();
+  // Validate before touching the IR: shapes are cast and indexed below.
+  auto isConvTensor = [&](Type type) {
+    auto tensorType = dyn_cast<RankedTensorType>(type);
+    return tensorType && tensorType.getRank() == conv.spatialDim + 3;
+  };
+  if (op->getNumOperands() < 2 || op->getNumResults() != 1 ||
+      !llvm::all_of(op->getOperandTypes(), isConvTensor) ||
+      !llvm::all_of(op->getResultTypes(), isConvTensor))
+    return op.emitError("unexpected convolution operand or result shape");
+
+  auto maybeInput =
+      removePaddingFromInput(rewriter, op, op.getOperand(0), conv.padding);
+  if (failed(maybeInput))
+    return failure();
+  Value input = *maybeInput;
+  Value filter = op.getOperand(1);
+  if (conv.spatialDim == 1) {
+    // Unmerge the single spatial dim "0" into (spatial, W=1).
+    auto filterDims = cast<RankedTensorType>(filter.getType()).getShape();
+    rock::BottomUpTMBuilder fb(rewriter, {"g", "k", "c", "0"}, filterDims, loc);
+    fb.passThrough({"gf", "kf", "cf"}, {0, 1, 2}, {"g", "k", "c"});
+    fb.unmerge({"0f", "1f"}, {3, 4}, "0", {filterDims[3], 1});
+    filter = rock::TransformOp::create(rewriter, loc, filter, fb.get());
+    auto inputShape = cast<RankedTensorType>(input.getType()).getShape();
+    rock::BottomUpTMBuilder b(rewriter, {"n", "g", "c", "0"}, inputShape, loc);
+    b.passThrough({"nu", "gu", "cu"}, {0, 1, 2}, {"n", "g", "c"});
+    b.unmerge({"0u", "1u"}, {3, 4}, "0", {inputShape[3], 1});
+    input = rock::TransformOp::create(rewriter, loc, input, b.get());
+  }
+  auto linalgResultType = cast<RankedTensorType>(op.getResult(0).getType());
+  SmallVector<int64_t> shape(linalgResultType.getShape());
+  if (conv.spatialDim == 1)
+    shape.push_back(1);
+  RankedTensorType rockResultType =
+      RankedTensorType::get(shape, linalgResultType.getElementType());
+  Value output =
+      bufferization::AllocTensorOp::create(rewriter, loc, rockResultType, {});
+  auto cop = rock::ConvOp::create(rewriter, loc, rockResultType, filter, input,
+                                  output, /*features=*/nullptr,
+                                  /*blockSize=*/nullptr, /*gridSize=*/nullptr,
+                                  conv.padding, conv.stride, conv.dilation,
+                                  /*params=*/nullptr);
+  // TODO: add splitk
+  if (conv.perfConfig)
+    cop->setAttr("perf_config", conv.perfConfig);
+  setConvLayoutAttrs(rewriter, cop, conv.spatialDim == 1 ? 2 : conv.spatialDim);
+
+  Value result = cop.getResult();
+  if (conv.spatialDim == 1) {
+    rock::BottomUpTMBuilder b(rewriter, {"n", "g", "k", "0", "1"}, shape, loc);
+    b.passThrough({"no", "go", "ko"}, {0, 1, 2}, {"n", "g", "k"});
+    b.merge("0o", 3, {"0", "1"});
+    result = rock::TransformOp::create(rewriter, loc, result, b.get());
+  }
+  rewriter.replaceOp(op, result);
+  return success();
+}
+
 void mlir::rock::populateLinalgToRockConversionPattern(
     RewritePatternSet &pattern, MLIRContext *context) {
   pattern.add<MatmulConverter<linalg::BatchMatmulOp>,
-              MatmulConverter<linalg::MatmulOp>>(context);
+              MatmulConverter<linalg::MatmulOp>, ConvLinalgConverter>(context);
 }
diff --git a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp
index f7148fe81d01..7f3fb294abf7 100644
--- a/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp
+++ b/mlir/lib/Conversion/LinalgToRock/LinalgToRockPass.cpp
@@ -47,8 +47,15 @@ static void populateLinalgToRockDialectConversion(ConversionTarget &target) {
         if (!linalgOp) {
           return std::nullopt;
         }
-        return linalg::isElementwise(linalgOp) || isa<linalg::GenericOp>(op) ||
-               isa<linalg::YieldOp>(op);
+
+        // A generic carrying conv_op is a convolution: report it as not legal.
+        linalg::GenericOp castedOp = dyn_cast<linalg::GenericOp>(op);
+        if (castedOp && castedOp->hasAttr("conv_op")) {
+          return false;
+        }
+
+        return linalg::isElementwise(linalgOp) || isa<linalg::YieldOp>(op) ||
+               castedOp;
       });
 }
 
diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir
new file mode 100644
index 000000000000..7354f97fc676
--- /dev/null
+++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-1d.mlir
@@ -0,0 +1,235 @@
+// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s
+
+// Input: NCL = 1x3x10, Filter: FCL = 6x3x3
+// stride=1, dilation=1, padding=0, group=1
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+// CHECK-LABEL: func.func @conv_1d_basic(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]]
+// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index]
+// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_1d_basic(%arg0: tensor<30xf32>, %arg1: tensor<54xf32>) -> tensor<48xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv1d_ngch_gkch>, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x8xf32> into tensor<1x6x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x8xf32> into tensor<48xf32>
+    return %collapsed_3 : tensor<48xf32>
+  }
+}
+
+// -----
+
+// Input: NCL = 1x3x20, Filter: FCL = 6x3x3
+// stride=1, dilation=3, padding=0, group=1
+#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5 * 3)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+// CHECK-LABEL: func.func @conv_1d_dilation(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]]
+// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]])
+// CHECK-SAME: dilations = [3 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index]
+// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_1d_dilation(%arg0: tensor<60xf32>, %arg1: tensor<54xf32>) -> tensor<84xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 20] : tensor<60xf32> into tensor<1x3x20xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 20] : tensor<1x3x20xf32> into tensor<1x1x3x20xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x14xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x14xf32>) attrs =  {conv_op = #rock<LinalgConvType conv1d_ngch_gkch>, dilation = [3], group = 1 : i64, pad = [0, 0], stride = [1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x14xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x14xf32> into tensor<1x6x14xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x14xf32> into tensor<84xf32>
+    return %collapsed_3 : tensor<84xf32>
+  }
+}
+
+// -----
+
+// Input: NCL = 1x3x10, Filter: FCL = 6x3x5
+// stride=1, dilation=1, padding=[2,2], group=1
+#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+// CHECK-LABEL: func.func @conv_1d_padding(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_1]]
+// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_3]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [2 : index, 2 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index]
+// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]]
+// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_4]]
+module {
+  func.func @conv_1d_padding(%arg0: tensor<30xf32>, %arg1: tensor<90xf32>) -> tensor<60xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 5] : tensor<90xf32> into tensor<6x3x5xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32>
+    %cst = arith.constant 0.000000e+00 : f32
+    %padded = tensor.pad %expanded_0 low[0, 0, 2] high[0, 0, 2] {
+    ^bb0(%arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %cst : f32
+    } : tensor<1x3x10xf32> to tensor<1x3x14xf32>
+    %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3]] output_shape [1, 1, 3, 14] : tensor<1x3x14xf32> into tensor<1x1x3x14xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 5] : tensor<6x3x5xf32> into tensor<1x6x3x5xf32>
+    %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x14xf32>, tensor<1x6x3x5xf32>) outs(%cst_3 : tensor<1x1x6x10xf32>) attrs =  {conv_op = #rock<LinalgConvType conv1d_ngch_gkch>, dilation = [1], group = 1 : i64, pad = [2, 2], stride = [1]} {
+    ^bb0(%in: f32, %in_5: f32, %out: f32):
+      %1 = arith.mulf %in, %in_5 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x10xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x10xf32> into tensor<1x6x10xf32>
+    %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x10xf32> into tensor<60xf32>
+    return %collapsed_4 : tensor<60xf32>
+  }
+}
+
+// -----
+
+// Input: NCL = 1x3x10, Filter: FCL = 6x3x3
+// stride=2, dilation=1, padding=0, group=1
+#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 * 2 + d5)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+// CHECK-LABEL: func.func @conv_1d_stride(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]]
+// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [2 : index, 1 : index]
+// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_1d_stride(%arg0: tensor<30xf32>, %arg1: tensor<54xf32>) -> tensor<24xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [6, 3, 3] : tensor<54xf32> into tensor<6x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 3, 10] : tensor<30xf32> into tensor<1x3x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 1, 3, 10] : tensor<1x3x10xf32> into tensor<1x1x3x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [1, 6, 3, 3] : tensor<6x3x3xf32> into tensor<1x6x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10xf32>, tensor<1x6x3x3xf32>) outs(%cst : tensor<1x1x6x4xf32>) attrs =  {conv_op = #rock<LinalgConvType conv1d_ngch_gkch>, dilation = [1], group = 1 : i64, pad = [0, 0], stride = [2]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x4xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x1x6x4xf32> into tensor<1x6x4xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x6x4xf32> into tensor<24xf32>
+    return %collapsed_3 : tensor<24xf32>
+  }
+}
+
+// -----
+
+// Input: NCL = 1x6x10, Filter: F(C/G)L = 9x2x3 (group=3, C_per_group=2, F_per_group=3)
+// stride=1, dilation=1, padding=0, group=3
+#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3 + d5)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+// CHECK-LABEL: func.func @conv_1d_groups(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[t0:.*]] = rock.transform %[[expanded_2]]
+// CHECK-DAG: %[[t1:.*]] = rock.transform %[[expanded_1]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[t0]], %[[t1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index]
+// CHECK-DAG: %[[t2:.*]] = rock.transform %[[conv]]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[t2]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_1d_groups(%arg0: tensor<60xf32>, %arg1: tensor<54xf32>) -> tensor<72xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2]] output_shape [9, 2, 3] : tensor<54xf32> into tensor<9x2x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2]] output_shape [1, 6, 10] : tensor<60xf32> into tensor<1x6x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3]] output_shape [1, 3, 2, 10] : tensor<1x6x10xf32> into tensor<1x3x2x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3]] output_shape [3, 3, 2, 3] : tensor<9x2x3xf32> into tensor<3x3x2x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10xf32>, tensor<3x3x2x3xf32>) outs(%cst : tensor<1x3x3x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv1d_ngch_gkch>, dilation = [1], group = 3 : i64, pad = [0, 0], stride = [1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x3x3x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3]] : tensor<1x3x3x8xf32> into tensor<1x9x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2]] : tensor<1x9x8xf32> into tensor<72xf32>
+    return %collapsed_3 : tensor<72xf32>
+  }
+}
diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir
new file mode 100644
index 000000000000..c6e301ebc876
--- /dev/null
+++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-2d.mlir
@@ -0,0 +1,209 @@
+// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s
+// Baseline 2-D case: unit strides and dilations, zero padding, group = 1. The conv-tagged linalg.generic is expected to become a rock.conv carrying the same parameters.
+// CHECK-LABEL: func.func @conv_2d_basic(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)>
+module {
+  func.func @conv_2d_basic(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<384xf32> attributes {kernel, arch="##TOKEN_ARCH##"}{
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv2d_ngchw_gkchw>, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x8x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x8x8xf32> into tensor<1x6x8x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x8x8xf32> into tensor<384xf32>
+    return %collapsed_3 : tensor<384xf32>
+  }
+}
+
+// -----
+// Dilation case: the input map scales the filter indices by 2 and 3 (d3 + d6 * 2, d4 + d7 * 3); the resulting rock.conv is expected to carry dilations = [2, 3].
+// CHECK-LABEL: func.func @conv_2d_dilation(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [2 : index, 3 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6 * 2, d4 + d7 * 3)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)>
+module {
+  func.func @conv_2d_dilation(%arg0: tensor<1200xf32>, %arg1: tensor<162xf32>) -> tensor<1344xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 20, 20] : tensor<1200xf32> into tensor<1x3x20x20xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 20, 20] : tensor<1x3x20x20xf32> into tensor<1x1x3x20x20xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x16x14xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x20x20xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x16x14xf32>) attrs =  {conv_op = #rock<LinalgConvType conv2d_ngchw_gkchw>, dilation = [2, 3], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x16x14xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x16x14xf32> into tensor<1x6x16x14xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x16x14xf32> into tensor<1344xf32>
+    return %collapsed_3 : tensor<1344xf32>
+  }
+}
+
+// -----
+// Padding case: the source pads both spatial dims by 1 on each side with tensor.pad; the rock.conv is expected to consume an expand_shape of the unpadded tensor and carry padding = [1, 1, 1, 1].
+// CHECK-LABEL: func.func @conv_2d_padding(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_1]], %[[expanded_3]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [1 : index, 1 : index, 1 : index, 1 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_4]]
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)>
+module {
+  func.func @conv_2d_padding(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<600xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32>
+    %cst = arith.constant 0.000000e+00 : f32
+    %padded = tensor.pad %expanded_0 low[0, 0, 1, 1] high[0, 0, 1, 1] {
+    ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index):
+      tensor.yield %cst : f32
+    } : tensor<1x3x10x10xf32> to tensor<1x3x12x12xf32>
+    %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 12, 12] : tensor<1x3x12x12xf32> into tensor<1x1x3x12x12xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32>
+    %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12xf32>, tensor<1x6x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10xf32>) attrs =  {conv_op = #rock<LinalgConvType conv2d_ngchw_gkchw>, dilation = [1, 1], group = 1 : i64, pad = [1, 1, 1, 1], stride = [1, 1]} {
+    ^bb0(%in: f32, %in_5: f32, %out: f32):
+      %1 = arith.mulf %in, %in_5 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x10x10xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x10x10xf32> into tensor<1x6x10x10xf32>
+    %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x10x10xf32> into tensor<600xf32>
+    return %collapsed_4 : tensor<600xf32>
+  }
+}
+
+// -----
+// Stride case: the input map scales the output indices by 2 and 3 (d3 * 2 + d6, d4 * 3 + d7); the resulting rock.conv is expected to carry strides = [2, 3].
+// CHECK-LABEL: func.func @conv_2d_stride(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [2 : index, 3 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 * 2 + d6, d4 * 3 + d7)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)>
+module {
+  func.func @conv_2d_stride(%arg0: tensor<300xf32>, %arg1: tensor<162xf32>) -> tensor<72xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [6, 3, 3, 3] : tensor<162xf32> into tensor<6x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 3, 10, 10] : tensor<300xf32> into tensor<1x3x10x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 1, 3, 10, 10] : tensor<1x3x10x10xf32> into tensor<1x1x3x10x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [1, 6, 3, 3, 3] : tensor<6x3x3x3xf32> into tensor<1x6x3x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x3xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10xf32>, tensor<1x6x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x3xf32>) attrs =  {conv_op = #rock<LinalgConvType conv2d_ngchw_gkchw>, dilation = [1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [2, 3]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x4x3xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x1x6x4x3xf32> into tensor<1x6x4x3xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x6x4x3xf32> into tensor<72xf32>
+    return %collapsed_3 : tensor<72xf32>
+  }
+}
+
+// -----
+// Grouped case: group = 3, so the second dim of the input and output and the leading dim of the filter have extent 3 (input 1x3x2x10x10, filter 3x3x2x3x3, output 1x3x3x8x8); strides and dilations are 1 and padding is 0.
+// CHECK-LABEL: func.func @conv_2d_groups(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d5, d3 + d6, d4 + d7)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d1, d2, d5, d6, d7)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4)>
+module {
+  func.func @conv_2d_groups(%arg0: tensor<600xf32>, %arg1: tensor<162xf32>) -> tensor<576xf32> attributes {kernel, arch="##TOKEN_ARCH##"} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3]] output_shape [9, 2, 3, 3] : tensor<162xf32> into tensor<9x2x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3]] output_shape [1, 6, 10, 10] : tensor<600xf32> into tensor<1x6x10x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4]] output_shape [1, 3, 2, 10, 10] : tensor<1x6x10x10xf32> into tensor<1x3x2x10x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4]] output_shape [3, 3, 2, 3, 3] : tensor<9x2x3x3xf32> into tensor<3x3x2x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10xf32>, tensor<3x3x2x3x3xf32>) outs(%cst : tensor<1x3x3x8x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv2d_ngchw_gkchw>, dilation = [1, 1], group = 3 : i64, pad = [0, 0, 0, 0], stride = [1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x3x3x8x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4]] : tensor<1x3x3x8x8xf32> into tensor<1x9x8x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3]] : tensor<1x9x8x8xf32> into tensor<576xf32>
+    return %collapsed_3 : tensor<576xf32>
+  }
+}
diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir
new file mode 100644
index 000000000000..83deae7fa892
--- /dev/null
+++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-conv-3d.mlir
@@ -0,0 +1,247 @@
+// RUN: sed s/##TOKEN_ARCH##/%arch/g %s | rocmlir-opt --linalg-to-rock -verify-diagnostics --split-input-file | FileCheck %s
+// Baseline 3-D case: unit strides and dilations, zero padding, group = 1. The conv-tagged linalg.generic is expected to become a rock.conv with three spatial dims in every layout.
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+// CHECK-LABEL: func.func @conv_3d_basic(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_3d_basic(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x8x8x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32>
+    return %collapsed_3 : tensor<3072xf32>
+  }
+}
+// -----
+// Dilation case: the input map scales every filter index by 2 (d3 + d7 * 2, d4 + d8 * 2, d5 + d9 * 2); the resulting rock.conv is expected to carry dilations = [2, 2, 2].
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7 * 2, d4 + d8 * 2, d5 + d9 * 2)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+// CHECK-LABEL: func.func @conv_3d_dilation(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [2 : index, 2 : index, 2 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_3d_dilation(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<1296xf32> attributes {arch = "##TOKEN_ARCH##", kernel} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x6x6x6xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x6x6x6xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x6x6x6xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x6x6x6xf32> into tensor<1x6x6x6x6xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x6x6x6xf32> into tensor<1296xf32>
+    return %collapsed_3 : tensor<1296xf32>
+  }
+}
+// -----
+// Padding case: the source pads all three spatial dims by 1 on each side with tensor.pad; the rock.conv is expected to consume an expand_shape of the unpadded tensor and carry padding = [1, 1, 1, 1, 1, 1].
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+// CHECK-LABEL: func.func @conv_3d_padding(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[expanded_3:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_1]], %[[expanded_3]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"]
+// CHECK-SAME: padding = [1 : index, 1 : index, 1 : index, 1 : index, 1 : index, 1 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_4:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_4]]
+module {
+  func.func @conv_3d_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<6000xf32> attributes {arch = "##TOKEN_ARCH##", kernel} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32>
+    %cst = arith.constant 0.000000e+00 : f32
+    %padded = tensor.pad %expanded_0 low[0, 0, 1, 1, 1] high[0, 0, 1, 1, 1] {
+    ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index):
+      tensor.yield %cst : f32
+    } : tensor<1x3x10x10x10xf32> to tensor<1x3x12x12x12xf32>
+    %expanded_1 = tensor.expand_shape %padded [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 12, 12, 12] : tensor<1x3x12x12x12xf32> into tensor<1x1x3x12x12x12xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32>
+    %cst_3 = arith.constant dense<0.000000e+00> : tensor<1x1x6x10x10x10xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x12x12x12xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst_3 : tensor<1x1x6x10x10x10xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [1, 1, 1], group = 1 : i64, pad = [1, 1, 1, 1, 1, 1], stride = [1, 1, 1]} {
+    ^bb0(%in: f32, %in_5: f32, %out: f32):
+      %1 = arith.mulf %in, %in_5 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x10x10x10xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x10x10x10xf32> into tensor<1x6x10x10x10xf32>
+    %collapsed_4 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x10x10x10xf32> into tensor<6000xf32>
+    return %collapsed_4 : tensor<6000xf32>
+  }
+}
+// -----
+// Stride case: the input map scales every output index by 2 (d3 * 2 + d7, d4 * 2 + d8, d5 * 2 + d9); the resulting rock.conv is expected to carry strides = [2, 2, 2].
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7, d4 * 2 + d8, d5 * 2 + d9)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+// CHECK-LABEL: func.func @conv_3d_stride(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [2 : index, 2 : index, 2 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_3d_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<384xf32> attributes {arch = "##TOKEN_ARCH##", kernel} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x4x4x4xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x4x4x4xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [2, 2, 2]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x4x4x4xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x4x4x4xf32> into tensor<1x6x4x4x4xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x4x4x4xf32> into tensor<384xf32>
+    return %collapsed_3 : tensor<384xf32>
+  }
+}
+// -----
+// Grouped case: group = 3, so the second dim of the input and output and the leading dim of the filter have extent 3 (input 1x3x2x10x10x10, filter 3x3x2x3x3x3, output 1x3x3x8x8x8); strides and dilations are 1 and padding is 0.
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+// CHECK-LABEL: func.func @conv_3d_groups(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [1 : index, 1 : index, 1 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: strides = [1 : index, 1 : index, 1 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_3d_groups(%arg0: tensor<6000xf32>, %arg1: tensor<486xf32>) -> tensor<4608xf32> attributes {arch = "##TOKEN_ARCH##", kernel} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [9, 2, 3, 3, 3] : tensor<486xf32> into tensor<9x2x3x3x3xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 6, 10, 10, 10] : tensor<6000xf32> into tensor<1x6x10x10x10xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 3, 2, 10, 10, 10] : tensor<1x6x10x10x10xf32> into tensor<1x3x2x10x10x10xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [3, 3, 2, 3, 3, 3] : tensor<9x2x3x3x3xf32> into tensor<3x3x2x3x3x3xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x3x3x8x8x8xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x3x2x10x10x10xf32>, tensor<3x3x2x3x3x3xf32>) outs(%cst : tensor<1x3x3x8x8x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [1, 1, 1], group = 3 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x3x3x8x8x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x3x3x8x8x8xf32> into tensor<1x9x8x8x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x9x8x8x8xf32> into tensor<4608xf32>
+    return %collapsed_3 : tensor<4608xf32>
+  }
+}
+// -----
+// perf_config case: the linalg.generic carries a perf_config string together with stride = dilation = [2, 2, 2]; the rock.conv is expected to carry the same perf_config string, strides and dilations.
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 * 2 + d7 * 2, d4 * 2 + d8 * 2, d5 * 2 + d9 * 2)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+// CHECK-LABEL: func.func @conv_3d_perf_config(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg1]]
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg0]]
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[expanded_0]]
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[expanded]]
+// CHECK-DAG: %[[alloc:.*]] = bufferization.alloc_tensor
+// CHECK-DAG: %[[conv:.*]] = rock.conv(%[[expanded_2]], %[[expanded_1]], %[[alloc]])
+// CHECK-SAME: dilations = [2 : index, 2 : index, 2 : index]
+// CHECK-SAME: filter_layout = ["g", "k", "c", "0", "1", "2"]
+// CHECK-SAME: input_layout = ["ni", "gi", "ci", "0i", "1i", "2i"]
+// CHECK-SAME: output_layout = ["no", "go", "ko", "0o", "1o", "2o"]
+// CHECK-SAME: padding = [0 : index, 0 : index, 0 : index, 0 : index, 0 : index, 0 : index]
+// CHECK-SAME: perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1"
+// CHECK-SAME: strides = [2 : index, 2 : index, 2 : index]
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[conv]]
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[collapsed]]
+// CHECK-DAG: return %[[collapsed_3]]
+module {
+  func.func @conv_3d_perf_config(%arg0: tensor<750xf32>, %arg1: tensor<96xf32>) -> tensor<64xf32> attributes {arch = "##TOKEN_ARCH##", kernel} {
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [4, 3, 2, 2, 2] : tensor<96xf32> into tensor<4x3x2x2x2xf32>
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [2, 3, 5, 5, 5] : tensor<750xf32> into tensor<2x3x5x5x5xf32>
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [2, 1, 3, 5, 5, 5] : tensor<2x3x5x5x5xf32> into tensor<2x1x3x5x5x5xf32>
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 4, 3, 2, 2, 2] : tensor<4x3x2x2x2xf32> into tensor<1x4x3x2x2x2xf32>
+    %cst = arith.constant dense<0.000000e+00> : tensor<2x1x4x2x2x2xf32>
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<2x1x3x5x5x5xf32>, tensor<1x4x3x2x2x2xf32>) outs(%cst : tensor<2x1x4x2x2x2xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [2, 2, 2], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], perf_config = "v3:16,32,4,16,16,4,4,1,2,1,1", stride = [2, 2, 2]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<2x1x4x2x2x2xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<2x1x4x2x2x2xf32> into tensor<2x4x2x2x2xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<2x4x2x2x2xf32> into tensor<64xf32>
+    return %collapsed_3 : tensor<64xf32>
+  }
+}
+
diff --git a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir
index 1c4d30fef269..2f009cb40031 100644
--- a/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir
+++ b/mlir/test/Conversion/LinalgToRock/linalg-to-rock-invalid.mlir
@@ -4,3 +4,107 @@
 func.func @no_kernel_attribute_test() {
   func.return
 }
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+module {
+  func.func @conv_3d_no_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { // Negative case, the conv attrs on the linalg.generic below have no pad entry.
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> // unflatten the second operand to 6x3x3x3x3
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> // unflatten the first operand to 1x3x10x10x10
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> // split dim 1 into 1x3 (presumably the group dim)
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> // split dim 0 into 1x6 (presumably the group dim)
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32>
+    // expected-error @+2 {{no padding found}}
+    // expected-error @+1 {{failed to legalize operation}}
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [1, 1, 1], group = 1 : i64, stride = [1, 1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x8x8x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32>
+    return %collapsed_3 : tensor<3072xf32>
+  }
+}
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+module {
+  func.func @conv_3d_no_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { // Negative case, the conv attrs on the linalg.generic below have no stride entry.
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> // unflatten the second operand to 6x3x3x3x3
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> // unflatten the first operand to 1x3x10x10x10
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> // split dim 1 into 1x3 (presumably the group dim)
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> // split dim 0 into 1x6 (presumably the group dim)
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32>
+    // expected-error @+2 {{invalid dilation or stride}}
+    // expected-error @+1 {{failed to legalize operation}}
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x8x8x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32>
+    return %collapsed_3 : tensor<3072xf32>
+  }
+}
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+module {
+  func.func @conv_3d_invalid_padding(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { // Negative case, pad below has 4 entries while the other 3-D cases in this file use 6.
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> // unflatten the second operand to 6x3x3x3x3
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> // unflatten the first operand to 1x3x10x10x10
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> // split dim 1 into 1x3 (presumably the group dim)
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> // split dim 0 into 1x6 (presumably the group dim)
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32>
+    // expected-error @+2 {{invalid number of padding}}
+    // expected-error @+1 {{failed to legalize operation}}
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0], stride = [1, 1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x8x8x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32>
+    return %collapsed_3 : tensor<3072xf32>
+  }
+}
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d6, d3 + d7, d4 + d8, d5 + d9)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d1, d2, d6, d7, d8, d9)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8, d9) -> (d0, d1, d2, d3, d4, d5)>
+module {
+  func.func @conv_3d_invalid_stride(%arg0: tensor<3000xf32>, %arg1: tensor<486xf32>) -> tensor<3072xf32> attributes {arch = "##TOKEN_ARCH##", kernel} { // Negative case, stride below has 2 entries while dilation has 3.
+    %expanded = tensor.expand_shape %arg1 [[0, 1, 2, 3, 4]] output_shape [6, 3, 3, 3, 3] : tensor<486xf32> into tensor<6x3x3x3x3xf32> // unflatten the second operand to 6x3x3x3x3
+    %expanded_0 = tensor.expand_shape %arg0 [[0, 1, 2, 3, 4]] output_shape [1, 3, 10, 10, 10] : tensor<3000xf32> into tensor<1x3x10x10x10xf32> // unflatten the first operand to 1x3x10x10x10
+    %expanded_1 = tensor.expand_shape %expanded_0 [[0], [1, 2], [3], [4], [5]] output_shape [1, 1, 3, 10, 10, 10] : tensor<1x3x10x10x10xf32> into tensor<1x1x3x10x10x10xf32> // split dim 1 into 1x3 (presumably the group dim)
+    %expanded_2 = tensor.expand_shape %expanded [[0, 1], [2], [3], [4], [5]] output_shape [1, 6, 3, 3, 3, 3] : tensor<6x3x3x3x3xf32> into tensor<1x6x3x3x3x3xf32> // split dim 0 into 1x6 (presumably the group dim)
+    %cst = arith.constant dense<0.000000e+00> : tensor<1x1x6x8x8x8xf32>
+    // expected-error @+2 {{invalid dilation or stride}}
+    // expected-error @+1 {{failed to legalize operation}}
+    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction"]} ins(%expanded_1, %expanded_2 : tensor<1x1x3x10x10x10xf32>, tensor<1x6x3x3x3x3xf32>) outs(%cst : tensor<1x1x6x8x8x8xf32>) attrs =  {conv_op = #rock<LinalgConvType conv3d_ngchwd_gkchwd>, dilation = [1, 1, 1], group = 1 : i64, pad = [0, 0, 0, 0, 0, 0], stride = [1, 1]} {
+    ^bb0(%in: f32, %in_4: f32, %out: f32):
+      %1 = arith.mulf %in, %in_4 : f32
+      %2 = arith.addf %out, %1 : f32
+      linalg.yield %2 : f32
+    } -> tensor<1x1x6x8x8x8xf32>
+    %collapsed = tensor.collapse_shape %0 [[0], [1, 2], [3], [4], [5]] : tensor<1x1x6x8x8x8xf32> into tensor<1x6x8x8x8xf32>
+    %collapsed_3 = tensor.collapse_shape %collapsed [[0, 1, 2, 3, 4]] : tensor<1x6x8x8x8xf32> into tensor<3072xf32>
+    return %collapsed_3 : tensor<3072xf32>
+  }
+}
diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir
index 849a2aa7bee4..5ba4fecae0fa 100644
--- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir
+++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv.cpu.mlir
@@ -1,12 +1,14 @@
 // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand_type float -rand_min 0 -rand_max 0 -fut conv_wrapper --verifier clone -  | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=GOLD
 // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone -  | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH
 // RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH
+// RUN: rocmlir-gen -fut conv -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL
 
-/// README - There are essentially two tests (BOTH, and GOLD). 
+/// README - There are essentially three tests (BOTH, GOLD, and FINAL). 
 /// BOTH checks if the tosa pipeline gives the same value (given the 
 /// same seed) as the linalg pipeline. They will pass if both of them 
 /// returns the same value. GOLD checks if the output for the linalg pipeline 
-/// matches an equivalent pytorch implementation.
+/// matches an equivalent pytorch implementation. FINAL verifies that the linalg
+/// pipeline can be converted to rock.
 
 /// Gold value computed as the following:
 ///
@@ -35,6 +37,7 @@
 
 
 module{
+  // FINAL: [1 1 1]
   // BOTH: [6.09101, 7.06269, 5.96599, 7.63177, 5.83172, 5.96893, 5.16868, 6.0204, 6.80761, 6.78844, 5.75672, 7.33505, 5.417{{.*}}, 6.04153, 5.14715, 6.728{{.*}}, 7.30343, 7.90745, 6.73162, 8.21738, 5.65554, 7.37453, 6.6329, 6.6093, 5.2816, 6.17693, 5.19904, 6.38292, 4.55713, 4.62921, 4.72307, 5.47466, 4.551, 6.15787, 4.97358, 5.89798, 5.10684, 6.01542, 5.18933, 5.58596, 5.22862, 7.13881, 4.88134, 5.56315, 5.52007, 6.27824, 4.93779, 5.71044, 6.27934, 7.51976, 5.23159, 7.17014, 6.74235, 5.59631, 5.33666, 6.20902, 4.95302, 5.26817, 4.50571, 5.17464, 4.49137, 4.80133, 3.39298, 4.92709]
   // GOLD: [1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625, -1.625, 1.0625, 1.0625]
   func.func @conv(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { 
diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir
index a9658468d92f..1e5cdab8f085 100644
--- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir
+++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv1d-group.cpu.mlir
@@ -1,8 +1,10 @@
 // RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone -  | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH
 // RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH
+// RUN: rocmlir-gen -fut conv_1d_group -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_1d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL
 
 // Only a small subset of the array is checked because it is quite huge
 
+// FINAL: [1 1 1]
 // BOTH: [2.94651, 3.09122, 3.86495, 4.54138, 3.18018, 4.06578, 2.97265, 4.05155, 2.35716, 4.26762, 3.49153, 4.14329, 3.82529, 5.43395, 4.66598, 4.98826, 4.41554, 5.15631, 3.91766, 4.79236, 4.52993, 4.25152, 4.87812, 5.10546, 4.19679, 5.1306, 4.2836, 3.7857, 5.21429, 4.6504, 4.83997, 3.91648, 5.86651, 4.76546, 5.00734, 5.18668, 5.38386, 4.1707, 5.43972, 5.57541, 5.33734, 5.14293, 4.10719, 5.32505, 4.39825
 func.func @conv_1d_group(%in: !migraphx.shaped<10x8x123xf32, 984x123x1>, %fil: !migraphx.shaped<12x2x7xf32, 14x7x1>) -> !migraphx.shaped<10x12x53xf32, 636x53x1> {
   %out = migraphx.convolution %in, %fil {dilation = [4], group = 4 : i64, padding = [3,3], padding_mode = 0 : i64, stride = [2]} :
diff --git a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir
index 83630aa811d4..84e14f7ef9d0 100644
--- a/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir
+++ b/mlir/test/fusion/e2e/mixr-to-linalg/mixr-to-linalg-conv3d-group.cpu.mlir
@@ -1,7 +1,9 @@
 // RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone -  | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH
 // RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_3d_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH
+// RUN: rocmlir-gen -fut conv_3d -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_3d_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL
 
 // BOTH: [9.78569, 8.8887, 12.9401, 10.9686, 8.19386, 11.9315, 17.5043, 11.5946, 18.9063
+// FINAL: [1 1 1]
 func.func @conv_3d(%in: !migraphx.shaped<10x8x12x13x14xf32, 17472x2184x182x14x1>, %fil: !migraphx.shaped<12x8x2x3x4xf32, 192x24x12x4x1>) -> !migraphx.shaped<10x12x13x6x3xf32, 2808x234x18x3x1> {
   %out = migraphx.convolution %in, %fil {dilation = [3, 4, 5], group = 1 : i64, padding = [2, 3, 4, 2, 3, 4], padding_mode = 0 : i64, stride = [1, 2, 3]} : 
         <10x8x12x13x14xf32, 17472x2184x182x14x1>, <12x8x2x3x4xf32, 192x24x12x4x1> -> <10x12x13x6x3xf32, 2808x234x18x3x1>
diff --git a/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir b/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir
new file mode 100644
index 000000000000..a70f9d19a8bb
--- /dev/null
+++ b/mlir/test/fusion/pr-e2e/linalg-to-rock-conv3d-no-pad.e2d.mlir
@@ -0,0 +1,7 @@
+// RUN: rocmlir-gen -fut conv3d_add -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx-linalg,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv3d_add_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s
+
+// CHECK: [1 1 1]
+func.func @conv3d_add(%arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<9x1x2x2x2xf32, 8x8x4x2x1>) -> !migraphx.shaped<2x9x2x2x2xf32, 72x8x4x2x1> { // Single 3-D convolution returned directly. NOTE(review) no add op appears in this body despite the name, confirm the intended name.
+  %0 = migraphx.convolution %arg1, %arg2 {dilation = [2, 2, 2], group = 3 : i64, padding = [0, 0, 0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2, 2]} : <2x3x5x5x5xf32, 375x125x25x5x1>, <9x1x2x2x2xf32, 8x8x4x2x1> -> <2x9x2x2x2xf32, 72x8x4x2x1> // grouped conv (group = 3) with all-zero padding, stride 2 and dilation 2 in every spatial dim
+  return %0: !migraphx.shaped<2x9x2x2x2xf32, 72x8x4x2x1>
+}
diff --git a/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir b/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir
index 5f019521aeac..64f838c35cdf 100644
--- a/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir
+++ b/mlir/test/fusion/pr-e2e/mixr-to-linalg-conv2d-group.cpu.mlir
@@ -1,10 +1,12 @@
 // RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx-linalg,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone -  | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH
 // RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver --host-pipeline=migraphx,highlevel --kernel-pipeline=migraphx,highlevel | rocmlir-gen -ph -print-results -rand 1 -rand_type=float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=BOTH
+// RUN: rocmlir-gen -fut conv_2d_group -arch %arch --clone-harness %s | rocmlir-driver -host-pipeline=migraphx,highlevel -kernel-pipeline=migraphx-linalg,highlevel -targets %arch | rocmlir-gen -ph -print-results -rand 1 -rand_type float -fut conv_2d_group_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s --check-prefix=FINAL
 
 // Here we are checking to see if conv_2d with non standard stride, dilation, and a group parameter matches the existing tosa pipeline
 // Note - this array is quite large, so we are only checking a small subset
 
 // BOTH: [5.83007, 7.83374, 8.46274, 9.03237, 6.51391, 7.75809, 9.73003, 8.48013, 8.15419, 9.9975, 7.50244, 7.11982, 6.58057
+// FINAL: [1 1 1]
 func.func @conv_2d_group(%in: !migraphx.shaped<2x4x123x124xf32, 61008x15252x124x1>, %fil: !migraphx.shaped<8x2x4x5xf32, 40x20x5x1>) -> !migraphx.shaped<2x8x27x19xf32, 4104x513x19x1> {
   %out = migraphx.convolution %in, %fil {dilation = [2, 3], group = 2 : i64, padding = [2, 2, 2, 2], padding_mode = 0 : i64, stride = [4, 5]} : 
     <2x4x123x124xf32, 61008x15252x124x1>, <8x2x4x5xf32, 40x20x5x1> -> <2x8x27x19xf32, 4104x513x19x1>