diff --git a/bitsandbytes/backends/xpu/ops.py b/bitsandbytes/backends/xpu/ops.py
index a0620dc4b..dfd0fb2d9 100644
--- a/bitsandbytes/backends/xpu/ops.py
+++ b/bitsandbytes/backends/xpu/ops.py
@@ -32,6 +32,8 @@ def _dequantize_4bit_impl(
     dtype: torch.dtype,
     out: torch.Tensor,
 ) -> None:
+    # XPU SYCL kernels only support contiguous tensors.
+    A = A.contiguous()
     args = (
         None,
         get_ptr(A),
@@ -61,6 +63,8 @@ def _dequantize_4bit_impl(
 def _dequantize_blockwise_impl(
     A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor, blocksize: int, dtype: torch.dtype, out: torch.Tensor
 ) -> None:
+    # XPU SYCL kernels only support contiguous tensors.
+    A = A.contiguous()
     args = (
         get_ptr(code),
         get_ptr(A),