Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ if(APHRODITE_GPU_LANG STREQUAL "CUDA")
message(STATUS "Enabling cumem allocator extension.")
# link against cuda driver library
list(APPEND CUMEM_LIBS CUDA::cuda_driver)
define_gpu_extension_target(
cumem_allocator
DESTINATION aphrodite
define_gpu_extension_target(
cumem_allocator
DESTINATION aphrodite/extensions/cuda
Comment on lines +224 to +226

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The indentation for define_gpu_extension_target and its arguments appears to have been accidentally changed, making it inconsistent with the surrounding code style. Please restore the original indentation to improve readability and maintain consistency.

  define_gpu_extension_target(
    cumem_allocator
    DESTINATION aphrodite/extensions/cuda

LANGUAGE CXX
SOURCES ${APHRODITE_CUMEM_EXT_SRC}
LIBRARIES ${CUMEM_LIBS}
Expand Down Expand Up @@ -804,7 +804,7 @@ endif()
message(STATUS "Enabling C extension.")
define_gpu_extension_target(
_C
DESTINATION aphrodite
DESTINATION $<IF:$<STREQUAL:${APHRODITE_GPU_LANG},CUDA>,aphrodite/extensions/cuda,aphrodite/extensions/rocm>
LANGUAGE ${APHRODITE_GPU_LANG}
SOURCES ${APHRODITE_EXT_SRC}
COMPILE_FLAGS ${APHRODITE_GPU_FLAGS}
Expand Down Expand Up @@ -918,7 +918,7 @@ endif()
message(STATUS "Enabling moe extension.")
define_gpu_extension_target(
_moe_C
DESTINATION aphrodite
DESTINATION $<IF:$<STREQUAL:${APHRODITE_GPU_LANG},CUDA>,aphrodite/extensions/cuda,aphrodite/extensions/rocm>
LANGUAGE ${APHRODITE_GPU_LANG}
SOURCES ${APHRODITE_MOE_EXT_SRC}
COMPILE_FLAGS ${APHRODITE_GPU_FLAGS}
Expand All @@ -939,7 +939,7 @@ if(APHRODITE_GPU_LANG STREQUAL "HIP")

define_gpu_extension_target(
_rocm_C
DESTINATION aphrodite
DESTINATION aphrodite/extensions/rocm
LANGUAGE ${APHRODITE_GPU_LANG}
SOURCES ${APHRODITE_ROCM_EXT_SRC}
COMPILE_FLAGS ${APHRODITE_GPU_FLAGS}
Expand Down
26 changes: 21 additions & 5 deletions aphrodite/_custom_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,30 @@

if not current_platform.is_tpu() and not current_platform.is_xpu():
try:
import aphrodite._C
if current_platform.is_cuda():
import aphrodite.extensions.cuda._C # noqa: F401
elif current_platform.is_rocm():
import aphrodite.extensions.rocm._C # noqa: F401
# Also register ROCm-specific ops if present
with contextlib.suppress(ImportError):
import aphrodite.extensions.rocm._rocm_C # noqa: F401
elif current_platform.is_cpu():
import aphrodite.extensions.cpu._C # noqa: F401
else:
# Other platforms not handled here
pass
except ImportError as e:
logger.warning("Failed to import from aphrodite._C with {!r}", e)
logger.warning("Failed to import platform-specific _C with {!r}", e)

supports_moe_ops = False
with contextlib.suppress(ImportError):
import aphrodite._moe_C # noqa: F401
supports_moe_ops = True
if current_platform.is_cuda():
with contextlib.suppress(ImportError):
import aphrodite.extensions.cuda._moe_C # noqa: F401
supports_moe_ops = True
elif current_platform.is_rocm():
with contextlib.suppress(ImportError):
import aphrodite.extensions.rocm._moe_C # noqa: F401
supports_moe_ops = True
Comment on lines 11 to +36

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic for importing platform-specific extensions and setting supports_moe_ops contains duplicated code for CUDA and ROCm backends. This can be refactored to improve readability and maintainability by first determining the backend and then using a single block of code for the import logic. Using importlib.import_module would make dynamic imports cleaner.

import importlib

# Register the compiled core ops for the active backend. The whole step is
# skipped on TPU and XPU platforms.
if not (current_platform.is_tpu() or current_platform.is_xpu()):
    # Map the detected platform to the name of its extensions sub-package;
    # None means the platform is not handled here.
    if current_platform.is_cuda():
        backend = "cuda"
    elif current_platform.is_rocm():
        backend = "rocm"
    elif current_platform.is_cpu():
        backend = "cpu"
    else:
        backend = None

    if backend is not None:
        try:
            importlib.import_module(f"aphrodite.extensions.{backend}._C")
            if backend == "rocm":
                # Also register ROCm-specific ops if present; a missing
                # module is tolerated silently.
                with contextlib.suppress(ImportError):
                    importlib.import_module("aphrodite.extensions.rocm._rocm_C")
        except ImportError as e:
            logger.warning("Failed to import platform-specific _C for backend {} with {!r}", backend, e)

# MoE ops are only looked up for CUDA and ROCm; the flag flips to True only
# when the backend's _moe_C module imports successfully.
supports_moe_ops = False
if current_platform.is_cuda() or current_platform.is_rocm():
    if current_platform.is_cuda():
        backend = "cuda"
    else:
        backend = "rocm"
    try:
        importlib.import_module(f"aphrodite.extensions.{backend}._moe_C")
    except ImportError:
        # Best effort: leave supports_moe_ops as False.
        pass
    else:
        supports_moe_ops = True


if TYPE_CHECKING:

Expand Down
7 changes: 4 additions & 3 deletions aphrodite/attention/ops/flashmla.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

if current_platform.is_cuda():
try:
import aphrodite._flashmla_C # noqa: F401
import aphrodite.extensions.cuda._flashmla_C # noqa: F401
_flashmla_C_AVAILABLE = True
except ImportError:
_flashmla_C_AVAILABLE = False
Expand All @@ -24,8 +24,9 @@ def is_flashmla_supported() -> Tuple[bool, Optional[str]]:
if current_platform.get_device_capability()[0] != 9:
return False, "FlashMLA is only supported on Hopper devices."
if not _flashmla_C_AVAILABLE:
return False, "aphrodite._flashmla_C is not available, likely was not "\
"compiled due to insufficient nvcc version or a supported arch "\
return False, "aphrodite.extensions.cuda._flashmla_C is not " \
"available, likely was not compiled due to insufficient nvcc " \
"version or a supported arch "\
"(only sm90a currently) was not in the list of target arches to "\
"compile for."
return True, None
Expand Down
1 change: 1 addition & 0 deletions aphrodite/extensions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This directory contains the compiled binaries for the Aphrodite extensions. It is empty by default and is populated when the Aphrodite extensions are built.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
2 changes: 1 addition & 1 deletion aphrodite/platforms/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing_extensions import ParamSpec

# import custom ops, trigger op registration
import aphrodite._C # noqa
import aphrodite.extensions.cuda._C # noqa
import aphrodite.common.envs as envs
from aphrodite.common.logger import log_once
from aphrodite.utils import cuda_device_count_stateless, import_pynvml
Expand Down
10 changes: 6 additions & 4 deletions aphrodite/platforms/rocm.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,17 @@
logger.warning("Failed to import from amdsmi with {}", e)

try:
import aphrodite._C # noqa: F401
import aphrodite.extensions.rocm._C # noqa: F401
except ImportError as e:
logger.warning("Failed to import from aphrodite._C with {}", e)
logger.warning(
"Failed to import from aphrodite.extensions.rocm._C with {}", e)

# import custom ops, trigger op registration
try:
import aphrodite._rocm_C # noqa: F401
import aphrodite.extensions.rocm._rocm_C # noqa: F401
except ImportError as e:
logger.warning("Failed to import from aphrodite._rocm_C with {}", e)
logger.warning(
"Failed to import from aphrodite.extensions.rocm._rocm_C with {}", e)

# Models not supported by ROCm.
_ROCM_UNSUPPORTED_MODELS: list[str] = []
Expand Down
2 changes: 1 addition & 1 deletion cmake/cpu_extension.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ message(STATUS "CPU extension source files: ${APHRODITE_EXT_SRC}")

define_gpu_extension_target(
_C
DESTINATION aphrodite
DESTINATION aphrodite/extensions/cpu
LANGUAGE CXX
SOURCES ${APHRODITE_EXT_SRC}
LIBRARIES ${LIBS}
Expand Down
2 changes: 1 addition & 1 deletion cmake/external_project/flashmla.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.3 AND FLASH_MLA_ARCHS)

define_gpu_extension_target(
_flashmla_C
DESTINATION aphrodite
DESTINATION aphrodite/extensions/cuda
LANGUAGE ${APHRODITE_GPU_LANG}
SOURCES ${FlashMLA_SOURCES}
COMPILE_FLAGS ${APHRODITE_GPU_FLAGS}
Expand Down
2 changes: 1 addition & 1 deletion requirements/rocm-build.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Common dependencies
-r common.txt

--extra-index-url https://download.pytorch.org/whl/rocm6.3
--extra-index-url https://download.pytorch.org/whl/rocm6.4
torch==2.8.0
torchvision==0.23.0
torchaudio==2.8.0
Expand Down
2 changes: 1 addition & 1 deletion requirements/rocm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ numba == 0.61.2; python_version > '3.9'
boto3
botocore
datasets
ray>=2.10.0,<2.45.0
ray>=2.10.0

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Removing the upper version constraint for ray could introduce instability if a future version includes breaking changes. It's generally safer to specify a tested upper bound. If this change is intentional and has been tested, consider adding a comment to explain why the upper bound was removed.

peft
pytest-asyncio
tensorizer==2.10.1
Expand Down
29 changes: 21 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,11 @@ def build_extensions(self) -> None:
targets = []

def target_name(s: str) -> str:
return s.removeprefix("aphrodite.").removeprefix(
"aphrodite_flash_attn.")
return s.removeprefix("aphrodite.extensions.cuda.") \
.removeprefix("aphrodite.extensions.rocm.") \
.removeprefix("aphrodite.extensions.cpu.") \
.removeprefix("aphrodite.") \
.removeprefix("aphrodite_flash_attn.")

# Build all the extensions
for ext in self.extensions:
Expand Down Expand Up @@ -633,11 +636,15 @@ def _read_requirements(filename: str) -> list[str]:

# Skip building extensions if using precompiled binaries
if not envs.APHRODITE_USE_PRECOMPILED:
if _is_cuda() or _is_hip():
ext_modules.append(CMakeExtension(name="aphrodite._moe_C"))
# MoE extension per backend
if _is_cuda():
ext_modules.append(CMakeExtension(name="aphrodite.extensions.cuda._moe_C"))
elif _is_hip():
ext_modules.append(CMakeExtension(name="aphrodite.extensions.rocm._moe_C"))

# ROCm-specific extension
if _is_hip():
ext_modules.append(CMakeExtension(name="aphrodite._rocm_C"))
ext_modules.append(CMakeExtension(name="aphrodite.extensions.rocm._rocm_C"))

if _is_cuda():
if not envs.APHRODITE_DISABLE_FLASH_ATTN_COMPILE:
Expand All @@ -656,11 +663,17 @@ def _read_requirements(filename: str) -> list[str]:
if envs.APHRODITE_USE_PRECOMPILED or \
get_nvcc_cuda_version() >= Version("12.3"):
ext_modules.append(
CMakeExtension(name="aphrodite._flashmla_C", optional=True))
ext_modules.append(CMakeExtension(name="aphrodite.cumem_allocator"))
CMakeExtension(name="aphrodite.extensions.cuda._flashmla_C", optional=True))
ext_modules.append(CMakeExtension(name="aphrodite.extensions.cuda.cumem_allocator"))

# Core extension per backend
if _build_custom_ops():
ext_modules.append(CMakeExtension(name="aphrodite._C"))
if _is_cuda():
ext_modules.append(CMakeExtension(name="aphrodite.extensions.cuda._C"))
elif _is_hip():
ext_modules.append(CMakeExtension(name="aphrodite.extensions.rocm._C"))
elif _is_cpu():
ext_modules.append(CMakeExtension(name="aphrodite.extensions.cpu._C"))

package_data = {
"aphrodite": [
Expand Down
Loading