From 2bc205f3de8ed67af0cc52f198ba5856619fb241 Mon Sep 17 00:00:00 2001 From: bingoo <1575938147@qq.com> Date: Mon, 29 Jun 2026 10:59:09 +0800 Subject: [PATCH 1/3] support arm --- requirements.txt | 4 ++-- requirements_dcu.txt | 4 ++-- requirements_iluvatar.txt | 4 ++-- requirements_metaxgpu.txt | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index f0ead0abb1d..3a2e3c9951a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ paddleformers>=1.1.1 redis etcd3 httpx -tool_helpers +fast_dataindex cupy-cuda12x pybind11[global] tabulate @@ -23,7 +23,7 @@ xlwt visualdl setuptools-scm>=8 prometheus-client -decord +paddlecodec moviepy triton crcmod diff --git a/requirements_dcu.txt b/requirements_dcu.txt index c34fa1657cf..4242384dffe 100644 --- a/requirements_dcu.txt +++ b/requirements_dcu.txt @@ -14,7 +14,7 @@ paddleformers==1.0.0 redis etcd3 httpx -tool_helpers +fast_dataindex pybind11[global] tabulate gradio @@ -22,7 +22,7 @@ xlwt visualdl setuptools-scm>=8 prometheus-client -decord +paddlecodec moviepy use-triton-in-paddle crcmod diff --git a/requirements_iluvatar.txt b/requirements_iluvatar.txt index 6958d5743e3..b28e0474587 100644 --- a/requirements_iluvatar.txt +++ b/requirements_iluvatar.txt @@ -14,7 +14,7 @@ paddleformers==1.0.0 redis etcd3 httpx -tool_helpers +fast_dataindex pybind11[global] tabulate gradio @@ -22,7 +22,7 @@ xlwt visualdl setuptools-scm>=8 prometheus-client -decord +paddlecodec moviepy wheel triton diff --git a/requirements_metaxgpu.txt b/requirements_metaxgpu.txt index c1e0ef140b1..2acf8e9069d 100644 --- a/requirements_metaxgpu.txt +++ b/requirements_metaxgpu.txt @@ -14,7 +14,7 @@ paddleformers==0.4.1 redis etcd3 httpx -tool_helpers +fast_dataindex cupy-cuda12x pybind11[global] tabulate @@ -23,7 +23,7 @@ xlwt visualdl setuptools-scm>=8 prometheus-client -decord +paddlecodec moviepy triton use-triton-in-paddle From d46591d449287b638d9bc8d20217deb426f8951b Mon Sep 17 00:00:00 2001 From: bingoo <1575938147@qq.com> Date: Mon, 29 Jun 2026 11:38:33 +0800 Subject: [PATCH 2/3] adpative arm --- .../input/ernie4_5_vl_processor/__init__.py | 4 +- .../input/ernie4_5_vl_processor/process.py | 6 +- .../ernie4_5_vl_processor/process_video.py | 8 +- .../utils/video_utils.py | 82 +++++- .../input/paddleocr_vl_processor/process.py | 4 +- .../input/qwen3_vl_processor/process.py | 4 +- fastdeploy/input/qwen_vl_processor/process.py | 4 +- fastdeploy/input/video_utils.py | 81 ++++-- tests/input/test_ernie_video_utils.py | 244 ++++++++++++++++ tests/input/test_ernie_vl_processor.py | 6 +- tests/input/test_paddleocr_vl_processor.py | 4 +- tests/input/test_process_video.py | 18 +- tests/input/test_video_utils.py | 266 +++++++++++++----- 13 files changed, 599 insertions(+), 132 deletions(-) create mode 100644 tests/input/test_ernie_video_utils.py diff --git a/fastdeploy/input/ernie4_5_vl_processor/__init__.py b/fastdeploy/input/ernie4_5_vl_processor/__init__.py index f7d30a78d58..a49cf95ed49 100644 --- a/fastdeploy/input/ernie4_5_vl_processor/__init__.py +++ b/fastdeploy/input/ernie4_5_vl_processor/__init__.py @@ -16,13 +16,13 @@ from .ernie4_5_vl_processor import Ernie4_5_VLProcessor from .process import DataProcessor, fancy_print -from .process_video import read_video_decord +from .process_video import read_video_paddlecodec from .utils.video_utils import VideoReaderWrapper __all__ = [ "DataProcessor", "fancy_print", "VideoReaderWrapper", - "read_video_decord", + "read_video_paddlecodec", "Ernie4_5_VLProcessor", ] diff --git a/fastdeploy/input/ernie4_5_vl_processor/process.py b/fastdeploy/input/ernie4_5_vl_processor/process.py index ab87c562176..741fabec15c 100644 --- a/fastdeploy/input/ernie4_5_vl_processor/process.py +++ b/fastdeploy/input/ernie4_5_vl_processor/process.py @@ -38,7 +38,7 @@ from fastdeploy.utils import data_processor_logger from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor -from .process_video import read_frames_decord, read_video_decord +from .process_video import read_frames_paddlecodec, read_video_paddlecodec from .utils.render_timestamp import render_frame_timestamp @@ -630,7 +630,7 @@ def _extract_labels(self, outputs: Dict, tgts: List[str]) -> None: outputs["labels"] = labels def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]: - reader, meta, path = read_video_decord(url, save_to_disk=False) + reader, meta, path = read_video_paddlecodec(url, save_to_disk=False) video_frame_args = dict() video_frame_args["fps"] = item.get("fps", self.fps) @@ -641,7 +641,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]: video_frame_args = self._set_video_frame_args(video_frame_args, meta) - frames_data, _, timestamps = read_frames_decord( + frames_data, _, timestamps = read_frames_paddlecodec( path, reader, meta, diff --git a/fastdeploy/input/ernie4_5_vl_processor/process_video.py b/fastdeploy/input/ernie4_5_vl_processor/process_video.py index 91120096c70..c22bf780e4b 100644 --- a/fastdeploy/input/ernie4_5_vl_processor/process_video.py +++ b/fastdeploy/input/ernie4_5_vl_processor/process_video.py @@ -27,8 +27,8 @@ from .utils.video_utils import VideoReaderWrapper -def read_video_decord(video_path, save_to_disk): - """get reader and meta by decord""" +def read_video_paddlecodec(video_path, save_to_disk): + """get reader and meta by paddlecodec""" # video_path = get_downloadable(video_path, save_to_disk=save_to_disk) if isinstance(video_path, VideoReaderWrapper): video_reader = video_path @@ -113,7 +113,7 @@ def get_frame_indices( return frame_indices -def read_frames_decord( +def read_frames_paddlecodec( video_path, video_reader, video_meta, @@ -126,7 +126,7 @@ def read_frames_decord( frame_indices=None, tol=10, ): - """get frames by decord""" + """get frames by paddlecodec""" if frame_indices is None: frame_indices = get_frame_indices( diff --git a/fastdeploy/input/ernie4_5_vl_processor/utils/video_utils.py b/fastdeploy/input/ernie4_5_vl_processor/utils/video_utils.py index a4769ca8ecc..71b748369e3 100644 --- a/fastdeploy/input/ernie4_5_vl_processor/utils/video_utils.py +++ b/fastdeploy/input/ernie4_5_vl_processor/utils/video_utils.py @@ -18,7 +18,10 @@ import os from tempfile import NamedTemporaryFile as ntf -import decord +import numpy as np +import paddle + +from fastdeploy.utils import get_logger try: # moviepy 1.0 @@ -27,6 +30,8 @@ # moviepy 2.0 import moviepy as mp +logger = get_logger("video_utils") + def is_gif(data: bytes) -> bool: """ @@ -35,19 +40,24 @@ def is_gif(data: bytes) -> bool: return data[:6] in (b"GIF87a", b"GIF89a") -class VideoReaderWrapper(decord.VideoReader): - """ - Solving memory leak bug +class _NumpyFrame: + """Wrapper so that frame[idx].asnumpy() keeps working with paddlecodec.""" + + def __init__(self, array): + self._array = array + + def asnumpy(self): + return self._array - https://github.com/dmlc/decord/issues/208 - """ + +class VideoReaderWrapper: + """paddlecodec VideoDecoder wrapper with GIF support.""" def __init__(self, video_path, *args, **kwargs): with ntf(delete=True, suffix=".gif") as gif_file: gif_input = None self.original_file = None if isinstance(video_path, str): - self.original_file = video_path if video_path.lower().endswith(".gif"): gif_input = video_path elif isinstance(video_path, bytes): @@ -70,14 +80,58 @@ def __init__(self, video_path, *args, **kwargs): video_path = mp4_file.name self.original_file = video_path - super().__init__(video_path, *args, **kwargs) - self.seek(0) + with paddle.use_compat_guard(enable=True, scope={"torchcodec"}): + try: + import sys + + from torchcodec.decoders import VideoDecoder + + sys.modules["torchcodec"] = None + except (ImportError, RuntimeError) as e: + logger.error( + f"Failed to load 'torchcodec' backend via Paddle proxy.\n" + f" - Common Causes:\n" + f" 1. Conflict with official 'torch' or 'torchcodec' packages.\n" + f" 2. Missing FFmpeg libraries or System library mismatch (CXXABI).\n" + f" - Recommended Fix Steps:\n" + f" 1. Install dependencies: `conda install ffmpeg -c conda-forge` or `apt-get update && apt-get install ffmpeg` \n" + f" 2. Uninstall conflicts: `pip uninstall torchcodec paddlecodec -y`\n" + f" 3. Reinstall packages: `pip install paddlecodec --force-reinstall`\n" + f" - If you encounter 'CXXABI' or 'libstdc++' errors, your system libraries might be outdated.\n" + f" Try prioritizing Conda libraries by running: `LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH python your_script.py`\n" + f" - Original Error: {e}" + ) + raise + PADDLECODEC_NUM_THREADS = int(os.environ.get("PADDLECODEC_NUM_THREADS", 0)) + self._decoder = VideoDecoder( + video_path, + seek_mode="exact", + num_ffmpeg_threads=PADDLECODEC_NUM_THREADS, + device=kwargs.get("device", "cpu"), + dimension_order="NHWC", + ) + + def __len__(self): + return self._decoder.metadata.num_frames def __getitem__(self, key): - frames = super().__getitem__(key) - self.seek(0) - return frames + if isinstance(key, (int, np.integer)): + frame = self._decoder.get_frames_at(indices=[int(key)]).data[0] + return _NumpyFrame(frame.numpy()) + if isinstance(key, slice): + indices = list(range(*key.indices(len(self)))) + else: + indices = list(key) if not isinstance(key, list) else key + frames = self._decoder.get_frames_at(indices=indices).data + return _NumpyFrame(frames.numpy()) + + def get_avg_fps(self): + return self._decoder.metadata.average_fps def __del__(self): - if self.original_file and os.path.exists(self.original_file): - os.remove(self.original_file) + original_file = getattr(self, "original_file", None) + if original_file and os.path.exists(original_file): + try: + os.remove(original_file) + except OSError: + pass diff --git a/fastdeploy/input/paddleocr_vl_processor/process.py b/fastdeploy/input/paddleocr_vl_processor/process.py index 58f51a9aa10..adab05f2563 100644 --- a/fastdeploy/input/paddleocr_vl_processor/process.py +++ b/fastdeploy/input/paddleocr_vl_processor/process.py @@ -28,7 +28,7 @@ from fastdeploy.entrypoints.chat_utils import parse_chat_messages from fastdeploy.input.mm_data_processor import MMBaseDataProcessor from fastdeploy.input.utils import IDS_TYPE_FLAG -from fastdeploy.input.video_utils import read_video_decord +from fastdeploy.input.video_utils import read_video_paddlecodec from fastdeploy.input.video_utils import sample_frames_paddleocr as sample_frames from fastdeploy.multimodal.hasher import MultimodalHasher from fastdeploy.utils import data_processor_logger @@ -530,7 +530,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic - frames: Processed video frames as numpy array - metadata: Updated video metadata dictionary """ - reader, meta, _ = read_video_decord(url, save_to_disk=False) + reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False) # Apply frame sampling if fps or target_frames specified fps = item.get("fps", self.fps) diff --git a/fastdeploy/input/qwen3_vl_processor/process.py b/fastdeploy/input/qwen3_vl_processor/process.py index 994ec512911..3bae7d78dea 100644 --- a/fastdeploy/input/qwen3_vl_processor/process.py +++ b/fastdeploy/input/qwen3_vl_processor/process.py @@ -28,7 +28,7 @@ from fastdeploy.entrypoints.chat_utils import parse_chat_messages from fastdeploy.input.mm_data_processor import MMBaseDataProcessor from fastdeploy.input.utils import IDS_TYPE_FLAG -from fastdeploy.input.video_utils import read_video_decord +from fastdeploy.input.video_utils import read_video_paddlecodec from fastdeploy.input.video_utils import sample_frames_qwen as sample_frames from fastdeploy.multimodal.hasher import MultimodalHasher from fastdeploy.utils import data_processor_logger @@ -681,7 +681,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic - frames: Processed video frames as numpy array - metadata: Updated video metadata dictionary """ - reader, meta, _ = read_video_decord(url, save_to_disk=False) + reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False) # Apply frame sampling if fps or target_frames specified fps = item.get("fps", self.fps) diff --git a/fastdeploy/input/qwen_vl_processor/process.py b/fastdeploy/input/qwen_vl_processor/process.py index a84fac7854e..0951f196b65 100644 --- a/fastdeploy/input/qwen_vl_processor/process.py +++ b/fastdeploy/input/qwen_vl_processor/process.py @@ -28,7 +28,7 @@ from fastdeploy.entrypoints.chat_utils import parse_chat_messages from fastdeploy.input.mm_data_processor import MMBaseDataProcessor from fastdeploy.input.utils import IDS_TYPE_FLAG -from fastdeploy.input.video_utils import read_video_decord +from fastdeploy.input.video_utils import read_video_paddlecodec from fastdeploy.input.video_utils import sample_frames_qwen as sample_frames from fastdeploy.multimodal.hasher import MultimodalHasher from fastdeploy.utils import data_processor_logger @@ -531,7 +531,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic - frames: Processed video frames as numpy array - metadata: Updated video metadata dictionary """ - reader, meta, _ = read_video_decord(url, save_to_disk=False) + reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False) # Apply frame sampling if fps or target_frames specified fps = item.get("fps", self.fps) diff --git a/fastdeploy/input/video_utils.py b/fastdeploy/input/video_utils.py index a81cf3f5d61..e1520c4b02e 100644 --- a/fastdeploy/input/video_utils.py +++ b/fastdeploy/input/video_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared video utilities: VideoReaderWrapper, read_video_decord, and sample_frames.""" +"""Shared video utilities: VideoReaderWrapper, read_video_paddlecodec, and sample_frames.""" import io import math @@ -21,13 +21,16 @@ from typing import Optional, Union import numpy as np +import paddle from fastdeploy.input.image_processors.common import ceil_by_factor, floor_by_factor -from fastdeploy.utils import data_processor_logger +from fastdeploy.utils import data_processor_logger, get_logger + +logger = get_logger("video_utils") __all__ = [ "VideoReaderWrapper", - "read_video_decord", + "read_video_paddlecodec", "sample_frames", "sample_frames_qwen", "sample_frames_paddleocr", @@ -44,15 +47,20 @@ def _is_gif(data: bytes) -> bool: return data[:6] in (b"GIF87a", b"GIF89a") -class VideoReaderWrapper: - """decord.VideoReader wrapper that fixes a memory leak and adds GIF support. +class _NumpyFrame: + """Wrapper so that frame[idx].asnumpy() keeps working with paddlecodec.""" - Reference: https://github.com/dmlc/decord/issues/208 - """ + def __init__(self, array): + self._array = array + + def asnumpy(self): + return self._array - def __init__(self, video_path, *args, **kwargs): - import decord +class VideoReaderWrapper: + """paddlecodec VideoDecoder wrapper with GIF support.""" + + def __init__(self, video_path, *args, **kwargs): try: # moviepy 1.0 import moviepy.editor as mp @@ -91,22 +99,53 @@ def __init__(self, video_path, *args, **kwargs): video_path = mp4_path self.original_file = video_path # temp mp4, cleaned up in __del__ - self._reader = decord.VideoReader(video_path, *args, **kwargs) - self._reader.seek(0) + with paddle.use_compat_guard(enable=True, scope={"torchcodec"}): + try: + import sys + + from torchcodec.decoders import VideoDecoder + + sys.modules["torchcodec"] = None + except (ImportError, RuntimeError) as e: + logger.error( + f"Failed to load 'torchcodec' backend via Paddle proxy.\n" + f" - Common Causes:\n" + f" 1. Conflict with official 'torch' or 'torchcodec' packages.\n" + f" 2. Missing FFmpeg libraries or System library mismatch (CXXABI).\n" + f" - Recommended Fix Steps:\n" + f" 1. Install dependencies: `conda install ffmpeg -c conda-forge` or `apt-get update && apt-get install ffmpeg` \n" + f" 2. Uninstall conflicts: `pip uninstall torchcodec paddlecodec -y`\n" + f" 3. Reinstall packages: `pip install paddlecodec --force-reinstall`\n" + f" - If you encounter 'CXXABI' or 'libstdc++' errors, your system libraries might be outdated.\n" + f" Try prioritizing Conda libraries by running: `LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH python your_script.py`\n" + f" - Original Error: {e}" + ) + raise + PADDLECODEC_NUM_THREADS = int(os.environ.get("PADDLECODEC_NUM_THREADS", 0)) + self._decoder = VideoDecoder( + video_path, + seek_mode="exact", + num_ffmpeg_threads=PADDLECODEC_NUM_THREADS, + device=kwargs.get("device", "cpu"), + dimension_order="NHWC", + ) def __len__(self): - return len(self._reader) + return self._decoder.metadata.num_frames def __getitem__(self, key): - frames = self._reader[key] - self._reader.seek(0) - return frames + if isinstance(key, (int, np.integer)): + frame = self._decoder.get_frames_at(indices=[int(key)]).data[0] + return _NumpyFrame(frame.numpy()) + if isinstance(key, slice): + indices = list(range(*key.indices(len(self)))) + else: + indices = list(key) if not isinstance(key, list) else key + frames = self._decoder.get_frames_at(indices=indices).data + return _NumpyFrame(frames.numpy()) def get_avg_fps(self): - return self._reader.get_avg_fps() - - def seek(self, pos): - return self._reader.seek(pos) + return self._decoder.metadata.average_fps def __del__(self): original_file = getattr(self, "original_file", None) @@ -118,11 +157,11 @@ def __del__(self): # --------------------------------------------------------------------------- -# read_video_decord +# read_video_paddlecodec # --------------------------------------------------------------------------- -def read_video_decord(video_path, save_to_disk: bool = False): +def read_video_paddlecodec(video_path, save_to_disk: bool = False): """Load a video file and return (video_reader, video_meta, video_path). video_meta contains keys: "fps", "duration", "num_of_frame". diff --git a/tests/input/test_ernie_video_utils.py b/tests/input/test_ernie_video_utils.py new file mode 100644 index 00000000000..d635a794b62 --- /dev/null +++ b/tests/input/test_ernie_video_utils.py @@ -0,0 +1,244 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for ernie4_5_vl_processor.utils.video_utils (paddlecodec backend).""" + +import io +import os +import tempfile +import unittest +from unittest.mock import MagicMock, patch + +import numpy as np + +from fastdeploy.input.ernie4_5_vl_processor.utils.video_utils import ( + VideoReaderWrapper, + _NumpyFrame, + is_gif, +) + +MODULE = "fastdeploy.input.ernie4_5_vl_processor.utils.video_utils" + +GIF87_HEADER = b"GIF87a" + b"\x00" * 10 +GIF89_HEADER = b"GIF89a" + b"\x00" * 10 +NOT_GIF = b"NOTGIF" + b"\x00" * 10 + + +def _make_mock_decoder(num_frames=100, fps=25.0): + """Return a mock that mimics torchcodec VideoDecoder.""" + decoder = MagicMock() + decoder.metadata.num_frames = num_frames + decoder.metadata.average_fps = fps + + def _get_frames_at(indices): + batch = MagicMock() + tensor = MagicMock() + tensor.numpy.return_value = np.zeros((len(indices), 480, 640, 3), dtype=np.uint8) + first = MagicMock() + first.numpy.return_value = np.zeros((480, 640, 3), dtype=np.uint8) + tensor.__getitem__ = MagicMock(return_value=first) + batch.data = tensor + return batch + + decoder.get_frames_at = MagicMock(side_effect=_get_frames_at) + return decoder + + +class _Guard: + def __enter__(self): + return None + + def __exit__(self, *a): + return False + + +class TestIsGif(unittest.TestCase): + def test_gif87a(self): + self.assertTrue(is_gif(GIF87_HEADER)) + + def test_gif89a(self): + self.assertTrue(is_gif(GIF89_HEADER)) + + def test_not_gif(self): + self.assertFalse(is_gif(NOT_GIF)) + + +class TestNumpyFrame(unittest.TestCase): + def test_asnumpy_roundtrip(self): + arr = np.arange(6).reshape(2, 3) + self.assertIs(_NumpyFrame(arr).asnumpy(), arr) + + +class TestVideoReaderWrapper(unittest.TestCase): + def _make_wrapper(self, video_path, mock_decoder=None, decoder_factory=None): + if mock_decoder is None: + mock_decoder = _make_mock_decoder() + + decoders_module = MagicMock() + if decoder_factory is not None: + decoders_module.VideoDecoder = decoder_factory + else: + decoders_module.VideoDecoder.return_value = mock_decoder + + mock_paddle = MagicMock() + mock_paddle.use_compat_guard.return_value = _Guard() + + with ( + patch.dict( + "sys.modules", + {"torchcodec": MagicMock(), "torchcodec.decoders": decoders_module}, + ), + patch(f"{MODULE}.paddle", mock_paddle), + patch(f"{MODULE}.mp", MagicMock()), + ): + return VideoReaderWrapper(video_path) + + def test_len(self): + wrapper = self._make_wrapper("/fake/video.mp4", _make_mock_decoder(num_frames=42)) + self.assertEqual(len(wrapper), 42) + + def test_getitem_int(self): + decoder = _make_mock_decoder() + wrapper = self._make_wrapper("/fake/video.mp4", decoder) + frame = wrapper[0] + self.assertIsInstance(frame.asnumpy(), np.ndarray) + decoder.get_frames_at.assert_called_with(indices=[0]) + + def test_getitem_slice(self): + decoder = _make_mock_decoder(num_frames=10) + wrapper = self._make_wrapper("/fake/video.mp4", decoder) + wrapper[1:4] + decoder.get_frames_at.assert_called_with(indices=[1, 2, 3]) + + def test_getitem_list(self): + decoder = _make_mock_decoder() + wrapper = self._make_wrapper("/fake/video.mp4", decoder) + wrapper[[2, 5]] + decoder.get_frames_at.assert_called_with(indices=[2, 5]) + + def test_get_avg_fps(self): + wrapper = self._make_wrapper("/fake/video.mp4", _make_mock_decoder(fps=12.0)) + self.assertEqual(wrapper.get_avg_fps(), 12.0) + + def test_decoder_args(self): + captured = {} + + def factory(path, **kwargs): + captured["path"] = path + captured.update(kwargs) + return _make_mock_decoder() + + self._make_wrapper("/fake/video.mp4", decoder_factory=factory) + self.assertEqual(captured["path"], "/fake/video.mp4") + self.assertEqual(captured["seek_mode"], "exact") + self.assertEqual(captured["dimension_order"], "NHWC") + self.assertEqual(captured["device"], "cpu") + + def test_num_ffmpeg_threads_env(self): + captured = {} + + def factory(path, **kwargs): + captured.update(kwargs) + return _make_mock_decoder() + + with patch.dict("os.environ", {"PADDLECODEC_NUM_THREADS": "8"}): + self._make_wrapper("/fake/video.mp4", decoder_factory=factory) + self.assertEqual(captured["num_ffmpeg_threads"], 8) + + def test_non_gif_string_does_not_set_original_file(self): + wrapper = self._make_wrapper("/fake/video.mp4") + self.assertIsNone(wrapper.original_file) + + def test_bytesio_non_gif_does_not_set_original_file(self): + wrapper = self._make_wrapper(io.BytesIO(NOT_GIF)) + self.assertIsNone(wrapper.original_file) + + def test_gif_string_sets_original_file(self): + mp_mock = MagicMock() + decoders_module = MagicMock() + decoders_module.VideoDecoder.return_value = _make_mock_decoder() + mock_paddle = MagicMock() + mock_paddle.use_compat_guard.return_value = _Guard() + + with ( + patch.dict( + "sys.modules", + {"torchcodec": MagicMock(), "torchcodec.decoders": decoders_module}, + ), + patch(f"{MODULE}.paddle", mock_paddle), + patch(f"{MODULE}.mp", mp_mock), + ): + wrapper = VideoReaderWrapper("/fake/anim.gif") + + mp_mock.VideoFileClip.assert_called_once_with("/fake/anim.gif") + self.assertIsNotNone(wrapper.original_file) + self.assertTrue(wrapper.original_file.endswith(".mp4")) + + def test_gif_bytes_sets_original_file(self): + mp_mock = MagicMock() + decoders_module = MagicMock() + decoders_module.VideoDecoder.return_value = _make_mock_decoder() + mock_paddle = MagicMock() + mock_paddle.use_compat_guard.return_value = _Guard() + + with ( + patch.dict( + "sys.modules", + {"torchcodec": MagicMock(), "torchcodec.decoders": decoders_module}, + ), + patch(f"{MODULE}.paddle", mock_paddle), + patch(f"{MODULE}.mp", mp_mock), + ): + wrapper = VideoReaderWrapper(GIF89_HEADER) + + mp_mock.VideoFileClip.assert_called_once() + self.assertIsNotNone(wrapper.original_file) + + def test_import_failure_reraises_and_logs(self): + mock_paddle = MagicMock() + mock_paddle.use_compat_guard.return_value = _Guard() + + broken = MagicMock() + type(broken).VideoDecoder = property(lambda self: (_ for _ in ()).throw(RuntimeError("boom"))) + + with ( + patch.dict( + "sys.modules", + {"torchcodec": MagicMock(), "torchcodec.decoders": broken}, + ), + patch(f"{MODULE}.paddle", mock_paddle), + patch(f"{MODULE}.mp", MagicMock()), + patch(f"{MODULE}.logger") as mock_logger, + ): + with self.assertRaises(RuntimeError): + VideoReaderWrapper("/fake/video.mp4") + + mock_logger.error.assert_called_once() + + def test_del_no_original_file(self): + wrapper = object.__new__(VideoReaderWrapper) + wrapper.original_file = None + wrapper.__del__() # should not raise + + def test_del_removes_temp_file(self): + with tempfile.NamedTemporaryFile(delete=False) as f: + tmp_path = f.name + wrapper = object.__new__(VideoReaderWrapper) + wrapper.original_file = tmp_path + wrapper.__del__() + self.assertFalse(os.path.exists(tmp_path)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/input/test_ernie_vl_processor.py b/tests/input/test_ernie_vl_processor.py index c440187667f..62d3599f0d2 100644 --- a/tests/input/test_ernie_vl_processor.py +++ b/tests/input/test_ernie_vl_processor.py @@ -1,4 +1,4 @@ -""" +""" # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -932,8 +932,8 @@ def _mock_video_processing(self, mock_frames=None): """Helper to mock video processing""" if mock_frames is None: mock_frames = [Image.new("RGB", (224, 224)) for _ in range(4)] - mock_read = patch("fastdeploy.input.ernie4_5_vl_processor.process.read_video_decord") - mock_frames_read = patch("fastdeploy.input.ernie4_5_vl_processor.process.read_frames_decord") + mock_read = patch("fastdeploy.input.ernie4_5_vl_processor.process.read_video_paddlecodec") + mock_frames_read = patch("fastdeploy.input.ernie4_5_vl_processor.process.read_frames_paddlecodec") mock_render = patch("fastdeploy.input.ernie4_5_vl_processor.process.render_frame_timestamp") return mock_read, mock_frames_read, mock_render, mock_frames diff --git a/tests/input/test_paddleocr_vl_processor.py b/tests/input/test_paddleocr_vl_processor.py index c93b5a8f97f..ea643d4936f 100644 --- a/tests/input/test_paddleocr_vl_processor.py +++ b/tests/input/test_paddleocr_vl_processor.py @@ -415,7 +415,7 @@ def test_request2ids(self, mock_text2ids, mock_parse_chat): self.assertEqual(result, "final_output") @patch(f"{MODULE_PATH}.sample_frames") - @patch(f"{MODULE_PATH}.read_video_decord") + @patch(f"{MODULE_PATH}.read_video_paddlecodec") def test_load_and_process_video(self, mock_read_video, mock_sample_frames): """测试 _load_and_process_video 的帧采样逻辑""" mock_reader = MagicMock() @@ -542,7 +542,7 @@ def test_text2ids_with_processed_data(self): mock_add_proc_vid.assert_called_once_with(self.dummy_processed_video_cache, ANY, "vid1") @patch(f"{MODULE_PATH}.sample_frames") - @patch(f"{MODULE_PATH}.read_video_decord") + @patch(f"{MODULE_PATH}.read_video_paddlecodec") def test_load_and_process_video_no_sampling(self, mock_read_video, mock_sample_frames): """新增:测试 _load_and_process_video 不采样(fps=-1)""" mock_reader = MagicMock() diff --git a/tests/input/test_process_video.py b/tests/input/test_process_video.py index 1882a21f637..9aa5fb475c1 100644 --- a/tests/input/test_process_video.py +++ b/tests/input/test_process_video.py @@ -27,8 +27,8 @@ import fastdeploy.input.ernie4_5_vl_processor.process_video as process_video_module from fastdeploy.input.ernie4_5_vl_processor.process_video import ( get_frame_indices, - read_frames_decord, - read_video_decord, + read_frames_paddlecodec, + read_video_paddlecodec, ) @@ -93,7 +93,7 @@ def test_read_video_decord_with_wrapper(self): # Patch VideoReaderWrapper in the target module so isinstance checks use our mock class with patch.object(process_video_module, "VideoReaderWrapper", MockVideoReaderWrapper): mock_reader = MockVideoReaderWrapper("dummy", vlen=10, fps=5) - reader, meta, path = read_video_decord(mock_reader, save_to_disk=False) + reader, meta, path = read_video_paddlecodec(mock_reader, save_to_disk=False) self.assertIs(reader, mock_reader) self.assertEqual(meta["fps"], 5) @@ -106,7 +106,7 @@ def test_read_video_decord_with_bytes(self): """Test that bytes input is wrapped into BytesIO and passed to VideoReaderWrapper.""" with patch.object(process_video_module, "VideoReaderWrapper", MockVideoReaderWrapper): data = b"\x00\x01\x02\x03" - reader, meta, path = read_video_decord(data, save_to_disk=False) + reader, meta, path = read_video_paddlecodec(data, save_to_disk=False) self.assertIsInstance(reader, MockVideoReaderWrapper) self.assertEqual(meta["fps"], 6) @@ -261,7 +261,7 @@ def test_basic_read_no_save(self): reader = MockVideoReaderWrapper("dummy", vlen=8, fps=4) meta = {"fps": 4, "duration": 8 / 4, "num_of_frame": 8} - ret, idxs, ts = read_frames_decord( + ret, idxs, ts = read_frames_paddlecodec( video_path="dummy", video_reader=reader, video_meta=meta, @@ -294,7 +294,7 @@ def test_read_and_save_to_disk(self): return_value="det_id", ), ): - ret, idxs, ts = read_frames_decord( + ret, idxs, ts = read_frames_paddlecodec( video_path="dummy", video_reader=reader, video_meta=meta, @@ -316,7 +316,7 @@ def test_fallback_previous_success(self): meta = {"fps": 5, "duration": 10 / 5, "num_of_frame": 10} idxs = [1, 2, 3, 6] - ret, new_idxs, ts = read_frames_decord( + ret, new_idxs, ts = read_frames_paddlecodec( video_path="dummy", video_reader=reader, video_meta=meta, @@ -335,7 +335,7 @@ def test_fallback_next_when_prev_fails(self): meta = {"fps": 5, "duration": 10 / 5, "num_of_frame": 10} idxs = [1, 2, 3, 6] - ret, new_idxs, ts = read_frames_decord( + ret, new_idxs, ts = read_frames_paddlecodec( video_path="dummy", video_reader=reader, video_meta=meta, @@ -371,7 +371,7 @@ def __getitem__(self, idx): # Request 2 frames: index 0 succeeds, index 1 always fails, # and tol=0 disallows searching neighbors -> stack and length assertion should fail with self.assertRaises(AssertionError): - read_frames_decord( + read_frames_paddlecodec( video_path="dummy", video_reader=reader, video_meta=meta, diff --git a/tests/input/test_video_utils.py b/tests/input/test_video_utils.py index 28e6f97e3d9..1fab020c56a 100644 --- a/tests/input/test_video_utils.py +++ b/tests/input/test_video_utils.py @@ -20,7 +20,7 @@ from fastdeploy.input.video_utils import ( _is_gif, - read_video_decord, + read_video_paddlecodec, sample_frames, sample_frames_paddleocr, sample_frames_qwen, @@ -35,16 +35,25 @@ NOT_GIF = b"NOTGIF" + b"\x00" * 10 -def _make_mock_reader(num_frames=100, fps=25.0): - """Return a mock that mimics decord.VideoReader.""" - reader = MagicMock() - reader.__len__ = MagicMock(return_value=num_frames) - reader.get_avg_fps = MagicMock(return_value=fps) - reader.seek = MagicMock(return_value=None) - frame = MagicMock() - frame.asnumpy = MagicMock(return_value=np.zeros((480, 640, 3), dtype=np.uint8)) - reader.__getitem__ = MagicMock(return_value=frame) - return reader +def _make_mock_decoder(num_frames=100, fps=25.0): + """Return a mock that mimics torchcodec VideoDecoder.""" + decoder = MagicMock() + decoder.metadata.num_frames = num_frames + decoder.metadata.average_fps = fps + + def _get_frames_at(indices): + batch = MagicMock() + tensor = MagicMock() + tensor.numpy.return_value = np.zeros((len(indices), 480, 640, 3), dtype=np.uint8) + # data[0] should also expose .numpy() + first = MagicMock() + first.numpy.return_value = np.zeros((480, 640, 3), dtype=np.uint8) + tensor.__getitem__ = MagicMock(return_value=first) + batch.data = tensor + return batch + + decoder.get_frames_at = MagicMock(side_effect=_get_frames_at) + return decoder # --------------------------------------------------------------------------- @@ -67,48 +76,161 @@ def test_short_bytes(self): # --------------------------------------------------------------------------- -# VideoReaderWrapper (mock decord + moviepy) +# VideoReaderWrapper (mock paddlecodec/torchcodec + moviepy) # --------------------------------------------------------------------------- class TestVideoReaderWrapper(unittest.TestCase): - def _make_wrapper(self, video_path, mock_reader=None): - """Construct a VideoReaderWrapper with decord mocked out.""" - from fastdeploy.input.video_utils import VideoReaderWrapper + @staticmethod + def _guard(): + """A no-op context manager standing in for paddle.use_compat_guard.""" + + class _Guard: + def __enter__(self): + return None + + def __exit__(self, *a): + return False - if mock_reader is None: - mock_reader = _make_mock_reader() + return _Guard() - mock_decord = MagicMock() - mock_decord.VideoReader.return_value = mock_reader + def _make_wrapper(self, video_path, mock_decoder=None, decoder_factory=None, moviepy_mock=None): + """Construct a VideoReaderWrapper with torchcodec/paddle mocked out. - with patch.dict("sys.modules", {"decord": mock_decord, "moviepy": MagicMock(), "moviepy.editor": MagicMock()}): + - mock_decoder: decoder instance returned by VideoDecoder(...) + - decoder_factory: optional callable used as VideoDecoder (captures args / + raises). Takes precedence over mock_decoder. + - moviepy_mock: optional mock for the moviepy module (for GIF path). + """ + from fastdeploy.input.video_utils import VideoReaderWrapper + + if mock_decoder is None: + mock_decoder = _make_mock_decoder() + + decoders_module = MagicMock() + if decoder_factory is not None: + decoders_module.VideoDecoder = decoder_factory + else: + decoders_module.VideoDecoder.return_value = mock_decoder + + mock_paddle = MagicMock() + mock_paddle.use_compat_guard.return_value = self._guard() + + moviepy = moviepy_mock or MagicMock() + + with ( + patch.dict( + "sys.modules", + { + "torchcodec": MagicMock(), + "torchcodec.decoders": decoders_module, + "moviepy": moviepy, + "moviepy.editor": moviepy, + }, + ), + patch("fastdeploy.input.video_utils.paddle", mock_paddle), + ): wrapper = VideoReaderWrapper(video_path) - wrapper._reader = mock_reader return wrapper def test_len(self): - reader = _make_mock_reader(num_frames=42) - wrapper = self._make_wrapper("/fake/video.mp4", reader) + decoder = _make_mock_decoder(num_frames=42) + wrapper = self._make_wrapper("/fake/video.mp4", decoder) self.assertEqual(len(wrapper), 42) - def test_getitem_resets_seek(self): - reader = _make_mock_reader() - wrapper = self._make_wrapper("/fake/video.mp4", reader) - _ = wrapper[0] - reader.seek.assert_called_with(0) + def test_getitem_int_returns_numpy_frame(self): + decoder = _make_mock_decoder() + wrapper = self._make_wrapper("/fake/video.mp4", decoder) + frame = wrapper[0] + self.assertIsInstance(frame.asnumpy(), np.ndarray) + # int access uses single-element indices list + decoder.get_frames_at.assert_called_with(indices=[0]) + + def test_getitem_numpy_integer(self): + decoder = _make_mock_decoder() + wrapper = self._make_wrapper("/fake/video.mp4", decoder) + frame = wrapper[np.int64(3)] + self.assertIsInstance(frame.asnumpy(), np.ndarray) + decoder.get_frames_at.assert_called_with(indices=[3]) + + def test_getitem_slice(self): + decoder = _make_mock_decoder(num_frames=10) + wrapper = self._make_wrapper("/fake/video.mp4", decoder) + frames = wrapper[2:5] + self.assertIsInstance(frames.asnumpy(), np.ndarray) + decoder.get_frames_at.assert_called_with(indices=[2, 3, 4]) + + def test_getitem_list(self): + decoder = _make_mock_decoder() + wrapper = self._make_wrapper("/fake/video.mp4", decoder) + frames = wrapper[[1, 4, 7]] + self.assertIsInstance(frames.asnumpy(), np.ndarray) + decoder.get_frames_at.assert_called_with(indices=[1, 4, 7]) def test_get_avg_fps(self): - reader = _make_mock_reader(fps=30.0) - wrapper = self._make_wrapper("/fake/video.mp4", reader) + decoder = _make_mock_decoder(fps=30.0) + wrapper = self._make_wrapper("/fake/video.mp4", decoder) self.assertEqual(wrapper.get_avg_fps(), 30.0) - def test_seek(self): - reader = _make_mock_reader() - wrapper = self._make_wrapper("/fake/video.mp4", reader) - wrapper.seek(5) - reader.seek.assert_called_with(5) + def test_decoder_constructed_with_expected_args(self): + """VideoDecoder must receive the expected keyword arguments.""" + captured = {} + + def factory(path, **kwargs): + captured["path"] = path + captured.update(kwargs) + return _make_mock_decoder() + + self._make_wrapper("/fake/video.mp4", decoder_factory=factory) + + self.assertEqual(captured["path"], "/fake/video.mp4") + self.assertEqual(captured["seek_mode"], "exact") + self.assertEqual(captured["dimension_order"], "NHWC") + self.assertEqual(captured["device"], "cpu") + self.assertIn("num_ffmpeg_threads", captured) + + def test_num_ffmpeg_threads_from_env(self): + """PADDLECODEC_NUM_THREADS env var controls num_ffmpeg_threads.""" + captured = {} + + def factory(path, **kwargs): + captured.update(kwargs) + return _make_mock_decoder() + + with patch.dict("os.environ", {"PADDLECODEC_NUM_THREADS": "4"}): + self._make_wrapper("/fake/video.mp4", decoder_factory=factory) + + self.assertEqual(captured["num_ffmpeg_threads"], 4) + + def test_torchcodec_import_failure_reraises_and_logs(self): + """When torchcodec import fails, the error is logged and re-raised.""" + from fastdeploy.input.video_utils import VideoReaderWrapper + + mock_paddle = MagicMock() + mock_paddle.use_compat_guard.return_value = self._guard() + + # A module whose attribute access raises ImportError mimics a broken backend + broken = MagicMock() + type(broken).VideoDecoder = property(lambda self: (_ for _ in ()).throw(ImportError("boom"))) + + with ( + patch.dict( + "sys.modules", + { + "torchcodec": MagicMock(), + "torchcodec.decoders": broken, + "moviepy": MagicMock(), + "moviepy.editor": MagicMock(), + }, + ), + patch("fastdeploy.input.video_utils.paddle", mock_paddle), + patch("fastdeploy.input.video_utils.logger") as mock_logger, + ): + with self.assertRaises(ImportError): + VideoReaderWrapper("/fake/video.mp4") + + mock_logger.error.assert_called_once() def test_del_no_original_file(self): """__del__ should be a no-op when original_file is None.""" @@ -116,7 +238,7 @@ def test_del_no_original_file(self): wrapper = object.__new__(VideoReaderWrapper) wrapper.original_file = None - wrapper._reader = _make_mock_reader() + wrapper._decoder = _make_mock_decoder() # Should not raise wrapper.__del__() @@ -132,53 +254,61 @@ def test_del_removes_temp_file(self): wrapper = object.__new__(VideoReaderWrapper) wrapper.original_file = tmp_path - wrapper._reader = _make_mock_reader() + wrapper._decoder = _make_mock_decoder() wrapper.__del__() self.assertFalse(os.path.exists(tmp_path)) def test_non_gif_string_path_does_not_set_original_file(self): """Passing a non-GIF string path must NOT set original_file (bug fix).""" - from fastdeploy.input.video_utils import VideoReaderWrapper - - mock_reader = _make_mock_reader() - mock_decord = MagicMock() - mock_decord.VideoReader.return_value = mock_reader - - with patch.dict("sys.modules", {"decord": mock_decord, "moviepy": MagicMock(), "moviepy.editor": MagicMock()}): - wrapper = VideoReaderWrapper("/fake/video.mp4") - + wrapper = self._make_wrapper("/fake/video.mp4") self.assertIsNone(wrapper.original_file) def test_bytesio_non_gif_path_does_not_set_original_file(self): """Passing a BytesIO that is NOT a GIF must not set original_file.""" - from fastdeploy.input.video_utils import VideoReaderWrapper + bio = io.BytesIO(NOT_GIF) + wrapper = self._make_wrapper(bio) + self.assertIsNone(wrapper.original_file) - mock_reader = _make_mock_reader() - mock_decord = MagicMock() - mock_decord.VideoReader.return_value = mock_reader + def test_gif_string_path_converts_to_mp4_and_sets_original_file(self): + """A .gif string path is transcoded to mp4 and tracked for cleanup.""" + moviepy = MagicMock() + clip = moviepy.editor.VideoFileClip.return_value - bio = io.BytesIO(NOT_GIF) - with patch.dict("sys.modules", {"decord": mock_decord, "moviepy": MagicMock(), "moviepy.editor": MagicMock()}): - wrapper = VideoReaderWrapper(bio) + wrapper = self._make_wrapper("/fake/anim.gif", moviepy_mock=moviepy) - self.assertIsNone(wrapper.original_file) + moviepy.editor.VideoFileClip.assert_called_once_with("/fake/anim.gif") + clip.write_videofile.assert_called_once() + clip.close.assert_called_once() + # original_file points at the generated temp mp4 + self.assertIsNotNone(wrapper.original_file) + self.assertTrue(wrapper.original_file.endswith(".mp4")) + def test_gif_bytes_converts_to_mp4(self): + """GIF bytes are written to a temp gif then transcoded to mp4.""" + moviepy = MagicMock() -# --------------------------------------------------------------------------- -# read_video_decord -# --------------------------------------------------------------------------- + wrapper = self._make_wrapper(GIF89_HEADER, moviepy_mock=moviepy) + moviepy.editor.VideoFileClip.assert_called_once() + self.assertIsNotNone(wrapper.original_file) + self.assertTrue(wrapper.original_file.endswith(".mp4")) -class TestReadVideoDecord(unittest.TestCase): - def _patch_wrapper(self, num_frames=100, fps=25.0): - """Return a context manager that replaces VideoReaderWrapper with a mock.""" - from fastdeploy.input import video_utils + def test_gif_bytesio_converts_to_mp4(self): + """GIF content in a BytesIO is transcoded to mp4.""" + moviepy = MagicMock() + + wrapper = self._make_wrapper(io.BytesIO(GIF87_HEADER), moviepy_mock=moviepy) + + moviepy.editor.VideoFileClip.assert_called_once() + self.assertIsNotNone(wrapper.original_file) + + +# --------------------------------------------------------------------------- +# read_video_paddlecodec +# --------------------------------------------------------------------------- - mock_wrapper = MagicMock() - mock_wrapper.__len__ = MagicMock(return_value=num_frames) - mock_wrapper.get_avg_fps = MagicMock(return_value=fps) - return patch.object(video_utils, "VideoReaderWrapper", return_value=mock_wrapper), mock_wrapper +class TestReadVideoPaddlecodec(unittest.TestCase): def test_existing_wrapper_passthrough(self): """Already-wrapped reader is returned as-is.""" from fastdeploy.input.video_utils import VideoReaderWrapper @@ -187,7 +317,7 @@ def test_existing_wrapper_passthrough(self): mock_wrapper.__len__ = MagicMock(return_value=50) mock_wrapper.get_avg_fps = MagicMock(return_value=10.0) - reader, meta, path = read_video_decord(mock_wrapper) + reader, meta, path = read_video_paddlecodec(mock_wrapper) self.assertIs(reader, mock_wrapper) self.assertEqual(meta["num_of_frame"], 50) @@ -211,7 +341,7 @@ def get_avg_fps(self): return 10.0 with patch.object(video_utils, "VideoReaderWrapper", FakeWrapper): - reader, meta, path = read_video_decord(b"fake_video_bytes") + reader, meta, path = read_video_paddlecodec(b"fake_video_bytes") self.assertIsInstance(captured[0], io.BytesIO) @@ -230,7 +360,7 @@ def get_avg_fps(self): return 30.0 with patch.object(video_utils, "VideoReaderWrapper", FakeWrapper): - reader, meta, path = read_video_decord("/fake/path.mp4") + reader, meta, path = read_video_paddlecodec("/fake/path.mp4") self.assertEqual(meta["num_of_frame"], 60) self.assertAlmostEqual(meta["duration"], 2.0) From d10f9376162ccf0a40eb05e1fba984e9848eba3c Mon Sep 17 00:00:00 2001 From: bingoo <1575938147@qq.com> Date: Tue, 30 Jun 2026 11:10:14 +0800 Subject: [PATCH 3/3] limit paddlecodec version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3a2e3c9951a..0d2ea975057 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ xlwt visualdl setuptools-scm>=8 prometheus-client -paddlecodec +paddlecodec==0.1.0 moviepy triton crcmod