diff --git a/afy/cam_fomm.py b/afy/cam_fomm.py index 13ac1d4a3..40516894c 100644 --- a/afy/cam_fomm.py +++ b/afy/cam_fomm.py @@ -21,9 +21,12 @@ if _platform == 'darwin': if not opt.is_client: - info('\nOnly remote GPU mode is supported for Mac (use --is-client and --connect options to connect to the server)') - info('Standalone version will be available lately!\n') - exit() + import torch + if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): + info('Apple Silicon detected — using MPS with grid_sample CPU fallback.') + else: + info('\nNo GPU acceleration available on this Mac. Performance may be slow.') + info('Consider using --is-client with a remote GPU server for better performance.\n') def is_new_frame_better(source, driving, predictor): diff --git a/afy/predictor_local.py b/afy/predictor_local.py index 2f7797ec9..10a1cf052 100644 --- a/afy/predictor_local.py +++ b/afy/predictor_local.py @@ -1,5 +1,6 @@ from scipy.spatial import ConvexHull import torch +import torch.nn.functional as F import yaml from modules.keypoint_detector import KPDetector from modules.generator_optim import OcclusionAwareGenerator @@ -8,6 +9,27 @@ import face_alignment +def _patch_grid_sample_for_mps(): + """Monkey-patch grid_sample to run on CPU when inputs are on MPS. + + MPS grid_sample produces corrupted output, so we move tensors to CPU + for that single op and move the result back to MPS. All other ops + still benefit from MPS acceleration. 
+ """ + _original_grid_sample = F.grid_sample + + def _safe_grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corners=None): + if input.is_mps: + result = _original_grid_sample( + input.cpu(), grid.cpu(), + mode=mode, padding_mode=padding_mode, align_corners=align_corners + ) + return result.to('mps') + return _original_grid_sample(input, grid, mode=mode, padding_mode=padding_mode, align_corners=align_corners) + + F.grid_sample = _safe_grid_sample + + def normalize_kp(kp_source, kp_driving, kp_driving_initial, adapt_movement_scale=False, use_relative_movement=False, use_relative_jacobian=False): if adapt_movement_scale: @@ -37,7 +59,15 @@ def to_tensor(a): class PredictorLocal: def __init__(self, config_path, checkpoint_path, relative=False, adapt_movement_scale=False, device=None, enc_downscale=1): - self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu') + if device: + self.device = device + elif torch.cuda.is_available(): + self.device = 'cuda' + elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): + _patch_grid_sample_for_mps() + self.device = 'mps' + else: + self.device = 'cpu' self.relative = relative self.adapt_movement_scale = adapt_movement_scale self.start_frame = None diff --git a/docs/INSTALL_APPLE_SILICON.md b/docs/INSTALL_APPLE_SILICON.md new file mode 100644 index 000000000..da7b41a49 --- /dev/null +++ b/docs/INSTALL_APPLE_SILICON.md @@ -0,0 +1,218 @@ +# Installing Avatarify on Apple Silicon (M1/M2/M3/M4) + +Apple Silicon Macs can run Avatarify locally using PyTorch's [MPS (Metal Performance Shaders)](https://pytorch.org/docs/stable/notes/mps.html) backend for GPU acceleration. This guide walks through the full setup. + +## Prerequisites + +- A Mac with Apple Silicon (M1, M2, M3, M4, or their Pro/Max/Ultra variants) +- macOS 12.3 (Monterey) or later +- [Homebrew](https://brew.sh/) installed +- A working webcam (built-in or external) + +## Step-by-step Installation + +### 1. 
Install Miniconda (ARM64) + +Install the ARM64 version of Miniconda so that all packages are native to Apple Silicon: + +```bash +brew install --cask miniconda +``` + +Or download the installer manually from [Miniconda](https://docs.conda.io/en/latest/miniconda.html) — choose the **macOS Apple M1 64-bit** (arm64) variant. + +After installation, initialize conda for your shell: + +```bash +conda init zsh # default shell on macOS +# or: conda init bash +``` + +> **Important:** You must **close and reopen your terminal** after running `conda init`. If you skip this step you will see `CondaError: Run 'conda init' before 'conda activate'` when trying to activate environments. Alternatively, run `source ~/.zshrc` (or `source ~/.bashrc`) to reload the shell configuration without restarting. + +### 2. Clone the Repository + +```bash +git clone https://github.com/alievk/avatarify-python.git +cd avatarify-python +``` + +### 3. Create the Conda Environment + +Python 3.7 does not have ARM64 builds. Use Python 3.8 (or 3.10 for best compatibility): + +```bash +conda create -y -n avatarify python=3.10 +conda activate avatarify +``` + +### 4. Install PyTorch with MPS Support + +PyTorch 1.12+ supports MPS acceleration on Apple Silicon. Install the latest stable version: + +```bash +conda install -y pytorch torchvision -c pytorch +``` + +Verify MPS is available: + +```bash +python -c "import torch; print('MPS available:', torch.backends.mps.is_available())" +``` + +You should see `MPS available: True`. If not, make sure you are on macOS 12.3+ and using the ARM64 conda environment. + +### 5. Install Other Dependencies + +Some pinned dependency versions in the original `requirements.txt` do not have ARM64 wheels. Install compatible versions instead: + +```bash +conda install -y numpy scikit-image -c conda-forge +pip install opencv-python +pip install face-alignment +pip install pyzmq msgpack-numpy pyyaml requests +``` + +> **Note:** `pyfakewebcam` is Linux-only (depends on v4l2). 
On macOS, you will use OBS Studio for virtual camera output (see [Step 8](#8-set-up-virtual-camera-obs-studio)). + +### 6. Clone First Order Motion Model + +```bash +git clone https://github.com/alievk/first-order-model.git fomm +``` + +### 7. Download Network Weights + +Download `vox-adv-cpk.pth.tar` (228 MB) from one of these mirrors: + +- [Mirror 1 (S3)](https://openavatarify.s3.amazonaws.com/weights/vox-adv-cpk.pth.tar) +- [Mirror 2 (Yandex Disk)](https://yadi.sk/d/M0FWpz2ExBfgAA) +- [Mirror 3 (Google Drive)](https://drive.google.com/file/d/1coUCdyRXDbpWnEkA99NLNY60mb9dQ_n3/view?usp=sharing) + +Place the file in the `avatarify-python` root directory (**do not unpack it**): + +```bash +# If you downloaded it to ~/Downloads: +mv ~/Downloads/vox-adv-cpk.pth.tar . +``` + +Verify the checksum (optional): + +```bash +md5 vox-adv-cpk.pth.tar +# Expected: 8a45a24037871c045fbb8a6a8aa95ebc +``` + +### 8. Set Up Virtual Camera (OBS Studio) + +Since CamTwist may not work reliably on Apple Silicon, use OBS Studio's built-in virtual camera: + +1. Download and install [OBS Studio](https://obsproject.com/) (the Apple Silicon native build). +2. Open OBS Studio. +3. In the **Sources** section, click **+**, select **Window Capture**, and choose the `avatarify` window. +4. Go to **Edit > Transform > Fit to screen**. +5. Click **Start Virtual Camera** in the bottom-right of OBS. +6. The OBS Virtual Camera will now be available as a camera input in Zoom, Teams, Slack, etc. + +### 9. Run Avatarify + +The existing `run_mac.sh` script enforces remote-only mode for macOS. To run locally on Apple Silicon, launch directly: + +```bash +conda activate avatarify +export PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd)/fomm +python afy/cam_fomm.py \ + --config fomm/config/vox-adv-256.yaml \ + --checkpoint vox-adv-cpk.pth.tar \ + --relative \ + --adapt_scale \ + --no-pad \ + --is-client +``` + +> **Important:** The current codebase requires `--is-client` on macOS. 
If you want to run fully locally without a remote server, you will need to comment out the Darwin platform check in `afy/cam_fomm.py` (lines 22-26): +> +> ```python +> # if _platform == 'darwin': +> # if not opt.is_client: +> # info('\nOnly remote GPU mode is supported for Mac ...') +> # exit() +> ``` +> +> Then run without `--is-client`: +> +> ```bash +> python afy/cam_fomm.py \ +> --config fomm/config/vox-adv-256.yaml \ +> --checkpoint vox-adv-cpk.pth.tar \ +> --relative \ +> --adapt_scale \ +> --no-pad +> ``` + +Two windows will appear: +- **cam** — shows your face position for calibration +- **avatarify** — shows the animated avatar preview + +See the main [README controls section](README.md#controls) for keyboard shortcuts. + +## Troubleshooting + +### `CondaError: Run 'conda init' before 'conda activate'` + +Run `conda init zsh` (or `conda init bash`), then **close and reopen your terminal**. Alternatively, run `source ~/.zshrc` to reload without restarting. See [Step 1](#1-install-miniconda-arm64). + +### `MPS available: False` + +- Make sure you are on macOS 12.3 or later: `sw_vers` +- Make sure you installed the ARM64 (not x86) conda: `python -c "import platform; print(platform.machine())"` should print `arm64` +- Make sure PyTorch is version 1.12 or later: `python -c "import torch; print(torch.__version__)"` + +### `ModuleNotFoundError: No module named 'fomm'` + +Make sure you set `PYTHONPATH` before running: + +```bash +export PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd)/fomm +``` + +### OpenCV errors on import + +If `opencv-python` fails to import, try: + +```bash +pip uninstall opencv-python +pip install opencv-python-headless +``` + +### Low FPS / poor performance + +- MPS acceleration is available but may not match CUDA performance. Expect approximately 5-15 FPS depending on your chip. +- Close other GPU-intensive applications. +- M1 Pro/Max/Ultra and newer chips will perform better than base M1. 
+ +### face-alignment installation fails + +```bash +pip install face-alignment --no-deps +pip install scipy dlib +``` + +If `dlib` fails to build, install CMake first: + +```bash +brew install cmake +pip install dlib +``` + +## Performance Expectations + +| Chip | Approximate FPS | +| --- | --- | +| M1 | 5-10 | +| M1 Pro/Max | 10-15 | +| M2 / M2 Pro | 10-18 | +| M3 / M3 Pro | 12-20 | +| M4 / M4 Pro | 15-25 | + +*These are rough estimates. Actual performance depends on resolution, background processes, and PyTorch/MPS optimizations at the time of use.* diff --git a/docs/README.md b/docs/README.md index cafdab79d..f074cb2f6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -10,6 +10,7 @@ - [Download network weights](#download-network-weights) - [Linux](#linux) - [Mac](#mac) + - [Mac (Apple Silicon)](#mac-apple-silicon) - [Windows](#windows) - [Remote GPU](#remote-gpu) - [Docker](#docker) @@ -91,6 +92,10 @@ bash scripts/install_mac.sh ``` 3. Download and install [CamTwist](http://camtwiststudio.com) from [here](http://camtwiststudio.com/download). It's easy. +#### Mac (Apple Silicon) + +If you have a Mac with an M1, M2, M3, or M4 chip, you can run Avatarify locally using PyTorch's MPS backend for GPU acceleration. See the dedicated **[Apple Silicon Installation Guide](INSTALL_APPLE_SILICON.md)** for full step-by-step instructions. + #### Windows