From a45457c5e63a31d3e833e2399672c4d48278a5f0 Mon Sep 17 00:00:00 2001 From: kaydenlms Date: Mon, 30 Mar 2026 12:48:39 -0400 Subject: [PATCH 1/8] Add Runtimes section with llama.cpp and MLX pages, add Convert to MLX guide - Add new Runtimes section between Getting Started and MCP with pages for llama.cpp and MLX runtimes - Add Convert Models to MLX page under modelyaml section - Renumber existing sections to accommodate new Runtimes section Co-Authored-By: Claude Opus 4.6 (1M context) --- 0_app/2_runtimes/index.md | 12 +++ 0_app/2_runtimes/llama-cpp.md | 41 ++++++++ 0_app/2_runtimes/mlx.md | 48 ++++++++++ 0_app/{2_mcp => 3_mcp}/deeplink.md | 0 0_app/{2_mcp => 3_mcp}/index.md | 0 0_app/4_modelyaml/convert-to-mlx.md | 93 +++++++++++++++++++ 0_app/{3_modelyaml => 4_modelyaml}/index.md | 0 0_app/{3_modelyaml => 4_modelyaml}/publish.md | 0 0_app/{3_presets => 4_presets}/import.md | 0 0_app/{3_presets => 4_presets}/index.md | 0 0_app/{3_presets => 4_presets}/publish.md | 0 0_app/{3_presets => 4_presets}/pull.md | 0 0_app/{3_presets => 4_presets}/push.md | 0 .../{5_advanced => 6_advanced}/_branching.md | 0 0_app/{5_advanced => 6_advanced}/_context.md | 0 0_app/{5_advanced => 6_advanced}/_errors.md | 0 0_app/{5_advanced => 6_advanced}/_vision.md | 0 .../import-model.md | 0 .../parallel-requests.md | 0 0_app/{5_advanced => 6_advanced}/per-model.md | 0 .../prompt-template.md | 0 .../speculative-decoding.md | 0 .../languages.md | 0 .../modes.md | 0 .../themes.md | 0 25 files changed, 194 insertions(+) create mode 100644 0_app/2_runtimes/index.md create mode 100644 0_app/2_runtimes/llama-cpp.md create mode 100644 0_app/2_runtimes/mlx.md rename 0_app/{2_mcp => 3_mcp}/deeplink.md (100%) rename 0_app/{2_mcp => 3_mcp}/index.md (100%) create mode 100644 0_app/4_modelyaml/convert-to-mlx.md rename 0_app/{3_modelyaml => 4_modelyaml}/index.md (100%) rename 0_app/{3_modelyaml => 4_modelyaml}/publish.md (100%) rename 0_app/{3_presets => 4_presets}/import.md (100%) rename 0_app/{3_presets 
=> 4_presets}/index.md (100%) rename 0_app/{3_presets => 4_presets}/publish.md (100%) rename 0_app/{3_presets => 4_presets}/pull.md (100%) rename 0_app/{3_presets => 4_presets}/push.md (100%) rename 0_app/{5_advanced => 6_advanced}/_branching.md (100%) rename 0_app/{5_advanced => 6_advanced}/_context.md (100%) rename 0_app/{5_advanced => 6_advanced}/_errors.md (100%) rename 0_app/{5_advanced => 6_advanced}/_vision.md (100%) rename 0_app/{5_advanced => 6_advanced}/import-model.md (100%) rename 0_app/{5_advanced => 6_advanced}/parallel-requests.md (100%) rename 0_app/{5_advanced => 6_advanced}/per-model.md (100%) rename 0_app/{5_advanced => 6_advanced}/prompt-template.md (100%) rename 0_app/{5_advanced => 6_advanced}/speculative-decoding.md (100%) rename 0_app/{6_user-interface => 7_user-interface}/languages.md (100%) rename 0_app/{6_user-interface => 7_user-interface}/modes.md (100%) rename 0_app/{6_user-interface => 7_user-interface}/themes.md (100%) diff --git a/0_app/2_runtimes/index.md b/0_app/2_runtimes/index.md new file mode 100644 index 0000000..f145fde --- /dev/null +++ b/0_app/2_runtimes/index.md @@ -0,0 +1,12 @@ +--- +title: Runtimes +description: Learn about the inference runtimes available in LM Studio +index: 1 +--- + +LM Studio supports multiple inference runtimes for running large language models locally. Each runtime has different strengths depending on your hardware and use case. + +# Available Runtimes + +- [**llama.cpp**](runtimes/llama-cpp) – A widely-used, cross-platform runtime for running GGUF models efficiently on CPUs and GPUs. +- [**MLX**](runtimes/mlx) – An Apple Silicon-optimized runtime for running models with high performance on Mac. 
diff --git a/0_app/2_runtimes/llama-cpp.md b/0_app/2_runtimes/llama-cpp.md new file mode 100644 index 0000000..54496ac --- /dev/null +++ b/0_app/2_runtimes/llama-cpp.md @@ -0,0 +1,41 @@ +--- +title: What is llama.cpp +description: Learn about the llama.cpp inference runtime in LM Studio +index: 2 +--- + +## What is llama.cpp + +If you’ve tinkered with open source models, you’ve likely heard of [llama.cpp](https://github.com/ggml-org/llama.cpp). llama.cpp is an open-source inference engine written in C++, developed by Georgi Gerganov in 2023. The goal of this project is to make LLM inference accessible across a wide range of hardware, with minimal setup and without compromising on performance. LM Studio integrates llama.cpp under the hood as one of our primary runtime engines. + + +### Relevant Terminology + +- **GGUF**: GGUF (GPT-Generated Unified Format) is a file format for packaging a model with its weights and all the metadata needed to run them into a single, portable file. Any model in GGUF format can be loaded and run with the llama.cpp engine. +- **Quantization**: Today, most foundation open source models are still very large in size — a 7B parameter model at full float32 precision is around 28GB. In order to run these models on consumer hardware with limited memory, llama.cpp leverages quantization – a method to reduce model size with minimal quality loss – and stores quantized versions in the GGUF format. + + +## llama.cpp in LM Studio + +In the LM Studio app, open Runtime settings (⌘⇧R) to see llama.cpp as the runtime selection for GGUF. When downloading models in LM Studio, you may see -Q4_K_M.gguf in the model card. Q4_K_M.gguf indicates a Q4 quantized version of the model stored in GGUF format. + +## Download a GGUF model + +**From the GUI** + +In the app, head to the Model Search tab and filter by GGUF to see only models in that format. 
+ +**Using the CLI** + +From the terminal, use `lms get` and include the `--gguf` flag to only show models in the GGUF format: + +```bash +lms get --gguf +``` + +## Converting models to GGUF format + +Many popular models already have GGUF versions available. Before converting manually, check if the model is available in GGUF format in [lmstudio-community](https://huggingface.co/lmstudio-community) or by using the `hf.co/models?library=gguf` tag on HuggingFace. + +Models stored locally in other formats can be converted to GGUF to run with llama.cpp. Non-GGUF models can be converted using the Python scripts in the [llama.cpp repository](https://github.com/ggml-org/llama.cpp). + diff --git a/0_app/2_runtimes/mlx.md b/0_app/2_runtimes/mlx.md new file mode 100644 index 0000000..90dbc63 --- /dev/null +++ b/0_app/2_runtimes/mlx.md @@ -0,0 +1,48 @@ +--- +title: What is MLX +description: Learn about the MLX inference runtime in LM Studio +index: 3 +--- + +## What is MLX + +[MLX](https://github.com/ml-explore/mlx) is a machine learning framework and library developed by Apple to optimize running ML workloads on Apple Silicon. [mlx-lm](https://github.com/ml-explore/mlx-lm/tree/main) and [mlx-vlm](https://github.com/Blaizzy/mlx-vlm) are packages built on top of the MLX framework that focuses specifically on performing inference with LLMs or VLMs, respectively. These packages maximize the speed and efficiency of running LLMs on Apple Silicon. + +LM Studio integrates both mlx-lm and mlx-vlm into its MLX runtime backend. Because MLX is built by Apple for Apple hardware, it is a Mac-only runtime and is not available on other platforms. + +## MLX and unified memory + +Unlike traditional systems where CPU and GPU maintain separate memory pools, Apple Silicon uses a unified memory architecture. 
Both GGUF and MLX take advantage of this hardware design, but MLX is built specifically for Apple Silicon and typically results in better performance (faster inference speeds) on Macs compared to GGUF. + +## MLX in LM Studio + +If you’re using LM Studio on a Mac, you will see MLX as an available runtime under Runtime Settings (⌘⇧R). + +When browsing models in LM Studio, you may see entries like `lmstudio-community/Qwen3-Coder-Next-MLX-4bit`. The LM Studio team maintains our own MLX conversions for popular open source models on Hugging Face at [lmstudio-community](https://huggingface.co/lmstudio-community). + +### Download an MLX model + +**From the GUI** + +In the app, head to the Model Search tab and filter by MLX to see only models in that format. + +**Using the CLI** + +From the terminal, use `lms get` and include the `--mlx` flag to only show MLX models: + +```bash +lms get --mlx +``` +Read more about `lms get` [here](https://lmstudio.ai/docs/cli/local-models/get). + +## Choosing between MLX and llama.cpp + +Note that the MLX runtime only supports models in MLX format, and the llama.cpp runtime only supports models in GGUF format. + +If you're on a Mac device, our Staff Picks will recommend the MLX model if it’s available as an option, and otherwise exclude the option if it is not recommended over GGUF. + +## Converting models to MLX format + +Many popular models already have MLX versions available. Before converting manually, check out [lmstudio-community](https://huggingface.co/lmstudio-community) or [mlx-community](https://huggingface.co/mlx-community) on HuggingFace to see if the model you want is available in MLX format. + +Models stored locally in other formats can be converted to MLX format to use with the MLX runtime. See our guide on [converting models to MLX format](../modelyaml/convert-to-mlx). 
diff --git a/0_app/2_mcp/deeplink.md b/0_app/3_mcp/deeplink.md similarity index 100% rename from 0_app/2_mcp/deeplink.md rename to 0_app/3_mcp/deeplink.md diff --git a/0_app/2_mcp/index.md b/0_app/3_mcp/index.md similarity index 100% rename from 0_app/2_mcp/index.md rename to 0_app/3_mcp/index.md diff --git a/0_app/4_modelyaml/convert-to-mlx.md b/0_app/4_modelyaml/convert-to-mlx.md new file mode 100644 index 0000000..1a14724 --- /dev/null +++ b/0_app/4_modelyaml/convert-to-mlx.md @@ -0,0 +1,93 @@ +--- +title: Convert Models to MLX +description: Convert models to MLX format for use with the MLX runtime in LM Studio +index: 6 +--- + +## Convert a model to MLX format +Convert models to MLX to use with LM Studio’s MLX runtime backend. + +LM Studio’s MLX capability integrates two different implementations, **mlx-lm** and **mlx-vlm**. The former should be used for text-only models while the latter should be used for vision models. + + +## Prerequisites + +You’ll need a Mac with Apple Silicon (M-series). + +### Install the packages + +**Text-only models** + +```bash +pip install mlx-lm +``` + +**Vision models** + +```bash +pip install mlx-vlm +pip install torch torchvision +``` + +### Run the conversion scripts +Use the conversion package that corresponds to your model type below. 
+ + +## Convert a Hugging Face model to MLX + +### Text-only models + +```bash +python -m mlx_lm convert + --hf-path \ + --mlx-path /path/to/output/mlx-model +``` + +Example command to convert Qwen3-0.6B: + +```bash +python -m mlx_lm convert --hf-path Qwen/Qwen3-0.6B +``` + +### Vision models + +```bash +mlx_vlm.convert + --hf-path \ + --mlx-path /path/to/output/mlx-model +``` + +Example command to convert Qwen2.5-VL-3B-Instruct: + +```bash +mlx_vlm.convert --hf-path Qwen/Qwen2.5-VL-3B-Instruct +``` + +Note the following flags to include for either conversion tool: +`--hf-path` is the path to the Hugging Face model +`--mlx-path` is where you’d like the converted model to be saved + +To directly place the converted model in LM Studio’s model directory, we recommend setting `--mlx-path` to the following: + +```bash +~/.lmstudio/models/publisher/modelName +``` + +For the Qwen2.5-VL-3B-Instruct model above, an example command would look like: + +```bash +mlx_vlm.convert --hf-path Qwen/Qwen2.5-VL-3B-Instruct –-mlx-path ~/.lmstudio/models/publisher/Qwen2.5-VL-3B-Instruct-MLX +``` + +This way, you’ll see the model in LM Studio as soon as the conversion is complete – no import step needed. + +Use the `--help` flag on either command (e.g., `python -m mlx_vlm.convert --help`) to view advanced conversion options such as quantization. + +## Use the converted model in LM Studio + +**Note**: Skip this step if you placed the conversion output directly in LM Studio’s model directory. + +If you do not include the --mlx-path flag in your command, the tool automatically creates a folder called mlx_model in the folder you were currently in. In this case, you’ll need to manually import the converted MLX model to LM Studio by placing it in LM Studio’s expected models directory structure. By default, LM Studio stores models in `~/.lmstudio/models/`. + + +See our [MLX overview](../runtimes/mlx) for more information about using MLX models in LM Studio. 
diff --git a/0_app/3_modelyaml/index.md b/0_app/4_modelyaml/index.md similarity index 100% rename from 0_app/3_modelyaml/index.md rename to 0_app/4_modelyaml/index.md diff --git a/0_app/3_modelyaml/publish.md b/0_app/4_modelyaml/publish.md similarity index 100% rename from 0_app/3_modelyaml/publish.md rename to 0_app/4_modelyaml/publish.md diff --git a/0_app/3_presets/import.md b/0_app/4_presets/import.md similarity index 100% rename from 0_app/3_presets/import.md rename to 0_app/4_presets/import.md diff --git a/0_app/3_presets/index.md b/0_app/4_presets/index.md similarity index 100% rename from 0_app/3_presets/index.md rename to 0_app/4_presets/index.md diff --git a/0_app/3_presets/publish.md b/0_app/4_presets/publish.md similarity index 100% rename from 0_app/3_presets/publish.md rename to 0_app/4_presets/publish.md diff --git a/0_app/3_presets/pull.md b/0_app/4_presets/pull.md similarity index 100% rename from 0_app/3_presets/pull.md rename to 0_app/4_presets/pull.md diff --git a/0_app/3_presets/push.md b/0_app/4_presets/push.md similarity index 100% rename from 0_app/3_presets/push.md rename to 0_app/4_presets/push.md diff --git a/0_app/5_advanced/_branching.md b/0_app/6_advanced/_branching.md similarity index 100% rename from 0_app/5_advanced/_branching.md rename to 0_app/6_advanced/_branching.md diff --git a/0_app/5_advanced/_context.md b/0_app/6_advanced/_context.md similarity index 100% rename from 0_app/5_advanced/_context.md rename to 0_app/6_advanced/_context.md diff --git a/0_app/5_advanced/_errors.md b/0_app/6_advanced/_errors.md similarity index 100% rename from 0_app/5_advanced/_errors.md rename to 0_app/6_advanced/_errors.md diff --git a/0_app/5_advanced/_vision.md b/0_app/6_advanced/_vision.md similarity index 100% rename from 0_app/5_advanced/_vision.md rename to 0_app/6_advanced/_vision.md diff --git a/0_app/5_advanced/import-model.md b/0_app/6_advanced/import-model.md similarity index 100% rename from 0_app/5_advanced/import-model.md rename to 
0_app/6_advanced/import-model.md diff --git a/0_app/5_advanced/parallel-requests.md b/0_app/6_advanced/parallel-requests.md similarity index 100% rename from 0_app/5_advanced/parallel-requests.md rename to 0_app/6_advanced/parallel-requests.md diff --git a/0_app/5_advanced/per-model.md b/0_app/6_advanced/per-model.md similarity index 100% rename from 0_app/5_advanced/per-model.md rename to 0_app/6_advanced/per-model.md diff --git a/0_app/5_advanced/prompt-template.md b/0_app/6_advanced/prompt-template.md similarity index 100% rename from 0_app/5_advanced/prompt-template.md rename to 0_app/6_advanced/prompt-template.md diff --git a/0_app/5_advanced/speculative-decoding.md b/0_app/6_advanced/speculative-decoding.md similarity index 100% rename from 0_app/5_advanced/speculative-decoding.md rename to 0_app/6_advanced/speculative-decoding.md diff --git a/0_app/6_user-interface/languages.md b/0_app/7_user-interface/languages.md similarity index 100% rename from 0_app/6_user-interface/languages.md rename to 0_app/7_user-interface/languages.md diff --git a/0_app/6_user-interface/modes.md b/0_app/7_user-interface/modes.md similarity index 100% rename from 0_app/6_user-interface/modes.md rename to 0_app/7_user-interface/modes.md diff --git a/0_app/6_user-interface/themes.md b/0_app/7_user-interface/themes.md similarity index 100% rename from 0_app/6_user-interface/themes.md rename to 0_app/7_user-interface/themes.md From d54585918d69f5d29c697d79f2a9b0a6f2d69272 Mon Sep 17 00:00:00 2001 From: kaydenlms Date: Mon, 30 Mar 2026 13:04:43 -0400 Subject: [PATCH 2/8] Add images to llama.cpp and MLX runtime pages Co-Authored-By: Claude Opus 4.6 (1M context) --- 0_app/2_runtimes/index.md | 2 +- 0_app/2_runtimes/llama-cpp.md | 8 +++++++- 0_app/2_runtimes/mlx.md | 4 ++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/0_app/2_runtimes/index.md b/0_app/2_runtimes/index.md index f145fde..7b4a9b5 100644 --- a/0_app/2_runtimes/index.md +++ b/0_app/2_runtimes/index.md @@ -6,7 
+6,7 @@ index: 1 LM Studio supports multiple inference runtimes for running large language models locally. Each runtime has different strengths depending on your hardware and use case. -# Available Runtimes +## Available Runtimes - [**llama.cpp**](runtimes/llama-cpp) – A widely-used, cross-platform runtime for running GGUF models efficiently on CPUs and GPUs. - [**MLX**](runtimes/mlx) – An Apple Silicon-optimized runtime for running models with high performance on Mac. diff --git a/0_app/2_runtimes/llama-cpp.md b/0_app/2_runtimes/llama-cpp.md index 54496ac..a1bbd77 100644 --- a/0_app/2_runtimes/llama-cpp.md +++ b/0_app/2_runtimes/llama-cpp.md @@ -17,7 +17,11 @@ If you’ve tinkered with open source models, you’ve likely heard of [llama.cp ## llama.cpp in LM Studio -In the LM Studio app, open Runtime settings (⌘⇧R) to see llama.cpp as the runtime selection for GGUF. When downloading models in LM Studio, you may see -Q4_K_M.gguf in the model card. Q4_K_M.gguf indicates a Q4 quantized version of the model stored in GGUF format. +In the LM Studio app, open Runtime settings (⌘⇧R) to see llama.cpp as the runtime selection for GGUF. + + + + When downloading models in LM Studio, you may see -Q4_K_M.gguf in the model card. Q4_K_M.gguf indicates a Q4 quantized version of the model stored in GGUF format. ## Download a GGUF model @@ -25,6 +29,8 @@ In the LM Studio app, open Runtime settings (⌘⇧R) to see llama.cpp as the ru In the app, head to the Model Search tab and filter by GGUF to see only models in that format. 
+ + **Using the CLI** From the terminal, use `lms get` and include the `--gguf` flag to only show models in the GGUF format: diff --git a/0_app/2_runtimes/mlx.md b/0_app/2_runtimes/mlx.md index 90dbc63..d187d6a 100644 --- a/0_app/2_runtimes/mlx.md +++ b/0_app/2_runtimes/mlx.md @@ -18,6 +18,8 @@ Unlike traditional systems where CPU and GPU maintain separate memory pools, App If you’re using LM Studio on a Mac, you will see MLX as an available runtime under Runtime Settings (⌘⇧R). + + When browsing models in LM Studio, you may see entries like `lmstudio-community/Qwen3-Coder-Next-MLX-4bit`. The LM Studio team maintains our own MLX conversions for popular open source models on Hugging Face at [lmstudio-community](https://huggingface.co/lmstudio-community). ### Download an MLX model @@ -26,6 +28,8 @@ When browsing models in LM Studio, you may see entries like `lmstudio-community/ In the app, head to the Model Search tab and filter by MLX to see only models in that format. + + **Using the CLI** From the terminal, use `lms get` and include the `--mlx` flag to only show MLX models: From 1e10ff0247f732bce4bd4ef3d2f3757ee67b1fca Mon Sep 17 00:00:00 2001 From: kaydenlms Date: Mon, 30 Mar 2026 14:02:44 -0400 Subject: [PATCH 3/8] Fix code block formatting in Convert to MLX guide - Add line continuations for multi-line commands - Replace unicode en-dash with ASCII hyphens in --mlx-path flag Co-Authored-By: Claude Opus 4.6 (1M context) --- 0_app/4_modelyaml/convert-to-mlx.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/0_app/4_modelyaml/convert-to-mlx.md b/0_app/4_modelyaml/convert-to-mlx.md index 1a14724..acc142c 100644 --- a/0_app/4_modelyaml/convert-to-mlx.md +++ b/0_app/4_modelyaml/convert-to-mlx.md @@ -38,8 +38,8 @@ Use the conversion package that corresponds to your model type below. 
### Text-only models ```bash -python -m mlx_lm convert - --hf-path \ +python -m mlx_lm convert \ + --hf-path \ --mlx-path /path/to/output/mlx-model ``` @@ -52,8 +52,8 @@ python -m mlx_lm convert --hf-path Qwen/Qwen3-0.6B ### Vision models ```bash -mlx_vlm.convert - --hf-path \ +mlx_vlm.convert \ + --hf-path \ --mlx-path /path/to/output/mlx-model ``` @@ -76,7 +76,7 @@ To directly place the converted model in LM Studio’s model directory, we recom For the Qwen2.5-VL-3B-Instruct model above, an example command would look like: ```bash -mlx_vlm.convert --hf-path Qwen/Qwen2.5-VL-3B-Instruct –-mlx-path ~/.lmstudio/models/publisher/Qwen2.5-VL-3B-Instruct-MLX +mlx_vlm.convert --hf-path Qwen/Qwen2.5-VL-3B-Instruct --mlx-path ~/.lmstudio/models/publisher/Qwen2.5-VL-3B-Instruct-MLX ``` This way, you’ll see the model in LM Studio as soon as the conversion is complete – no import step needed. From 78647895ce4fa88cf591180bced580b1b00424dc Mon Sep 17 00:00:00 2001 From: kaydenlms Date: Tue, 7 Apr 2026 15:23:41 -0400 Subject: [PATCH 4/8] Move llama-cpp and mlx docs from Runtimes to Advanced section, hide convert-to-mlx - Remove 2_runtimes section entirely - Move llama-cpp.md and mlx.md under 6_advanced - Rename convert-to-mlx.md to _convert-to-mlx.md to hide article - Update internal link reference from runtimes/mlx to advanced/mlx Co-Authored-By: Claude Opus 4.6 (1M context) --- 0_app/2_runtimes/index.md | 12 ------------ .../{convert-to-mlx.md => _convert-to-mlx.md} | 2 +- 0_app/{2_runtimes => 6_advanced}/llama-cpp.md | 0 0_app/{2_runtimes => 6_advanced}/mlx.md | 0 4 files changed, 1 insertion(+), 13 deletions(-) delete mode 100644 0_app/2_runtimes/index.md rename 0_app/4_modelyaml/{convert-to-mlx.md => _convert-to-mlx.md} (97%) rename 0_app/{2_runtimes => 6_advanced}/llama-cpp.md (100%) rename 0_app/{2_runtimes => 6_advanced}/mlx.md (100%) diff --git a/0_app/2_runtimes/index.md b/0_app/2_runtimes/index.md deleted file mode 100644 index 7b4a9b5..0000000 --- 
a/0_app/2_runtimes/index.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Runtimes -description: Learn about the inference runtimes available in LM Studio -index: 1 ---- - -LM Studio supports multiple inference runtimes for running large language models locally. Each runtime has different strengths depending on your hardware and use case. - -## Available Runtimes - -- [**llama.cpp**](runtimes/llama-cpp) – A widely-used, cross-platform runtime for running GGUF models efficiently on CPUs and GPUs. -- [**MLX**](runtimes/mlx) – An Apple Silicon-optimized runtime for running models with high performance on Mac. diff --git a/0_app/4_modelyaml/convert-to-mlx.md b/0_app/4_modelyaml/_convert-to-mlx.md similarity index 97% rename from 0_app/4_modelyaml/convert-to-mlx.md rename to 0_app/4_modelyaml/_convert-to-mlx.md index acc142c..a48fb07 100644 --- a/0_app/4_modelyaml/convert-to-mlx.md +++ b/0_app/4_modelyaml/_convert-to-mlx.md @@ -90,4 +90,4 @@ Use the `--help` flag on either command (e.g., `python -m mlx_vlm.convert --help If you do not include the --mlx-path flag in your command, the tool automatically creates a folder called mlx_model in the folder you were currently in. In this case, you’ll need to manually import the converted MLX model to LM Studio by placing it in LM Studio’s expected models directory structure. By default, LM Studio stores models in `~/.lmstudio/models/`. -See our [MLX overview](../runtimes/mlx) for more information about using MLX models in LM Studio. +See our [MLX overview](../advanced/mlx) for more information about using MLX models in LM Studio. 
diff --git a/0_app/2_runtimes/llama-cpp.md b/0_app/6_advanced/llama-cpp.md similarity index 100% rename from 0_app/2_runtimes/llama-cpp.md rename to 0_app/6_advanced/llama-cpp.md diff --git a/0_app/2_runtimes/mlx.md b/0_app/6_advanced/mlx.md similarity index 100% rename from 0_app/2_runtimes/mlx.md rename to 0_app/6_advanced/mlx.md From 8c7510a53e501ef9fa80117a5e8adb5048b7c5ac Mon Sep 17 00:00:00 2001 From: kaydenlms Date: Tue, 7 Apr 2026 15:28:19 -0400 Subject: [PATCH 5/8] Rename 'runtime' to 'engine' in llama.cpp, MLX, and convert-to-mlx docs Co-Authored-By: Claude Opus 4.6 (1M context) --- 0_app/4_modelyaml/_convert-to-mlx.md | 4 ++-- 0_app/6_advanced/llama-cpp.md | 6 +++--- 0_app/6_advanced/mlx.md | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/0_app/4_modelyaml/_convert-to-mlx.md b/0_app/4_modelyaml/_convert-to-mlx.md index a48fb07..75388d2 100644 --- a/0_app/4_modelyaml/_convert-to-mlx.md +++ b/0_app/4_modelyaml/_convert-to-mlx.md @@ -1,11 +1,11 @@ --- title: Convert Models to MLX -description: Convert models to MLX format for use with the MLX runtime in LM Studio +description: Convert models to MLX format for use with the MLX engine in LM Studio index: 6 --- ## Convert a model to MLX format -Convert models to MLX to use with LM Studio’s MLX runtime backend. +Convert models to MLX to use with LM Studio’s MLX engine. LM Studio’s MLX capability integrates two different implementations, **mlx-lm** and **mlx-vlm**. The former should be used for text-only models while the latter should be used for vision models. 
diff --git a/0_app/6_advanced/llama-cpp.md b/0_app/6_advanced/llama-cpp.md index a1bbd77..042ae30 100644 --- a/0_app/6_advanced/llama-cpp.md +++ b/0_app/6_advanced/llama-cpp.md @@ -1,12 +1,12 @@ --- title: What is llama.cpp -description: Learn about the llama.cpp inference runtime in LM Studio +description: Learn about the llama.cpp inference engine in LM Studio index: 2 --- ## What is llama.cpp -If you’ve tinkered with open source models, you’ve likely heard of [llama.cpp](https://github.com/ggml-org/llama.cpp). llama.cpp is an open-source inference engine written in C++, developed by Georgi Gerganov in 2023. The goal of this project is to make LLM inference accessible across a wide range of hardware, with minimal setup and without compromising on performance. LM Studio integrates llama.cpp under the hood as one of our primary runtime engines. +If you’ve tinkered with open source models, you’ve likely heard of [llama.cpp](https://github.com/ggml-org/llama.cpp). llama.cpp is an open-source inference engine written in C++, developed by Georgi Gerganov in 2023. The goal of this project is to make LLM inference accessible across a wide range of hardware, with minimal setup and without compromising on performance. LM Studio integrates llama.cpp under the hood as one of our primary engines. ### Relevant Terminology @@ -19,7 +19,7 @@ If you’ve tinkered with open source models, you’ve likely heard of [llama.cp In the LM Studio app, open Runtime settings (⌘⇧R) to see llama.cpp as the runtime selection for GGUF. - + When downloading models in LM Studio, you may see -Q4_K_M.gguf in the model card. Q4_K_M.gguf indicates a Q4 quantized version of the model stored in GGUF format. 
diff --git a/0_app/6_advanced/mlx.md b/0_app/6_advanced/mlx.md index d187d6a..ed3914a 100644 --- a/0_app/6_advanced/mlx.md +++ b/0_app/6_advanced/mlx.md @@ -1,6 +1,6 @@ --- title: What is MLX -description: Learn about the MLX inference runtime in LM Studio +description: Learn about the MLX inference engine in LM Studio index: 3 --- @@ -8,7 +8,7 @@ index: 3 [MLX](https://github.com/ml-explore/mlx) is a machine learning framework and library developed by Apple to optimize running ML workloads on Apple Silicon. [mlx-lm](https://github.com/ml-explore/mlx-lm/tree/main) and [mlx-vlm](https://github.com/Blaizzy/mlx-vlm) are packages built on top of the MLX framework that focuses specifically on performing inference with LLMs or VLMs, respectively. These packages maximize the speed and efficiency of running LLMs on Apple Silicon. -LM Studio integrates both mlx-lm and mlx-vlm into its MLX runtime backend. Because MLX is built by Apple for Apple hardware, it is a Mac-only runtime and is not available on other platforms. +LM Studio integrates both mlx-lm and mlx-vlm into its MLX engine. Because MLX is built by Apple for Apple hardware, it is a Mac-only engine and is not available on other platforms. ## MLX and unified memory @@ -16,9 +16,9 @@ Unlike traditional systems where CPU and GPU maintain separate memory pools, App ## MLX in LM Studio -If you’re using LM Studio on a Mac, you will see MLX as an available runtime under Runtime Settings (⌘⇧R). +If you’re using LM Studio on a Mac, you will see MLX as an available engine under Runtime Settings (⌘⇧R). - + When browsing models in LM Studio, you may see entries like `lmstudio-community/Qwen3-Coder-Next-MLX-4bit`. The LM Studio team maintains our own MLX conversions for popular open source models on Hugging Face at [lmstudio-community](https://huggingface.co/lmstudio-community). @@ -41,7 +41,7 @@ Read more about `lms get` [here](https://lmstudio.ai/docs/cli/local-models/get). 
## Choosing between MLX and llama.cpp -Note that the MLX runtime only supports models in MLX format, and the llama.cpp runtime only supports models in GGUF format. +Note that the MLX engine only supports models in MLX format, and the llama.cpp engine only supports models in GGUF format. If you're on a Mac device, our Staff Picks will recommend the MLX model if it’s available as an option, and otherwise exclude the option if it is not recommended over GGUF. @@ -49,4 +49,4 @@ If you're on a Mac device, our Staff Picks will recommend the MLX model if it’ Many popular models already have MLX versions available. Before converting manually, check out [lmstudio-community](https://huggingface.co/lmstudio-community) or [mlx-community](https://huggingface.co/mlx-community) on HuggingFace to see if the model you want is available in MLX format. -Models stored locally in other formats can be converted to MLX format to use with the MLX runtime. See our guide on [converting models to MLX format](../modelyaml/convert-to-mlx). +Models stored locally in other formats can be converted to MLX format to use with the MLX engine. See our guide on [converting models to MLX format](../modelyaml/convert-to-mlx). From b3a6cdeb44692ea132dea741138b0ced18bbe567 Mon Sep 17 00:00:00 2001 From: kaydenlms Date: Tue, 7 Apr 2026 15:30:28 -0400 Subject: [PATCH 6/8] Add link to mlx-engine GitHub repo in MLX docs Co-Authored-By: Claude Opus 4.6 (1M context) --- 0_app/6_advanced/mlx.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/0_app/6_advanced/mlx.md b/0_app/6_advanced/mlx.md index ed3914a..c7c1ab7 100644 --- a/0_app/6_advanced/mlx.md +++ b/0_app/6_advanced/mlx.md @@ -8,7 +8,7 @@ index: 3 [MLX](https://github.com/ml-explore/mlx) is a machine learning framework and library developed by Apple to optimize running ML workloads on Apple Silicon. 
[mlx-lm](https://github.com/ml-explore/mlx-lm/tree/main) and [mlx-vlm](https://github.com/Blaizzy/mlx-vlm) are packages built on top of the MLX framework that focuses specifically on performing inference with LLMs or VLMs, respectively. These packages maximize the speed and efficiency of running LLMs on Apple Silicon. -LM Studio integrates both mlx-lm and mlx-vlm into its MLX engine. Because MLX is built by Apple for Apple hardware, it is a Mac-only engine and is not available on other platforms. +LM Studio integrates both mlx-lm and mlx-vlm into the [LM Studio MLX engine](https://github.com/lmstudio-ai/mlx-engine). Because MLX is built by Apple for Apple hardware, it is a Mac-only engine and is not available on other platforms. ## MLX and unified memory From 4363006421b2280284f4de8d6c34f577918e60ac Mon Sep 17 00:00:00 2001 From: Rugved Somwanshi Date: Fri, 10 Apr 2026 18:18:26 -0400 Subject: [PATCH 7/8] Drop conversion content in mlx page for now --- 0_app/6_advanced/mlx.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/0_app/6_advanced/mlx.md b/0_app/6_advanced/mlx.md index c7c1ab7..0fbc25a 100644 --- a/0_app/6_advanced/mlx.md +++ b/0_app/6_advanced/mlx.md @@ -44,9 +44,3 @@ Read more about `lms get` [here](https://lmstudio.ai/docs/cli/local-models/get). Note that the MLX engine only supports models in MLX format, and the llama.cpp engine only supports models in GGUF format. If you're on a Mac device, our Staff Picks will recommend the MLX model if it’s available as an option, and otherwise exclude the option if it is not recommended over GGUF. - -## Converting models to MLX format - -Many popular models already have MLX versions available. Before converting manually, check out [lmstudio-community](https://huggingface.co/lmstudio-community) or [mlx-community](https://huggingface.co/mlx-community) on HuggingFace to see if the model you want is available in MLX format. 
- -Models stored locally in other formats can be converted to MLX format to use with the MLX engine. See our guide on [converting models to MLX format](../modelyaml/convert-to-mlx). From dbbfcc516097d1b8390aeed4bf45aa5a1541a899 Mon Sep 17 00:00:00 2001 From: kaydenlms Date: Fri, 10 Apr 2026 18:39:46 -0400 Subject: [PATCH 8/8] Renumber 0_app folders after removing 2_runtimes Co-Authored-By: Claude Opus 4.6 (1M context) --- 0_app/{3_mcp => 2_mcp}/deeplink.md | 0 0_app/{3_mcp => 2_mcp}/index.md | 0 0_app/{4_modelyaml => 3_modelyaml}/_convert-to-mlx.md | 0 0_app/{4_modelyaml => 3_modelyaml}/index.md | 0 0_app/{4_modelyaml => 3_modelyaml}/publish.md | 0 0_app/{6_advanced => 5_advanced}/_branching.md | 0 0_app/{6_advanced => 5_advanced}/_context.md | 0 0_app/{6_advanced => 5_advanced}/_errors.md | 0 0_app/{6_advanced => 5_advanced}/_vision.md | 0 0_app/{6_advanced => 5_advanced}/import-model.md | 0 0_app/{6_advanced => 5_advanced}/llama-cpp.md | 0 0_app/{6_advanced => 5_advanced}/mlx.md | 0 0_app/{6_advanced => 5_advanced}/parallel-requests.md | 0 0_app/{6_advanced => 5_advanced}/per-model.md | 0 0_app/{6_advanced => 5_advanced}/prompt-template.md | 0 0_app/{6_advanced => 5_advanced}/speculative-decoding.md | 0 0_app/{7_user-interface => 6_user-interface}/languages.md | 0 0_app/{7_user-interface => 6_user-interface}/modes.md | 0 0_app/{7_user-interface => 6_user-interface}/themes.md | 0 19 files changed, 0 insertions(+), 0 deletions(-) rename 0_app/{3_mcp => 2_mcp}/deeplink.md (100%) rename 0_app/{3_mcp => 2_mcp}/index.md (100%) rename 0_app/{4_modelyaml => 3_modelyaml}/_convert-to-mlx.md (100%) rename 0_app/{4_modelyaml => 3_modelyaml}/index.md (100%) rename 0_app/{4_modelyaml => 3_modelyaml}/publish.md (100%) rename 0_app/{6_advanced => 5_advanced}/_branching.md (100%) rename 0_app/{6_advanced => 5_advanced}/_context.md (100%) rename 0_app/{6_advanced => 5_advanced}/_errors.md (100%) rename 0_app/{6_advanced => 5_advanced}/_vision.md (100%) rename 0_app/{6_advanced => 
5_advanced}/import-model.md (100%) rename 0_app/{6_advanced => 5_advanced}/llama-cpp.md (100%) rename 0_app/{6_advanced => 5_advanced}/mlx.md (100%) rename 0_app/{6_advanced => 5_advanced}/parallel-requests.md (100%) rename 0_app/{6_advanced => 5_advanced}/per-model.md (100%) rename 0_app/{6_advanced => 5_advanced}/prompt-template.md (100%) rename 0_app/{6_advanced => 5_advanced}/speculative-decoding.md (100%) rename 0_app/{7_user-interface => 6_user-interface}/languages.md (100%) rename 0_app/{7_user-interface => 6_user-interface}/modes.md (100%) rename 0_app/{7_user-interface => 6_user-interface}/themes.md (100%) diff --git a/0_app/3_mcp/deeplink.md b/0_app/2_mcp/deeplink.md similarity index 100% rename from 0_app/3_mcp/deeplink.md rename to 0_app/2_mcp/deeplink.md diff --git a/0_app/3_mcp/index.md b/0_app/2_mcp/index.md similarity index 100% rename from 0_app/3_mcp/index.md rename to 0_app/2_mcp/index.md diff --git a/0_app/4_modelyaml/_convert-to-mlx.md b/0_app/3_modelyaml/_convert-to-mlx.md similarity index 100% rename from 0_app/4_modelyaml/_convert-to-mlx.md rename to 0_app/3_modelyaml/_convert-to-mlx.md diff --git a/0_app/4_modelyaml/index.md b/0_app/3_modelyaml/index.md similarity index 100% rename from 0_app/4_modelyaml/index.md rename to 0_app/3_modelyaml/index.md diff --git a/0_app/4_modelyaml/publish.md b/0_app/3_modelyaml/publish.md similarity index 100% rename from 0_app/4_modelyaml/publish.md rename to 0_app/3_modelyaml/publish.md diff --git a/0_app/6_advanced/_branching.md b/0_app/5_advanced/_branching.md similarity index 100% rename from 0_app/6_advanced/_branching.md rename to 0_app/5_advanced/_branching.md diff --git a/0_app/6_advanced/_context.md b/0_app/5_advanced/_context.md similarity index 100% rename from 0_app/6_advanced/_context.md rename to 0_app/5_advanced/_context.md diff --git a/0_app/6_advanced/_errors.md b/0_app/5_advanced/_errors.md similarity index 100% rename from 0_app/6_advanced/_errors.md rename to 0_app/5_advanced/_errors.md 
diff --git a/0_app/6_advanced/_vision.md b/0_app/5_advanced/_vision.md similarity index 100% rename from 0_app/6_advanced/_vision.md rename to 0_app/5_advanced/_vision.md diff --git a/0_app/6_advanced/import-model.md b/0_app/5_advanced/import-model.md similarity index 100% rename from 0_app/6_advanced/import-model.md rename to 0_app/5_advanced/import-model.md diff --git a/0_app/6_advanced/llama-cpp.md b/0_app/5_advanced/llama-cpp.md similarity index 100% rename from 0_app/6_advanced/llama-cpp.md rename to 0_app/5_advanced/llama-cpp.md diff --git a/0_app/6_advanced/mlx.md b/0_app/5_advanced/mlx.md similarity index 100% rename from 0_app/6_advanced/mlx.md rename to 0_app/5_advanced/mlx.md diff --git a/0_app/6_advanced/parallel-requests.md b/0_app/5_advanced/parallel-requests.md similarity index 100% rename from 0_app/6_advanced/parallel-requests.md rename to 0_app/5_advanced/parallel-requests.md diff --git a/0_app/6_advanced/per-model.md b/0_app/5_advanced/per-model.md similarity index 100% rename from 0_app/6_advanced/per-model.md rename to 0_app/5_advanced/per-model.md diff --git a/0_app/6_advanced/prompt-template.md b/0_app/5_advanced/prompt-template.md similarity index 100% rename from 0_app/6_advanced/prompt-template.md rename to 0_app/5_advanced/prompt-template.md diff --git a/0_app/6_advanced/speculative-decoding.md b/0_app/5_advanced/speculative-decoding.md similarity index 100% rename from 0_app/6_advanced/speculative-decoding.md rename to 0_app/5_advanced/speculative-decoding.md diff --git a/0_app/7_user-interface/languages.md b/0_app/6_user-interface/languages.md similarity index 100% rename from 0_app/7_user-interface/languages.md rename to 0_app/6_user-interface/languages.md diff --git a/0_app/7_user-interface/modes.md b/0_app/6_user-interface/modes.md similarity index 100% rename from 0_app/7_user-interface/modes.md rename to 0_app/6_user-interface/modes.md diff --git a/0_app/7_user-interface/themes.md b/0_app/6_user-interface/themes.md similarity index 
100% rename from 0_app/7_user-interface/themes.md rename to 0_app/6_user-interface/themes.md