From 4f2096bf9a196913211befded93e2a82dc5dc3ce Mon Sep 17 00:00:00 2001
From: Yagil Burowski
Date: Mon, 13 Apr 2026 22:00:41 -0400
Subject: [PATCH 1/8] Migrate docs content for Fumadocs

---
 0_app/0_root/meta.json                        |   7 +
 0_app/0_root/offline.md                       |   4 +-
 0_app/1_basics/_connect-apps.md               |   4 +-
 0_app/1_basics/index.md                       |   4 +-
 0_app/1_basics/meta.json                      |  12 +
 0_app/2_mcp/{deeplink.md => deeplink.mdx}     |   4 +-
 0_app/2_mcp/index.md                          |   4 +-
 0_app/2_mcp/meta.json                         |   6 +
 0_app/3_modelyaml/meta.json                   |   6 +
 0_app/3_modelyaml/publish.md                  |   2 +-
 0_app/3_presets/meta.json                     |   9 +
 0_app/5_advanced/meta.json                    |  14 +
 0_app/5_advanced/per-model.md                 |   5 +-
 0_app/6_user-interface/languages.md           |   4 +-
 0_app/6_user-interface/meta.json              |   8 +
 0_app/meta.json                               |  12 +
 1_developer/0_core/0_server/index.md          |   2 +-
 1_developer/0_core/0_server/meta.json         |   7 +
 .../0_core/0_server/serve-on-network.md       |   2 +-
 1_developer/0_core/0_server/settings.md       |   2 +-
 1_developer/0_core/authentication.md          |   8 +-
 1_developer/0_core/headless_llmster.md        |   4 +-
 1_developer/0_core/mcp.md                     | 730 +++++++++---------
 1_developer/0_core/meta.json                  |  12 +
 1_developer/2_rest/chat.md                    | 242 +++---
 1_developer/2_rest/download-status.md         |  36 +-
 1_developer/2_rest/download.md                |  40 +-
 1_developer/2_rest/endpoints.md               |   8 +-
 1_developer/2_rest/index.md                   |   2 +-
 1_developer/2_rest/list.md                    | 192 +++--
 1_developer/2_rest/load.md                    |  60 +-
 1_developer/2_rest/meta.json                  |  15 +
 1_developer/2_rest/quickstart.md              | 516 ++++++------
 1_developer/2_rest/stateful-chats.md          |  98 +--
 1_developer/2_rest/streaming-events.md        | 377 ++++-----
 1_developer/2_rest/unload.md                  |  34 +-
 1_developer/3_openai-compat/completions.md    |   4 +-
 1_developer/3_openai-compat/meta.json         |  12 +
 1_developer/4_anthropic-compat/meta.json      |   6 +
 1_developer/meta.json                         |  11 +
 1_python/1_getting-started/authentication.md  |  72 +-
 1_python/1_getting-started/meta.json          |   8 +
 1_python/1_getting-started/project-setup.md   | 202 +++--
 1_python/1_getting-started/repl.md            | 108 ++-
 .../cancelling-predictions.md                 | 115 ++-
 1_python/1_llm-prediction/chat-completion.md  | 470 +++++------
 1_python/1_llm-prediction/completion.md       | 330 ++++----
 1_python/1_llm-prediction/image-input.md      | 155 ++--
 1_python/1_llm-prediction/meta.json           |  14 +
 1_python/1_llm-prediction/parameters.md       | 186 ++---
 .../1_llm-prediction/speculative-decoding.md  |  82 +-
 .../1_llm-prediction/structured-response.md   | 192 +++--
 .../1_llm-prediction/working-with-chats.md    |  58 +-
 1_python/2_agent/act.md                       | 173 ++---
 1_python/2_agent/meta.json                    |   8 +
 1_python/2_agent/tools.md                     | 201 +++--
 1_python/3_embedding/index.md                 |  14 +-
 1_python/4_tokenization/index.md              |  75 +-
 1_python/5_manage-models/_download-models.md  | 121 ++-
 1_python/5_manage-models/list-downloaded.md   |  61 +-
 1_python/5_manage-models/list-loaded.md       |  55 +-
 1_python/5_manage-models/loading.md           | 211 +++--
 1_python/5_manage-models/meta.json            |   9 +
 1_python/6_model-info/get-context-length.md   |  60 +-
 1_python/6_model-info/get-load-config.md      |  47 +-
 1_python/6_model-info/get-model-info.md       |  43 +-
 1_python/6_model-info/meta.json               |   8 +
 1_python/_7_api-reference/meta.json           |  18 +
 1_python/_more/_apply-prompt-template.md      |  52 +-
 1_python/_more/meta.json                      |   6 +
 1_python/index.md                             |  56 +-
 1_python/meta.json                            |  14 +
 .../cancelling-predictions.md                 |  64 +-
 .../2_llm-prediction/chat-completion.md       | 280 +++----
 2_typescript/2_llm-prediction/completion.md   | 174 ++---
 2_typescript/2_llm-prediction/image-input.md  |  47 +-
 2_typescript/2_llm-prediction/meta.json       |  14 +
 2_typescript/2_llm-prediction/parameters.md   |  74 +-
 .../2_llm-prediction/speculative-decoding.md  |  62 +-
 .../2_llm-prediction/structured-response.md   | 177 ++---
 .../2_llm-prediction/working-with-chats.md    | 100 +--
 2_typescript/3_agent/act.md                   | 192 +++--
 2_typescript/3_agent/meta.json                |   8 +
 2_typescript/3_agent/tools.md                 | 107 ++-
 .../1_tools-provider/custom-configuration.md  | 100 ++-
 .../1_tools-provider/handling-aborts.md       |  61 +-
 .../3_plugins/1_tools-provider/meta.json      |  10 +
 .../1_tools-provider/multiple-tools.md        |  81 +-
 .../3_plugins/1_tools-provider/single-tool.md |  86 +--
 .../status-reports-and-warnings.md            | 110 ++-
 .../custom-configuration.md                   |  80 +-
 .../custom-status-report.md                   |  45 +-
 .../2_prompt-preprocessor/examples.md         |  48 +-
 .../3_plugins/2_prompt-preprocessor/meta.json |   9 +
 2_typescript/3_plugins/3_generator/meta.json  |   7 +
 .../3_generator/text-only-generators.md       |  33 +-
 .../accessing-config.md                       |  33 +-
 .../4_custom-configuration/config-ts.md       | 100 ++-
 .../defining-new-fields.md                    | 188 ++---
 .../4_custom-configuration/meta.json          |   8 +
 2_typescript/3_plugins/meta.json              |  11 +
 2_typescript/4_embedding/index.md             |  19 +-
 2_typescript/5_tokenization/index.md          | 106 ++-
 .../6_manage-models/_download-models.md       | 109 ++-
 .../6_manage-models/list-downloaded.md        |  14 +-
 2_typescript/6_manage-models/list-loaded.md   |  14 +-
 2_typescript/6_manage-models/loading.md       | 104 +--
 2_typescript/6_manage-models/meta.json        |   9 +
 2_typescript/7_api-reference/meta.json        |  18 +
 2_typescript/8_model-info/_get-load-config.md |  18 +-
 .../8_model-info/get-context-length.md        |  59 +-
 2_typescript/8_model-info/get-model-info.md   |  54 +-
 2_typescript/8_model-info/meta.json           |   8 +
 2_typescript/_more/_apply-prompt-template.md  |  56 +-
 2_typescript/_more/meta.json                  |   6 +
 2_typescript/authentication.md                |  56 +-
 2_typescript/index.md                         |  41 +-
 2_typescript/meta.json                        |  16 +
 2_typescript/project-setup.md                 |  40 +-
 3_cli/0_local-models/meta.json                |  11 +
 3_cli/1_serve/meta.json                       |   9 +
 3_cli/2_daemon/daemon-down.md                 |   4 +-
 3_cli/2_daemon/meta.json                      |   9 +
 3_cli/3_link/link-enable.md                   |   4 +-
 3_cli/3_link/meta.json                        |  10 +
 3_cli/4_runtime/meta.json                     |   6 +
 3_cli/5_develop-and-publish/meta.json         |   9 +
 3_cli/index.md                                |   6 +-
 3_cli/meta.json                               |  13 +
 4_integrations/1_mcp-remote/meta.json         |   6 +
 4_integrations/claude-code.md                 |   8 +-
 4_integrations/codex.md                       |   8 +-
 4_integrations/meta.json                      |  10 +
 4_integrations/openclaw.md                    |   8 +-
 5_lmlink/1_basics/meta.json                   |   8 +
 5_lmlink/meta.json                            |   6 +
 README.md                                     |  46 +-
 _template_dont_edit.md                        |  58 +-
 meta.json                                     |  12 +
 139 files changed, 4242 insertions(+), 4814 deletions(-)
 create mode 100644 0_app/0_root/meta.json
 create mode 100644 0_app/1_basics/meta.json
 rename 0_app/2_mcp/{deeplink.md => deeplink.mdx} (97%)
 create mode 100644 0_app/2_mcp/meta.json
 create mode 100644 0_app/3_modelyaml/meta.json
 create mode 100644 0_app/3_presets/meta.json
 create mode 100644 0_app/5_advanced/meta.json
 create mode 100644 0_app/6_user-interface/meta.json
 create mode 100644 0_app/meta.json
 create mode 100644 1_developer/0_core/0_server/meta.json
 create mode 100644 1_developer/0_core/meta.json
 create mode 100644 1_developer/2_rest/meta.json
 create mode 100644 1_developer/3_openai-compat/meta.json
 create mode 100644 1_developer/4_anthropic-compat/meta.json
 create mode 100644 1_developer/meta.json
 create mode 100644 1_python/1_getting-started/meta.json
 create mode 100644 1_python/1_llm-prediction/meta.json
 create mode 100644 1_python/2_agent/meta.json
 create mode 100644 1_python/5_manage-models/meta.json
 create mode 100644 1_python/6_model-info/meta.json
 create mode 100644 1_python/_7_api-reference/meta.json
 create mode 100644 1_python/_more/meta.json
 create mode 100644 1_python/meta.json
 create mode 100644 2_typescript/2_llm-prediction/meta.json
 create mode 100644 2_typescript/3_agent/meta.json
 create mode 100644 2_typescript/3_plugins/1_tools-provider/meta.json
 create mode 100644 2_typescript/3_plugins/2_prompt-preprocessor/meta.json
 create mode 100644 2_typescript/3_plugins/3_generator/meta.json
 create mode 100644 2_typescript/3_plugins/4_custom-configuration/meta.json
 create mode 100644 2_typescript/3_plugins/meta.json
 create mode 100644 2_typescript/6_manage-models/meta.json
 create mode 100644 2_typescript/7_api-reference/meta.json
 create mode 100644 2_typescript/8_model-info/meta.json
 create mode 100644 2_typescript/_more/meta.json
 create mode 100644 2_typescript/meta.json
 create mode 100644 3_cli/0_local-models/meta.json
 create mode 100644 3_cli/1_serve/meta.json
 create mode 100644 3_cli/2_daemon/meta.json
 create mode 100644 3_cli/3_link/meta.json
 create mode 100644 3_cli/4_runtime/meta.json
 create mode 100644 3_cli/5_develop-and-publish/meta.json
 create mode 100644 3_cli/meta.json
 create mode 100644 4_integrations/1_mcp-remote/meta.json
 create mode 100644 4_integrations/meta.json
 create mode 100644 5_lmlink/1_basics/meta.json
 create mode 100644 5_lmlink/meta.json
 create mode 100644 meta.json

diff --git a/0_app/0_root/meta.json b/0_app/0_root/meta.json
new file mode 100644
index 0000000..bc9c64f
--- /dev/null
+++ b/0_app/0_root/meta.json
@@ -0,0 +1,7 @@
+{
+  "title": "Introduction",
+  "pages": [
+    "offline",
+    "system-requirements"
+  ]
+}
diff --git a/0_app/0_root/offline.md b/0_app/0_root/offline.md
index aebcd97..af25c71 100644
--- a/0_app/0_root/offline.md
+++ b/0_app/0_root/offline.md
@@ -4,9 +4,9 @@ description: LM Studio can operate entirely offline, just make sure to get some
 index: 4
 ---
 
-```lms_notice
+:::note
 In general, LM Studio does not require the internet in order to work. This includes core functions like chatting with models, chatting with documents, or running a local server, none of which require the internet.
-```
+:::
 
 ### Operations that do NOT require connectivity
 
diff --git a/0_app/1_basics/_connect-apps.md b/0_app/1_basics/_connect-apps.md
index 5f1cc09..82a1ded 100644
--- a/0_app/1_basics/_connect-apps.md
+++ b/0_app/1_basics/_connect-apps.md
@@ -13,9 +13,9 @@ You can choose a theme in the Settings tab.
 
 Choosing the "Auto" option will automatically switch between Light and Dark themes based on your system settings.
 
-```lms_protip
+:::tip[Pro Tip]
 You can jump to Settings from anywhere in the app by pressing `cmd` + `,` on macOS or `ctrl` + `,` on Windows/Linux.
-```
+:::
 
 ###### To get to the Settings page, you need to be on [Power User mode](/docs/modes) or higher.
 
diff --git a/0_app/1_basics/index.md b/0_app/1_basics/index.md
index 51c7513..d47c0aa 100644
--- a/0_app/1_basics/index.md
+++ b/0_app/1_basics/index.md
@@ -7,9 +7,9 @@ index: 1
 
 Double check computer meets the minimum [system requirements](/docs/system-requirements).
 
-```lms_info
+:::info[Info]
 You might sometimes see terms such as `open-source models` or `open-weights models`. Different models might be released under different licenses and varying degrees of 'openness'. In order to run a model locally, you need to be able to get access to its "weights", often distributed as one or more files that end with `.gguf`, `.safetensors` etc.
-```
+:::
diff --git a/0_app/1_basics/meta.json b/0_app/1_basics/meta.json
new file mode 100644
index 0000000..a793b2b
--- /dev/null
+++ b/0_app/1_basics/meta.json
@@ -0,0 +1,12 @@
+{
+  "title": "Getting Started",
+  "pages": [
+    "chat",
+    "_connect-apps",
+    "download-model",
+    "_keychords",
+    "lmstudio-vs-llmster-vs-lms",
+    "rag",
+    "_troubleshooting"
+  ]
+}
diff --git a/0_app/2_mcp/deeplink.md b/0_app/2_mcp/deeplink.mdx
similarity index 97%
rename from 0_app/2_mcp/deeplink.md
rename to 0_app/2_mcp/deeplink.mdx
index b4efd64..aa487fc 100644
--- a/0_app/2_mcp/deeplink.md
+++ b/0_app/2_mcp/deeplink.mdx
@@ -14,9 +14,7 @@ Starting with version 0.3.17 (10), LM Studio can act as an MCP host. Learn more
 
 Enter your MCP JSON entry to generate a deeplink for the `Add to LM Studio` button.
 
-```lms_mcp_deep_link_generator
-
-```
+
 
 ## Try an example
 
diff --git a/0_app/2_mcp/index.md b/0_app/2_mcp/index.md
index f769557..d1d121d 100644
--- a/0_app/2_mcp/index.md
+++ b/0_app/2_mcp/index.md
@@ -10,9 +10,9 @@ Starting LM Studio 0.3.17, LM Studio acts as an **Model Context Protocol (MCP) H
 
 Never install MCPs from untrusted sources.
 
-```lms_warning
+:::warning[Heads Up]
 Some MCP servers can run arbitrary code, access your local files, and use your network connection. Always be cautious when installing and using MCP servers. If you don't trust the source, don't install it.
-```
+:::
 
 # Use MCP servers in LM Studio
 
diff --git a/0_app/2_mcp/meta.json b/0_app/2_mcp/meta.json
new file mode 100644
index 0000000..a9726e1
--- /dev/null
+++ b/0_app/2_mcp/meta.json
@@ -0,0 +1,6 @@
+{
+  "title": "MCP",
+  "pages": [
+    "deeplink"
+  ]
+}
diff --git a/0_app/3_modelyaml/meta.json b/0_app/3_modelyaml/meta.json
new file mode 100644
index 0000000..37a1889
--- /dev/null
+++ b/0_app/3_modelyaml/meta.json
@@ -0,0 +1,6 @@
+{
+  "title": "model.yaml",
+  "pages": [
+    "publish"
+  ]
+}
diff --git a/0_app/3_modelyaml/publish.md b/0_app/3_modelyaml/publish.md
index eaea1ba..421c8ca 100644
--- a/0_app/3_modelyaml/publish.md
+++ b/0_app/3_modelyaml/publish.md
@@ -22,7 +22,7 @@ lms clone qwen/qwen3-8b
 
 This will result in a local copy `model.yaml`, `README` and other metadata files. Importantly, this does NOT download the model weights.
 
-```lms_terminal
+```bash title="Terminal"
 $ ls
 README.md manifest.json model.yaml thumbnail.png
 ```
diff --git a/0_app/3_presets/meta.json b/0_app/3_presets/meta.json
new file mode 100644
index 0000000..6c054da
--- /dev/null
+++ b/0_app/3_presets/meta.json
@@ -0,0 +1,9 @@
+{
+  "title": "Presets",
+  "pages": [
+    "import",
+    "publish",
+    "pull",
+    "push"
+  ]
+}
diff --git a/0_app/5_advanced/meta.json b/0_app/5_advanced/meta.json
new file mode 100644
index 0000000..7c2b36c
--- /dev/null
+++ b/0_app/5_advanced/meta.json
@@ -0,0 +1,14 @@
+{
+  "title": "Advanced",
+  "pages": [
+    "_branching",
+    "_context",
+    "_errors",
+    "import-model",
+    "parallel-requests",
+    "per-model",
+    "prompt-template",
+    "speculative-decoding",
+    "_vision"
+  ]
+}
diff --git a/0_app/5_advanced/per-model.md b/0_app/5_advanced/per-model.md
index 91ba99f..e2b0969 100644
--- a/0_app/5_advanced/per-model.md
+++ b/0_app/5_advanced/per-model.md
@@ -26,14 +26,13 @@ This will open a dialog where you can set the default parameters for the model.
 
 Next time you load the model, these settings will be used.
 
-```lms_protip
+:::tip[Pro Tip]
 #### Reasons to set default load parameters (not required, totally optional)
 
 - Set a particular GPU offload settings for a given model
 - Set a particular context size for a given model
 - Whether or not to utilize Flash Attention for a given model
-
-```
+:::
 
 ## Advanced Topics
 
diff --git a/0_app/6_user-interface/languages.md b/0_app/6_user-interface/languages.md
index 91ab487..e69a5ee 100644
--- a/0_app/6_user-interface/languages.md
+++ b/0_app/6_user-interface/languages.md
@@ -14,9 +14,9 @@ You can choose a language in the Settings tab.
 
 Use the dropdown menu under Preferences > Language.
 
-```lms_protip
+:::tip[Pro Tip]
 You can jump to Settings from anywhere in the app by pressing `cmd` + `,` on macOS or `ctrl` + `,` on Windows/Linux.
-```
+:::
 
 ###### To get to the Settings page, you need to be on [Power User mode](/docs/modes) or higher.
 
diff --git a/0_app/6_user-interface/meta.json b/0_app/6_user-interface/meta.json
new file mode 100644
index 0000000..9a4cb95
--- /dev/null
+++ b/0_app/6_user-interface/meta.json
@@ -0,0 +1,8 @@
+{
+  "title": "User Interface",
+  "pages": [
+    "languages",
+    "modes",
+    "themes"
+  ]
+}
diff --git a/0_app/meta.json b/0_app/meta.json
new file mode 100644
index 0000000..d41e39a
--- /dev/null
+++ b/0_app/meta.json
@@ -0,0 +1,12 @@
+{
+  "title": "App",
+  "pages": [
+    "0_root",
+    "1_basics",
+    "2_mcp",
+    "3_modelyaml",
+    "3_presets",
+    "5_advanced",
+    "6_user-interface"
+  ]
+}
diff --git a/1_developer/0_core/0_server/index.md b/1_developer/0_core/0_server/index.md
index 64ac097..fa0775f 100644
--- a/1_developer/0_core/0_server/index.md
+++ b/1_developer/0_core/0_server/index.md
@@ -2,7 +2,7 @@ title: LM Studio as a Local LLM API Server
 sidebar_title: Running the Server
 description: Run an LLM API server on `localhost` with LM Studio
-fullPage: false
+full: false
 index: 1
 ---
 
diff --git a/1_developer/0_core/0_server/meta.json b/1_developer/0_core/0_server/meta.json
new file mode 100644
index 0000000..06caa9f
--- /dev/null
+++ b/1_developer/0_core/0_server/meta.json
@@ -0,0 +1,7 @@
+{
+  "title": "Server",
+  "pages": [
+    "serve-on-network",
+    "settings"
+  ]
+}
diff --git a/1_developer/0_core/0_server/serve-on-network.md b/1_developer/0_core/0_server/serve-on-network.md
index 577ccd7..903ec55 100644
--- a/1_developer/0_core/0_server/serve-on-network.md
+++ b/1_developer/0_core/0_server/serve-on-network.md
@@ -2,7 +2,7 @@ title: Serve on Local Network
 sidebar_title: Serve on Local Network
 description: Allow other devices in your network use this LM Studio API server
-fullPage: false
+full: false
 index: 3
 ---
 
diff --git a/1_developer/0_core/0_server/settings.md b/1_developer/0_core/0_server/settings.md
index 568e2f7..ca42094 100644
--- a/1_developer/0_core/0_server/settings.md
+++ b/1_developer/0_core/0_server/settings.md
@@ -2,7 +2,7 @@ title: Server Settings
 sidebar_title: Server Settings
 description: Configure server settings for LM Studio API Server
-fullPage: false
+full: false
 index: 2
 ---
 
diff --git a/1_developer/0_core/authentication.md b/1_developer/0_core/authentication.md
index 0883597..feb9bdf 100644
--- a/1_developer/0_core/authentication.md
+++ b/1_developer/0_core/authentication.md
@@ -13,9 +13,9 @@ LM Studio supports API Tokens for authentication, providing a secure and conveni
 
 By default, LM Studio does not require authentication for API requests. To enable authentication so that only requests with a valid API Token are accepted, toggle the switch in the Developers Page > Server Settings.
 
-```lms_info
+:::info[Info]
 Once enabled, all requests made through the REST API, Python SDK, or Typescript SDK will need to include a valid API Token. See usage [below](#api-token-usage).
-```
+:::
 
 
 
@@ -45,9 +45,9 @@ To edit the permissions of an existing API Token, click on the Edit button next
 
 ### Using API Tokens with REST API:
 
-```lms_noticechill
+:::note
 The example below requires [allowing calling servers from mcp.json](/docs/developer/core/server/settings) to be enabled and the [Playwright MCP](https://github.com/microsoft/playwright-mcp) in mcp.json.
-```
+:::
 
 ```bash
 curl -X POST \
diff --git a/1_developer/0_core/headless_llmster.md b/1_developer/0_core/headless_llmster.md
index b2e7cd0..c5ba70c 100644
--- a/1_developer/0_core/headless_llmster.md
+++ b/1_developer/0_core/headless_llmster.md
@@ -7,9 +7,9 @@ index: 3
 
 `llmster`, LM Studio's headless daemon, can be configured to run on startup. This guide covers setting up `llmster` to launch, load a model, and start an HTTP server automatically using `systemctl` on Linux.
 
-```lms_info
+:::info[Info]
 This guide is for Linux systems without a graphical interface. For machines with a GUI, you can configure LM Studio to [run as a service on login](/docs/developer/core/headless) instead.
-```
+:::
 
 ## Install the Daemon
 
diff --git a/1_developer/0_core/mcp.md b/1_developer/0_core/mcp.md
index 3e98d60..5b23a55 100644
--- a/1_developer/0_core/mcp.md
+++ b/1_developer/0_core/mcp.md
@@ -56,332 +56,312 @@ MCP servers provide tools that models can call during chat requests. You can ena
 
 Ephemeral MCP servers are defined on-the-fly in each request. This is useful for testing or when you don't want to pre-configure servers.
 
-```lms_info
+:::info[Info]
 Ephemeral MCP servers require the "Allow per-request MCPs" setting to be enabled in [Server Settings](/docs/developer/core/server/settings).
+:::
+
+```bash tab="curl"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "What is the top trending model on hugging face?",
+    "integrations": [
+      {
+        "type": "ephemeral_mcp",
+        "server_label": "huggingface",
+        "server_url": "https://huggingface.co/mcp",
+        "allowed_tools": ["model_search"]
+      }
+    ],
+    "context_length": 8000
+  }'
 ```
-```lms_code_snippet
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "What is the top trending model on hugging face?",
-          "integrations": [
-            {
-              "type": "ephemeral_mcp",
-              "server_label": "huggingface",
-              "server_url": "https://huggingface.co/mcp",
-              "allowed_tools": ["model_search"]
-            }
-          ],
-          "context_length": 8000
-        }'
-  Python:
-    language: python
-    code: |
-      import os
-      import requests
-      import json
-
-      response = requests.post(
-          "http://localhost:1234/api/v1/chat",
-          headers={
-              "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
-              "Content-Type": "application/json"
-          },
-          json={
-              "model": "ibm/granite-4-micro",
-              "input": "What is the top trending model on hugging face?",
-              "integrations": [
-                  {
-                      "type": "ephemeral_mcp",
-                      "server_label": "huggingface",
-                      "server_url": "https://huggingface.co/mcp",
-                      "allowed_tools": ["model_search"]
-                  }
-              ],
-              "context_length": 8000
-          }
-      )
-      print(json.dumps(response.json(), indent=2))
-  TypeScript:
-    language: typescript
-    code: |
-      const response = await fetch("http://localhost:1234/api/v1/chat", {
-        method: "POST",
-        headers: {
-          "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
-          "Content-Type": "application/json"
-        },
-        body: JSON.stringify({
-          "model": "ibm/granite-4-micro",
-          "input": "What is the top trending model on hugging face?",
-          "integrations": [
-            {
-              "type": "ephemeral_mcp",
-              "server_label": "huggingface",
-              "server_url": "https://huggingface.co/mcp",
-              "allowed_tools": ["model_search"]
-            }
-          ],
-          "context_length": 8000
-        });
-      const data = await response.json();
-      console.log(data);
+```python tab="Python"
+import os
+import requests
+import json
+
+response = requests.post(
+    "http://localhost:1234/api/v1/chat",
+    headers={
+        "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
+        "Content-Type": "application/json"
+    },
+    json={
+        "model": "ibm/granite-4-micro",
+        "input": "What is the top trending model on hugging face?",
+        "integrations": [
+            {
+                "type": "ephemeral_mcp",
+                "server_label": "huggingface",
+                "server_url": "https://huggingface.co/mcp",
+                "allowed_tools": ["model_search"]
+            }
+        ],
+        "context_length": 8000
+    }
+)
+print(json.dumps(response.json(), indent=2))
+```
+
+```typescript tab="TypeScript"
+const response = await fetch("http://localhost:1234/api/v1/chat", {
+  method: "POST",
+  headers: {
+    "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
+    "Content-Type": "application/json"
+  },
+  body: JSON.stringify({
+    "model": "ibm/granite-4-micro",
+    "input": "What is the top trending model on hugging face?",
+    "integrations": [
+      {
+        "type": "ephemeral_mcp",
+        "server_label": "huggingface",
+        "server_url": "https://huggingface.co/mcp",
+        "allowed_tools": ["model_search"]
+      }
+    ],
+    "context_length": 8000
+  })
+});
+const data = await response.json();
+console.log(data);
 ```
 
 The model can now call tools from the specified MCP server:
 
-```lms_code_snippet
-variants:
-  response:
-    language: json
-    code: |
-      {
-        "model_instance_id": "ibm/granite-4-micro",
-        "output": [
-          {
-            "type": "reasoning",
-            "content": "..."
-          },
-          {
-            "type": "message",
-            "content": "..."
-          },
-          {
-            "type": "tool_call",
-            "tool": "model_search",
-            "arguments": {
-              "sort": "trendingScore",
-              "limit": 1
-            },
-            "output": "...",
-            "provider_info": {
-              "server_label": "huggingface",
-              "type": "ephemeral_mcp"
-            }
-          },
-          {
-            "type": "reasoning",
-            "content": "\n"
-          },
-          {
-            "type": "message",
-            "content": "The top trending model is ..."
-          }
-        ],
-        "stats": {
-          "input_tokens": 419,
-          "total_output_tokens": 362,
-          "reasoning_output_tokens": 195,
-          "tokens_per_second": 27.620159487314744,
-          "time_to_first_token_seconds": 1.437
-        },
-        "response_id": "resp_7c1a08e3d6e279efcfecb02df9de7cbd316e93422d0bb5cb"
-      }
+```json
+{
+  "model_instance_id": "ibm/granite-4-micro",
+  "output": [
+    {
+      "type": "reasoning",
+      "content": "..."
+    },
+    {
+      "type": "message",
+      "content": "..."
+    },
+    {
+      "type": "tool_call",
+      "tool": "model_search",
+      "arguments": {
+        "sort": "trendingScore",
+        "limit": 1
+      },
+      "output": "...",
+      "provider_info": {
+        "server_label": "huggingface",
+        "type": "ephemeral_mcp"
+      }
+    },
+    {
+      "type": "reasoning",
+      "content": "\n"
+    },
+    {
+      "type": "message",
+      "content": "The top trending model is ..."
+    }
+  ],
+  "stats": {
+    "input_tokens": 419,
+    "total_output_tokens": 362,
+    "reasoning_output_tokens": 195,
+    "tokens_per_second": 27.620159487314744,
+    "time_to_first_token_seconds": 1.437
+  },
+  "response_id": "resp_7c1a08e3d6e279efcfecb02df9de7cbd316e93422d0bb5cb"
+}
 ```
 
 ## MCP servers from mcp.json
 
 MCP servers can be pre-configured in your `mcp.json` file. This is the recommended approach for using MCP servers that take actions on your computer (like [microsoft/playwright-mcp](https://github.com/microsoft/playwright-mcp)) and servers that you use frequently.
 
-```lms_info
+:::info[Info]
 MCP servers from mcp.json require the "Allow calling servers from mcp.json" setting to be enabled in [Server Settings](/docs/developer/core/server/settings).
-```
+:::
 
-```lms_code_snippet
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "Open lmstudio.ai",
-          "integrations": ["mcp/playwright"],
-          "context_length": 8000,
-          "temperature": 0
-        }'
-  Python:
-    language: python
-    code: |
-      import os
-      import requests
-      import json
-
-      response = requests.post(
-          "http://localhost:1234/api/v1/chat",
-          headers={
-              "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
-              "Content-Type": "application/json"
-          },
-          json={
-              "model": "ibm/granite-4-micro",
-              "input": "Open lmstudio.ai",
-              "integrations": ["mcp/playwright"],
-              "context_length": 8000,
-              "temperature": 0
-          }
-      )
-      print(json.dumps(response.json(), indent=2))
-  TypeScript:
-    language: typescript
-    code: |
-      const response = await fetch("http://localhost:1234/api/v1/chat", {
-        method: "POST",
-        headers: {
-          "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
-          "Content-Type": "application/json"
-        },
-        body: JSON.stringify({
-          model: "ibm/granite-4-micro",
-          input: "Open lmstudio.ai",
-          integrations: ["mcp/playwright"],
-          context_length: 8000,
-          temperature: 0
-        })
-      });
-      const data = await response.json();
-      console.log(data);
+```bash tab="curl"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "Open lmstudio.ai",
+    "integrations": ["mcp/playwright"],
+    "context_length": 8000,
+    "temperature": 0
+  }'
+```
+
+```python tab="Python"
+import os
+import requests
+import json
+
+response = requests.post(
+    "http://localhost:1234/api/v1/chat",
+    headers={
+        "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
+        "Content-Type": "application/json"
+    },
+    json={
+        "model": "ibm/granite-4-micro",
+        "input": "Open lmstudio.ai",
+        "integrations": ["mcp/playwright"],
+        "context_length": 8000,
+        "temperature": 0
+    }
+)
+print(json.dumps(response.json(), indent=2))
+```
+
+```typescript tab="TypeScript"
+const response = await fetch("http://localhost:1234/api/v1/chat", {
+  method: "POST",
+  headers: {
+    "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
+    "Content-Type": "application/json"
+  },
+  body: JSON.stringify({
+    model: "ibm/granite-4-micro",
+    input: "Open lmstudio.ai",
+    integrations: ["mcp/playwright"],
+    context_length: 8000,
+    temperature: 0
+  })
+});
+const data = await response.json();
+console.log(data);
 ```
 
 The response includes tool calls from the configured MCP server:
 
-```lms_code_snippet
-variants:
-  response:
-    language: json
-    code: |
-      {
-        "model_instance_id": "ibm/granite-4-micro",
-        "output": [
-          {
-            "type": "reasoning",
-            "content": "..."
-          },
-          {
-            "type": "message",
-            "content": "..."
-          },
-          {
-            "type": "tool_call",
-            "tool": "browser_navigate",
-            "arguments": {
-              "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
-            },
-            "output": "...",
-            "provider_info": {
-              "plugin_id": "mcp/playwright",
-              "type": "plugin"
-            }
-          },
-          {
-            "type": "reasoning",
-            "content": "..."
-          },
-          {
-            "type": "message",
-            "content": "The YouTube video page for ..."
-          }
-        ],
-        "stats": {
-          "input_tokens": 2614,
-          "total_output_tokens": 594,
-          "reasoning_output_tokens": 389,
-          "tokens_per_second": 26.293245822877495,
-          "time_to_first_token_seconds": 0.154
-        },
-        "response_id": "resp_cdac6a9b5e2a40027112e441ce6189db18c9040f96736407"
-      }
+```json
+{
+  "model_instance_id": "ibm/granite-4-micro",
+  "output": [
+    {
+      "type": "reasoning",
+      "content": "..."
+    },
+    {
+      "type": "message",
+      "content": "..."
+    },
+    {
+      "type": "tool_call",
+      "tool": "browser_navigate",
+      "arguments": {
+        "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+      },
+      "output": "...",
+      "provider_info": {
+        "plugin_id": "mcp/playwright",
+        "type": "plugin"
+      }
+    },
+    {
+      "type": "reasoning",
+      "content": "..."
+    },
+    {
+      "type": "message",
+      "content": "The YouTube video page for ..."
+    }
+  ],
+  "stats": {
+    "input_tokens": 2614,
+    "total_output_tokens": 594,
+    "reasoning_output_tokens": 389,
+    "tokens_per_second": 26.293245822877495,
+    "time_to_first_token_seconds": 0.154
+  },
+  "response_id": "resp_cdac6a9b5e2a40027112e441ce6189db18c9040f96736407"
+}
 ```
 
 ## Restricting tool access
 
 For both ephemeral and mcp.json servers, you can limit which tools the model can call using the `allowed_tools` field. This is useful if you do not want certain tools from an MCP server to be used, and can speed up prompt processing due to the model receiving fewer tool definitions.
 
-```lms_code_snippet
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "What is the top trending model on hugging face?",
-          "integrations": [
-            {
-              "type": "ephemeral_mcp",
-              "server_label": "huggingface",
-              "server_url": "https://huggingface.co/mcp",
-              "allowed_tools": ["model_search"]
-            }
-          ],
-          "context_length": 8000
-        }'
-  Python:
-    language: python
-    code: |
-      import os
-      import requests
-      import json
-
-      response = requests.post(
-          "http://localhost:1234/api/v1/chat",
-          headers={
-              "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
-              "Content-Type": "application/json"
-          },
-          json={
-              "model": "ibm/granite-4-micro",
-              "input": "What is the top trending model on hugging face?",
-              "integrations": [
-                  {
-                      "type": "ephemeral_mcp",
-                      "server_label": "huggingface",
-                      "server_url": "https://huggingface.co/mcp",
-                      "allowed_tools": ["model_search"]
-                  }
-              ],
-              "context_length": 8000
-          }
-      )
-      print(json.dumps(response.json(), indent=2))
-  TypeScript:
-    language: typescript
-    code: |
-      const response = await fetch("http://localhost:1234/api/v1/chat", {
-        method: "POST",
-        headers: {
-          "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
-          "Content-Type": "application/json"
-        },
-        body: JSON.stringify({
-          model: "ibm/granite-4-micro",
-          input: "What is the top trending model on hugging face?",
-          integrations: [
-            {
-              type: "ephemeral_mcp",
-              server_label: "huggingface",
-              server_url: "https://huggingface.co/mcp",
-              allowed_tools: ["model_search"]
-            }
-          ],
-          context_length: 8000
-        })
-      });
-      const data = await response.json();
-      console.log(data);
+```bash tab="curl"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "What is the top trending model on hugging face?",
+    "integrations": [
+      {
+        "type": "ephemeral_mcp",
+        "server_label": "huggingface",
+        "server_url": "https://huggingface.co/mcp",
+        "allowed_tools": ["model_search"]
+      }
+    ],
+    "context_length": 8000
+  }'
+```
+
+```python tab="Python"
+import os
+import requests
+import json
+
+response = requests.post(
+    "http://localhost:1234/api/v1/chat",
+    headers={
+        "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
+        "Content-Type": "application/json"
+    },
+    json={
+        "model": "ibm/granite-4-micro",
+        "input": "What is the top trending model on hugging face?",
+        "integrations": [
+            {
+                "type": "ephemeral_mcp",
+                "server_label": "huggingface",
+                "server_url": "https://huggingface.co/mcp",
+                "allowed_tools": ["model_search"]
+            }
+        ],
+        "context_length": 8000
+    }
+)
+print(json.dumps(response.json(), indent=2))
+```
+
+```typescript tab="TypeScript"
+const response = await fetch("http://localhost:1234/api/v1/chat", {
+  method: "POST",
+  headers: {
+    "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
+    "Content-Type": "application/json"
+  },
+  body: JSON.stringify({
+    model: "ibm/granite-4-micro",
+    input: "What is the top trending model on hugging face?",
+    integrations: [
+      {
+        type: "ephemeral_mcp",
+        server_label: "huggingface",
+        server_url: "https://huggingface.co/mcp",
+        allowed_tools: ["model_search"]
+      }
+    ],
+    context_length: 8000
+  })
+});
+const data = await response.json();
+console.log(data);
 ```
 
 If `allowed_tools` is not provided, all tools from the server are available to the model.
@@ -390,86 +370,82 @@ If `allowed_tools` is not provided, all tools from the server are available to t
 
 When using ephemeral MCP servers that require authentication, you can pass custom headers:
 
-```lms_code_snippet
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "Give me details about my SUPER-SECRET-PRIVATE Hugging face model",
-          "integrations": [
-            {
-              "type": "ephemeral_mcp",
-              "server_label": "huggingface",
-              "server_url": "https://huggingface.co/mcp",
-              "allowed_tools": ["model_search"],
-              "headers": {
-                "Authorization": "Bearer <your-hf-token>"
-              }
-            }
-          ],
-          "context_length": 8000
-        }'
-  Python:
-    language: python
-    code: |
-      import os
-      import requests
-      import json
-
-      response = requests.post(
-          "http://localhost:1234/api/v1/chat",
-          headers={
-              "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
-              "Content-Type": "application/json"
-          },
-          json={
-              "model": "ibm/granite-4-micro",
-              "input": "Give me details about my SUPER-SECRET-PRIVATE Hugging face model",
-              "integrations": [
-                  {
-                      "type": "ephemeral_mcp",
-                      "server_label": "huggingface",
-                      "server_url": "https://huggingface.co/mcp",
-                      "allowed_tools": ["model_search"],
-                      "headers": {
-                          "Authorization": "Bearer <your-hf-token>"
-                      }
-                  }
-              ],
-              "context_length": 8000
-          }
-      )
-      print(json.dumps(response.json(), indent=2))
-  TypeScript:
-    language: typescript
-    code: |
-      const response = await fetch("http://localhost:1234/api/v1/chat", {
-        method: "POST",
-        headers: {
-          "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
-          "Content-Type": "application/json"
-        },
-        body: JSON.stringify({
-          model: "ibm/granite-4-micro",
-          input: "Give me details about my SUPER-SECRET-PRIVATE Hugging face model",
-          integrations: [
-            {
-              type: "ephemeral_mcp",
-              server_label: "huggingface",
-              server_url: "https://huggingface.co/mcp",
-              allowed_tools: ["model_search"],
-              headers: {
-                Authorization: "Bearer <your-hf-token>"
-              }
-            }
-          ],
-          context_length: 8000
-        })
-    const data = await response.json();
-    console.log(data);
+```bash tab="curl"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "Give me details about my SUPER-SECRET-PRIVATE Hugging face model",
+    "integrations": [
+      {
+        "type": "ephemeral_mcp",
+        "server_label": "huggingface",
+        "server_url": "https://huggingface.co/mcp",
+        "allowed_tools": ["model_search"],
+        "headers": {
+          "Authorization": "Bearer <your-hf-token>"
+        }
+      }
+    ],
+    "context_length": 8000
+  }'
+```
+
+```python tab="Python"
+import os
+import requests
+import json
+
+response = requests.post(
+    "http://localhost:1234/api/v1/chat",
+    headers={
+        "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
+        "Content-Type": "application/json"
+    },
+    json={
+        "model": "ibm/granite-4-micro",
+        "input": "Give me details about my SUPER-SECRET-PRIVATE Hugging face model",
+        "integrations": [
+            {
+                "type": "ephemeral_mcp",
+                "server_label": "huggingface",
+                "server_url": "https://huggingface.co/mcp",
+                "allowed_tools": ["model_search"],
+                "headers": {
+                    "Authorization": "Bearer <your-hf-token>"
+                }
+            }
+        ],
+        "context_length": 8000
+    }
+)
+print(json.dumps(response.json(), indent=2))
+```
+
+```typescript tab="TypeScript"
+const response = await fetch("http://localhost:1234/api/v1/chat", {
+  method: "POST",
+  headers: {
+    "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
+    "Content-Type": "application/json"
+  },
+  body: JSON.stringify({
+    model: "ibm/granite-4-micro",
+    input: "Give me details about my SUPER-SECRET-PRIVATE Hugging face model",
+    integrations: [
+      {
+        type: "ephemeral_mcp",
+        server_label: "huggingface",
+        server_url: "https://huggingface.co/mcp",
+        allowed_tools: ["model_search"],
+        headers: {
+          Authorization: "Bearer <your-hf-token>"
+        }
+      }
+    ],
+    context_length: 8000
+  })
+});
+const data = await response.json();
+console.log(data);
 ```
diff --git a/1_developer/0_core/meta.json b/1_developer/0_core/meta.json
new file mode 100644
index 0000000..eda74f9
--- /dev/null
+++ b/1_developer/0_core/meta.json
@@ -0,0 +1,12 @@
+{
+  "title": "Core",
+  "pages": [
+    "0_server",
+    "authentication",
+    "headless_llmster",
+    "headless",
+    "lmlink",
+    "mcp",
+    "ttl-and-auto-evict"
+  ]
+}
diff --git a/1_developer/2_rest/chat.md b/1_developer/2_rest/chat.md
index 356bebe..7b6f497 100644
--- a/1_developer/2_rest/chat.md
+++ b/1_developer/2_rest/chat.md
@@ -1,7 +1,7 @@
 ---
 title: "Chat with a model"
 description: "Send a message to a model and receive a response. Supports MCP integration."
-fullPage: true
+full: true
 index: 5
 api_info:
   method: POST
@@ -157,59 +157,55 @@ api_info:
       description: Identifier of existing response to append to. Must start with `"resp_"`.
 ```
 :::split:::
-```lms_code_snippet
-variants:
-  Request with MCP:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "Tell me the top trending model on hugging face and navigate to https://lmstudio.ai",
-          "integrations": [
-            {
-              "type": "ephemeral_mcp",
-              "server_label": "huggingface",
-              "server_url": "https://huggingface.co/mcp",
-              "allowed_tools": [
-                "model_search"
-              ]
-            },
-            {
-              "type": "plugin",
-              "id": "mcp/playwright",
-              "allowed_tools": [
-                "browser_navigate"
-              ]
-            }
-          ],
-          "context_length": 8000,
-          "temperature": 0
-        }'
-  Request with Images:
-    language: bash
-    code: |
-      # Image is a small red square encoded as a base64 data URL
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "qwen/qwen3-vl-4b",
-          "input": [
-            {
-              "type": "text",
-              "content": "Describe this image in two sentences"
-            },
-            {
-              "type": "image",
-              "data_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQz0AEYBxVSF+FABJADveWkH6oAAAAAElFTkSuQmCC"
-            }
-          ],
-          "context_length": 2048,
-          "temperature": 0
-        }'
+```bash tab="Request with MCP"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "Tell me the top trending model on hugging face and navigate to https://lmstudio.ai",
+    "integrations": [
+      {
+        "type": "ephemeral_mcp",
+        "server_label": "huggingface",
+        "server_url": "https://huggingface.co/mcp",
+        "allowed_tools": [
+          "model_search"
+        ]
+      },
+      {
+        "type": "plugin",
+        "id": "mcp/playwright",
+        "allowed_tools": [
+          "browser_navigate"
+        ]
+      }
+    ],
+    "context_length": 8000,
+    "temperature": 0
+  }'
+```
+
+```bash tab="Request with Images"
+# Image is a small red square encoded as a base64 data URL
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "qwen/qwen3-vl-4b",
+    "input": [
+      {
+        "type": "text",
+        "content": "Describe this image in two sentences"
+      },
+      {
+        "type": "image",
+        "data_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQz0AEYBxVSF+FABJADveWkH6oAAAAAElFTkSuQmCC"
+      }
+    ],
+    "context_length": 2048,
+    "temperature": 0
+  }'
 ```
 ````
@@ -349,78 +345,74 @@ variants:
       description: Identifier of the response for subsequent requests. Starts with `"resp_"`. Present when `store` is `true`.
 ```
 :::split:::
-```lms_code_snippet
-variants:
-  Request with MCP:
-    language: json
-    code: |
-      {
-        "model_instance_id": "ibm/granite-4-micro",
-        "output": [
-          {
-            "type": "tool_call",
-            "tool": "model_search",
-            "arguments": {
-              "sort": "trendingScore",
-              "query": "",
-              "limit": 1
-            },
-            "output": "...",
-            "provider_info": {
-              "server_label": "huggingface",
-              "type": "ephemeral_mcp"
-            }
-          },
-          {
-            "type": "message",
-            "content": "..."
-          },
-          {
-            "type": "tool_call",
-            "tool": "browser_navigate",
-            "arguments": {
-              "url": "https://lmstudio.ai"
-            },
-            "output": "...",
-            "provider_info": {
-              "plugin_id": "mcp/playwright",
-              "type": "plugin"
-            }
-          },
-          {
-            "type": "message",
-            "content": "**Top Trending Model on Hugging Face** ... Below is a quick snapshot of what’s on the landing page ... more details on the model or LM Studio itself!"
-          }
-        ],
-        "stats": {
-          "input_tokens": 646,
-          "total_output_tokens": 586,
-          "reasoning_output_tokens": 0,
-          "tokens_per_second": 29.753900615398926,
-          "time_to_first_token_seconds": 1.088,
-          "model_load_time_seconds": 2.656
-        },
-        "response_id": "resp_4ef013eba0def1ed23f19dde72b67974c579113f544086de"
-      }
-  Request with Images:
-    language: json
-    code: |
-      {
-        "model_instance_id": "qwen/qwen3-vl-4b",
-        "output": [
-          {
-            "type": "message",
-            "content": "This image is a solid, vibrant red square that fills the entire frame, with no discernible texture, pattern, or other elements. It presents a minimalist, uniform visual field of pure red, evoking a sense of boldness or urgency."
-          }
-        ],
-        "stats": {
-          "input_tokens": 17,
-          "total_output_tokens": 50,
-          "reasoning_output_tokens": 0,
-          "tokens_per_second": 51.03762685242662,
-          "time_to_first_token_seconds": 0.814
-        },
-        "response_id": "resp_0182bd7c479d7451f9a35471f9c26b34de87a7255856b9a4"
-      }
+```json tab="Request with MCP"
+{
+  "model_instance_id": "ibm/granite-4-micro",
+  "output": [
+    {
+      "type": "tool_call",
+      "tool": "model_search",
+      "arguments": {
+        "sort": "trendingScore",
+        "query": "",
+        "limit": 1
+      },
+      "output": "...",
+      "provider_info": {
+        "server_label": "huggingface",
+        "type": "ephemeral_mcp"
+      }
+    },
+    {
+      "type": "message",
+      "content": "..."
+    },
+    {
+      "type": "tool_call",
+      "tool": "browser_navigate",
+      "arguments": {
+        "url": "https://lmstudio.ai"
+      },
+      "output": "...",
+      "provider_info": {
+        "plugin_id": "mcp/playwright",
+        "type": "plugin"
+      }
+    },
+    {
+      "type": "message",
+      "content": "**Top Trending Model on Hugging Face** ... Below is a quick snapshot of what’s on the landing page ... more details on the model or LM Studio itself!"
+    }
+  ],
+  "stats": {
+    "input_tokens": 646,
+    "total_output_tokens": 586,
+    "reasoning_output_tokens": 0,
+    "tokens_per_second": 29.753900615398926,
+    "time_to_first_token_seconds": 1.088,
+    "model_load_time_seconds": 2.656
+  },
+  "response_id": "resp_4ef013eba0def1ed23f19dde72b67974c579113f544086de"
+}
+```
+
+```json tab="Request with Images"
+{
+  "model_instance_id": "qwen/qwen3-vl-4b",
+  "output": [
+    {
+      "type": "message",
+      "content": "This image is a solid, vibrant red square that fills the entire frame, with no discernible texture, pattern, or other elements. It presents a minimalist, uniform visual field of pure red, evoking a sense of boldness or urgency."
+    }
+  ],
+  "stats": {
+    "input_tokens": 17,
+    "total_output_tokens": 50,
+    "reasoning_output_tokens": 0,
+    "tokens_per_second": 51.03762685242662,
+    "time_to_first_token_seconds": 0.814
+  },
+  "response_id": "resp_0182bd7c479d7451f9a35471f9c26b34de87a7255856b9a4"
+}
 ```
 ````
diff --git a/1_developer/2_rest/download-status.md b/1_developer/2_rest/download-status.md
index e67ff48..88a12ae 100644
--- a/1_developer/2_rest/download-status.md
+++ b/1_developer/2_rest/download-status.md
@@ -1,7 +1,7 @@
 ---
 title: "Get download status"
 description: "Get the status of model downloads"
-fullPage: true
+full: true
 index: 9
 api_info:
   method: GET
@@ -18,14 +18,9 @@ api_info:
       description: The unique identifier of the download job. `job_id` is returned by the [download](/docs/developer/rest/download) endpoint when a download is initiated.
 ```
 :::split:::
-```lms_code_snippet
-title: Example Request
-variants:
-  curl:
-    language: bash
-    code: |
-      curl -H "Authorization: Bearer $LM_API_TOKEN" \
-        http://localhost:1234/api/v1/models/download/status/job_493c7c9ded
+```bash title="Example Request"
+curl -H "Authorization: Bearer $LM_API_TOKEN" \
+  http://localhost:1234/api/v1/models/download/status/job_493c7c9ded
 ```
 ````
@@ -67,19 +62,14 @@ Returns a single download job status object. The response varies based on the do
       description: Download start time in ISO 8601 format.
 ```
 :::split:::
-```lms_code_snippet
-title: Response
-variants:
-  json:
-    language: json
-    code: |
-      {
-        "job_id": "job_493c7c9ded",
-        "status": "completed",
-        "total_size_bytes": 2279145003,
-        "downloaded_bytes": 2279145003,
-        "started_at": "2025-10-03T15:33:23.496Z",
-        "completed_at": "2025-10-03T15:43:12.102Z"
-      }
+```json title="Response"
+{
+  "job_id": "job_493c7c9ded",
+  "status": "completed",
+  "total_size_bytes": 2279145003,
+  "downloaded_bytes": 2279145003,
+  "started_at": "2025-10-03T15:33:23.496Z",
+  "completed_at": "2025-10-03T15:43:12.102Z"
+}
 ```
 ````
diff --git a/1_developer/2_rest/download.md b/1_developer/2_rest/download.md
index aa61212..0031e4f 100644
--- a/1_developer/2_rest/download.md
+++ b/1_developer/2_rest/download.md
@@ -1,7 +1,7 @@
 ---
 title: "Download a model"
 description: "Download LLMs and embedding models"
-fullPage: true
+full: true
 index: 8
 api_info:
   method: POST
@@ -22,18 +22,13 @@ api_info:
       description: Quantization level of the model to download (e.g., `Q4_K_M`). Only supported for Hugging Face links.
 ```
 :::split:::
-```lms_code_snippet
-title: Example Request
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/models/download \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro"
-        }'
+```bash title="Example Request"
+curl http://localhost:1234/api/v1/models/download \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro"
+  }'
 ```
 ````
@@ -64,17 +59,12 @@ Returns a download job status object. The response varies based on the download
       description: Download start time in ISO 8601 format. Absent when `status` is `already_downloaded`.
 ```
 :::split:::
-```lms_code_snippet
-title: Response
-variants:
-  json:
-    language: json
-    code: |
-      {
-        "job_id": "job_493c7c9ded",
-        "status": "downloading",
-        "total_size_bytes": 2279145003,
-        "started_at": "2025-10-03T15:33:23.496Z"
-      }
+```json title="Response"
+{
+  "job_id": "job_493c7c9ded",
+  "status": "downloading",
+  "total_size_bytes": 2279145003,
+  "started_at": "2025-10-03T15:33:23.496Z"
+}
 ```
 ````
diff --git a/1_developer/2_rest/endpoints.md b/1_developer/2_rest/endpoints.md
index ef18ba3..dccc052 100644
--- a/1_developer/2_rest/endpoints.md
+++ b/1_developer/2_rest/endpoints.md
@@ -3,9 +3,9 @@
 title: REST API v0
 description: "The REST API includes enhanced stats such as Token / Second and Time To First Token (TTFT), as well as rich information about models such as loaded vs unloaded, max context, quantization, and more."
 ---
 
-```lms_warning
+:::warning[Heads Up]
 LM Studio now has a [v1 REST API](/docs/developer/rest)! We recommend using the v1 API for new projects!
-```
+:::
 
 ##### Requires [LM Studio 0.3.6](/download) or newer.
 
@@ -31,9 +31,9 @@ To start the server, run the following command:
 
 ```
 lms server start
 ```
 
-```lms_protip
+:::tip[Pro Tip]
 You can run LM Studio as a service and get the server to auto-start on boot without launching the GUI. [Learn about Headless Mode](/docs/developer/core/headless).
-```
+:::
 
 ## Endpoints
 
diff --git a/1_developer/2_rest/index.md b/1_developer/2_rest/index.md
index 6d3bc5c..5c5035f 100644
--- a/1_developer/2_rest/index.md
+++ b/1_developer/2_rest/index.md
@@ -2,7 +2,7 @@ title: LM Studio API
 sidebar_title: Overview
 description: LM Studio's REST API for local inference and model management
-fullPage: false
+full: false
 index: 1
 ---
 
diff --git a/1_developer/2_rest/list.md b/1_developer/2_rest/list.md
index dacfa80..856396e 100644
--- a/1_developer/2_rest/list.md
+++ b/1_developer/2_rest/list.md
@@ -1,7 +1,7 @@
 ---
 title: "List your models"
 description: "Get a list of available models on your system, including both LLMs and embedding models."
-fullPage: true
+full: true
 index: 6
 api_info:
   method: GET
@@ -12,14 +12,9 @@
 This endpoint has no request parameters.
 :::split:::
-```lms_code_snippet
-title: Example Request
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/models \
-        -H "Authorization: Bearer $LM_API_TOKEN"
+```bash title="Example Request"
+curl http://localhost:1234/api/v1/models \
+  -H "Authorization: Bearer $LM_API_TOKEN"
 ```
 ````
@@ -140,99 +135,94 @@ variants:
       description: The currently selected variant name. Present when `variants` is present.
 ```
 :::split:::
-```lms_code_snippet
-title: Response
-variants:
-  json:
-    language: json
-    code: |
+```json title="Response"
+{
+  "models": [
+    {
+      "type": "llm",
+      "publisher": "google",
+      "key": "google/gemma-4-26b-a4b",
+      "display_name": "Gemma 4 26B A4B",
+      "architecture": "gemma4",
+      "quantization": {
+        "name": "Q4_K_M",
+        "bits_per_weight": 4
+      },
+      "size_bytes": 17990911801,
+      "params_string": "26B-A4B",
+      "loaded_instances": [
+        {
+          "id": "google/gemma-4-26b-a4b",
+          "config": {
+            "context_length": 4096,
+            "eval_batch_size": 512,
+            "parallel": 4,
+            "flash_attention": true,
+            "num_experts": 8,
+            "offload_kv_cache_to_gpu": true
+          }
+        }
+      ],
+      "max_context_length": 262144,
+      "format": "gguf",
+      "capabilities": {
+        "vision": true,
+        "trained_for_tool_use": true,
+        "reasoning": {
+          "allowed_options": [
+            "off",
+            "on"
+          ],
+          "default": "on"
+        }
+      },
+      "description": null,
+      "variants": [
+        "google/gemma-4-26b-a4b@q4_k_m"
+      ],
+      "selected_variant": "google/gemma-4-26b-a4b@q4_k_m"
+    },
+    {
+      "type": "llm",
+      "publisher": "deepseek",
+      "key": "deepseek-r1",
+      "display_name": "DeepSeek R1",
+      "architecture": "deepseek",
+      "quantization": {
+        "name": "Q4_K_M",
+        "bits_per_weight": 4
+      },
+      "size_bytes": 40492610355,
+      "params_string": "671B",
+      "loaded_instances": [],
+      "max_context_length": 131072,
+      "format": "gguf",
+      "capabilities": {
+        "vision": false,
+        "trained_for_tool_use": true,
+        "reasoning": {
+          "allowed_options": ["on"],
+          "default": "on"
+        }
+      },
+      "description": null
+    },
+    {
+      "type": "embedding",
+      "publisher": "gaianet",
+      "key": "text-embedding-nomic-embed-text-v1.5-embedding",
+      "display_name": "Nomic Embed Text v1.5",
+      "quantization": {
+        "name": "F16",
+        "bits_per_weight": 16
+      },
+      "size_bytes": 274290560,
+      "params_string": null,
+      "loaded_instances": [],
+      "max_context_length": 2048,
+      "format": "gguf"
+    }
+  ]
+}
 ```
 ````
diff --git a/1_developer/2_rest/load.md b/1_developer/2_rest/load.md
index cdb4a11..ea58e61 100644
--- a/1_developer/2_rest/load.md
+++ b/1_developer/2_rest/load.md
@@ -1,7 +1,7 @@
 ---
 title: "Load a model"
 description: "Load an LLM or Embedding model into memory with custom configuration for inference"
-fullPage: true
+full: true
 index: 7
 api_info:
   method: POST
@@ -42,21 +42,16 @@ api_info:
       description: If true, echoes the final load configuration in the response under `"load_config"`. Default `false`.
 ```
 :::split:::
-```lms_code_snippet
-title: Example Request
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/models/load \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "openai/gpt-oss-20b",
-          "context_length": 16384,
-          "flash_attention": true,
-          "echo_load_config": true
-        }'
+```bash title="Example Request"
+curl http://localhost:1234/api/v1/models/load \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "openai/gpt-oss-20b",
+    "context_length": 16384,
+    "flash_attention": true,
+    "echo_load_config": true
+  }'
 ```
 ````
@@ -118,24 +113,19 @@ variants:
       description: Maximum number of tokens that the model will consider.
 ```
 :::split:::
-```lms_code_snippet
-title: Response
-variants:
-  json:
-    language: json
-    code: |
-      {
-        "type": "llm",
-        "instance_id": "openai/gpt-oss-20b",
-        "load_time_seconds": 9.099,
-        "status": "loaded",
-        "load_config": {
-          "context_length": 16384,
-          "eval_batch_size": 512,
-          "flash_attention": true,
-          "offload_kv_cache_to_gpu": true,
-          "num_experts": 4
-        }
-      }
+```json title="Response"
+{
+  "type": "llm",
+  "instance_id": "openai/gpt-oss-20b",
+  "load_time_seconds": 9.099,
+  "status": "loaded",
+  "load_config": {
+    "context_length": 16384,
+    "eval_batch_size": 512,
+    "flash_attention": true,
+    "offload_kv_cache_to_gpu": true,
+    "num_experts": 4
+  }
+}
 ```
 ````
diff --git a/1_developer/2_rest/meta.json b/1_developer/2_rest/meta.json
new file mode 100644
index 0000000..97f3ec8
--- /dev/null
+++ b/1_developer/2_rest/meta.json
@@ -0,0 +1,15 @@
+{
+  "title": "REST API",
+  "pages": [
+    "chat",
+    "download-status",
+    "download",
+    "endpoints",
+    "list",
+    "load",
+    "quickstart",
+    "stateful-chats",
+    "streaming-events",
+    "unload"
+  ]
+}
diff --git a/1_developer/2_rest/quickstart.md b/1_developer/2_rest/quickstart.md
index e2c12f6..f0bd98e 100644
--- a/1_developer/2_rest/quickstart.md
+++ b/1_developer/2_rest/quickstart.md
@@ -2,7 +2,7 @@
 title: Get up and running with the LM Studio API
 sidebar_title: Quickstart
 description: Download a model and start a simple Chat session using the REST API
-fullPage: false
+full: false
 index: 2
 ---
 
@@ -36,53 +36,49 @@ Use the chat endpoint to send a message to a model. By default, the model will b
 
 The `/api/v1/chat` endpoint is stateful, which means you do not need to pass the full history in every request. Read more about it [here](/docs/developer/rest/stateful-chats).
 
-```lms_code_snippet
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "Write a short haiku about sunrise."
-        }'
-  Python:
-    language: python
-    code: |
-      import os
-      import requests
-      import json
-
-      response = requests.post(
-          "http://localhost:1234/api/v1/chat",
-          headers={
-              "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
-              "Content-Type": "application/json"
-          },
-          json={
-              "model": "ibm/granite-4-micro",
-              "input": "Write a short haiku about sunrise."
-          }
-      )
-      print(json.dumps(response.json(), indent=2))
-  TypeScript:
-    language: typescript
-    code: |
-      const response = await fetch("http://localhost:1234/api/v1/chat", {
-        method: "POST",
-        headers: {
-          "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
-          "Content-Type": "application/json"
-        },
-        body: JSON.stringify({
-          model: "ibm/granite-4-micro",
-          input: "Write a short haiku about sunrise."
-        })
-      });
-      const data = await response.json();
-      console.log(data);
+```bash tab="curl"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "Write a short haiku about sunrise."
+  }'
+```
+
+```python tab="Python"
+import os
+import requests
+import json
+
+response = requests.post(
+    "http://localhost:1234/api/v1/chat",
+    headers={
+        "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
+        "Content-Type": "application/json"
+    },
+    json={
+        "model": "ibm/granite-4-micro",
+        "input": "Write a short haiku about sunrise."
+    }
+)
+print(json.dumps(response.json(), indent=2))
+```
+
+```typescript tab="TypeScript"
+const response = await fetch("http://localhost:1234/api/v1/chat", {
+  method: "POST",
+  headers: {
+    "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
+    "Content-Type": "application/json"
+  },
+  body: JSON.stringify({
+    model: "ibm/granite-4-micro",
+    input: "Write a short haiku about sunrise."
+  })
+});
+const data = await response.json();
+console.log(data);
 ```
 
 See the full [chat](/docs/developer/rest/chat) docs for more details.
@@ -91,154 +87,146 @@ See the full [chat](/docs/developer/rest/chat) docs for more details.
 
 Enable the model to interact with ephemeral Model Context Protocol (MCP) servers in `/api/v1/chat` by specifying servers in the `integrations` field.
 
-```lms_code_snippet
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "What is the top trending model on hugging face?",
-          "integrations": [
-            {
-              "type": "ephemeral_mcp",
-              "server_label": "huggingface",
-              "server_url": "https://huggingface.co/mcp",
-              "allowed_tools": ["model_search"]
-            }
-          ],
-          "context_length": 8000
-        }'
-  Python:
-    language: python
-    code: |
-      import os
-      import requests
-      import json
-
-      response = requests.post(
-          "http://localhost:1234/api/v1/chat",
-          headers={
-              "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
-              "Content-Type": "application/json"
-          },
-          json={
-              "model": "ibm/granite-4-micro",
-              "input": "What is the top trending model on hugging face?",
-              "integrations": [
-                  {
-                      "type": "ephemeral_mcp",
-                      "server_label": "huggingface",
-                      "server_url": "https://huggingface.co/mcp",
-                      "allowed_tools": ["model_search"]
-                  }
-              ],
-              "context_length": 8000
-          }
-      )
-      print(json.dumps(response.json(), indent=2))
-  TypeScript:
-    language: typescript
-    code: |
-      const response = await fetch("http://localhost:1234/api/v1/chat", {
-        method: "POST",
-        headers: {
-          "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
-          "Content-Type": "application/json"
-        },
-        body: JSON.stringify({
-          model: "ibm/granite-4-micro",
-          input: "What is the top trending model on hugging face?",
-          integrations: [
-            {
-              type: "ephemeral_mcp",
-              server_label: "huggingface",
-              server_url: "https://huggingface.co/mcp",
-              allowed_tools: ["model_search"]
-            }
-          ],
-          context_length: 8000
-        })
-        const data = await response.json();
-        console.log(data);
+```bash tab="curl"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "What is the top trending model on hugging face?",
+    "integrations": [
+      {
+        "type": "ephemeral_mcp",
+        "server_label": "huggingface",
+        "server_url": "https://huggingface.co/mcp",
+        "allowed_tools": ["model_search"]
+      }
+    ],
+    "context_length": 8000
+  }'
+```
+
+```python tab="Python"
+import os
+import requests
+import json
+
+response = requests.post(
+    "http://localhost:1234/api/v1/chat",
+    headers={
+        "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
+        "Content-Type": "application/json"
+    },
+    json={
+        "model": "ibm/granite-4-micro",
+        "input": "What is the top trending model on hugging face?",
+        "integrations": [
+            {
+                "type": "ephemeral_mcp",
+                "server_label": "huggingface",
+                "server_url": "https://huggingface.co/mcp",
+                "allowed_tools": ["model_search"]
+            }
+        ],
+        "context_length": 8000
+    }
+)
+print(json.dumps(response.json(), indent=2))
+```
+
+```typescript tab="TypeScript"
+const response = await fetch("http://localhost:1234/api/v1/chat", {
+  method: "POST",
+  headers: {
+    "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
+    "Content-Type": "application/json"
+  },
+  body: JSON.stringify({
+    model: "ibm/granite-4-micro",
+    input: "What is the top trending model on hugging face?",
+    integrations: [
+      {
+        type: "ephemeral_mcp",
+        server_label: "huggingface",
+        server_url: "https://huggingface.co/mcp",
+        allowed_tools: ["model_search"]
+      }
+    ],
+    context_length: 8000
+  })
+});
+const data = await response.json();
+console.log(data);
 ```
 
 You can also use locally configured MCP plugins (from your `mcp.json`) via the `integrations` field. Using locally run MCP plugins requires authentication via an API token passed through the `Authorization` header. Read more about authentication [here](/docs/developer/core/authentication).
 
-```lms_code_snippet
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "Open lmstudio.ai",
-          "integrations": [
-            {
-              "type": "plugin",
-              "id": "mcp/playwright",
-              "allowed_tools": ["browser_navigate"]
-            }
-          ],
-          "context_length": 8000
-        }'
-  Python:
-    language: python
-    code: |
-      import os
-      import requests
-      import json
-
-      response = requests.post(
-          "http://localhost:1234/api/v1/chat",
-          headers={
-              "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
-              "Content-Type": "application/json"
-          },
-          json={
-              "model": "ibm/granite-4-micro",
-              "input": "Open lmstudio.ai",
-              "integrations": [
-                  {
-                      "type": "plugin",
-                      "id": "mcp/playwright",
-                      "allowed_tools": ["browser_navigate"]
-                  }
-              ],
-              "context_length": 8000
-          }
-      )
-      print(json.dumps(response.json(), indent=2))
-  TypeScript:
-    language: typescript
-    code: |
-      const response = await fetch("http://localhost:1234/api/v1/chat", {
-        method: "POST",
-        headers: {
-          "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
-          "Content-Type": "application/json"
-        },
-        body: JSON.stringify({
-          model: "ibm/granite-4-micro",
-          input: "Open lmstudio.ai",
-          integrations: [
-            {
-              type: "plugin",
-              id: "mcp/playwright",
-              allowed_tools: ["browser_navigate"]
-            }
-          ],
-          context_length: 8000
-        })
-      });
-      const data = await response.json();
-      console.log(data);
+```bash tab="curl"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "Open lmstudio.ai",
+    "integrations": [
+      {
+        "type": "plugin",
+        "id": "mcp/playwright",
+        "allowed_tools": ["browser_navigate"]
+      }
+    ],
+    "context_length": 8000
+  }'
+```
+
+```python tab="Python"
+import os
+import requests
+import json
+
+response = requests.post(
+    "http://localhost:1234/api/v1/chat",
+    headers={
+        "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}",
+        "Content-Type": "application/json"
+    },
+    json={
+        "model": "ibm/granite-4-micro",
+        "input": "Open lmstudio.ai",
+        "integrations": [
+            {
+                "type": "plugin",
+                "id": "mcp/playwright",
+                "allowed_tools": ["browser_navigate"]
+            }
+        ],
+        "context_length": 8000
+    }
+)
+print(json.dumps(response.json(), indent=2))
+```
+
+```typescript tab="TypeScript"
+const response = await fetch("http://localhost:1234/api/v1/chat", {
+  method: "POST",
+  headers: {
+    "Authorization": `Bearer ${process.env.LM_API_TOKEN}`,
+    "Content-Type":
"application/json" + }, + body: JSON.stringify({ + model: "ibm/granite-4-micro", + input: "Open lmstudio.ai", + integrations: [ + { + type: "plugin", + id: "mcp/playwright", + allowed_tools: ["browser_navigate"] + } + ], + context_length: 8000 + }) +}); +const data = await response.json(); +console.log(data); ``` See the full [chat](/docs/developer/rest/chat) docs for more details. @@ -247,86 +235,78 @@ See the full [chat](/docs/developer/rest/chat) docs for more details. Use the download endpoint to download models by identifier from the [LM Studio model catalog](https://lmstudio.ai/models), or by Hugging Face model URL. -```lms_code_snippet -variants: - curl: - language: bash - code: | - curl http://localhost:1234/api/v1/models/download \ - -H "Authorization: Bearer $LM_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "ibm/granite-4-micro" - }' - Python: - language: python - code: | - import os - import requests - import json - - response = requests.post( - "http://localhost:1234/api/v1/models/download", - headers={ - "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}", - "Content-Type": "application/json" - }, - json={"model": "ibm/granite-4-micro"} - ) - print(json.dumps(response.json(), indent=2)) - TypeScript: - language: typescript - code: | - const response = await fetch("http://localhost:1234/api/v1/models/download", { - method: "POST", - headers: { - "Authorization": `Bearer ${process.env.LM_API_TOKEN}`, - "Content-Type": "application/json" - }, - body: JSON.stringify({ - model: "ibm/granite-4-micro" - }) - }); - const data = await response.json(); - console.log(data); +```bash tab="curl" +curl http://localhost:1234/api/v1/models/download \ + -H "Authorization: Bearer $LM_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "ibm/granite-4-micro" + }' +``` + +```python tab="Python" +import os +import requests +import json + +response = requests.post( + "http://localhost:1234/api/v1/models/download", + headers={ + "Authorization": f"Bearer {os.environ['LM_API_TOKEN']}", + "Content-Type": "application/json" + }, + json={"model": "ibm/granite-4-micro"} +) +print(json.dumps(response.json(), indent=2)) +``` + +```typescript tab="TypeScript" +const response = await fetch("http://localhost:1234/api/v1/models/download", { + method: "POST", + headers: { + "Authorization": `Bearer ${process.env.LM_API_TOKEN}`, + "Content-Type": "application/json" + }, + body: JSON.stringify({ + model: "ibm/granite-4-micro" + }) +}); +const data = await response.json(); +console.log(data); ``` The response will return a `job_id` that you can use to track download progress. 
-```lms_code_snippet
-variants:
-  curl:
-    language: bash
-    code: |
-      curl -H "Authorization: Bearer $LM_API_TOKEN" \
-        http://localhost:1234/api/v1/models/download/status/{job_id}
-  Python:
-    language: python
-    code: |
-      import os
-      import requests
-      import json
-
-      job_id = "your-job-id"
-      response = requests.get(
-          f"http://localhost:1234/api/v1/models/download/status/{job_id}",
-          headers={"Authorization": f"Bearer {os.environ['LM_API_TOKEN']}"}
-      )
-      print(json.dumps(response.json(), indent=2))
-  TypeScript:
-    language: typescript
-    code: |
-      const jobId = "your-job-id";
-      const response = await fetch(
-        `http://localhost:1234/api/v1/models/download/status/${jobId}`,
-        {
-          headers: {
-            "Authorization": `Bearer ${process.env.LM_API_TOKEN}`
-          }
-        }
-      );
-      const data = await response.json();
-      console.log(data);
+```bash tab="curl"
+curl -H "Authorization: Bearer $LM_API_TOKEN" \
+  http://localhost:1234/api/v1/models/download/status/{job_id}
+```
+
+```python tab="Python"
+import os
+import requests
+import json
+
+job_id = "your-job-id"
+response = requests.get(
+    f"http://localhost:1234/api/v1/models/download/status/{job_id}",
+    headers={"Authorization": f"Bearer {os.environ['LM_API_TOKEN']}"}
+)
+print(json.dumps(response.json(), indent=2))
+```
+
+```typescript tab="TypeScript"
+const jobId = "your-job-id";
+const response = await fetch(
+  `http://localhost:1234/api/v1/models/download/status/${jobId}`,
+  {
+    headers: {
+      "Authorization": `Bearer ${process.env.LM_API_TOKEN}`
+    }
+  }
+);
+const data = await response.json();
+console.log(data);
 ```
 
 See the [download](/docs/developer/rest/download) and [download status](/docs/developer/rest/download-status) docs for more details.
diff --git a/1_developer/2_rest/stateful-chats.md b/1_developer/2_rest/stateful-chats.md
index 5280eee..6b926e9 100644
--- a/1_developer/2_rest/stateful-chats.md
+++ b/1_developer/2_rest/stateful-chats.md
@@ -11,63 +11,48 @@ The `/api/v1/chat` endpoint is stateful by default. This means you don't need
 to pass the full history in every request.
 
 When you send a chat request, LM Studio stores the conversation in a chat thread and returns a `response_id` in the response. Use this `response_id` in subsequent requests to continue the conversation.
 
-```lms_code_snippet
-title: Start a new conversation
-variants:
-  curl:
-    language: bash
-    code: |
-      curl http://localhost:1234/api/v1/chat \
-        -H "Authorization: Bearer $LM_API_TOKEN" \
-        -H "Content-Type: application/json" \
-        -d '{
-          "model": "ibm/granite-4-micro",
-          "input": "My favorite color is blue."
-        }'
+```bash title="Start a new conversation"
+curl http://localhost:1234/api/v1/chat \
+  -H "Authorization: Bearer $LM_API_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ibm/granite-4-micro",
+    "input": "My favorite color is blue."
+  }'
 ```
 
 The response includes a `response_id`:
 
-```lms_info
+:::info[Info]
 Every response includes a unique `response_id` that you can use to reference that specific point in the conversation for future requests. This allows you to branch conversations.
-```
+:::
 
-```lms_code_snippet
-title: Response
-variants:
-  response:
-    language: json
-    code: |
-      {
-        "model_instance_id": "ibm/granite-4-micro",
-        "output": [
-          {
-            "type": "message",
-            "content": "That's great! Blue is a beautiful color..."
-          }
-        ],
-        "response_id": "resp_abc123xyz..."
-      }
+```json title="Response"
+{
+  "model_instance_id": "ibm/granite-4-micro",
+  "output": [
+    {
+      "type": "message",
+      "content": "That's great! Blue is a beautiful color..."
+    }
+  ],
+  "response_id": "resp_abc123xyz..."
+} ``` ## Continue a conversation Pass the `previous_response_id` in your next request to continue the conversation. The model will remember the previous context. -```lms_code_snippet -title: Continue the conversation -variants: - curl: - language: bash - code: | - curl http://localhost:1234/api/v1/chat \ - -H "Authorization: Bearer $LM_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "ibm/granite-4-micro", - "input": "What color did I just mention?", - "previous_response_id": "resp_abc123xyz..." - }' +```bash title="Continue the conversation" +curl http://localhost:1234/api/v1/chat \ + -H "Authorization: Bearer $LM_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "ibm/granite-4-micro", + "input": "What color did I just mention?", + "previous_response_id": "resp_abc123xyz..." + }' ``` The model can reference the previous message without you needing to resend it and will return a new `response_id` for further continuation. @@ -76,20 +61,15 @@ The model can reference the previous message without you needing to resend it an If you don't want to store the conversation, set `store` to `false`. The response will not include a `response_id`. -```lms_code_snippet -title: Stateless chat -variants: - curl: - language: bash - code: | - curl http://localhost:1234/api/v1/chat \ - -H "Authorization: Bearer $LM_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "ibm/granite-4-micro", - "input": "Tell me a joke.", - "store": false - }' +```bash title="Stateless chat" +curl http://localhost:1234/api/v1/chat \ + -H "Authorization: Bearer $LM_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "ibm/granite-4-micro", + "input": "Tell me a joke.", + "store": false + }' ``` This is useful for one-off requests where you don't need to maintain context. diff --git a/1_developer/2_rest/streaming-events.md b/1_developer/2_rest/streaming-events.md index b783b38..18d2bb6 100644 --- a/1_developer/2_rest/streaming-events.md +++ b/1_developer/2_rest/streaming-events.md @@ -48,16 +48,11 @@ An event that is emitted at the start of a chat response stream. description: The type of the event. Always `chat.start`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "chat.start", - "model_instance_id": "openai/gpt-oss-20b" - } +```json title="Example Event Data" +{ + "type": "chat.start", + "model_instance_id": "openai/gpt-oss-20b" +} ``` ```` @@ -74,16 +69,11 @@ Signals the start of a model being loaded to fulfill the chat request. Will not description: The type of the event. Always `model_load.start`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "model_load.start", - "model_instance_id": "openai/gpt-oss-20b" - } +```json title="Example Event Data" +{ + "type": "model_load.start", + "model_instance_id": "openai/gpt-oss-20b" +} ``` ```` @@ -103,17 +93,12 @@ Progress of the model load. description: The type of the event. Always `model_load.progress`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "model_load.progress", - "model_instance_id": "openai/gpt-oss-20b", - "progress": 0.65 - } +```json title="Example Event Data" +{ + "type": "model_load.progress", + "model_instance_id": "openai/gpt-oss-20b", + "progress": 0.65 +} ``` ```` @@ -133,17 +118,12 @@ Signals a successfully completed model load. 
description: The type of the event. Always `model_load.end`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "model_load.end", - "model_instance_id": "openai/gpt-oss-20b", - "load_time_seconds": 12.34 - } +```json title="Example Event Data" +{ + "type": "model_load.end", + "model_instance_id": "openai/gpt-oss-20b", + "load_time_seconds": 12.34 +} ``` ```` @@ -157,15 +137,10 @@ Signals the start of the model processing a prompt. description: The type of the event. Always `prompt_processing.start`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "prompt_processing.start" - } +```json title="Example Event Data" +{ + "type": "prompt_processing.start" +} ``` ```` @@ -182,16 +157,11 @@ Progress of the model processing a prompt. description: The type of the event. Always `prompt_processing.progress`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "prompt_processing.progress", - "progress": 0.5 - } +```json title="Example Event Data" +{ + "type": "prompt_processing.progress", + "progress": 0.5 +} ``` ```` @@ -205,15 +175,10 @@ Signals the end of the model processing a prompt. description: The type of the event. Always `prompt_processing.end`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "prompt_processing.end" - } +```json title="Example Event Data" +{ + "type": "prompt_processing.end" +} ``` ```` @@ -227,15 +192,10 @@ Signals the model is starting to stream reasoning content. description: The type of the event. Always `reasoning.start`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "reasoning.start" - } +```json title="Example Event Data" +{ + "type": "reasoning.start" +} ``` ```` @@ -252,16 +212,11 @@ A chunk of reasoning content. Multiple deltas may arrive. description: The type of the event. Always `reasoning.delta`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "reasoning.delta", - "content": "Need to" - } +```json title="Example Event Data" +{ + "type": "reasoning.delta", + "content": "Need to" +} ``` ```` @@ -275,15 +230,10 @@ Signals the end of the reasoning stream. description: The type of the event. Always `reasoning.end`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "reasoning.end" - } +```json title="Example Event Data" +{ + "type": "reasoning.end" +} ``` ```` @@ -324,20 +274,15 @@ Emitted when the model starts a tool call. description: The type of the event. Always `tool_call.start`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "tool_call.start", - "tool": "model_search", - "provider_info": { - "type": "ephemeral_mcp", - "server_label": "huggingface" - } - } +```json title="Example Event Data" +{ + "type": "tool_call.start", + "tool": "model_search", + "provider_info": { + "type": "ephemeral_mcp", + "server_label": "huggingface" + } +} ``` ```` @@ -381,24 +326,19 @@ Arguments streamed for the current tool call. description: The type of the event. Always `tool_call.arguments`. 
``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "tool_call.arguments", - "tool": "model_search", - "arguments": { - "sort": "trendingScore", - "limit": 1 - }, - "provider_info": { - "type": "ephemeral_mcp", - "server_label": "huggingface" - } - } +```json title="Example Event Data" +{ + "type": "tool_call.arguments", + "tool": "model_search", + "arguments": { + "sort": "trendingScore", + "limit": 1 + }, + "provider_info": { + "type": "ephemeral_mcp", + "server_label": "huggingface" + } +} ``` ```` @@ -445,25 +385,20 @@ Result of the tool call, along with the arguments used. description: The type of the event. Always `tool_call.success`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "tool_call.success", - "tool": "model_search", - "arguments": { - "sort": "trendingScore", - "limit": 1 - }, - "output": "[{\"type\":\"text\",\"text\":\"Showing first 1 models...\"}]", - "provider_info": { - "type": "ephemeral_mcp", - "server_label": "huggingface" - } - } +```json title="Example Event Data" +{ + "type": "tool_call.success", + "tool": "model_search", + "arguments": { + "sort": "trendingScore", + "limit": 1 + }, + "output": "[{\"type\":\"text\",\"text\":\"Showing first 1 models...\"}]", + "provider_info": { + "type": "ephemeral_mcp", + "server_label": "huggingface" + } +} ``` ```` @@ -510,20 +445,15 @@ Indicates that the tool call failed. description: The type of the event. Always `tool_call.failure`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "tool_call.failure", - "reason": "Cannot find tool with name open_browser.", - "metadata": { - "type": "invalid_name", - "tool_name": "open_browser" - } - } +```json title="Example Event Data" +{ + "type": "tool_call.failure", + "reason": "Cannot find tool with name open_browser.", + "metadata": { + "type": "invalid_name", + "tool_name": "open_browser" + } +} ``` ```` @@ -537,15 +467,10 @@ Signals the model is about to stream a message. description: The type of the event. Always `message.start`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "message.start" - } +```json title="Example Event Data" +{ + "type": "message.start" +} ``` ```` @@ -562,16 +487,11 @@ A chunk of message content. Multiple deltas may arrive. description: The type of the event. Always `message.delta`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "message.delta", - "content": "The current" - } +```json title="Example Event Data" +{ + "type": "message.delta", + "content": "The current" +} ``` ```` @@ -585,15 +505,10 @@ Signals the end of the message stream. description: The type of the event. Always `message.end`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "message.end" - } +```json title="Example Event Data" +{ + "type": "message.end" +} ``` ```` @@ -625,21 +540,16 @@ An error occurred during streaming. The final payload will still be sent in `cha description: The type of the event. Always `error`. 
``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | - { - "type": "error", - "error": { - "type": "invalid_request", - "message": "\"model\" is required", - "code": "missing_required_parameter", - "param": "model" - } - } +```json title="Example Event Data" +{ + "type": "error", + "error": { + "type": "invalid_request", + "message": "\"model\" is required", + "code": "missing_required_parameter", + "param": "model" + } +} ``` ```` @@ -656,36 +566,31 @@ Final event containing the full aggregated response, equivalent to the non-strea description: The type of the event. Always `chat.end`. ``` :::split::: -```lms_code_snippet -title: Example Event Data -variants: - json: - language: json - code: | +```json title="Example Event Data" +{ + "type": "chat.end", + "result": { + "model_instance_id": "openai/gpt-oss-20b", + "output": [ + { "type": "reasoning", "content": "Need to call function." }, { - "type": "chat.end", - "result": { - "model_instance_id": "openai/gpt-oss-20b", - "output": [ - { "type": "reasoning", "content": "Need to call function." }, - { - "type": "tool_call", - "tool": "model_search", - "arguments": { "sort": "trendingScore", "limit": 1 }, - "output": "[{\"type\":\"text\",\"text\":\"Showing first 1 models...\"}]", - "provider_info": { "type": "ephemeral_mcp", "server_label": "huggingface" } - }, - { "type": "message", "content": "The current top‑trending model is..." } - ], - "stats": { - "input_tokens": 329, - "total_output_tokens": 268, - "reasoning_output_tokens": 5, - "tokens_per_second": 43.73, - "time_to_first_token_seconds": 0.781 - }, - "response_id": "resp_02b2017dbc06c12bfc353a2ed6c2b802f8cc682884bb5716" - } - } + "type": "tool_call", + "tool": "model_search", + "arguments": { "sort": "trendingScore", "limit": 1 }, + "output": "[{\"type\":\"text\",\"text\":\"Showing first 1 models...\"}]", + "provider_info": { "type": "ephemeral_mcp", "server_label": "huggingface" } + }, + { "type": "message", "content": "The current top‑trending model is..." } + ], + "stats": { + "input_tokens": 329, + "total_output_tokens": 268, + "reasoning_output_tokens": 5, + "tokens_per_second": 43.73, + "time_to_first_token_seconds": 0.781 + }, + "response_id": "resp_02b2017dbc06c12bfc353a2ed6c2b802f8cc682884bb5716" + } +} ``` ```` diff --git a/1_developer/2_rest/unload.md b/1_developer/2_rest/unload.md index b021494..4f185c5 100644 --- a/1_developer/2_rest/unload.md +++ b/1_developer/2_rest/unload.md @@ -1,7 +1,7 @@ --- title: "Unload a model" description: "Unload a loaded model from memory" -fullPage: true +full: true index: 8 api_info: method: POST @@ -18,18 +18,13 @@ api_info: description: Unique identifier of the model instance to unload. ``` :::split::: -```lms_code_snippet -title: Example Request -variants: - curl: - language: bash - code: | - curl http://localhost:1234/api/v1/models/unload \ - -H "Authorization: Bearer $LM_API_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "instance_id": "openai/gpt-oss-20b" - }' +```bash title="Example Request" +curl http://localhost:1234/api/v1/models/unload \ + -H "Authorization: Bearer $LM_API_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "instance_id": "openai/gpt-oss-20b" + }' ``` ```` @@ -43,14 +38,9 @@ variants: description: Unique identifier for the unloaded model instance. 
``` :::split::: -```lms_code_snippet -title: Response -variants: - json: - language: json - code: | - { - "instance_id": "openai/gpt-oss-20b" - } +```json title="Response" +{ + "instance_id": "openai/gpt-oss-20b" +} ``` ```` diff --git a/1_developer/3_openai-compat/completions.md b/1_developer/3_openai-compat/completions.md index e37063b..ee656e2 100644 --- a/1_developer/3_openai-compat/completions.md +++ b/1_developer/3_openai-compat/completions.md @@ -6,11 +6,11 @@ api_info: method: POST --- -```lms_warning +:::warning[Heads Up] This endpoint is no longer supported by OpenAI. LM Studio continues to support it. Using this endpoint with chat‑tuned models may produce unexpected tokens. Prefer base models. -``` +::: - Method: `POST` - Prompt template is not applied diff --git a/1_developer/3_openai-compat/meta.json b/1_developer/3_openai-compat/meta.json new file mode 100644 index 0000000..3357bbc --- /dev/null +++ b/1_developer/3_openai-compat/meta.json @@ -0,0 +1,12 @@ +{ + "title": "OpenAI Compatibility", + "pages": [ + "chat-completions", + "completions", + "embeddings", + "models", + "responses", + "structured-output", + "tools" + ] +} diff --git a/1_developer/4_anthropic-compat/meta.json b/1_developer/4_anthropic-compat/meta.json new file mode 100644 index 0000000..2ac0ac3 --- /dev/null +++ b/1_developer/4_anthropic-compat/meta.json @@ -0,0 +1,6 @@ +{ + "title": "Anthropic Compatibility", + "pages": [ + "messages" + ] +} diff --git a/1_developer/meta.json b/1_developer/meta.json new file mode 100644 index 0000000..1561597 --- /dev/null +++ b/1_developer/meta.json @@ -0,0 +1,11 @@ +{ + "title": "Developer", + "pages": [ + "0_core", + "2_rest", + "3_openai-compat", + "4_anthropic-compat", + "api-changelog", + "_embeddings" + ] +} diff --git a/1_python/1_getting-started/authentication.md b/1_python/1_getting-started/authentication.md index 71858e9..356f427 100644 --- a/1_python/1_getting-started/authentication.md +++ b/1_python/1_getting-started/authentication.md @@ -11,9 +11,9 @@ LM Studio supports API Tokens for authentication, providing a secure and conveni By default, the LM Studio API runs **without enforcing authentication**. For production or shared environments, enable API Token authentication for secure access. -```lms_info +:::info[Info] To enable API Token authentication, create tokens and control granular permissions, check [this guide](/docs/developer/core/authentication) for more details. -``` +::: ## Providing the API Token @@ -22,41 +22,35 @@ The API Token can be provided in two ways: 1. **Environment Variable (Recommended)**: Set the `LM_API_TOKEN` environment variable, and the SDK will automatically read it. 2. **Function Argument**: Pass the token directly as the `api_token` parameter. 
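+
+With the first option there is no extra wiring: the SDK reads `LM_API_TOKEN`
+from the environment automatically. A minimal sketch (illustrative; it assumes
+you have already exported the token in your shell, e.g. `export LM_API_TOKEN=...`):
+
+```python title="Environment variable sketch (illustrative)"
+import os
+
+import lmstudio as lms
+
+# Assumes LM_API_TOKEN was exported before this script runs.
+assert "LM_API_TOKEN" in os.environ, "set LM_API_TOKEN before running"
+
+# No api_token argument needed; the SDK picks the token up automatically.
+model = lms.llm()
+print(model.respond("What is the meaning of life?"))
+```
+
+The tabbed examples below show the second option, passing the token explicitly:
+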
-```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms - - # Configure the default client with an API token - lms.configure_default_client(api_token="your-token-here") - - model = lms.llm() - result = model.respond("What is the meaning of life?") - print(result) - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms - - # Pass api_token to the Client constructor - with lms.Client(api_token="your-token-here") as client: - model = client.llm.model() - result = model.respond("What is the meaning of life?") - print(result) - - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms - - # Pass api_token to the AsyncClient constructor - async with lms.AsyncClient(api_token="your-token-here") as client: - model = await client.llm.model() - result = await model.respond("What is the meaning of life?") - print(result) +```python tab="Python (convenience API)" +import lmstudio as lms + +# Configure the default client with an API token +lms.configure_default_client(api_token="your-token-here") + +model = lms.llm() +result = model.respond("What is the meaning of life?") +print(result) +``` + +```python tab="Python (scoped resource API)" +import lmstudio as lms + +# Pass api_token to the Client constructor +with lms.Client(api_token="your-token-here") as client: + model = client.llm.model() + result = model.respond("What is the meaning of life?") + print(result) +``` + +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms + +# Pass api_token to the AsyncClient constructor +async with lms.AsyncClient(api_token="your-token-here") as client: + model = await client.llm.model() + result = await model.respond("What is the meaning of life?") + print(result) ``` diff --git a/1_python/1_getting-started/meta.json b/1_python/1_getting-started/meta.json new file mode 100644 index 0000000..0696e54 --- /dev/null +++ b/1_python/1_getting-started/meta.json @@ -0,0 +1,8 @@ +{ + "title": "Getting Started", + "pages": [ + "authentication", + "project-setup", + "repl" + ] +} diff --git a/1_python/1_getting-started/project-setup.md b/1_python/1_getting-started/project-setup.md index f620af1..6b2357d 100644 --- a/1_python/1_getting-started/project-setup.md +++ b/1_python/1_getting-started/project-setup.md @@ -15,20 +15,16 @@ As it is published to PyPI, `lmstudio-python` may be installed using `pip` or your preferred project dependency manager (`pdm` and `uv` are shown, but other Python project management tools offer similar dependency addition commands). -```lms_code_snippet - variants: - pip: - language: bash - code: | - pip install lmstudio - pdm: - language: bash - code: | - pdm add lmstudio - uv: - language: bash - code: | - uv add lmstudio +```bash tab="pip" +pip install lmstudio +``` + +```bash tab="pdm" +pdm add lmstudio +``` + +```bash tab="uv" +uv add lmstudio ``` ## Customizing the server API host and TCP port @@ -40,53 +36,47 @@ SDK also required that the optional HTTP REST server be enabled). The network location of the server API can be overridden by passing a `"host:port"` string when creating the client instance. 
-```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms - SERVER_API_HOST = "localhost:1234" - - # This must be the *first* convenience API interaction (otherwise the SDK - # implicitly creates a client that accesses the default server API host) - lms.configure_default_client(SERVER_API_HOST) - - # Note: the dedicated configuration API was added in lmstudio-python 1.3.0 - # For compatibility with earlier SDK versions, it is still possible to use - # lms.get_default_client(SERVER_API_HOST) to configure the default client - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms - SERVER_API_HOST = "localhost:1234" - - # When using the scoped resource API, each client instance - # can be configured to use a specific server API host - with lms.Client(SERVER_API_HOST) as client: - model = client.llm.model() - - for fragment in model.respond_stream("What is the meaning of life?"): - print(fragment.content, end="", flush=True) - print() # Advance to a new line at the end of the response - - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms - SERVER_API_HOST = "localhost:1234" - - # When using the asynchronous API, each client instance - # can be configured to use a specific server API host - async with lms.AsyncClient(SERVER_API_HOST) as client: - model = await client.llm.model() - - for fragment in await model.respond_stream("What is the meaning of life?"): - print(fragment.content, end="", flush=True) - print() # Advance to a new line at the end of the response +```python tab="Python (convenience API)" +import lmstudio as lms +SERVER_API_HOST = "localhost:1234" + +# This must be the *first* convenience API interaction (otherwise the SDK +# implicitly creates a client that accesses the default server API host) +lms.configure_default_client(SERVER_API_HOST) + +# Note: the dedicated configuration API was added in lmstudio-python 1.3.0 +# For compatibility with earlier SDK versions, it is still possible to use +# lms.get_default_client(SERVER_API_HOST) to configure the default client +``` + +```python tab="Python (scoped resource API)" +import lmstudio as lms +SERVER_API_HOST = "localhost:1234" + +# When using the scoped resource API, each client instance +# can be configured to use a specific server API host +with lms.Client(SERVER_API_HOST) as client: + model = client.llm.model() + + for fragment in model.respond_stream("What is the meaning of life?"): + print(fragment.content, end="", flush=True) + print() # Advance to a new line at the end of the response +``` + +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms +SERVER_API_HOST = "localhost:1234" + +# When using the asynchronous API, each client instance +# can be configured to use a specific server API host +async with lms.AsyncClient(SERVER_API_HOST) as client: + model = await client.llm.model() + + for fragment in await model.respond_stream("What is the meaning of life?"): + print(fragment.content, end="", flush=True) + print() # Advance to a new line at the end of the response ``` ### Checking a specified API server host is running @@ -97,31 +87,26 @@ While the most common connection pattern is to let the SDK raise an exception if 
connect to the specified API server host, the SDK also supports running the API
 check directly without creating an SDK client instance first:
 
-```lms_code_snippet
-  variants:
-    "Python (synchronous API)":
-      language: python
-      code: |
-        import lmstudio as lms
-        SERVER_API_HOST = "localhost:1234"
-
-        if lms.Client.is_valid_api_host(SERVER_API_HOST):
-            print(f"An LM Studio API server instance is available at {SERVER_API_HOST}")
-        else:
-            print("No LM Studio API server instance found at {SERVER_API_HOST}")
-
-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
-        SERVER_API_HOST = "localhost:1234"
-
-        if await lms.AsyncClient.is_valid_api_host(SERVER_API_HOST):
-            print(f"An LM Studio API server instance is available at {SERVER_API_HOST}")
-        else:
-            print("No LM Studio API server instance found at {SERVER_API_HOST}")
+```python tab="Python (synchronous API)"
+import lmstudio as lms
+SERVER_API_HOST = "localhost:1234"
+
+if lms.Client.is_valid_api_host(SERVER_API_HOST):
+    print(f"An LM Studio API server instance is available at {SERVER_API_HOST}")
+else:
+    print(f"No LM Studio API server instance found at {SERVER_API_HOST}")
+```
+
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms
+SERVER_API_HOST = "localhost:1234"
+
+if await lms.AsyncClient.is_valid_api_host(SERVER_API_HOST):
+    print(f"An LM Studio API server instance is available at {SERVER_API_HOST}")
+else:
+    print(f"No LM Studio API server instance found at {SERVER_API_HOST}")
 ```
 
 ### Determining the default local API server port
@@ -133,29 +118,24 @@ interface for a running API server instance. This scan is repeated for each new
 created.
 
Rather than letting the SDK perform this scan implicitly, the SDK also supports
 running the scan explicitly, and passing in the reported API server details
 when creating clients:
 
-```lms_code_snippet
-  variants:
-    "Python (synchronous API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        api_host = lms.Client.find_default_local_api_host()
-        if api_host is not None:
-            print(f"An LM Studio API server instance is available at {api_host}")
-        else:
-            print("No LM Studio API server instance found on any of the default local ports")
-
-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
-
-        api_host = await lms.AsyncClient.find_default_local_api_host()
-        if api_host is not None:
-            print(f"An LM Studio API server instance is available at {api_host}")
-        else:
-            print("No LM Studio API server instance found on any of the default local ports")
+```python tab="Python (synchronous API)"
+import lmstudio as lms
+
+api_host = lms.Client.find_default_local_api_host()
+if api_host is not None:
+    print(f"An LM Studio API server instance is available at {api_host}")
+else:
+    print("No LM Studio API server instance found on any of the default local ports")
+```
+
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms
+
+api_host = await lms.AsyncClient.find_default_local_api_host()
+if api_host is not None:
+    print(f"An LM Studio API server instance is available at {api_host}")
+else:
+    print("No LM Studio API server instance found on any of the default local ports")
 ```
diff --git a/1_python/1_getting-started/repl.md b/1_python/1_getting-started/repl.md
index df545a0..961b4a9 100644
--- a/1_python/1_getting-started/repl.md
+++ b/1_python/1_getting-started/repl.md
@@ -18,70 +18,58 @@ The convenience API allows the standard Python REPL, or more flexible alternativ
 Jupyter Notebooks, to be used to interact with AI models loaded into LM Studio.
 For example:
 
-```lms_code_snippet
-  title: "Python REPL"
-  variants:
-    "Interactive chat session":
-      language: python
-      code: |
-        >>> import lmstudio as lms
-        >>> loaded_models = lms.list_loaded_models()
-        >>> for idx, model in enumerate(loaded_models):
-        ...     print(f"{idx:>3} {model}")
-        ...
-          0 LLM(identifier='qwen2.5-7b-instruct')
-        >>> model = loaded_models[0]
-        >>> chat = lms.Chat("You answer questions concisely")
-        >>> chat = lms.Chat("You answer questions concisely")
-        >>> chat.add_user_message("Tell me three fruits")
-        UserMessage(content=[TextData(text='Tell me three fruits')])
-        >>> print(model.respond(chat, on_message=chat.append))
-        Banana, apple, orange.
-        >>> chat.add_user_message("Tell me three more fruits")
-        UserMessage(content=[TextData(text='Tell me three more fruits')])
-        >>> print(model.respond(chat, on_message=chat.append))
-        Mango, strawberry, avocado.
-        >>> chat.add_user_message("How many fruits have you told me?")
-        UserMessage(content=[TextData(text='How many fruits have you told me?')])
-        >>> print(model.respond(chat, on_message=chat.append))
-        You asked for three initial fruits and three more, so I've listed a total of six fruits.
-
+```python title="Python REPL"
+>>> import lmstudio as lms
+>>> loaded_models = lms.list_loaded_models()
+>>> for idx, model in enumerate(loaded_models):
+...     print(f"{idx:>3} {model}")
+...
+  0 LLM(identifier='qwen2.5-7b-instruct')
+>>> model = loaded_models[0]
+>>> chat = lms.Chat("You answer questions concisely")
+>>> chat.add_user_message("Tell me three fruits")
+UserMessage(content=[TextData(text='Tell me three fruits')])
+>>> print(model.respond(chat, on_message=chat.append))
+Banana, apple, orange.
+>>> chat.add_user_message("Tell me three more fruits")
+UserMessage(content=[TextData(text='Tell me three more fruits')])
+>>> print(model.respond(chat, on_message=chat.append))
+Mango, strawberry, avocado.
+>>> chat.add_user_message("How many fruits have you told me?")
+UserMessage(content=[TextData(text='How many fruits have you told me?')])
+>>> print(model.respond(chat, on_message=chat.append))
+You asked for three initial fruits and three more, so I've listed a total of six fruits.
 ```
 
 While not primarily intended for use this way, the SDK's asynchronous structured
 concurrency API is compatible with the asynchronous Python REPL that is launched
 by `python -m asyncio`. For example:
 
-```lms_code_snippet
-  title: "Python REPL"
-  variants:
-    "Asynchronous chat session":
-      language: python
-      code: |
-        # Note: assumes use of the "python -m asyncio" asynchronous REPL (or equivalent)
-        # Requires Python SDK version 1.5.0 or later
-        >>> from contextlib import AsyncExitStack
-        >>> import lmstudio as lms
-        >>> resources = AsyncExitStack()
-        >>> client = await resources.enter_async_context(lms.AsyncClient())
-        >>> loaded_models = await client.llm.list_loaded()
-        >>> for idx, model in enumerate(loaded_models):
-        ...     print(f"{idx:>3} {model}")
-        ...
-          0 AsyncLLM(identifier='qwen2.5-7b-instruct-1m')
-        >>> model = loaded_models[0]
-        >>> chat = lms.Chat("You answer questions concisely")
-        >>> chat.add_user_message("Tell me three fruits")
-        UserMessage(content=[TextData(text='Tell me three fruits')])
-        >>> print(await model.respond(chat, on_message=chat.append))
-        Apple, banana, and orange.
-        >>> chat.add_user_message("Tell me three more fruits")
-        UserMessage(content=[TextData(text='Tell me three more fruits')])
-        >>> print(await model.respond(chat, on_message=chat.append))
-        Mango, strawberry, and pineapple.
-        >>> chat.add_user_message("How many fruits have you told me?")
-        UserMessage(content=[TextData(text='How many fruits have you told me?')])
-        >>> print(await model.respond(chat, on_message=chat.append))
-        You asked for three fruits initially, then three more, so I've listed six fruits in total.
-
+```python title="Python REPL"
+# Note: assumes use of the "python -m asyncio" asynchronous REPL (or equivalent)
+# Requires Python SDK version 1.5.0 or later
+>>> from contextlib import AsyncExitStack
+>>> import lmstudio as lms
+>>> resources = AsyncExitStack()
+>>> client = await resources.enter_async_context(lms.AsyncClient())
+>>> loaded_models = await client.llm.list_loaded()
+>>> for idx, model in enumerate(loaded_models):
+...     print(f"{idx:>3} {model}")
+...
+  0 AsyncLLM(identifier='qwen2.5-7b-instruct-1m')
+>>> model = loaded_models[0]
+>>> chat = lms.Chat("You answer questions concisely")
+>>> chat.add_user_message("Tell me three fruits")
+UserMessage(content=[TextData(text='Tell me three fruits')])
+>>> print(await model.respond(chat, on_message=chat.append))
+Apple, banana, and orange.
+>>> chat.add_user_message("Tell me three more fruits")
+UserMessage(content=[TextData(text='Tell me three more fruits')])
+>>> print(await model.respond(chat, on_message=chat.append))
+Mango, strawberry, and pineapple.
+>>> chat.add_user_message("How many fruits have you told me?")
+UserMessage(content=[TextData(text='How many fruits have you told me?')])
+>>> print(await model.respond(chat, on_message=chat.append))
+You asked for three fruits initially, then three more, so I've listed six fruits in total.
 ```
diff --git a/1_python/1_llm-prediction/cancelling-predictions.md b/1_python/1_llm-prediction/cancelling-predictions.md
index 5a1ba0e..3f00fda 100644
--- a/1_python/1_llm-prediction/cancelling-predictions.md
+++ b/1_python/1_llm-prediction/cancelling-predictions.md
@@ -12,72 +12,65 @@ The following snippet illustrates cancelling the request in response to an
 application-specific cancellation condition (such as polling an event set
 by another thread).
 
-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
-        model = lms.llm()
+```python tab="Python (convenience API)"
+import lmstudio as lms
+model = lms.llm()
 
-        prediction_stream = model.respond_stream("What is the meaning of life?")
-        cancelled = False
-        for fragment in prediction_stream:
-            if ...: # Cancellation condition will be app specific
-                cancelled = True
-                prediction_stream.cancel()
-            # Note: it is recommended to let the iteration complete,
-            # as doing so allows the partial result to be recorded.
-            # Breaking the loop *is* permitted, but means the partial result
-            # and final prediction stats won't be available to the client
-        # The stream allows the prediction result to be retrieved after iteration
-        if not cancelled:
-            print(prediction_stream.result())
-
-    "Python (scoped resource API)":
-      language: python
-      code: |
-        import lmstudio as lms
+prediction_stream = model.respond_stream("What is the meaning of life?")
+cancelled = False
+for fragment in prediction_stream:
+    if ...: # Cancellation condition will be app specific
+        cancelled = True
+        prediction_stream.cancel()
+    # Note: it is recommended to let the iteration complete,
+    # as doing so allows the partial result to be recorded.
+    # Breaking the loop *is* permitted, but means the partial result
+    # and final prediction stats won't be available to the client
+# The stream allows the prediction result to be retrieved after iteration
+if not cancelled:
+    print(prediction_stream.result())
+```
 
-        with lms.Client() as client:
-            model = client.llm.model()
+```python tab="Python (scoped resource API)"
+import lmstudio as lms
 
-            prediction_stream = model.respond_stream("What is the meaning of life?")
-            cancelled = False
-            for fragment in prediction_stream:
-                if ...: # Cancellation condition will be app specific
-                    cancelled = True
-                    prediction_stream.cancel()
-                # Note: it is recommended to let the iteration complete,
-                # as doing so allows the partial result to be recorded.
- # Breaking the loop *is* permitted, but means the partial result - # and final prediction stats won't be available to the client - # The stream allows the prediction result to be retrieved after iteration - if not cancelled: - print(prediction_stream.result()) +with lms.Client() as client: + model = client.llm.model() - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms + prediction_stream = model.respond_stream("What is the meaning of life?") + cancelled = False + for fragment in prediction_stream: + if ...: # Cancellation condition will be app specific + cancelled = True + prediction_stream.cancel() + # Note: it is recommended to let the iteration complete, + # as doing so allows the partial result to be recorded. + # Breaking the loop *is* permitted, but means the partial result + # and final prediction stats won't be available to the client + # The stream allows the prediction result to be retrieved after iteration + if not cancelled: + print(prediction_stream.result()) +``` - async with lms.AsyncClient() as client: - model = await client.llm.model() +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms - prediction_stream = await model.respond_stream("What is the meaning of life?") - cancelled = False - async for fragment in prediction_stream: - if ...: # Cancellation condition will be app specific - cancelled = True - await prediction_stream.cancel() - # Note: it is recommended to let the iteration complete, - # as doing so allows the partial result to be recorded. - # Breaking the loop *is* permitted, but means the partial result - # and final prediction stats won't be available to the client - # The stream allows the prediction result to be retrieved after iteration - if not cancelled: - print(prediction_stream.result()) +async with lms.AsyncClient() as client: + model = await client.llm.model() + prediction_stream = await model.respond_stream("What is the meaning of life?") + cancelled = False + async for fragment in prediction_stream: + if ...: # Cancellation condition will be app specific + cancelled = True + await prediction_stream.cancel() + # Note: it is recommended to let the iteration complete, + # as doing so allows the partial result to be recorded. + # Breaking the loop *is* permitted, but means the partial result + # and final prediction stats won't be available to the client + # The stream allows the prediction result to be retrieved after iteration + if not cancelled: + print(prediction_stream.result()) ``` diff --git a/1_python/1_llm-prediction/chat-completion.md b/1_python/1_llm-prediction/chat-completion.md index 35e1a42..8b426c4 100644 --- a/1_python/1_llm-prediction/chat-completion.md +++ b/1_python/1_llm-prediction/chat-completion.md @@ -11,36 +11,29 @@ Use `llm.respond(...)` to generate completions for a chat conversation. The following snippet shows how to obtain the AI's response to a quick chat prompt. 
-```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms - - model = lms.llm() - print(model.respond("What is the meaning of life?")) - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms - - with lms.Client() as client: - model = client.llm.model() - print(model.respond("What is the meaning of life?")) - - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms - - async with lms.AsyncClient() as client: - model = await client.llm.model() - print(await model.respond("What is the meaning of life?")) +```python tab="Python (convenience API)" +import lmstudio as lms +model = lms.llm() +print(model.respond("What is the meaning of life?")) +``` + +```python tab="Python (scoped resource API)" +import lmstudio as lms + +with lms.Client() as client: + model = client.llm.model() + print(model.respond("What is the meaning of life?")) +``` + +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms + +async with lms.AsyncClient() as client: + model = await client.llm.model() + print(await model.respond("What is the meaning of life?")) ``` ## Streaming a Chat Response @@ -49,44 +42,37 @@ The following snippet shows how to stream the AI's response to a chat prompt, displaying text fragments as they are received (rather than waiting for the entire response to be generated before displaying anything). -```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms - model = lms.llm() - - for fragment in model.respond_stream("What is the meaning of life?"): - print(fragment.content, end="", flush=True) - print() # Advance to a new line at the end of the response +```python tab="Python (convenience API)" +import lmstudio as lms +model = lms.llm() - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms +for fragment in model.respond_stream("What is the meaning of life?"): + print(fragment.content, end="", flush=True) +print() # Advance to a new line at the end of the response +``` - with lms.Client() as client: - model = client.llm.model() +```python tab="Python (scoped resource API)" +import lmstudio as lms - for fragment in model.respond_stream("What is the meaning of life?"): - print(fragment.content, end="", flush=True) - print() # Advance to a new line at the end of the response +with lms.Client() as client: + model = client.llm.model() - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms + for fragment in model.respond_stream("What is the meaning of life?"): + print(fragment.content, end="", flush=True) + print() # Advance to a new line at the end of the response +``` - async with lms.AsyncClient() as client: - model = await client.llm.model() +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms - async for fragment in model.respond_stream("What is the meaning of life?"): - print(fragment.content, end="", flush=True) 
-            print() # Advance to a new line at the end of the response
+async with lms.AsyncClient() as client:
+    model = await client.llm.model()
+
+    async for fragment in model.respond_stream("What is the meaning of life?"):
+        print(fragment.content, end="", flush=True)
+    print() # Advance to a new line at the end of the response
 ```
 
 ## Cancelling a Chat Response
@@ -100,33 +86,26 @@ This can be done using the top-level `llm` convenience API, or the `model`
 method in the `llm` namespace when using the scoped resource API. For example,
 here is how to use Qwen2.5 7B Instruct.
 
-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
+```python tab="Python (convenience API)"
+import lmstudio as lms
 
-        model = lms.llm("qwen2.5-7b-instruct")
-
-    "Python (scoped resource API)":
-      language: python
-      code: |
-        import lmstudio as lms
+model = lms.llm("qwen2.5-7b-instruct")
+```
 
-        with lms.Client() as client:
-            model = client.llm.model("qwen2.5-7b-instruct")
+```python tab="Python (scoped resource API)"
+import lmstudio as lms
 
-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
+with lms.Client() as client:
+    model = client.llm.model("qwen2.5-7b-instruct")
+```
 
-        async with lms.AsyncClient() as client:
-            model = await client.llm.model("qwen2.5-7b-instruct")
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms
 
+async with lms.AsyncClient() as client:
+    model = await client.llm.model("qwen2.5-7b-instruct")
 ```
 
 There are other ways to get a model handle. See [Managing Models in Memory](./../manage-models/loading) for more info.
 
@@ -137,34 +116,15 @@ The input to the model is referred to as the "context". Conceptually, the model
 receives a multi-turn conversation as input, and it is asked to predict the
 assistant's response in that conversation.
 
-```lms_code_snippet
-  variants:
-    "Constructing a Chat object":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        # Create a chat with an initial system prompt.
-        chat = lms.Chat("You are a resident AI philosopher.")
-
-        # Build the chat context by adding messages of relevant types.
-        chat.add_user_message("What is the meaning of life?")
-        # ... continued in next example
-
-    "From chat history data":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        # Create a chat object from a chat history dict
-        chat = lms.Chat.from_history({
-            "messages": [
-                { "role": "system", "content": "You are a resident AI philosopher." },
-                { "role": "user", "content": "What is the meaning of life?" },
-            ]
-        })
-        # ... continued in next example
+```python tab="Constructing a Chat object"
+import lmstudio as lms
+
+# Create a chat with an initial system prompt.
+chat = lms.Chat("You are a resident AI philosopher.")
+
+# Build the chat context by adding messages of relevant types.
+chat.add_user_message("What is the meaning of life?")
+# ... continued in next example
+```
+
+```python tab="From chat history data"
+import lmstudio as lms
+
+# Create a chat object from a chat history dict
+chat = lms.Chat.from_history({
+    "messages": [
+        { "role": "system", "content": "You are a resident AI philosopher." },
+        { "role": "user", "content": "What is the meaning of life?" },
+    ]
+})
+# ... continued in next example
 ```
 
 See [Working with Chats](./working-with-chats) for more information on managing chat context.
 
@@ -175,92 +135,76 @@ See [Working with Chats](./working-with-chats) for more information on managing
 
 You can ask the LLM to predict the next response in the chat context using the `respond()` method.
-```lms_code_snippet - variants: - "Non-streaming (synchronous API)": - language: python - code: | - # The `chat` object is created in the previous step. - result = model.respond(chat) - - print(result) - - "Streaming (synchronous API)": - language: python - code: | - # The `chat` object is created in the previous step. - prediction_stream = model.respond_stream(chat) - - for fragment in prediction_stream: - print(fragment.content, end="", flush=True) - print() # Advance to a new line at the end of the response - - "Non-streaming (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - # The `chat` object is created in the previous step. - result = await model.respond(chat) - - print(result) - - "Streaming (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - # The `chat` object is created in the previous step. - prediction_stream = await model.respond_stream(chat) - - async for fragment in prediction_stream: - print(fragment.content, end="", flush=True) - print() # Advance to a new line at the end of the response +```python tab="Non-streaming (synchronous API)" +# The `chat` object is created in the previous step. +result = model.respond(chat) + +print(result) +``` + +```python tab="Streaming (synchronous API)" +# The `chat` object is created in the previous step. +prediction_stream = model.respond_stream(chat) + +for fragment in prediction_stream: + print(fragment.content, end="", flush=True) +print() # Advance to a new line at the end of the response +``` + +```python tab="Non-streaming (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +# The `chat` object is created in the previous step. +result = await model.respond(chat) + +print(result) +``` + +```python tab="Streaming (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +# The `chat` object is created in the previous step. +prediction_stream = await model.respond_stream(chat) +async for fragment in prediction_stream: + print(fragment.content, end="", flush=True) +print() # Advance to a new line at the end of the response ``` ## Customize Inferencing Parameters You can pass in inferencing parameters via the `config` keyword parameter on `.respond()`. 
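+
+Since `config` is a plain dictionary, it can be defined once and reused across calls.
+A minimal sketch (using the same field names as the tabbed examples below):
+
+```python
+# Reusable inference settings
+gen_config = {"temperature": 0.6, "maxTokens": 50}
+
+result = model.respond(chat, config=gen_config)
+```
+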
-```lms_code_snippet - variants: - "Non-streaming (synchronous API)": - language: python - code: | - result = model.respond(chat, config={ - "temperature": 0.6, - "maxTokens": 50, - }) - - "Streaming (synchronous API)": - language: python - code: | - prediction_stream = model.respond_stream(chat, config={ - "temperature": 0.6, - "maxTokens": 50, - }) - - "Non-streaming (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - result = await model.respond(chat, config={ - "temperature": 0.6, - "maxTokens": 50, - }) - - "Streaming (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - prediction_stream = await model.respond_stream(chat, config={ - "temperature": 0.6, - "maxTokens": 50, - }) +```python tab="Non-streaming (synchronous API)" +result = model.respond(chat, config={ + "temperature": 0.6, + "maxTokens": 50, +}) +``` + +```python tab="Streaming (synchronous API)" +prediction_stream = model.respond_stream(chat, config={ + "temperature": 0.6, + "maxTokens": 50, +}) +``` +```python tab="Non-streaming (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +result = await model.respond(chat, config={ + "temperature": 0.6, + "maxTokens": 50, +}) +``` + +```python tab="Streaming (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +prediction_stream = await model.respond_stream(chat, config={ + "temperature": 0.6, + "maxTokens": 50, +}) ``` See [Configuring the Model](./parameters) for more information on what can be configured. @@ -270,29 +214,23 @@ See [Configuring the Model](./parameters) for more information on what can be co You can also print prediction metadata, such as the model used for generation, number of generated tokens, time to first token, and stop reason. -```lms_code_snippet - variants: - "Non-streaming": - language: python - code: | - # `result` is the response from the model. - print("Model used:", result.model_info.display_name) - print("Predicted tokens:", result.stats.predicted_tokens_count) - print("Time to first token (seconds):", result.stats.time_to_first_token_sec) - print("Stop reason:", result.stats.stop_reason) - - "Streaming": - language: python - code: | - # After iterating through the prediction fragments, - # the overall prediction result may be obtained from the stream - result = prediction_stream.result() - - print("Model used:", result.model_info.display_name) - print("Predicted tokens:", result.stats.predicted_tokens_count) - print("Time to first token (seconds):", result.stats.time_to_first_token_sec) - print("Stop reason:", result.stats.stop_reason) +```python tab="Non-streaming" +# `result` is the response from the model. 
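+# `result.model_info` identifies the model that generated the response, while
+# `result.stats` carries the prediction statistics printed below.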
+print("Model used:", result.model_info.display_name) +print("Predicted tokens:", result.stats.predicted_tokens_count) +print("Time to first token (seconds):", result.stats.time_to_first_token_sec) +print("Stop reason:", result.stats.stop_reason) +``` + +```python tab="Streaming" +# After iterating through the prediction fragments, +# the overall prediction result may be obtained from the stream +result = prediction_stream.result() +print("Model used:", result.model_info.display_name) +print("Predicted tokens:", result.stats.predicted_tokens_count) +print("Time to first token (seconds):", result.stats.time_to_first_token_sec) +print("Stop reason:", result.stats.stop_reason) ``` Both the non-streaming and streaming result access is consistent across the synchronous and @@ -304,35 +242,29 @@ iterating the stream to completion before returning the result. ## Example: Multi-turn Chat -```lms_code_snippet - title: "chatbot.py" - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms - - model = lms.llm() - chat = lms.Chat("You are a task focused AI assistant") - - while True: - try: - user_input = input("You (leave blank to exit): ") - except EOFError: - print() - break - if not user_input: - break - chat.add_user_message(user_input) - prediction_stream = model.respond_stream( - chat, - on_message=chat.append, - ) - print("Bot: ", end="", flush=True) - for fragment in prediction_stream: - print(fragment.content, end="", flush=True) - print() - +```python title="chatbot.py" +import lmstudio as lms + +model = lms.llm() +chat = lms.Chat("You are a task focused AI assistant") + +while True: + try: + user_input = input("You (leave blank to exit): ") + except EOFError: + print() + break + if not user_input: + break + chat.add_user_message(user_input) + prediction_stream = model.respond_stream( + chat, + on_message=chat.append, + ) + print("Bot: ", end="", flush=True) + for fragment in prediction_stream: + print(fragment.content, end="", flush=True) + print() ``` ### Progress Callbacks @@ -341,49 +273,41 @@ Long prompts will often take a long time to first token, i.e. it takes the model If you want to get updates on the progress of this process, you can provide a float callback to `respond` that receives a float from 0.0-1.0 representing prompt processing progress. 
-```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms +```python tab="Python (convenience API)" +import lmstudio as lms - llm = lms.llm() +llm = lms.llm() - response = llm.respond( - "What is LM Studio?", - on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% complete")), - ) - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms - - with lms.Client() as client: - llm = client.llm.model() +response = llm.respond( + "What is LM Studio?", + on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% complete")), +) +``` - response = llm.respond( - "What is LM Studio?", - on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% complete")), - ) +```python tab="Python (scoped resource API)" +import lmstudio as lms - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms +with lms.Client() as client: + llm = client.llm.model() - async with lms.AsyncClient() as client: - llm = await client.llm.model() + response = llm.respond( + "What is LM Studio?", + on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% complete")), + ) +``` - response = await llm.respond( - "What is LM Studio?", - on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% complete")), - ) +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms +async with lms.AsyncClient() as client: + llm = await client.llm.model() + response = await llm.respond( + "What is LM Studio?", + on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% complete")), + ) ``` In addition to `on_prompt_processing_progress`, the other available progress callbacks are: diff --git a/1_python/1_llm-prediction/completion.md b/1_python/1_llm-prediction/completion.md index 8f38617..68836d3 100644 --- a/1_python/1_llm-prediction/completion.md +++ b/1_python/1_llm-prediction/completion.md @@ -15,110 +15,89 @@ This can be done using the top-level `llm` convenience API, or the `model` method in the `llm` namespace when using the scoped resource API. For example, here is how to use Qwen2.5 7B Instruct. 
-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
+```python tab="Python (convenience API)"
+import lmstudio as lms

-        model = lms.llm("qwen2.5-7b-instruct")
-
-    "Python (scoped resource API)":
-      language: python
-      code: |
-        import lmstudio as lms
+model = lms.llm("qwen2.5-7b-instruct")
+```

-        with lms.Client() as client:
-            model = client.llm.model("qwen2.5-7b-instruct")
+```python tab="Python (scoped resource API)"
+import lmstudio as lms

-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
+with lms.Client() as client:
+    model = client.llm.model("qwen2.5-7b-instruct")
+```

-        async with lms.AsyncClient() as client:
-            model = await client.llm.model("qwen2.5-7b-instruct")
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms

+async with lms.AsyncClient() as client:
+    model = await client.llm.model("qwen2.5-7b-instruct")
 ```

 ## 2. Generate a Completion

 Once you have a loaded model, you can generate completions by passing a string to the `complete` method on the `llm` handle.

-```lms_code_snippet
-  variants:
-    "Non-streaming (synchronous API)":
-      language: python
-      code: |
-        # The `chat` object is created in the previous step.
-        result = model.complete("My name is", config={"maxTokens": 100})
-
-        print(result)
-
-    "Streaming (synchronous API)":
-      language: python
-      code: |
-        # The `chat` object is created in the previous step.
-        prediction_stream = model.complete_stream("My name is", config={"maxTokens": 100})
-
-        for fragment in prediction_stream:
-            print(fragment.content, end="", flush=True)
-        print() # Advance to a new line at the end of the response
-
-    "Non-streaming (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        # The `chat` object is created in the previous step.
-        result = await model.complete("My name is", config={"maxTokens": 100})
-
-        print(result)
-
-    "Streaming (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        # The `chat` object is created in the previous step.
-        prediction_stream = await model.complete_stream("My name is", config={"maxTokens": 100})
-
-        async for fragment in prediction_stream:
-            print(fragment.content, end="", flush=True)
-        print() # Advance to a new line at the end of the response
+```python tab="Non-streaming (synchronous API)"
+# The `model` handle is created in the previous step.
+result = model.complete("My name is", config={"maxTokens": 100})
+
+print(result)
+```
+
+```python tab="Streaming (synchronous API)"
+# The `model` handle is created in the previous step.
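+# Iterating the stream below prints each fragment as it arrives.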
+prediction_stream = model.complete_stream("My name is", config={"maxTokens": 100})
+
+for fragment in prediction_stream:
+    print(fragment.content, end="", flush=True)
+print() # Advance to a new line at the end of the response
+```
+
+```python tab="Non-streaming (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+# The `model` handle is created in the previous step.
+result = await model.complete("My name is", config={"maxTokens": 100})
+
+print(result)
+```
+
+```python tab="Streaming (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+# The `model` handle is created in the previous step.
+prediction_stream = await model.complete_stream("My name is", config={"maxTokens": 100})
+
+async for fragment in prediction_stream:
+    print(fragment.content, end="", flush=True)
+print() # Advance to a new line at the end of the response
 ```

 ## 3. Print Prediction Stats

 You can also print prediction metadata, such as the model used for generation, number of generated tokens, time to first token, and stop reason.

-```lms_code_snippet
-  variants:
-    "Non-streaming":
-      language: python
-      code: |
-        # `result` is the response from the model.
-        print("Model used:", result.model_info.display_name)
-        print("Predicted tokens:", result.stats.predicted_tokens_count)
-        print("Time to first token (seconds):", result.stats.time_to_first_token_sec)
-        print("Stop reason:", result.stats.stop_reason)
-
-    "Streaming":
-      language: python
-      code: |
-        # After iterating through the prediction fragments,
-        # the overall prediction result may be obtained from the stream
-        result = prediction_stream.result()
-
-        print("Model used:", result.model_info.display_name)
-        print("Predicted tokens:", result.stats.predicted_tokens_count)
-        print("Time to first token (seconds):", result.stats.time_to_first_token_sec)
-        print("Stop reason:", result.stats.stop_reason)
+```python tab="Non-streaming"
+# `result` is the response from the model.
+print("Model used:", result.model_info.display_name)
+print("Predicted tokens:", result.stats.predicted_tokens_count)
+print("Time to first token (seconds):", result.stats.time_to_first_token_sec)
+print("Stop reason:", result.stats.stop_reason)
+```
+
+```python tab="Streaming"
+# After iterating through the prediction fragments,
+# the overall prediction result may be obtained from the stream
+result = prediction_stream.result()
+
+print("Model used:", result.model_info.display_name)
+print("Predicted tokens:", result.stats.predicted_tokens_count)
+print("Time to first token (seconds):", result.stats.time_to_first_token_sec)
+print("Stop reason:", result.stats.stop_reason)
 ```

 Both the non-streaming and streaming result access is consistent across the synchronous and
@@ -132,80 +111,66 @@ iterating the stream to completion before returning the result.

 Here's an example of how you might use the `complete` method to simulate a terminal.
-```lms_code_snippet - title: "terminal-sim.py" - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms - - model = lms.llm() - console_history = [] - - while True: - try: - user_command = input("$ ") - except EOFError: - print() - break - if user_command.strip() == "exit": - break - console_history.append(f"$ {user_command}") - history_prompt = "\n".join(console_history) - prediction_stream = model.complete_stream( - history_prompt, - config={ "stopStrings": ["$"] }, - ) - for fragment in prediction_stream: - print(fragment.content, end="", flush=True) - print() - console_history.append(prediction_stream.result().content) - +```python title="terminal-sim.py" +import lmstudio as lms + +model = lms.llm() +console_history = [] + +while True: + try: + user_command = input("$ ") + except EOFError: + print() + break + if user_command.strip() == "exit": + break + console_history.append(f"$ {user_command}") + history_prompt = "\n".join(console_history) + prediction_stream = model.complete_stream( + history_prompt, + config={ "stopStrings": ["$"] }, + ) + for fragment in prediction_stream: + print(fragment.content, end="", flush=True) + print() + console_history.append(prediction_stream.result().content) ``` ## Customize Inferencing Parameters You can pass in inferencing parameters via the `config` keyword parameter on `.complete()`. -```lms_code_snippet - variants: - "Non-streaming (synchronous API)": - language: python - code: | - result = model.complete(initial_text, config={ - "temperature": 0.6, - "maxTokens": 50, - }) - - "Streaming (synchronous API)": - language: python - code: | - prediction_stream = model.complete_stream(initial_text, config={ - "temperature": 0.6, - "maxTokens": 50, - }) - - "Non-streaming (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - result = await model.complete(initial_text, config={ - "temperature": 0.6, - "maxTokens": 50, - }) - - "Streaming (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - prediction_stream = await model.complete_stream(initial_text, config={ - "temperature": 0.6, - "maxTokens": 50, - }) +```python tab="Non-streaming (synchronous API)" +result = model.complete(initial_text, config={ + "temperature": 0.6, + "maxTokens": 50, +}) +``` + +```python tab="Streaming (synchronous API)" +prediction_stream = model.complete_stream(initial_text, config={ + "temperature": 0.6, + "maxTokens": 50, +}) +``` +```python tab="Non-streaming (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +result = await model.complete(initial_text, config={ + "temperature": 0.6, + "maxTokens": 50, +}) +``` + +```python tab="Streaming (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +prediction_stream = await model.complete_stream(initial_text, config={ + "temperature": 0.6, + "maxTokens": 50, +}) ``` See [Configuring the Model](./parameters) for more information on what can be configured. @@ -216,48 +181,41 @@ Long prompts will often take a long time to first token, i.e. 
it takes the model
 If you want to get updates on the progress of this process, you can provide a float callback to `complete` that receives a float from 0.0-1.0 representing prompt processing progress.

-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        llm = lms.llm()
+```python tab="Python (convenience API)"
+import lmstudio as lms

-        completion = llm.complete(
-            "My name is",
-            on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% complete")),
-        )
+llm = lms.llm()

-    "Python (scoped resource API)":
-      language: python
-      code: |
-        import lmstudio as lms
+completion = llm.complete(
+    "My name is",
+    on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% processed")),
+)
+```

-        with lms.Client() as client:
-            llm = client.llm.model()
+```python tab="Python (scoped resource API)"
+import lmstudio as lms

-            completion = llm.complete(
-                "My name is",
-                on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% processed")),
-            )
+with lms.Client() as client:
+    llm = client.llm.model()

-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
+    completion = llm.complete(
+        "My name is",
+        on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% processed")),
+    )
+```

-        async with lms.AsyncClient() as client:
-            llm = await client.llm.model()
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms

-            completion = await llm.complete(
-                "My name is",
-                on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% processed")),
-            )
+async with lms.AsyncClient() as client:
+    llm = await client.llm.model()
+
+    completion = await llm.complete(
+        "My name is",
+        on_prompt_processing_progress = (lambda progress: print(f"{progress*100}% processed")),
+    )
 ```

 In addition to `on_prompt_processing_progress`, the other available progress callbacks are:
diff --git a/1_python/1_llm-prediction/image-input.md b/1_python/1_llm-prediction/image-input.md
index a3ff321..8c02fa9 100644
--- a/1_python/1_llm-prediction/image-input.md
+++ b/1_python/1_llm-prediction/image-input.md
@@ -20,33 +20,26 @@ lms get qwen2-vl-2b-instruct

 Connect to LM Studio and obtain a handle to the VLM (Vision-Language Model) you want to use.
-```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms +```python tab="Python (convenience API)" +import lmstudio as lms - model = lms.llm("qwen2-vl-2b-instruct") - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms +model = lms.llm("qwen2-vl-2b-instruct") +``` - with lms.Client() as client: - model = client.llm.model("qwen2-vl-2b-instruct") +```python tab="Python (scoped resource API)" +import lmstudio as lms - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms +with lms.Client() as client: + model = client.llm.model("qwen2-vl-2b-instruct") +``` - async with lms.AsyncClient() as client: - model = await client.llm.model("qwen2-vl-2b-instruct") +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms +async with lms.AsyncClient() as client: + model = await client.llm.model("qwen2-vl-2b-instruct") ``` ## 2. Prepare the Image @@ -54,36 +47,29 @@ Connect to LM Studio and obtain a handle to the VLM (Vision-Language Model) you Use the `prepare_image()` function or `files` namespace method to get a handle to the image that can subsequently be passed to the model. -```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms +```python tab="Python (convenience API)" +import lmstudio as lms - image_path = "/path/to/image.jpg" # Replace with the path to your image - image_handle = lms.prepare_image(image_path) - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms +image_path = "/path/to/image.jpg" # Replace with the path to your image +image_handle = lms.prepare_image(image_path) +``` - with lms.Client() as client: - image_path = "/path/to/image.jpg" # Replace with the path to your image - image_handle = client.files.prepare_image(image_path) +```python tab="Python (scoped resource API)" +import lmstudio as lms - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms +with lms.Client() as client: + image_path = "/path/to/image.jpg" # Replace with the path to your image + image_handle = client.files.prepare_image(image_path) +``` - async with lms.AsyncClient() as client: - image_path = "/path/to/image.jpg" # Replace with the path to your image - image_handle = await client.files.prepare_image(image_path) +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms +async with lms.AsyncClient() as client: + image_path = "/path/to/image.jpg" # Replace with the path to your image + image_handle = await client.files.prepare_image(image_path) ``` If you only have the raw data of the image, you can supply the raw data directly as a bytes @@ -99,46 +85,39 @@ The LM Studio server supports JPEG, PNG, and WebP image formats. Generate a prediction by passing the image to the model in the `.respond()` method. 
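+
+The tabbed examples below attach a single image. Since `images` accepts a list, a
+message can also reference several handles at once (a sketch; `other_image_handle`
+is assumed to come from another `prepare_image()` call):
+
+```python
+chat = lms.Chat()
+chat.add_user_message(
+    "Compare these two images.",
+    images=[image_handle, other_image_handle],  # other_image_handle is hypothetical
+)
+prediction = model.respond(chat)
+```
+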
-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        image_path = "/path/to/image.jpg" # Replace with the path to your image
-        image_handle = lms.prepare_image(image_path)
-        model = lms.llm("qwen2-vl-2b-instruct")
-        chat = lms.Chat()
-        chat.add_user_message("Describe this image please", images=[image_handle])
-        prediction = model.respond(chat)
-
-    "Python (scoped resource API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        with lms.Client() as client:
-            image_path = "/path/to/image.jpg" # Replace with the path to your image
-            image_handle = client.files.prepare_image(image_path)
-            model = client.llm.model("qwen2-vl-2b-instruct")
-            chat = lms.Chat()
-            chat.add_user_message("Describe this image please", images=[image_handle])
-            prediction = model.respond(chat)
-
-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
-
-        async with lms.AsyncClient() as client:
-            image_path = "/path/to/image.jpg" # Replace with the path to your image
-            image_handle = client.files.prepare_image(image_path)
-            model = await client.llm.model("qwen2-vl-2b-instruct")
-            chat = lms.Chat()
-            chat.add_user_message("Describe this image please", images=[image_handle])
-            prediction = await model.respond(chat)
+```python tab="Python (convenience API)"
+import lmstudio as lms
+
+image_path = "/path/to/image.jpg" # Replace with the path to your image
+image_handle = lms.prepare_image(image_path)
+model = lms.llm("qwen2-vl-2b-instruct")
+chat = lms.Chat()
+chat.add_user_message("Describe this image please", images=[image_handle])
+prediction = model.respond(chat)
+```
+
+```python tab="Python (scoped resource API)"
+import lmstudio as lms
+
+with lms.Client() as client:
+    image_path = "/path/to/image.jpg" # Replace with the path to your image
+    image_handle = client.files.prepare_image(image_path)
+    model = client.llm.model("qwen2-vl-2b-instruct")
+    chat = lms.Chat()
+    chat.add_user_message("Describe this image please", images=[image_handle])
+    prediction = model.respond(chat)
+```
+
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms
+
+async with lms.AsyncClient() as client:
+    image_path = "/path/to/image.jpg" # Replace with the path to your image
+    image_handle = await client.files.prepare_image(image_path)
+    model = await client.llm.model("qwen2-vl-2b-instruct")
+    chat = lms.Chat()
+    chat.add_user_message("Describe this image please", images=[image_handle])
+    prediction = await model.respond(chat)
 ```
diff --git a/1_python/1_llm-prediction/meta.json b/1_python/1_llm-prediction/meta.json
new file mode 100644
index 0000000..d56764e
--- /dev/null
+++ b/1_python/1_llm-prediction/meta.json
@@ -0,0 +1,14 @@
+{
+  "title": "Basics",
+  "pages": [
+    "cancelling-predictions",
+    "chat-completion",
+    "completion",
+    "image-input",
+    "_index",
+    "parameters",
+    "speculative-decoding",
+    "structured-response",
+    "working-with-chats"
+  ]
+}
diff --git a/1_python/1_llm-prediction/parameters.md b/1_python/1_llm-prediction/parameters.md
index 4bc9e62..a17cc97 100644
--- a/1_python/1_llm-prediction/parameters.md
+++ b/1_python/1_llm-prediction/parameters.md
@@ -10,25 +10,19 @@ You can customize both inference-time and load-time parameters for your model.
I
 Set inference-time parameters such as `temperature`, `maxTokens`, `topP` and more.

-```lms_code_snippet
-  variants:
-    ".respond()":
-      language: python
-      code: |
-        result = model.respond(chat, config={
-            "temperature": 0.6,
-            "maxTokens": 50,
-        })
-
-    ".complete()":
-      language: python
-      code: |
-        result = model.complete(chat, config={
-            "temperature": 0.6,
-            "maxTokens": 50,
-            "stopStrings": ["\n\n"],
-        })
+```python tab=".respond()"
+result = model.respond(chat, config={
+    "temperature": 0.6,
+    "maxTokens": 50,
+})
+```
+
+```python tab=".complete()"
+result = model.complete(initial_text, config={
+    "temperature": 0.6,
+    "maxTokens": 50,
+    "stopStrings": ["\n\n"],
+})
 ```

 See [`LLMPredictionConfigInput`](./../../typescript/api-reference/llm-prediction-config-input) in the
@@ -49,54 +43,47 @@ The `.model()` retrieves a handle to a model that has already been loaded, or lo

 **Note**: if the model is already loaded, the given configuration will be **ignored**.

-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
+```python tab="Python (convenience API)"
+import lmstudio as lms
+
+model = lms.llm("qwen2.5-7b-instruct", config={
+    "contextLength": 8192,
+    "gpu": {
+        "ratio": 0.5,
+    }
+})
+```

-        model = lms.llm("qwen2.5-7b-instruct", config={
+```python tab="Python (scoped resource API)"
+import lmstudio as lms
+
+with lms.Client() as client:
+    model = client.llm.model(
+        "qwen2.5-7b-instruct",
+        config={
             "contextLength": 8192,
             "gpu": {
                 "ratio": 0.5,
             }
-        })
-
-    "Python (scoped resource API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        with lms.Client() as client:
-            model = client.llm.model(
-                "qwen2.5-7b-instruct",
-                config={
-                    "contextLength": 8192,
-                    "gpu": {
-                        "ratio": 0.5,
-                    }
-                }
-            )
-
-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
-
-        async with lms.AsyncClient() as client:
-            model = await client.llm.model(
-                "qwen2.5-7b-instruct",
-                config={
-                    "contextLength": 8192,
-                    "gpu": {
-                        "ratio": 0.5,
-                    }
-                }
-            )
+        }
+    )
+```
+
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms
+
+async with lms.AsyncClient() as client:
+    model = await client.llm.model(
+        "qwen2.5-7b-instruct",
+        config={
+            "contextLength": 8192,
+            "gpu": {
+                "ratio": 0.5,
+            }
+        }
+    )
 ```

 See [`LLMLoadModelConfig`](./../../typescript/api-reference/llm-load-model-config) in the
@@ -106,55 +93,48 @@ Typescript SDK documentation for all configurable fields.

 The `.load_new_instance()` method creates a new model instance and loads it with the specified configuration.
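+
+Each call loads an additional copy of the model into memory, so consider releasing
+instances you no longer need. A rough sketch of the lifecycle (the `unload()` call
+is part of the model management APIs; see
+[Managing Models in Memory](./../manage-models/loading), and the tabbed examples
+below for the full configuration):
+
+```python
+import lmstudio as lms
+
+with lms.Client() as client:
+    model = client.llm.load_new_instance("qwen2.5-7b-instruct")
+    # ... run predictions on the dedicated instance ...
+    model.unload()  # free the instance once you are done with it
+```
+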
-```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms +```python tab="Python (convenience API)" +import lmstudio as lms - client = lms.get_default_client() - model = client.llm.load_new_instance("qwen2.5-7b-instruct", config={ +client = lms.get_default_client() +model = client.llm.load_new_instance("qwen2.5-7b-instruct", config={ + "contextLength": 8192, + "gpu": { + "ratio": 0.5, + } +}) +``` + +```python tab="Python (scoped resource API)" +import lmstudio as lms + +with lms.Client() as client: + model = client.llm.load_new_instance( + "qwen2.5-7b-instruct", + config={ "contextLength": 8192, "gpu": { "ratio": 0.5, } - }) - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms - - with lms.Client() as client: - model = client.llm.load_new_instance( - "qwen2.5-7b-instruct", - config={ - "contextLength": 8192, - "gpu": { - "ratio": 0.5, - } - } - ) - - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms - - async with lms.AsyncClient() as client: - model = await client.llm.load_new_instance( - "qwen2.5-7b-instruct", - config={ - "contextLength": 8192, - "gpu": { - "ratio": 0.5, - } - } - ) + } + ) +``` + +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms +async with lms.AsyncClient() as client: + model = await client.llm.load_new_instance( + "qwen2.5-7b-instruct", + config={ + "contextLength": 8192, + "gpu": { + "ratio": 0.5, + } + } + ) ``` See [`LLMLoadModelConfig`](./../../typescript/api-reference/llm-load-model-config) in the diff --git a/1_python/1_llm-prediction/speculative-decoding.md b/1_python/1_llm-prediction/speculative-decoding.md index 6f5c305..bd66b96 100644 --- a/1_python/1_llm-prediction/speculative-decoding.md +++ b/1_python/1_llm-prediction/speculative-decoding.md @@ -10,48 +10,42 @@ Speculative decoding is a technique that can substantially increase the generati To use speculative decoding in `lmstudio-python`, simply provide a `draftModel` parameter when performing the prediction. You do not need to load the draft model separately. 
-```lms_code_snippet - variants: - "Non-streaming": - language: python - code: | - import lmstudio as lms - - main_model_key = "qwen2.5-7b-instruct" - draft_model_key = "qwen2.5-0.5b-instruct" - - model = lms.llm(main_model_key) - result = model.respond( - "What are the prime numbers between 0 and 100?", - config={ - "draftModel": draft_model_key, - } - ) - - print(result) - stats = result.stats - print(f"Accepted {stats.accepted_draft_tokens_count}/{stats.predicted_tokens_count} tokens") - - - Streaming: - language: python - code: | - import lmstudio as lms - - main_model_key = "qwen2.5-7b-instruct" - draft_model_key = "qwen2.5-0.5b-instruct" - - model = lms.llm(main_model_key) - prediction_stream = model.respond_stream( - "What are the prime numbers between 0 and 100?", - config={ - "draftModel": draft_model_key, - } - ) - for fragment in prediction_stream: - print(fragment.content, end="", flush=True) - print() # Advance to a new line at the end of the response - - stats = prediction_stream.result().stats - print(f"Accepted {stats.accepted_draft_tokens_count}/{stats.predicted_tokens_count} tokens") +```python tab="Non-streaming" +import lmstudio as lms + +main_model_key = "qwen2.5-7b-instruct" +draft_model_key = "qwen2.5-0.5b-instruct" + +model = lms.llm(main_model_key) +result = model.respond( + "What are the prime numbers between 0 and 100?", + config={ + "draftModel": draft_model_key, + } +) + +print(result) +stats = result.stats +print(f"Accepted {stats.accepted_draft_tokens_count}/{stats.predicted_tokens_count} tokens") +``` + +```python tab="Streaming" +import lmstudio as lms + +main_model_key = "qwen2.5-7b-instruct" +draft_model_key = "qwen2.5-0.5b-instruct" + +model = lms.llm(main_model_key) +prediction_stream = model.respond_stream( + "What are the prime numbers between 0 and 100?", + config={ + "draftModel": draft_model_key, + } +) +for fragment in prediction_stream: + print(fragment.content, end="", flush=True) +print() # Advance to a new line at the end of the response + +stats = prediction_stream.result().stats +print(f"Accepted {stats.accepted_draft_tokens_count}/{stats.predicted_tokens_count} tokens") ``` diff --git a/1_python/1_llm-prediction/structured-response.md b/1_python/1_llm-prediction/structured-response.md index 3c6f910..2270f56 100644 --- a/1_python/1_llm-prediction/structured-response.md +++ b/1_python/1_llm-prediction/structured-response.md @@ -39,64 +39,53 @@ while `lmstudio.BaseModel` is a `msgspec.Struct` subclass that implements `.mode #### Define a Class Based Schema -```lms_code_snippet - variants: - "pydantic.BaseModel": - language: python - code: | - from pydantic import BaseModel - - # A class based schema for a book - class BookSchema(BaseModel): - title: str - author: str - year: int - - "lmstudio.BaseModel": - language: python - code: | - from lmstudio import BaseModel - - # A class based schema for a book - class BookSchema(BaseModel): - title: str - author: str - year: int +```python tab="pydantic.BaseModel" +from pydantic import BaseModel + +# A class based schema for a book +class BookSchema(BaseModel): + title: str + author: str + year: int +``` + +```python tab="lmstudio.BaseModel" +from lmstudio import BaseModel +# A class based schema for a book +class BookSchema(BaseModel): + title: str + author: str + year: int ``` #### Generate a Structured Response -```lms_code_snippet - variants: - "Non-streaming": - language: python - code: | - result = model.respond("Tell me about The Hobbit", response_format=BookSchema) - book = result.parsed - - 
        print(book)
-        # ^
-        # Note that `book` is correctly typed as { title: string, author: string, year: number }
-
-    Streaming:
-      language: python
-      code: |
-        prediction_stream = model.respond_stream("Tell me about The Hobbit", response_format=BookSchema)
-
-        # Optionally stream the response
-        # for fragment in prediction:
-        #     print(fragment.content, end="", flush=True)
-        # print()
-        # Note that even for structured responses, the *fragment* contents are still only text
-
-        # Get the final structured result
-        result = prediction_stream.result()
-        book = result.parsed
-
-        print(book)
-        # ^
-        # Note that `book` is correctly typed as { title: string, author: string, year: number }
+```python tab="Non-streaming"
+result = model.respond("Tell me about The Hobbit", response_format=BookSchema)
+book = result.parsed
+
+print(book)
+# ^
+# Note that `book` matches the schema: {"title": ..., "author": ..., "year": ...}
+```
+
+```python tab="Streaming"
+prediction_stream = model.respond_stream("Tell me about The Hobbit", response_format=BookSchema)
+
+# Optionally stream the response
+# for fragment in prediction_stream:
+#     print(fragment.content, end="", flush=True)
+# print()
+# Note that even for structured responses, the *fragment* contents are still only text
+
+# Get the final structured result
+result = prediction_stream.result()
+book = result.parsed
+
+print(book)
+# ^
+# Note that `book` matches the schema: {"title": ..., "author": ..., "year": ...}
 ```

 ## Enforce Using a JSON Schema
@@ -120,36 +109,31 @@ schema = {

 #### Generate a Structured Response

-```lms_code_snippet
-  variants:
-    "Non-streaming":
-      language: python
-      code: |
-        result = model.respond("Tell me about The Hobbit", response_format=schema)
-        book = result.parsed
-
-        print(book)
-        # ^
-        # Note that `book` is correctly typed as { title: string, author: string, year: number }
-
-    Streaming:
-      language: python
-      code: |
-        prediction_stream = model.respond_stream("Tell me about The Hobbit", response_format=schema)
-
-        # Stream the response
-        for fragment in prediction:
-            print(fragment.content, end="", flush=True)
-        print()
-        # Note that even for structured responses, the *fragment* contents are still only text
-
-        # Get the final structured result
-        result = prediction_stream.result()
-        book = result.parsed
-
-        print(book)
-        # ^
-        # Note that `book` is correctly typed as { title: string, author: string, year: number }
+```python tab="Non-streaming"
+result = model.respond("Tell me about The Hobbit", response_format=schema)
+book = result.parsed
+
+print(book)
+# ^
+# Note that `book` matches the schema: {"title": ..., "author": ..., "year": ...}
+```
+
+```python tab="Streaming"
+prediction_stream = model.respond_stream("Tell me about The Hobbit", response_format=schema)
+
+# Stream the response
+for fragment in prediction_stream:
+    print(fragment.content, end="", flush=True)
+print()
+# Note that even for structured responses, the *fragment* contents are still only text
+
+# Get the final structured result
+result = prediction_stream.result()
+book = result.parsed
+
+print(book)
+# ^
+# Note that `book` matches the schema: {"title": ..., "author": ..., "year": ...}
 ```
diff --git a/1_python/1_llm-prediction/working-with-chats.md b/1_python/1_llm-prediction/working-with-chats.md
index 3be7998..d88a710 100644
--- a/1_python/1_llm-prediction/working-with-chats.md
+++ b/1_python/1_llm-prediction/working-with-chats.md
@@ -12,12 +12,8 @@ There are a few ways to represent a chat when using the SDK.
If your chat only has one single user message, you can use a single string to represent the chat. Here is an example with the `.respond` method. -```lms_code_snippet -variants: - "Single string": - language: python - code: | - prediction = llm.respond("What is the meaning of life?") +```python +prediction = llm.respond("What is the meaning of life?") ``` ## Option 2: Using the `Chat` Helper Class @@ -28,35 +24,25 @@ Here is an example with the `Chat` class, where the initial system prompt is supplied when initializing the chat instance, and then the initial user message is added via the corresponding method call. -```lms_code_snippet -variants: - "Simple chat": - language: python - code: | - chat = Chat("You are a resident AI philosopher.") - chat.add_user_message("What is the meaning of life?") +```python +chat = Chat("You are a resident AI philosopher.") +chat.add_user_message("What is the meaning of life?") - prediction = llm.respond(chat) +prediction = llm.respond(chat) ``` You can also quickly construct a `Chat` object using the `Chat.from_history` method. -```lms_code_snippet -variants: - "Chat history data": - language: python - code: | - chat = Chat.from_history({"messages": [ - { "role": "system", "content": "You are a resident AI philosopher." }, - { "role": "user", "content": "What is the meaning of life?" }, - ]}) - - "Single string": - language: python - code: | - # This constructs a chat with a single user message - chat = Chat.from_history("What is the meaning of life?") +```python tab="Chat history data" +chat = Chat.from_history({"messages": [ + { "role": "system", "content": "You are a resident AI philosopher." }, + { "role": "user", "content": "What is the meaning of life?" }, +]}) +``` +```python tab="Single string" +# This constructs a chat with a single user message +chat = Chat.from_history("What is the meaning of life?") ``` ## Option 3: Providing Chat History Data Directly @@ -64,13 +50,9 @@ variants: As the APIs that accept chat histories use `Chat.from_history` internally, they also accept the chat history data format as a regular dictionary: -```lms_code_snippet -variants: - "Chat history data": - language: python - code: | - prediction = llm.respond({"messages": [ - { "role": "system", "content": "You are a resident AI philosopher." }, - { "role": "user", "content": "What is the meaning of life?" }, - ]}) +```python +prediction = llm.respond({"messages": [ + { "role": "system", "content": "You are a resident AI philosopher." }, + { "role": "user", "content": "What is the meaning of life?" }, +]}) ``` diff --git a/1_python/2_agent/act.md b/1_python/2_agent/act.md index 3e90948..56ef95b 100644 --- a/1_python/2_agent/act.md +++ b/1_python/2_agent/act.md @@ -24,23 +24,19 @@ With this in mind, we say that the `.act()` API is an automatic "multi-round" to ### Quick Example -```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms - - def multiply(a: float, b: float) -> float: - """Given two numbers a and b. Returns the product of them.""" - return a * b - - model = lms.llm("qwen2.5-7b-instruct") - model.act( - "What is the result of 12345 multiplied by 54321?", - [multiply], - on_message=print, - ) +```python +import lmstudio as lms + +def multiply(a: float, b: float) -> float: + """Given two numbers a and b. 
Returns the product of them."""
+    return a * b
+
+model = lms.llm("qwen2.5-7b-instruct")
+model.act(
+    "What is the result of 12345 multiplied by 54321?",
+    [multiply],
+    on_message=print,
+)
 ```

 ### What does it mean for an LLM to "use a tool"?
@@ -76,88 +72,79 @@ Some general guidance when selecting a model:

 The following code demonstrates how to provide multiple tools in a single `.act()` call.

-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import math
-        import lmstudio as lms
-
-        def add(a: int, b: int) -> int:
-            """Given two numbers a and b, returns the sum of them."""
-            return a + b
-
-        def is_prime(n: int) -> bool:
-            """Given a number n, returns True if n is a prime number."""
-            if n < 2:
-                return False
-            sqrt = int(math.sqrt(n))
-            for i in range(2, sqrt):
-                if n % i == 0:
-                    return False
-            return True
-
-        model = lms.llm("qwen2.5-7b-instruct")
-        model.act(
-            "Is the result of 12345 + 45668 a prime? Think step by step.",
-            [add, is_prime],
-            on_message=print,
-        )
+```python
+import math
+import lmstudio as lms
+
+def add(a: int, b: int) -> int:
+    """Given two numbers a and b, returns the sum of them."""
+    return a + b
+
+def is_prime(n: int) -> bool:
+    """Given a number n, returns True if n is a prime number."""
+    if n < 2:
+        return False
+    sqrt = int(math.sqrt(n))
+    # Check divisors up to and including the integer square root
+    for i in range(2, sqrt + 1):
+        if n % i == 0:
+            return False
+    return True
+
+model = lms.llm("qwen2.5-7b-instruct")
+model.act(
+    "Is the result of 12345 + 45668 a prime? Think step by step.",
+    [add, is_prime],
+    on_message=print,
+)
 ```

 ### Example: Chat Loop with Create File Tool

 The following code creates a conversation loop with an LLM agent that can create files.

-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import readline # Enables input line editing
-        from pathlib import Path
-
-        import lmstudio as lms
-
-        def create_file(name: str, content: str):
-            """Create a file with the given name and content."""
-            dest_path = Path(name)
-            if dest_path.exists():
-                return "Error: File already exists."
-            try:
-                dest_path.write_text(content, encoding="utf-8")
-            except Exception as exc:
-                return "Error: {exc!r}"
-            return "File created."
-
-        def print_fragment(fragment, round_index=0):
-            # .act() supplies the round index as the second parameter
-            # Setting a default value means the callback is also
-            # compatible with .complete() and .respond().
-            print(fragment.content, end="", flush=True)
-
-        model = lms.llm()
-        chat = lms.Chat("You are a task focused AI assistant")
-
-        while True:
-            try:
-                user_input = input("You (leave blank to exit): ")
-            except EOFError:
-                print()
-                break
-            if not user_input:
-                break
-            chat.add_user_message(user_input)
-            print("Bot: ", end="", flush=True)
-            model.act(
-                chat,
-                [create_file],
-                on_message=chat.append,
-                on_prediction_fragment=print_fragment,
-            )
-            print()
-
+```python
+import readline  # Enables input line editing
+from pathlib import Path
+
+import lmstudio as lms
+
+def create_file(name: str, content: str):
+    """Create a file with the given name and content."""
+    dest_path = Path(name)
+    if dest_path.exists():
+        return "Error: File already exists."
+    try:
+        dest_path.write_text(content, encoding="utf-8")
+    except Exception as exc:
+        return f"Error: {exc!r}"
+    return "File created."
+
+def print_fragment(fragment, round_index=0):
+    # .act() supplies the round index as the second parameter
+    # Setting a default value means the callback is also
+    # compatible with .complete() and .respond().
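+    # (both of which pass only the fragment to the callback)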
+ print(fragment.content, end="", flush=True) + +model = lms.llm() +chat = lms.Chat("You are a task focused AI assistant") + +while True: + try: + user_input = input("You (leave blank to exit): ") + except EOFError: + print() + break + if not user_input: + break + chat.add_user_message(user_input) + print("Bot: ", end="", flush=True) + model.act( + chat, + [create_file], + on_message=chat.append, + on_prediction_fragment=print_fragment, + ) + print() ``` ### Progress Callbacks diff --git a/1_python/2_agent/meta.json b/1_python/2_agent/meta.json new file mode 100644 index 0000000..64a4fe7 --- /dev/null +++ b/1_python/2_agent/meta.json @@ -0,0 +1,8 @@ +{ + "title": "Agentic Flows", + "pages": [ + "act", + "_index", + "tools" + ] +} diff --git a/1_python/2_agent/tools.md b/1_python/2_agent/tools.md index fa44073..3ca8b6b 100644 --- a/1_python/2_agent/tools.md +++ b/1_python/2_agent/tools.md @@ -13,58 +13,51 @@ name and description passed to the language model. Follow one of the following examples to define functions as tools (the first approach is typically going to be the most convenient): -```lms_code_snippet - variants: - "Python function": - language: python - code: | - # Type hinted functions with clear names and docstrings - # may be used directly as tool definitions - def add(a: int, b: int) -> int: - """Given two numbers a and b, returns the sum of them.""" - # The SDK ensures arguments are coerced to their specified types - return a + b - - # Pass `add` directly to `act()` as a tool definition - - "ToolFunctionDef.from_callable": - language: python - code: | - from lmstudio import ToolFunctionDef - - def cryptic_name(a: int, b: int) -> int: - return a + b - - # Type hinted functions with cryptic names and missing or poor docstrings - # can be turned into clear tool definitions with `from_callable` - tool_def = ToolFunctionDef.from_callable( - cryptic_name, - name="add", - description="Given two numbers a and b, returns the sum of them." - ) - # Pass `tool_def` to `act()` as a tool definition - - "ToolFunctionDef": - language: python - code: | - from lmstudio import ToolFunctionDef - - def cryptic_name(a, b): - return a + b - - # Functions without type hints can be used without wrapping them - # at runtime by defining a tool function directly. - tool_def = ToolFunctionDef( - name="add", - description="Given two numbers a and b, returns the sum of them.", - parameters={ - "a": int, - "b": int, - }, - implementation=cryptic_name, - ) - # Pass `tool_def` to `act()` as a tool definition +```python tab="Python function" +# Type hinted functions with clear names and docstrings +# may be used directly as tool definitions +def add(a: int, b: int) -> int: + """Given two numbers a and b, returns the sum of them.""" + # The SDK ensures arguments are coerced to their specified types + return a + b + +# Pass `add` directly to `act()` as a tool definition +``` + +```python tab="ToolFunctionDef.from_callable" +from lmstudio import ToolFunctionDef +def cryptic_name(a: int, b: int) -> int: + return a + b + +# Type hinted functions with cryptic names and missing or poor docstrings +# can be turned into clear tool definitions with `from_callable` +tool_def = ToolFunctionDef.from_callable( + cryptic_name, + name="add", + description="Given two numbers a and b, returns the sum of them." 
+)
+# Pass `tool_def` to `act()` as a tool definition
+```
+
+```python tab="ToolFunctionDef"
+from lmstudio import ToolFunctionDef
+
+def cryptic_name(a, b):
+    return a + b
+
+# Functions without type hints can be used without wrapping them
+# at runtime by defining a tool function directly.
+tool_def = ToolFunctionDef(
+    name="add",
+    description="Given two numbers a and b, returns the sum of them.",
+    parameters={
+        "a": int,
+        "b": int,
+    },
+    implementation=cryptic_name,
+)
+# Pass `tool_def` to `act()` as a tool definition
 ```

 **Important**: The tool name, description, and the parameter definitions are all passed to the model!
@@ -80,43 +73,32 @@ can essentially turn your LLMs into autonomous agents that can perform tasks on

 ### Tool Definition

-```lms_code_snippet
-  title: "create_file_tool.py"
-  variants:
-    Python:
-      language: python
-      code: |
-        from pathlib import Path
-
-        def create_file(name: str, content: str):
-            """Create a file with the given name and content."""
-            dest_path = Path(name)
-            if dest_path.exists():
-                return "Error: File already exists."
-            try:
-                dest_path.write_text(content, encoding="utf-8")
-            except Exception as exc:
-                return "Error: {exc!r}"
-            return "File created."
-
+```python title="create_file_tool.py"
+from pathlib import Path
+
+def create_file(name: str, content: str):
+    """Create a file with the given name and content."""
+    dest_path = Path(name)
+    if dest_path.exists():
+        return "Error: File already exists."
+    try:
+        dest_path.write_text(content, encoding="utf-8")
+    except Exception as exc:
+        return f"Error: {exc!r}"
+    return "File created."
 ```

 ### Example code using the `create_file` tool:

-```lms_code_snippet
-  title: "example.py"
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
-        from create_file_tool import create_file
-
-        model = lms.llm("qwen2.5-7b-instruct")
-        model.act(
-            "Please create a file named output.txt with your understanding of the meaning of life.",
-            [create_file],
-        )
+```python title="example.py"
+import lmstudio as lms
+from create_file_tool import create_file
+
+model = lms.llm("qwen2.5-7b-instruct")
+model.act(
+    "Please create a file named output.txt with your understanding of the meaning of life.",
+    [create_file],
+)
 ```

 ## Handling tool calling errors
@@ -134,34 +116,29 @@ This error handling behaviour can be overridden using the `handle_invalid_tool_r
 callback. For example, the following code reverts the error handling back to raising
 exceptions locally in the client:

-```lms_code_snippet
-  title: "example.py"
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        def divide(numerator: float, denominator: float) -> float:
-            """Divide the given numerator by the given denominator. Return the result."""
-            return numerator / denominator
-
-        model = lms.llm("qwen2.5-7b-instruct")
-        chat = Chat()
-        chat.add_user_message(
-            "Attempt to divide 1 by 0 using the tool. Explain the result."
-        )
-
-        def _raise_exc_in_client(
-            exc: LMStudioPredictionError, request: ToolCallRequest | None
-        ) -> None:
-            raise exc
-
-        act_result = llm.act(
-            chat,
-            [divide],
-            handle_invalid_tool_request=_raise_exc_in_client,
-        )
+```python title="example.py"
+import lmstudio as lms
+
+def divide(numerator: float, denominator: float) -> float:
+    """Divide the given numerator by the given denominator.
Return the result."""
+    return numerator / denominator
+
+model = lms.llm("qwen2.5-7b-instruct")
+chat = lms.Chat()
+chat.add_user_message(
+    "Attempt to divide 1 by 0 using the tool. Explain the result."
+)
+
+def _raise_exc_in_client(
+    exc: lms.LMStudioPredictionError, request: lms.ToolCallRequest | None
+) -> None:
+    raise exc
+
+act_result = model.act(
+    chat,
+    [divide],
+    handle_invalid_tool_request=_raise_exc_in_client,
+)
 ```

 When a tool request is passed in, the callback results are processed as follows:
diff --git a/1_python/3_embedding/index.md b/1_python/3_embedding/index.md
index 7a7a022..3e80f1a 100644
--- a/1_python/3_embedding/index.md
+++ b/1_python/3_embedding/index.md
@@ -18,16 +18,10 @@ lms get nomic-ai/nomic-embed-text-v1.5
 To convert a string to a vector representation, pass it to the `embed`
 method on the corresponding embedding model handle.

-```lms_code_snippet
-  title: "example.py"
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
+```python title="example.py"
+import lmstudio as lms

-        model = lms.embedding_model("nomic-embed-text-v1.5")
-
-        embedding = model.embed("Hello, world!")
+model = lms.embedding_model("nomic-embed-text-v1.5")
+
+embedding = model.embed("Hello, world!")
 ```
diff --git a/1_python/4_tokenization/index.md b/1_python/4_tokenization/index.md
index ec71f6f..7121855 100644
--- a/1_python/4_tokenization/index.md
+++ b/1_python/4_tokenization/index.md
@@ -12,31 +12,23 @@ You can tokenize a string with a loaded LLM or embedding model using the SDK.
 In the below examples, the LLM reference can be replaced with
 an embedding model reference without requiring any other changes.

-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
+```python
+import lmstudio as lms

-        model = lms.llm()
+model = lms.llm()

-        tokens = model.tokenize("Hello, world!")
+tokens = model.tokenize("Hello, world!")

-        print(tokens) # Array of token IDs.
+print(tokens) # List of token IDs.
 ```

 ## Count tokens

 If you only care about the number of tokens, simply check the length of the resulting array.

-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        token_count = len(model.tokenize("Hello, world!"))
-        print("Token count:", token_count)
+```python
+token_count = len(model.tokenize("Hello, world!"))
+print("Token count:", token_count)
 ```

 ### Example: count context

 You can determine if a given conversation fits into a model's context by doing the following:

 2. Count the number of tokens in the string.
 3. Compare the token count to the model's context length.

-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        def does_chat_fit_in_context(model: lms.LLM, chat: lms.Chat) -> bool:
-            # Convert the conversation to a string using the prompt template.
-            formatted = model.apply_prompt_template(chat)
-            # Count the number of tokens in the string.
-            token_count = len(model.tokenize(formatted))
-            # Get the current loaded context length of the model
-            context_length = model.get_context_length()
-            return token_count < context_length
-
-        model = lms.llm()
-
-        chat = lms.Chat.from_history({
-            "messages": [
-                { "role": "user", "content": "What is the meaning of life." },
-                { "role": "assistant", "content": "The meaning of life is..." },
-                # ... More messages
-            ]
-        })
-
-        print("Fits in context:", does_chat_fit_in_context(model, chat))
-
+```python
+import lmstudio as lms
+
+def does_chat_fit_in_context(model: lms.LLM, chat: lms.Chat) -> bool:
+    # Convert the conversation to a string using the prompt template.
+    formatted = model.apply_prompt_template(chat)
+    # Count the number of tokens in the string.
+    token_count = len(model.tokenize(formatted))
+    # Get the current loaded context length of the model
+    context_length = model.get_context_length()
+    return token_count < context_length
+
+model = lms.llm()
+
+chat = lms.Chat.from_history({
+    "messages": [
+        { "role": "user", "content": "What is the meaning of life." },
+        { "role": "assistant", "content": "The meaning of life is..." },
+        # ...
More messages - ] - }) - - print("Fits in context:", does_chat_fit_in_context(model, chat)) - +```python +import lmstudio as lms + +def does_chat_fit_in_context(model: lms.LLM, chat: lms.Chat) -> bool: + # Convert the conversation to a string using the prompt template. + formatted = model.apply_prompt_template(chat) + # Count the number of tokens in the string. + token_count = len(model.tokenize(formatted)) + # Get the current loaded context length of the model + context_length = model.get_context_length() + return token_count < context_length + +model = lms.llm() + +chat = lms.Chat.from_history({ + "messages": [ + { "role": "user", "content": "What is the meaning of life." }, + { "role": "assistant", "content": "The meaning of life is..." }, + # ... More messages + ] +}) + +print("Fits in context:", does_chat_fit_in_context(model, chat)) ``` diff --git a/1_python/5_manage-models/_download-models.md b/1_python/5_manage-models/_download-models.md index 43ef689..71234de 100644 --- a/1_python/5_manage-models/_download-models.md +++ b/1_python/5_manage-models/_download-models.md @@ -22,35 +22,31 @@ Downloading models consists of three steps: TODO: Actually translate this example code from TS to Python -```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - - const client = new LMStudioClient() - - # 1. Search for the model you want - # Specify any/all of searchTerm, limit, compatibilityTypes - const searchResults = client.repository.searchModels({ - searchTerm: "llama 3.2 1b", # Search for Llama 3.2 1B - limit: 5, # Get top 5 results - compatibilityTypes: ["gguf"], # Only download GGUFs - }) - - # 2. Find download options - const bestResult = searchResults[0]; - const downloadOptions = bestResult.getDownloadOptions() - - # Let's download Q4_K_M, a good middle ground quantization - const desiredModel = downloadOptions.find(option => option.quantization === 'Q4_K_M') - - # 3. Download it! - const modelKey = desiredModel.download() - - # This returns a path you can use to load the model - const loadedModel = client.llm.model(modelKey) +```python +import { LMStudioClient } from "@lmstudio/sdk"; + +const client = new LMStudioClient() + +# 1. Search for the model you want +# Specify any/all of searchTerm, limit, compatibilityTypes +const searchResults = client.repository.searchModels({ + searchTerm: "llama 3.2 1b", # Search for Llama 3.2 1B + limit: 5, # Get top 5 results + compatibilityTypes: ["gguf"], # Only download GGUFs +}) + +# 2. Find download options +const bestResult = searchResults[0]; +const downloadOptions = bestResult.getDownloadOptions() + +# Let's download Q4_K_M, a good middle ground quantization +const desiredModel = downloadOptions.find(option => option.quantization === 'Q4_K_M') + +# 3. Download it! +const modelKey = desiredModel.download() + +# This returns a path you can use to load the model +const loadedModel = client.llm.model(modelKey) ``` ## Advanced Usage @@ -64,51 +60,44 @@ If you want to get updates on the progress of this process, you can provide call one for progress updates and/or one when the download is being finalized (validating checksums, etc.) 
-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import { LMStudioClient, type DownloadProgressUpdate } from "@lmstudio/sdk";
-
-        function printProgressUpdate(update: DownloadProgressUpdate) {
-          process.stdout.write(`Downloaded ${update.downloadedBytes} bytes of ${update.totalBytes} total \
-        at ${update.speed_bytes_per_second} bytes/sec`)
-        }
-
-        const client = new LMStudioClient()
-
-        # ... Same code as before ...
-
-        modelKey = desiredModel.download({
-          onProgress: printProgressUpdate,
-          onStartFinalizing: () => console.log("Finalizing..."),
-        })
-
-        const loadedModel = client.llm.model(modelKey)
-
-    "Python (scoped resource API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        def print_progress_update(update: lmstudio.DownloadProgressUpdate) -> None:
-            print(f"Downloaded {update.downloaded_bytes} bytes of {update.total_bytes} total \
-        at {update.speed_bytes_per_second} bytes/sec")
-
-        with lms.Client() as client:
-            # ... Same code as before ...
-
-            model_key = desired_model.download(
-                on_progress=print_progress_update,
-                on_finalize: lambda: print("Finalizing download...")
-            )
-
-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
+```typescript tab="Python (convenience API)"
+import { LMStudioClient, type DownloadProgressUpdate } from "@lmstudio/sdk";
+
+function printProgressUpdate(update: DownloadProgressUpdate) {
+  process.stdout.write(`Downloaded ${update.downloadedBytes} bytes of ${update.totalBytes} total \
+at ${update.speed_bytes_per_second} bytes/sec`)
+}
+
+const client = new LMStudioClient()
+
+// ... Same code as before ...
+
+const modelKey = desiredModel.download({
+  onProgress: printProgressUpdate,
+  onStartFinalizing: () => console.log("Finalizing..."),
+})
+
+const loadedModel = client.llm.model(modelKey)
+```
+
+```python tab="Python (scoped resource API)"
+import lmstudio as lms
+
+def print_progress_update(update: lms.DownloadProgressUpdate) -> None:
+    print(f"Downloaded {update.downloaded_bytes} bytes of {update.total_bytes} total \
+at {update.speed_bytes_per_second} bytes/sec")
+
+with lms.Client() as client:
+    # ... Same code as before ...
+
+    model_key = desired_model.download(
+        on_progress=print_progress_update,
+        on_finalize=lambda: print("Finalizing download..."),
+    )
+```
+
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms
 ```
diff --git a/1_python/5_manage-models/list-downloaded.md b/1_python/5_manage-models/list-downloaded.md
index 6f09c1f..9035458 100644
--- a/1_python/5_manage-models/list-downloaded.md
+++ b/1_python/5_manage-models/list-downloaded.md
@@ -10,48 +10,41 @@ downloaded model reference to be converted in the full SDK handle for a loaded m
 
 ## Available Models on the LM Studio Server
 
-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        downloaded = lms.list_downloaded_models()
-        llm_only = lms.list_downloaded_models("llm")
-        embedding_only = lms.list_downloaded_models("embedding")
-
-        for model in downloaded:
-            print(model)
-
-    "Python (scoped resource API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        with lms.Client() as client:
-            downloaded = client.list_downloaded_models()
-            llm_only = client.llm.list_downloaded()
-            embedding_only = client.embedding.list_downloaded()
-
-            for model in downloaded:
-                print(model)
-
-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
-
-        async with lms.AsyncClient() as client:
-            downloaded = await client.list_downloaded_models()
-            llm_only = await client.llm.list_downloaded()
-            embedding_only = await client.embedding.list_downloaded()
-
-            for model in downloaded:
-                print(model)
+```python tab="Python (convenience API)"
+import lmstudio as lms
+
+downloaded = lms.list_downloaded_models()
+llm_only = lms.list_downloaded_models("llm")
+embedding_only = lms.list_downloaded_models("embedding")
+
+for model in downloaded:
+    print(model)
+```
+
+```python tab="Python (scoped resource API)"
+import lmstudio as lms
+
+with lms.Client() as client:
+    downloaded = client.list_downloaded_models()
+    llm_only = client.llm.list_downloaded()
+    embedding_only = client.embedding.list_downloaded()
+
+for model in downloaded:
+    print(model)
+```
+
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms
+
+async with lms.AsyncClient() as client:
+    downloaded = await client.list_downloaded_models()
+    llm_only = await client.llm.list_downloaded()
+    embedding_only = await client.embedding.list_downloaded()
+
+for model in downloaded:
+    print(model)
 ```
 
 This will give you results equivalent to using [`lms ls`](../../cli/ls) in the CLI.
diff --git a/1_python/5_manage-models/list-loaded.md b/1_python/5_manage-models/list-loaded.md
index e41ebd2..bb32254 100644
--- a/1_python/5_manage-models/list-loaded.md
+++ b/1_python/5_manage-models/list-loaded.md
@@ -11,43 +11,36 @@ The results are full SDK model handles, allowing access to all model functionali
 
 This will give you results equivalent to using [`lms ps`](../../cli/ps) in the CLI.
 
-```lms_code_snippet
-  variants:
-    "Python (convenience API)":
-      language: python
-      code: |
-        import lmstudio as lms
-
-        all_loaded_models = lms.list_loaded_models()
-        llm_only = lms.list_loaded_models("llm")
-        embedding_only = lms.list_loaded_models("embedding")
-
-        print(all_loaded_models)
-
-    Python (scoped resource API):
-      language: python
-      code: |
-        import lms
-
-        with lms.Client() as client:
-            all_loaded_models = client.list_loaded_models()
-            llm_only = client.llm.list_loaded()
-            embedding_only = client.embedding.list_loaded()
-
-            print(all_loaded_models)
-
-    "Python (asynchronous API)":
-      language: python
-      code: |
-        # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
-        # Requires Python SDK version 1.5.0 or later
-        import lmstudio as lms
-
-        async with lms.AsyncClient() as client:
-            all_loaded_models = await client.list_loaded_models()
-            llm_only = await client.llm.list_loaded()
-            embedding_only = await client.embedding.list_loaded()
-
-            print(all_loaded_models)
+```python tab="Python (convenience API)"
+import lmstudio as lms
+
+all_loaded_models = lms.list_loaded_models()
+llm_only = lms.list_loaded_models("llm")
+embedding_only = lms.list_loaded_models("embedding")
+
+print(all_loaded_models)
+```
+
+```python tab="Python (scoped resource API)"
+import lmstudio as lms
+
+with lms.Client() as client:
+    all_loaded_models = client.list_loaded_models()
+    llm_only = client.llm.list_loaded()
+    embedding_only = client.embedding.list_loaded()
+
+    print(all_loaded_models)
+```
+
+```python tab="Python (asynchronous API)"
+# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL
+# Requires Python SDK version 1.5.0 or later
+import lmstudio as lms
+
+async with lms.AsyncClient() as client:
+    all_loaded_models = await client.list_loaded_models()
+    llm_only = await client.llm.list_loaded()
+    embedding_only = await client.embedding.list_loaded()
+
+    print(all_loaded_models)
 ```
diff --git a/1_python/5_manage-models/loading.md b/1_python/5_manage-models/loading.md
index 6a43ddf..91ab45a 100644
--- a/1_python/5_manage-models/loading.md
+++ b/1_python/5_manage-models/loading.md
@@ -28,33 +28,26 @@ AI models are huge. It can take a while to load them into memory. LM Studio's SD
 
 If you already have a model loaded in LM Studio (either via the GUI or `lms load`), you can use it by calling `.model()` without any arguments.
 
-```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms +```python tab="Python (convenience API)" +import lmstudio as lms - model = lms.llm() - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms +model = lms.llm() +``` - with lms.Client() as client: - model = client.llm.model() +```python tab="Python (scoped resource API)" +import lmstudio as lms - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms +with lms.Client() as client: + model = client.llm.model() +``` - async with lms.AsyncClient() as client: - model = await client.llm.model() +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms +async with lms.AsyncClient() as client: + model = await client.llm.model() ``` ## Get a Specific Model with `.model("model-key")` @@ -65,33 +58,26 @@ If you want to use a specific model, you can provide the model key as an argumen Calling `.model("model-key")` will load the model if it's not already loaded, or return the existing instance if it is. -```lms_code_snippet - variants: - "Python (convenience API)": - language: python - code: | - import lmstudio as lms +```python tab="Python (convenience API)" +import lmstudio as lms - model = lms.llm("qwen/qwen3-4b-2507") - - "Python (scoped resource API)": - language: python - code: | - import lmstudio as lms +model = lms.llm("qwen/qwen3-4b-2507") +``` - with lms.Client() as client: - model = client.llm.model("qwen/qwen3-4b-2507") +```python tab="Python (scoped resource API)" +import lmstudio as lms - "Python (asynchronous API)": - language: python - code: | - # Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL - # Requires Python SDK version 1.5.0 or later - import lmstudio as lms +with lms.Client() as client: + model = client.llm.model("qwen/qwen3-4b-2507") +``` - async with lms.AsyncClient() as client: - model = await client.llm.model("qwen/qwen3-4b-2507") +```python tab="Python (asynchronous API)" +# Note: assumes use of an async function or the "python -m asyncio" asynchronous REPL +# Requires Python SDK version 1.5.0 or later +import lmstudio as lms +async with lms.AsyncClient() as client: + model = await client.llm.model("qwen/qwen3-4b-2507") ``` -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { Chat, LMStudioClient } from "@lmstudio/sdk"; - import { createInterface } from "readline/promises"; - - const rl = createInterface({ input: process.stdin, output: process.stdout }); - const client = new LMStudioClient(); - const model = await client.llm.model(); - const chat = Chat.empty(); - - while (true) { - const input = await rl.question("You: "); - // Append the user input to the chat - chat.append("user", input); - - const prediction = model.respond(chat, { - // When the model finish the entire message, push it to the chat - onMessage: (message) => chat.append(message), - }); - process.stdout.write("Bot: "); - for await (const { content } of prediction) { - process.stdout.write(content); - } - process.stdout.write("\n"); - } +```typescript +import { Chat, LMStudioClient } from "@lmstudio/sdk"; +import { createInterface } from "readline/promises"; + +const rl = 
createInterface({ input: process.stdin, output: process.stdout });
+const client = new LMStudioClient();
+const model = await client.llm.model();
+const chat = Chat.empty();
+
+while (true) {
+  const input = await rl.question("You: ");
+  // Append the user input to the chat
+  chat.append("user", input);
+
+  const prediction = model.respond(chat, {
+    // When the model finishes the entire message, push it to the chat
+    onMessage: (message) => chat.append(message),
+  });
+  process.stdout.write("Bot: ");
+  for await (const { content } of prediction) {
+    process.stdout.write(content);
+  }
+  process.stdout.write("\n");
+}
 ```
diff --git a/2_typescript/2_llm-prediction/working-with-chats.md b/2_typescript/2_llm-prediction/working-with-chats.md
index 028884c..57c7c21 100644
--- a/2_typescript/2_llm-prediction/working-with-chats.md
+++ b/2_typescript/2_llm-prediction/working-with-chats.md
@@ -11,78 +11,62 @@ takes in a chat parameter as an input. There are a few ways to represent a chat
 
 You can use an array of messages to represent a chat. Here is an example with the `.respond()` method.
 
-```lms_code_snippet
-variants:
-  "Text-only":
-    language: typescript
-    code: |
-      const prediction = model.respond([
-        { role: "system", content: "You are a resident AI philosopher." },
-        { role: "user", content: "What is the meaning of life?" },
-      ]);
-  With Images:
-    language: typescript
-    code: |
-      const image = await client.files.prepareImage("/path/to/image.jpg");
-
-      const prediction = model.respond([
-        { role: "system", content: "You are a state-of-art object recognition system." },
-        { role: "user", content: "What is this object?", images: [image] },
-      ]);
+```typescript tab="Text-only"
+const prediction = model.respond([
+  { role: "system", content: "You are a resident AI philosopher." },
+  { role: "user", content: "What is the meaning of life?" },
+]);
+```
+
+```typescript tab="With Images"
+const image = await client.files.prepareImage("/path/to/image.jpg");
+
+const prediction = model.respond([
+  { role: "system", content: "You are a state-of-the-art object recognition system." },
+  { role: "user", content: "What is this object?", images: [image] },
+]);
 ```
 
 ## Option 2: Input a Single String
 
 If your chat only has a single user message, you can use a single string to represent the chat. Here is an example with the `.respond` method.
 
-```lms_code_snippet
-variants:
-  TypeScript:
-    language: typescript
-    code: |
-      const prediction = model.respond("What is the meaning of life?");
+```typescript
+const prediction = model.respond("What is the meaning of life?");
 ```
 
 ## Option 3: Using the `Chat` Helper Class
 
 For more complex tasks, it is recommended to use the `Chat` helper class. It provides various commonly used methods to manage the chat. Here is an example with the `Chat` class.
 
-```lms_code_snippet
-variants:
-  "Text-only":
-    language: typescript
-    code: |
-      const chat = Chat.empty();
-      chat.append("system", "You are a resident AI philosopher.");
-      chat.append("user", "What is the meaning of life?");
-
-      const prediction = model.respond(chat);
-  With Images:
-    language: typescript
-    code: |
-      const image = await client.files.prepareImage("/path/to/image.jpg");
-
-      const chat = Chat.empty();
-      chat.append("system", "You are a state-of-art object recognition system.");
-      chat.append("user", "What is this object?", { images: [image] });
-
-      const prediction = model.respond(chat);
+```typescript tab="Text-only"
+const chat = Chat.empty();
+chat.append("system", "You are a resident AI philosopher.");
+chat.append("user", "What is the meaning of life?");
+
+const prediction = model.respond(chat);
+```
+
+```typescript tab="With Images"
+const image = await client.files.prepareImage("/path/to/image.jpg");
+
+const chat = Chat.empty();
+chat.append("system", "You are a state-of-the-art object recognition system.");
+chat.append("user", "What is this object?", { images: [image] });
+
+const prediction = model.respond(chat);
 ```
 
 You can also quickly construct a `Chat` object using the `Chat.from` method.
 
-```lms_code_snippet
-variants:
-  "Array of messages":
-    language: typescript
-    code: |
-      const chat = Chat.from([
-        { role: "system", content: "You are a resident AI philosopher." },
-        { role: "user", content: "What is the meaning of life?" },
-      ]);
-  "Single string":
-    language: typescript
-    code: |
-      // This constructs a chat with a single user message
-      const chat = Chat.from("What is the meaning of life?");
+```typescript tab="Array of messages"
+const chat = Chat.from([
+  { role: "system", content: "You are a resident AI philosopher." },
+  { role: "user", content: "What is the meaning of life?" },
+]);
+```
+
+```typescript tab="Single string"
+// This constructs a chat with a single user message
+const chat = Chat.from("What is the meaning of life?");
 ```
diff --git a/2_typescript/3_agent/act.md b/2_typescript/3_agent/act.md
index f49a9c0..5e9f945 100644
--- a/2_typescript/3_agent/act.md
+++ b/2_typescript/3_agent/act.md
@@ -24,27 +24,23 @@ With this in mind, we say that the `.act()` API is an automatic "multi-round" to
 
 ### Quick Example
 
-```lms_code_snippet
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { LMStudioClient, tool } from "@lmstudio/sdk";
-        import { z } from "zod";
-
-        const client = new LMStudioClient();
-
-        const multiplyTool = tool({
-          name: "multiply",
-          description: "Given two numbers a and b. Returns the product of them.",
-          parameters: { a: z.number(), b: z.number() },
-          implementation: ({ a, b }) => a * b,
-        });
-
-        const model = await client.llm.model("qwen2.5-7b-instruct");
-        await model.act("What is the result of 12345 multiplied by 54321?", [multiplyTool], {
-          onMessage: (message) => console.info(message.toString()),
-        });
+```typescript
+import { LMStudioClient, tool } from "@lmstudio/sdk";
+import { z } from "zod";
+
+const client = new LMStudioClient();
+
+const multiplyTool = tool({
+  name: "multiply",
+  description: "Given two numbers a and b. Returns the product of them.",
+  parameters: { a: z.number(), b: z.number() },
+  implementation: ({ a, b }) => a * b,
+});
+
+const model = await client.llm.model("qwen2.5-7b-instruct");
+await model.act("What is the result of 12345 multiplied by 54321?", [multiplyTool], {
+  onMessage: (message) => console.info(message.toString()),
+});
 ```
 
 > **_NOTE:_** At this time, this code expects zod v3.
@@ -70,91 +66,83 @@ Some general guidance when selecting a model:
 
 The following code demonstrates how to provide multiple tools in a single `.act()` call.
 
-```lms_code_snippet
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { LMStudioClient, tool } from "@lmstudio/sdk";
-        import { z } from "zod";
-
-        const client = new LMStudioClient();
-
-        const additionTool = tool({
-          name: "add",
-          description: "Given two numbers a and b. Returns the sum of them.",
-          parameters: { a: z.number(), b: z.number() },
-          implementation: ({ a, b }) => a + b,
-        });
-
-        const isPrimeTool = tool({
-          name: "isPrime",
-          description: "Given a number n. Returns true if n is a prime number.",
-          parameters: { n: z.number() },
-          implementation: ({ n }) => {
-            if (n < 2) return false;
-            const sqrt = Math.sqrt(n);
-            for (let i = 2; i <= sqrt; i++) {
-              if (n % i === 0) return false;
-            }
-            return true;
-          },
-        });
-
-        const model = await client.llm.model("qwen2.5-7b-instruct");
-        await model.act(
-          "Is the result of 12345 + 45668 a prime? Think step by step.",
-          [additionTool, isPrimeTool],
-          { onMessage: (message) => console.info(message.toString()) },
-        );
+```typescript
+import { LMStudioClient, tool } from "@lmstudio/sdk";
+import { z } from "zod";
+
+const client = new LMStudioClient();
+
+const additionTool = tool({
+  name: "add",
+  description: "Given two numbers a and b. Returns the sum of them.",
+  parameters: { a: z.number(), b: z.number() },
+  implementation: ({ a, b }) => a + b,
+});
+
+const isPrimeTool = tool({
+  name: "isPrime",
+  description: "Given a number n. Returns true if n is a prime number.",
+  parameters: { n: z.number() },
+  implementation: ({ n }) => {
+    if (n < 2) return false;
+    const sqrt = Math.sqrt(n);
+    for (let i = 2; i <= sqrt; i++) {
+      if (n % i === 0) return false;
+    }
+    return true;
+  },
+});
+
+const model = await client.llm.model("qwen2.5-7b-instruct");
+await model.act(
+  "Is the result of 12345 + 45668 a prime? Think step by step.",
+  [additionTool, isPrimeTool],
+  { onMessage: (message) => console.info(message.toString()) },
+);
 ```
 
 ### Example: Chat Loop with Create File Tool
 
 The following code creates a conversation loop with an LLM agent that can create files.
 
-```lms_code_snippet
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { Chat, LMStudioClient, tool } from "@lmstudio/sdk";
-        import { existsSync } from "fs";
-        import { writeFile } from "fs/promises";
-        import { createInterface } from "readline/promises";
-        import { z } from "zod";
-
-        const rl = createInterface({ input: process.stdin, output: process.stdout });
-        const client = new LMStudioClient();
-        const model = await client.llm.model();
-        const chat = Chat.empty();
-
-        const createFileTool = tool({
-          name: "createFile",
-          description: "Create a file with the given name and content.",
-          parameters: { name: z.string(), content: z.string() },
-          implementation: async ({ name, content }) => {
-            if (existsSync(name)) {
-              return "Error: File already exists.";
-            }
-            await writeFile(name, content, "utf-8");
-            return "File created.";
-          },
-        });
-
-        while (true) {
-          const input = await rl.question("You: ");
-          // Append the user input to the chat
-          chat.append("user", input);
-
-          process.stdout.write("Bot: ");
-          await model.act(chat, [createFileTool], {
-            // When the model finish the entire message, push it to the chat
-            onMessage: (message) => chat.append(message),
-            onPredictionFragment: ({ content }) => {
-              process.stdout.write(content);
-            },
-          });
-          process.stdout.write("\n");
-        }
+```typescript
+import { Chat, LMStudioClient, tool } from "@lmstudio/sdk";
+import { existsSync } from "fs";
+import { writeFile } from "fs/promises";
+import { createInterface } from "readline/promises";
+import { z } from "zod";
+
+const rl = createInterface({ input: process.stdin, output: process.stdout });
+const client = new LMStudioClient();
+const model = await client.llm.model();
+const chat = Chat.empty();
+
+const createFileTool = tool({
+  name: "createFile",
+  description: "Create a file with the given name and content.",
+  parameters: { name: z.string(), content: z.string() },
+  implementation: async ({ name, content }) => {
+    if (existsSync(name)) {
+      return "Error: File already exists.";
+    }
+    await writeFile(name, content, "utf-8");
+    return "File created.";
+  },
+});
+
+while (true) {
+  const input = await rl.question("You: ");
+  // Append the user input to the chat
+  chat.append("user", input);
+
+  process.stdout.write("Bot: ");
+  await model.act(chat, [createFileTool], {
+    // When the model finishes the entire message, push it to the chat
+    onMessage: (message) => chat.append(message),
+    onPredictionFragment: ({ content }) => {
+      process.stdout.write(content);
+    },
+  });
+  process.stdout.write("\n");
+}
 ```
diff --git a/2_typescript/3_agent/meta.json b/2_typescript/3_agent/meta.json
new file mode 100644
index 0000000..64a4fe7
--- /dev/null
+++ b/2_typescript/3_agent/meta.json
@@ -0,0 +1,8 @@
+{
+  "title": "Agentic Flows",
+  "pages": [
+    "act",
+    "_index",
+    "tools"
+  ]
+}
diff --git a/2_typescript/3_agent/tools.md b/2_typescript/3_agent/tools.md
index 560bbcf..960edac 100644
--- a/2_typescript/3_agent/tools.md
+++ b/2_typescript/3_agent/tools.md
@@ -10,28 +10,23 @@ You can define tools with the `tool()` function and pass them to the model in th
 
 Follow this standard format to define functions as tools:
 
-```lms_code_snippet
-  title: "index.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { tool } from "@lmstudio/sdk";
-        import { z } from "zod";
-
-        const exampleTool = tool({
-          // The name of the tool
-          name: "add",
-
-          // A description of the tool
-          description: "Given two numbers a and b. Returns the sum of them.",
-
-          // zod schema of the parameters
-          parameters: { a: z.number(), b: z.number() },
-
-          // The implementation of the tool. Just a regular function.
-          implementation: ({ a, b }) => a + b,
-        });
+```typescript title="index.ts"
+import { tool } from "@lmstudio/sdk";
+import { z } from "zod";
+
+const exampleTool = tool({
+  // The name of the tool
+  name: "add",
+
+  // A description of the tool
+  description: "Given two numbers a and b. Returns the sum of them.",
+
+  // zod schema of the parameters
+  parameters: { a: z.number(), b: z.number() },
+
+  // The implementation of the tool. Just a regular function.
+  implementation: ({ a, b }) => a + b,
+});
 ```
 
 **Important**: The tool name, description, and the parameter definitions are all passed to the model!
@@ -47,47 +42,37 @@ can essentially turn your LLMs into autonomous agents that can perform tasks on
 
 ### Tool Definition
 
-```lms_code_snippet
-  title: "createFileTool.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { tool } from "@lmstudio/sdk";
-        import { existsSync } from "fs";
-        import { writeFile } from "fs/promises";
-        import { z } from "zod";
-
-        const createFileTool = tool({
-          name: "createFile",
-          description: "Create a file with the given name and content.",
-          parameters: { name: z.string(), content: z.string() },
-          implementation: async ({ name, content }) => {
-            if (existsSync(name)) {
-              return "Error: File already exists.";
-            }
-            await writeFile(name, content, "utf-8");
-            return "File created.";
-          },
-        });
+```typescript title="createFileTool.ts"
+import { tool } from "@lmstudio/sdk";
+import { existsSync } from "fs";
+import { writeFile } from "fs/promises";
+import { z } from "zod";
+
+const createFileTool = tool({
+  name: "createFile",
+  description: "Create a file with the given name and content.",
+  parameters: { name: z.string(), content: z.string() },
+  implementation: async ({ name, content }) => {
+    if (existsSync(name)) {
+      return "Error: File already exists.";
+    }
+    await writeFile(name, content, "utf-8");
+    return "File created.";
+  },
+});
 ```
 
 ### Example code using the `createFile` tool:
 
-```lms_code_snippet
-  title: "index.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { LMStudioClient } from "@lmstudio/sdk";
-        import { createFileTool } from "./createFileTool";
-
-        const client = new LMStudioClient();
-
-        const model = await client.llm.model("qwen2.5-7b-instruct");
-        await model.act(
-          "Please create a file named output.txt with your understanding of the meaning of life.",
-          [createFileTool],
-        );
+```typescript title="index.ts"
+import { LMStudioClient } from "@lmstudio/sdk";
+import { createFileTool } from "./createFileTool";
+
+const client = new LMStudioClient();
+
+const model = await client.llm.model("qwen2.5-7b-instruct");
+await model.act(
+  "Please create a file named output.txt with your understanding of the meaning of life.",
+  [createFileTool],
+);
 ```
diff --git a/2_typescript/3_plugins/1_tools-provider/custom-configuration.md b/2_typescript/3_plugins/1_tools-provider/custom-configuration.md
index fd6f804..53b022c 100644
--- a/2_typescript/3_plugins/1_tools-provider/custom-configuration.md
+++ b/2_typescript/3_plugins/1_tools-provider/custom-configuration.md
@@ -10,72 +10,62 @@ In the example below, we will ask the user to specify a folder name, and we will
 
 First, add the config field to `config.ts`:
 
-```lms_code_snippet
-  title: "src/config.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        export const configSchematics = createConfigSchematics()
-          .field(
-            "folderName", // Key of the configuration field
-            "string", // Type of the configuration field
-            {
-              displayName: "Folder Name",
-              subtitle: "The name of the folder where files will be created.",
-            },
-            "default_folder", // Default value
-          )
-          .build();
+```typescript title="src/config.ts"
+import { createConfigSchematics } from "@lmstudio/sdk";
+
+export const configSchematics = createConfigSchematics()
+  .field(
+    "folderName", // Key of the configuration field
+    "string", // Type of the configuration field
+    {
+      displayName: "Folder Name",
+      subtitle: "The name of the folder where files will be created.",
+    },
+    "default_folder", // Default value
+  )
+  .build();
 ```
 
-```lms_info
+:::info[Info]
 In this example, we added the field to `configSchematics`, which is the "per-chat" configuration. If you want to add a global configuration field that is shared across different chats, you should add it under the section `globalConfigSchematics` in the same file.
 
 Learn more about configurations in [Custom Configurations](../plugins/configurations).
-```
+:::
 
 Then, modify the tools provider to use the configuration value:
 
-```lms_code_snippet
-  title: "src/toolsProvider.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
-        import { existsSync } from "fs";
-        import { mkdir, writeFile } from "fs/promises";
-        import { join } from "path";
-        import { z } from "zod";
-        import { configSchematics } from "./config";
-
-        export async function toolsProvider(ctl: ToolsProviderController) {
-          const tools: Tool[] = [];
-
-          const createFileTool = tool({
-            name: `create_file`,
-            description: "Create a file with the given name and content.",
-            parameters: { file_name: z.string(), content: z.string() },
-            implementation: async ({ file_name, content }) => {
-              // Read the config field
-              const folderName = ctl.getPluginConfig(configSchematics).get("folderName");
-              const folderPath = join(ctl.getWorkingDirectory(), folderName);
-
-              // Ensure the folder exists
-              await mkdir(folderPath, { recursive: true });
-
-              // Create the file
-              const filePath = join(folderPath, file_name);
-              if (existsSync(filePath)) {
-                return "Error: File already exists.";
-              }
-              await writeFile(filePath, content, "utf-8");
-              return "File created.";
-            },
-          });
-          tools.push(createFileTool); // First tool
-
-          return tools; // Return the tools array
-        }
+```typescript title="src/toolsProvider.ts"
+import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
+import { existsSync } from "fs";
+import { mkdir, writeFile } from "fs/promises";
+import { join } from "path";
+import { z } from "zod";
+import { configSchematics } from "./config";
+
+export async function toolsProvider(ctl: ToolsProviderController) {
+  const tools: Tool[] = [];
+
+  const createFileTool = tool({
+    name: `create_file`,
+    description: "Create a file with the given name and content.",
+    parameters: { file_name: z.string(), content: z.string() },
+    implementation: async ({ file_name, content }) => {
+      // Read the config field
+      const folderName = ctl.getPluginConfig(configSchematics).get("folderName");
+      const folderPath = join(ctl.getWorkingDirectory(), folderName);
+
+      // Ensure the folder exists
+      await mkdir(folderPath, { recursive: true });
+
+      // Create the file
+      const filePath = join(folderPath, file_name);
+      if (existsSync(filePath)) {
+        return "Error: File already exists.";
+      }
+      await writeFile(filePath, content, "utf-8");
+      return "File created.";
+    },
+  });
+  tools.push(createFileTool); // First tool
+
+  return tools; // Return the tools array
+}
 ```
diff --git a/2_typescript/3_plugins/1_tools-provider/handling-aborts.md b/2_typescript/3_plugins/1_tools-provider/handling-aborts.md
index b54f0d5..be00a51 100644
--- a/2_typescript/3_plugins/1_tools-provider/handling-aborts.md
+++ b/2_typescript/3_plugins/1_tools-provider/handling-aborts.md
@@ -6,42 +6,37 @@ index: 7
 
 A prediction may be aborted by the user while your tool is still running. In such cases, you should handle the abort gracefully by responding to the `AbortSignal` object passed as the second parameter to the tool's implementation function.
 
-```lms_code_snippet
-  title: "src/toolsProvider.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
-        import { z } from "zod";
-
-        export async function toolsProvider(ctl: ToolsProviderController) {
-          const tools: Tool[] = [];
-
-          const fetchTool = tool({
-            name: `fetch`,
-            description: "Fetch a URL using GET method.",
-            parameters: { url: z.string() },
-            implementation: async ({ url }, { signal }) => {
-              const response = await fetch(url, {
-                method: "GET",
-                signal, // <-- Here, we pass the signal to fetch to allow cancellation
-              });
-              if (!response.ok) {
-                return `Error: Failed to fetch ${url}: ${response.statusText}`;
-              }
-              const data = await response.text();
-              return {
-                status: response.status,
-                headers: Object.fromEntries(response.headers.entries()),
-                data: data.substring(0, 1000), // Limit to 1000 characters
-              };
-            },
-          });
-          tools.push(fetchTool);
-
-          return tools;
-        }
+```typescript title="src/toolsProvider.ts"
+import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
+import { z } from "zod";
+
+export async function toolsProvider(ctl: ToolsProviderController) {
+  const tools: Tool[] = [];
+
+  const fetchTool = tool({
+    name: `fetch`,
+    description: "Fetch a URL using GET method.",
+    parameters: { url: z.string() },
+    implementation: async ({ url }, { signal }) => {
+      const response = await fetch(url, {
+        method: "GET",
+        signal, // <-- Here, we pass the signal to fetch to allow cancellation
+      });
+      if (!response.ok) {
+        return `Error: Failed to fetch ${url}: ${response.statusText}`;
+      }
+      const data = await response.text();
+      return {
+        status: response.status,
+        headers: Object.fromEntries(response.headers.entries()),
+        data: data.substring(0, 1000), // Limit to 1000 characters
+      };
+    },
+  });
+  tools.push(fetchTool);
+
+  return tools;
+}
 ```
 
 You can learn more about `AbortSignal` in the [MDN documentation](https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal).
diff --git a/2_typescript/3_plugins/1_tools-provider/meta.json b/2_typescript/3_plugins/1_tools-provider/meta.json
new file mode 100644
index 0000000..6eb3fbf
--- /dev/null
+++ b/2_typescript/3_plugins/1_tools-provider/meta.json
@@ -0,0 +1,10 @@
+{
+  "title": "Tools Provider",
+  "pages": [
+    "custom-configuration",
+    "handling-aborts",
+    "multiple-tools",
+    "single-tool",
+    "status-reports-and-warnings"
+  ]
+}
diff --git a/2_typescript/3_plugins/1_tools-provider/multiple-tools.md b/2_typescript/3_plugins/1_tools-provider/multiple-tools.md
index 7b53404..0e9c8b2 100644
--- a/2_typescript/3_plugins/1_tools-provider/multiple-tools.md
+++ b/2_typescript/3_plugins/1_tools-provider/multiple-tools.md
@@ -8,51 +8,46 @@ A tools provider can define multiple tools for the model to use. Simply create a
 
 In the example below, we add a second tool to read the content of a file:
 
-```lms_code_snippet
-  title: "src/toolsProvider.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
-        import { z } from "zod";
-        import { existsSync } from "fs";
-        import { readFile, writeFile } from "fs/promises";
-        import { join } from "path";
-
-        export async function toolsProvider(ctl: ToolsProviderController) {
-          const tools: Tool[] = [];
-
-          const createFileTool = tool({
-            name: `create_file`,
-            description: "Create a file with the given name and content.",
-            parameters: { file_name: z.string(), content: z.string() },
-            implementation: async ({ file_name, content }) => {
-              const filePath = join(ctl.getWorkingDirectory(), file_name);
-              if (existsSync(filePath)) {
-                return "Error: File already exists.";
-              }
-              await writeFile(filePath, content, "utf-8");
-              return "File created.";
-            },
-          });
-          tools.push(createFileTool); // First tool
-
-          const readFileTool = tool({
-            name: `read_file`,
-            description: "Read the content of a file with the given name.",
-            parameters: { file_name: z.string() },
-            implementation: async ({ file_name }) => {
-              const filePath = join(ctl.getWorkingDirectory(), file_name);
-              if (!existsSync(filePath)) {
-                return "Error: File does not exist.";
-              }
-              const content = await readFile(filePath, "utf-8");
-              return content;
-            },
-          });
-          tools.push(readFileTool); // Second tool
-
-          return tools; // Return the tools array
-        }
+```typescript title="src/toolsProvider.ts"
+import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
+import { z } from "zod";
+import { existsSync } from "fs";
+import { readFile, writeFile } from "fs/promises";
+import { join } from "path";
+
+export async function toolsProvider(ctl: ToolsProviderController) {
+  const tools: Tool[] = [];
+
+  const createFileTool = tool({
+    name: `create_file`,
+    description: "Create a file with the given name and content.",
+    parameters: { file_name: z.string(), content: z.string() },
+    implementation: async ({ file_name, content }) => {
+      const filePath = join(ctl.getWorkingDirectory(), file_name);
+      if (existsSync(filePath)) {
+        return "Error: File already exists.";
+      }
+      await writeFile(filePath, content, "utf-8");
+      return "File created.";
+    },
+  });
+  tools.push(createFileTool); // First tool
+
+  const readFileTool = tool({
+    name: `read_file`,
+    description: "Read the content of a file with the given name.",
+    parameters: { file_name: z.string() },
+    implementation: async ({ file_name }) => {
+      const filePath = join(ctl.getWorkingDirectory(), file_name);
+      if (!existsSync(filePath)) {
+        return "Error: File does not exist.";
+      }
+      const content = await readFile(filePath, "utf-8");
+      return content;
+    },
+  });
+  tools.push(readFileTool); // Second tool
+
+  return tools; // Return the tools array
+}
 ```
diff --git a/2_typescript/3_plugins/1_tools-provider/single-tool.md b/2_typescript/3_plugins/1_tools-provider/single-tool.md
index 736b8e4..a6c5ba3 100644
--- a/2_typescript/3_plugins/1_tools-provider/single-tool.md
+++ b/2_typescript/3_plugins/1_tools-provider/single-tool.md
@@ -6,63 +6,53 @@ index: 3
 
 To set up a tools provider, first create a file `toolsProvider.ts` in your plugin's `src` directory:
 
-```lms_code_snippet
-  title: "src/toolsProvider.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
-        import { z } from "zod";
-        import { existsSync } from "fs";
-        import { writeFile } from "fs/promises";
-        import { join } from "path";
-
-        export async function toolsProvider(ctl: ToolsProviderController) {
-          const tools: Tool[] = [];
-
-          const createFileTool = tool({
-            // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
-            name: `create_file`,
-            // Your description here, more details will help the model to understand when to use the tool
-            description: "Create a file with the given name and content.",
-            parameters: { file_name: z.string(), content: z.string() },
-            implementation: async ({ file_name, content }) => {
-              const filePath = join(ctl.getWorkingDirectory(), file_name);
-              if (existsSync(filePath)) {
-                return "Error: File already exists.";
-              }
-              await writeFile(filePath, content, "utf-8");
-              return "File created.";
-            },
-          });
-          tools.push(createFileTool);
-
-          return tools;
-        }
+```typescript title="src/toolsProvider.ts"
+import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
+import { z } from "zod";
+import { existsSync } from "fs";
+import { writeFile } from "fs/promises";
+import { join } from "path";
+
+export async function toolsProvider(ctl: ToolsProviderController) {
+  const tools: Tool[] = [];
+
+  const createFileTool = tool({
+    // Name of the tool, this will be passed to the model. Aim for concise, descriptive names
+    name: `create_file`,
+    // Your description here, more details will help the model to understand when to use the tool
+    description: "Create a file with the given name and content.",
+    parameters: { file_name: z.string(), content: z.string() },
+    implementation: async ({ file_name, content }) => {
+      const filePath = join(ctl.getWorkingDirectory(), file_name);
+      if (existsSync(filePath)) {
+        return "Error: File already exists.";
+      }
+      await writeFile(filePath, content, "utf-8");
+      return "File created.";
+    },
+  });
+  tools.push(createFileTool);
+
+  return tools;
+}
 ```
 
 The above tools provider defines a single tool called `create_file` that allows the model to create a file with a specified name and content inside the working directory. You can learn more about defining tools in [Tool Definition](../agent/tools).
 
 Then register the tools provider in your plugin's `index.ts`:
 
-```lms_code_snippet
-  title: "src/index.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        // ... other imports ...
-        import { toolsProvider } from "./toolsProvider";
-
-        export async function main(context: PluginContext) {
-          // ... other plugin setup code ...
-
-          // Register the tools provider.
-          context.withToolsProvider(toolsProvider); // <-- Register the tools provider
-
-          // ... other plugin setup code ...
-        }
+```typescript title="src/index.ts"
+// ... other imports ...
+import { toolsProvider } from "./toolsProvider";
+
+export async function main(context: PluginContext) {
+  // ... other plugin setup code ...
+
+  // Register the tools provider.
+  context.withToolsProvider(toolsProvider); // <-- Register the tools provider
+
+  // ... other plugin setup code ...
+}
 ```
 
 Now, you can ask the LLM to create a file, and it should be able to do so using the tool you just created.
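+
+As a quick sanity check, a complete minimal `src/index.ts` for this plugin can look like the following sketch. This is an illustration under the assumption that no other setup code is needed; `PluginContext` is the same type used in the snippets above:
+
+```typescript title="src/index.ts"
+import { PluginContext } from "@lmstudio/sdk";
+import { toolsProvider } from "./toolsProvider";
+
+export async function main(context: PluginContext) {
+  // Registering the tools provider is the only setup this example needs.
+  context.withToolsProvider(toolsProvider);
+}
+```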
diff --git a/2_typescript/3_plugins/1_tools-provider/status-reports-and-warnings.md b/2_typescript/3_plugins/1_tools-provider/status-reports-and-warnings.md
index 333676b..75270fb 100644
--- a/2_typescript/3_plugins/1_tools-provider/status-reports-and-warnings.md
+++ b/2_typescript/3_plugins/1_tools-provider/status-reports-and-warnings.md
@@ -10,36 +10,31 @@ You can use `status` and `warn` methods on the second parameter of the tool's im
 
 The following example shows how to implement a tool that waits for a specified number of seconds, providing status updates and warnings if the wait time exceeds 10 seconds:
 
-```lms_code_snippet
-  title: "src/toolsProvider.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
-        import { z } from "zod";
-
-        export async function toolsProvider(ctl: ToolsProviderController) {
-          const tools: Tool[] = [];
-
-          const waitTool = tool({
-            name: `wait`,
-            description: "Wait for a specified number of seconds.",
-            parameters: { seconds: z.number().min(1) },
-            implementation: async ({ seconds }, { status, warn }) => {
-              if (seconds > 10) {
-                warn("The model asks to wait for more than 10 seconds.");
-              }
-              for (let i = 0; i < seconds; i++) {
-                status(`Waiting... ${i + 1}/${seconds} seconds`);
-                await new Promise((resolve) => setTimeout(resolve, 1000));
-              }
-            },
-          });
-          tools.push(waitTool);
-
-          return tools; // Return the tools array
-        }
+```typescript title="src/toolsProvider.ts"
+import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk";
+import { z } from "zod";
+
+export async function toolsProvider(ctl: ToolsProviderController) {
+  const tools: Tool[] = [];
+
+  const waitTool = tool({
+    name: `wait`,
+    description: "Wait for a specified number of seconds.",
+    parameters: { seconds: z.number().min(1) },
+    implementation: async ({ seconds }, { status, warn }) => {
+      if (seconds > 10) {
+        warn("The model asks to wait for more than 10 seconds.");
+      }
+      for (let i = 0; i < seconds; i++) {
+        status(`Waiting... ${i + 1}/${seconds} seconds`);
+        await new Promise((resolve) => setTimeout(resolve, 1000));
+      }
+    },
+  });
+  tools.push(waitTool);
+
+  return tools; // Return the tools array
+}
 ```
 
 Note that status updates and warnings are only visible to the user. If you want the model to also see those messages, you should return them as part of the tool's return value.
@@ -48,42 +43,37 @@ Note status updates and warnings are only visible to the user. If you want the m
 
 A prediction may be aborted by the user while your tool is still running. In such cases, you should handle the abort gracefully by responding to the `AbortSignal` object passed as the second parameter to the tool's implementation function.
 
-```lms_code_snippet - title: "src/toolsProvider.ts" - variants: - TypeScript: - language: typescript - code: | - import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk"; - import { z } from "zod"; +```typescript title="src/toolsProvider.ts" +import { tool, Tool, ToolsProviderController } from "@lmstudio/sdk"; +import { z } from "zod"; - export async function toolsProvider(ctl: ToolsProviderController) { - const tools: Tool[] = []; +export async function toolsProvider(ctl: ToolsProviderController) { + const tools: Tool[] = []; - const fetchTool = tool({ - name: `fetch`, - description: "Fetch a URL using GET method.", - parameters: { url: z.string() }, - implementation: async ({ url }, { signal }) => { - const response = await fetch(url, { - method: "GET", - signal, // <-- Here, we pass the signal to fetch to allow cancellation - }); - if (!response.ok) { - return `Error: Failed to fetch ${url}: ${response.statusText}`; - } - const data = await response.text(); - return { - status: response.status, - headers: Object.fromEntries(response.headers.entries()), - data: data.substring(0, 1000), // Limit to 1000 characters - }; - }, - }); - tools.push(fetchTool); + const fetchTool = tool({ + name: `fetch`, + description: "Fetch a URL using GET method.", + parameters: { url: z.string() }, + implementation: async ({ url }, { signal }) => { + const response = await fetch(url, { + method: "GET", + signal, // <-- Here, we pass the signal to fetch to allow cancellation + }); + if (!response.ok) { + return `Error: Failed to fetch ${url}: ${response.statusText}`; + } + const data = await response.text(); + return { + status: response.status, + headers: Object.fromEntries(response.headers.entries()), + data: data.substring(0, 1000), // Limit to 1000 characters + }; + }, + }); + tools.push(fetchTool); - return tools; - } + return tools; +} ``` You can learn more about `AbortSignal` in the [MDN documentation](https://developer.mozilla.org/en-US/docs/Web/API/AbortSignal). 
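+
+If your tool does long-running work itself rather than delegating to an API that accepts an `AbortSignal`, you can poll the signal between steps instead. The following is a sketch along the lines of the `wait` tool above, assuming the second parameter exposes both `status` and `signal` as the previous examples suggest (`throwIfAborted()` is standard `AbortSignal` API):
+
+```typescript title="src/toolsProvider.ts"
+import { tool } from "@lmstudio/sdk";
+import { z } from "zod";
+
+const waitTool = tool({
+  name: "wait",
+  description: "Wait for a specified number of seconds.",
+  parameters: { seconds: z.number().min(1) },
+  implementation: async ({ seconds }, { status, signal }) => {
+    for (let i = 0; i < seconds; i++) {
+      // Check between steps; an in-flight timeout still runs to completion.
+      signal.throwIfAborted();
+      status(`Waiting... ${i + 1}/${seconds} seconds`);
+      await new Promise((resolve) => setTimeout(resolve, 1000));
+    }
+    return "Done waiting.";
+  },
+});
+```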
diff --git a/2_typescript/3_plugins/2_prompt-preprocessor/custom-configuration.md b/2_typescript/3_plugins/2_prompt-preprocessor/custom-configuration.md
index ef63d04..b7a1294 100644
--- a/2_typescript/3_plugins/2_prompt-preprocessor/custom-configuration.md
+++ b/2_typescript/3_plugins/2_prompt-preprocessor/custom-configuration.md
@@ -10,60 +10,50 @@ The following is an example of how you can make the `specialInstructions` and `t
 
 First, add the config field to `config.ts`:
 
-```lms_code_snippet
-  title: "src/config.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { createConfigSchematics } from "@lmstudio/sdk";
-        export const configSchematics = createConfigSchematics()
-          .field(
-            "specialInstructions",
-            "string",
-            {
-              displayName: "Special Instructions",
-              subtitle: "Special instructions to be injected when the trigger word is found.",
-            },
-            "Here is some default special instructions.",
-          )
-          .field(
-            "triggerWord",
-            "string",
-            {
-              displayName: "Trigger Word",
-              subtitle: "The word that will trigger the special instructions.",
-            },
-            "@init",
-          )
-          .build();
+```typescript title="src/config.ts"
+import { createConfigSchematics } from "@lmstudio/sdk";
+export const configSchematics = createConfigSchematics()
+  .field(
+    "specialInstructions",
+    "string",
+    {
+      displayName: "Special Instructions",
+      subtitle: "Special instructions to be injected when the trigger word is found.",
+    },
+    "Here are some default special instructions.",
+  )
+  .field(
+    "triggerWord",
+    "string",
+    {
+      displayName: "Trigger Word",
+      subtitle: "The word that will trigger the special instructions.",
+    },
+    "@init",
+  )
+  .build();
 ```
 
-```lms_info
+:::info[Info]
 In this example, we added the field to `configSchematics`, which is the "per-chat" configuration. If you want to add a global configuration field that is shared across different chats, you should add it under the section `globalConfigSchematics` in the same file.
 
 Learn more about configurations in [Custom Configurations](../plugins/configurations).
-```
+:::
 
 Then, modify the prompt preprocessor to use the configuration:
 
-```lms_code_snippet
-  title: "src/promptPreprocessor.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { type PromptPreprocessorController, type ChatMessage } from "@lmstudio/sdk";
-        import { configSchematics } from "./config";
-
-        export async function preprocess(ctl: PromptPreprocessorController, userMessage: ChatMessage) {
-          const textContent = userMessage.getText();
-          const pluginConfig = ctl.getPluginConfig(configSchematics);
-
-          const triggerWord = pluginConfig.get("triggerWord");
-          const specialInstructions = pluginConfig.get("specialInstructions");
-
-          const transformed = textContent.replaceAll(triggerWord, specialInstructions);
-          return transformed;
-        }
+```typescript title="src/promptPreprocessor.ts"
+import { type PromptPreprocessorController, type ChatMessage } from "@lmstudio/sdk";
+import { configSchematics } from "./config";
+
+export async function preprocess(ctl: PromptPreprocessorController, userMessage: ChatMessage) {
+  const textContent = userMessage.getText();
+  const pluginConfig = ctl.getPluginConfig(configSchematics);
+
+  const triggerWord = pluginConfig.get("triggerWord");
+  const specialInstructions = pluginConfig.get("specialInstructions");
+
+  const transformed = textContent.replaceAll(triggerWord, specialInstructions);
+  return transformed;
+}
 ```
diff --git a/2_typescript/3_plugins/2_prompt-preprocessor/custom-status-report.md b/2_typescript/3_plugins/2_prompt-preprocessor/custom-status-report.md
index 022bdd9..b6e9ca0 100644
--- a/2_typescript/3_plugins/2_prompt-preprocessor/custom-status-report.md
+++ b/2_typescript/3_plugins/2_prompt-preprocessor/custom-status-report.md
@@ -6,42 +6,27 @@ index: 4
 
 Depending on the task, the prompt preprocessor may take some time to complete; for example, it may need to fetch some data from the internet or perform some heavy computation. In such cases, you can report the status of the preprocessing using `ctl.createStatus`.
 
-```lms_code_snippet
-  title: "src/promptPreprocessor.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        const status = ctl.createStatus({
-          status: "loading",
-          text: "Preprocessing.",
-        });
+```typescript title="src/promptPreprocessor.ts"
+const status = ctl.createStatus({
+  status: "loading",
+  text: "Preprocessing.",
+});
 ```
 
 You can update the status at any time by calling `status.setState`.
 
-```lms_code_snippet
-  title: "src/promptPreprocessor.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        status.setState({
-          status: "done",
-          text: "Preprocessing done.",
-        })
+```typescript title="src/promptPreprocessor.ts"
+status.setState({
+  status: "done",
+  text: "Preprocessing done.",
+});
 ```
 
 You can even add a sub status to the status:
 
-```lms_code_snippet
-  title: "src/promptPreprocessor.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        const subStatus = status.addSubStatus({
-          status: "loading",
-          text: "I am a sub status."
-        });
+```typescript title="src/promptPreprocessor.ts"
+const subStatus = status.addSubStatus({
+  status: "loading",
+  text: "I am a sub status."
+});
 ```
diff --git a/2_typescript/3_plugins/2_prompt-preprocessor/examples.md b/2_typescript/3_plugins/2_prompt-preprocessor/examples.md
index af5c304..e8162fa 100644
--- a/2_typescript/3_plugins/2_prompt-preprocessor/examples.md
+++ b/2_typescript/3_plugins/2_prompt-preprocessor/examples.md
@@ -8,39 +8,29 @@ index: 2
 
 The following is an example preprocessor that injects the current time before each user message.
 
-```lms_code_snippet
-  title: "src/promptPreprocessor.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { type PromptPreprocessorController, type ChatMessage } from "@lmstudio/sdk";
-        export async function preprocess(ctl: PromptPreprocessorController, userMessage: ChatMessage) {
-          const textContent = userMessage.getText();
-          const transformed = `Current time: ${new Date().toString()}\n\n${textContent}`;
-          return transformed;
-        }
+```typescript title="src/promptPreprocessor.ts"
+import { type PromptPreprocessorController, type ChatMessage } from "@lmstudio/sdk";
+export async function preprocess(ctl: PromptPreprocessorController, userMessage: ChatMessage) {
+  const textContent = userMessage.getText();
+  const transformed = `Current time: ${new Date().toString()}\n\n${textContent}`;
+  return transformed;
+}
 ```
 
 ### Example: Replace Trigger Words
 
 Another example of what you can do with simple text-only processing is replacing certain trigger words. For example, you can replace a `@init` trigger with a special initialization message.
 
-```lms_code_snippet
-  title: "src/promptPreprocessor.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { type PromptPreprocessorController, type ChatMessage, text } from "@lmstudio/sdk";
-
-        const mySpecialInstructions = text`
-          Here are some special instructions...
-        `;
-
-        export async function preprocess(ctl: PromptPreprocessorController, userMessage: ChatMessage) {
-          const textContent = userMessage.getText();
-          const transformed = textContent.replaceAll("@init", mySpecialInstructions);
-          return transformed;
-        }
+```typescript title="src/promptPreprocessor.ts"
+import { type PromptPreprocessorController, type ChatMessage, text } from "@lmstudio/sdk";
+
+const mySpecialInstructions = text`
+  Here are some special instructions...
+`;
+
+export async function preprocess(ctl: PromptPreprocessorController, userMessage: ChatMessage) {
+  const textContent = userMessage.getText();
+  const transformed = textContent.replaceAll("@init", mySpecialInstructions);
+  return transformed;
+}
 ```
diff --git a/2_typescript/3_plugins/2_prompt-preprocessor/meta.json b/2_typescript/3_plugins/2_prompt-preprocessor/meta.json
new file mode 100644
index 0000000..4ec5301
--- /dev/null
+++ b/2_typescript/3_plugins/2_prompt-preprocessor/meta.json
@@ -0,0 +1,9 @@
+{
+  "title": "Prompt Preprocessor",
+  "pages": [
+    "custom-configuration",
+    "custom-status-report",
+    "examples",
+    "handling-aborts"
+  ]
+}
diff --git a/2_typescript/3_plugins/3_generator/meta.json b/2_typescript/3_plugins/3_generator/meta.json
new file mode 100644
index 0000000..904b360
--- /dev/null
+++ b/2_typescript/3_plugins/3_generator/meta.json
@@ -0,0 +1,7 @@
+{
+  "title": "Generators",
+  "pages": [
+    "text-only-generators",
+    "tool-calling-generators"
+  ]
+}
diff --git a/2_typescript/3_plugins/3_generator/text-only-generators.md b/2_typescript/3_plugins/3_generator/text-only-generators.md
index 1450b25..5927598 100644
--- a/2_typescript/3_plugins/3_generator/text-only-generators.md
+++ b/2_typescript/3_plugins/3_generator/text-only-generators.md
@@ -8,25 +8,20 @@ Generators take in the generator controller and the current conversation sta
 
 The following is an example of a simple generator that echoes back the last user message with a 200 ms delay between each word:
 
-```lms_code_snippet
-  title: "src/toolsProvider.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { Chat, GeneratorController } from "@lmstudio/sdk";
-
-        export async function generate(ctl: GeneratorController, chat: Chat) {
-          // Just echo back the last message
-          const lastMessage = chat.at(-1).getText();
-          // Split the last message into words
-          const words = lastMessage.split(/(?= )/);
-          for (const word of words) {
-            ctl.fragmentGenerated(word); // Send each word as a fragment
-            ctl.abortSignal.throwIfAborted(); // Allow for cancellation
-            await new Promise((resolve) => setTimeout(resolve, 200)); // Simulate some processing time
-          }
-        }
+```typescript title="src/generator.ts"
+import { Chat, GeneratorController } from "@lmstudio/sdk";
+
+export async function generate(ctl: GeneratorController, chat: Chat) {
+  // Just echo back the last message
+  const lastMessage = chat.at(-1).getText();
+  // Split the last message into words
+  const words = lastMessage.split(/(?= )/);
+  for (const word of words) {
+    ctl.fragmentGenerated(word); // Send each word as a fragment
+    ctl.abortSignal.throwIfAborted(); // Allow for cancellation
+    await new Promise((resolve) => setTimeout(resolve, 200)); // Simulate some processing time
+  }
+}
 ```
 
 ## Custom Configurations
diff --git a/2_typescript/3_plugins/4_custom-configuration/accessing-config.md b/2_typescript/3_plugins/4_custom-configuration/accessing-config.md
index 382560b..cde4736 100644
--- a/2_typescript/3_plugins/4_custom-configuration/accessing-config.md
+++ b/2_typescript/3_plugins/4_custom-configuration/accessing-config.md
@@ -8,26 +8,21 @@ You can access the configuration using the method `ctl.getPluginConfig(configSch
 
 For example, here is how to access the config within the promptPreprocessor:
 
-```lms_code_snippet
-  title: "src/promptPreprocessor.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { type PreprocessorController, type ChatMessage } from "@lmstudio/sdk";
-        import { configSchematics } from "./config";
+```typescript title="src/promptPreprocessor.ts"
+import { type PreprocessorController, type ChatMessage } from "@lmstudio/sdk";
+import { configSchematics } from "./config";

-        export async function preprocess(ctl: PreprocessorController, userMessage: ChatMessage) {
-          const pluginConfig = ctl.getPluginConfig(configSchematics);
-          const myCustomField = pluginConfig.get("myCustomField");
+export async function preprocess(ctl: PreprocessorController, userMessage: ChatMessage) {
+  const pluginConfig = ctl.getPluginConfig(configSchematics);
+  const myCustomField = pluginConfig.get("myCustomField");

-          const globalPluginConfig = ctl.getGlobalPluginConfig(configSchematics);
-          const globalMyCustomField = globalPluginConfig.get("myCustomField");
+  const globalPluginConfig = ctl.getGlobalPluginConfig(configSchematics);
+  const globalMyCustomField = globalPluginConfig.get("myCustomField");

-          return (
-            `${userMessage.getText()},` +
-            `myCustomField: ${myCustomField}, ` +
-            `globalMyCustomField: ${globalMyCustomField}`
-          );
-        }
+  return (
+    `${userMessage.getText()},` +
+    `myCustomField: ${myCustomField}, ` +
+    `globalMyCustomField: ${globalMyCustomField}`
+  );
+}
 ```
diff --git a/2_typescript/3_plugins/4_custom-configuration/config-ts.md b/2_typescript/3_plugins/4_custom-configuration/config-ts.md
index e1d2551..041b5af 100644
--- a/2_typescript/3_plugins/4_custom-configuration/config-ts.md
+++ b/2_typescript/3_plugins/4_custom-configuration/config-ts.md
@@ -6,68 +6,58 @@

By default, the plugin scaffold will create a `config.ts` file in the `src/` directory which will contain the schematics of the configurations. If the file does not exist, you can create it manually:

-```lms_code_snippet
-  title: "src/toolsProvider.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { createConfigSchematics } from "@lmstudio/sdk";
-
-        export const configSchematics = createConfigSchematics()
-          .field(
-            "myCustomField", // The key of the field.
-            "numeric", // Type of the field.
-            // Options for the field. Different field types will have different options.
-            {
-              displayName: "My Custom Field",
-              hint: "This is my custom field. Doesn't do anything special.",
-              slider: { min: 0, max: 100, step: 1 }, // Add a slider to the field.
-            },
-            80, // Default Value
-          )
-          // You can add more fields by chaining the field method.
-          // For example:
-          // .field("anotherField", ...)
-          .build();
-
-        export const globalConfigSchematics = createConfigSchematics()
-          .field(
-            "myGlobalCustomField", // The key of the field.
-            "string",
-            {
-              displayName: "My Global Custom Field",
-              hint: "This is my global custom field. Doesn't do anything special.",
-            },
-            "default value", // Default Value
-          )
-          // You can add more fields by chaining the field method.
-          // For example:
-          // .field("anotherGlobalField", ...)
-          .build();
+```typescript title="src/config.ts"
+import { createConfigSchematics } from "@lmstudio/sdk";
+
+export const configSchematics = createConfigSchematics()
+  .field(
+    "myCustomField", // The key of the field.
+    "numeric", // Type of the field.
+    // Options for the field. Different field types will have different options.
+    {
+      displayName: "My Custom Field",
+      hint: "This is my custom field. Doesn't do anything special.",
+      slider: { min: 0, max: 100, step: 1 }, // Add a slider to the field.
+    },
+    80, // Default Value
+  )
+  // You can add more fields by chaining the field method.
+  // For example:
+  // .field("anotherField", ...)
+  .build();
+
+export const globalConfigSchematics = createConfigSchematics()
+  .field(
+    "myGlobalCustomField", // The key of the field.
+    "string",
+    {
+      displayName: "My Global Custom Field",
+      hint: "This is my global custom field. Doesn't do anything special.",
+    },
+    "default value", // Default Value
+  )
+  // You can add more fields by chaining the field method.
+  // For example:
+  // .field("anotherGlobalField", ...)
+  .build();
 ```

If you've added your config schematics manually, you will also need to register the configurations in your plugin's `index.ts` file. This is done by calling `context.withConfigSchematics(configSchematics)` and `context.withGlobalConfigSchematics(globalConfigSchematics)` in the `main` function of your plugin.

-```lms_code_snippet
-  title: "src/index.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        // ... other imports ...
-        import { toolsProvider } from "./toolsProvider";
+```typescript title="src/index.ts"
+// ... other imports ...
+import { toolsProvider } from "./toolsProvider";

-        export async function main(context: PluginContext) {
-          // ... other plugin setup code ...
+export async function main(context: PluginContext) {
+  // ... other plugin setup code ...

-          // Register the configuration schematics.
-          context.withConfigSchematics(configSchematics);
-          // Register the global configuration schematics.
-          context.withGlobalConfigSchematics(globalConfigSchematics);
+  // Register the configuration schematics.
+  context.withConfigSchematics(configSchematics);
+  // Register the global configuration schematics.
+  context.withGlobalConfigSchematics(globalConfigSchematics);

-          // ... other plugin setup code ...
-        }
+  // ... other plugin setup code ...
+}
 ```
diff --git a/2_typescript/3_plugins/4_custom-configuration/defining-new-fields.md b/2_typescript/3_plugins/4_custom-configuration/defining-new-fields.md
index 4dc0d2d..db65722 100644
--- a/2_typescript/3_plugins/4_custom-configuration/defining-new-fields.md
+++ b/2_typescript/3_plugins/4_custom-configuration/defining-new-fields.md
@@ -8,124 +8,104 @@ We support the following field types:

- `string`: A text input field.

-  ```lms_code_snippet
-    variants:
-      TypeScript:
-        language: typescript
-        code: |
-          // ... other fields ...
-          .field(
-            "stringField", // The key of the field.
-            "string", // Type of the field.
-            {
-              displayName: "A string field",
-              subtitle: "Subtitle", // Optional subtitle for the field. (Show below the field)
-              hint: "Hint", // Optional hint for the field. (Show on hover)
-              isParagraph: false, // Whether to show a large text input area for this field.
-              isProtected: false, // Whether the value should be obscured in the UI (e.g., for passwords).
-              placeholder: "Placeholder text", // Optional placeholder text for the field.
-            },
-            "default value", // Default Value
-          )
-          // ... other fields ...
+  ```typescript
+  // ... other fields ...
+  .field(
+    "stringField", // The key of the field.
+    "string", // Type of the field.
+    {
+      displayName: "A string field",
+      subtitle: "Subtitle", // Optional subtitle for the field. (Show below the field)
+      hint: "Hint", // Optional hint for the field. (Show on hover)
+      isParagraph: false, // Whether to show a large text input area for this field.
+      isProtected: false, // Whether the value should be obscured in the UI (e.g., for passwords).
+      placeholder: "Placeholder text", // Optional placeholder text for the field.
+    },
+    "default value", // Default Value
+  )
+  // ... other fields ...
``` - `numeric`: A number input field with optional validation and slider UI. - ```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - // ... other fields ... - .field( - "numberField", // The key of the field. - "numeric", // Type of the field. - { - displayName: "A number field", - subtitle: "Subtitle for", // Optional subtitle for the field. (Show below the field) - hint: "Hint for number field", // Optional hint for the field. (Show on hover) - int: false, // Whether the field should accept only integer values. - min: 0, // Minimum value for the field. - max: 100, // Maximum value for the field. - slider: { - // If present, configurations for the slider UI - min: 0, // Minimum value for the slider. - max: 100, // Maximum value for the slider. - step: 1, // Step value for the slider. - }, - }, - 42, // Default Value - ) - // ... other fields ... + ```typescript + // ... other fields ... + .field( + "numberField", // The key of the field. + "numeric", // Type of the field. + { + displayName: "A number field", + subtitle: "Subtitle for", // Optional subtitle for the field. (Show below the field) + hint: "Hint for number field", // Optional hint for the field. (Show on hover) + int: false, // Whether the field should accept only integer values. + min: 0, // Minimum value for the field. + max: 100, // Maximum value for the field. + slider: { + // If present, configurations for the slider UI + min: 0, // Minimum value for the slider. + max: 100, // Maximum value for the slider. + step: 1, // Step value for the slider. + }, + }, + 42, // Default Value + ) + // ... other fields ... ``` - `boolean`: A checkbox or toggle input field. - ```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - // ... other fields ... - .field( - "booleanField", // The key of the field. - "boolean", // Type of the field. - { - displayName: "A boolean field", - subtitle: "Subtitle", // Optional subtitle for the field. (Show below the field) - hint: "Hint", // Optional hint for the field. (Show on hover) - }, - true, // Default Value - ) - // ... other fields ... + ```typescript + // ... other fields ... + .field( + "booleanField", // The key of the field. + "boolean", // Type of the field. + { + displayName: "A boolean field", + subtitle: "Subtitle", // Optional subtitle for the field. (Show below the field) + hint: "Hint", // Optional hint for the field. (Show on hover) + }, + true, // Default Value + ) + // ... other fields ... ``` - `stringArray`: An array of string values with configurable constraints. - ```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - // ... other fields ... - .field( - "stringArrayField", - "stringArray", - { - displayName: "A string array field", - subtitle: "Subtitle", // Optional subtitle for the field. (Show below the field) - hint: "Hint", // Optional hint for the field. (Show on hover) - allowEmptyStrings: true, // Whether to allow empty strings in the array. - maxNumItems: 5, // Maximum number of items in the array. - }, - ["default", "values"], // Default Value - ) - // ... other fields ... + ```typescript + // ... other fields ... + .field( + "stringArrayField", + "stringArray", + { + displayName: "A string array field", + subtitle: "Subtitle", // Optional subtitle for the field. (Show below the field) + hint: "Hint", // Optional hint for the field. (Show on hover) + allowEmptyStrings: true, // Whether to allow empty strings in the array. + maxNumItems: 5, // Maximum number of items in the array. 
+ }, + ["default", "values"], // Default Value + ) + // ... other fields ... ``` - `select`: A dropdown selection field with predefined options. - ```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - // ... other fields ... - .field( - "selectField", - "select", - { - displayName: "A select field", - options: [ - { value: "option1", displayName: "Option 1" }, - { value: "option2", displayName: "Option 2" }, - { value: "option3", displayName: "Option 3" }, - ], - subtitle: "Subtitle", // Optional subtitle for the field. (Show below the field) - hint: "Hint", // Optional hint for the field. (Show on hover) - }, - "option1", // Default Value - ) - // ... other fields ... + ```typescript + // ... other fields ... + .field( + "selectField", + "select", + { + displayName: "A select field", + options: [ + { value: "option1", displayName: "Option 1" }, + { value: "option2", displayName: "Option 2" }, + { value: "option3", displayName: "Option 3" }, + ], + subtitle: "Subtitle", // Optional subtitle for the field. (Show below the field) + hint: "Hint", // Optional hint for the field. (Show on hover) + }, + "option1", // Default Value + ) + // ... other fields ... ``` diff --git a/2_typescript/3_plugins/4_custom-configuration/meta.json b/2_typescript/3_plugins/4_custom-configuration/meta.json new file mode 100644 index 0000000..0970a6d --- /dev/null +++ b/2_typescript/3_plugins/4_custom-configuration/meta.json @@ -0,0 +1,8 @@ +{ + "title": "Custom Configuration", + "pages": [ + "accessing-config", + "config-ts", + "defining-new-fields" + ] +} diff --git a/2_typescript/3_plugins/meta.json b/2_typescript/3_plugins/meta.json new file mode 100644 index 0000000..d09cde0 --- /dev/null +++ b/2_typescript/3_plugins/meta.json @@ -0,0 +1,11 @@ +{ + "title": "Plugins", + "pages": [ + "1_tools-provider", + "2_prompt-preprocessor", + "3_generator", + "4_custom-configuration", + "5_publish-plugins", + "dependencies" + ] +} diff --git a/2_typescript/4_embedding/index.md b/2_typescript/4_embedding/index.md index d292c8d..b72e84f 100644 --- a/2_typescript/4_embedding/index.md +++ b/2_typescript/4_embedding/index.md @@ -18,16 +18,11 @@ lms get nomic-ai/nomic-embed-text-v1.5 To convert a string to a vector representation, pass it to the `embed` method on the corresponding embedding model handle. -```lms_code_snippet - title: "index.ts" - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - const client = new LMStudioClient(); - - const model = await client.embedding.model("nomic-embed-text-v1.5"); - - const { embedding } = await model.embed("Hello, world!"); +```typescript title="index.ts" +import { LMStudioClient } from "@lmstudio/sdk"; +const client = new LMStudioClient(); + +const model = await client.embedding.model("nomic-embed-text-v1.5"); + +const { embedding } = await model.embed("Hello, world!"); ``` diff --git a/2_typescript/5_tokenization/index.md b/2_typescript/5_tokenization/index.md index b03aba2..5dfcabb 100644 --- a/2_typescript/5_tokenization/index.md +++ b/2_typescript/5_tokenization/index.md @@ -10,32 +10,24 @@ Models use a tokenizer to internally convert text into "tokens" they can deal wi You can tokenize a string with a loaded LLM or embedding model using the SDK. In the below examples, `llm` can be replaced with an embedding model `emb`. 
-```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; +```typescript +import { LMStudioClient } from "@lmstudio/sdk"; - const client = new LMStudioClient(); - const model = await client.llm.model(); +const client = new LMStudioClient(); +const model = await client.llm.model(); - const tokens = await model.tokenize("Hello, world!"); +const tokens = await model.tokenize("Hello, world!"); - console.info(tokens); // Array of token IDs. +console.info(tokens); // Array of token IDs. ``` ## Count tokens If you only care about the number of tokens, you can use the `.countTokens` method instead. -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - const tokenCount = await model.countTokens("Hello, world!"); - console.info("Token count:", tokenCount); +```typescript +const tokenCount = await model.countTokens("Hello, world!"); +console.info("Token count:", tokenCount); ``` ### Example: Count Context @@ -46,33 +38,29 @@ You can determine if a given conversation fits into a model's context by doing t 2. Count the number of tokens in the string. 3. Compare the token count to the model's context length. -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { Chat, type LLM, LMStudioClient } from "@lmstudio/sdk"; - - async function doesChatFitInContext(model: LLM, chat: Chat) { - // Convert the conversation to a string using the prompt template. - const formatted = await model.applyPromptTemplate(chat); - // Count the number of tokens in the string. - const tokenCount = await model.countTokens(formatted); - // Get the current loaded context length of the model - const contextLength = await model.getContextLength(); - return tokenCount < contextLength; - } - - const client = new LMStudioClient(); - const model = await client.llm.model(); - - const chat = Chat.from([ - { role: "user", content: "What is the meaning of life." }, - { role: "assistant", content: "The meaning of life is..." }, - // ... More messages - ]); - - console.info("Fits in context:", await doesChatFitInContext(model, chat)); +```typescript +import { Chat, type LLM, LMStudioClient } from "@lmstudio/sdk"; + +async function doesChatFitInContext(model: LLM, chat: Chat) { + // Convert the conversation to a string using the prompt template. + const formatted = await model.applyPromptTemplate(chat); + // Count the number of tokens in the string. + const tokenCount = await model.countTokens(formatted); + // Get the current loaded context length of the model + const contextLength = await model.getContextLength(); + return tokenCount < contextLength; +} + +const client = new LMStudioClient(); +const model = await client.llm.model(); + +const chat = Chat.from([ + { role: "user", content: "What is the meaning of life." }, + { role: "assistant", content: "The meaning of life is..." }, + // ... More messages +]); + +console.info("Fits in context:", await doesChatFitInContext(model, chat)); ``` diff --git a/2_typescript/6_manage-models/_download-models.md b/2_typescript/6_manage-models/_download-models.md index 07e6ebe..1c1f494 100644 --- a/2_typescript/6_manage-models/_download-models.md +++ b/2_typescript/6_manage-models/_download-models.md @@ -17,35 +17,31 @@ Downloading models consists of three steps: 2. Find the download option you want (e.g. quantization); and 3. Download the model! 
-```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - - const client = new LMStudioClient(); - - // 1. Search for the model you want - // Specify any/all of searchTerm, limit, compatibilityTypes - const searchResults = await client.repository.searchModels({ - searchTerm: "llama 3.2 1b", // Search for Llama 3.2 1B - limit: 5, // Get top 5 results - compatibilityTypes: ["gguf"], // Only download GGUFs - }); - - // 2. Find download options - const bestResult = searchResults[0]; - const downloadOptions = await bestResult.getDownloadOptions(); - - // Let's download Q4_K_M, a good middle ground quantization - const desiredModel = downloadOptions.find(option => option.quantization === 'Q4_K_M'); - - // 3. Download it! - const modelKey = await desiredModel.download(); - - // This returns a path you can use to load the model - const loadedModel = await client.llm.model(modelKey); +```typescript +import { LMStudioClient } from "@lmstudio/sdk"; + +const client = new LMStudioClient(); + +// 1. Search for the model you want +// Specify any/all of searchTerm, limit, compatibilityTypes +const searchResults = await client.repository.searchModels({ + searchTerm: "llama 3.2 1b", // Search for Llama 3.2 1B + limit: 5, // Get top 5 results + compatibilityTypes: ["gguf"], // Only download GGUFs +}); + +// 2. Find download options +const bestResult = searchResults[0]; +const downloadOptions = await bestResult.getDownloadOptions(); + +// Let's download Q4_K_M, a good middle ground quantization +const desiredModel = downloadOptions.find(option => option.quantization === 'Q4_K_M'); + +// 3. Download it! +const modelKey = await desiredModel.download(); + +// This returns a path you can use to load the model +const loadedModel = await client.llm.model(modelKey); ``` ## Advanced Usage @@ -59,43 +55,38 @@ If you want to get updates on the progress of this process, you can provide call one for progress updates and/or one when the download is being finalized (validating checksums, etc.) -```lms_code_snippet - variants: - Python (with scoped resources): - language: python - code: | - import lmstudio +```python tab="Python (with scoped resources)" +import lmstudio - def print_progress_update(update: lmstudio.DownloadProgressUpdate) -> None: - print(f"Downloaded {update.downloaded_bytes} bytes of {update.total_bytes} total \ - at {update.speed_bytes_per_second} bytes/sec") +def print_progress_update(update: lmstudio.DownloadProgressUpdate) -> None: + print(f"Downloaded {update.downloaded_bytes} bytes of {update.total_bytes} total \ + at {update.speed_bytes_per_second} bytes/sec") - with lmstudio.Client() as client: - # ... Same code as before ... +with lmstudio.Client() as client: + # ... Same code as before ... 
-            model_key = desired_model.download(
-                on_progress=print_progress_update,
-                on_finalize: lambda: print("Finalizing download...")
-            )
+    model_key = desired_model.download(
+        on_progress=print_progress_update,
+        on_finalize=lambda: print("Finalizing download...")
+    )
+```

-    TypeScript:
-      language: typescript
-      code: |
-        import { LMStudioClient, type DownloadProgressUpdate } from "@lmstudio/sdk";
+```typescript tab="TypeScript"
+import { LMStudioClient, type DownloadProgressUpdate } from "@lmstudio/sdk";

-        function printProgressUpdate(update: DownloadProgressUpdate) {
-          process.stdout.write(`Downloaded ${update.downloadedBytes} bytes of ${update.totalBytes} total \
-        at ${update.speed_bytes_per_second} bytes/sec`);
-        }
+function printProgressUpdate(update: DownloadProgressUpdate) {
+  process.stdout.write(`Downloaded ${update.downloadedBytes} bytes of ${update.totalBytes} total \
+  at ${update.speedBytesPerSecond} bytes/sec`);
+}

-        const client = new LMStudioClient();
+const client = new LMStudioClient();

-        // ... Same code as before ...
+// ... Same code as before ...

-        modelKey = await desiredModel.download({
-          onProgress: printProgressUpdate,
-          onStartFinalizing: () => console.log("Finalizing..."),
-        });
+const modelKey = await desiredModel.download({
+  onProgress: printProgressUpdate,
+  onStartFinalizing: () => console.log("Finalizing..."),
+});

-        const loadedModel = await client.llm.model(modelKey);
+const loadedModel = await client.llm.model(modelKey);
 ```
diff --git a/2_typescript/6_manage-models/list-downloaded.md b/2_typescript/6_manage-models/list-downloaded.md
index 07a88c7..48c789b 100644
--- a/2_typescript/6_manage-models/list-downloaded.md
+++ b/2_typescript/6_manage-models/list-downloaded.md
@@ -9,15 +9,11 @@ You can iterate through locally available models using the `listLocalModels` met

`listDownloadedModels` lives under the `system` namespace of the `LMStudioClient` object.

-```lms_code_snippet
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { LMStudioClient } from "@lmstudio/sdk";
-        const client = new LMStudioClient();
-
-        console.info(await client.system.listDownloadedModels());
+```typescript
+import { LMStudioClient } from "@lmstudio/sdk";
+const client = new LMStudioClient();
+
+console.info(await client.system.listDownloadedModels());
 ```

This will give you results equivalent to using [`lms ls`](../../cli/ls) in the CLI.
diff --git a/2_typescript/6_manage-models/list-loaded.md b/2_typescript/6_manage-models/list-loaded.md
index a971436..d6df419 100644
--- a/2_typescript/6_manage-models/list-loaded.md
+++ b/2_typescript/6_manage-models/list-loaded.md
@@ -9,17 +9,13 @@ You can iterate through models loaded into memory using the `listLoaded` method.

This will give you results equivalent to using [`lms ps`](../../cli/ps) in the CLI.
-```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; +```typescript +import { LMStudioClient } from "@lmstudio/sdk"; - const client = new LMStudioClient(); +const client = new LMStudioClient(); - const llmOnly = await client.llm.listLoaded(); - const embeddingOnly = await client.embedding.listLoaded(); +const llmOnly = await client.llm.listLoaded(); +const embeddingOnly = await client.embedding.listLoaded(); ``` diff --git a/2_typescript/6_manage-models/loading.md b/2_typescript/6_manage-models/loading.md index 0ea3aa8..80cb392 100644 --- a/2_typescript/6_manage-models/loading.md +++ b/2_typescript/6_manage-models/loading.md @@ -20,15 +20,11 @@ AI models are huge. It can take a while to load them into memory. LM Studio's SD If you already have a model loaded in LM Studio (either via the GUI or `lms load`), you can use it by calling `.model()` without any arguments. -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - const client = new LMStudioClient(); - - const model = await client.llm.model(); +```typescript +import { LMStudioClient } from "@lmstudio/sdk"; +const client = new LMStudioClient(); + +const model = await client.llm.model(); ``` ## Get a Specific Model with `.model("model-key")` @@ -39,15 +35,11 @@ If you want to use a specific model, you can provide the model key as an argumen Calling `.model("model-key")` will load the model if it's not already loaded, or return the existing instance if it is. -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - const client = new LMStudioClient(); +```typescript +import { LMStudioClient } from "@lmstudio/sdk"; +const client = new LMStudioClient(); - const model = await client.llm.model("qwen/qwen3-4b-2507"); +const model = await client.llm.model("qwen/qwen3-4b-2507"); ``` @@ -56,18 +48,14 @@ Calling `.model("model-key")` will load the model if it's not already loaded, or Use `load()` to load a new instance of a model, even if one already exists. This allows you to have multiple instances of the same or different models loaded at the same time. -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - const client = new LMStudioClient(); - - const llama = await client.llm.load("qwen/qwen3-4b-2507"); - const another_llama = await client.llm.load("qwen/qwen3-4b-2507", { - identifier: "second-llama" - }); +```typescript +import { LMStudioClient } from "@lmstudio/sdk"; +const client = new LMStudioClient(); + +const llama = await client.llm.load("qwen/qwen3-4b-2507"); +const another_llama = await client.llm.load("qwen/qwen3-4b-2507", { + identifier: "second-llama" +}); ``` @@ -82,17 +70,13 @@ the server will generate one for you. You can always check in the server tab in Once you no longer need a model, you can unload it by simply calling `unload()` on its handle. 
-```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; +```typescript +import { LMStudioClient } from "@lmstudio/sdk"; - const client = new LMStudioClient(); +const client = new LMStudioClient(); - const model = await client.llm.model(); - await model.unload(); +const model = await client.llm.model(); +await model.unload(); ``` ## Set Custom Load Config Parameters @@ -106,28 +90,24 @@ See [load-time configuration](../llm-prediction/parameters) for more. You can specify a _time to live_ for a model you load, which is the idle time (in seconds) after the last request until the model unloads. See [Idle TTL](/docs/api/ttl-and-auto-evict) for more on this. -```lms_code_snippet - variants: - "Using .load": - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - - const client = new LMStudioClient(); - - const model = await client.llm.load("qwen/qwen3-4b-2507", { - ttl: 300, // 300 seconds - }); - "Using .model": - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - - const client = new LMStudioClient(); - - const model = await client.llm.model("qwen/qwen3-4b-2507", { - // Note: specifying ttl in `.model` will only set the TTL for the model if the model is - // loaded from this call. If the model was already loaded, the TTL will not be updated. - ttl: 300, // 300 seconds - }); +```typescript tab="Using .load" +import { LMStudioClient } from "@lmstudio/sdk"; + +const client = new LMStudioClient(); + +const model = await client.llm.load("qwen/qwen3-4b-2507", { + ttl: 300, // 300 seconds +}); +``` + +```typescript tab="Using .model" +import { LMStudioClient } from "@lmstudio/sdk"; + +const client = new LMStudioClient(); + +const model = await client.llm.model("qwen/qwen3-4b-2507", { + // Note: specifying ttl in `.model` will only set the TTL for the model if the model is + // loaded from this call. If the model was already loaded, the TTL will not be updated. + ttl: 300, // 300 seconds +}); ``` diff --git a/2_typescript/6_manage-models/meta.json b/2_typescript/6_manage-models/meta.json new file mode 100644 index 0000000..9f1b188 --- /dev/null +++ b/2_typescript/6_manage-models/meta.json @@ -0,0 +1,9 @@ +{ + "title": "Manage Models", + "pages": [ + "_download-models", + "list-downloaded", + "list-loaded", + "loading" + ] +} diff --git a/2_typescript/7_api-reference/meta.json b/2_typescript/7_api-reference/meta.json new file mode 100644 index 0000000..361a193 --- /dev/null +++ b/2_typescript/7_api-reference/meta.json @@ -0,0 +1,18 @@ +{ + "title": "API Reference", + "pages": [ + "_act", + "_chat", + "_complete", + "_count-tokens", + "_embed", + "llm-load-model-config", + "_llm-namespace", + "llm-prediction-config-input", + "_lmstudioclient", + "_model", + "_respond", + "_system-namespace", + "_tokenize" + ] +} diff --git a/2_typescript/8_model-info/_get-load-config.md b/2_typescript/8_model-info/_get-load-config.md index c7b423d..ecc1bab 100644 --- a/2_typescript/8_model-info/_get-load-config.md +++ b/2_typescript/8_model-info/_get-load-config.md @@ -8,19 +8,15 @@ LM Studio allows you to configure certain parameters when loading a model You can retrieve the config with which a given model was loaded using the SDK. In the below examples, `llm` can be replaced with an embedding model `emb`. -```lms_protip +:::tip[Pro Tip] Context length is a special case that [has its own method](/docs/api/sdk/get-context-length). 
-```
+:::

-```lms_code_snippet
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { LMStudioClient } from "@lmstudio/sdk";
+```typescript
+import { LMStudioClient } from "@lmstudio/sdk";

-        const client = new LMStudioClient();
-        const model = await client.llm.model();
+const client = new LMStudioClient();
+const model = await client.llm.model();

-        loadConfig = await model.getLoadConfig()
+const loadConfig = await model.getLoadConfig();
 ```
diff --git a/2_typescript/8_model-info/get-context-length.md b/2_typescript/8_model-info/get-context-length.md
index 1a57013..00bbf04 100644
--- a/2_typescript/8_model-info/get-context-length.md
+++ b/2_typescript/8_model-info/get-context-length.md
@@ -9,13 +9,8 @@ LLMs and embedding models, due to their fundamental architecture, have a propert

It's useful to be able to check the context length of a model, especially as an extra check before providing potentially long input to the model.

-```lms_code_snippet
-  title: "index.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        const contextLength = await model.getContextLength();
+```typescript title="index.ts"
+const contextLength = await model.getContextLength();
 ```

The `model` in the above code snippet is an instance of a loaded model you get from the `llm.model` method. See [Manage Models in Memory](../manage-models/loading) for more information.

@@ -28,31 +23,27 @@ You can determine if a given conversation fits into a model's context by doing t
2. Count the number of tokens in the string.
3. Compare the token count to the model's context length.

-```lms_code_snippet
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { Chat, type LLM, LMStudioClient } from "@lmstudio/sdk";
-
-        async function doesChatFitInContext(model: LLM, chat: Chat) {
-          // Convert the conversation to a string using the prompt template.
-          const formatted = await model.applyPromptTemplate(chat);
-          // Count the number of tokens in the string.
-          const tokenCount = await model.countTokens(formatted);
-          // Get the current loaded context length of the model
-          const contextLength = await model.getContextLength();
-          return tokenCount < contextLength;
-        }
-
-        const client = new LMStudioClient();
-        const model = await client.llm.model();
-
-        const chat = Chat.from([
-          { role: "user", content: "What is the meaning of life." },
-          { role: "assistant", content: "The meaning of life is..." },
-          // ... More messages
-        ]);
-
-        console.info("Fits in context:", await doesChatFitInContext(model, chat));
+```typescript
+import { Chat, type LLM, LMStudioClient } from "@lmstudio/sdk";
+
+async function doesChatFitInContext(model: LLM, chat: Chat) {
+  // Convert the conversation to a string using the prompt template.
+  const formatted = await model.applyPromptTemplate(chat);
+  // Count the number of tokens in the string.
+  const tokenCount = await model.countTokens(formatted);
+  // Get the current loaded context length of the model
+  const contextLength = await model.getContextLength();
+  return tokenCount < contextLength;
+}
+
+const client = new LMStudioClient();
+const model = await client.llm.model();
+
+const chat = Chat.from([
+  { role: "user", content: "What is the meaning of life." },
+  { role: "assistant", content: "The meaning of life is..." },
+  // ... More messages
+]);
+
+console.info("Fits in context:", await doesChatFitInContext(model, chat));
 ```
diff --git a/2_typescript/8_model-info/get-model-info.md b/2_typescript/8_model-info/get-model-info.md
index 9e9545a..eda0945 100644
--- a/2_typescript/8_model-info/get-model-info.md
+++ b/2_typescript/8_model-info/get-model-info.md
@@ -5,33 +5,29 @@ description: Get information about the model

You can access information about a loaded model using the `getInfo` method.

-```lms_code_snippet
-  variants:
-    LLM:
-      language: typescript
-      code: |
-        import { LMStudioClient } from "@lmstudio/sdk";
-
-        const client = new LMStudioClient();
-        const model = await client.llm.model();
-
-        const modelInfo = await model.getInfo();
-
-        console.info("Model Key", modelInfo.modelKey);
-        console.info("Current Context Length", model.contextLength);
-        console.info("Model Trained for Tool Use", modelInfo.trainedForToolUse);
-        // etc.
-    Embedding Model:
-      language: typescript
-      code: |
-        import { LMStudioClient } from "@lmstudio/sdk";
-
-        const client = new LMStudioClient();
-        const model = await client.embedding.model();
-
-        const modelInfo = await model.getInfo();
-
-        console.info("Model Key", modelInfo.modelKey);
-        console.info("Current Context Length", modelInfo.contextLength);
-        // etc.
+```typescript tab="LLM"
+import { LMStudioClient } from "@lmstudio/sdk";
+
+const client = new LMStudioClient();
+const model = await client.llm.model();
+
+const modelInfo = await model.getInfo();
+
+console.info("Model Key", modelInfo.modelKey);
+console.info("Current Context Length", modelInfo.contextLength);
+console.info("Model Trained for Tool Use", modelInfo.trainedForToolUse);
+// etc.
+```
+
+```typescript tab="Embedding Model"
+import { LMStudioClient } from "@lmstudio/sdk";
+
+const client = new LMStudioClient();
+const model = await client.embedding.model();
+
+const modelInfo = await model.getInfo();
+
+console.info("Model Key", modelInfo.modelKey);
+console.info("Current Context Length", modelInfo.contextLength);
+// etc.
 ```
diff --git a/2_typescript/8_model-info/meta.json b/2_typescript/8_model-info/meta.json
new file mode 100644
index 0000000..b5ea714
--- /dev/null
+++ b/2_typescript/8_model-info/meta.json
@@ -0,0 +1,8 @@
+{
+  "title": "Model Info",
+  "pages": [
+    "get-context-length",
+    "_get-load-config",
+    "get-model-info"
+  ]
+}
diff --git a/2_typescript/_more/_apply-prompt-template.md b/2_typescript/_more/_apply-prompt-template.md
index 798eed6..62a2d88 100644
--- a/2_typescript/_more/_apply-prompt-template.md
+++ b/2_typescript/_more/_apply-prompt-template.md
@@ -7,48 +7,40 @@ description: Apply a model's prompt template to a conversation

LLMs (Large Language Models) operate on a text-in, text-out basis. Before processing conversations through these models, the input must be converted into a properly formatted string using a prompt template. If you need to inspect or work with this formatted string directly, the LM Studio SDK provides a streamlined way to apply a model's prompt template to your conversations.

-```lms_info
+:::info[Info]
You do not need to use this method when using `.respond`. It will automatically apply the prompt template for you.
-```
+:::

## Usage with a Chat

You can apply a prompt template to a `Chat` by using the `applyPromptTemplate` method. This method takes a `Chat` object as input and returns a formatted string.
-```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { Chat, LMStudioClient } from "@lmstudio/sdk"; - - const client = new LMStudioClient(); - const llm = await client.llm.model(); // Use any loaded LLM - - const chat = Chat.createEmpty(); - chat.append("system", "You are a helpful assistant."); - chat.append("user", "What is LM Studio?"); - const formatted = await llm.applyPromptTemplate(chat); - console.info(formatted); +```typescript +import { Chat, LMStudioClient } from "@lmstudio/sdk"; + +const client = new LMStudioClient(); +const llm = await client.llm.model(); // Use any loaded LLM + +const chat = Chat.createEmpty(); +chat.append("system", "You are a helpful assistant."); +chat.append("user", "What is LM Studio?"); +const formatted = await llm.applyPromptTemplate(chat); +console.info(formatted); ``` ## Usage with an Array of Messages The same method can also be used with any object that can be converted to a `Chat`, for example, an array of messages. -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - - const client = new LMStudioClient(); - const llm = await client.llm.model(); // Use any loaded LLM - - const formatted = await llm.applyPromptTemplate([ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", content: "What is LM Studio?" }, - ]); - console.info(formatted); +```typescript +import { LMStudioClient } from "@lmstudio/sdk"; + +const client = new LMStudioClient(); +const llm = await client.llm.model(); // Use any loaded LLM + +const formatted = await llm.applyPromptTemplate([ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "What is LM Studio?" }, +]); +console.info(formatted); ``` diff --git a/2_typescript/_more/meta.json b/2_typescript/_more/meta.json new file mode 100644 index 0000000..48efd12 --- /dev/null +++ b/2_typescript/_more/meta.json @@ -0,0 +1,6 @@ +{ + "title": "More", + "pages": [ + "_apply-prompt-template" + ] +} diff --git a/2_typescript/authentication.md b/2_typescript/authentication.md index 56894d5..95d9225 100644 --- a/2_typescript/authentication.md +++ b/2_typescript/authentication.md @@ -11,9 +11,9 @@ LM Studio supports API Tokens for authentication, providing a secure and conveni By default, the LM Studio API runs **without enforcing authentication**. For production or shared environments, enable API Token authentication for secure access. -```lms_info +:::info[Info] To enable API Token authentication, create tokens and control granular permissions, check [this guide](/docs/developer/core/authentication) for more details. -``` +::: ## Providing the API Token @@ -22,32 +22,28 @@ There are two ways to provide the API Token when creating an instance of `LMStud 1. **Environment Variable (Recommended)**: Set the `LM_API_TOKEN` environment variable, and the SDK will automatically read it. 2. **Function Argument**: Pass the token directly as the `apiToken` parameter in the constructor. 
-```lms_code_snippet - variants: - Environment Variable: - language: typescript - code: | - // Set environment variables in your terminal before running the code: - // export LM_API_TOKEN="your-token-here" - - import { LMStudioClient } from "@lmstudio/sdk"; - // The SDK automatically reads from LM_API_TOKEN environment variable - const client = new LMStudioClient(); - - const model = await client.llm.model("qwen/qwen3-4b-2507"); - const result = await model.respond("What is the meaning of life?"); - - console.info(result.content); - Function Argument: - language: typescript - code: | - import { LMStudioClient } from "@lmstudio/sdk"; - const client = new LMStudioClient({ - apiToken: "your-token-here", - }); - - const model = await client.llm.model("qwen/qwen3-4b-2507"); - const result = await model.respond("What is the meaning of life?"); - - console.info(result.content); +```typescript tab="Environment Variable" +// Set environment variables in your terminal before running the code: +// export LM_API_TOKEN="your-token-here" + +import { LMStudioClient } from "@lmstudio/sdk"; +// The SDK automatically reads from LM_API_TOKEN environment variable +const client = new LMStudioClient(); + +const model = await client.llm.model("qwen/qwen3-4b-2507"); +const result = await model.respond("What is the meaning of life?"); + +console.info(result.content); +``` + +```typescript tab="Function Argument" +import { LMStudioClient } from "@lmstudio/sdk"; +const client = new LMStudioClient({ + apiToken: "your-token-here", +}); + +const model = await client.llm.model("qwen/qwen3-4b-2507"); +const result = await model.respond("What is the meaning of life?"); + +console.info(result.content); ``` diff --git a/2_typescript/index.md b/2_typescript/index.md index 2208c8e..0a54026 100644 --- a/2_typescript/index.md +++ b/2_typescript/index.md @@ -10,20 +10,16 @@ The SDK provides you a set of programmatic tools to interact with LLMs, embeddin `lmstudio-js` is available as an npm package. You can install it using npm, yarn, or pnpm. -```lms_code_snippet - variants: - npm: - language: bash - code: | - npm install @lmstudio/sdk --save - yarn: - language: bash - code: | - yarn add @lmstudio/sdk - pnpm: - language: bash - code: | - pnpm add @lmstudio/sdk +```bash tab="npm" +npm install @lmstudio/sdk --save +``` + +```bash tab="yarn" +yarn add @lmstudio/sdk +``` + +```bash tab="pnpm" +pnpm add @lmstudio/sdk ``` For the source code and open source contribution, visit [lmstudio-js](https://github.com/lmstudio-ai/lmstudio-js) on GitHub. 
@@ -38,19 +34,14 @@ For the source code and open source contribution, visit [lmstudio-js](https://gi

## Quick Example: Chat with a Llama Model

-```lms_code_snippet
-  title: "index.ts"
-  variants:
-    TypeScript:
-      language: typescript
-      code: |
-        import { LMStudioClient } from "@lmstudio/sdk";
-        const client = new LMStudioClient();
+```typescript title="index.ts"
+import { LMStudioClient } from "@lmstudio/sdk";
+const client = new LMStudioClient();

-        const model = await client.llm.model("qwen/qwen3-4b-2507");
-        const result = await model.respond("What is the meaning of life?");
+const model = await client.llm.model("qwen/qwen3-4b-2507");
+const result = await model.respond("What is the meaning of life?");

-        console.info(result.content);
+console.info(result.content);
 ```

### Getting Local Models
diff --git a/2_typescript/meta.json b/2_typescript/meta.json
new file mode 100644
index 0000000..71fc121
--- /dev/null
+++ b/2_typescript/meta.json
@@ -0,0 +1,16 @@
+{
+  "title": "TypeScript SDK",
+  "pages": [
+    "2_llm-prediction",
+    "3_agent",
+    "3_plugins",
+    "4_embedding",
+    "5_tokenization",
+    "6_manage-models",
+    "7_api-reference",
+    "8_model-info",
+    "authentication",
+    "_more",
+    "project-setup"
+  ]
+}
diff --git a/2_typescript/project-setup.md b/2_typescript/project-setup.md
index 35a4a0e..7eff67c 100644
--- a/2_typescript/project-setup.md
+++ b/2_typescript/project-setup.md
@@ -11,34 +11,26 @@ index: 2

Use the following command to start an interactive project setup:

-```lms_code_snippet
-  variants:
-    TypeScript (Recommended):
-      language: bash
-      code: |
-        lms create node-typescript
-    Javascript:
-      language: bash
-      code: |
-        lms create node-javascript
+```bash tab="TypeScript (Recommended)"
+lms create node-typescript
+```
+
+```bash tab="JavaScript"
+lms create node-javascript
 ```

## Add `lmstudio-js` to an Existing Project

If you have already created a project and would like to use `lmstudio-js` in it, you can install it using npm, yarn, or pnpm.

-```lms_code_snippet
-  variants:
-    npm:
-      language: bash
-      code: |
-        npm install @lmstudio/sdk --save
-    yarn:
-      language: bash
-      code: |
-        yarn add @lmstudio/sdk
-    pnpm:
-      language: bash
-      code: |
-        pnpm add @lmstudio/sdk
+```bash tab="npm"
+npm install @lmstudio/sdk --save
+```
+
+```bash tab="yarn"
+yarn add @lmstudio/sdk
+```
+
+```bash tab="pnpm"
+pnpm add @lmstudio/sdk
 ```
diff --git a/3_cli/0_local-models/meta.json b/3_cli/0_local-models/meta.json
new file mode 100644
index 0000000..1a9abfa
--- /dev/null
+++ b/3_cli/0_local-models/meta.json
@@ -0,0 +1,11 @@
+{
+  "title": "Local Models",
+  "pages": [
+    "chat",
+    "get",
+    "import",
+    "load",
+    "ls",
+    "ps"
+  ]
+}
diff --git a/3_cli/1_serve/meta.json b/3_cli/1_serve/meta.json
new file mode 100644
index 0000000..b48f9f9
--- /dev/null
+++ b/3_cli/1_serve/meta.json
@@ -0,0 +1,9 @@
+{
+  "title": "Serve",
+  "pages": [
+    "log-stream",
+    "server-start",
+    "server-status",
+    "server-stop"
+  ]
+}
diff --git a/3_cli/2_daemon/daemon-down.md b/3_cli/2_daemon/daemon-down.md
index 6725958..572cc76 100644
--- a/3_cli/2_daemon/daemon-down.md
+++ b/3_cli/2_daemon/daemon-down.md
@@ -11,9 +11,9 @@ The `lms daemon down` command stops the running llmster.

lms daemon down
```

-```lms_info
+:::info[Info]
`lms daemon down` only works if llmster is running. It will not stop LM Studio if it is running as a GUI app.
-``` +::: ### Learn more diff --git a/3_cli/2_daemon/meta.json b/3_cli/2_daemon/meta.json new file mode 100644 index 0000000..c079bed --- /dev/null +++ b/3_cli/2_daemon/meta.json @@ -0,0 +1,9 @@ +{ + "title": "Daemon", + "pages": [ + "daemon-down", + "daemon-status", + "daemon-up", + "daemon-update" + ] +} diff --git a/3_cli/3_link/link-enable.md b/3_cli/3_link/link-enable.md index 413bcae..9ab2878 100644 --- a/3_cli/3_link/link-enable.md +++ b/3_cli/3_link/link-enable.md @@ -7,9 +7,9 @@ index: 1 The `lms link enable` command enables LM Link on this device, allowing it to connect with other devices on the same link. -```lms_info +:::info[Info] LM Link requires an LM Studio account. Run `lms login` first if you haven't already. -``` +::: ## Enable LM Link diff --git a/3_cli/3_link/meta.json b/3_cli/3_link/meta.json new file mode 100644 index 0000000..78b28a3 --- /dev/null +++ b/3_cli/3_link/meta.json @@ -0,0 +1,10 @@ +{ + "title": "Link", + "pages": [ + "link-disable", + "link-enable", + "link-set-device-name", + "link-set-preferred-device", + "link-status" + ] +} diff --git a/3_cli/4_runtime/meta.json b/3_cli/4_runtime/meta.json new file mode 100644 index 0000000..df9dcfe --- /dev/null +++ b/3_cli/4_runtime/meta.json @@ -0,0 +1,6 @@ +{ + "title": "Runtime", + "pages": [ + "runtime" + ] +} diff --git a/3_cli/5_develop-and-publish/meta.json b/3_cli/5_develop-and-publish/meta.json new file mode 100644 index 0000000..fdada2a --- /dev/null +++ b/3_cli/5_develop-and-publish/meta.json @@ -0,0 +1,9 @@ +{ + "title": "Develop & Publish", + "pages": [ + "clone", + "dev", + "login", + "push" + ] +} diff --git a/3_cli/index.md b/3_cli/index.md index 48e4efa..6d4239c 100644 --- a/3_cli/index.md +++ b/3_cli/index.md @@ -34,13 +34,13 @@ lms --help ### Verify the installation -```lms_info +:::info[Info] 👉 You need to run LM Studio _at least once_ before you can use `lms`. -``` +::: Open a terminal window and run `lms`. -```lms_terminal +```bash title="Terminal" $ lms lms is LM Studio's CLI utility for your models, server, and inference runtime. (v0.0.47) diff --git a/3_cli/meta.json b/3_cli/meta.json new file mode 100644 index 0000000..7b5b402 --- /dev/null +++ b/3_cli/meta.json @@ -0,0 +1,13 @@ +{ + "title": "CLI", + "pages": [ + "0_local-models", + "1_serve", + "2_daemon", + "3_link", + "4_runtime", + "5_develop-and-publish", + "contributing", + "_lms-load" + ] +} diff --git a/4_integrations/1_mcp-remote/meta.json b/4_integrations/1_mcp-remote/meta.json new file mode 100644 index 0000000..8df5ad6 --- /dev/null +++ b/4_integrations/1_mcp-remote/meta.json @@ -0,0 +1,6 @@ +{ + "title": "MCP Integrations", + "pages": [ + "popular" + ] +} diff --git a/4_integrations/claude-code.md b/4_integrations/claude-code.md index 31dd739..5b1fd12 100644 --- a/4_integrations/claude-code.md +++ b/4_integrations/claude-code.md @@ -9,9 +9,9 @@ See: [Anthropic-compatible Messages endpoint](/docs/developer/anthropic-compat/m -```lms_protip +:::tip[Pro Tip] Have a powerful LLM rig? Use [LM Link](/docs/integrations/lmlink) to run Claude Code from your laptop while the model runs on your rig. -``` +::: ### 1) Start LM Studio's local server @@ -42,9 +42,9 @@ Notes: claude --model openai/gpt-oss-20b ``` -```lms_protip +:::tip[Pro Tip] Use a model (and server/model settings) with more than ~25k context length. Tools like Claude Code can consume a lot of context. 
-``` +::: ### 4) If Require Authentication is enabled, use your LM Studio API token diff --git a/4_integrations/codex.md b/4_integrations/codex.md index 93bb295..49de710 100644 --- a/4_integrations/codex.md +++ b/4_integrations/codex.md @@ -9,9 +9,9 @@ See: [OpenAI-compatible Responses endpoint](/docs/developer/openai-compat/respon -```lms_protip +:::tip[Pro Tip] Have a powerful LLM rig? Use [LM Link](/docs/integrations/lmlink) to run Codex from your laptop while the model runs on your rig. -``` +::: ### 1) Start LM Studio's local server @@ -35,9 +35,9 @@ codex --oss By default, Codex will download and use [openai/gpt-oss-20b](https://lmstudio.ai/models/openai/gpt-oss-20b). -```lms_protip +:::tip[Pro Tip] Use a model (and server/model settings) with more than ~25k context length. Tools like Codex can consume a lot of context. -``` +::: You can also use any other model you have available in LM Studio. For example: diff --git a/4_integrations/meta.json b/4_integrations/meta.json new file mode 100644 index 0000000..9119cd5 --- /dev/null +++ b/4_integrations/meta.json @@ -0,0 +1,10 @@ +{ + "title": "Integrations", + "pages": [ + "1_mcp-remote", + "claude-code", + "codex", + "lmlink", + "openclaw" + ] +} diff --git a/4_integrations/openclaw.md b/4_integrations/openclaw.md index 0e86183..57a6475 100644 --- a/4_integrations/openclaw.md +++ b/4_integrations/openclaw.md @@ -9,9 +9,9 @@ See: [OpenClaw Docs](https://docs.openclaw.ai/providers/lmstudio). -```lms_protip +:::tip[Pro Tip] Have a powerful LLM rig? Use [LM Link](/docs/integrations/lmlink) to run OpenClaw from your laptop while the model runs on your rig. -``` +::: ### 1) Start LM Studio's local server @@ -45,9 +45,9 @@ openclaw onboard \ --custom-model-id qwen/qwen3.5-9b ``` -```lms_protip +:::tip[Pro Tip] Use a model (and server/model settings) with more than ~50k context length. Tools like OpenClaw can consume a lot of context. -``` +::: ### 3) Set up LM Studio as default memory search provider diff --git a/5_lmlink/1_basics/meta.json b/5_lmlink/1_basics/meta.json new file mode 100644 index 0000000..c5c7dd6 --- /dev/null +++ b/5_lmlink/1_basics/meta.json @@ -0,0 +1,8 @@ +{ + "title": "Getting Started", + "pages": [ + "add-device", + "faq", + "preferred-device" + ] +} diff --git a/5_lmlink/meta.json b/5_lmlink/meta.json new file mode 100644 index 0000000..831a5ae --- /dev/null +++ b/5_lmlink/meta.json @@ -0,0 +1,6 @@ +{ + "title": "LM Link", + "pages": [ + "1_basics" + ] +} diff --git a/README.md b/README.md index 044c253..0b1977d 100644 --- a/README.md +++ b/README.md @@ -46,38 +46,28 @@ Configurations that look good: 2. 
no title + 2+ variants ```` -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - // Multi-line TypeScript code - function hello() { - console.log("hey") - return "world" - } - - Python: - language: python - code: | - # Multi-line Python code - def hello(): - print("hey") - return "world" +```typescript tab="TypeScript" +// Multi-line TypeScript code +function hello() { + console.log("hey") + return "world" +} +``` + +```python tab="Python" +# Multi-line Python code +def hello(): + print("hey") + return "world" ``` ```` ```` -```lms_code_snippet - title: "generator.py" - variants: - Python: - language: python - code: | - # Multi-line Python code - def hello(): - print("hey") - return "world" +```python title="generator.py" +# Multi-line Python code +def hello(): + print("hey") + return "world" ``` ```` diff --git a/_template_dont_edit.md b/_template_dont_edit.md index acdb566..aa9f3e6 100644 --- a/_template_dont_edit.md +++ b/_template_dont_edit.md @@ -12,36 +12,26 @@ Configurations that look good: 1. title + 1 variant 2. no title + 2+ variants -```lms_code_snippet - variants: - TypeScript: - language: typescript - code: | - // Multi-line TypeScript code - function hello() { - console.log("hey") - return "world" - } - - Python: - language: python - code: | - # Multi-line Python code - def hello(): - print("hey") - return "world" +```typescript tab="TypeScript" +// Multi-line TypeScript code +function hello() { + console.log("hey") + return "world" +} +``` + +```python tab="Python" +# Multi-line Python code +def hello(): + print("hey") + return "world" ``` -```lms_code_snippet - title: "generator.py" - variants: - Python: - language: python - code: | - # Multi-line Python code - def hello(): - print("hey") - return "world" +```python title="generator.py" +# Multi-line Python code +def hello(): + print("hey") + return "world" ```

@@ -82,17 +72,17 @@ async function main() { main(); ``` -```lms_notice +:::note You can jump to Settings from anywhere in the app by pressing `cmd` + `,` on macOS or `ctrl` + `,` on Windows/Linux. -``` +::: -```lms_protip +:::tip[Pro Tip] You can jump to Settings from anywhere in the app by pressing `cmd` + `,` on macOS or `ctrl` + `,` on Windows/Linux. -``` +::: -```lms_warning +:::warning[Heads Up] You can jump to Settings from anywhere in the app by pressing `cmd` + `,` on macOS or `ctrl` + `,` on Windows/Linux. -``` +::: ### Params diff --git a/meta.json b/meta.json new file mode 100644 index 0000000..c23b45b --- /dev/null +++ b/meta.json @@ -0,0 +1,12 @@ +{ + "title": "Docs", + "pages": [ + "0_app", + "1_developer", + "1_python", + "2_typescript", + "3_cli", + "4_integrations", + "5_lmlink" + ] +} From f2c20daca6b4d8748898fd311eab028b7ce281a2 Mon Sep 17 00:00:00 2001 From: Yagil Burowski Date: Tue, 14 Apr 2026 00:26:19 -0400 Subject: [PATCH 2/8] Organize Fumadocs section metadata --- 0_app/meta.json | 21 ++++++++++++++------- 1_developer/meta.json | 16 +++++++++++----- 1_python/meta.json | 29 ++++++++++++++++++++--------- 2_typescript/meta.json | 31 +++++++++++++++++++++---------- 3_cli/meta.json | 22 +++++++++++++++------- 4_integrations/meta.json | 9 ++++++--- 5_lmlink/meta.json | 5 ++++- 7 files changed, 91 insertions(+), 42 deletions(-) diff --git a/0_app/meta.json b/0_app/meta.json index d41e39a..086fdde 100644 --- a/0_app/meta.json +++ b/0_app/meta.json @@ -1,12 +1,19 @@ { "title": "App", "pages": [ - "0_root", - "1_basics", - "2_mcp", - "3_modelyaml", - "3_presets", - "5_advanced", - "6_user-interface" + "---Introduction---", + "...0_root", + "---Getting Started---", + "...1_basics", + "---MCP---", + "...2_mcp", + "---model.yaml---", + "...3_modelyaml", + "---Presets---", + "...3_presets", + "---Advanced---", + "...5_advanced", + "---User Interface---", + "...6_user-interface" ] } diff --git a/1_developer/meta.json b/1_developer/meta.json index 1561597..f81a22e 100644 --- a/1_developer/meta.json +++ b/1_developer/meta.json @@ -1,11 +1,17 @@ { "title": "Developer", "pages": [ - "0_core", - "2_rest", - "3_openai-compat", - "4_anthropic-compat", + "---Introduction---", + "index", "api-changelog", - "_embeddings" + "_embeddings", + "---Core---", + "...0_core", + "---REST API---", + "...2_rest", + "---OpenAI Compatibility---", + "...3_openai-compat", + "---Anthropic Compatibility---", + "...4_anthropic-compat" ] } diff --git a/1_python/meta.json b/1_python/meta.json index 9abd664..9e939d0 100644 --- a/1_python/meta.json +++ b/1_python/meta.json @@ -1,14 +1,25 @@ { "title": "Python SDK", "pages": [ - "1_getting-started", - "1_llm-prediction", - "2_agent", - "3_embedding", - "4_tokenization", - "5_manage-models", - "6_model-info", - "_7_api-reference", - "_more" + "---Introduction---", + "index", + "---Getting Started---", + "...1_getting-started", + "---Basics---", + "...1_llm-prediction", + "---Agentic Flows---", + "...2_agent", + "---Text Embedding---", + "...3_embedding", + "---Tokenization---", + "...4_tokenization", + "---Manage Models---", + "...5_manage-models", + "---Model Info---", + "...6_model-info", + "---API Reference---", + "..._7_api-reference", + "---More---", + "..._more" ] } diff --git a/2_typescript/meta.json b/2_typescript/meta.json index 71fc121..356449f 100644 --- a/2_typescript/meta.json +++ b/2_typescript/meta.json @@ -1,16 +1,27 @@ { "title": "TypeScript SDK", "pages": [ - "2_llm-prediction", - "3_agent", - "3_plugins", - "4_embedding", - "5_tokenization", - 
"6_manage-models", - "7_api-reference", - "8_model-info", + "---Introduction---", + "index", "authentication", - "_more", - "project-setup" + "project-setup", + "---Basics---", + "...2_llm-prediction", + "---Agentic Flows---", + "...3_agent", + "---Plugins---", + "...3_plugins", + "---Text Embedding---", + "...4_embedding", + "---Tokenization---", + "...5_tokenization", + "---Manage Models---", + "...6_manage-models", + "---API Reference---", + "...7_api-reference", + "---Model Info---", + "...8_model-info", + "---More---", + "..._more" ] } diff --git a/3_cli/meta.json b/3_cli/meta.json index 7b5b402..631b405 100644 --- a/3_cli/meta.json +++ b/3_cli/meta.json @@ -1,13 +1,21 @@ { "title": "CLI", "pages": [ - "0_local-models", - "1_serve", - "2_daemon", - "3_link", - "4_runtime", - "5_develop-and-publish", + "---Introduction---", + "index", "contributing", - "_lms-load" + "_lms-load", + "---Local Models---", + "...0_local-models", + "---Serve---", + "...1_serve", + "---Daemon---", + "...2_daemon", + "---Link---", + "...3_link", + "---Runtime---", + "...4_runtime", + "---Develop & Publish---", + "...5_develop-and-publish" ] } diff --git a/4_integrations/meta.json b/4_integrations/meta.json index 9119cd5..6af7f07 100644 --- a/4_integrations/meta.json +++ b/4_integrations/meta.json @@ -1,10 +1,13 @@ { "title": "Integrations", "pages": [ - "1_mcp-remote", + "---Introduction---", + "index", + "lmlink", "claude-code", "codex", - "lmlink", - "openclaw" + "openclaw", + "---MCP Integrations---", + "...1_mcp-remote" ] } diff --git a/5_lmlink/meta.json b/5_lmlink/meta.json index 831a5ae..fb44d63 100644 --- a/5_lmlink/meta.json +++ b/5_lmlink/meta.json @@ -1,6 +1,9 @@ { "title": "LM Link", "pages": [ - "1_basics" + "---Introduction---", + "index", + "---Getting Started---", + "...1_basics" ] } From 86cb996d916694c3b703093f7b80221119265bfe Mon Sep 17 00:00:00 2001 From: Yagil Burowski Date: Tue, 14 Apr 2026 01:38:33 -0400 Subject: [PATCH 3/8] Convert docs callout pages to MDX --- 0_app/0_root/{offline.md => offline.mdx} | 4 +- .../{_connect-apps.md => _connect-apps.mdx} | 8 +- 0_app/1_basics/{index.md => index.mdx} | 10 +-- 0_app/2_mcp/{index.md => index.mdx} | 10 +-- .../{per-model.md => per-model.mdx} | 18 ++--- .../{languages.md => languages.mdx} | 8 +- .../{authentication.md => authentication.mdx} | 20 ++--- ...adless_llmster.md => headless_llmster.mdx} | 4 +- 1_developer/0_core/{mcp.md => mcp.mdx} | 12 +-- .../2_rest/{endpoints.md => endpoints.mdx} | 8 +- .../{stateful-chats.md => stateful-chats.mdx} | 4 +- .../{completions.md => completions.mdx} | 4 +- .../{authentication.md => authentication.mdx} | 4 +- .../{loading.md => loading.mdx} | 16 +--- ...get-load-config.md => get-load-config.mdx} | 4 +- ...template.md => _apply-prompt-template.mdx} | 4 +- ...ed-response.md => structured-response.mdx} | 41 +--------- ...figuration.md => custom-configuration.mdx} | 4 +- ...figuration.md => custom-configuration.mdx} | 4 +- ...et-load-config.md => _get-load-config.mdx} | 4 +- ...template.md => _apply-prompt-template.mdx} | 4 +- .../{authentication.md => authentication.mdx} | 4 +- .../{daemon-down.md => daemon-down.mdx} | 4 +- .../{link-enable.md => link-enable.mdx} | 4 +- 3_cli/{index.md => index.mdx} | 4 +- 4_integrations/claude-code.md | 60 --------------- 4_integrations/claude-code.mdx | 74 +++++++++++++++++++ 4_integrations/codex.md | 48 ------------ 4_integrations/codex.mdx | 58 +++++++++++++++ 4_integrations/openclaw.md | 61 --------------- 4_integrations/openclaw.mdx | 73 ++++++++++++++++++ 
...te_dont_edit.md => _template_dont_edit.mdx | 12 +-- 32 files changed, 292 insertions(+), 305 deletions(-) rename 0_app/0_root/{offline.md => offline.mdx} (98%) rename 0_app/1_basics/{_connect-apps.md => _connect-apps.mdx} (91%) rename 0_app/1_basics/{index.md => index.mdx} (90%) rename 0_app/2_mcp/{index.md => index.mdx} (82%) rename 0_app/5_advanced/{per-model.md => per-model.mdx} (68%) rename 0_app/6_user-interface/{languages.md => languages.mdx} (98%) rename 1_developer/0_core/{authentication.md => authentication.mdx} (72%) rename 1_developer/0_core/{headless_llmster.md => headless_llmster.mdx} (98%) rename 1_developer/0_core/{mcp.md => mcp.mdx} (98%) rename 1_developer/2_rest/{endpoints.md => endpoints.mdx} (98%) rename 1_developer/2_rest/{stateful-chats.md => stateful-chats.mdx} (98%) rename 1_developer/3_openai-compat/{completions.md => completions.mdx} (89%) rename 1_python/1_getting-started/{authentication.md => authentication.mdx} (97%) rename 1_python/5_manage-models/{loading.md => loading.mdx} (93%) rename 1_python/6_model-info/{get-load-config.md => get-load-config.mdx} (95%) rename 1_python/_more/{_apply-prompt-template.md => _apply-prompt-template.mdx} (97%) rename 2_typescript/2_llm-prediction/{structured-response.md => structured-response.mdx} (75%) rename 2_typescript/3_plugins/1_tools-provider/{custom-configuration.md => custom-configuration.mdx} (98%) rename 2_typescript/3_plugins/2_prompt-preprocessor/{custom-configuration.md => custom-configuration.mdx} (97%) rename 2_typescript/8_model-info/{_get-load-config.md => _get-load-config.mdx} (93%) rename 2_typescript/_more/{_apply-prompt-template.md => _apply-prompt-template.mdx} (97%) rename 2_typescript/{authentication.md => authentication.mdx} (97%) rename 3_cli/2_daemon/{daemon-down.md => daemon-down.mdx} (90%) rename 3_cli/3_link/{link-enable.md => link-enable.mdx} (94%) rename 3_cli/{index.md => index.mdx} (98%) delete mode 100644 4_integrations/claude-code.md create mode 100644 4_integrations/claude-code.mdx delete mode 100644 4_integrations/codex.md create mode 100644 4_integrations/codex.mdx delete mode 100644 4_integrations/openclaw.md create mode 100644 4_integrations/openclaw.mdx rename _template_dont_edit.md => _template_dont_edit.mdx (96%) diff --git a/0_app/0_root/offline.md b/0_app/0_root/offline.mdx similarity index 98% rename from 0_app/0_root/offline.md rename to 0_app/0_root/offline.mdx index af25c71..646de64 100644 --- a/0_app/0_root/offline.md +++ b/0_app/0_root/offline.mdx @@ -4,9 +4,9 @@ description: LM Studio can operate entirely offline, just make sure to get some index: 4 --- -:::note + In general, LM Studio does not require the internet in order to work. This includes core functions like chatting with models, chatting with documents, or running a local server, none of which require the internet. -::: + ### Operations that do NOT require connectivity diff --git a/0_app/1_basics/_connect-apps.md b/0_app/1_basics/_connect-apps.mdx similarity index 91% rename from 0_app/1_basics/_connect-apps.md rename to 0_app/1_basics/_connect-apps.mdx index 82a1ded..05bf4a5 100644 --- a/0_app/1_basics/_connect-apps.md +++ b/0_app/1_basics/_connect-apps.mdx @@ -5,7 +5,7 @@ description: Getting started with connecting applications to LM Studio LM Studio comes with a few built-in themes for app-wide color palettes. -
+
### Selecting a Theme @@ -13,13 +13,13 @@ You can choose a theme in the Settings tab. Choosing the "Auto" option will automatically switch between Light and Dark themes based on your system settings. -:::tip[Pro Tip] + You can jump to Settings from anywhere in the app by pressing `cmd` + `,` on macOS or `ctrl` + `,` on Windows/Linux. -::: + ###### To get to the Settings page, you need to be on [Power User mode](/docs/modes) or higher. -
+
### Community
 
diff --git a/0_app/1_basics/index.md b/0_app/1_basics/index.mdx
similarity index 90%
rename from 0_app/1_basics/index.md
rename to 0_app/1_basics/index.mdx
index d47c0aa..2e77e4d 100644
--- a/0_app/1_basics/index.md
+++ b/0_app/1_basics/index.mdx
@@ -7,11 +7,11 @@ index: 1
 
 Double check your computer meets the minimum [system requirements](/docs/system-requirements).
 
-:::info[Info]
+
 You might sometimes see terms such as `open-source models` or `open-weights models`. Different models might be released under different licenses and varying degrees of 'openness'. In order to run a model locally, you need to be able to get access to its "weights", often distributed as one or more files that end with `.gguf`, `.safetensors` etc.
-:::
+
 
-
+
## Getting up and running @@ -23,7 +23,7 @@ Once you're all set up, you need to **download your first LLM**. Head over to the Discover tab to download models. Pick one of the curated options or search for models by search query (e.g. `"Llama"`). See more in-depth information about downloading models [here](/docs/basics/download-models). - + ### 2. Load a model to memory @@ -45,7 +45,7 @@ Once the model is loaded, you can start a back-and-forth conversation with the m -
+
### Community
 
diff --git a/0_app/2_mcp/index.md b/0_app/2_mcp/index.mdx
similarity index 82%
rename from 0_app/2_mcp/index.md
rename to 0_app/2_mcp/index.mdx
index d1d121d..b1ebbef 100644
--- a/0_app/2_mcp/index.md
+++ b/0_app/2_mcp/index.mdx
@@ -10,9 +10,9 @@ Starting LM Studio 0.3.17, LM Studio acts as a **Model Context Protocol (MCP) H
 
 Never install MCPs from untrusted sources.
 
-:::warning[Heads Up]
+
 Some MCP servers can run arbitrary code, access your local files, and use your network connection. Always be cautious when installing and using MCP servers. If you don't trust the source, don't install it.
-:::
+
 
 # Use MCP servers in LM Studio
 
@@ -22,18 +22,18 @@ Starting 0.3.17 (b10), LM Studio supports both local and remote MCP servers. You
 
 Switch to the "Program" tab in the right-hand sidebar. Click `Install > Edit mcp.json`.
 
-
+
 
 This will open the `mcp.json` file in the in-app editor. You can add MCP servers by editing this file.
 
-
+
 
 ### Example MCP to try: Hugging Face MCP Server
 
 This MCP server provides access to functions like model and dataset search.
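The patch does not show what goes inside `mcp.json`. As a minimal sketch, assuming the Cursor-style `mcpServers` notation and a remote `url` entry for the Hugging Face server (the `Authorization` header and the token placeholder are illustrative assumptions, not taken from this patch), an entry might look like:

```json
{
  "mcpServers": {
    "hf-mcp-server": {
      "url": "https://huggingface.co/mcp",
      "headers": {
        "Authorization": "Bearer <YOUR_HF_TOKEN>"
      }
    }
  }
}
```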
-
+
 
 Add MCP Server hf-mcp-server to LM Studio
 
diff --git a/0_app/5_advanced/per-model.md b/0_app/5_advanced/per-model.mdx
similarity index 68%
rename from 0_app/5_advanced/per-model.md
rename to 0_app/5_advanced/per-model.mdx
index e2b0969..e542b6c 100644
--- a/0_app/5_advanced/per-model.md
+++ b/0_app/5_advanced/per-model.mdx
@@ -9,30 +9,30 @@ You can set default load settings for each model in LM Studio.
 
 When the model is loaded anywhere in the app (including through [`lms load`](/docs/cli#load-a-model-with-options)), these settings will be used.
 
-
+
### Setting default parameters for a model Head to the My Models tab and click on the gear ⚙️ icon to edit the model's default parameters. - + This will open a dialog where you can set the default parameters for the model. -
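For reference, the `meta.json` conventions applied throughout PATCH 2/8 compose as follows: a plain entry names a single page, a `---Label---` entry renders a sidebar section separator, and a `...folder` entry splices in the pages of a subfolder. A minimal sketch of a hypothetical section (not a file from this series):

```json
{
  "title": "Example Section",
  "pages": [
    "---Introduction---",
    "index",
    "---Guides---",
    "...0_guides"
  ]
}
```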