diff --git a/README.md b/README.md index 7009aaa..70a2f81 100644 --- a/README.md +++ b/README.md @@ -263,9 +263,20 @@ claude mcp add cocoindex-code \ Any model supported by LiteLLM works — see the [full list of embedding providers](https://docs.litellm.ai/docs/embedding/supported_embedding). -### GPU-optimised local model +### Local SentenceTransformers models -If you have a GPU, [`nomic-ai/CodeRankEmbed`](https://huggingface.co/nomic-ai/CodeRankEmbed) delivers significantly better code retrieval than the default model. It is 137M parameters, requires ~1 GB VRAM, and has an 8192-token context window. +Use the `sbert/` prefix to load any [SentenceTransformers](https://www.sbert.net/) model locally (no API key required). + +**Example — general purpose text model:** +```bash +claude mcp add cocoindex-code \ + -e COCOINDEX_CODE_EMBEDDING_MODEL=sbert/nomic-ai/nomic-embed-text-v1 \ + -- cocoindex-code +``` + +**GPU-optimised code retrieval:** + +[`nomic-ai/CodeRankEmbed`](https://huggingface.co/nomic-ai/CodeRankEmbed) delivers significantly better code retrieval than the default model. It is 137M parameters, requires ~1 GB VRAM, and has an 8192-token context window. ```bash claude mcp add cocoindex-code \ diff --git a/src/cocoindex_code/config.py b/src/cocoindex_code/config.py index 6289364..fc1df87 100644 --- a/src/cocoindex_code/config.py +++ b/src/cocoindex_code/config.py @@ -76,7 +76,6 @@ class Config: embedding_model: str index_dir: Path device: str | None - trust_remote_code: bool extra_extensions: dict[str, str | None] excluded_patterns: list[str] @@ -103,16 +102,6 @@ def from_env(cls) -> Config: # Device: auto-detect CUDA or use env override device = os.environ.get("COCOINDEX_CODE_DEVICE") - # trust_remote_code: opt-in via env var only. - # sentence-transformers 5.x+ supports Jina models natively, so - # auto-enabling this for jinaai/ models causes failures with - # transformers 5.x (removed find_pruneable_heads_and_indices). - trust_remote_code = os.environ.get("COCOINDEX_CODE_TRUST_REMOTE_CODE", "").lower() in ( - "1", - "true", - "yes", - ) - # Extra file extensions (format: "inc:php,yaml,toml" — optional lang after colon) raw_extra = os.environ.get("COCOINDEX_CODE_EXTRA_EXTENSIONS", "") extra_extensions: dict[str, str | None] = {} @@ -134,7 +123,6 @@ def from_env(cls) -> Config: embedding_model=embedding_model, index_dir=index_dir, device=device, - trust_remote_code=trust_remote_code, extra_extensions=extra_extensions, excluded_patterns=excluded_patterns, ) diff --git a/src/cocoindex_code/shared.py b/src/cocoindex_code/shared.py index adade90..7b28d1b 100644 --- a/src/cocoindex_code/shared.py +++ b/src/cocoindex_code/shared.py @@ -31,19 +31,15 @@ # Models that define a "query" prompt for asymmetric retrieval. _QUERY_PROMPT_MODELS = {"nomic-ai/nomic-embed-code", "nomic-ai/CodeRankEmbed"} query_prompt_name: str | None = "query" if _model_name in _QUERY_PROMPT_MODELS else None - # Models whose custom remote code is known-compatible with transformers 5.x. - _KNOWN_REMOTE_CODE_MODELS = {"nomic-ai/CodeRankEmbed"} - _trust = config.trust_remote_code or _model_name in _KNOWN_REMOTE_CODE_MODELS embedder = SentenceTransformerEmbedder( _model_name, device=config.device, - trust_remote_code=_trust, + trust_remote_code=True, ) logger.info( - "Embedding model: %s | device: %s | trust_remote_code: %s", + "Embedding model: %s | device: %s", config.embedding_model, config.device, - _trust, ) else: from cocoindex.ops.litellm import LiteLLMEmbedder diff --git a/tests/test_config.py b/tests/test_config.py index d0abb38..015a88f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -34,41 +34,6 @@ def test_env_var_overrides_device(self, tmp_path: Path) -> None: config = Config.from_env() assert config.device == "cpu" - -class TestConfigTrustRemoteCode: - """Tests for trust_remote_code env var control.""" - - def test_false_by_default(self, tmp_path: Path) -> None: - with patch.dict( - os.environ, - {"COCOINDEX_CODE_ROOT_PATH": str(tmp_path)}, - ): - os.environ.pop("COCOINDEX_CODE_TRUST_REMOTE_CODE", None) - config = Config.from_env() - assert config.trust_remote_code is False - - def test_true_when_env_var_set_to_true(self, tmp_path: Path) -> None: - with patch.dict( - os.environ, - { - "COCOINDEX_CODE_ROOT_PATH": str(tmp_path), - "COCOINDEX_CODE_TRUST_REMOTE_CODE": "true", - }, - ): - config = Config.from_env() - assert config.trust_remote_code is True - - def test_true_when_env_var_set_to_1(self, tmp_path: Path) -> None: - with patch.dict( - os.environ, - { - "COCOINDEX_CODE_ROOT_PATH": str(tmp_path), - "COCOINDEX_CODE_TRUST_REMOTE_CODE": "1", - }, - ): - config = Config.from_env() - assert config.trust_remote_code is True - def test_default_model_is_minilm(self, tmp_path: Path) -> None: with patch.dict( os.environ,