From 8cbac68c31b20f1433e3f67bca1ccf0d6602812f Mon Sep 17 00:00:00 2001 From: "mintlify[bot]" <109931778+mintlify[bot]@users.noreply.github.com> Date: Wed, 20 May 2026 10:51:42 +0000 Subject: [PATCH] docs: document nested field paths for FTS indexes --- docs/indexing/fts-index.mdx | 28 +++++++++++++++++++- docs/snippets/indexing.mdx | 2 ++ tests/py/test_indexing.py | 53 +++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/docs/indexing/fts-index.mdx b/docs/indexing/fts-index.mdx index 64fac40..610bdf8 100644 --- a/docs/indexing/fts-index.mdx +++ b/docs/indexing/fts-index.mdx @@ -5,7 +5,7 @@ description: "Create and tune BM25-based full-text search indexes in LanceDB." icon: "book" mode: "wide" --- -import { PyFtsIndexAsync as FtsIndexAsync, PyFtsIndexCreate as FtsIndexCreate, PyFtsIndexWait as FtsIndexWait } from '/snippets/indexing.mdx'; +import { PyFtsIndexAsync as FtsIndexAsync, PyFtsIndexCreate as FtsIndexCreate, PyFtsIndexNested as FtsIndexNested, PyFtsIndexWait as FtsIndexWait } from '/snippets/indexing.mdx'; LanceDB provides performant full-text search based on BM25, allowing you to incorporate keyword-based search in your retrieval solutions. This page shows examples on how to create and configure FTS indexes in LanceDB OSS and Enterprise, using the synchronous and asynchronous APIs. @@ -48,6 +48,32 @@ When using async connections (`connect_async`), use `create_index` with the `FTS The `create_fts_index` method is not available on `AsyncTable`. Use `create_index` with `FTS` config instead. +## Nested field paths + +FTS indexes can target text leaves inside struct columns by passing a dotted path (for example, `payload.text`). The same path works for [`MatchQuery`](/search/full-text-search) and [`PhraseQuery`](/search/full-text-search), and for the `columns` argument on async `nearest_to_text` queries. + +You can point an index at any string leaf nested in a struct, regardless of depth. 
The struct container itself isn't indexable: you have to name a specific text field. + +<CodeGroup> + <CodeBlock filename="Python" language="Python" icon="python"> + {FtsIndexNested} + </CodeBlock> +</CodeGroup> + +LanceDB rejects paths that don't resolve to a text leaf: + +- A struct container (for example, `payload`): raises `ValueError: FTS index cannot be created ...`. +- A non-text leaf such as an integer or float (for example, `payload.count`): raises the same error. +- A path that doesn't exist in the schema (for example, `payload.missing`): raises `ValueError: Field path ... not found`. + +The async API accepts the same dotted paths through `create_index`: + +```python Python icon="python" +from lancedb.index import FTS + +await async_table.create_index("payload.text", config=FTS(with_position=True)) +``` + ## Configuration Options ### FTS Parameters diff --git a/docs/snippets/indexing.mdx b/docs/snippets/indexing.mdx index 8dc6fbf..10df79b 100644 --- a/docs/snippets/indexing.mdx +++ b/docs/snippets/indexing.mdx @@ -4,6 +4,8 @@ export const PyFtsIndexAsync = "import asyncio\n\nimport lancedb\nimport polars export const PyFtsIndexCreate = "table_name = \"fts-index-create\"\ntable = db.open_table(table_name)\ntable.create_fts_index(\"text\")\n"; +export const PyFtsIndexNested = "from lancedb.query import MatchQuery, PhraseQuery\n\ntable = db.open_table(\"fts-index-nested\")\n\n# Index a text leaf inside a struct column using a dotted path.\ntable.create_fts_index(\"payload.text\", with_position=True)\n\n# The same dotted path works in MatchQuery and PhraseQuery.\nmatches = (\n table.search(MatchQuery(\"puppy\", \"payload.text\")).limit(5).to_list()\n)\nphrases = (\n table.search(PhraseQuery(\"puppy runs\", \"payload.text\"))\n .limit(5)\n .to_list()\n)\n"; + export const PyFtsIndexWait = "table_name = \"fts-index-wait\"\n\ntable = db.open_table(table_name)\ntable.create_fts_index(\"text\")\n\nindex_name = \"text_idx\"\ntable.wait_for_index([index_name])\n"; export const PyGpuIndexCuda = "table.create_index(\n num_partitions=256,\n num_sub_vectors=96,\n 
accelerator=\"cuda\",\n)\n"; diff --git a/tests/py/test_indexing.py b/tests/py/test_indexing.py index 2f50877..a293d7c 100644 --- a/tests/py/test_indexing.py +++ b/tests/py/test_indexing.py @@ -496,6 +496,59 @@ def test_fts_index_wait(tmp_db): assert table.list_indices() +def test_fts_index_nested_field(tmp_db): + nested_schema = pa.struct([ + pa.field("text", pa.string()), + pa.field("count", pa.int32()), + ]) + schema = pa.schema([ + pa.field("id", pa.int64()), + pa.field("payload", nested_schema), + ]) + tmp_db.create_table( + "fts-index-nested", + pa.table( + { + "id": pa.array([1, 2], pa.int64()), + "payload": pa.array( + [ + {"text": "Frodo was a happy puppy", "count": 1}, + {"text": "puppy runs through the meadow", "count": 2}, + ], + type=nested_schema, + ), + }, + schema=schema, + ), + mode="overwrite", + ) + + db = tmp_db + # --8<-- [start:fts_index_nested] + from lancedb.query import MatchQuery, PhraseQuery + + table = db.open_table("fts-index-nested") + + # Index a text leaf inside a struct column using a dotted path. + table.create_fts_index("payload.text", with_position=True) + + # The same dotted path works in MatchQuery and PhraseQuery. + matches = ( + table.search(MatchQuery("puppy", "payload.text")).limit(5).to_list() + ) + phrases = ( + table.search(PhraseQuery("puppy runs", "payload.text")) + .limit(5) + .to_list() + ) + # --8<-- [end:fts_index_nested] + + assert len(matches) > 0 + assert all("puppy" in row["payload"]["text"] for row in matches) + assert len(phrases) > 0 + assert all("puppy runs" in row["payload"]["text"] for row in phrases) + + @pytest.mark.asyncio async def test_fts_index_async(tmp_path, monkeypatch): monkeypatch.chdir(tmp_path)