diff --git a/src/query/text.ts b/src/query/text.ts index 79d4d5e..c1f0439 100644 --- a/src/query/text.ts +++ b/src/query/text.ts @@ -29,8 +29,12 @@ export interface TextQueryConfig { /** Free-text query. Tokenised on whitespace, normalized (lowercase, comma + curly-quote strip), stopword-filtered, then OR-joined. */ text: string; - /** Indexed text field to search against. */ - textFieldName: string; + /** + * Indexed text field to search against. Pass a string to search a single + * field, or a `Record` to search multiple fields with + * per-field weighting. Weights must be finite numbers > 0. + */ + textFieldName: string | Record; /** * Scorer to apply when ranking results. Defaults to `BM25STD`. @@ -53,6 +57,14 @@ export interface TextQueryConfig { /** Pagination limit. Defaults to numResults. */ limit?: number; + /** + * Per-token weight map. Keys are individual words (no inner whitespace) and + * are matched case-insensitively against the lowercased query tokens. Values + * must be finite numbers >= 0. A weight of 0 effectively suppresses scoring + * for that token. When omitted, no per-token weighting is applied. + */ + textWeights?: Record; + /** * Stopwords to drop before OR-joining tokens. 
* @@ -67,33 +79,121 @@ export interface TextQueryConfig { stopwords?: StopwordsInput; } +function parseFieldWeights(spec: string | Record): Record { + if (spec === undefined || spec === null) { + throw new QueryValidationError('textFieldName is required'); + } + if (typeof spec === 'string') { + if (spec.length === 0) { + throw new QueryValidationError('textFieldName is required'); + } + const single: Record = Object.create(null); + single[spec] = 1.0; + return Object.freeze(single); + } + if (typeof spec !== 'object' || Array.isArray(spec)) { + throw new QueryValidationError( + 'textFieldName must be a string or a record of field:weight mappings' + ); + } + const entries = Object.entries(spec); + if (entries.length === 0) { + throw new QueryValidationError('textFieldName record must contain at least one field'); + } + const normalized: Record = Object.create(null); + for (const [field, weight] of entries) { + if (typeof field !== 'string' || field.length === 0) { + throw new QueryValidationError('textFieldName keys must be non-empty strings'); + } + if (typeof weight !== 'number' || !Number.isFinite(weight) || weight <= 0) { + throw new QueryValidationError( + `textFieldName weight for '${field}' must be a finite number > 0, got ${String(weight)}` + ); + } + normalized[field] = weight; + } + return Object.freeze(normalized); +} + +function parseTextWeights(weights: Record | undefined): Record { + if (weights === undefined) { + return Object.freeze(Object.create(null) as Record); + } + if (weights === null || typeof weights !== 'object' || Array.isArray(weights)) { + throw new QueryValidationError('textWeights must be a record of token:weight mappings'); + } + const normalized: Record = Object.create(null); + for (const [rawKey, weight] of Object.entries(weights)) { + const key = rawKey.trim().toLowerCase(); + if (key.length === 0 || /\s/.test(key)) { + throw new QueryValidationError( + `textWeights keys must be single tokens with no whitespace, got '${rawKey}'` + 
); + } + if (typeof weight !== 'number' || !Number.isFinite(weight) || weight < 0) { + throw new QueryValidationError( + `textWeights weight for '${key}' must be a finite number >= 0, got ${String(weight)}` + ); + } + normalized[key] = weight; + } + return Object.freeze(normalized); +} + /** * Full-text search query with optional filter. * - * Tokenises the input on whitespace, normalizes each token (trim, strip + * Tokenises the input on whitespace, normalises each token (trim, strip * leading/trailing commas, strip typographic quotes, lowercase), drops * stopwords, escapes Redis Search special characters, and OR-joins the * survivors inside the target field. Use `filter` to scope the search to * a subset of documents (e.g. by tag or numeric range). * - * **Note:** per-field and per-token weights from Python's - * `redisvl.query.TextQuery` are not yet ported. + * Supports per-token weighting via `textWeights` and per-field weighting by + * passing a `Record` to `textFieldName`. Both render using + * Redis Search's `=> { $weight: N }` syntax (dialect 2). * - * @example + * @example Single-field, default weights * ```typescript - * import { TextQuery, Tag } from 'redisvl'; + * new TextQuery({ + * text: 'machine learning', + * textFieldName: 'description', + * }); + * ``` * - * const q = new TextQuery({ + * @example Multi-field weighted + * ```typescript + * new TextQuery({ * text: 'machine learning', + * textFieldName: { title: 5.0, body: 1.0 }, + * }); + * ``` + * + * @example Per-token weighted + * ```typescript + * new TextQuery({ + * text: 'apple orange pear', * textFieldName: 'description', - * filter: Tag('category').eq('tech'), + * textWeights: { apple: 2.0, orange: 0.5 }, * }); - * const results = await index.search(q); * ``` */ export class TextQuery implements BaseQuery { public readonly text: string; - public readonly textFieldName: string; + /** + * Per-field weights. Frozen at construction. 
Iteration follows insertion + * order, which determines the order of field clauses in the rendered + * query. A single field with weight 1.0 renders identically to passing a + * bare string for `textFieldName`. + */ + public readonly fieldWeights: Readonly>; + /** + * Per-token weights. Keys are normalised to lowercase, whitespace-trimmed + * single tokens. Frozen at construction with a null prototype so adversarial + * keys (`constructor`, `__proto__`, etc.) cannot resolve via the prototype + * chain during render-time lookup. + */ + public readonly textWeights: Readonly>; public readonly textScorer: TextScorer; public readonly filter?: FilterInput; public readonly returnFields?: string[]; @@ -107,12 +207,9 @@ export class TextQuery implements BaseQuery { throw new QueryValidationError('text cannot be empty'); } - if (!config.textFieldName) { - throw new QueryValidationError('textFieldName is required'); - } - this.text = config.text; - this.textFieldName = config.textFieldName; + this.fieldWeights = parseFieldWeights(config.textFieldName); + this.textWeights = parseTextWeights(config.textWeights); this.textScorer = config.textScorer ?? 
'BM25STD'; this.filter = config.filter; this.returnFields = config.returnFields; @@ -124,12 +221,19 @@ export class TextQuery implements BaseQuery { buildQuery(): string { const stopwordSet = this.stopwords; + const weights = this.textWeights; const tokens: string[] = []; for (const raw of this.text.split(/\s+/)) { const norm = normalizeToken(raw); if (norm.length === 0) continue; if (stopwordSet && stopwordSet.has(norm)) continue; - tokens.push(escaper.escape(norm)); + const escaped = escaper.escape(norm); + const weight = weights[norm]; + if (weight !== undefined) { + tokens.push(`${escaped}=>{$weight:${weight}}`); + } else { + tokens.push(escaped); + } } if (tokens.length === 0) { @@ -138,7 +242,20 @@ export class TextQuery implements BaseQuery { ); } - const textClause = `@${this.textFieldName}:(${tokens.join(' | ')})`; + const orList = tokens.join(' | '); + + const fieldClauses: string[] = []; + for (const [field, weight] of Object.entries(this.fieldWeights)) { + if (weight === 1.0) { + fieldClauses.push(`@${field}:(${orList})`); + } else { + fieldClauses.push(`@${field}:(${orList}) => { $weight: ${weight} }`); + } + } + + const textClause = + fieldClauses.length === 1 ? fieldClauses[0] : `(${fieldClauses.join(' | ')})`; + const filterStr = renderFilter(this.filter); if (filterStr === '*') { return textClause; @@ -149,4 +266,21 @@ export class TextQuery implements BaseQuery { buildParams(): Record { return {}; } + + /** + * Returns the configured text field. A bare string is returned when exactly + * one field is configured with weight 1.0. Otherwise returns a copy of the + * normalised field-weight record. Mirrors Python's `text_field_name` + * property for cross-language compatibility. 
+ */ + get textFieldName(): string | Readonly> { + const entries = Object.entries(this.fieldWeights); + if (entries.length === 1) { + const [field, weight] = entries[0]; + if (weight === 1.0) { + return field; + } + } + return { ...this.fieldWeights }; + } } diff --git a/tests/integration/query-types.test.ts b/tests/integration/query-types.test.ts index b52fdb6..41ce9fc 100644 --- a/tests/integration/query-types.test.ts +++ b/tests/integration/query-types.test.ts @@ -374,4 +374,58 @@ describe('Query types integration (FilterQuery / CountQuery / VectorRangeQuery / expect(titles).toContain('Laptop computer for programming'); }); }); + + describe('TextQuery — per-field weights', () => { + it('ranks docs by per-field weight when the same terms appear in different fields', async () => { + // Stand up a dedicated two-text-field index so we can place + // identical match tokens in different fields and observe the + // per-field weight steering the ranking. + const indexName = `redisvl-test-text-weights-${Date.now()}`; + const schema = IndexSchema.fromObject({ + index: { + name: indexName, + prefix: `rvl-test-tw-${Date.now()}`, + storageType: 'hash', + }, + fields: [ + { name: 'title', type: 'text' }, + { name: 'body', type: 'text' }, + ], + }); + + const weightedIndex = new SearchIndex(schema, client); + await weightedIndex.create({ overwrite: true, drop: true }); + + try { + await weightedIndex.load( + [ + { id: 'a', title: 'foo bar', body: 'zzz zzz' }, + { id: 'b', title: 'zzz zzz', body: 'foo bar' }, + ], + { idField: 'id' } + ); + + // Let Redis index the two new docs. + await new Promise((r) => setTimeout(r, 100)); + + // Heavy ratio (10:1) keeps the assertion robust against + // BM25 quirks on a 2-doc corpus. 
+ const q = new TextQuery({ + text: 'foo bar', + textFieldName: { title: 10.0, body: 1.0 }, + returnFields: ['id'], + textScorer: 'BM25STD', + }); + + const results = await weightedIndex.search(q); + expect(results.documents.length).toBeGreaterThanOrEqual(2); + // Doc A's match is in the higher-weighted `title` field, so + // it must rank above Doc B whose match is in `body`. + expect(results.documents[0].id).toContain('a'); + expect(results.documents[1].id).toContain('b'); + } finally { + await weightedIndex.delete({ drop: true }).catch(() => {}); + } + }); + }); }); diff --git a/tests/unit/query/text.test.ts b/tests/unit/query/text.test.ts index 2e68c21..c7dde03 100644 --- a/tests/unit/query/text.test.ts +++ b/tests/unit/query/text.test.ts @@ -14,7 +14,9 @@ describe('TextQuery', () => { }); it('throws if textFieldName is missing', () => { - expect(() => new TextQuery({ text: 'hello' } as any)).toThrow(QueryValidationError); + expect(() => new TextQuery({ text: 'hello' } as any)).toThrow( + /textFieldName is required/ + ); }); it('defaults numResults to 10', () => { @@ -79,6 +81,51 @@ describe('TextQuery', () => { }); expect(q.buildQuery()).toBe('@description:(quick | fox)'); }); + + it('renders a single field with weight 1.0 without a $weight clause', () => { + const q = new TextQuery({ + text: 'quick fox', + textFieldName: { description: 1.0 }, + }); + expect(q.buildQuery()).toBe('@description:(quick | fox)'); + }); + + it('renders a single field with non-default weight using $weight syntax', () => { + const q = new TextQuery({ + text: 'quick fox', + textFieldName: { description: 5 }, + }); + expect(q.buildQuery()).toBe('@description:(quick | fox) => { $weight: 5 }'); + }); + + it('renders multiple fields OR-joined with mixed weights', () => { + const q = new TextQuery({ + text: 'quick fox', + textFieldName: { title: 3, body: 1.0 }, + }); + expect(q.buildQuery()).toBe( + '(@title:(quick | fox) => { $weight: 3 } | @body:(quick | fox))' + ); + }); + + it('renders 
multiple fields with all weights 1.0 wrapped in outer parens', () => { + const q = new TextQuery({ + text: 'quick fox', + textFieldName: { title: 1.0, body: 1.0 }, + }); + expect(q.buildQuery()).toBe('(@title:(quick | fox) | @body:(quick | fox))'); + }); + + it('combines multi-field weighted text clause with a filter via AND', () => { + const q = new TextQuery({ + text: 'engineer', + textFieldName: { title: 2, summary: 1.0 }, + filter: Tag('active').eq('true'), + }); + expect(q.buildQuery()).toBe( + '(@active:{true} (@title:(engineer) => { $weight: 2 } | @summary:(engineer)))' + ); + }); }); describe('buildParams', () => { @@ -220,4 +267,192 @@ describe('TextQuery', () => { expect(stopwords.english.has('the')).toBe(true); }); }); + + describe('field weights', () => { + it('normalises a string textFieldName to weight 1.0 in fieldWeights', () => { + const q = new TextQuery({ text: 'hello', textFieldName: 'description' }); + expect(q.fieldWeights).toEqual({ description: 1.0 }); + }); + + it('accepts a Record for textFieldName', () => { + const q = new TextQuery({ + text: 'hello', + textFieldName: { title: 5.0, body: 1.0 }, + }); + expect(q.fieldWeights).toEqual({ title: 5.0, body: 1.0 }); + }); + + it('freezes fieldWeights to enforce readonly at runtime', () => { + const q = new TextQuery({ text: 'hello', textFieldName: { title: 2.0 } }); + expect(Object.isFrozen(q.fieldWeights)).toBe(true); + }); + + it('rejects an empty fieldWeights record', () => { + expect(() => new TextQuery({ text: 'hello', textFieldName: {} })).toThrow( + QueryValidationError + ); + }); + + it.each([0, -1, Number.NaN, Number.POSITIVE_INFINITY])( + 'rejects field weight %p', + (weight) => { + expect( + () => + new TextQuery({ + text: 'hello', + textFieldName: { title: weight }, + }) + ).toThrow(QueryValidationError); + } + ); + + it('rejects a non-numeric field weight', () => { + expect( + () => + new TextQuery({ + text: 'hello', + // eslint-disable-next-line @typescript-eslint/no-explicit-any + 
textFieldName: { title: 'five' as any }, + }) + ).toThrow(QueryValidationError); + }); + + it('rejects an array for textFieldName', () => { + expect( + () => + new TextQuery({ + text: 'hello', + // eslint-disable-next-line @typescript-eslint/no-explicit-any + textFieldName: ['title'] as any, + }) + ).toThrow(QueryValidationError); + }); + }); + + describe('text weights (per-token)', () => { + it('defaults textWeights to an empty frozen record', () => { + const q = new TextQuery({ text: 'hello', textFieldName: 'd' }); + expect(q.textWeights).toEqual({}); + expect(Object.isFrozen(q.textWeights)).toBe(true); + }); + + it('lowercases keys when parsing textWeights', () => { + const q = new TextQuery({ + text: 'hello', + textFieldName: 'd', + textWeights: { Apple: 2, ORANGE: 0.5 }, + }); + expect(q.textWeights).toEqual({ apple: 2, orange: 0.5 }); + }); + + it('trims whitespace around textWeights keys', () => { + const q = new TextQuery({ + text: 'hello', + textFieldName: 'd', + textWeights: { ' apple ': 2 }, + }); + expect(q.textWeights).toEqual({ apple: 2 }); + }); + + it('rejects textWeights keys containing inner whitespace', () => { + expect( + () => + new TextQuery({ + text: 'hello', + textFieldName: 'd', + textWeights: { 'two words': 2 }, + }) + ).toThrow(QueryValidationError); + }); + + it('accepts a token weight of 0', () => { + const q = new TextQuery({ + text: 'apple', + textFieldName: 'd', + textWeights: { apple: 0 }, + }); + expect(q.textWeights).toEqual({ apple: 0 }); + expect(q.buildQuery()).toBe('@d:(apple=>{$weight:0})'); + }); + + it.each([-1, Number.NaN, Number.POSITIVE_INFINITY])('rejects token weight %p', (weight) => { + expect( + () => + new TextQuery({ + text: 'apple', + textFieldName: 'd', + textWeights: { apple: weight }, + }) + ).toThrow(QueryValidationError); + }); + + it('renders per-token weights inside the OR list', () => { + const q = new TextQuery({ + text: 'apple orange pear', + textFieldName: 'd', + textWeights: { apple: 2, orange: 0.5 }, 
+ }); + expect(q.buildQuery()).toBe('@d:(apple=>{$weight:2} | orange=>{$weight:0.5} | pear)'); + }); + + it('matches token-weight keys case-insensitively against input text', () => { + const q = new TextQuery({ + text: 'Apple ORANGE pear', + textFieldName: 'd', + textWeights: { apple: 2, orange: 0.5 }, + }); + // Tokens are lowercased before lookup (and before escape). + expect(q.buildQuery()).toBe('@d:(apple=>{$weight:2} | orange=>{$weight:0.5} | pear)'); + }); + + it('combines per-token and per-field weights', () => { + const q = new TextQuery({ + text: 'apple pear', + textFieldName: { title: 3, body: 1.0 }, + textWeights: { apple: 2 }, + }); + expect(q.buildQuery()).toBe( + '(@title:(apple=>{$weight:2} | pear) => { $weight: 3 } | @body:(apple=>{$weight:2} | pear))' + ); + }); + + it('does not resolve inherited keys via prototype chain (default textWeights)', () => { + const q = new TextQuery({ + text: 'constructor toString hasOwnProperty', + textFieldName: 'd', + }); + // Without prototype-null hardening, 'constructor' would resolve to the + // Object constructor and render as garbage. All three tokens must render + // bare. 
+ expect(q.buildQuery()).toBe('@d:(constructor | tostring | hasownproperty)'); + }); + }); + + describe('textFieldName property', () => { + it('returns a bare string when single field has weight 1.0', () => { + const q = new TextQuery({ text: 'hello', textFieldName: 'description' }); + expect(q.textFieldName).toBe('description'); + }); + + it('returns a bare string when a single-field record has weight 1.0 (Python parity)', () => { + const q = new TextQuery({ text: 'hello', textFieldName: { description: 1.0 } }); + expect(q.textFieldName).toBe('description'); + }); + + it('returns the record when single field has non-default weight', () => { + const q = new TextQuery({ + text: 'hello', + textFieldName: { description: 5 }, + }); + expect(q.textFieldName).toEqual({ description: 5 }); + }); + + it('returns the record when multiple fields are configured', () => { + const q = new TextQuery({ + text: 'hello', + textFieldName: { title: 1.0, body: 1.0 }, + }); + expect(q.textFieldName).toEqual({ title: 1.0, body: 1.0 }); + }); + }); }); diff --git a/website/docs/user-guide/filters-and-queries.md b/website/docs/user-guide/filters-and-queries.md index 819056b..54e12d9 100644 --- a/website/docs/user-guide/filters-and-queries.md +++ b/website/docs/user-guide/filters-and-queries.md @@ -232,8 +232,49 @@ const results = await index.search(query); Tokens are split on whitespace, normalized (lowercased, with leading/trailing commas and typographic quotes stripped), filtered against an English stopword list by default, escaped, and OR-joined — so `'The quick fox'` becomes `@description:(quick | fox)`. The optional `filter` is combined with the text clause via AND. -:::note Parity gap -Per-token and per-field weights from Python redisvl's `TextQuery` are not yet implemented. Stopword filtering matches Python (English by default). 
+#### Weighting tokens and fields + +`TextQuery` supports two ways to bias scoring: + +**Per-field weights.** Pass a `Record<string, number>` to `textFieldName` to +search multiple fields with different importance. Weights must be finite +numbers greater than 0. + +```typescript +import { TextQuery } from 'redis-vl'; + +const q = new TextQuery({ + text: 'machine learning', + textFieldName: { title: 5.0, body: 1.0 }, +}); +// Renders: (@title:(machine | learning) => { $weight: 5 } | @body:(machine | learning)) +``` + +A field with weight `1.0` is rendered without the `$weight` clause. If +exactly one field is provided with weight 1.0, the output is identical to +passing a string for `textFieldName`. + +**Per-token weights.** Pass a `textWeights` record to bias specific tokens +within the query. Keys are matched case-insensitively against the +normalized query tokens; values must be finite numbers `>= 0` (zero is +allowed and suppresses that token's scoring contribution). + +```typescript +const q = new TextQuery({ + text: 'apple orange pear', + textFieldName: 'description', + textWeights: { apple: 2.0, orange: 0.5 }, +}); +// Renders: @description:(apple=>{$weight:2} | orange=>{$weight:0.5} | pear) +``` + +Per-token and per-field weights can be combined; per-token weights are +applied inside every field clause. + +:::note Divergence from Python redisvl +Python exposes `set_field_weights()` and `set_text_weights()` mutators. The +TypeScript port keeps `TextQuery` fully immutable — construct a new query +instead. ::: #### Stopword filtering