Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 152 additions & 18 deletions src/query/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@ export interface TextQueryConfig {
/** Free-text query. Tokenised on whitespace, normalised (lowercase, comma + curly-quote strip), stopword-filtered, then OR-joined. */
text: string;

/** Indexed text field to search against. */
textFieldName: string;
/**
* Indexed text field to search against. Pass a string to search a single
* field, or a `Record<field, weight>` to search multiple fields with
* per-field weighting. Weights must be finite numbers > 0.
*/
textFieldName: string | Record<string, number>;

/**
* Scorer to apply when ranking results. Defaults to `BM25STD`.
Expand All @@ -53,6 +57,14 @@ export interface TextQueryConfig {
/** Pagination limit. Defaults to numResults. */
limit?: number;

/**
* Per-token weight map. Keys are individual words (no inner whitespace) and
* are matched case-insensitively against the lowercased query tokens. Values
* must be finite numbers >= 0. A weight of 0 effectively suppresses scoring
* for that token. When omitted, no per-token weighting is applied.
*/
textWeights?: Record<string, number>;

/**
* Stopwords to drop before OR-joining tokens.
*
Expand All @@ -67,33 +79,121 @@ export interface TextQueryConfig {
stopwords?: StopwordsInput;
}

/**
 * Validates a `textFieldName` specification and converts it to a frozen,
 * null-prototype map of field name to weight.
 *
 * A non-empty string becomes a one-entry map with weight 1.0. A record is
 * copied entry by entry, in `Object.entries` order, after each key and
 * weight has been checked.
 *
 * @param spec - A single field name, or a record of field name to weight.
 * @returns A frozen map with a null prototype.
 * @throws QueryValidationError when `spec` is null, undefined or an empty
 *   string; when it is neither a string nor a non-array object; when the
 *   record has no entries; when a key is empty; or when a weight is not a
 *   finite number greater than zero. The first offending entry is reported.
 */
function parseFieldWeights(spec: string | Record<string, number>): Record<string, number> {
  // Null prototype: field names can never collide with inherited members.
  const table: Record<string, number> = Object.create(null);

  if (typeof spec === 'string' && spec.length > 0) {
    table[spec] = 1.0;
    return Object.freeze(table);
  }

  // Missing values and the empty string share one error message.
  if (spec == null || spec === '') {
    throw new QueryValidationError('textFieldName is required');
  }

  const isRecord = typeof spec === 'object' && !Array.isArray(spec);
  if (!isRecord) {
    throw new QueryValidationError(
      'textFieldName must be a string or a record of field:weight mappings'
    );
  }

  const pairs = Object.entries(spec);
  if (pairs.length === 0) {
    throw new QueryValidationError('textFieldName record must contain at least one field');
  }

  pairs.forEach(([name, value]) => {
    const nameOk = typeof name === 'string' && name.length > 0;
    if (!nameOk) {
      throw new QueryValidationError('textFieldName keys must be non-empty strings');
    }
    const valueOk = typeof value === 'number' && Number.isFinite(value) && value > 0;
    if (!valueOk) {
      throw new QueryValidationError(
        `textFieldName weight for '${name}' must be a finite number > 0, got ${String(value)}`
      );
    }
    table[name] = value;
  });

  return Object.freeze(table);
}

/**
 * Validates a per-token weight map and returns a frozen, null-prototype copy
 * keyed by the trimmed, lowercased token.
 *
 * `undefined` yields an empty map. When two supplied keys normalise to the
 * same token, the one that appears later in `Object.entries` order wins.
 *
 * @param weights - Record of token to weight, or `undefined` for no weighting.
 * @returns A frozen map with a null prototype.
 * @throws QueryValidationError when `weights` is null, an array or not an
 *   object; when a key is empty or contains whitespace after trimming; or
 *   when a weight is not a finite number greater than or equal to zero.
 */
function parseTextWeights(weights: Record<string, number> | undefined): Record<string, number> {
  // Null prototype: lookups such as `constructor` only hit supplied tokens.
  const table: Record<string, number> = Object.create(null);
  if (weights === undefined) {
    return Object.freeze(table);
  }

  const isRecord = weights !== null && typeof weights === 'object' && !Array.isArray(weights);
  if (!isRecord) {
    throw new QueryValidationError('textWeights must be a record of token:weight mappings');
  }

  Object.entries(weights).forEach(([suppliedKey, value]) => {
    const token = suppliedKey.trim().toLowerCase();
    // After trimming, any remaining whitespace must be interior.
    const isSingleToken = token.length > 0 && token.search(/\s/) === -1;
    if (!isSingleToken) {
      throw new QueryValidationError(
        `textWeights keys must be single tokens with no whitespace, got '${suppliedKey}'`
      );
    }
    const valueOk = typeof value === 'number' && Number.isFinite(value) && value >= 0;
    if (!valueOk) {
      throw new QueryValidationError(
        `textWeights weight for '${token}' must be a finite number >= 0, got ${String(value)}`
      );
    }
    table[token] = value;
  });

  return Object.freeze(table);
}

/**
* Full-text search query with optional filter.
*
* Tokenises the input on whitespace, normalizes each token (trim, strip
* Tokenises the input on whitespace, normalises each token (trim, strip
* leading/trailing commas, strip typographic quotes, lowercase), drops
* stopwords, escapes Redis Search special characters, and OR-joins the
* survivors inside the target field. Use `filter` to scope the search to
* a subset of documents (e.g. by tag or numeric range).
*
* **Note:** per-field and per-token weights from Python's
* `redisvl.query.TextQuery` are not yet ported.
* Supports per-token weighting via `textWeights` and per-field weighting by
* passing a `Record<field, weight>` to `textFieldName`. Both render using
* Redis Search's `=> { $weight: N }` syntax (dialect 2).
*
* @example
* @example Single-field, default weights
* ```typescript
* import { TextQuery, Tag } from 'redisvl';
* new TextQuery({
* text: 'machine learning',
* textFieldName: 'description',
* });
* ```
*
* const q = new TextQuery({
* @example Multi-field weighted
* ```typescript
* new TextQuery({
* text: 'machine learning',
* textFieldName: { title: 5.0, body: 1.0 },
* });
* ```
*
* @example Per-token weighted
* ```typescript
* new TextQuery({
* text: 'apple orange pear',
* textFieldName: 'description',
* filter: Tag('category').eq('tech'),
* textWeights: { apple: 2.0, orange: 0.5 },
* });
* const results = await index.search(q);
* ```
*/
export class TextQuery implements BaseQuery {
public readonly text: string;
public readonly textFieldName: string;
/**
* Per-field weights. Frozen at construction. Iteration follows insertion
* order, which determines the order of field clauses in the rendered
* query. A single field with weight 1.0 renders identically to passing a
* bare string for `textFieldName`.
*/
public readonly fieldWeights: Readonly<Record<string, number>>;
/**
* Per-token weights. Keys are normalised to lowercase, whitespace-trimmed
* single tokens. Frozen at construction with a null prototype so adversarial
* keys (`constructor`, `__proto__`, etc.) cannot resolve via the prototype
* chain during render-time lookup.
*/
public readonly textWeights: Readonly<Record<string, number>>;
public readonly textScorer: TextScorer;
public readonly filter?: FilterInput;
public readonly returnFields?: string[];
Expand All @@ -107,12 +207,9 @@ export class TextQuery implements BaseQuery {
throw new QueryValidationError('text cannot be empty');
}

if (!config.textFieldName) {
throw new QueryValidationError('textFieldName is required');
}

this.text = config.text;
this.textFieldName = config.textFieldName;
this.fieldWeights = parseFieldWeights(config.textFieldName);
this.textWeights = parseTextWeights(config.textWeights);
this.textScorer = config.textScorer ?? 'BM25STD';
this.filter = config.filter;
this.returnFields = config.returnFields;
Expand All @@ -124,12 +221,19 @@ export class TextQuery implements BaseQuery {

buildQuery(): string {
const stopwordSet = this.stopwords;
const weights = this.textWeights;
const tokens: string[] = [];
for (const raw of this.text.split(/\s+/)) {
const norm = normalizeToken(raw);
if (norm.length === 0) continue;
if (stopwordSet && stopwordSet.has(norm)) continue;
tokens.push(escaper.escape(norm));
const escaped = escaper.escape(norm);
const weight = weights[norm];
if (weight !== undefined) {
tokens.push(`${escaped}=>{$weight:${weight}}`);
} else {
tokens.push(escaped);
}
}

if (tokens.length === 0) {
Expand All @@ -138,7 +242,20 @@ export class TextQuery implements BaseQuery {
);
}

const textClause = `@${this.textFieldName}:(${tokens.join(' | ')})`;
const orList = tokens.join(' | ');

const fieldClauses: string[] = [];
for (const [field, weight] of Object.entries(this.fieldWeights)) {
if (weight === 1.0) {
fieldClauses.push(`@${field}:(${orList})`);
} else {
fieldClauses.push(`@${field}:(${orList}) => { $weight: ${weight} }`);
}
}

const textClause =
fieldClauses.length === 1 ? fieldClauses[0] : `(${fieldClauses.join(' | ')})`;

const filterStr = renderFilter(this.filter);
if (filterStr === '*') {
return textClause;
Expand All @@ -149,4 +266,21 @@ export class TextQuery implements BaseQuery {
/**
 * Returns the parameters for this query.
 *
 * This implementation takes no inputs and always returns a new empty object.
 */
buildParams(): Record<string, unknown> {
return {};
}

/**
* Returns the configured text field. A bare string is returned when exactly
* one field is configured with weight 1.0. Otherwise returns a copy of the
* normalised field-weight record. Mirrors Python's `text_field_name`
* property for cross-language compatibility.
*/
get textFieldName(): string | Readonly<Record<string, number>> {
  const names = Object.keys(this.fieldWeights);
  // Only a lone field at the default weight collapses to its bare name.
  const sole = names.length === 1 ? names[0] : undefined;
  if (sole !== undefined && this.fieldWeights[sole] === 1.0) {
    return sole;
  }
  // Otherwise return a shallow copy of the weight map.
  return { ...this.fieldWeights };
}
}
54 changes: 54 additions & 0 deletions tests/integration/query-types.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -374,4 +374,58 @@ describe('Query types integration (FilterQuery / CountQuery / VectorRangeQuery /
expect(titles).toContain('Laptop computer for programming');
});
});

// Integration coverage for multi-field weighting: creates a throwaway
// two-field index, loads two documents whose field contents mirror each
// other, and checks the order of the search results.
describe('TextQuery — per-field weights', () => {
it('ranks docs by per-field weight when the same terms appear in different fields', async () => {
// Stand up a dedicated two-text-field index so we can place
// identical match tokens in different fields and observe the
// per-field weight steering the ranking.
// Date.now() is evaluated separately for the index name and the key
// prefix, so the two numeric suffixes are not guaranteed to be equal.
const indexName = `redisvl-test-text-weights-${Date.now()}`;
const schema = IndexSchema.fromObject({
index: {
name: indexName,
prefix: `rvl-test-tw-${Date.now()}`,
storageType: 'hash',
},
fields: [
{ name: 'title', type: 'text' },
{ name: 'body', type: 'text' },
],
});

const weightedIndex = new SearchIndex(schema, client);
await weightedIndex.create({ overwrite: true, drop: true });

try {
// Doc 'a' carries the query terms in `title`; doc 'b' carries them in `body`.
await weightedIndex.load(
[
{ id: 'a', title: 'foo bar', body: 'zzz zzz' },
{ id: 'b', title: 'zzz zzz', body: 'foo bar' },
],
{ idField: 'id' }
);

// Let Redis index the two new docs.
await new Promise((r) => setTimeout(r, 100));

// Heavy ratio (10:1) keeps the assertion robust against
// BM25 quirks on a 2-doc corpus.
const q = new TextQuery({
text: 'foo bar',
textFieldName: { title: 10.0, body: 1.0 },
returnFields: ['id'],
textScorer: 'BM25STD',
});

const results = await weightedIndex.search(q);
expect(results.documents.length).toBeGreaterThanOrEqual(2);
// Doc A's match is in the higher-weighted `title` field, so
// it must rank above Doc B whose match is in `body`.
// NOTE(review): these are containment checks rather than equality —
// presumably the returned id may include the key prefix; confirm.
expect(results.documents[0].id).toContain('a');
expect(results.documents[1].id).toContain('b');
} finally {
// Always remove the throwaway index; a rejected delete is deliberately ignored.
await weightedIndex.delete({ drop: true }).catch(() => {});
}
});
});
});
Loading
Loading