forked from matthewmcneely/modusGraph
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembedding_test.go
More file actions
654 lines (549 loc) · 22.9 KB
/
Copy pathembedding_test.go
File metadata and controls
654 lines (549 loc) · 22.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
/*
* SPDX-FileCopyrightText: © 2017-2026 Istari Digital, Inc.
* SPDX-License-Identifier: Apache-2.0
*/
package modusgraph_test
import (
"context"
"fmt"
"net/http"
"strings"
"testing"
"time"
dg "github.com/dolan-in/dgman/v2"
mg "github.com/matthewmcneely/modusgraph"
"github.com/stretchr/testify/require"
)
// mockEmbeddingProvider is a deterministic EmbeddingProvider for testing.
// Identical texts always receive identical embeddings. A text pre-registered
// via register maps to exactly the supplied vector; any other text receives a
// vector derived from a hash of its bytes, so different texts — including
// texts of equal length — get different vectors (barring hash collisions),
// enabling correct nearest-neighbour assertions.
//
// It performs no locking and is not safe for concurrent use.
type mockEmbeddingProvider struct {
	dims    int                  // length of generated vectors; also reported by Dims
	callLog []string             // every text passed to Embed, in call order
	vectors map[string][]float32 // text → vector, either registered or generated
}

// newMockProvider returns a mock whose generated vectors have dims components.
func newMockProvider(dims int) *mockEmbeddingProvider {
	return &mockEmbeddingProvider{
		dims:    dims,
		vectors: make(map[string][]float32),
	}
}

// register pre-registers a specific vector for a text so tests can control
// exactly what vector will be stored. The vector's length is not checked
// against dims.
func (m *mockEmbeddingProvider) register(text string, vec []float32) {
	m.vectors[text] = vec
}

// Embed appends text to callLog and returns the embedding for it: the
// registered vector when one exists, otherwise a generated vector that is
// cached so later calls with the same text agree. The returned slice is a
// copy, so a caller that modifies it cannot corrupt the cached or registered
// vector. The returned error is always nil.
func (m *mockEmbeddingProvider) Embed(_ context.Context, text string) ([]float32, error) {
	m.callLog = append(m.callLog, text)
	if v, ok := m.vectors[text]; ok {
		// v[:0:0] keeps nil-ness while forcing append to allocate a new array.
		return append(v[:0:0], v...), nil
	}

	// Seed with a 32-bit FNV-1a hash of the text so the vector depends on the
	// content, not just the length.
	state := uint32(2166136261)
	for i := 0; i < len(text); i++ {
		state ^= uint32(text[i])
		state *= 16777619
	}

	vec := make([]float32, m.dims)
	for i := range vec {
		// One linear-congruential step per component; the top 24 bits convert
		// to float32 exactly, giving a value in (0, 1].
		state = state*1664525 + 1013904223
		vec[i] = (float32(state>>8) + 1) / (1 << 24)
	}
	m.vectors[text] = vec
	return append(vec[:0:0], vec...), nil
}

// Dims reports the vector length the mock was constructed with.
func (m *mockEmbeddingProvider) Dims() int { return m.dims }
// embeddableProduct is the baseline fixture for the SimString tests. Its
// Description is an mg.SimString whose dgraph tag requests both an embedding
// and a term index.
type embeddableProduct struct {
	Name        string       `json:"name,omitempty" dgraph:"index=term"`
	Description mg.SimString `json:"description,omitempty" dgraph:"embedding,index=term"`
	UID         string       `json:"uid,omitempty"`
	DType       []string     `json:"dgraph.type,omitempty"`
}
// embeddableCustomMetric is the fixture for overriding the embedding options:
// its Description tag sets metric=euclidean and exponent=5.
type embeddableCustomMetric struct {
	Name        string       `json:"name,omitempty" dgraph:"index=term"`
	Description mg.SimString `json:"description,omitempty" dgraph:"embedding,metric=euclidean,exponent=5"`
	UID         string       `json:"uid,omitempty"`
	DType       []string     `json:"dgraph.type,omitempty"`
}
// embeddableWithThreshold is the fixture for the threshold=N tag option.
// With threshold=20, descriptions shorter than 20 runes should not be
// embedded.
type embeddableWithThreshold struct {
	Name        string       `json:"name,omitempty" dgraph:"index=term"`
	Description mg.SimString `json:"description,omitempty" dgraph:"embedding,threshold=20"`
	UID         string       `json:"uid,omitempty"`
	DType       []string     `json:"dgraph.type,omitempty"`
}
// createEmbeddingClient builds a test client on a file:// URI rooted at
// GetTempDir(t), with auto-schema enabled and the given embedding provider
// attached. Client construction failure fails the test immediately. The
// returned function drops all data, closes the client and shuts the engine
// down; callers are expected to defer it.
func createEmbeddingClient(t *testing.T, provider mg.EmbeddingProvider) (mg.Client, func()) {
	t.Helper()

	storeURI := "file://" + GetTempDir(t)
	c, err := mg.NewClient(
		storeURI,
		mg.WithAutoSchema(true),
		mg.WithEmbeddingProvider(provider),
	)
	require.NoError(t, err)

	return c, func() {
		// Best-effort wipe: the DropAll error is deliberately discarded.
		_ = c.DropAll(context.Background())
		c.Close()
		mg.Shutdown()
	}
}
// --- Unit tests ---

// TestSimStringMarshal checks that MarshalJSON on a SimString produces the
// text as a quoted JSON string.
func TestSimStringMarshal(t *testing.T) {
	text := mg.SimString("hello world")

	encoded, err := text.MarshalJSON()

	require.NoError(t, err)
	require.Equal(t, `"hello world"`, string(encoded))
}
// TestSimStringUnmarshal checks that UnmarshalJSON decodes a quoted JSON
// string into the equivalent SimString.
func TestSimStringUnmarshal(t *testing.T) {
	var decoded mg.SimString

	err := decoded.UnmarshalJSON([]byte(`"hello world"`))

	require.NoError(t, err)
	require.Equal(t, mg.SimString("hello world"), decoded)
}
// TestHasEmbeddingTagDetection inserts one product with a SimString
// description and checks that the provider was asked to embed exactly that
// description, once.
func TestHasEmbeddingTagDetection(t *testing.T) {
	mock := newMockProvider(4)
	client, cleanup := createEmbeddingClient(t, mock)
	defer cleanup()
	ctx := context.Background()

	widget := &embeddableProduct{Name: "Widget", Description: "A small gadget"}
	require.NoError(t, client.Insert(ctx, widget), "Insert with SimString field should succeed")
	require.NotEmpty(t, widget.UID, "UID should be populated after insert")

	// A single Embed call is expected, carrying the description text.
	require.Len(t, mock.callLog, 1)
	require.Equal(t, "A small gadget", mock.callLog[0])
}
// TestInsertWithEmbedding batch-inserts three products with registered
// vectors, checks the provider is called once per product, and reads one
// product back to confirm its text fields are intact.
func TestInsertWithEmbedding(t *testing.T) {
	const dims = 5
	mock := newMockProvider(dims)

	// Each fixture pins its vector so the stored embeddings are fully
	// controlled by the test.
	fixtures := []struct {
		name string
		text string
		vec  []float32
	}{
		{name: "Apple", text: "apple fruit sweet", vec: []float32{0.9, 0.1, 0.1, 0.1, 0.1}},
		{name: "Banana", text: "banana yellow tropical", vec: []float32{0.1, 0.9, 0.1, 0.1, 0.1}},
		{name: "Carrot", text: "carrot orange vegetable", vec: []float32{0.1, 0.1, 0.9, 0.1, 0.1}},
	}
	items := make([]*embeddableProduct, 0, len(fixtures))
	for _, f := range fixtures {
		mock.register(f.text, f.vec)
		items = append(items, &embeddableProduct{Name: f.name, Description: mg.SimString(f.text)})
	}

	client, cleanup := createEmbeddingClient(t, mock)
	defer cleanup()
	ctx := context.Background()

	require.NoError(t, client.Insert(ctx, items), "Batch insert with SimString should succeed")
	for _, item := range items {
		require.NotEmpty(t, item.UID, "Each product should have a UID after insert")
	}

	// One Embed call per inserted product.
	require.Len(t, mock.callLog, 3)

	// Read the apple back; the description must round-trip as plain text.
	var apple embeddableProduct
	require.NoError(t, client.Get(ctx, &apple, items[0].UID))
	require.Equal(t, "apple fruit sweet", string(apple.Description))
	require.Equal(t, "Apple", apple.Name)
}
// TestUpdateWithEmbedding checks that changing a SimString field and calling
// Update triggers a second Embed call carrying the new text.
func TestUpdateWithEmbedding(t *testing.T) {
	const dims = 5
	mock := newMockProvider(dims)
	mock.register("original text", []float32{0.5, 0.5, 0.0, 0.0, 0.0})
	mock.register("updated text", []float32{0.0, 0.0, 0.5, 0.5, 0.0})

	client, cleanup := createEmbeddingClient(t, mock)
	defer cleanup()
	ctx := context.Background()

	item := &embeddableProduct{Name: "Thing", Description: "original text"}
	err := client.Insert(ctx, item)
	require.NoError(t, err)
	require.Len(t, mock.callLog, 1)

	// Change the description and push the update.
	item.Description = "updated text"
	err = client.Update(ctx, item)
	require.NoError(t, err)

	// The new text must have been embedded as the second call.
	require.Len(t, mock.callLog, 2)
	require.Equal(t, "updated text", mock.callLog[1])
}
// TestPartialUpdatePreservesShadowVector is a regression test for the
// omitempty-aware skip in injectShadowVectors. A partial Update of a struct
// that contains a SimString field (e.g. setting only UID + a non-SimString
// field) must not delete the existing shadow vector. The primary predicate
// is preserved by omitempty — Dgraph never sees the SimString in the mutation
// NQuads — so the shadow path must follow the same lifecycle.
//
// Real-world failure that motivated this test: codebase-graph's indexer
// upserts a Function with its doc comment in pass A, then issues a partial
// `cli.Update(&Function{UID, Calls})` in pass B to add CALLS edges. Without
// the omitempty skip, every pass-B Update silently wiped the doc-comment
// vector written in pass A, leaving the shadow predicate sparse and breaking
// similarity ranking once `similar_to`'s k saturated the candidate count.
func TestPartialUpdatePreservesShadowVector(t *testing.T) {
	const dims = 4
	seedVec := []float32{1.0, 0.0, 0.0, 0.0}
	mock := newMockProvider(dims)
	mock.register("the original description", seedVec)

	client, cleanup := createEmbeddingClient(t, mock)
	defer cleanup()
	ctx := context.Background()

	original := &embeddableProduct{Name: "Thing", Description: "the original description"}
	require.NoError(t, client.Insert(ctx, original))
	require.NotEmpty(t, original.UID)
	require.Len(t, mock.callLog, 1, "Insert should embed once")

	// nearestUID runs a top-1 similarity search on "description" with seedVec
	// and returns the UID of the hit (empty when nothing matched).
	nearestUID := func(t *testing.T) string {
		t.Helper()
		dgoClient, release, err := client.DgraphClient()
		require.NoError(t, err)
		defer release()

		var hit embeddableProduct
		txn := dg.NewReadOnlyTxn(dgoClient)
		require.NoError(t, mg.SimilarTo(txn, &hit, "description", seedVec, 1).Scan())
		return hit.UID
	}

	// Baseline: the vector written by Insert is searchable.
	require.Equal(t, original.UID, nearestUID(t),
		"shadow vec should be present after Insert")

	// Partial Update carrying only the UID and a non-SimString field.
	// Description is left at its zero value, so omitempty keeps it out of the
	// mutation; the primary predicate survives and the shadow vec must too.
	patch := &embeddableProduct{UID: original.UID, Name: "Thing v2"}
	require.NoError(t, client.Update(ctx, patch))

	// Nothing to embed, so the provider must not have been called again.
	require.Len(t, mock.callLog, 1,
		"partial Update must not re-embed when SimString is unset")

	// Regression assertion: the vector is still reachable after the update.
	require.Equal(t, original.UID, nearestUID(t),
		"shadow vec must survive partial Update of unrelated fields")
}
// TestSimilarToQuery inserts two well-separated clusters of vectors and
// checks that a top-1 SimilarTo search with a query vector from the first
// cluster returns a product from that cluster.
func TestSimilarToQuery(t *testing.T) {
	const dims = 5
	mock := newMockProvider(dims)

	// Well-separated, non-zero vectors keep the similarity results stable.
	// nearCluster is dominated by the first component, farCluster by the
	// last; the query below sits in nearCluster, so the top-1 hit must not
	// come from farCluster.
	nearCluster := [][]float32{
		{0.95, 0.20, 0.10, 0.10, 0.05},
		{0.90, 0.25, 0.12, 0.08, 0.06},
		{0.92, 0.22, 0.11, 0.09, 0.07},
		{0.88, 0.28, 0.13, 0.07, 0.08},
	}
	farCluster := [][]float32{
		{0.05, 0.10, 0.10, 0.20, 0.95},
		{0.06, 0.08, 0.12, 0.25, 0.90},
		{0.07, 0.09, 0.11, 0.22, 0.92},
		{0.08, 0.07, 0.13, 0.28, 0.88},
	}

	items := make([]*embeddableProduct, 0, len(nearCluster)+len(farCluster))
	// seed registers one vector per generated label and queues a product
	// whose name and description are both that label.
	seed := func(labelFormat string, vecs [][]float32) {
		for idx, vec := range vecs {
			label := fmt.Sprintf(labelFormat, idx+1)
			mock.register(label, vec)
			items = append(items, &embeddableProduct{Name: label, Description: mg.SimString(label)})
		}
	}
	seed("Group1-%d", nearCluster)
	seed("Group2-%d", farCluster)

	client, cleanup := createEmbeddingClient(t, mock)
	defer cleanup()
	ctx := context.Background()
	require.NoError(t, client.Insert(ctx, items))

	dgoClient, release, err := client.DgraphClient()
	require.NoError(t, err)
	defer release()

	// The probe is clearly inside the first cluster (high first component).
	probe := []float32{0.93, 0.21, 0.11, 0.09, 0.06}
	var hit embeddableProduct
	txn := dg.NewReadOnlyTxn(dgoClient)
	require.NoError(t, mg.SimilarTo(txn, &hit, "description", probe, 1).Scan())

	require.NotEmpty(t, hit.Name, "Should find a matching product")
	require.True(t, strings.HasPrefix(hit.Name, "Group1-"),
		"Expected a Group1 result but got: %s", hit.Name)
}
// TestSimilarToTextQuery checks that SimilarToText embeds the query text via
// the provider and returns the product whose stored vector is nearest.
func TestSimilarToTextQuery(t *testing.T) {
	const dims = 5
	mock := newMockProvider(dims)

	// Two orthogonal stored vectors plus a query vector that is clearly
	// closest to the apple one.
	mock.register("apple fruit sweet", []float32{1.0, 0.0, 0.0, 0.0, 0.0})
	mock.register("banana yellow tropical", []float32{0.0, 1.0, 0.0, 0.0, 0.0})
	mock.register("fruit like apple", []float32{0.99, 0.01, 0.0, 0.0, 0.0}) // the query text

	client, cleanup := createEmbeddingClient(t, mock)
	defer cleanup()
	ctx := context.Background()

	items := []*embeddableProduct{
		{Name: "Apple Product", Description: "apple fruit sweet"},
		{Name: "Banana Product", Description: "banana yellow tropical"},
	}
	require.NoError(t, client.Insert(ctx, items))

	// "fruit like apple" embeds to the query vector, whose nearest
	// neighbour is Apple Product.
	var hit embeddableProduct
	require.NoError(t,
		mg.SimilarToText(client, ctx, &hit, "description", "fruit like apple", 1),
		"SimilarToText should not error")
	require.Equal(t, "Apple Product", hit.Name)
}
func TestUpdateSchemaRegistersVecPredicate(t *testing.T) {
provider := newMockProvider(4)
client, cleanup := createEmbeddingClient(t, provider)
defer cleanup()
ctx := context.Background()
// Trigger explicit schema update
err := client.UpdateSchema(ctx, &embeddableProduct{})
require.NoError(t, err)
// QueryRaw against schema introspection to verify the vector predicate was registered
raw, err := client.QueryRaw(ctx, `schema(pred: [description__vec]) { type }`, nil)
require.NoError(t, err)
rawStr := string(raw)
require.Contains(t, rawStr, "description__vec", "Schema should contain the shadow vector predicate")
require.Contains(t, rawStr, "float32vector", "Shadow predicate should be of type float32vector")
}
func TestCustomMetricEmbedding(t *testing.T) {
provider := newMockProvider(4)
client, cleanup := createEmbeddingClient(t, provider)
defer cleanup()
ctx := context.Background()
err := client.UpdateSchema(ctx, &embeddableCustomMetric{})
require.NoError(t, err)
// QueryRaw to verify the vector predicate schema
raw, err := client.QueryRaw(ctx, `schema(pred: [description__vec]) { type }`, nil)
require.NoError(t, err)
require.Contains(t, string(raw), "description__vec", "Shadow predicate should exist")
// Euclidean metric is embedded in the index definition; verify the predicate type at minimum
require.Contains(t, string(raw), "float32vector", "Shadow predicate should be float32vector type")
}
func TestNoProviderNoEmbedding(t *testing.T) {
// Client without embedding provider: Insert should still work normally for SimString fields
uri := "file://" + GetTempDir(t)
client, err := mg.NewClient(uri, mg.WithAutoSchema(true))
require.NoError(t, err)
defer func() {
_ = client.DropAll(context.Background())
client.Close()
mg.Shutdown()
}()
ctx := context.Background()
product := &embeddableProduct{
Name: "NoVec",
Description: "plain text no embedding",
}
err = client.Insert(ctx, product)
require.NoError(t, err, "Insert should succeed even without an EmbeddingProvider")
require.NotEmpty(t, product.UID)
}
func TestSimStringTermSearch(t *testing.T) {
provider := newMockProvider(4)
client, cleanup := createEmbeddingClient(t, provider)
defer cleanup()
ctx := context.Background()
products := []*embeddableProduct{
{Name: "Kettle", Description: "stainless steel electric kettle for boiling water"},
{Name: "Toaster", Description: "four slice toaster with browning control"},
{Name: "Blender", Description: "high speed blender for smoothies and soups"},
}
require.NoError(t, client.Insert(ctx, products))
// Term search on the description predicate of a SimString field.
// allofterms matches nodes where the predicate contains all listed terms.
var result embeddableProduct
q := client.Query(ctx, &result).Filter("allofterms(description, \"electric kettle\")")
err := q.Node()
require.NoError(t, err)
require.Equal(t, "Kettle", result.Name,
"Term search on SimString description should return the matching product")
// anyofterms: should match both Kettle and Blender
var results []embeddableProduct
q2 := client.Query(ctx, &results).Filter("anyofterms(description, \"kettle blender\")")
err = q2.Nodes()
require.NoError(t, err)
require.Len(t, results, 2, "anyofterms should match two products")
}
// TestThresholdEmbedding exercises the threshold=20 tag option: text below
// the threshold is not embedded on insert or update, text above it is, and a
// node whose text was shortened below the threshold is no longer the top
// similarity hit for its old vector.
func TestThresholdEmbedding(t *testing.T) {
	const dims = 4
	mock := newMockProvider(dims)
	mock.register("long enough text to embed", []float32{1.0, 0.0, 0.0, 0.0})

	client, cleanup := createEmbeddingClient(t, mock)
	defer cleanup()
	ctx := context.Background()

	// Below the 20-rune threshold: the provider must stay untouched.
	brief := &embeddableWithThreshold{Name: "Short", Description: "too short"}
	require.NoError(t, client.Insert(ctx, brief))
	require.Empty(t, mock.callLog, "Provider should not be called for below-threshold text")

	// Above the threshold: the provider must be called.
	wordy := &embeddableWithThreshold{Name: "Long", Description: "long enough text to embed"}
	require.NoError(t, client.Insert(ctx, wordy))
	require.Len(t, mock.callLog, 1, "Provider should be called for above-threshold text")

	// Shrink the long item's description below the threshold. Its shadow
	// vector should be cleared, and no new Embed call should happen.
	wordy.Description = "short"
	require.NoError(t, client.Update(ctx, wordy))
	require.Len(t, mock.callLog, 1, "Provider should not be called when updated text is below threshold")

	// Search with the vector that used to belong to the long item.
	dgoClient, release, err := client.DgraphClient()
	require.NoError(t, err)
	defer release()

	probe := []float32{1.0, 0.0, 0.0, 0.0}
	var hit embeddableWithThreshold
	txn := dg.NewReadOnlyTxn(dgoClient)
	err = mg.SimilarTo(txn, &hit, "description", probe, 1).Scan()

	// An empty result is acceptable; what must not happen is the long item
	// coming back as the top match.
	require.NoError(t, err)
	require.NotEqual(t, wordy.UID, hit.UID,
		"Cleared shadow vector should not appear in similarity results")
}
// ── Ollama live integration ────────────────────────────────────────────

// Settings for the live Ollama integration test.
const (
	// ollamaBaseURL is the local Ollama endpoint used both for the
	// reachability probe and as the provider's base URL.
	ollamaBaseURL = "http://localhost:11434"
	// ollamaModel is the model name passed to the provider.
	ollamaModel = "bge-m3:latest"
	// ollamaDims is the vector size passed to the provider.
	// NOTE(review): presumably the output width of ollamaModel — confirm.
	ollamaDims = 1024
)
// ollamaRunning reports whether Ollama answers GET /api/tags with 200 OK.
// The request uses a 2-second timeout; any transport error counts as "not
// running".
func ollamaRunning() bool {
	probe := http.Client{Timeout: 2 * time.Second}

	resp, err := probe.Get(ollamaBaseURL + "/api/tags")
	if err != nil {
		return false
	}
	// Only the status line matters; the body is closed unread.
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}
// skipUnlessOllama skips the calling test when the Ollama probe fails.
func skipUnlessOllama(t *testing.T) {
	t.Helper()
	if ollamaRunning() {
		return
	}
	t.Skipf("Ollama not reachable at %s — skipping live integration test", ollamaBaseURL)
}
// sportingGoodsProduct is the fixture for the live embedding integration
// test. Name and Category carry a term index; Description is an mg.SimString
// tagged for both an embedding and a term index.
type sportingGoodsProduct struct {
	Name        string       `json:"name,omitempty" dgraph:"index=term"`
	Category    string       `json:"category,omitempty" dgraph:"index=term"`
	Description mg.SimString `json:"description,omitempty" dgraph:"embedding,index=term"`
	UID         string       `json:"uid,omitempty"`
	DType       []string     `json:"dgraph.type,omitempty"`
}
// newOllamaProvider builds an OpenAI-compatible embedding provider configured
// with the Ollama base URL, model and dimension constants.
func newOllamaProvider() *mg.OpenAICompatibleProvider {
	cfg := mg.OpenAICompatibleConfig{
		BaseURL: ollamaBaseURL,
		Model:   ollamaModel,
		Dims:    ollamaDims,
	}
	return mg.NewOpenAICompatibleProvider(cfg)
}
// TestOllamaIntegration exercises insert, upsert, update, and SimilarToText
// against a real Ollama instance running bge-m3:latest. It is skipped when
// the Ollama probe fails.
func TestOllamaIntegration(t *testing.T) {
	skipUnlessOllama(t)

	embedder := newOllamaProvider()
	client, err := mg.NewClient(
		"file://"+GetTempDir(t),
		mg.WithAutoSchema(true),
		mg.WithEmbeddingProvider(embedder),
	)
	require.NoError(t, err)
	defer func() {
		_ = client.DropAll(context.Background())
		client.Close()
		mg.Shutdown()
	}()
	ctx := context.Background()

	// item builds one corpus entry.
	item := func(name, category, description string) *sportingGoodsProduct {
		return &sportingGoodsProduct{
			Name:        name,
			Category:    category,
			Description: mg.SimString(description),
		}
	}
	// nearest runs a top-1 SimilarToText search on "description" and fails
	// the test if the search itself errors.
	nearest := func(query string) sportingGoodsProduct {
		t.Helper()
		var hit sportingGoodsProduct
		require.NoError(t, mg.SimilarToText(client, ctx, &hit, "description", query, 1))
		return hit
	}

	// 1. Insert a corpus of semantically varied products.
	corpus := []*sportingGoodsProduct{
		item("Trail Runner X", "footwear",
			"Lightweight trail running shoe with aggressive grip for mountain terrain"),
		item("Road Racer Pro", "footwear",
			"Carbon-plated road running shoe for fast road races and marathons"),
		item("Summit Hardshell", "outerwear",
			"Waterproof hardshell jacket for alpine climbing and severe weather"),
		item("Base Layer Merino", "clothing",
			"Soft merino wool thermal base layer for cold weather activities"),
		item("Carbon Fibre Kayak", "watersports",
			"Ultra-light carbon fibre sea kayak for ocean touring and expedition paddling"),
		item("Rock Climbing Harness", "climbing",
			"Comfortable sit harness for sport climbing and indoor bouldering gym use"),
		item("Trail Mix Nutrition Bar", "nutrition",
			"High-energy snack bar for long hikes, runs, and endurance activities"),
		item("GPS Watch Ultra", "electronics",
			"Multi-sport GPS watch with heart rate monitoring and route navigation"),
	}
	require.NoError(t, client.Insert(ctx, corpus), "Insert corpus should succeed")
	for _, entry := range corpus {
		require.NotEmpty(t, entry.UID, "Each product should have a UID: %s", entry.Name)
	}
	t.Logf("Inserted %d products", len(corpus))

	// 2. The shadow predicate must show up in the schema.
	raw, err := client.QueryRaw(ctx, `schema(pred: [description__vec]) { type }`, nil)
	require.NoError(t, err)
	require.Contains(t, string(raw), "float32vector", "Shadow predicate should be registered")

	// 3. A trail-shoe query should land on a footwear product.
	shoe := nearest("running shoes for trails")
	t.Logf("Running shoe query → %q (%s)", shoe.Name, shoe.Description)
	require.NotEmpty(t, shoe.Name, "Should find a product")
	require.Contains(t, strings.ToLower(shoe.Category), "footwear",
		"Top result for 'running shoes for trails' should be footwear, got %q", shoe.Name)

	// 4. A waterproof-jacket query should land on the hardshell.
	jacket := nearest("waterproof jacket for bad weather")
	t.Logf("Jacket query → %q (%s)", jacket.Name, jacket.Description)
	require.NotEmpty(t, jacket.Name)
	require.Equal(t, "Summit Hardshell", jacket.Name,
		"Top result for waterproof jacket should be Summit Hardshell")

	// 5. Update: rewrite the Trail Runner X description, then query again.
	trailRunner := corpus[0]
	trailRunner.Description = "Rugged trail running shoe with rock plate and waterproof membrane for muddy conditions"
	require.NoError(t, client.Update(ctx, trailRunner), "Update should succeed")

	// Only a non-empty result is asserted for the post-update query.
	afterUpdate := nearest("waterproof trail shoe for mud")
	t.Logf("After update query → %q", afterUpdate.Name)
	require.NotEmpty(t, afterUpdate.Name)

	// 6. Upsert: rewrite the Road Racer Pro description, keyed on name.
	roadRacer := corpus[1]
	roadRacer.Description = "Featherlight carbon road shoe for sub-3-hour marathon performance"
	require.NoError(t, client.Upsert(ctx, roadRacer, "name"), "Upsert should succeed")

	// 7. A marathon query should still map to a footwear product.
	marathon := nearest("shoe for running a marathon")
	t.Logf("Marathon query → %q", marathon.Name)
	require.NotEmpty(t, marathon.Name)
	require.Equal(t, "footwear", marathon.Category,
		"Marathon shoe query should return a footwear product")
}