diff --git a/.changeset/big-singers-buy.md b/.changeset/big-singers-buy.md new file mode 100644 index 00000000..96ec3e65 --- /dev/null +++ b/.changeset/big-singers-buy.md @@ -0,0 +1,10 @@ +--- +'@rushdb/javascript-sdk': minor +'@rushdb/mcp-server': minor +'rushdb-dashboard': minor +'rushdb-core': minor +'rushdb-website': minor +'rushdb-docs': minor +--- + +Decoupling billing from a platform diff --git a/.changeset/cold-pumpkins-move.md b/.changeset/cold-pumpkins-move.md new file mode 100644 index 00000000..32a17951 --- /dev/null +++ b/.changeset/cold-pumpkins-move.md @@ -0,0 +1,10 @@ +--- +'@rushdb/javascript-sdk': minor +'@rushdb/mcp-server': minor +'rushdb-dashboard': minor +'rushdb-core': minor +'rushdb-website': minor +'rushdb-docs': minor +--- + +Add native vector support and docs update diff --git a/.changeset/old-horses-explain.md b/.changeset/old-horses-explain.md new file mode 100644 index 00000000..7e5dd819 --- /dev/null +++ b/.changeset/old-horses-explain.md @@ -0,0 +1,10 @@ +--- +'@rushdb/javascript-sdk': minor +'@rushdb/mcp-server': minor +'rushdb-dashboard': minor +'rushdb-core': minor +'rushdb-website': minor +'rushdb-docs': minor +--- + +Refactor dashboard diff --git a/.changeset/plenty-flies-scream.md b/.changeset/plenty-flies-scream.md new file mode 100644 index 00000000..0827d4eb --- /dev/null +++ b/.changeset/plenty-flies-scream.md @@ -0,0 +1,10 @@ +--- +'@rushdb/javascript-sdk': minor +'@rushdb/mcp-server': minor +'rushdb-dashboard': minor +'rushdb-core': minor +'rushdb-website': minor +'rushdb-docs': minor +--- + +Add tutorials and BYOV feature diff --git a/.changeset/poor-starfishes-deliver.md b/.changeset/poor-starfishes-deliver.md new file mode 100644 index 00000000..9d697236 --- /dev/null +++ b/.changeset/poor-starfishes-deliver.md @@ -0,0 +1,10 @@ +--- +'@rushdb/javascript-sdk': minor +'@rushdb/mcp-server': minor +'rushdb-dashboard': minor +'rushdb-core': minor +'rushdb-website': minor +'rushdb-docs': minor +--- + +Improve separation 
between os and cloud versions diff --git a/.changeset/rich-mails-care.md b/.changeset/rich-mails-care.md new file mode 100644 index 00000000..5838c341 --- /dev/null +++ b/.changeset/rich-mails-care.md @@ -0,0 +1,10 @@ +--- +'@rushdb/javascript-sdk': minor +'@rushdb/mcp-server': minor +'rushdb-dashboard': minor +'rushdb-core': minor +'rushdb-website': minor +'rushdb-docs': minor +--- + +Add oauth and mcp improvements diff --git a/.changeset/sweet-candles-tease.md b/.changeset/sweet-candles-tease.md new file mode 100644 index 00000000..c2e269fb --- /dev/null +++ b/.changeset/sweet-candles-tease.md @@ -0,0 +1,10 @@ +--- +'@rushdb/javascript-sdk': minor +'@rushdb/mcp-server': minor +'rushdb-dashboard': minor +'rushdb-core': minor +'rushdb-website': minor +'rushdb-docs': minor +--- + +Update docs portal diff --git a/README.md b/README.md index 481c679d..6ba8383c 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,9 @@ # RushDB -### Developer‑first property‑centric graph store +### Turn Any Data into Intelligence -RushDB lets you push raw JSON/CSV, auto-normalize it into a labeled meta property graph, and query records, schema, relationships, values, vectors and aggregations through one JSON search interface. No upfront schema design, no new query language to learn. -RushDB transforms how you work with graph data — no schema required, no complex queries, just push your data and go. +Push any JSON — records, events, AI outputs, configs. RushDB structures it, connects it, and makes it queryable instantly. No upfront schema design, no new query language to learn. [![GitHub Stars](https://img.shields.io/github/stars/rush-db/rushdb?style=social)](https://github.com/rush-db/rushdb) [![Follow on X (Twitter)](https://img.shields.io/twitter/follow/rushdb?style=social)](https://x.com/RushDatabase) @@ -99,7 +98,7 @@ For deeper architectural exposition see the blog article on [LMPG](https://rushd ### 1. 
Get an API Key (Cloud) or Run Locally RushDB Cloud: create a project at https://app.rushdb.com → copy API key. -Self-host (requires Neo4j 5.25.1+ with APOC & GDS): +Self-host (requires Neo4j 2026.01.4+ with compatible APOC & GDS plugins): ```bash docker run -p 3000:3000 \ --name rushdb \ @@ -212,9 +211,9 @@ Benefits: ## 🛠️ Self-Hosting ### Requirements -- Neo4j 5.25.1+ -- APOC plugin -- Graph Data Science plugin (for vector similarity & advanced aggregates) +- Neo4j 2026.01.4+ +- APOC Core plugin (compatible with Neo4j 2026.01.4+) +- Graph Data Science plugin (compatible with Neo4j 2026.01.4+, for vector similarity & advanced aggregates) ### Minimal Docker Compose ```yaml @@ -266,7 +265,7 @@ services: - NEO4J_USERNAME=neo4j - NEO4J_PASSWORD=password neo4j: - image: neo4j:5.25.1 + image: neo4j:2026.01.4 healthcheck: test: [ "CMD-SHELL", "wget --no-verbose --tries=1 --spider localhost:7474 || exit 1" ] interval: 5s @@ -278,7 +277,11 @@ services: environment: - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes - NEO4J_AUTH=neo4j/password - - NEO4J_PLUGINS=["apoc", "graph-data-science"] + # Optional: auto-download plugins from the internet + # - NEO4J_PLUGINS=["apoc", "graph-data-science"] + volumes: + # Optional: mount local plugin jars (required for Neo4j 2026.x where auto-download may not work) + - ./neo4j-plugins:/var/lib/neo4j/plugins ``` diff --git a/docs/docs/concepts/agent-memory-model.md b/docs/docs/concepts/agent-memory-model.md new file mode 100644 index 00000000..bfeb29a3 --- /dev/null +++ b/docs/docs/concepts/agent-memory-model.md @@ -0,0 +1,98 @@ +--- +sidebar_position: 2 +--- + +# Agent Memory Model + +RushDB is designed from the ground up as a **structured memory store for AI agents**. Unlike flat vector databases or document stores, RushDB gives agents three distinct memory layers — each addressing a different dimension of knowledge — and a retrieval stack that composes them at query time. 
+ +## The Three Memory Layers + +| Layer | What it stores | RushDB primitive | +|---|---|---| +| **Episodic** | Individual facts, events, entities, and their connections | Records + Relationships | +| **Semantic** | Meaning encoded as dense vectors (embeddings) | Vector Properties + AI Indexes | +| **Structural** | Schema: what labels exist, what properties they carry, how they connect | Ontology API | + +### Episodic Memory — Records and Relationships + +Every discrete piece of knowledge is a **Record**: a typed key-value object carrying a label, properties, and a system-generated ID. Records connect to one another via **Relationships**, forming a traversable knowledge graph. An agent can store anything from a conversation turn to a product entity as a record, then retrieve it by property values, label, or graph traversal. + +→ See [Records](./records.md) and [Relationships](./relationships.md) + +### Semantic Memory — Vector Properties + +A subset of record properties carry dense vector representations (embeddings). Because all properties in RushDB are first-class graph nodes shared across every record with the same `(name, type)`, a vector-indexed property is simultaneously a semantic index over every record it connects to. Embeddings can be supplied by the application (Bring Your Own Vector) or generated automatically by RushDB. + +→ See [Properties — Vector Properties and Semantic Indexing](./properties.md#vector-properties-and-semantic-indexing) + +### Structural Memory — Ontology + +The **Ontology API** returns a live snapshot of the graph's schema: all labels, all properties per label with types and value ranges, and the full relationship map. An agent calls this once per session to bootstrap awareness of what is in the database — no hardcoded schema, no external documentation required. 
+ +→ See [Ontology & Schema Discovery](./ontology-schema-discovery.md) + +--- + +## The Retrieval Stack + +A well-designed agent retrieval pipeline uses all three layers in sequence: + +```mermaid +flowchart TD + A([Agent]) --> B["1. Ontology discovery\nPOST /ai/ontology/md"] + B --> C["2. Faceted filter\nSearchQuery where clause"] + C --> D["3. Semantic re-rank\nvector.similarity on filtered candidates"] + D --> E["4. Structured results\nrecords with scores"] + E --> F([Agent response]) + + style A fill:#f5f5f5,stroke:#9e9e9e + style B fill:#e3f2fd,stroke:#1976d2 + style C fill:#e8f5e9,stroke:#388e3c + style D fill:#fff8e1,stroke:#f57c00 + style E fill:#f3e5f5,stroke:#7b1fa2 + style F fill:#f5f5f5,stroke:#9e9e9e +``` + +**Step 1 — Discover the schema.** Before constructing any query, the agent calls the ontology endpoint to learn what labels, properties, and relationships exist. This prevents hallucinated field names and enables dynamic query construction. + +**Step 2 — Filter structurally.** The `where` clause narrows the candidate set by exact or range conditions on scalar properties. This is fast (index-backed) and deterministic. It is the right tool when the agent knows what it is looking for. + +**Step 3 — Re-rank semantically.** After structural filtering, a `vector.similarity` aggregation scores each candidate against the agent's query embedding. This surfaces the most *relevant* records from the structurally valid candidate set. + +**Step 4 — Return to agent.** The sorted, scored result set is returned. The agent reasons over structured records — not raw text chunks — because RushDB preserves full property context alongside the similarity score. + +--- + +## Self-Awareness Without External Documentation + +A central design goal of RushDB is that agents should be able to operate against an unknown or evolving knowledge graph **without any out-of-band schema documentation**. Two mechanisms make this possible: + +1. 
**`__proptypes`** — every Record carries a `__proptypes` field listing the name and type of each property it holds. This makes every record self-describing. + +2. **The Ontology API** — aggregates all `__proptypes` metadata across the project and returns it as a schema snapshot. An agent that calls `/ai/ontology/md` at the start of a session receives the full graph schema in a single, token-efficient Markdown string. + +Together, these enable a zero-configuration agentic loop: + +``` +Boot → call ontology → understand what exists in the graph + → construct SearchQuery from real labels/properties + → retrieve relevant records + → act on structured, typed results +``` + +--- + +## Composing the Retrieval Approaches + +The three retrieval approaches are not mutually exclusive — they compose: + +| Approach | When to use | Mechanism | +|---|---|---| +| **Structural only** | Known labels and property values | `where` filter | +| **Semantic with pre-filter** | Meaning-based lookup, optionally scoped by structural conditions | `db.ai.search()` with `where` | +| **Structural + semantic in one query** | Full SearchQuery features (groupBy, collect, multi-hop) alongside similarity scoring | `where` + `vector.similarity` aggregation | + +Both `db.ai.search()` and `db.records.find()` support pre-filtering before scoring — choose based on whether you need managed text embedding (`db.ai.search()`) or the full SearchQuery feature set (`db.records.find()`). + +→ See [Properties — Composing Faceted and Semantic Search](./properties.md#composing-faceted-and-semantic-search) for code examples. diff --git a/docs/docs/concepts/billing-model.md b/docs/docs/concepts/billing-model.md new file mode 100644 index 00000000..b6546517 --- /dev/null +++ b/docs/docs/concepts/billing-model.md @@ -0,0 +1,66 @@ +--- +sidebar_position: 8 +--- + +# Billing Model + +## Overview + +RushDB uses a **Knowledge Units (KU)** billing model. 
You pay for the structured knowledge RushDB creates and maintains from your data — not for infrastructure, nodes, edges, storage, or compute. + +``` +Total KU consumed in billing period × price per KU = your bill +``` + +This model aligns cost with value: you pay more only when RushDB is doing more work to organize and serve your data. + +## Monthly Billing Cycle + +KU consumption resets at the start of each billing period. Your plan includes a base KU allowance. Consumption above the included allowance is billed as overage (Pro and above). + +``` +Bill = included KU (flat rate) + overage KU × overage rate +``` + +## Plan Comparison + +| | Free | Pro | Scale | Enterprise | +|---|---|---|---|---| +| Included KU / month | 100,000 | 10,000,000 | Usage-based | Custom | +| Overage | Not available | Per KU | Per KU | Negotiated | +| Projects | 2 | Unlimited | Unlimited | Unlimited | +| Self-hosted support | ✓ | ✓ | ✓ | ✓ | +| SLA | — | — | ✓ | ✓ | +| BYOC (own Neo4j / Aura) | ✓ | ✓ | ✓ | ✓ | + +## One Metric + +KU is the only number that matters for billing. RAM, CPU, storage, node counts, edge counts, and query time are infrastructure concerns — RushDB absorbs them. Your bill reflects the knowledge work done, nothing else. + +## Soft and Hard Limits + +- **Soft limit**: RushDB sends an alert when you reach 80% of your monthly KU allowance. +- **Hard limit**: Write operations are blocked when your allowance is exhausted. Read operations continue unaffected. +- You can configure alerts and caps from the **KU Usage** page in your dashboard. + +## Self-Hosted Billing + +Self-hosted deployments (`RUSHDB_SELF_HOSTED=true`) have no KU limits and no billing. The OSS engine runs entirely without quota enforcement. + +For teams that need the full platform (dashboard, projects, API key management, team members) without usage caps, contact us about an **Enterprise platform license**. + +## Changes to Limits Mid-Period + +- Upgrading your plan: new KU limit takes effect immediately. 
+- Downgrading your plan: new KU limit takes effect at the start of the next billing period. +- Cancelling: your current allowance remains active until the end of the paid period. + +## KU Usage Dashboard + +Your KU consumption is visible in real-time on the **KU Usage** page in your workspace dashboard: + +- Total KU consumed this month +- Remaining KU +- Estimated end-of-month cost +- Daily consumption trend chart +- Ingestion and query spike timeline diff --git a/docs/docs/concepts/knowledge-units.md b/docs/docs/concepts/knowledge-units.md new file mode 100644 index 00000000..a32a99b7 --- /dev/null +++ b/docs/docs/concepts/knowledge-units.md @@ -0,0 +1,109 @@ +--- +sidebar_position: 7 +--- + +# Knowledge Units (KU) + +## What is a Knowledge Unit? + +A **Knowledge Unit (KU)** is the fundamental economic unit of RushDB. It represents a unit of structured knowledge created and maintained by RushDB from your raw data — during ingestion, modeling, or reasoning. + +``` +Raw data → RushDB → Structured Knowledge → KU +``` + +The more data you push and the richer its structure, the more knowledge RushDB creates, and the more KU are consumed. + +## Developer Mental Model + +You never need to think about nodes, edges, CPU, RAM, or storage. You simply think: + +``` +More data → more knowledge → more KU +``` + +RushDB handles all the underlying storage and graph mechanics automatically. You pay only for the knowledge created and maintained on your behalf. + +## What Generates KU? 
+ +KU is generated by operations that create or maintain structured knowledge in RushDB: + +| Activity | Example | +|---|---| +| Ingesting records | `db.records.create()`, `records.importJson()` | +| Building relationships | `db.records.attach()` | +| Storing embeddings (number arrays) | Passing `number[]` arrays during ingestion | +| **Compute-intensive operations** | Vector similarity search, raw Cypher execution, multi-hop traversals | +| Maintaining stored knowledge | Ongoing footprint of stored records | + +**Standard reads and queries (find, filter, paginate) never consume KU.** Compute-intensive operations do because their cost scales with dataset size rather than the data written — running a semantic search across 10M records costs significantly more server compute than fetching a record by ID. + +> **Note:** The internal weights assigned to each operation type are not exposed. You interact only with your total KU consumption. + +## KU and Plan Limits + +Each plan includes a monthly KU allowance: + +| Plan | Included KU / month | +|---|---| +| Free | 100,000 KU | +| Pro | 10,000,000 KU | +| Scale | Usage-based | +| Enterprise | Custom / platform license | + +When your workspace reaches its KU limit, write operations will be blocked until the next billing period or until you upgrade your plan. + +## How Nested JSON is Billed + +When you ingest nested JSON, RushDB **automatically decomposes it** into separate linked records: + +```json +// Input: one nested object +{ "user": { "profile": { "name": "Alice", "age": 30 } } } + +// RushDB creates: +// → Record: user (0 own properties) +// → Record: profile (2 properties: name, age) → ~2 KU +// → Relationship: user → profile → ~0.5 KU +// Total: ~2.5 KU +``` + +This means nesting **does not multiply properties** — it creates additional records and relationships, each billed independently. 
+ +| Component | Cost | +|---|---| +| Each record created | weight per record | +| Each property stored on a record | weight per property | +| Each relationship formed between records | fractional weight (< 1×) | + +> **Note:** Exact per-operation weights are not publicly exposed. You interact only with your total KU consumption. + +## KU Calculator + +You can estimate your monthly KU consumption with this approximation: + +``` +estimated monthly KU + ≈ records_per_day × 30 × (avg_properties + avg_child_records × relationship_weight) +``` + +Where: +- `records_per_day` — average number of top-level records ingested per day +- `avg_properties` — average number of scalar properties per record +- `avg_child_records` — average number of nested objects per record (each becomes a separate linked child record) +- `relationship_weight` — the fractional KU cost per relationship (less than 1×) + +For a quick estimate, nesting depth maps approximately to a combined multiplier of `1.0` (flat) → `3.0` (deeply nested) on the base property count. This is an approximation — actual KU depends on the internal weighting of each operation type. + +## Storage Footprint + +RushDB charges a small daily **storage footprint** KU for every record currently stored in your project. This covers the ongoing infrastructure cost of keeping your data queryable. + +- Storage KU is calculated once per day, prorated against your monthly plan allowance. +- The charge is proportional to your current record count — fewer records, lower footprint. +- **Deleting records reduces your ongoing footprint** from the next daily cycle onward. +- KU consumed *at creation time* is never reversed by a later deletion. + +## Self-Hosted + +When running RushDB in self-hosted mode (`RUSHDB_SELF_HOSTED=true`), KU tracking is disabled and no limits apply. Self-hosted deployments are fully unlimited. 
diff --git a/docs/docs/concepts/ontology-schema-discovery.md b/docs/docs/concepts/ontology-schema-discovery.md new file mode 100644 index 00000000..c2b1ddac --- /dev/null +++ b/docs/docs/concepts/ontology-schema-discovery.md @@ -0,0 +1,128 @@ +--- +sidebar_position: 7 +--- + +# Ontology & Schema Discovery + +The **Ontology API** returns a live, computed snapshot of what exists in your RushDB project: every label, every property per label (with its type and value distribution), and the full relationship map between labels. Agents use it to bootstrap schema awareness at the start of a session — no hardcoded schema, no external documentation required. + +## Why This Matters for Agents + +Traditional databases assume that the application knows the schema ahead of time. This works when the schema is static and human-authored. AI agents face a different reality: + +- The knowledge graph may have been populated by other agents, batch imports, or live event streams. +- The schema drifts over time as new label types and properties appear. +- Agents cannot be pre-programmed with field names they have never seen. + +RushDB's answer is **schema-on-read for agents**: call `/ai/ontology/md` at the start of each session and receive the full, current schema as a single response. The agent can then construct valid `SearchQuery` objects referencing only labels and properties that actually exist. + +--- + +## What the Ontology Contains + +| Component | Description | +|---|---| +| **Label inventory** | All label names currently in the project, with record counts | +| **Property manifest per label** | Property name, type, and either sample values (strings/booleans) or a min–max range (numbers/datetimes) | +| **Relationship map** | Which labels connect to which, via which relationship type, and in which direction | + +This is sufficient for an agent to: + +1. Know which labels exist (and how many records each has) +2. 
Know which fields are queryable on each label and what values they carry +3. Know which graph traversals are valid (which labels are reachable from which) +4. Construct faceted filter ranges without any extra round-trip queries + +--- + +## Two Formats + +### Markdown — for LLM context injection + +`POST /api/v1/ai/ontology/md` + +Returns the schema as compact Markdown tables. This format is optimised for direct injection into an LLM system prompt or tool result — token-efficient, human-readable, and immediately usable by a language model. + +```text +# Graph Ontology + +## Labels + +| Label | Count | +|-----------|------:| +| `Article` | 4821 | +| `Author` | 312 | +| `Tag` | 87 | + +--- + +## `Article` (4821 records) + +### Properties + +| Property | Type | Values / Range | +|---------------|----------|----------------------------------------------| +| `title` | string | `"Graph databases…"`, `"Intro to…"` (+4819) | +| `published` | boolean | `true`, `false` | +| `score` | number | `0.0`..`9.8` | +| `publishedAt` | datetime | `2020-01-01`..`2026-03-29` | + +### Relationships + +| Type | Direction | Other Label | +|---------------|-----------|-------------| +| `WRITTEN_BY` | out | `Author` | +| `TAGGED_WITH` | out | `Tag` | +``` + +### JSON — for programmatic tool calls + +`POST /api/v1/ai/ontology` + +Returns the same data as a structured JSON array. Use this format when an agent needs to programmatically extract specific labels or property names before constructing a query. 
+ +--- + +## The Self-Awareness Loop + +```mermaid +sequenceDiagram + participant Agent + participant RushDB + + Agent->>RushDB: POST /ai/ontology/md + RushDB-->>Agent: Markdown schema (labels, properties, relationships, values) + Note over Agent: Agent knows what exists in the graph + + Agent->>RushDB: records.find({ labels: ['Article'], where: { published: true } }) + RushDB-->>Agent: Matching records + + Note over Agent: Agent constructs response from real, structured data +``` + +The loop is stateless from RushDB's side. The agent calls the ontology endpoint whenever it needs a fresh view of the schema, then proceeds to query. There is no session to open or schema to register. + +--- + +## Dynamic Facet Discovery + +Because the ontology includes value distributions for each property, agents can construct UI filters or reasoning steps entirely from the ontology response — no separate "what values exist?" queries needed. + +**Example:** An agent building a product-search interface calls `/ai/ontology/md`, sees that the `Product` label has a `status` string property with sample values `["available", "discontinued", "pre-order"]`, and renders those as filter chips — without ever querying records for distinct values. + +**Example:** An agent reasoning over a CRM dataset calls `/ai/ontology`, sees that `Deal.amount` ranges from `500` to `250000`, and uses that range to construct a meaningful `$gte` / `$lte` filter in the next query. + +--- + +## Caching + +The ontology is computed once and cached by RushDB. Cache entries are invalidated when labels, properties, or relationships change in the project. Calling `/ai/ontology/md` at the start of every agent session incurs negligible overhead — a single network round trip returning a compact text payload. 
+ +--- + +## Further Reading + +- [Labels](./labels.md) — how labels work as the primary organisational axis +- [Properties](./properties.md) — property types and the `__proptypes` self-description mechanism +- [Agent Memory Model](./agent-memory-model.md) — where ontology fits in the full retrieval stack +- [AI & Semantic Search](../../rest-api/ai.md) — complete REST API reference for ontology endpoints diff --git a/docs/docs/concepts/pricing.md b/docs/docs/concepts/pricing.md new file mode 100644 index 00000000..8b74182a --- /dev/null +++ b/docs/docs/concepts/pricing.md @@ -0,0 +1,95 @@ +--- +sidebar_position: 9 +--- + +# Pricing + +## Simple, Predictable Pricing + +RushDB pricing is based on [Knowledge Units (KU)](./knowledge-units.md) — a single unit that represents the structured knowledge created and maintained from your data. No infrastructure tiers, no node counts, no storage pricing. + +``` +You pay for knowledge created. Nothing else. +``` + +## Plans + +### Free + +- **100,000 KU / month** included +- Up to 2 projects +- Self-hosted support +- Bring Your Own Cloud (BYOC) — connect to your own Neo4j or Aura instance +- Community support +- No credit card required + +Perfect for prototypes, side projects, and getting started. + +### Pro — $29/month + +- **10,000,000 KU / month** included +- Overage at **$3 per additional million KU** — no hard stop, apps keep running +- Unlimited projects +- Priority support +- Team members (up to 3, then $10/member) +- Bring Your Own Cloud (BYOC) — connect to your own Neo4j or Aura instance + +Ideal for production applications and growing teams. Predictable base cost, pay-as-you-go beyond the included allowance. 
+ +### Scale — from $99/month + +- **Usage-based** — $99 platform fee + **$2 per million KU** consumed +- No included KU bundle — cheaper per-KU rate than Pro at volume +- SLA guarantee +- Advanced support +- Unlimited team members +- Bring Your Own Cloud (BYOC) — connect to your own Neo4j or Aura instance + +For high-volume or highly variable workloads where you want the lowest per-KU rate without worrying about tiers. The $2/M KU rate on Scale is 33% cheaper than Pro's overage rate. + +### Enterprise + +- **Platform license** — flat fee, unlimited KU +- Bring Your Own Cloud (BYOC) +- Embedded / OEM use +- Dedicated support and SLA +- Custom contract + +For organisations embedding RushDB into their products or needing full data sovereignty. + +## Estimating Your KU Usage + +Use this formula to estimate your monthly KU consumption: + +``` +estimated KU ≈ records_per_day × 30 × avg_fields_per_record × nesting_factor +``` + +**Example:** +- 1,000 records/day +- 10 fields per record on average +- Flat structure (nesting factor ≈ 1.0) + +``` +1,000 × 30 × 10 × 1.0 = 300,000 KU/month → Pro plan +``` + +The interactive KU Calculator on the [pricing page](https://rushdb.com/pricing) can help you get a more precise estimate. + +## Self-Hosted + +Running RushDB on your own infrastructure? Self-hosted mode is **free and unlimited** — no KU limits, no billing. See the [self-hosting guide](../get-started/quick-tutorial) to get started. + +## FAQ + +**Can I exceed my plan's KU limit?** +On the Free plan, writes are blocked when the limit is reached — reads always continue. On Pro, overage is billed at $3 per million KU beyond the 10M included. On Scale there is no hard limit — you pay $2 per million KU consumed on top of the $99/month base. + +**Does deleting data reduce my KU usage?** +KU from creation operations is never reversed. However, once data is deleted, its ongoing stored footprint stops contributing to KU from that point forward. 
+ +**Do read operations consume KU?** +Standard read and search operations do not consume KU. Heavy analytical operations (multi-hop traversals, vector similarity search at scale) may consume a small amount of KU. + +**Is there a free trial for paid plans?** +Yes — start on the Free plan with no credit card. Upgrade at any time and your remaining free KU carries over for the rest of the billing period. diff --git a/docs/docs/concepts/properties.md b/docs/docs/concepts/properties.md index 6ad9f59b..a6c8edba 100644 --- a/docs/docs/concepts/properties.md +++ b/docs/docs/concepts/properties.md @@ -186,22 +186,11 @@ Represents the absence of a value. } ``` -### `vector` -Arrays of floating-point numbers or integers, particularly useful for vector similarity searches and machine learning operations. - -```js -{ - name: "imageEmbedding", - type: "vector", - value: [0.99070, 0.78912, 1.0, 0.0] -} -``` - ### Arrays RushDB also supports arrays as property values, but they must contain consistent value types: -> **Note:** Every data type mentioned above (except `vector`, since it's already an array by default) supports an array representation. +> **Note:** Every data type mentioned above supports an array representation. ```js // String array diff --git a/docs/docs/concepts/records.md b/docs/docs/concepts/records.md index 0a9de354..e79f757d 100644 --- a/docs/docs/concepts/records.md +++ b/docs/docs/concepts/records.md @@ -7,7 +7,7 @@ In RushDB, Records are fundamental data structures that store meaningful key-val ## How it works -Records in RushDB can be thought of as nodes in a graph database or rows in a traditional database. While the underlying implementation utilizes complex graph structures, from a user perspective, a Record is simply a key-value object containing properties. +Records in RushDB are the fundamental units of structured knowledge. 
While the underlying engine uses a rich graph model to represent and connect data, from a developer perspective a Record is simply a typed key-value object containing properties — like a row in a database or a document in a document store, but with seamless relationship traversal built in. Each record in RushDB consists of: @@ -85,7 +85,6 @@ RushDB supports a wide range of data types to accommodate diverse data needs: | `datetime` | ISO 8601 format, including timezones | `"2012-12-21T18:29:37Z"` | | `boolean` | True or false values | `true`, `false` | | `null` | Explicit null value | `null` | -| `vector` | Arrays of floating-point numbers or integers | `[0.99070, 0.78912, 1, 0]` | ### Arrays diff --git a/docs/docs/concepts/search/aggregations.md b/docs/docs/concepts/search/aggregations.md index b21f9c45..88411aa3 100644 --- a/docs/docs/concepts/search/aggregations.md +++ b/docs/docs/concepts/search/aggregations.md @@ -42,13 +42,9 @@ The following aggregation functions are supported: - `sum` - Calculate sum of a numeric field - `collect` - Gather field values or entire records into an array - `timeBucket` - Bucket a datetime field into calendar intervals (day/week/month/quarter/year/hour/minute/second or custom N-sized months/hours/minutes/seconds/years) -- `gds.similarity.*` - Calculate vector similarity using various algorithms: - - `cosine` - Cosine similarity [-1,1] - - `euclidean` - Euclidean distance normalized to (0,1] - - `euclideanDistance` - Raw euclidean distance [0,∞) - - `jaccard` - Jaccard similarity [0,1] - - `overlap` - Overlap coefficient [0,1] - - `pearson` - Pearson correlation [-1,1] +- `vector.similarity.*` - Calculate vector similarity using native Neo4j functions: + - `vector.similarity.cosine` - Cosine similarity [0,1] + - `vector.similarity.euclidean` - Euclidean distance normalized to (0,1] ## Grouping Results (groupBy) @@ -924,21 +920,17 @@ graph LR A[DOCUMENT] --has--> B[CHUNK] ``` -### gds.similarity.* +### vector.similarity.* 
**Parameters:** -- `fn`: 'gds.similarity.[algorithm]' - The similarity algorithm to use - - `gds.similarity.cosine` - Cosine similarity [-1,1] - - `gds.similarity.euclidean` - Euclidean distance normalized to (0,1] - - `gds.similarity.euclideanDistance` - Raw euclidean distance [0,∞) - - `gds.similarity.jaccard` - Jaccard similarity [0,1] - - `gds.similarity.overlap` - Overlap coefficient [0,1] - - `gds.similarity.pearson` - Pearson correlation [-1,1] -- `field`: string - The vector field to compare +- `fn`: 'vector.similarity.cosine' | 'vector.similarity.euclidean' - The similarity function to use + - `vector.similarity.cosine` - Cosine similarity [0,1] + - `vector.similarity.euclidean` - Euclidean distance normalized to (0,1] +- `field`: string - The numeric array field to compare - `alias`: string - The record alias to use - `query`: number[] - The query vector to calculate similarity against -Example showing vector search with where clause and similarity aggregation: +Example showing similarity re-ranking with aggregation: ```typescript { @@ -947,7 +939,7 @@ Example showing vector search with where clause and similarity aggregation: aggregate: { // Calculate similarity score using root level alias similarity: { - fn: 'gds.similarity.cosine', + fn: 'vector.similarity.cosine', field: 'embedding', query: [1, 2, 3, 4, 5], alias: '$record' diff --git a/docs/docs/concepts/search/introduction.md b/docs/docs/concepts/search/introduction.md index 12a36169..bfea2846 100644 --- a/docs/docs/concepts/search/introduction.md +++ b/docs/docs/concepts/search/introduction.md @@ -16,7 +16,7 @@ RushDB's Search API offers a comprehensive set of features: - **Aggregation**: Perform calculations and transform data using [aggregation functions](./aggregations.md) - **Pagination and Sorting**: Control result volume and order with [pagination and sorting options](./pagination-order.md) - **Label-Based Filtering**: Target specific types of records using [label filtering](./labels.md) -- 
**Vector Search**: Find records based on vector similarity for AI and machine learning applications +- **Semantic Search**: Find records by meaning using AI embedding indexes — see [AI Search](../../rest-api/ai) ## SearchQuery Structure diff --git a/docs/docs/concepts/search/where.md b/docs/docs/concepts/search/where.md index 5ba514a8..1e57fad9 100644 --- a/docs/docs/concepts/search/where.md +++ b/docs/docs/concepts/search/where.md @@ -323,90 +323,6 @@ You can also use array operators with datetime values: } ``` -### Vector Operators - -RushDB supports vector similarity searches through the `$vector` operator, which is useful for semantic searches, embeddings comparison, and machine learning applications. - -```typescript -{ - where: { - embedding: { - $vector: { - fn: "gds.similarity.cosine", // Similarity function - query: [1, 2, 3, 4, 5], // Query vector - threshold: 0.75 // Minimum similarity threshold - } - } - } -} -``` - -Available similarity functions: -- `cosine`: Cosine similarity [-1,1] -- `euclidean`: Euclidean distance normalized to (0,1] -- `euclideanDistance`: Raw euclidean distance [0,∞) -- `jaccard`: Jaccard similarity [0,1] -- `overlap`: Overlap coefficient [0,1] -- `pearson`: Pearson correlation [-1,1] - -The `threshold` parameter can be: -- A simple number (with different default behaviors): - - For `euclidean` and `euclideanDistance` functions, a simple threshold is treated as `$lte` (less than or equal to) - - For all other functions (`cosine`, `jaccard`, `overlap`, `pearson`), a simple threshold is treated as `$gte` (greater than or equal to) -- An object with comparison operators for more precise filtering: - -```typescript -{ - where: { - embedding: { - $vector: { - fn: "gds.similarity.cosine", - query: [1, 2, 3, 4, 5], - threshold: { - $gte: 0.5, // Similarity >= 0.5 - $lte: 0.8, // Similarity <= 0.8 - $ne: 0.75 // Similarity != 0.75 - } - } - } - } -} -``` - -#### Default Threshold Behavior - -When providing a simple number as threshold, 
the comparison differs by function type: - -```typescript -// For cosine similarity, higher values mean more similar -// So threshold: 0.75 means "find vectors with similarity >= 0.75" -{ - where: { - embedding: { - $vector: { - fn: "gds.similarity.cosine", - query: [1, 2, 3, 4, 5], - threshold: 0.75 // Interpreted as $gte: 0.75 - } - } - } -} - -// For euclidean distance, lower values mean more similar -// So threshold: 0.5 means "find vectors with distance <= 0.5" -{ - where: { - embedding: { - $vector: { - fn: "gds.similarity.euclidean", - query: [1, 2, 3, 4, 5], - threshold: 0.5 // Interpreted as $lte: 0.5 - } - } - } -} -``` - ## Field Existence Operator ### $exists @@ -483,7 +399,6 @@ Available types: - `"boolean"`: True/false values - `"datetime"`: Date and time values - `"null"`: Null values -- `"vector"`: Vector/array values for similarity search **Examples:** @@ -502,13 +417,6 @@ Available types: } } -// Find records with vector embeddings -{ - where: { - embedding: { $type: "vector" } - } -} - // Combine with other operators to find string fields that contain specific text { where: { @@ -968,7 +876,7 @@ This query finds active users who have provided an email address but no phone nu
-Vector search with relationship filtering +Multi-hop relationship filtering with text matching ```typescript { @@ -976,23 +884,14 @@ This query finds active users who have provided an email address but no phone nu DOCUMENT: { title: { $contains: "Neural Networks" }, CHUNK: { - content: { $contains: "embedding" }, - embedding: { - $vector: { - fn: "gds.similarity.cosine", - query: [0.1, 0.2, 0.3, 0.4, 0.5], - threshold: { - $gte: 0.75 - } - } - } + content: { $contains: "embedding" } } } } } ``` -This query finds records related to documents about neural networks, specifically chunks that mention "embedding" and have a high vector similarity to the provided embedding. +This query finds records related to documents about neural networks that have chunks mentioning "embedding". For semantic (embedding-based) search, use the dedicated [AI search endpoint](/rest-api/ai#semantic-search).
diff --git a/docs/docs/concepts/storage.md b/docs/docs/concepts/storage.md index ff66e890..feecbae7 100644 --- a/docs/docs/concepts/storage.md +++ b/docs/docs/concepts/storage.md @@ -3,7 +3,10 @@ sidebar_position: 3 --- # Storage -RushDB leverages [Neo4j](https://neo4j.com/docs/get-started/get-started-with-neo4j/) (version 5.25.1 or higher) as its underlying storage engine, enhanced with the [APOC](https://neo4j.com/labs/apoc/) (Awesome Procedures On Cypher) and [GDS](https://neo4j.com/docs/graph-data-science/current/) (Graph Data Science) plugins to perform efficient vector similarity searches and advanced graph operations. +RushDB uses a **dual storage architecture**: + +- **[Neo4j](https://neo4j.com/docs/get-started/get-started-with-neo4j/) (version 2026.01.4 or higher)** — stores all user-defined records and properties. The [APOC](https://neo4j.com/labs/apoc/) (Awesome Procedures On Cypher) plugin is required for JSON serialization, map/collection utilities, and property management. Vector similarity search uses Neo4j's **native vector index** (`db.index.vector.*` and `vector.similarity.cosine()`), available built-in from Neo4j 5.x / 2026.x — no additional plugin required. +- **SQL database (SQLite by default, PostgreSQL recommended for production)** — stores all dashboard entities: users, workspaces, projects, and API tokens. This layer is managed with [Drizzle ORM](https://orm.drizzle.team/) and runs migrations automatically on startup. ## Graph Database vs. 
Traditional Databases @@ -22,13 +25,24 @@ In Neo4j, relationships are physical connections in the database, not just forei ## Neo4j Foundation -Neo4j provides RushDB with a robust graph database foundation, allowing for: +Neo4j is responsible for all record and property data in RushDB, providing: - High-performance graph traversals - ACID-compliant transactions - Property graph model flexibility - Scalable data storage and retrieval -The integration with [APOC](https://neo4j.com/labs/apoc/4.4/overview/) and [GDS](https://neo4j.com/docs/graph-data-science/current/introduction/) plugins extends Neo4j's native capabilities with vector-based operations critical for machine learning workflows and similarity search functions. +The [APOC](https://neo4j.com/labs/apoc/) plugin extends Neo4j with JSON conversion (`apoc.convert.*`), map utilities (`apoc.map.*`), and collection helpers (`apoc.coll.*`) that RushDB relies on for property storage and updates. Vector similarity search is handled entirely by **Neo4j's native vector index** — RushDB creates a `VECTOR INDEX` on embedding relationships and ranks filtered candidates with `vector.similarity.cosine()`. No GDS plugin is required. RushDB supports Neo4j 2026.01.4 and newer. + +## SQL Foundation + +Dashboard entities — users, workspaces, projects, and API tokens — are stored in a SQL database managed by [Drizzle ORM](https://orm.drizzle.team/). This separation keeps operational metadata out of the graph and allows standard relational tooling (migrations, studio, backups) to be used for account management. + +| Environment | Database | Configuration | +|-------------|----------|---------------| +| Local development | SQLite (`rushdb.db`) | `SQL_DB_TYPE=sqlite` | +| Production | PostgreSQL | `SQL_DB_TYPE=postgres`, `SQL_DB_URL=postgresql://...` | + +Schema migrations are applied automatically on startup. 
## Data Overhead @@ -108,7 +122,7 @@ Properties are not shared amongst projects (database instances), ensuring comple ## Data Types -RushDB supports a wide range of data types to accommodate diverse data needs and provide a flexible environment for your applications. Below is a comprehensive find of the supported data types along with their descriptions: +RushDB supports a wide range of data types to accommodate diverse data needs and provide a flexible environment for your applications. Below is a comprehensive list of the supported data types along with their descriptions: ### `string` This data type is used for any textual information and can hold text of unlimited length. @@ -126,11 +140,6 @@ This data type can only have two possible values: `true` or `false`. ### `null` This data type has only one possible value: `null`. -### `vector` -This data type accommodates arrays of both floating-point numbers and integers. It handles values like -`[0.99070,0.78912, 1, 0]`. This is particularly useful for vector similarity searches and machine learning operations. - - --- ### Arrays @@ -138,9 +147,9 @@ In essence, RushDB supports all the data types that JSON does. However, when it hold them as **Property** values, but it's important to note that it can only store consistent values within those arrays. To learn more, check out the [Properties](../concepts/properties) section. -> **Note:** Every data type mentioned above (except `vector`, since it's already an array by default) supports an array representation. +> **Note:** Every data type mentioned above supports an array representation. 
-Here some valid examples: +Here are some valid examples: - `["apple", "banana", "carrot"]` - good - `[null, null, null, null, null]` - weird, but works fine 🤔 - `[4, 8, 15, 16, 23, 42]` - works as well @@ -261,21 +270,18 @@ Each of these records is also connected to Property nodes which define the metad ## Database Indexes and Constraints -When RushDB initializes a new database connection, it automatically creates several indexes and constraints to ensure data integrity and optimize query performance: +When RushDB connects to Neo4j, it automatically creates several indexes and constraints on graph nodes to ensure data integrity and optimize query performance. + +> **Note:** Users, workspaces, projects, and API tokens are stored in the SQL layer and are not represented as Neo4j nodes. Their uniqueness and integrity are enforced by SQL constraints and Drizzle ORM migrations. ### Core Constraints -The following uniqueness constraints are created to enforce data consistency: +The following uniqueness constraints are created on graph nodes: | Constraint Name | Node Label | Property | Description | |-----------------|------------|----------|-------------| -| `constraint_user_login` | `__RUSHDB__LABEL__USER__` | `login` | Ensures each user has a unique login | -| `constraint_user_id` | `__RUSHDB__LABEL__USER__` | `id` | Ensures each user has a unique ID | -| `constraint_token_id` | `__RUSHDB__LABEL__TOKEN__` | `id` | Ensures each token has a unique ID | -| `constraint_project_id` | `__RUSHDB__LABEL__PROJECT__` | `id` | Ensures each project has a unique ID | -| `constraint_workspace_id` | `__RUSHDB__LABEL__WORKSPACE__` | `id` | Ensures each workspace has a unique ID | | `constraint_record_id` | `__RUSHDB__LABEL__RECORD__` | `__RUSHDB__KEY__ID__` | Ensures each record has a unique ID | -| `constraint_property_id` | `__RUSHDB__LABEL__PROPERTY__` | `id` | Ensures each property has a unique ID | +| `constraint_property_id` | `__RUSHDB__LABEL__PROPERTY__` | `id` | Ensures each 
property node has a unique ID | ### Performance Indexes @@ -290,10 +296,9 @@ The following indexes are created to optimize query performance: These indexes and constraints are essential for RushDB's performance and data integrity, particularly when dealing with large datasets and complex queries across the property graph model. They ensure that: -1. Record IDs are always unique within the database +1. Record IDs are always unique within the graph database 2. Project isolation is maintained in multi-tenant environments -3. Property lookups are efficient, especially during joins and traversals -4. User management operations perform optimally +3. Property lookups are efficient, especially during graph traversals and joins Learn more at [REST API - Import Data](../rest-api/records/import-data) or through the language-specific SDKs: - [TypeScript SDK](../typescript-sdk/records/import-data) @@ -304,7 +309,7 @@ Learn more at [REST API - Import Data](../rest-api/records/import-data) or throu This approach is carefully designed to: - Enable efficient indexing and querying - Support advanced graph traversals and pattern matching -- Facilitate vector similarity searches with minimal computational cost +- Facilitate semantic search and similarity re-ranking with minimal computational cost By structuring data this way, RushDB achieves a balance between storage overhead and query performance, optimizing for use cases that require both traditional database operations and advanced graph analytics capabilities. 
diff --git a/docs/docs/concepts/transactions.mdx b/docs/docs/concepts/transactions.mdx index 6abf069a..24bc6f2d 100644 --- a/docs/docs/concepts/transactions.mdx +++ b/docs/docs/concepts/transactions.mdx @@ -1,7 +1,7 @@ --- sidebar_position: 7 --- -import Tabs from '@theme/Tabs'; +import Tabs from '@site/src/components/LanguageTabs'; import TabItem from '@theme/TabItem'; # Transactions @@ -187,7 +187,7 @@ When multiple users or services access the same data simultaneously, transaction await db.records.update( { target: record, - label: record.label(), + label: record.label, data: { status: "processing" } }, tx1 diff --git a/docs/docs/get-started/get-api-key.mdx b/docs/docs/get-started/get-api-key.mdx index 45c6a9ef..765ff383 100644 --- a/docs/docs/get-started/get-api-key.mdx +++ b/docs/docs/get-started/get-api-key.mdx @@ -10,14 +10,13 @@ To use RushDB, you'll need an API token for authentication. Here's how to get on ## 1. Create an Account -Visit [RushDB Dashboard](https://dashboard.rushdb.com) and sign up for an account if you haven't already. +Visit [RushDB Dashboard](https://app.rushdb.com) and sign up for an account if you haven't already. ## 2. Create a Project 1. After signing in, click on "New Project" 2. Enter a name for your project -3. Choose your preferred region -4. Click "Create" +3. Click "Create" ## 3. Generate API key @@ -37,30 +36,33 @@ Your API token will be displayed only once. 
Make sure to: Example of using environment variables: -import Tabs from '@theme/Tabs'; +import Tabs from '@site/src/components/LanguageTabs'; import TabItem from '@theme/TabItem'; - -```typescript -// Load from environment variable -const db = new RushDB(process.env.RUSHDB_API_KEY); -``` - - + ```python import os db = RushDB(api_key=os.environ['RUSHDB_API_KEY']) ``` - + +```typescript +// RushDB reads RUSHDB_API_KEY from the environment automatically +const db = new RushDB(); + +// Or pass it explicitly: +// const db = new RushDB(process.env.RUSHDB_API_KEY); +``` + + ```bash # Set in your shell -export RUSHDB_API_KEY='RUSHDB_API_KEY' +export RUSHDB_API_KEY='your-api-key' # Use in requests -curl -H "Authorization: Bearer $RUSHDB_API_TOKEN" \ +curl -H "Authorization: Bearer $RUSHDB_API_KEY" \ https://api.rushdb.com/api/v1/records ``` diff --git a/docs/docs/get-started/quick-tutorial.mdx b/docs/docs/get-started/quick-tutorial.mdx index ec8795de..25db20cc 100644 --- a/docs/docs/get-started/quick-tutorial.mdx +++ b/docs/docs/get-started/quick-tutorial.mdx @@ -6,348 +6,511 @@ sidebar_position: 1 # Quick Tutorial -This tutorial will help you get started with RushDB by walking through a simple example of creating and querying a small social network using the RushDB SDK. +In this tutorial you'll build a small **knowledge base** — push a batch of articles, search by meaning, filter by field value, link articles to their authors, and wrap multi-step writes in a transaction. -## Prerequisites - -- Create a RushDB account and get an API token (see [Get API Key](../get-started/get-api-key)) -- Your preferred programming language: Python, TypeScript/JavaScript, or any HTTP client for REST API +Every concept introduced here applies equally to agent memory, product catalogs, document stores, or any other data shape you throw at RushDB. 
-## Step 1: Initialize RushDB +## Prerequisites -Choose your preferred SDK: +- A RushDB account and API token — see [Get API Key](../get-started/get-api-key) +- TypeScript/JavaScript, Python, or any HTTP client -import Tabs from '@theme/Tabs'; +import Tabs from '@site/src/components/LanguageTabs'; import TabItem from '@theme/TabItem'; +## Step 1: Initialize + - + +```python +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY") +``` + + ```typescript import RushDB from '@rushdb/javascript-sdk'; -// Initialize with your API token const db = new RushDB('RUSHDB_API_KEY'); - -// Or with additional configuration options -// const db = new RushDB('RUSHDB_API_KEY', { -// url: 'https://api.rushdb.com/api/v1', -// timeout: 5000 -// }); ``` - -```python -from rushdb import RushDB + +```bash +export RUSHDB_API_KEY="your-api-key" +``` + + -# Connect with your API token -db = RushDB("RUSHDB_API_KEY") +## Step 2: Push [Records](../concepts/records.md) + +Use `importJson` to push a batch of articles in one call. RushDB infers field types automatically and returns record instances you can use immediately. 
+ + + +```python +articles = db.records.create_many( + label="ARTICLE", + data=[ + { + "title": "Getting started with graph databases", + "content": "Graph databases model data as nodes and edges, making relationship queries fast and intuitive.", + "tags": ["databases", "graphs"], + "author": "alice" + }, + { + "title": "Vector search explained", + "content": "Vector embeddings let you search by semantic meaning rather than exact keywords.", + "tags": ["ai", "search"], + "author": "bob" + }, + { + "title": "Building AI agents with persistent memory", + "content": "Agents that remember past interactions can reason across sessions and personalize responses.", + "tags": ["ai", "agents"], + "author": "alice" + } + ], + options={ "returnResult": True } +) +``` + + +```typescript +const { data: articles } = await db.records.importJson({ + label: 'ARTICLE', + data: [ + { + title: 'Getting started with graph databases', + content: 'Graph databases model data as nodes and edges, making relationship queries fast and intuitive.', + tags: ['databases', 'graphs'], + author: 'alice' + }, + { + title: 'Vector search explained', + content: 'Vector embeddings let you search by semantic meaning rather than exact keywords.', + tags: ['ai', 'search'], + author: 'bob' + }, + { + title: 'Building AI agents with persistent memory', + content: 'Agents that remember past interactions can reason across sessions and personalize responses.', + tags: ['ai', 'agents'], + author: 'alice' + } + ], + options: { returnResult: true } +}) ``` - + ```bash -# Set your API token for future requests -export TOKEN="RUSHDB_API_KEY" +curl -X POST "https://api.rushdb.com/api/v1/records/import/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "ARTICLE", + "data": [ + { + "title": "Getting started with graph databases", + "content": "Graph databases model data as nodes and edges, making relationship queries fast and intuitive.", + "tags": 
["databases", "graphs"], + "author": "alice" + }, + { + "title": "Vector search explained", + "content": "Vector embeddings let you search by semantic meaning rather than exact keywords.", + "tags": ["ai", "search"], + "author": "bob" + }, + { + "title": "Building AI agents with persistent memory", + "content": "Agents that remember past interactions can reason across sessions and personalize responses.", + "tags": ["ai", "agents"], + "author": "alice" + } + ] + }' ``` -## Step 2: Create [Records](../concepts/records.md) with Labels +:::tip Same pattern for agent memory +Swap `label: 'ARTICLE'` for `label: 'MEMORY'` and push conversation snippets, tool results, or any structured context. The rest of the tutorial — semantic search, filters, relationships — applies unchanged. +::: + +## Step 3: Semantic Search -Let's create two users in our social network: +Create an embedding index on the `content` field, wait for backfill to complete, then search by meaning. - -```typescript -// Create users with the Person [label](../concepts/labels.md) -const alice = await db.records.create({ - label: "PERSON", - data: { - name: 'Alice', - age: 28, - interests: ['coding', 'hiking'] - } -}); - -const bob = await db.records.create({ - label: "PERSON", - data: { - name: 'Bob', - age: 32, - interests: ['photography', 'travel'] - } -}); + +```python +import time + +# Create the embedding index +index = db.ai.indexes.create({ + "label": "ARTICLE", + "propertyName": "content" +}).data + +# Poll until all records are indexed +stats = db.ai.indexes.stats(index["id"]).data +while stats["indexedRecords"] < stats["totalRecords"]: + time.sleep(0.5) + stats = db.ai.indexes.stats(index["id"]).data + +# Search by meaning +results = db.ai.search({ + "propertyName": "content", + "query": "how do agents remember things across conversations", + "labels": ["ARTICLE"], + "limit": 3 +}).data + +for result in results: + print(f"[{result['__score']:.3f}] {result['title']}") +# [0.891] Building AI agents 
with persistent memory +# [0.743] Vector search explained +# [0.612] Getting started with graph databases ``` - -```python -# Create users with the Person [label](../concepts/labels.md) -alice = db.records.create( - label="PERSON", - data={ - "name": "Alice", - "age": 28, - "interests": ["coding", "hiking"] - } -) + +```typescript +// Create the embedding index — backfill starts immediately +const { data: index } = await db.ai.indexes.create({ + label: 'ARTICLE', + propertyName: 'content' +}) -bob = db.records.create( - label="PERSON", - data={ - "name": "Bob", - "age": 32, - "interests": ["photography", "travel"] - } -) +// Poll until all records are indexed +let stats = await db.ai.indexes.stats(index.id) +while (stats.data.indexedRecords < stats.data.totalRecords) { + await new Promise(r => setTimeout(r, 500)) + stats = await db.ai.indexes.stats(index.id) +} + +// Search by meaning — returns results sorted by cosine similarity +const { data: results } = await db.ai.search({ + propertyName: 'content', + query: 'how do agents remember things across conversations', + labels: ['ARTICLE'], + limit: 3 +}) + +for (const result of results) { + console.log(`[${result.__score.toFixed(3)}] ${result.title}`) +} +// [0.891] Building AI agents with persistent memory +// [0.743] Vector search explained +// [0.612] Getting started with graph databases ``` - + ```bash -# Create Alice with PERSON label -curl -X POST "https://api.rushdb.com/api/v1/records" \ +# Create the embedding index +INDEX_ID=$(curl -s -X POST "https://api.rushdb.com/api/v1/ai/indexes" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ - -d '{ - "label": "PERSON", - "data": { - "name": "Alice", - "age": 28, - "interests": ["coding", "hiking"] - } - }' + -d '{"label": "ARTICLE", "propertyName": "content"}' \ + | jq -r '.data.id') -# Save the ID from the response for Alice -export ALICE_ID="response_id_here" +# Check stats — wait until indexedRecords == totalRecords +curl 
"https://api.rushdb.com/api/v1/ai/indexes/$INDEX_ID/stats" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" -# Create Bob with PERSON label -curl -X POST "https://api.rushdb.com/api/v1/records" \ +# Search by meaning +curl -X POST "https://api.rushdb.com/api/v1/ai/search" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "label": "PERSON", - "data": { - "name": "Bob", - "age": 32, - "interests": ["photography", "travel"] - } + "propertyName": "content", + "query": "how do agents remember things across conversations", + "labels": ["ARTICLE"], + "limit": 3 }' - -# Save the ID from the response for Bob -export BOB_ID="response_id_here" ``` -## Step 3: Create [Relationships](../concepts/relationships.md) +Each result includes a `__score` field (0–1) — cosine similarity between the query embedding and the record's content embedding. Higher is more relevant. -Let's make Alice and Bob friends: +## Step 4: Structured Query + +Use `records.find` to filter by exact field values. This is independent of the embedding index. 
- -```typescript -// Create a FRIENDS_WITH relationship between Alice and Bob -await alice.attach({ - target: bob, - options: { - type: "FRIENDS_WITH" - } -}); -``` - - + ```python -# Create a FRIENDS_WITH relationship between Alice and Bob -alice.attach( - target=bob, - options={ - "type": "FRIENDS_WITH" +ai_articles = db.records.find({ + "labels": ["ARTICLE"], + "where": { + "tags": { "$in": ["ai"] } } -) +}) + +print([a.data["title"] for a in ai_articles]) +# ['Vector search explained', 'Building AI agents with persistent memory'] +``` + + +```typescript +const aiArticles = await db.records.find({ + labels: ['ARTICLE'], + where: { + tags: { $in: ['ai'] } + } +}) + +console.log(aiArticles.map(a => a.data.title)) +// ['Vector search explained', 'Building AI agents with persistent memory'] ``` - + ```bash -# Create a FRIENDS_WITH relationship between Alice and Bob -curl -X POST "https://api.rushdb.com/api/v1/relationships/$ALICE_ID" \ +curl -X POST "https://api.rushdb.com/api/v1/records/search" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "targetIds": ["'$BOB_ID'"], - "type": "FRIENDS_WITH" + "labels": ["ARTICLE"], + "where": { + "tags": { "$in": ["ai"] } + } }' ``` -## Step 4: Query Records with [Search](../concepts/search/introduction.md) +See the [Where clause reference](../concepts/search/where.md) for the full list of operators (`$gt`, `$lt`, `$contains`, `$not`, and more). -Let's find all people who are interested in outdoor activities: +## Step 5: Semantic Search with Filter - - -```typescript -// Find all people who are interested in outdoor activities using [where](../concepts/search/where.md) conditions -const outdoorsy = await db.records.find({ - labels: ['PERSON'], - where: { - interests: { $in: ['hiking', 'travel'] } - } -}); +Combine a `where` filter with `db.ai.search` to scope semantic search to a subset of records. RushDB narrows candidates by field values first, then ranks them by cosine similarity. 
-console.log('Found:', outdoorsy.map(person => person.data.name)); -``` - - + + ```python -# Find all people who are interested in outdoor activities using [where](../concepts/search/where.md) conditions -outdoorsy = db.records.find({ +# Only search within AI-tagged articles +filtered = db.ai.search({ + "propertyName": "content", + "query": "memory and learning", + "labels": ["ARTICLE"], "where": { - "interests": {"$in": ["hiking", "travel"]} + "tags": { "$in": ["ai"] } }, - "labels": ["PERSON"] + "limit": 5 +}).data + +for result in filtered: + print(f"[{result['__score']:.3f}] {result['title']}") +``` + + +```typescript +// Only search within AI-tagged articles +const { data: filtered } = await db.ai.search({ + propertyName: 'content', + query: 'memory and learning', + labels: ['ARTICLE'], + where: { + tags: { $in: ['ai'] } + }, + limit: 5 }) -print('Found:', [person.data["name"] for person in outdoorsy]) +for (const result of filtered) { + console.log(`[${result.__score.toFixed(3)}] ${result.title}`) +} ``` - + ```bash -# Find all people who are interested in outdoor activities -curl -X POST "https://api.rushdb.com/api/v1/records/search" \ +curl -X POST "https://api.rushdb.com/api/v1/ai/search" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "labels": ["PERSON"], + "propertyName": "content", + "query": "memory and learning", + "labels": ["ARTICLE"], "where": { - "interests": { - "$in": ["hiking", "travel"] - } - } + "tags": { "$in": ["ai"] } + }, + "limit": 5 }' ``` -## Step 5: Using [Transactions](../concepts/transactions.mdx) (Optional) +:::note +Without a `where` clause, RushDB still performs exact semantic ranking over the label-scoped candidates. Adding `where` reduces the candidate set first, which can improve latency on larger datasets. 
+::: + +## Step 6: [Relationships](../concepts/relationships.md) -Transactions ensure data consistency by making a series of operations atomic: +Link each article to an author record. Relationships are first-class in RushDB — they have a type, a direction, and can carry their own properties. - -```typescript -// Begin a transaction -const transaction = await db.transactions.begin(); + +```python +# Create an author record +author = db.records.create( + label="AUTHOR", + data={"name": "Alice", "email": "alice@example.com"} +) -try { - // Create a post - const post = await db.records.create({ - label: "POST", - data: { - title: "My Hiking Adventure", - content: "Today I went hiking in the mountains...", - createdAt: new Date().toISOString() - }, - transaction - }); +# Attach alice's articles to the author +for article in [a for a in articles if a.data.get("author") == "alice"]: + article.attach( + target=author, + options={"type": "WRITTEN_BY", "direction": "out"} + ) +``` + + +```typescript +// Create an author record +const author = await db.records.create({ + label: 'AUTHOR', + data: { name: 'Alice', email: 'alice@example.com' } +}) - // Create a relationship between Alice and the post - await alice.attach( - post, - { - type: 'CREATED' - }, - transaction - ) - - // Commit the transaction - await transaction.commit(); - console.log("Post created and linked to Alice successfully!"); -} catch (error) { - // Roll back the transaction if anything fails - await transaction.rollback(); - console.error("Error occurred, transaction rolled back:", error); +// Attach alice's articles to the author +for (const article of articles.filter(a => a.data.author === 'alice')) { + await article.attach({ + target: author, + options: { type: 'WRITTEN_BY' } + }) } ``` - + +```bash +# Create an author record — save the returned __id +AUTHOR_ID=$(curl -s -X POST "https://api.rushdb.com/api/v1/records" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" 
\ + -d '{ + "label": "AUTHOR", + "data": {"name": "Alice", "email": "alice@example.com"} + }' | jq -r '.data.__id') + +# Attach an article to the author (replace ARTICLE_ID with the actual ID) +curl -X POST "https://api.rushdb.com/api/v1/relationships/$ARTICLE_ID" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "targetIds": ["'"$AUTHOR_ID"'"], + "type": "WRITTEN_BY" + }' +``` + + + +## Step 7: [Transactions](../concepts/transactions.mdx) (Optional) + +Wrap multiple writes in a transaction to guarantee all-or-nothing atomicity. + + + ```python -# Using a transaction with context manager -with db.transactions.begin() as transaction: - # Create a post - post = db.records.create( - label="POST", +with db.transactions.begin() as tx: + new_article = db.records.create( + label="ARTICLE", data={ - "title": "My Hiking Adventure", - "content": "Today I went hiking in the mountains...", - "createdAt": "2024-05-17T10:30:00.000Z" + "title": "Transactions made simple", + "content": "ACID guarantees ensure your data stays consistent even when things fail.", + "tags": ["databases"], + "author": "bob" }, - transaction=transaction + transaction=tx ) - # Create a relationship between Alice and the post - alice.attach( - target=post, - options={ - "type": "CREATED" - }, - transaction=transaction + new_article.attach( + target=author, + options={"type": "WRITTEN_BY", "direction": "out"}, + transaction=tx ) + # Commits automatically on exit; rolls back on exception +``` + + +```typescript +const tx = await db.transactions.begin() - # The transaction will automatically commit if no errors occur - # or roll back if an exception is raised +try { + const newArticle = await db.records.create({ + label: 'ARTICLE', + data: { + title: 'Transactions made simple', + content: 'ACID guarantees ensure your data stays consistent even when things fail.', + tags: ['databases'], + author: 'bob' + }, + transaction: tx + }) + + await newArticle.attach({ + 
target: author, + options: { type: 'WRITTEN_BY' }, + transaction: tx + }) + + await tx.commit() + console.log('Article created and linked — committed.') +} catch (err) { + await tx.rollback() + console.error('Rolled back:', err) +} ``` - + ```bash -# Start a transaction -TRANSACTION_ID=$(curl -X POST "https://api.rushdb.com/api/v1/transactions" \ +# Begin transaction +TX_ID=$(curl -s -X POST "https://api.rushdb.com/api/v1/transactions" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ | jq -r '.id') -# Create a post with the transaction ID -POST_ID=$(curl -X POST "https://api.rushdb.com/api/v1/records" \ +# Create article within transaction +NEW_ARTICLE_ID=$(curl -s -X POST "https://api.rushdb.com/api/v1/records" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ - -H "X-Transaction-ID: $TRANSACTION_ID" \ + -H "X-Transaction-ID: $TX_ID" \ -d '{ - "label": "POST", + "label": "ARTICLE", "data": { - "title": "My Hiking Adventure", - "content": "Today I went hiking in the mountains...", - "createdAt": "'$(date -u +"%Y-%m-%dT%H:%M:%S.000Z")'" + "title": "Transactions made simple", + "content": "ACID guarantees ensure your data stays consistent even when things fail.", + "tags": ["databases"], + "author": "bob" } - }' \ - | jq -r '.id') + }' | jq -r '.data.__id') -# Create relationship within the transaction -curl -X POST "https://api.rushdb.com/api/v1/relationships/$ALICE_ID" \ +# Attach to author within same transaction +curl -X POST "https://api.rushdb.com/api/v1/relationships/$NEW_ARTICLE_ID" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ - -H "X-Transaction-ID: $TRANSACTION_ID" \ + -H "X-Transaction-ID: $TX_ID" \ -d '{ - "targetIds": ["'$POST_ID'"], - "type": "CREATED" + "targetIds": ["'"$AUTHOR_ID"'"], + "type": "WRITTEN_BY" }' -# Commit the transaction -curl -X POST "https://api.rushdb.com/api/v1/transactions/$TRANSACTION_ID/commit" \ +# Commit +curl -X POST 
"https://api.rushdb.com/api/v1/transactions/$TX_ID/commit" \ -H "Authorization: Bearer $RUSHDB_API_KEY" ``` + ## Next Steps -- Learn about basic concepts: - - [Records](../concepts/records.md) - - [Labels](../concepts/labels.md) - - [Relationships](../concepts/relationships.md) - - [Properties](../concepts/properties.md) - - [Search & Querying](../concepts/search/introduction.md) - - [Transactions](../concepts/transactions.mdx) -- Explore the SDK documentation in more detail: - - [TypeScript/JavaScript SDK](../typescript-sdk/introduction) + - [Records](../concepts/records.md) — data model and field types + - [Labels](../concepts/labels.md) — organizing records by category + - [Relationships](../concepts/relationships.md) — connecting records into a graph + - [Search & Querying](../concepts/search/introduction.md) — where clauses, ordering, pagination + - [Transactions](../concepts/transactions.mdx) — atomicity and consistency + - [TypeScript SDK](../typescript-sdk/introduction) - [Python SDK](../python-sdk/introduction) - [REST API](../rest-api/introduction) -- Try working with [transactions](../typescript-sdk/transactions) for ensuring data consistency -- Check out tutorials on specific use cases in the [Tutorials](../tutorials/reusable-search-query) section diff --git a/docs/docs/index.mdx b/docs/docs/index.mdx index c21a78e8..e6a03d82 100644 --- a/docs/docs/index.mdx +++ b/docs/docs/index.mdx @@ -2,135 +2,12 @@ id: index title: Introduction sidebar_position: 0 +hide_title: true +hide_table_of_contents: true +hide_breadcrumbs: true +pagination_next: null +pagination_prev: null --- -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +import DocsHomePage from '@site/src/components/DocsHomePage'; -# Welcome to RushDB - -
-![RushDB Logo](https://raw.githubusercontent.com/rush-db/rushdb/main/rushdb-logo.svg) - -[Homepage](https://rushdb.com) — [Blog](https://rushdb.com/blog) — [Dashboard](https://app.rushdb.com) -
- -## Instant Graph Database for AI & Modern Apps - -**RushDB** is an open-source, graph-powered zero-config database designed to radically simplify data operations. Push any JSON or CSV data, and RushDB intelligently maps relationships, types, and labels without requiring you to understand the underlying graph model. - -### Why RushDB? - -- **Zero Configuration**: Start developing in minutes without complex database setup -- **Graph-Powered**: Built on Neo4j's robust foundation with advanced graph capabilities -- **Developer Experience First**: Intuitive APIs designed to keep you focused on building, not fighting your database -- **AI & Vector Ready**: Native support for embeddings, vector search, and knowledge graphs -- **Flexible Deployment**: Connect to your Neo4j instance (Aura or self-hosted) or use RushDB Cloud - -## Core Capabilities - -- **Intuitive Data Handling**: Push any JSON structure and RushDB intelligently organizes your data -- **Powerful Search**: Filter with precision using an expressive query system without learning a query language -- **Graph Traversal**: Navigate through connected data effortlessly to unlock hidden relationships -- **ACID Transactions**: Ensure data integrity with fully-compliant transaction support -- **Vector Similarity**: Build AI-powered applications with native vector search capabilities - -## Get Started Quickly - -RushDB offers multiple ways to interact with your data: - -- [TypeScript/JavaScript SDK](../typescript-sdk/introduction): Ideal for web and Node.js applications -- [Python SDK](../python-sdk/introduction): Perfect for data science and backend systems -- [REST API](../rest-api/introduction): Language-agnostic access for any platform - -For a deeper understanding of how RushDB works, explore our [Core Concepts](../concepts/storage) or dive into our [Getting Started Guide](/get-started/quick-tutorial). 
- -## Connect Your Way - -RushDB gives you options: - -- **RushDB Cloud**: 2 projects free forever with no maintenance required -- **Self-Hosted**: Connect to your own Neo4j instance in minutes - -## 🚀 Need Interactive Help? - -Get instant guidance through our **[RushDB Docs Chat](https://chatgpt.com/g/g-67d3a9c3088081918201be103b22b83f-rushdb-docs-chat)** - a custom GPT that provides code examples, best practices, and real-time support based on our official documentation. - ---- - -## Quick Example - - - - ```typescript - import RushDB from '@rushdb/javascript-sdk'; - - // Connect to RushDB - const db = new RushDB("RUSHDB_API_KEY"); - - // Push data with any structure you need - await db.records.importJson({ - label: "PRODUCT", - data: { - title: "Ergonomic Chair", - price: 299.99, - inStock: true, - features: ["adjustable height", "lumbar support", "neck rest"], - manufacturer: { - name: "ErgoDesigns", - location: "Zurich" - } - } - }); - - // Query with precision - no query language to learn - const results = await db.records.find({ - labels: ["PRODUCT"], - where: { - price: { $lt: 500 }, - features: { $in: ["lumbar support"] }, - manufacturer: { - location: "Zurich" - } - } - }); - ``` - - - ```python - from rushdb import RushDB - - # Connect to RushDB - db = RushDB("RUSHDB_API_KEY") - - # Push data with any structure you need - db.records.create_many( - label="PRODUCT", - data={ - "title": "Ergonomic Chair", - "price": 299.99, - "inStock": True, - "features": ["adjustable height", "lumbar support", "neck rest"], - "manufacturer": { - "name": "ErgoDesigns", - "location": "Zurich" - } - } - ) - - # Query with precision - no query language to learn - results = db.records.find({ - "where": { - "price": {"$lt": 500}, - "features": {"$in": ["lumbar support"]}, - "manufacturer": { - "location": "Zurich" - } - } - }) - ``` - - - - - -Explore [Tutorials](/tutorials/reusable-search-query) to see more examples and use cases. 
+ diff --git a/docs/docs/mcp-server/configuration.mdx b/docs/docs/mcp-server/configuration.mdx index 3d1c60dc..ce349de9 100644 --- a/docs/docs/mcp-server/configuration.mdx +++ b/docs/docs/mcp-server/configuration.mdx @@ -5,25 +5,27 @@ sidebar_label: Configuration sidebar_position: 4 --- -The MCP server reads configuration from environment variables (via `.env` or your MCP client config). +# Configuration -## Environment variables +| Variable | Required | Default | Description | +|---|---|---|---| +| `RUSHDB_API_KEY` | **yes** | — | Your RushDB API token (from the dashboard) | +| `RUSHDB_API_URL` | no | `https://api.rushdb.com/api/v1` | Override for self-hosted or staging | -- `RUSHDB_API_KEY` (required): Your API key from the RushDB dashboard. -- `RUSHDB_API_URL` (optional): Base URL for the RushDB API. Defaults to `https://api.rushdb.com/api/v1`. +:::tip Security +Treat `RUSHDB_API_KEY` like a password — do not commit it to version control. Prefer MCP client `env` injection or OS keychains over `.env` files in repositories. 
+::: -You can provide these in several ways: +## Providing values -### 1) In MCP client configuration - -Put them under the `env` key for the server entry, for example in Claude Desktop: +### 1) MCP client configuration ```json { "mcpServers": { "rushdb": { "command": "npx", - "args": ["@rushdb/mcp-server"], + "args": ["-y", "@rushdb/mcp-server"], "env": { "RUSHDB_API_KEY": "your-rushdb-api-key-here", "RUSHDB_API_URL": "https://api.rushdb.com/api/v1" @@ -33,28 +35,16 @@ Put them under the `env` key for the server entry, for example in Claude Desktop } ``` -### 2) Using a `.env` file - -Create a `.env` file in `packages/mcp-server` (or your working directory) with: +### 2) `.env` file ``` RUSHDB_API_KEY=your-rushdb-api-key-here -# Optional (self-hosted/staging): +# Optional: RUSHDB_API_URL=https://your-hosted/api/v1 ``` -The MCP server attempts fallback resolution for `.env` when launched from different CWDs, so it can find your env vars reliably. - -### 3) Export in your shell +### 3) Shell export ```bash export RUSHDB_API_KEY="your-rushdb-api-key-here" -# Optional -export RUSHDB_API_URL="https://your-hosted/api/v1" ``` - -## Security tips - -- Treat your API key like a password; do not commit it to version control. -- Prefer client-level `env` injection or OS keychains when possible. -- Rotate keys periodically and revoke keys you no longer need. diff --git a/docs/docs/mcp-server/examples.mdx b/docs/docs/mcp-server/examples.mdx index 55c5507f..2d27cfa3 100644 --- a/docs/docs/mcp-server/examples.mdx +++ b/docs/docs/mcp-server/examples.mdx @@ -5,28 +5,77 @@ sidebar_label: Examples sidebar_position: 5 --- -Here are some concrete prompts you can use with your MCP client to interact with RushDB. +# Examples -## Listing labels +## Discovery -> Ask the RushDB MCP server to run `FindLabels` with `limit=20`. +Understand the data shape before querying: -## Creating and querying records +> "What labels and fields exist in my database? Use getOntologyMarkdown." 
-> Use `CreateRecord` to add a `Task` with data `{"title": "Write docs", "status": "open"}`. +The server calls `getOntologyMarkdown` once and returns a compact schema — label names, field types, value ranges, and the relationship map. Use exact label names from this response — they're case-sensitive. -> Now use `FindRecords` with `where={"status": "open"}` and `orderBy={"createdAt": "desc"}` limit 5. +--- + +## Simple query + +> "Find the top 5 movies by rating." + +Calls `findRecords` with `orderBy: {rating: "desc"}, limit: 5`. + +--- + +## Aggregation + +> "How many records exist per label? Show me a breakdown." -## Setting records +> "What's the average rating of MOVIE records, grouped by genre?" -> Use `SetRecord` for `recordId=""` with label `Task` and data `{"title": "Polish docs", "status": "in-progress"}`. +Calls `findRecords` with `aggregate + groupBy`. **Never include `limit` when `aggregate` is present** — it restricts the record scan and produces incorrect totals. -## Attaching relationships +> "Call getSearchQuerySpec, then run findRecords with labels=\["Order"\], aggregate=\{"total":\{"fn":"sum","field":"amount"\}\}, and groupBy=\["status"\]." + +--- + +## Relationships + +> "Find all actors who appeared in films directed by Christopher Nolan." + +Calls `findRecords` with nested `where` traversal: ACTOR → MOVIE → DIRECTOR. + +--- -> Call `AttachRelation` from `sourceId=""` to `targetIds=["", ""]` with `relationType="references"`. +## Bulk import -## Transactions +> "I'll paste a JSON array of employee records — import them as EMPLOYEE records, upsert on email." + +Calls `bulkCreateRecords` with `mergeBy: ["email"], mergeStrategy: "append"`. + +--- + +## Semantic search + +> "Find articles most similar to 'distributed database systems'." + +Calls `semanticSearch` with the query embedded against the indexed `content` property. Requires a ready embedding index (see workflow below). 
+ +--- + +## Embedding index workflow + +Semantic search requires a ready index. This spans 3 tool calls and is non-obvious: + +> "Create an embedding index on the `content` property of ARTICLE records, then wait until it's ready, then find articles similar to 'machine learning'." + +1. `createEmbeddingIndex` — registers the policy (backfill runs async, not instant) +2. `getEmbeddingIndexStats` — poll until `indexedRecords === totalRecords` +3. `semanticSearch` — now runs against the live index + +--- -> Start a transaction, create two `Task` records, attach a relation, and commit. +## Tips -If something goes wrong, you can ask your client to call `TransactionRollback` with the `transactionId` returned from `TransactionBegin`. +- Always start a new session with: "Load the schema with getOntologyMarkdown" +- Use exact label names from the schema — they're case-sensitive +- For aggregations, tell the AI "don't use limit" to avoid incorrect totals +- Call `getSearchQuerySpec` before any query using dates, `groupBy`, or relationship traversal diff --git a/docs/docs/mcp-server/introduction.mdx b/docs/docs/mcp-server/introduction.mdx index ba722446..4bab749d 100644 --- a/docs/docs/mcp-server/introduction.mdx +++ b/docs/docs/mcp-server/introduction.mdx @@ -5,29 +5,36 @@ sidebar_label: Introduction sidebar_position: 1 --- -Welcome to the RushDB MCP (Model Context Protocol) server. This server exposes RushDB's Labeled Meta Property Graph (LMPG) database to MCP-compatible clients like Claude Desktop, Cursor, or any MCP tooling. +# MCP Server -With the MCP server you can: +Connect Claude, Cursor, or any MCP-compatible client to your RushDB database. Type natural language; the server handles discovery, query building, and execution. 
-- Browse labels and properties -- Create, read, update, and delete records -- Attach/Detach relationships -- Perform flexible queries with filtering, ordering, pagination, and aggregations -- Run bulk operations -- Export data to CSV -- Manage transactions +``` +You: How many MOVIE records have a rating above 8? -This section covers setup, configuration, available tools, examples, and troubleshooting. +Claude: [calls getOntologyMarkdown → confirms MOVIE label and rating field] + [calls findRecords: labels=["MOVIE"], aggregate={count:{fn:"count",alias:"$record"}}, where={rating:{$gt:8}}] -> Looking for SDKs instead? See the TypeScript SDK and Python SDK sections in the sidebar. +Result: 1 movie matched (Inception, rating 8.8) +``` -## Discovery-first Query Builder (Prompts) +## Mandatory workflow -The MCP server exposes a built-in system prompt via the MCP Prompts API: +The MCP server ships with a built-in system prompt that enforces a four-step workflow: + +1. **ONTOLOGY** — call `getOntologyMarkdown` first. Returns all label names (case-sensitive), field names and types, value ranges, and the full relationship map in a single call. +2. **INTENT** — classify the request: aggregation, listing, or mutation. +3. **QUERY SPEC** — before any `findRecords` call that uses dates, aggregation, `groupBy`, or relationship traversal, call `getSearchQuerySpec`. It returns the full operator reference, both `groupBy` modes, limit rules, and annotated examples. +4. **BUILD** — use only label and field names returned from discovery. Labels are case-sensitive. Never invent names or operators. + +This workflow prevents hallucinated labels, wrong operator names, and common pagination errors with aggregations. 
+ +## Prompts API + +The system prompt is also accessible via the MCP Prompts API: - Name: `rushdb.queryBuilder` -- Goal: ensure the agent discovers actual labels/properties first (FindLabels/FindProperties), then builds validated SearchQuery objects (with correct use of where/orderBy/aggregate/groupBy) before calling find-related tools. -MCP clients that support Prompts should fetch this prompt at session start and set it as the system message. This makes natural language requests like “How many workers in my company?” resolve reliably without hallucinating labels or fields. +Clients that support Prompts can fetch it at session start and inject it as the system message. Clients that do not yet support it can call the `getQueryBuilderPrompt` tool for the same text. -If your client doesn’t support Prompts yet, call the utility tool `GetQueryBuilderPrompt` and use the returned text as your session’s system message. +→ [Quickstart](./quickstart) diff --git a/docs/docs/mcp-server/quickstart.mdx b/docs/docs/mcp-server/quickstart.mdx index 335f2d09..2ba26574 100644 --- a/docs/docs/mcp-server/quickstart.mdx +++ b/docs/docs/mcp-server/quickstart.mdx @@ -5,63 +5,75 @@ sidebar_label: Quickstart sidebar_position: 2 --- -Get up and running with the RushDB MCP server in minutes. +# Quickstart -## Prerequisites +## Claude Desktop -- A RushDB account and API key from https://rushdb.com (Dashboard: https://app.rushdb.com) -- An MCP client (for example, Claude Desktop) -- Node.js 18+ installed locally - -## Install - -```bash -npm install -g @rushdb/mcp-server -``` - -Alternatively, you can run the server with `npx` from your MCP client configuration. 
- -## Configure your MCP client (Claude Desktop example) - -Add or update your Claude Desktop config (e.g., `~/Library/Application Support/Claude/claude_desktop_config.json` on macOS): +Add to `~/Library/Application Support/Claude/claude_desktop_config.json`: ```json { "mcpServers": { "rushdb": { "command": "npx", - "args": ["@rushdb/mcp-server"], + "args": ["-y", "@rushdb/mcp-server"], "env": { - "RUSHDB_API_KEY": "your-rushdb-api-key-here", - "RUSHDB_API_URL": "https://api.rushdb.com/api/v1" + "RUSHDB_API_KEY": "your-api-key-here" } } } } ``` -Notes: -- `RUSHDB_API_KEY` is required. -- `RUSHDB_API_URL` is optional and defaults to `https://api.rushdb.com/api/v1`. Override it for self-hosted/staging. +Then restart Claude Desktop. That's it. ## Verify -Restart your MCP client. Ask it something like: +Ask Claude: -> Use the RushDB MCP server to list labels. +> "Call getOntologyMarkdown and show me what labels exist in my RushDB project." -You should see a tool call response listing labels, or a helpful message if your credentials are missing. +## Other clients -## Enable the Query Builder prompt (recommended) +**Cursor** — add to `.cursor/mcp.json`: -To make discovery-first behavior automatic, your MCP client should: +```json +{ + "mcpServers": { + "rushdb": { + "command": "npx", + "args": ["-y", "@rushdb/mcp-server"], + "env": { "RUSHDB_API_KEY": "your-api-key-here" } + } + } +} +``` + +**VS Code** (Copilot agent mode) — add to `.vscode/mcp.json`: + +```json +{ + "servers": { + "rushdb": { + "type": "stdio", + "command": "npx", + "args": ["-y", "@rushdb/mcp-server"], + "env": { "RUSHDB_API_KEY": "your-api-key-here" } + } + } +} +``` + +**Override API URL** (self-hosted / staging): add `"RUSHDB_API_URL": "https://your-host/api/v1"` to any `env` block above. + +## Enable the Query Builder prompt (recommended) -1) Call `ListPrompts` → find `rushdb.queryBuilder`. -2) Call `GetPrompt` with that name. 
-3) Set the returned system message for the model session that will use RushDB tools. +To make discovery-first behavior automatic: -Once enabled, the agent will first discover labels/properties and then construct validated SearchQuery objects for find-related calls. +1. Call `ListPrompts` → find `rushdb.queryBuilder` +2. Call `GetPrompt` with that name +3. Set the returned text as the system message for your RushDB session -No Prompts support? Use the fallback tool: +No Prompts API support? Use the fallback: -> Call `GetQueryBuilderPrompt` and set the response as the system message for your RushDB session. +> "Call `getQueryBuilderPrompt` and set the response as the system message." diff --git a/docs/docs/mcp-server/tools.mdx b/docs/docs/mcp-server/tools.mdx index 855ac38b..b78d9029 100644 --- a/docs/docs/mcp-server/tools.mdx +++ b/docs/docs/mcp-server/tools.mdx @@ -9,50 +9,53 @@ The RushDB MCP server exposes a comprehensive set of tools. Each tool includes a ## Database discovery -- FindLabels — List/filter record labels and counts -- FindProperties — List/filter properties -- FindRelationships — Search for relationships +- getOntologyMarkdown — Return all labels, properties, value ranges, and relationships as Markdown (call first at session start) +- getOntology — Return ontology as structured JSON (use when property `id` values are needed for `propertyValues`) +- findLabels — List/filter record labels and counts +- findProperties — List/filter properties by name, type, or label +- findRelationships — Search for relationships ## Record operations -- CreateRecord — Create a new record -- UpdateRecord — Update an existing record (partial) -- SetRecord — Replace all fields of a record -- DeleteRecord — Delete a record (alias of DeleteRecordById) -- DeleteRecordById — Delete a record by ID -- GetRecord — Get a record by ID -- GetRecordsByIds — Get multiple records by their IDs -- FindRecords — Search for records with where/limit/skip/orderBy/aggregate/groupBy -- 
FindOneRecord — Find a single record matching criteria -- FindUniqRecord — Find a unique record matching criteria +- createRecord — Create a new record +- updateRecord — Update an existing record (partial) +- setRecord — Replace all fields of a record +- deleteRecord — Delete a record (alias of deleteRecordById) +- deleteRecordById — Delete a record by ID +- getRecord — Get a record by ID +- getRecordsByIds — Get multiple records by their IDs +- findRecords — Search for records with where/limit/skip/orderBy/aggregate/groupBy +- findOneRecord — Find a single record matching criteria +- findUniqRecord — Find a unique record matching criteria ## Relationship management -- AttachRelation — Attach relationships between records -- DetachRelation — Detach relationships between records -- FindRelationships — Search for relationships +- attachRelation — Attach relationships between records +- detachRelation — Detach relationships between records +- findRelationships — Search for relationships ## Bulk operations -- BulkCreateRecords — Create multiple records at once -- BulkDeleteRecords — Delete multiple records by query +- bulkCreateRecords — Create multiple records at once +- bulkDeleteRecords — Delete multiple records by query ## Data export -- ExportRecords — Export records to CSV format +- exportRecords — Export records to CSV format -## Transactions +## Vector search & embeddings -- TransactionBegin — Begin a transaction -- TransactionCommit — Commit a transaction -- TransactionRollback — Roll back a transaction -- TransactionGet — Get info about a transaction +- findEmbeddingIndexes — List all embedding index policies and their status +- createEmbeddingIndex — Create an embedding index for a string property (async backfill) +- deleteEmbeddingIndex — Delete an embedding index and all stored vectors +- getEmbeddingIndexStats — Return backfill progress stats for an embedding index +- semanticSearch — Perform vector similarity search over indexed records ## Utilities -- 
GetSettings — Get current database settings -- OpenBrowser — Open a URL in a browser -- HelpAddToClient — Setup instructions for adding this server to MCP clients +- getSearchQuerySpec — Fetch the complete SearchQuery operator and syntax reference +- getQueryBuilderPrompt — Return the system prompt (fallback for clients without Prompts API) +- helpAddToClient — Setup instructions for adding this server to MCP clients --- @@ -62,25 +65,19 @@ The RushDB MCP server exposes a comprehensive set of tools. Each tool includes a Ask your MCP client: -> Use the RushDB MCP server to run FindLabels with limit=10. +> Use the RushDB MCP server to run findLabels with limit=10. ### Create a record -> Use CreateRecord to add a record with label "Task" and data `{"title": "Write docs", "status": "open"}`. +> Use createRecord to add a record with label "Task" and data `{"title": "Write docs", "status": "open"}`. ### Find records -> Call FindRecords where `{"status": "open"}` orderBy `{"createdAt": "desc"}` limit 5. +> Call findRecords where `{"status": "open"}` orderBy `{"createdAt": "desc"}` limit 5. ### Attach relationships -> AttachRelation from sourceId `""` to targetIds `["", ""]` with relationType `"references"`. - -### Transactions - -> Begin a transaction, create two records, attach a relation, then commit. - -If needed, your client can call `TransactionRollback` to undo the changes. +> attachRelation from sourceId `""` to targetIds `["", ""]` with relationType `"references"`. --- @@ -90,9 +87,45 @@ For detailed input schemas, see the tool definitions in the MCP server source (` ## Tool reference -Below, each tool is described with its purpose and arguments. Types reflect the MCP input schema; required flags and defaults are noted. +:::tip Start here +Call `getOntologyMarkdown` before any other tool. It returns all label names (case-sensitive), field names, and the full relationship map — everything you need to build accurate queries. 
+::: + +### getOntologyMarkdown + +Return the full database schema as Markdown: all label names (case-sensitive), their properties with types and value ranges, and the complete relationship map. -### FindLabels +**Call this first** at the start of every session before calling any find or mutation tool. The returned label and field names are the single source of truth — never invent names from context. + +Arguments: + +| Name | Type | Required | Default | Description | +|--------|------------------|----------|---------|-------------| +| labels | array of strings | no | — | Restrict output to specific labels; omit for the full schema | + +Example prompt: + +> Call `getOntologyMarkdown` to load the full schema before running any queries. + +--- + +### getOntology + +Return the same ontology data as structured JSON instead of Markdown. Use this variant when you need property `id` values for a subsequent `propertyValues` call. + +Arguments: + +| Name | Type | Required | Default | Description | +|--------|------------------|----------|---------|-------------| +| labels | array of strings | no | — | Restrict output to specific labels | + +Example prompt: + +> Call `getOntology` with `labels=["Product"]` to get property IDs for the Product schema. + +--- + +### findLabels List or filter record labels and their counts. @@ -107,11 +140,11 @@ Arguments: Example prompt: -> Run `FindLabels` with `limit=10` ordered by `{ "count": "desc" }`. +> Run `findLabels` with `limit=10` ordered by `{ "count": "desc" }`. --- -### CreateRecord +### createRecord Create a new record with the specified label and data. @@ -133,11 +166,11 @@ Upsert behavior: Example prompt: -> Call `CreateRecord` with `label="User"`, `data={"email":"a@b.com","name":"Ann"}`, and `options={"mergeBy":["email"],"mergeStrategy":"append"}` to upsert by email. 
+> Call `createRecord` with `label="User"`, `data={"email":"a@b.com","name":"Ann"}`, and `options={"mergeBy":["email"],"mergeStrategy":"append"}` to upsert by email. --- -### UpdateRecord +### updateRecord Partially update fields of an existing record. @@ -152,11 +185,11 @@ Arguments: Example prompt: -> Use `UpdateRecord` with `recordId=""`, `label="Task"`, and `data={"status":"done"}`. +> Use `updateRecord` with `recordId=""`, `label="Task"`, and `data={"status":"done"}`. --- -### SetRecord +### setRecord Replace all fields of a record (full update). @@ -171,13 +204,13 @@ Arguments: Example prompt: -> Call `SetRecord` for `recordId=""`, `label="Task"`, data `{"title":"Polish docs","status":"in-progress"}`. +> Call `setRecord` for `recordId=""`, `label="Task"`, data `{"title":"Polish docs","status":"in-progress"}`. --- -### DeleteRecord / DeleteRecordById +### deleteRecord / deleteRecordById -Delete a record by its ID. `DeleteRecord` is an alias of `DeleteRecordById`. +Delete a record by its ID. `deleteRecord` is an alias of `deleteRecordById`. Arguments: @@ -188,11 +221,11 @@ Arguments: Example prompt: -> Use `DeleteRecordById` with `recordId=""`. +> Use `deleteRecordById` with `recordId=""`. --- -### GetRecord +### getRecord Retrieve a specific record by ID. @@ -204,11 +237,11 @@ Arguments: Example prompt: -> Call `GetRecord` for `recordId=""`. +> Call `getRecord` for `recordId=""`. --- -### GetRecordsByIds +### getRecordsByIds Retrieve multiple records by an array of IDs. @@ -220,11 +253,11 @@ Arguments: Example prompt: -> Call `GetRecordsByIds` with `recordIds=["",""]`. +> Call `getRecordsByIds` with `recordIds=["",""]`. --- -### FindRecords +### findRecords Search for records with advanced filtering, sorting, paging, grouping, and aggregation. @@ -242,11 +275,11 @@ Arguments: Example prompt: -> Run `FindRecords` with `where={"status":"open"}`, `orderBy={"createdAt":"desc"}`, `limit=5`. 
+> Run `findRecords` with `where={"status":"open"}`, `orderBy={"createdAt":"desc"}`, `limit=5`. --- -### FindOneRecord +### findOneRecord Find a single record that matches the criteria (returns one or none). @@ -259,11 +292,11 @@ Arguments: Example prompt: -> Use `FindOneRecord` with `where={"email":"user@example.com"}`. +> Use `findOneRecord` with `where={"email":"user@example.com"}`. --- -### FindUniqRecord +### findUniqRecord Find a unique record that matches the criteria (errors if multiple match in some clients; the server returns not-found text if none). @@ -276,11 +309,11 @@ Arguments: Example prompt: -> Use `FindUniqRecord` with `labels=["User"]` and `where={"username":"alice"}`. +> Use `findUniqRecord` with `labels=["User"]` and `where={"username":"alice"}`. --- -### AttachRelation +### attachRelation Create relationships from a source record to one or more target records. @@ -297,11 +330,11 @@ Arguments: Example prompt: -> Call `AttachRelation` from `sourceId=""` to `targetIds=["",""]` with `relationType="references"`. +> Call `attachRelation` from `sourceId=""` to `targetIds=["",""]` with `relationType="references"`. --- -### DetachRelation +### detachRelation Remove relationships between records. @@ -318,11 +351,11 @@ Arguments: Example prompt: -> Use `DetachRelation` with `sourceId=""`, `targetId=""`, and `relationType="references"`. +> Use `detachRelation` with `sourceId=""`, `targetId=""`, and `relationType="references"`. --- -### FindRelationships +### findRelationships Search for relationships with filters, sorting, and paging. @@ -337,11 +370,11 @@ Arguments: Example prompt: -> Run `FindRelationships` with `limit=20`. +> Run `findRelationships` with `limit=20`. --- -### BulkCreateRecords +### bulkCreateRecords Create multiple records in a single operation. 
@@ -357,17 +390,17 @@ Arguments: | options.returnResult | boolean | no | true | Return created/upserted records (IDs always returned separately) | Upsert notes: -- Same semantics as `CreateRecord`, but applied across the batch. +- Same semantics as `createRecord`, but applied across the batch. - If records are flat objects, uses the `createMany` path; otherwise falls back to JSON import BFS with upsert. - For large batches consider reducing the size or increasing transaction TTL if timeouts occur. Example prompt: -> Call `BulkCreateRecords` for `label="User"` with `data=[{"email":"a@b.com","name":"Ann"},{"email":"b@c.com","name":"Bill"}]` and `options={"mergeBy":["email"],"mergeStrategy":"append"}`. +> Call `bulkCreateRecords` for `label="User"` with `data=[{"email":"a@b.com","name":"Ann"},{"email":"b@c.com","name":"Bill"}]` and `options={"mergeBy":["email"],"mergeStrategy":"append"}`. --- -### BulkDeleteRecords +### bulkDeleteRecords Delete multiple records matching a query. @@ -381,11 +414,11 @@ Arguments: Example prompt: -> Use `BulkDeleteRecords` with `where={"status":"obsolete"}`. +> Use `bulkDeleteRecords` with `where={"status":"obsolete"}`. --- -### ExportRecords +### exportRecords Export records to CSV. @@ -400,11 +433,11 @@ Arguments: Example prompt: -> Call `ExportRecords` with `labels=["Task"]` and `limit=100`. +> Call `exportRecords` with `labels=["Task"]` and `limit=100`. --- -### PropertyValues +### propertyValues Get values for a specific property. @@ -420,11 +453,11 @@ Arguments: Example prompt: -> Run `PropertyValues` with `propertyId=""` and `limit=20`. +> Run `propertyValues` with `propertyId=""` and `limit=20`. --- -### FindProperties +### findProperties Search for properties with filters, sorting, and paging. @@ -439,11 +472,11 @@ Arguments: Example prompt: -> Call `FindProperties` with `limit=25` ordered by `{ "count": "desc" }`. +> Call `findProperties` with `limit=25` ordered by `{ "count": "desc" }`. 
--- -### FindPropertyById +### findPropertyById Retrieve a specific property by ID. @@ -455,11 +488,11 @@ Arguments: Example prompt: -> Use `FindPropertyById` with `propertyId=""`. +> Use `findPropertyById` with `propertyId=""`. --- -### DeleteProperty +### deleteProperty Delete a property by ID. @@ -471,80 +504,138 @@ Arguments: Example prompt: -> Call `DeleteProperty` with `propertyId=""`. +> Call `deleteProperty` with `propertyId=""`. --- -### TransactionBegin +### getSearchQuerySpec -Begin a new database transaction. +Return the complete RushDB SearchQuery specification as a focused reference document. Call this before building any `findRecords` query that involves dates, aggregation, `groupBy`, or relationship traversal. -Arguments: +Covers: +- All `where` operators: string, number, boolean, datetime component objects, `$exists`, `$type`, logical grouping (`$and`/`$or`/`$not`/`$nor`/`$xor`) +- Relationship traversal syntax: `$alias`, `$relation`, `$id` +- All aggregate functions: `count`/`sum`/`avg`/`min`/`max`/`collect`/`timeBucket` +- Both `groupBy` modes: dimensional (one row per distinct value) and self-group (collapse to single KPI row) +- Late-ordering rules for correct full-scan totals +- `COLLECT` nesting for hierarchical output +- Limit rules by query mode, multi-hop path discovery, enum normalization +- Validation checklist and annotated query examples -| Name | Type | Required | Default | Description | -|------|--------|----------|---------|-------------| -| ttl | number | no | — | Time-to-live in milliseconds | +Arguments: none Example prompt: -> Run `TransactionBegin` with `ttl=60000` (1 minute). +> Call `getSearchQuerySpec` to load the query syntax reference, then build a `findRecords` aggregation query. --- -### TransactionCommit +### getQueryBuilderPrompt + +Return the RushDB system prompt as plain text. Returns the same text that is delivered automatically via the MCP Prompts API. Use this if your MCP client does not support the Prompts API. 
+ +Arguments: none -Commit a transaction. +Example prompt: + +> Call `getQueryBuilderPrompt` and use the response as the system message for your session. + +--- + +### helpAddToClient + +Return setup instructions for adding the RushDB MCP server to a specific MCP client (Claude Desktop, Cursor, VS Code, etc.). + +Arguments: none + +Example prompt: + +> Call `helpAddToClient` to see how to add the RushDB MCP server to Claude Desktop. + +--- + +### findEmbeddingIndexes + +List all embedding index policies configured for the current project. Call this before creating a new index to verify whether one already exists for the target label and property. + +Each returned entry contains: `id`, `label`, `propertyName`, `modelKey`, `dimensions`, `enabled`, `status` (`pending` | `indexing` | `ready` | `error`), `createdAt`, `updatedAt`. + +Arguments: none + +Example prompt: + +> Call `findEmbeddingIndexes` to see which vector indexes exist and their current status. + +--- + +### createEmbeddingIndex + +Create a new embedding index policy for a string property. RushDB will asynchronously embed every existing value of `propertyName` on records with the given `label` and keep new values embedded on write. Once the index status becomes `"ready"` (monitor with `getEmbeddingIndexStats`), use `semanticSearch` to run natural-language queries against it. Arguments: -| Name | Type | Required | Default | Description | -|---------------|--------|----------|---------|-------------| -| transactionId | string | yes | — | Transaction ID to commit | +| Name | Type | Required | Default | Description | +|--------------|--------|----------|---------|-------------| +| label | string | yes | — | Record label to scope the index to (e.g. `"Book"`, `"Task"`) | +| propertyName | string | yes | — | Name of the string property whose values will be embedded | Example prompt: -> Call `TransactionCommit` with `transactionId=""`. 
+> Call `createEmbeddingIndex` with `label="Article"` and `propertyName="body"` to enable semantic search on article bodies. --- -### TransactionRollback +### deleteEmbeddingIndex -Rollback a transaction. +Delete an embedding index policy by its ID and strip all stored embedding vectors for that index. **Irreversible.** Always confirm with the user before calling. Use `findEmbeddingIndexes` to get the `indexId`. Arguments: -| Name | Type | Required | Default | Description | -|---------------|--------|----------|---------|-------------| -| transactionId | string | yes | — | Transaction ID to rollback | +| Name | Type | Required | Default | Description | +|---------|--------|----------|---------|-------------| +| indexId | string | yes | — | ID of the embedding index to delete | Example prompt: -> Use `TransactionRollback` with `transactionId=""`. +> Call `deleteEmbeddingIndex` with `indexId=""` after confirming the user wants to remove the index. --- -### TransactionGet +### getEmbeddingIndexStats -Get information about a transaction. +Return Neo4j-level statistics for an embedding index: `totalRecords` and `indexedRecords`. Use this to monitor backfill progress after creating an index — when `indexedRecords === totalRecords` the index is fully ready for semantic search. Arguments: -| Name | Type | Required | Default | Description | -|---------------|--------|----------|---------|-------------| -| transactionId | string | yes | — | Transaction ID to inspect | +| Name | Type | Required | Default | Description | +|---------|--------|----------|---------|-------------| +| indexId | string | yes | — | ID of the embedding index (from `findEmbeddingIndexes`) | Example prompt: -> Run `TransactionGet` with `transactionId=""`. +> Call `getEmbeddingIndexStats` with `indexId=""` to check how many records have been indexed so far. --- -### GetSettings +### semanticSearch -Return current database settings and configuration. 
+Perform semantic (vector) similarity search over records whose `propertyName` has been indexed with `createEmbeddingIndex`. Provide a free-text `query` — RushDB embeds it and returns the most similar records ranked by cosine similarity (`__score`). -Arguments: none +RushDB performs exact semantic search by applying label/optional `where` filtering before cosine ranking. + +Requires an embedding index in `"ready"` status for the given `label` + `propertyName`. + +Arguments: + +| Name | Type | Required | Default | Description | +|--------------|------------------|----------|---------|-------------| +| propertyName | string | yes | — | Name of the indexed property to search against | +| query | string | yes | — | Free-text query to embed and compare against stored vectors | +| labels | array of strings | yes | — | One or more record labels to scope the search; first label resolves the embedding index | +| where | object | no | — | Optional filter applied before cosine scoring | +| limit | number | no | 20 | Maximum number of results to return | +| skip | number | no | 0 | Number of results to skip for pagination | Example prompt: -> Call `GetSettings`. +> Call `semanticSearch` with `propertyName="body"`, `query="machine learning for beginners"`, and `labels=["Article"]` to find the most relevant articles. diff --git a/docs/docs/mcp-server/troubleshooting.mdx b/docs/docs/mcp-server/troubleshooting.mdx index 67dbf7cf..1acadaa7 100644 --- a/docs/docs/mcp-server/troubleshooting.mdx +++ b/docs/docs/mcp-server/troubleshooting.mdx @@ -5,48 +5,59 @@ sidebar_label: Troubleshooting sidebar_position: 6 --- -Common issues and how to fix them. +# Troubleshooting -## Missing API key +## HTTP errors -Error: +The MCP server maps HTTP status codes to actionable messages returned directly to the model: + +| Status | Message returned to LLM | +|--------|-------------------------| +| 400 | Bad request — query or payload is invalid. Check field names, operators, and required arguments. 
Call `getSearchQuerySpec` for correct syntax. | +| 401 | Unauthorized — API key is missing or invalid. | +| 403 | Forbidden — the API key lacks permission for this operation. | +| 404 | Not found — resource does not exist. Verify record IDs and label names (case-sensitive). Call `getOntologyMarkdown` to rediscover the schema. | +| 409 | Conflict — operation conflicts with existing data (e.g. duplicate key). | +| 422 | Unprocessable — field types or required fields don't match the schema. | +| 5xx | Server error — retry or contact support. | + +:::tip Self-correction +Because the model receives an actionable hint rather than a bare status code, it can self-correct — for example, calling `getOntologyMarkdown` automatically after a 404 without you needing to intervene. +::: + +--- + +## Common issues + +**Missing API key** ``` -RUSHDB_API_KEY environment variable is required. Set it in a .env file (packages/mcp-server/.env) or export it before running the server. +RUSHDB_API_KEY environment variable is required. ``` -Fix: -- Provide `RUSHDB_API_KEY` in your MCP client config under `env`, or -- Create a `.env` file with `RUSHDB_API_KEY=...`, or -- `export RUSHDB_API_KEY=...` in your shell +Fix: set `RUSHDB_API_KEY` in your MCP client `env` block, a `.env` file, or via `export`. -## Wrong or unreachable API URL +--- + +**Wrong or unreachable API URL** (`Invalid URL`, `Failed to fetch`, `Network error`) + +Fix: verify `RUSHDB_API_URL` (if set). Remove it to fall back to `https://api.rushdb.com/api/v1`. -Errors like: -- `Invalid URL` -- `Failed to fetch` -- `Network error` +--- -Fix: -- Verify `RUSHDB_API_URL` (if set) points to a valid RushDB API endpoint. -- If you’re self-hosting, make sure your server is reachable from your machine. -- Remove `RUSHDB_API_URL` to use the default `https://api.rushdb.com/api/v1`. +**"RushDB is not a constructor"** -## "RushDB is not a constructor" +Cause: CJS/ESM interop issue in some environments. 
-Cause: -- Some environments/bundlers differ in how CJS/ESM default exports are exposed, causing the constructor to be missing. +Fix: update to the latest `@rushdb/mcp-server` version and rebuild from source if needed. -Fix: -- The MCP server now resolves the constructor from `default`, `RushDB`, or the module itself, so updating to the latest version should address this. -- If building from source, rebuild the package and restart your MCP client. +--- -## Permissions or timeouts +**Permissions or timeouts** -- Check your network/firewall configuration. -- Increase client timeouts if your environment is slow. -- Try smaller `limit` values on large queries. +- Check network/firewall configuration +- Try smaller `limit` values on large queries -## Still stuck? +--- -Open an issue with logs at https://github.com/rush-db/rushdb/issues +Still stuck? Open an issue at https://github.com/rush-db/rushdb/issues diff --git a/docs/docs/python-sdk/ai/_category_.json b/docs/docs/python-sdk/ai/_category_.json new file mode 100644 index 00000000..1c831adf --- /dev/null +++ b/docs/docs/python-sdk/ai/_category_.json @@ -0,0 +1,10 @@ +{ + "label": "AI & Vectors", + "position": 1, + "collapsed": false, + "collapsible": true, + "link": { + "type": "doc", + "id": "python-sdk/ai/overview" + } +} diff --git a/docs/docs/python-sdk/ai/advanced-indexing.md b/docs/docs/python-sdk/ai/advanced-indexing.md new file mode 100644 index 00000000..3cf49cd2 --- /dev/null +++ b/docs/docs/python-sdk/ai/advanced-indexing.md @@ -0,0 +1,221 @@ +--- +sidebar_position: 2 +title: Advanced Indexing — BYOV +--- + +# Advanced Indexing — Bring Your Own Vectors + +**External indexes** (BYOV — Bring Your Own Vectors) let you supply pre-computed embedding vectors instead of having the server compute them. 
Use them when you need: + +- A custom or private model the server cannot access +- Multimodal embeddings (image, audio, document structure) +- Vectors already produced by your ML pipeline +- Reproducible embeddings not tied to the server's active model + +--- + +## Creating an external index + +Pass `"sourceType": "external"` in the params dict. `dimensions` is **required** because the server never calls an embedding model and cannot infer the vector size: + +```python +# Explicit sourceType +response = db.ai.indexes.create({ + "label": "Article", + "propertyName": "body", + "sourceType": "external", + "dimensions": 768, + "similarityFunction": "cosine", +}) +print(response.data["status"]) # 'awaiting_vectors' +``` + +An external index starts with status `awaiting_vectors` and transitions to `ready` once at least one vector has been written. + +### External vs managed comparison + +| | Managed | External | +|---|---|---| +| `sourceType` | `"managed"` | `"external"` | +| Initial status | `"pending"` | `"awaiting_vectors"` | +| Who computes embeddings | RushDB server (configured model) | Your application | +| `dimensions` required | No (uses server default) | **Yes** | +| Backfill for existing records | Automatic | Manual via `upsert_vectors` or inline writes | + +--- + +## `db.ai.indexes.upsert_vectors()` + +The bulk upload API — ideal for seeding an index from a dataset or syncing after a batch pipeline. 
+ +```python +db.ai.indexes.upsert_vectors( + index_id: str, + params: dict # {"items": [{"recordId": str, "vector": list[float]}]} +) -> ApiResponse +``` + +```python +# Fetch your records and embed them with your own model +records_response = db.records.find({"where": {"__label": "Article"}}) + +items = [] +for record in records_response.data: + vector = my_embedder.embed(record["body"]) # your embedding model + items.append({"recordId": record["__id"], "vector": vector}) + +db.ai.indexes.upsert_vectors(ext_index_id, {"items": items}) +``` + +The request is **idempotent** — calling it again with the same `recordId` replaces the stored vector. + +--- + +## Writing vectors at record creation time + +Instead of a two-step create → upsert_vectors flow, you can write vectors inline using the `vectors` parameter on any write operation. See [Write Records with Vectors](./write-with-vectors.md) for the full reference. + +```python +# One step: create record AND write its vector +record = db.records.create( + label="Article", + data={"title": "Warp drives", "body": "Alcubierre metric..."}, + vectors=[{"propertyName": "body", "vector": my_embedder.embed("Alcubierre metric...")}], +) +``` + +--- + +## Disambiguation {#disambiguation} + +When the same `(label, propertyName)` pair is covered by more than one external index (different `similarityFunction` or `dimensions`), specify `similarityFunction` to resolve which index to use: + +```python +# Two indexes: Article:body/cosine and Article:body/euclidean + +# ✅ Explicit — writes to the cosine index only +db.records.create( + label="Article", + data={"title": "Widget", "body": "..."}, + vectors=[{ + "propertyName": "body", + "vector": vec, + "similarityFunction": "cosine", # required when ambiguous + }], +) + +# ✅ Explicit — searches the euclidean index only +db.ai.search({ + "labels": ["Article"], + "propertyName": "body", + "queryVector": vec, + "similarityFunction": "euclidean", +}) + +# ❌ Missing similarityFunction when 
two indexes exist → 422 Unprocessable Entity +db.records.create( + label="Article", + data={"title": "Gadget"}, + vectors=[{"propertyName": "body", "vector": vec}], # ambiguous! +) +``` + +### Index signature uniqueness + +Two index policies are considered **identical** (and a second `create` returns `409 Conflict`) when all five fields match: + +| Field | Effect on uniqueness | +|----------------------|----------------------| +| `label` | ✅ | +| `propertyName` | ✅ | +| `sourceType` | ✅ | +| `similarityFunction` | ✅ | +| `dimensions` | ✅ | + +Changing any one field produces a distinct index and both are allowed to coexist. + +--- + +## Complete BYOV worked example + +```python +from rushdb import RushDB + +db = RushDB("your-api-key") + +# 1. Create the external index +idx_response = db.ai.indexes.create({ + "label": "Doc", + "propertyName": "content", + "sourceType": "external", + "dimensions": 3, + "similarityFunction": "cosine", +}) +ext_index_id = idx_response.data["id"] +# status: 'awaiting_vectors' + +# 2. Create records + write inline vectors (one round trip per record) +articles = [ + {"title": "Alpha", "content": "First article", "vector": [1, 0, 0]}, + {"title": "Beta", "content": "Second article", "vector": [0, 1, 0]}, + {"title": "Gamma", "content": "Third article", "vector": [0, 0, 1]}, +] + +for article in articles: + db.records.create( + label="Doc", + data={"title": article["title"], "content": article["content"]}, + vectors=[{"propertyName": "content", "vector": article["vector"]}], + ) + +# 3. 
Search using a pre-computed query vector +results = db.ai.search({ + "labels": ["Doc"], + "propertyName": "content", + "queryVector": [1, 0, 0], # closest to Alpha + "limit": 3, +}) + +print(results.data[0]["title"]) # 'Alpha' +print(results.data[0]["__score"]) # ~1.0 +``` + +--- + +## Batch import with `$vectors` + +For bulk seeding, use `db.records.import_json()` with a `$vectors` key on each object: + +```python +db.records.import_json({ + "Doc": [ + {"title": "Alpha", "content": "First article", "$vectors": [{"propertyName": "content", "vector": [1, 0, 0]}]}, + {"title": "Beta", "content": "Second article", "$vectors": [{"propertyName": "content", "vector": [0, 1, 0]}]}, + {"title": "Gamma", "content": "Third article", "$vectors": [{"propertyName": "content", "vector": [0, 0, 1]}]}, + ] +}) +``` + +`$vectors` entries are **stripped** from the stored record data — they do not appear as properties or child records. + +--- + +## Mixing managed and external indexes + +You can have both a managed index and an external index on the same property simultaneously: + +```python +# Managed — server embeds for full-text search +db.ai.indexes.create({"label": "Product", "propertyName": "description"}) + +# External — your custom multimodal model +db.ai.indexes.create({ + "label": "Product", + "propertyName": "description", + "sourceType": "external", + "dimensions": 512, + "similarityFunction": "cosine", +}) +``` + +Specify `similarityFunction` in `db.ai.search()` to route the query to the intended index. diff --git a/docs/docs/python-sdk/ai/indexing.md b/docs/docs/python-sdk/ai/indexing.md new file mode 100644 index 00000000..788363f3 --- /dev/null +++ b/docs/docs/python-sdk/ai/indexing.md @@ -0,0 +1,213 @@ +--- +sidebar_position: 1 +title: Embedding Indexes +--- + +# Embedding Indexes + +An **embedding index** is a policy that tells RushDB to vectorize a specific string property for a label. 
Once `status` is `ready`, every record matching that label+property pair is searchable via `db.ai.search()`. + +--- + +## How indexes work + +Indexes are scoped to `(label, propertyName)`. `Book:description` and `Article:description` are completely independent — they maintain separate vector stores and never interfere. + +``` +Index policy + label: "Book" + propertyName: "description" + sourceType: "managed" + dimensions: 1536 + status: "ready" + +↓ backfill runs automatically + +Book records get vectors stored on their VALUE relationships: + rel._emb_managed_cosine_1536 = [0.1, 0.2, ...] +``` + +When new records are created or existing records are updated, the index transitions back to `pending` and vectors are recomputed on the next backfill cycle. + +--- + +## `db.ai.indexes.find()` + +List all embedding index policies for the current project. + +```python +response = db.ai.indexes.find() +for index in response.data: + print(f"{index['label']}.{index['propertyName']} — {index['status']}") +``` + +### Example response data + +```python +[ + { + "id": "idx_abc123", + "projectId": "proj_xyz", + "label": "Article", + "propertyName": "description", + "sourceType": "managed", + "similarityFunction": "cosine", + "modelKey": "text-embedding-3-small", + "dimensions": 1536, + "vectorPropertyName": "_emb_managed_cosine_1536", + "enabled": True, + "status": "ready", + "createdAt": "2025-01-10T12:00:00.000Z", + "updatedAt": "2025-01-10T12:05:00.000Z", + } +] +``` + +--- + +## `db.ai.indexes.create()` + +Create a new embedding index policy for a string property. + +```python +db.ai.indexes.create(params: dict) -> ApiResponse[dict] +``` + +| `params` key | Type | Required | Description | +|----------------------|--------|----------|---------------------------------------------------------------------------------------------------------| +| `label` | string | **yes** | Label to scope this index to (e.g. 
`"Article"`) | +| `propertyName` | string | **yes** | Name of the property to embed (e.g. `"description"`) | +| `sourceType` | string | no | `"managed"` (default) or `"external"`. See [Advanced Indexing](./advanced-indexing.md). | +| `similarityFunction` | string | no | `"cosine"` (default) or `"euclidean"` | +| `dimensions` | number | no | Vector dimensionality. Defaults to server `RUSHDB_EMBEDDING_DIMENSIONS`. **Required** for external indexes. | + +```python +# Simplest form — uses server-configured model and dimensions +response = db.ai.indexes.create({ + "label": "Article", + "propertyName": "description" +}) +print(response.data["status"]) # 'pending' → backfill starts immediately + +# With explicit parameters +response = db.ai.indexes.create({ + "label": "Article", + "propertyName": "description", + "similarityFunction": "cosine", + "dimensions": 1536 +}) +``` + +> Attempting to create a duplicate `(label, propertyName, sourceType, similarityFunction, dimensions)` tuple returns `409 Conflict`. + +### Index lifecycle + +| Status | Description | +|--------------------|-----------------------------------------------------------------| +| `pending` | Policy created, waiting for backfill scheduler | +| `indexing` | Backfill in progress | +| `awaiting_vectors` | External index — waiting for client to push vectors | +| `ready` | All existing records have vectors; search is available | +| `error` | Backfill failed; check server logs for the cause | + +--- + +## `db.ai.indexes.stats(index_id)` + +Returns the fill rate for an index — useful for progress monitoring or health checks. + +```python +response = db.ai.indexes.stats(index_id) +stats = response.data +print(f"{stats['indexedRecords']} / {stats['totalRecords']} records indexed") +``` + +--- + +## `db.ai.indexes.delete(index_id)` + +Remove an embedding index policy. The underlying Neo4j DDL vector index is only dropped when **zero embeddings remain** across the entire project. 
+ +```python +db.ai.indexes.delete(index_id) +``` + +--- + +## Waiting for an index to become ready + +For managed indexes, backfill runs asynchronously. Poll `db.ai.indexes.find()` until `status` is `ready`: + +```python +import time + +def wait_for_index_ready(db, index_id, timeout_s=90): + deadline = time.time() + timeout_s + while time.time() < deadline: + response = db.ai.indexes.find() + idx = next((i for i in response.data if i["id"] == index_id), None) + if idx and idx["status"] == "ready": + return + if idx and idx["status"] == "error": + raise RuntimeError("Index entered error state") + time.sleep(3) + raise TimeoutError("Index did not become ready in time") + +response = db.ai.indexes.create({"label": "Book", "propertyName": "description"}) +wait_for_index_ready(db, response.data["id"]) +# now safe to call db.ai.search(...) +``` + +--- + +## Multiple indexes on the same property + +You can have more than one index per `(label, propertyName)` pair, provided the signature differs: + +```python +# Cosine index +db.ai.indexes.create({ + "label": "Product", + "propertyName": "description", + "similarityFunction": "cosine", + "dimensions": 768, +}) + +# Euclidean index on the same property +db.ai.indexes.create({ + "label": "Product", + "propertyName": "description", + "similarityFunction": "euclidean", + "dimensions": 768, +}) +``` + +When searching or writing vectors against a property with multiple indexes, specify `similarityFunction` to disambiguate. See [Advanced Indexing](./advanced-indexing.md#disambiguation) for details. 
+ +--- + +## Index shape + +```python +{ + "id": str, + "projectId": str, + "label": str, + "propertyName": str, + "modelKey": str, + "sourceType": str, # 'managed' | 'external' + "similarityFunction": str, # 'cosine' | 'euclidean' + "dimensions": int, + "vectorPropertyName": str, # internal Neo4j property name for the vector + "enabled": bool, + "status": str, # 'pending' | 'indexing' | 'awaiting_vectors' | 'ready' | 'error' + "createdAt": str, + "updatedAt": str, +} +``` + +--- + +## `List[str]` properties + +String list properties are supported. Each item in the list is embedded individually, then mean-pooled into a single vector stored on the relationship. diff --git a/docs/docs/python-sdk/ai/overview.md b/docs/docs/python-sdk/ai/overview.md new file mode 100644 index 00000000..f48463c2 --- /dev/null +++ b/docs/docs/python-sdk/ai/overview.md @@ -0,0 +1,198 @@ +--- +sidebar_position: 0 +title: Overview +--- + +# AI & Semantic Search + +RushDB is a **self-aware memory layer for agents, humans, and apps**. It continuously understands its own structure — labels, fields, value distributions, relationships — and exposes that knowledge so agents can reason over real data without hallucinating schema details, and apps can retrieve semantically relevant context on demand. + +The `db.ai` namespace covers three capabilities: + +| Capability | Description | +|---|---| +| **Graph Ontology** | Self-describing schema discovery: label names, field types, value ranges, and the relationship map — always up to date | +| **Embedding Indexes** | Per-label vector policies that turn string properties into long-term semantic memory | +| **Semantic Search** | Cosine/euclidean similarity retrieval over indexed properties, for agents and apps alike | + +--- + +## How it fits together + +``` +┌─────────────────────────────────────────────────────┐ +│ Your data (records + relationships) │ +│ │ +│ BOOK { title: "...", description: "..." 
} │ +└────────────────────┬────────────────────────────────┘ + │ + db.ai.indexes.create() + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Embedding index policy │ +│ label: BOOK property: description dims: 1536 │ +│ sourceType: managed | external │ +└────────────────────┬────────────────────────────────┘ + │ + Backfill (managed) / inline vectors (external) + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Vector stored on VALUE relationship │ +│ rel._emb_managed_cosine_1536 = [0.1, 0.2, ...] │ +└────────────────────┬────────────────────────────────┘ + │ + db.ai.search({ query / queryVector }) + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Records ranked by similarity score │ +│ result.__score = 0.94 (cosine similarity) │ +└─────────────────────────────────────────────────────┘ +``` + +--- + +## Quick links + +| Topic | Description | +|---|---| +| [Ontology](#graph-ontology) | Schema discovery with `get_ontology_markdown` / `get_ontology` | +| [Indexing](./indexing.md) | Create and manage managed embedding indexes | +| [Advanced Indexing — BYOV](./advanced-indexing.md) | Bring Your Own Vectors: external indexes, inline writes | +| [Semantic Search](./search.md) | Query by meaning with `db.ai.search()` | +| [Writing with Vectors](./write-with-vectors.md) | Attach vectors at create / upsert / import_json time | + +--- + +## Graph Ontology + +The ontology methods expose a live snapshot of your database structure — without any manual schema definitions. + +### `db.ai.get_ontology_markdown()` + +Returns the full schema as compact Markdown — the **recommended format for LLM context injection**. 
+ +```python +db.ai.get_ontology_markdown( + params: dict | None = None, # {"labels": ["Order"]} to scope output + transaction=None +) -> ApiResponse[str] +``` + +```python +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY") + +# Inject into LLM at session start +response = db.ai.get_ontology_markdown() +schema = response.data + +messages = [ + {"role": "system", "content": f"You are a data assistant.\n\n{schema}"}, + {"role": "user", "content": "How many paid orders are there?"} +] + +# Scope to specific labels +order_response = db.ai.get_ontology_markdown({"labels": ["Order"]}) +``` + +
+Example output + +```text +# Graph Ontology + +## Labels + +| Label | Count | +|-----------|------:| +| `Order` | 1840 | +| `User` | 312 | +| `Product` | 95 | + +--- + +## `Order` (1840 records) + +### Properties + +| Property | Type | Values / Range | +|-------------|----------|------------------------------------------| +| `status` | string | `pending`, `paid`, `shipped` (+2 more) | +| `total` | number | `4.99`..`2499.00` | +| `createdAt` | datetime | `2024-01-03`..`2026-02-27` | + +### Relationships + +| Type | Direction | Other Label | +|-------------|-----------|-------------| +| `PLACED_BY` | out | `User` | +| `CONTAINS` | out | `Product` | +``` + +
+ +--- + +### `db.ai.get_ontology()` + +Returns the same ontology as a structured list of dicts — useful for schema UIs, auto-complete, or looking up property IDs for `db.properties.values()`. + +```python +db.ai.get_ontology( + params: dict | None = None, + transaction=None +) -> ApiResponse[list[dict]] +``` + +```python +# List all labels with counts +response = db.ai.get_ontology() +for item in response.data: + print(f"{item['label']}: {item['count']} records") + +# Look up property ID for value enumeration +response = db.ai.get_ontology({"labels": ["Order"]}) +order_schema = response.data[0] +status_prop = next(p for p in order_schema["properties"] if p["name"] == "status") + +values_response = db.properties.values({"id": status_prop["id"]}) +# ['pending', 'paid', 'shipped', 'cancelled', 'refunded'] +``` + +Each item in `response.data`: + +```python +{ + "label": str, + "count": int, + "properties": [ + { + "id": str, # use with db.properties.values() + "name": str, + "type": str, # 'string' | 'number' | 'boolean' | 'datetime' + "values": list, # up to 10 samples (string/boolean only) + "min": str | float | None, # number/datetime only + "max": str | float | None, + } + ], + "relationships": [ + { + "label": str, + "type": str, + "direction": str, # 'in' | 'out' + } + ] +} +``` + +:::note Caching +Both methods share a **1-hour cache** per project. The first call after TTL expiry triggers a full graph scan; all subsequent calls within the hour are instant. +::: + +:::tip Agent quickstart +Call `db.ai.get_ontology_markdown()` first in every AI session. Without it, models will hallucinate field and label names. 
+::: diff --git a/docs/docs/python-sdk/ai/search.md b/docs/docs/python-sdk/ai/search.md new file mode 100644 index 00000000..f588ed96 --- /dev/null +++ b/docs/docs/python-sdk/ai/search.md @@ -0,0 +1,207 @@ +--- +sidebar_position: 3 +title: Semantic Search +--- + +# Semantic Search + +`db.ai.search()` performs semantic vector search across records that have an associated embedding index. + +--- + +## Signature + +```python +db.ai.search(params: dict) -> ApiResponse[list[dict]] +``` + +| `params` key | Type | Required | Description | +|----------------------|----------------------------|--------------|-----------------------------------------------------------------------------------------------------| +| `propertyName` | string | **yes** | The indexed property to search against (e.g. `"description"`) | +| `labels` | string or list of strings | **yes** | Label(s) to search within (min 1) | +| `query` | string | conditionally | Free-text query to embed. Required for managed indexes; **not allowed** for external indexes. | +| `queryVector` | list of floats | conditionally | Pre-computed query vector. Required for external indexes. Also accepted for managed indexes (bypasses server embedding). | +| `similarityFunction` | string | no | `"cosine"` or `"euclidean"`. Required when multiple indexes target the same `(label, propertyName)`. | +| `dimensions` | number | no | Disambiguates when multiple indexes match. Inferred from `len(queryVector)` when `queryVector` is supplied. | +| `where` | dict | no | Standard RushDB filter expression applied **before** similarity scoring. 
| +| `skip` | number | no | Pagination offset (default `0`) | +| `limit` | number | no | Maximum results to return (default `20`) | + +--- + +## Result shape + +Results are flat dicts with `__score` injected alongside your record fields, ordered by `__score` descending (closest match first): + +```python +{ + "__id": str, # RushDB record ID + "__label": str, # Record label + "__score": float, # Similarity score, 0–1 (higher = more similar) + # ... your fields + "title": str, + "description": str, +} +``` + +--- + +## Managed search (query text) + +For a **managed** index, pass `query` — a natural-language string. The server embeds it using the same model that built the index. + +```python +response = db.ai.search({ + "propertyName": "description", + "query": "machine learning for beginners", + "labels": ["Article"], + "limit": 5, +}) + +for result in response.data: + print(f"[{result['__score']:.3f}] {result['title']}") +``` + +--- + +## External search (query vector) + +For an **external** index, pass `queryVector` — a pre-computed embedding produced by your own model. No text is sent to an embedding model. + +```python +vec = my_embedder.embed("machine learning for beginners") + +response = db.ai.search({ + "propertyName": "body", + "queryVector": vec, + "labels": ["Article"], + "limit": 10, +}) +``` + +- `query` is **not allowed** with external indexes. +- `queryVector` is **not required** for managed indexes but is accepted (bypasses server embedding). +- When `queryVector` is supplied, `dimensions` can be omitted — the server infers it from `len(queryVector)`. + +--- + +## Filtering with `where` + +The `where` clause acts as a **prefilter** — only records satisfying the filter are candidates for similarity ranking. All `where` operators supported by `db.records.find()` are available here. 
+ +```python +response = db.ai.search({ + "propertyName": "description", + "query": "wireless headphones", + "labels": ["Product"], + "where": { + "category": {"$eq": "electronics"}, + "inStock": {"$eq": True}, + "price": {"$lt": 100}, + }, + "limit": 20, +}) +``` + +--- + +## Multi-label search + +Pass a list of labels to search across multiple entity types simultaneously: + +```python +response = db.ai.search({ + "propertyName": "body", + "query": "machine learning trends", + "labels": ["Article", "Post", "Comment"], + "limit": 10, +}) + +# Each result carries __label so you can tell them apart +for result in response.data: + print(result["__label"], f"{result['__score']:.3f}", result.get("title") or result.get("text")) +``` + +All listed labels must have an embedding index on the same `propertyName`, or the request returns `404` for the missing labels. + +--- + +## Disambiguation + +When two indexes exist for the same `(label, propertyName)`, specify `similarityFunction` to select the target index: + +```python +# Two indexes: Product:embedding/cosine and Product:embedding/euclidean +response = db.ai.search({ + "labels": ["Product"], + "propertyName": "embedding", + "queryVector": vec, + "similarityFunction": "cosine", # required — otherwise 422 +}) +``` + +--- + +## Pagination + +```python +PAGE = 20 + +# Page 1 +page1 = db.ai.search({ + "propertyName": "description", + "query": "sustainable packaging", + "labels": ["Product"], + "limit": PAGE, + "skip": 0, +}) + +# Page 2 +page2 = db.ai.search({ + "propertyName": "description", + "query": "sustainable packaging", + "labels": ["Product"], + "limit": PAGE, + "skip": PAGE, +}) +``` + +--- + +## Full example: AI agent with semantic search + +```python +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY") + +def build_agent_system_prompt() -> str: + schema = db.ai.get_ontology_markdown().data + return f"You are a data assistant for RushDB.\n\n{schema}" + +def semantic_search(query: str, label: str, limit: int 
= 5) -> list[dict]: + response = db.ai.search({ + "propertyName": "description", + "query": query, + "labels": [label], + "limit": limit, + }) + return response.data + +# Retrieve context then pass to LLM +results = semantic_search("climate change research", "Article") +for r in results: + print(f"[{r['__score']:.3f}] {r['title']}") +``` + +--- + +## Error reference + +| HTTP | Cause | +|------|-------| +| `404 Not Found` | No enabled embedding index found for `(label, propertyName)` | +| `422 Unprocessable Entity` | Multiple indexes match and `similarityFunction` was not specified | +| `422 Unprocessable Entity` | `query` text supplied for an external index (server cannot embed it) | +| `422 Unprocessable Entity` | `queryVector` length does not match index `dimensions` | +| `503 Service Unavailable` | Embedding model unavailable (managed indexes only) | diff --git a/docs/docs/python-sdk/ai/write-with-vectors.md b/docs/docs/python-sdk/ai/write-with-vectors.md new file mode 100644 index 00000000..995f090b --- /dev/null +++ b/docs/docs/python-sdk/ai/write-with-vectors.md @@ -0,0 +1,270 @@ +--- +sidebar_position: 4 +title: Writing Records with Vectors +--- + +# Writing Records with Vectors + +RushDB lets you attach pre-computed embedding vectors to records **at write time**, eliminating the need for a separate `db.ai.indexes.upsert_vectors()` call. Any operation that creates or modifies records supports this through the `vectors` parameter (or the `$vectors` key in batch JSON imports). + +This feature requires at least one [external index](./advanced-indexing.md) to exist for the target `(label, propertyName)`. 
+ +--- + +## `vectors` parameter format + +All write methods accept a `vectors` list of dicts: + +```python +vectors = [ + { + "propertyName": "description", # required + "vector": [0.1, 0.9, 0.4, ...], # required + "similarityFunction": "cosine", # required only when multiple indexes share (label, propertyName) + } +] +``` + +--- + +## `db.records.create()` with vectors + +```python +record = db.records.create( + label="Article", + data={ + "title": "How transformers work", + "body": "Attention is all you need ...", + }, + vectors=[ + {"propertyName": "body", "vector": my_embedder.embed("Attention is all you need ...")} + ], +) + +print(record.data["__id"]) # record created AND vector written atomically +``` + +--- + +## `db.records.upsert()` with vectors + +`upsert` is idempotent on the record's natural key (`mergeBy`). Passing `vectors` writes or replaces the stored vector for each `propertyName` in the same call: + +```python +# First call — creates the record + writes vector +r1 = db.records.upsert( + label="Article", + data={"slug": "transformers-101", "title": "Transformers 101", "body": "..."}, + options={"mergeBy": ["slug"], "mergeStrategy": "append"}, + vectors=[{"propertyName": "body", "vector": v1}], +) + +# Second call — same slug → updates data + replaces the vector +r2 = db.records.upsert( + label="Article", + data={"slug": "transformers-101", "title": "Transformers 101 (revised)", "body": "Updated ..."}, + options={"mergeBy": ["slug"], "mergeStrategy": "append"}, + vectors=[{"propertyName": "body", "vector": v2}], +) + +# r1.__id == r2.__id — same record +``` + +--- + +## `db.records.set()` with vectors + +`set` replaces all properties of a record with new values. 
Including `vectors` writes those vectors at the same time: + +```python +# Full replace — data AND vector updated together +db.records.set( + target=record, + label="Product", + data={"name": "Widget Pro", "price": 19.99}, + vectors=[{"propertyName": "description", "vector": new_vec}], +) +``` + +--- + +## `db.records.import_json()` with `$vectors` + +For bulk ingestion, add a `$vectors` key alongside properties in each JSON object. The format is the same as the `vectors` list: + +```python +db.records.import_json({ + "Article": [ + { + "title": "Alpha", + "body": "First article about AI", + "$vectors": [{"propertyName": "body", "vector": [1, 0, 0]}] + }, + { + "title": "Beta", + "body": "Second article about ML", + "$vectors": [{"propertyName": "body", "vector": [0, 1, 0]}] + }, + { + "title": "Gamma", + "body": "Third article about DL", + "$vectors": [{"propertyName": "body", "vector": [0, 0, 1]}] + }, + ] +}) +``` + +`$vectors` entries are stripped before the record is persisted. They: +- **Do not** appear as record properties +- **Do not** create child records +- **Do not** appear in query results + +--- + +## `db.records.create_many()` with vectors + +`create_many` is optimised for flat rows. 
Use the top-level `vectors` parameter — a list indexed by row position — to attach a vector to each record without nesting inside your flat data: + +```python +db.records.create_many( + label="Product", + data=[ + {"name": "Alpha", "description": "First product"}, + {"name": "Beta", "description": "Second product"}, + {"name": "Gamma", "description": "Third product"}, + ], + vectors=[ + [{"propertyName": "description", "vector": [1, 0, 0]}], # row 0 + [{"propertyName": "description", "vector": [0, 1, 0]}], # row 1 + [{"propertyName": "description", "vector": [0, 0, 1]}], # row 2 + ], +) +``` + +### Sparse vectors + +Leave rows without vectors by providing a shorter `vectors` list — any unspecified trailing rows are skipped: + +```python +db.records.create_many( + label="Product", + data=[{"name": "Alpha"}, {"name": "Beta"}, {"name": "Gamma"}], + # only row 0 gets a vector; rows 1 and 2 are skipped + vectors=[[{"propertyName": "description", "vector": my_vec}]], +) +``` + +--- + +## `db.records.import_csv()` with vectors + +CSV data is a raw string, so per-row vectors are supplied as a separate `vectors` parameter using the same indexed-list format. Row indices are 0-based and refer to data rows after the header is consumed: + +```python +csv_data = """name,description +Alpha,First product +Beta,Second product +Gamma,Third product""" + +db.records.import_csv( + label="Product", + data=csv_data, + vectors=[ + [{"propertyName": "description", "vector": [1, 0, 0]}], # csv row 0 + [{"propertyName": "description", "vector": [0, 1, 0]}], # csv row 1 + [{"propertyName": "description", "vector": [0, 0, 1]}], # csv row 2 + ], +) +``` + +The server returns `400 Bad Request` if `vectors` length exceeds the number of CSV data rows (validated after CSV parsing). + +--- + +## Specifying `similarityFunction` for disambiguation + +When a `(label, propertyName)` has multiple external indexes registered (e.g. 
one cosine and one euclidean), include `similarityFunction` in each vector entry so the server routes the write to the correct index: + +```python +# Write to the cosine index +db.records.create( + label="Product", + data={"name": "Widget"}, + vectors=[ + {"propertyName": "embedding", "vector": vec, "similarityFunction": "cosine"} + ], +) +``` + +Omitting `similarityFunction` when multiple indexes match returns `422 Unprocessable Entity`. + +--- + +## Multiple vectors in one call + +Write vectors for multiple properties or indexes in a single operation: + +```python +db.records.create( + label="Document", + data={"title": "Multi-modal doc", "abstract": "...", "fullText": "..."}, + vectors=[ + {"propertyName": "abstract", "vector": abstract_vec}, + {"propertyName": "fullText", "vector": full_text_vec}, + ], +) +``` + +Each entry is matched independently against the available external indexes. + +--- + +## Complete worked example + +```python +from rushdb import RushDB + +db = RushDB("your-api-key") +emb = YourEmbeddingModel() + +# 1. Create an external index (safe to call multiple times — 409 on duplicate) +try: + idx_response = db.ai.indexes.create({ + "label": "Article", + "propertyName": "body", + "sourceType": "external", + "dimensions": 768, + "similarityFunction": "cosine", + }) + index_id = idx_response.data["id"] +except Exception: + index_id = next( + i["id"] for i in db.ai.indexes.find().data + if i["label"] == "Article" and i["propertyName"] == "body" + ) + +# 2. Create records from your pipeline, embedding as you go +docs = [ + {"title": "Alpha", "body": "First doc"}, + {"title": "Beta", "body": "Second doc"}, +] + +for doc in docs: + db.records.create( + label="Article", + data=doc, + vectors=[{"propertyName": "body", "vector": emb.embed(doc["body"])}], + ) + +# 3. 
Search +query_vec = emb.embed("first document") +results = db.ai.search({ + "labels": ["Article"], + "propertyName": "body", + "queryVector": query_vec, + "limit": 3, +}) + +for r in results.data: + print(f"[{r['__score']:.3f}] {r['title']}") +``` diff --git a/docs/docs/python-sdk/introduction.md b/docs/docs/python-sdk/introduction.md index fd882f1f..f121bbce 100644 --- a/docs/docs/python-sdk/introduction.md +++ b/docs/docs/python-sdk/introduction.md @@ -3,126 +3,81 @@ title: Introduction sidebar_position: 0 --- -# RushDB Python SDK +# Python SDK -The RushDB Python SDK provides a powerful, intuitive interface for interacting with RushDB from Python applications. Whether you're building data science pipelines, web applications, or AI-driven services, this SDK offers a clean, Pythonic way to work with your graph data. +Push JSON, query by value or meaning, traverse graphs — from Python. -## Features - -- **Intuitive API Design**: Simple methods that map directly to common database operations -- **Type Hinting Support**: Comprehensive type annotations for better IDE support -- **Transaction Management**: ACID-compliant transactions with context manager support -- **Flexible Query System**: Expressive query capabilities without learning a graph query language -- **Vector Support**: Built-in handling for vector embeddings and similarity search -- **Data Import Tools**: Easy import of structured data from JSON, CSV, and other formats - -## Installation - -Install the RushDB Python SDK using pip: +## Install ```bash pip install rushdb ``` -## Quick Start - -### Initialize Client +## Connect ```python from rushdb import RushDB -# Connect to RushDB with your API token db = RushDB("RUSHDB_API_KEY") ``` -### Basic Operations +Get your API token from the [RushDB Dashboard](https://app.rushdb.com/). 
+ +## First write ```python -# Create a record -user = db.records.create( - label="USER", +# Nested objects become linked records automatically +db.records.create_many( + label="MOVIE", data={ - "name": "John Doe", - "email": "john@example.com", - "age": 30 - }, - options={"suggestTypes": True} + "title": "Inception", + "rating": 8.8, + "genre": "sci-fi", + "ACTOR": [ + {"name": "Leonardo DiCaprio", "country": "USA"}, + {"name": "Ken Watanabe", "country": "Japan"} + ] + } ) +# Created: MOVIE → ACTOR × 2 (relationships wired automatically) +``` + +## First read -# Find records +```python result = db.records.find({ - "where": { - "age": {"$gte": 18}, - "name": {"$startsWith": "J"} - }, + "labels": ["MOVIE"], + "where": {"rating": {"$gte": 8}}, "limit": 10 }) -# Iterate over results -for user in result: - print(f"Found user: {user.get('name')}") - -# Check result metadata -print(f"Found {len(result)} users out of {result.total} total") - -# Update a record -user.update({ - "last_login": "2025-05-04T12:30:45Z" -}) +for movie in result: + print(movie["title"]) -# Create relationships -company = db.records.create( - label="COMPANY", - data={"name": "Acme Inc."} -) - -# Attach records with a relationship -user.attach( - target=company, - options={"type": "WORKS_AT", "direction": "out"} -) +print(f"{result.total} total") ``` -## Using Transactions +## Configuration -Ensure data consistency with transactions: +| Parameter | Default | Description | +|---|---|---| +| `api_key` | — | Your RushDB API token (required) | +| `url` | `https://app.rushdb.com` | RushDB instance URL | ```python -# Begin a transaction -with db.transactions.begin() as transaction: - # Create a user - user = db.records.create( - label="USER", - data={"name": "Alice Smith"}, - transaction=transaction - ) - - # Create a product - product = db.records.create( - label="PRODUCT", - data={"name": "Smartphone", "price": 799.99}, - transaction=transaction - ) - - # Create a purchase relationship - user.attach( - 
target=product, - options={"type": "PURCHASED", "direction": "out"}, - transaction=transaction - ) - - # Everything will be committed if no errors occur - # If an error occurs, the transaction will be automatically rolled back +# Self-hosted instance +db = RushDB("RUSHDB_API_KEY", url="https://your-rushdb-instance.com") ``` -## Next Steps +## Namespaces -Explore the detailed documentation for each component of the SDK: +| Namespace | Purpose | +|---|---| +| `db.records` | Create, read, update, delete records | +| `db.relationships` | Attach / detach record links | +| `db.labels` | List labels in the database | +| `db.properties` | Inspect property metadata | +| `db.transactions` | Begin / commit / rollback | +| `db.ai` | Ontology, embedding indexes, semantic search | -- [Records](./records/create-records.md) - Create, read, update, and delete record operations -- [Properties](./properties.md) - Manage data properties -- [Labels](./labels.md) - Work with node labels -- [Relationships](./relationships.md) - Handle connections between records -- [Transactions](./transactions.md) - Manage transaction operations for data consistency -For more advanced use cases, check our [Tutorials](../tutorials/reusable-search-query) section. diff --git a/docs/docs/python-sdk/labels.md b/docs/docs/python-sdk/labels.md index 75ee8007..89e2e998 100644 --- a/docs/docs/python-sdk/labels.md +++ b/docs/docs/python-sdk/labels.md @@ -1,579 +1,20 @@ --- -sidebar_position: 2 +sidebar_position: 5 --- # Labels -In RushDB, [labels](../concepts/labels.md) are used to categorize records and define their types. The Python SDK provides methods for managing labels, finding records by labels, and working with label hierarchies. 
- -## Overview - -Labels in RushDB serve several important purposes: -- Categorizing records into logical groups -- Defining the type or class of a record -- Enabling efficient filtering and searching -- Supporting hierarchical data modeling - -## Prerequisites - -Before working with labels, make sure you have initialized the RushDB client with your API token: - -```python -from rushdb import RushDB - -db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") -``` - -## Creating Records with Labels - -When creating records, you specify labels to categorize them: - -```python -# Create a record with a single label -person = db.records.create( - label="PERSON", - data={ - "name": "John Doe", - "age": 30 - } -) - -# The record now has the label "PERSON" -print(person.label) # Output: "PERSON" -``` - -## Working with Label Case - -By default, labels are stored as provided. However, you can use the `capitalizeLabels` option to automatically capitalize labels: - -```python -# Create a record with automatic label capitalization -product = db.records.create( - label="product", # Will be automatically capitalized to "PRODUCT" - data={ - "name": "Smartphone", - "price": 999.99 - }, - options={ - "capitalizeLabels": True - } -) - -print(product.label) # Output: "PRODUCT" -``` - -## Finding Records by Label - -You can search for records by their labels using the `find()` method with the `labels` parameter: - -```python -# Find all records with the "PERSON" label -people = db.records.find({ - "labels": ["PERSON"] -}) - -# Find records with either "EMPLOYEE" or "CONTRACTOR" labels -workers = db.records.find({ - "labels": ["EMPLOYEE", "CONTRACTOR"] -}) - -# Combine label filtering with other search criteria -senior_engineers = db.records.find({ - "labels": ["EMPLOYEE"], - "where": { - "position": "Senior Engineer", - "yearsOfExperience": {"$gte": 5} - } -}) -``` - -## Label Hierarchy and Inheritance - -RushDB supports label inheritance, allowing you to model 
hierarchical relationships between labels. For example, an "EMPLOYEE" can also be a "PERSON": - -```python -# Create a record with multiple labels -employee = db.records.create_many( - label="EMPLOYEE", - data={ - "name": "Jane Smith", - "email": "jane@example.com", - "department": "Engineering", - "PERSON": { # Nested object creates a relationship with the label PERSON - "age": 28, - "address": "123 Main St" - } - }, - options={ - "relationshipType": "IS_A" # Establishes an inheritance relationship - } -) - -# Finding the employee will also include PERSON properties -found_employee = db.records.find({ - "labels": ["EMPLOYEE"], - "where": { - "name": "Jane Smith" - } -}) -``` - -## Discovering and Searching Labels with LabelsAPI - -The `LabelsAPI` provides dedicated functionality for discovering and working with record labels in the database. This API allows you to find what types of records exist in your database and search for labels based on the properties of records that have those labels. - -**Important**: The LabelsAPI uses a Record-centric approach. When searching for labels, you specify properties of the records that have those labels, not properties of the labels themselves. This means the `where` clause contains Record properties to find labels from records that match those criteria. 
- -### Overview - -The LabelsAPI enables you to: -- Discover all labels (record types) in the database -- Search for labels based on record properties -- Understand the data structure and schema of your database -- Monitor label usage and distribution -- Work with labels in transaction contexts - -### Accessing the LabelsAPI - -You access the LabelsAPI through the main RushDB client: - -```python -from rushdb import RushDB - -# Initialize the client -db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") - -# Access the labels API -labels_api = db.labels -``` - -### The find() Method - -The `find()` method is the primary way to discover and search for labels in your database. It uses a Record-centric approach where you can filter labels based on the properties of records that have those labels. - -#### Method Signature - -```python -def find( - self, - search_query: Optional[SearchQuery] = None, - transaction: Optional[Transaction] = None, -) -> List[str] -``` - -#### Parameters - -- **search_query** (`Optional[SearchQuery]`): Search criteria to filter labels. Uses a Record-centric approach where the `where` clause contains Record properties to find labels from records that match those criteria: - - `where`: Filter conditions for record properties (not label properties) - - `labels`: Not typically used in LabelsAPI as you're discovering labels - - `orderBy`: Not applicable for label discovery -- **transaction** (`Optional[Transaction]`): Optional transaction context for the operation - -#### Return Value - -Returns a `List[str]` containing label names (strings) that exist in the database. Each string represents a unique label/type used in the database. 
- -### Basic Label Discovery - -#### Find All Labels - -```python -# Get all labels in the database -all_labels = db.labels.find() - -print("Available record types:", all_labels) -# Output: ['USER', 'COMPANY', 'PROJECT', 'EMPLOYEE', 'DEPARTMENT'] - -# Check how many different types of records exist -print(f"Database contains {len(all_labels)} different record types") -``` - -#### Discover Labels Based on Record Properties - -```python -# Find labels from records that are active -active_record_labels = db.labels.find({ - "where": { - "isActive": True - } -}) - -print("Labels from active records:", active_record_labels) -# Output: ['USER', 'PROJECT', 'EMPLOYEE'] - -# Find labels from records in Engineering department -engineering_labels = db.labels.find({ - "where": { - "department": "Engineering" - } -}) - -print("Labels used in Engineering:", engineering_labels) -# Output: ['EMPLOYEE', 'MANAGER', 'PROJECT'] -``` - -### Advanced Label Searching - -#### Filter by Record Creation Date - -```python -# Find labels from recently created records -recent_labels = db.labels.find({ - "where": { - "createdAt": {"$gte": "2024-01-01T00:00:00Z"} - } -}) - -print("Labels from records created this year:", recent_labels) -``` - -#### Filter by Complex Record Properties - ```python -# Find labels from high-value records -valuable_record_labels = db.labels.find({ - "where": { - "$or": [ - {"revenue": {"$gte": 1000000}}, # High revenue companies - {"salary": {"$gte": 150000}}, # High salary employees - {"budget": {"$gte": 500000}} # High budget projects - ] - } -}) +# All labels and their record counts +result = db.labels.find({}) +# → [LabelResult(name='MOVIE', count=3), LabelResult(name='ACTOR', count=3), ...] 
-print("Labels from high-value records:", valuable_record_labels) +# Labels that have records matching a condition +result = db.labels.find({"where": {"rating": {"$gte": 8}}}) +# → [LabelResult(name='MOVIE', count=1)] ``` -#### Find Labels by Record Status - -```python -# Find labels from records matching specific status -published_labels = db.labels.find({ - "where": { - "status": {"$in": ["published", "active", "approved"]} - } -}) - -# Find labels from records with specific properties -tech_labels = db.labels.find({ - "where": { - "$and": [ - {"industry": "Technology"}, - {"employees": {"$gte": 50}}, - {"isPublic": True} - ] - } -}) -``` - -### Label Analytics and Insights - -#### Analyze Database Schema - -```python -# Get comprehensive view of your database schema -all_labels = db.labels.find() - -print("Database Schema Overview:") -print(f"Total record types: {len(all_labels)}") -for label in sorted(all_labels): - print(f" - {label}") - -# Find labels by different criteria to understand data distribution -active_labels = db.labels.find({"where": {"isActive": True}}) -inactive_labels = db.labels.find({"where": {"isActive": False}}) - -print(f"\nActive record types: {len(active_labels)}") -print(f"Inactive record types: {len(inactive_labels)}") -``` - -#### Monitor Label Usage Patterns - -```python -# Discover which types of records exist in different departments -departments = ["Engineering", "Sales", "Marketing", "HR"] -label_distribution = {} - -for dept in departments: - dept_labels = db.labels.find({ - "where": {"department": dept} - }) - label_distribution[dept] = dept_labels - print(f"{dept} department uses labels: {dept_labels}") - -# Find common labels across departments -common_labels = set(label_distribution["Engineering"]) -for dept_labels in label_distribution.values(): - common_labels &= set(dept_labels) - -print(f"Labels common across all departments: {list(common_labels)}") -``` - -### Using Labels API with Transactions - -The LabelsAPI supports 
transactions for consistent discovery: - -```python -# Start a transaction -tx = db.tx.begin() - -try: - # Discover labels within the transaction context - labels = db.labels.find({ - "where": { - "department": "Sales", - "isActive": True - } - }, transaction=tx) - - print(f"Found labels in Sales: {labels}") - - # Perform additional operations in the same transaction - for label in labels: - # Query records of each discovered type - records = db.records.find({ - "labels": [label], - "where": {"department": "Sales"} - }, transaction=tx) - print(f"Found {len(records)} {label} records in Sales") - - # Commit the transaction - tx.commit() -except Exception as e: - # Roll back on error - tx.rollback() - print(f"Transaction failed: {e}") -``` - -### Practical Use Cases - -#### Database Migration and Schema Discovery - -```python -# Discover existing schema before migration -def analyze_database_schema(): - """Analyze the current database schema and structure.""" - - # Get all labels - all_labels = db.labels.find() - - schema_info = {} - for label in all_labels: - # Find sample records for each label to understand structure - sample_records = db.records.find({ - "labels": [label] - }, limit=5) - - # Analyze properties - properties = set() - for record in sample_records: - properties.update(record.data.keys()) - - schema_info[label] = { - "sample_properties": list(properties), - "sample_count": len(sample_records) - } - - return schema_info - -# Run schema analysis -schema = analyze_database_schema() -for label, info in schema.items(): - print(f"\n{label}:") - print(f" Sample properties: {info['sample_properties']}") - print(f" Sample records found: {info['sample_count']}") -``` - -#### Data Quality Assessment - -```python -# Find labels from incomplete or problematic records -def assess_data_quality(): - """Assess data quality by finding labels from problematic records.""" - - # Find labels from records missing critical fields - incomplete_labels = db.labels.find({ - 
"where": { - "$or": [ - {"name": None}, - {"createdAt": None}, - {"id": None} - ] - } - }) - - # Find labels from very old records that might need updating - old_labels = db.labels.find({ - "where": { - "updatedAt": {"$lt": "2023-01-01T00:00:00Z"} - } - }) - - return { - "incomplete_data_labels": incomplete_labels, - "outdated_labels": old_labels - } - -# Run data quality assessment -quality_report = assess_data_quality() -print("Data Quality Report:") -print(f"Labels with incomplete data: {quality_report['incomplete_data_labels']}") -print(f"Labels with outdated records: {quality_report['outdated_labels']}") -``` - -### Performance Considerations - -When using the LabelsAPI: - -1. **Use specific filters**: Apply `where` conditions to reduce the scope of label discovery -2. **Cache results**: Label discovery results can be cached as they don't change frequently -3. **Combine with record queries**: Use LabelsAPI to discover types, then use RecordsAPI for detailed data -4. **Monitor database growth**: Regular label discovery helps track database schema evolution -5. **Use in schema validation**: Incorporate label discovery in data validation pipelines - -### Error Handling - -```python -try: - labels = db.labels.find({ - "where": {"department": "NonexistentDepartment"} - }) - print(f"Found labels: {labels}") # Will return empty list if no matches -except Exception as e: - print(f"Error discovering labels: {e}") - # Handle the error appropriately -``` - -### Integration with Record Operations - -The LabelsAPI works seamlessly with record operations for comprehensive data management: - -```python -# 1. Discover available labels -available_labels = db.labels.find() -print(f"Available record types: {available_labels}") - -# 2. Find labels from specific types of data -user_related_labels = db.labels.find({ - "where": { - "$or": [ - {"email": {"$ne": None}}, - {"username": {"$ne": None}}, - {"role": {"$ne": None}} - ] - } -}) - -# 3. 
Query records for each discovered label -for label in user_related_labels: - records = db.records.find({ - "labels": [label] - }) - print(f"Found {len(records)} records with label '{label}'") - -# 4. Create new records based on discovered patterns -if "USER" in available_labels: - # Safe to create USER records - new_user = db.records.create( - label="USER", - data={"name": "New User", "email": "new@example.com"} - ) -``` - -## API Reference - -### LabelsAPI.find() - -The `find()` method discovers and retrieves labels (record types) from the database based on record properties. - -#### Method Signature - -```python -def find( - self, - search_query: Optional[SearchQuery] = None, - transaction: Optional[Transaction] = None, -) -> List[str] -``` - -#### Parameters - -- **search_query** (`Optional[SearchQuery]`): Search criteria to filter labels using a Record-centric approach - - **where** (dict): Filter conditions for record properties. The API finds labels from records that match these conditions - - **labels**: Not typically used in LabelsAPI since you're discovering labels - - **orderBy**: Not applicable for label discovery -- **transaction** (`Optional[Transaction]`): Transaction context for the operation - -#### Return Value - -- **List[str]**: List of unique label names (strings) found in the database - -#### Examples - -```python -# Get all labels -all_labels = db.labels.find() - -# Get labels from active records -active_labels = db.labels.find({ - "where": {"isActive": True} -}) - -# Get labels from records in specific department -dept_labels = db.labels.find({ - "where": {"department": "Engineering"} -}, transaction=tx) -``` - -### SearchQuery Structure for Labels - -When using the LabelsAPI, the SearchQuery follows this structure: - -```python -from rushdb.models.search_query import SearchQuery - -# Example SearchQuery for label discovery -query = SearchQuery( - where={ - # Record properties to filter by - "isActive": True, - "department": "Engineering", - 
"createdAt": {"$gte": "2024-01-01T00:00:00Z"} - } -) - -labels = db.labels.find(query) -``` - -## Best Practices for Working with Labels - -1. **Use consistent naming conventions** - Consider using uppercase for labels (e.g., "PERSON" instead of "Person") for consistency with graph database conventions. - -2. **Leverage the `capitalizeLabels` option** - Use this option to ensure consistent capitalization across your database. - -3. **Use specific labels** - More specific labels make searching and filtering more efficient. - -4. **Consider label hierarchies** - Use label inheritance to model "is-a" relationships between entities. - -5. **Combine labels with where clauses** - For precise filtering, combine label filtering with property conditions in the where clause. - -6. **Be mindful of performance** - Searching with very common labels might return large result sets. Use additional filters to narrow down results. - -7. **Use LabelsAPI for schema discovery** - Regularly use the LabelsAPI to understand your database structure and monitor schema evolution. - -8. **Cache label discovery results** - Since labels don't change frequently, consider caching the results of label discovery operations. - -9. **Filter by record properties for targeted discovery** - Use the Record-centric approach to discover labels from specific subsets of your data. - -10. **Integrate with data validation** - Use label discovery to validate that expected record types exist before performing operations. - -11. **Monitor label distribution** - Use LabelsAPI to understand how different types of data are distributed across your database. +`db.labels.find()` accepts a standard [SearchQuery](../concepts/search/where) `where` clause to filter labels by the fields of their records. It returns all labels that match regardless of how many records satisfy the condition. -12. **Combine with record operations** - Use LabelsAPI to discover types, then use RecordsAPI for detailed record manipulation. 
-## Related Documentation -- [Labels Concept](../concepts/labels.md) - Learn more about how labels work in RushDB -- [Search by Labels](../concepts/search/labels.md) - Advanced techniques for searching by labels -- [Record Creation](./records/create-records.md) - Creating records with labels -- [Finding Records](./records/get-records.md) - Search techniques including label filtering diff --git a/docs/docs/python-sdk/properties.md b/docs/docs/python-sdk/properties.md index c38c6aa1..8689e842 100644 --- a/docs/docs/python-sdk/properties.md +++ b/docs/docs/python-sdk/properties.md @@ -1,254 +1,57 @@ --- -sidebar_position: 3 +sidebar_position: 4 --- # Properties -The `PropertiesAPI` class provides methods for managing and querying properties in RushDB. - -## Class Definition +## `db.properties.find()` ```python -class PropertiesAPI(BaseAPI): -``` - -## Methods +# All properties +props = db.properties.find() -### find() - -Retrieves a find of properties based on optional search criteria. - -**Signature:** -```python -def find( - self, - search_query: Optional[SearchQuery] = None, - transaction: Optional[Transaction] = None -) -> List[Property] +# Filtered +props = db.properties.find({ + "where": {"type": "string"}, + "limit": 20 +}) ``` -**Arguments:** -- `search_query` (Optional[SearchQuery]): Search query parameters for filtering properties -- `transaction` (Optional[Transaction]): Optional transaction object +## `db.properties.find_by_id()` -**Returns:** -- `List[Property]`: List of properties matching the search criteria - -**Example:** ```python -# Find all properties -properties = client.properties.find() - -# Find properties with specific criteria -query = { - "where": { - "name": {"$startsWith": "user_"}, # Properties starting with 'user_' - "type": "string" # Only string type properties - }, - "limit": 10 # Limit to 10 results -} -filtered_properties = client.properties.find(query) +prop = db.properties.find_by_id("prop-123") ``` -### find_by_id() +## 
`db.properties.values()` -Retrieves a specific property by its ID. +Returns distinct values for a property — useful for building filter UIs or feeding into `db.ai.get_ontology()`. -**Signature:** ```python -def find_by_id( - self, - property_id: str, - transaction: Optional[Transaction] = None -) -> Property -``` - -**Arguments:** -- `property_id` (str): Unique identifier of the property -- `transaction` (Optional[Transaction]): Optional transaction object - -**Returns:** -- `Property`: Property details - -**Example:** -```python -# Retrieve a specific property by ID -property_details = client.properties.find_by_id("prop_123456") -``` - -### delete() - -Deletes a property by its ID. - -**Signature:** -```python -def delete( - self, - property_id: str, - transaction: Optional[Transaction] = None -) -> None -``` - -**Arguments:** -- `property_id` (str): Unique identifier of the property to delete -- `transaction` (Optional[Transaction]): Optional transaction object - -**Returns:** -- `None` - -**Example:** -```python -# Delete a property -client.properties.delete("prop_123456") -``` - -### values() - -Retrieves values for a specific property with optional filtering, sorting and pagination using SearchQuery. - -**Signature:** -```python -def values( - self, - property_id: str, - search_query: Optional[SearchQuery] = None, - transaction: Optional[Transaction] = None -) -> PropertyValuesData -``` - -**Arguments:** -- `property_id` (str): Unique identifier of the property -- `search_query` (Optional[SearchQuery]): Search query parameters for filtering the records containing this property. 
This can include: - - `where`: Filter criteria for records containing this property - - `labels`: Array of labels to filter records by - - `query`: Filter values by this text string - - `orderBy`: Sort direction (`asc` or `desc`) - - `skip`: Number of values to skip (for pagination) - - `limit`: Maximum number of values to return -- `transaction` (Optional[Transaction]): Optional transaction object - -**Returns:** -- `PropertyValuesData`: Property values data, including optional min/max and list of values - -**Example:** -```python -# Get property values with filtering -values_data = client.properties.values( - property_id="prop_age", +values_data = db.properties.values( + property_id="prop-123", search_query={ - "where": { - "status": "active", # Only get values from active records - "region": "US" # Only from US region - }, - "query": "2", # Filter values containing "2" - "orderBy": "desc", # Sort values in descending order - "skip": 0, # Start from the first value - "limit": 100 # Return up to 100 values + "query": "sci", # filter values containing this text + "orderBy": "asc", + "limit": 100 } ) -# Access values -print(values_data.get('values', [])) # List of property values -print(values_data.get('min')) # Minimum value (for numeric properties) -print(values_data.get('max')) # Maximum value (for numeric properties) -``` - -## Comprehensive Usage Example - -```python -# Find all properties -all_properties = client.properties.find() -for prop in all_properties: - print(f"Property ID: {prop['id']}") - print(f"Name: {prop['name']}") - print(f"Type: {prop['type']}") - print(f"Metadata: {prop.get('metadata', 'No metadata')}") - print("---") - -# Detailed property search -query = { - "where": { - "type": "number", # Only numeric properties - "name": {"$contains": "score"} # Properties with 'score' in name - }, - "limit": 5 # Limit to 5 results -} -numeric_score_properties = client.properties.find(query) - -# Get values for a specific property -if 
numeric_score_properties: - first_prop = numeric_score_properties[0] - prop_values = client.properties.values( - property_id=first_prop['id'], - search_query={ - "orderBy": "desc", - "limit": 50 - } - ) - print(f"Values for {first_prop['name']}:") - print(f"Min: {prop_values.get('min')}") - print(f"Max: {prop_values.get('max')}") - - # Detailed property examination - detailed_prop = client.properties.find_by_id(first_prop['id']) - print("Detailed Property Info:", detailed_prop) -``` - -## Property Types and Structures - -RushDB supports the following property types: -- `"boolean"`: True/False values -- `"datetime"`: Date and time values -- `"null"`: Null/empty values -- `"number"`: Numeric values -- `"string"`: Text values - -### Property Structure Example -```python -property = { - "id": "prop_unique_id", - "name": "user_score", - "type": "number", - "metadata": Optional[str] # Optional additional information -} - -property_with_value = { - "id": "prop_unique_id", - "name": "user_score", - "type": "number", - "value": 95.5 # Actual property value -} +print(values_data.get("values")) # list of values +print(values_data.get("min")) # numeric min +print(values_data.get("max")) # numeric max ``` -## Transactions +## `db.properties.delete()` -Properties API methods support optional transactions for atomic operations: +Deletes a property and removes it from **all records** that have it. ```python -# Using a transaction -with client.transactions.begin() as transaction: - # Perform multiple property-related operations - property_to_delete = client.properties.find( - {"where": {"name": "temp_property"}}, - transaction=transaction - )[0] - - client.properties.delete( - property_id=property_to_delete['id'], - transaction=transaction - ) - # Transaction will automatically commit if no errors occur +db.properties.delete("prop-123") ``` -## Error Handling +:::note +Deleting a property removes its data from every record in the database, not just one record. 
+::: -When working with the PropertiesAPI, be prepared to handle potential errors: -```python -try: - # Attempt to find or delete a property - property_details = client.properties.find_by_id("non_existent_prop") -except RushDBError as e: - print(f"Error: {e}") - print(f"Error Details: {e.details}") -``` diff --git a/docs/docs/python-sdk/python-reference/SearchQuery.md b/docs/docs/python-sdk/python-reference/SearchQuery.md new file mode 100644 index 00000000..c90fce4a --- /dev/null +++ b/docs/docs/python-sdk/python-reference/SearchQuery.md @@ -0,0 +1,538 @@ +--- +sidebar_position: 4 +--- + +# SearchQuery + +`SearchQuery` is a dictionary type that defines the structure for querying [records](../../concepts/records) in RushDB. It provides a flexible way to filter, sort, paginate, and aggregate data. For more information on search concepts, see the [search documentation](../../concepts/search/introduction.md). + +## Query Shape + +```python +search_query = { + "labels": [...], # list[str] — filter by record type(s); multi-label = OR + "where": {...}, # dict — filter conditions; see Where Clause below + "aggregate": {...}, # dict — aggregation map; see Aggregation below + "groupBy": [...], # list[str] — shapes aggregate output; see GroupBy below + "orderBy": {...}, # str | dict — 'asc'|'desc' or { field: 'asc'|'desc' } + "limit": 100, # int — max root records (default 100, max 1000) + "skip": 0 # int — pagination offset +} +``` + +## Query Components + +### Labels + +```python +{ "labels": ["USER", "ADMIN"] } +``` + +Specifies which record types to search. Multiple labels are combined with OR. If omitted, all types are searched. 
+ +### Pagination + +| Field | Type | Description | +|---------|-------|------------------------------------------| +| `limit` | `int` | Maximum number of records to return | +| `skip` | `int` | Number of records to skip (for paging) | + +### Order + +```python +{ "orderBy": { "createdAt": "desc" } } # field → direction +{ "orderBy": "asc" } # global direction +``` + +### Where Clause + +The `where` dictionary filters records based on property values and relationships. + +#### Number Operators + +| Operator | Meaning | +|----------|-----------------------| +| `$gt` | Greater than | +| `$gte` | Greater than or equal | +| `$lt` | Less than | +| `$lte` | Less than or equal | +| `$ne` | Not equal | +| `$in` | Matches any in list | +| `$nin` | Matches none in list | +| `$exists`| Field exists / absent | + +```python +{ "where": { "age": { "$gte": 21, "$lt": 65 } } } +{ "where": { "score": { "$in": [10, 20, 30] } } } +``` + +#### String Operators + +| Operator | Meaning | +|---------------|--------------------------------------| +| `$contains` | Substring match (case-insensitive) | +| `$startsWith` | Prefix match (case-insensitive) | +| `$endsWith` | Suffix match (case-insensitive) | +| `$ne` | Not equal | +| `$in` | Matches any value in list | +| `$nin` | Matches none of the values | +| `$exists` | Field exists / absent | + +```python +{ "where": { "name": { "$contains": "John" } } } +{ "where": { "email": { "$endsWith": "@gmail.com" } } } +{ "where": { "status": { "$in": ["active", "pending"] } } } +``` + +#### Boolean Operators + +```python +{ "where": { "isActive": True } } # direct match +{ "where": { "isActive": { "$ne": False } } } # not equal +{ "where": { "verified": { "$exists": True } } } # field must exist +``` + +#### Datetime Operators + +Datetime fields support ISO 8601 exact match **or** component objects for range comparisons. 
+ +```python +# Exact ISO match +{ "where": { "createdAt": "2023-01-01T00:00:00Z" } } + +# Component object — exact point in time +{ "where": { "createdAt": { "$year": 2023, "$month": 1, "$day": 1 } } } +``` + +Available components: `$year`, `$month`, `$day`, `$hour`, `$minute`, `$second`, `$millisecond`, `$microsecond`, `$nanosecond`. + +:::warning Use component objects for range comparisons +Never use plain ISO strings with `$gt` / `$lt`: + +```python +# Records created in 2024 +{ "where": { "createdAt": { "$gte": { "$year": 2024 }, "$lt": { "$year": 2025 } } } } + +# Records from Q1 2023 +{ "where": { "issuedAt": { "$gte": { "$year": 2023, "$month": 1 }, "$lt": { "$year": 2023, "$month": 4 } } } } + +# Records from a specific day +{ "where": { "eventDate": { "$gte": { "$year": 2024, "$month": 3, "$day": 15 }, "$lt": { "$year": 2024, "$month": 3, "$day": 16 } } } } + +# Records from the 1990s +{ "where": { "publishedAt": { "$gte": { "$year": 1990 }, "$lt": { "$year": 2000 } } } } +``` +::: + +#### Type Expression + +Check whether a field is stored as a specific type: + +```python +{ "where": { "age": { "$type": "number" } } } # "string"|"number"|"boolean"|"datetime"|"null"|"vector" +{ "where": { "tags": { "$type": "string" } } } +``` + +#### $id Operator + +Filter records by their own ID without a separate lookup: + +```python +# Records from a known set of IDs +result = db.records.find({ + "where": { "$id": { "$in": ["id1", "id2", "id3"] } } +}) + +# Filter a nested node by specific ID +result = db.records.find({ + "labels": ["COMPANY"], + "where": { + "EMPLOYEE": { "$id": "specific-employee-id" } + } +}) +``` + +#### Logical Operators + +| Operator | Meaning | +|----------|--------------------------------| +| `$and` | All conditions must match | +| `$or` | At least one must match | +| `$not` | Condition must NOT match | +| `$nor` | None of the conditions match | +| `$xor` | Exactly one condition matches | + +```python +# Implicit AND (multiple keys at same 
level) +{ "where": { "status": "active", "age": { "$gte": 18 } } } + +# Explicit $and +{ "where": { "$and": [{ "status": "active" }, { "age": { "$gte": 18 } }] } } + +# $or +{ "where": { "$or": [{ "status": "active" }, { "status": "pending" }] } } + +# $not +{ "where": { "$not": { "status": "deleted" } } } + +# $nor — none of these statuses +{ "where": { "$nor": [{ "status": "deleted" }, { "status": "archived" }] } } + +# $xor — exactly one must be true +{ "where": { "$xor": [{ "isPremium": True }, { "hasFreeTrialAccess": True }] } } +``` + +#### Relationship Traversal + +Any key in a `where` block that is a label name (not an operator) is interpreted as a related-record traversal: + +```python +# Filter by related record properties +result = db.records.find({ + "labels": ["COMPANY"], + "where": { + "DEPARTMENT": { # traverse to related DEPARTMENT records + "name": "Engineering", + "headcount": { "$gte": 10 } + } + } +}) + +# Multi-level nesting +result = db.records.find({ + "labels": ["COMPANY"], + "where": { + "DEPARTMENT": { + "name": "Engineering", + "PROJECT": { # DEPARTMENT → PROJECT + "status": "active" + } + } + } +}) + +# $alias — name a traversal for use in aggregate / groupBy +result = db.records.find({ + "labels": ["COMPANY"], + "where": { + "EMPLOYEE": { "$alias": "$employee" } + }, + "aggregate": { + "headcount": { "fn": "count", "unique": True, "alias": "$employee" } + } +}) + +# $relation — constrain relationship type and/or direction +result = db.records.find({ + "labels": ["USER"], + "where": { + "POST": { + "$relation": { "type": "AUTHORED", "direction": "in" }, + "title": { "$contains": "Graph" } + } + } +}) +# Shorthand (type only): "$relation": "AUTHORED" +``` + +## Aggregation + +Each key of the `aggregate` dict maps to either an **inline ref** or an **aggregation function**. 
+ +### Inline Refs + +Copy a field value into the output row without applying a function: + +```python +"aggregate": { + "companyName": "$record.name", # root-label field + "projectBudget": "$record.budget" # another root field +} +``` + +### Aggregation Functions + +| Function | Description | +|---------------|-----------------------------------------------------| +| `count` | Count matching records | +| `sum` | Sum a numeric field | +| `avg` | Average a numeric field | +| `min` | Minimum value of a field | +| `max` | Maximum value of a field | +| `collect` | Gather field values into a list | +| `timeBucket` | Group records into time buckets (time-series) | + +`alias` defaults to `"$record"` for root-label fields; set it to the `$alias` declared in `where` for related nodes. + +```python +# Per-company employee statistics +result = db.records.find({ + "labels": ["COMPANY"], + "where": { "EMPLOYEE": { "$alias": "$employee" } }, + "aggregate": { + "companyName": "$record.name", + "headcount": { "fn": "count", "unique": True, "alias": "$employee" }, + "totalWage": { "fn": "sum", "field": "salary", "alias": "$employee" }, + "avgSalary": { "fn": "avg", "field": "salary", "alias": "$employee", "precision": 0 }, + "minSalary": { "fn": "min", "field": "salary", "alias": "$employee" }, + "maxSalary": { "fn": "max", "field": "salary", "alias": "$employee" } + } +}) +``` + +### Collect Options + +| Option | Type | Description | +|-----------|-----------|------------------------------------------| +| `field` | `str` | Field to gather; omit to collect records | +| `unique` | `bool` | Deduplicate (default `True`) | +| `limit` | `int` | Max items in the collected list | +| `skip` | `int` | Skip N items in the collected list | +| `orderBy` | `dict` | Sort collected items | + +```python +"employeeNames": { + "fn": "collect", + "field": "name", + "alias": "$employee", + "unique": True, + "orderBy": { "name": "asc" }, + "limit": 10 +} +``` + +### Nested Collect + +Nest `collect` 
inside another `collect` for hierarchical output. Only `fn: "collect"` is valid inside a nested `aggregate` block. + +```python +org_tree = db.records.find({ + "labels": ["COMPANY"], + "where": { + "DEPARTMENT": { + "$alias": "$dept", + "PROJECT": { "$alias": "$proj" } + } + }, + "aggregate": { + "company": "$record.name", + "departments": { + "fn": "collect", + "alias": "$dept", + "aggregate": { + "projects": { + "fn": "collect", + "alias": "$proj", + "orderBy": { "name": "asc" }, + "limit": 20 + } + } + } + } +}) +# Output: [{ "company": "Acme", "departments": [{ "name": "Eng", "projects": [...] }, ...] }] +``` + +### TimeBucket — Time-Series Aggregation + +```python +# Daily order count +daily = db.records.find({ + "labels": ["ORDER"], + "aggregate": { + "day": { "fn": "timeBucket", "field": "createdAt", "granularity": "day", "alias": "$record" }, + "count": { "fn": "count", "alias": "$record" } + }, + "groupBy": ["day"], + "orderBy": { "day": "asc" } +}) + +# Monthly revenue +monthly = db.records.find({ + "labels": ["ORDER"], + "aggregate": { + "month": { "fn": "timeBucket", "field": "issuedAt", "granularity": "month", "alias": "$record" }, + "revenue": { "fn": "sum", "field": "amount", "alias": "$record" } + }, + "groupBy": ["month"], + "orderBy": { "month": "asc" } +}) + +# Bi-monthly buckets +bi_monthly = db.records.find({ + "labels": ["ORDER"], + "aggregate": { + "period": { "fn": "timeBucket", "field": "issuedAt", "granularity": "months", "size": 2, "alias": "$record" }, + "count": { "fn": "count", "alias": "$record" } + }, + "groupBy": ["period"], + "orderBy": { "period": "asc" } +}) +``` + +`granularity` options: `"day"`, `"week"`, `"month"`, `"quarter"`, `"year"`, `"months"`, `"hours"`, `"minutes"`, `"seconds"` (plural forms accept a `"size"` for custom window widths). + +## GroupBy + +`groupBy` operates in two modes: + +### Mode A — Dimensional (one row per distinct value) + +Entries are `"$alias.propertyName"` strings. 
Each distinct value combination becomes its own output row. + +```python +# Count and avg per deal stage +by_stage = db.records.find({ + "labels": ["DEAL"], + "aggregate": { + "count": { "fn": "count", "alias": "$record" }, + "avgAmt": { "fn": "avg", "field": "amount", "alias": "$record", "precision": 2 } + }, + "groupBy": ["$record.stage"], + "orderBy": { "count": "desc" } +}) +# Output: [{ "stage": "won", "count": 42, "avgAmt": 15200.0 }, ...] + +# Pivot on two keys +pivot = db.records.find({ + "labels": ["PROJECT"], + "aggregate": { "count": { "fn": "count", "alias": "$record" } }, + "groupBy": ["$record.category", "$record.active"], + "orderBy": { "count": "desc" } +}) +``` + +### Mode B — Self-group (one row with global KPIs) + +Put the **aggregation key names** themselves into `groupBy` (not `$alias.field` paths). + +```python +kpis = db.records.find({ + "labels": ["EMPLOYEE"], + "aggregate": { + "totalSalary": { "fn": "sum", "field": "salary", "alias": "$record" }, + "headcount": { "fn": "count", "alias": "$record" }, + "avgSalary": { "fn": "avg", "field": "salary", "alias": "$record", "precision": 0 } + }, + "groupBy": ["totalSalary", "headcount", "avgSalary"], + "orderBy": { "totalSalary": "asc" } # ← required for correct full-scan total +}) +# Output: [{ "totalSalary": 4875000, "headcount": 95, "avgSalary": 51315 }] +``` + +## Critical Rules + +> **Never set `limit` when `aggregate` is present** (except to cap root records in per-record flat aggregation). `limit` restricts the record scan, so `sum`/`avg`/etc. cover only the first N rows and return wrong results. 
+> +> ```python +> # ❌ WRONG — limit cuts the scan, totalBudget is only partial +> db.records.find({ +> "labels": ["PROJECT"], +> "aggregate": { "totalBudget": { "fn": "sum", "field": "budget", "alias": "$record" } }, +> "groupBy": ["totalBudget"], +> "limit": 100 # DO NOT add this +> }) +> +> # ✅ CORRECT — no limit; orderBy on aggregation key triggers late ordering +> db.records.find({ +> "labels": ["PROJECT"], +> "aggregate": { "totalBudget": { "fn": "sum", "field": "budget", "alias": "$record" } }, +> "groupBy": ["totalBudget"], +> "orderBy": { "totalBudget": "asc" } # triggers late ordering → correct full-scan total +> }) +> ``` +> +> For self-group queries, always include `orderBy` on an aggregation key to trigger late ordering (ORDER BY + LIMIT runs after the full aggregation scan). + +## Usage Examples + +### Basic Filter + +```python +result = db.records.find({ + "labels": ["USER"], + "where": { "age": { "$gte": 30 } } +}) +``` + +### Complex Logical Filter + +```python +result = db.records.find({ + "labels": ["USER"], + "where": { + "$and": [ + { "active": True }, + { + "$or": [ + { "email": { "$endsWith": "@gmail.com" } }, + { "email": { "$endsWith": "@outlook.com" } } + ] + } + ] + } +}) +``` + +### Datetime Range + +```python +result = db.records.find({ + "labels": ["ORDER"], + "where": { + "createdAt": { "$gte": { "$year": 2024 }, "$lt": { "$year": 2025 } } + } +}) +``` + +### Filter by Record ID + +```python +result = db.records.find({ + "where": { "$id": { "$in": ["id1", "id2", "id3"] } } +}) +``` + +### Relationship Traversal with Aggregation + +```python +result = db.records.find({ + "labels": ["COMPANY"], + "where": { "EMPLOYEE": { "$alias": "$employee", "salary": { "$gte": 50000 } } }, + "aggregate": { + "companyName": "$record.name", + "headcount": { "fn": "count", "unique": True, "alias": "$employee" }, + "totalWage": { "fn": "sum", "field": "salary", "alias": "$employee" }, + "employeeNames": { + "fn": "collect", "field": "name", "alias": 
"$employee", + "unique": True, "orderBy": { "name": "asc" }, "limit": 10 + } + } +}) +``` + +### Time-Series (TimeBucket) + +```python +result = db.records.find({ + "labels": ["ORDER"], + "aggregate": { + "month": { "fn": "timeBucket", "field": "issuedAt", "granularity": "month", "alias": "$record" }, + "revenue": { "fn": "sum", "field": "amount", "alias": "$record" } + }, + "groupBy": ["month"], + "orderBy": { "month": "asc" } +}) +``` + +### Pagination and Sorting + +```python +page2 = db.records.find({ + "labels": ["PRODUCT"], + "where": { "category": "Electronics" }, + "skip": 20, + "limit": 20, + "orderBy": { "price": "asc" } +}) +``` diff --git a/docs/docs/python-sdk/python-reference/_category_.json b/docs/docs/python-sdk/python-reference/_category_.json index 6db2b31b..17057f9d 100644 --- a/docs/docs/python-sdk/python-reference/_category_.json +++ b/docs/docs/python-sdk/python-reference/_category_.json @@ -1,6 +1,6 @@ { "label": "Python Reference", - "position": 10, + "position": 9, "collapsed": true, "collapsible": true } diff --git a/docs/docs/python-sdk/raw-queries.md b/docs/docs/python-sdk/raw-queries.md index 571f566a..9f75a7e2 100644 --- a/docs/docs/python-sdk/raw-queries.md +++ b/docs/docs/python-sdk/raw-queries.md @@ -1,10 +1,12 @@ --- -sidebar_position: 6 +sidebar_position: 8 --- # Raw Queries -> **Important (cloud-only):** This endpoint is available only on the RushDB managed cloud service or when your project is connected to a custom database through RushDB Cloud. It is not available for self-hosted or local-only deployments — attempting to use it against a non-cloud instance will fail. +:::warning Requires a connected Neo4j instance +This endpoint is only available when your project is connected to your own Neo4j database. Connecting a custom Neo4j instance is available on the free tier — see the RushDB dashboard to set it up. 
+::: ### Python SDK example diff --git a/docs/docs/python-sdk/records/_category_.json b/docs/docs/python-sdk/records/_category_.json index a0f3bf9a..530e80f3 100644 --- a/docs/docs/python-sdk/records/_category_.json +++ b/docs/docs/python-sdk/records/_category_.json @@ -1,5 +1,5 @@ { "label": "Records", - "position": 1, + "position": 2, "collapsible": false } diff --git a/docs/docs/python-sdk/records/create-records.md b/docs/docs/python-sdk/records/create-records.md index 214cfee1..8782f089 100644 --- a/docs/docs/python-sdk/records/create-records.md +++ b/docs/docs/python-sdk/records/create-records.md @@ -4,702 +4,97 @@ sidebar_position: 1 # Create Records -RushDB Python SDK provides flexible methods for creating [records](../../concepts/records.md). You can create single records or multiple records at once, with automatic data type inference and relationship handling. +Three methods for writing records. For nested/graph data see [Import Data](./import-data.md). -## Overview - -The Python SDK offers three main methods for creating records: -- `create()` - Create a single record with a label and data -- `create_many()` - Create multiple records in a batch operation -- `upsert()` - Create a new record or update an existing one based on matching criteria - -Batch Upsert: You can also trigger upsert behavior directly in `create_many()` by providing `mergeBy` and/or `mergeStrategy` inside `options` (same semantics as single-record `upsert`). If either is present, each item in the batch will attempt to match and update existing records instead of always creating new ones. - -All methods support options for controlling data processing and formatting. 
- -## Prerequisites - -Before creating records, make sure you have initialized the RushDB client with your API token: - -```python -from rushdb import RushDB - -db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") -``` - -## Creating a Single Record - -The `create()` method creates a single record with the specified label and data. - -### Syntax - -```python -db.records.create( - label: str, - data: Dict[str, Any], - options: Optional[Dict[str, bool]] = None, - transaction: Optional[Transaction] = None -) -> Record -``` - -### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `label` | str | [Label](../../concepts/labels.md) for the new record | -| `data` | Dict[str, Any] | Record data as key-value pairs | -| `options` | Optional[Dict[str, Any]] | Optional configuration parameters (including batch upsert) | -| `transaction` | Optional[Transaction] | Optional [transaction](../../concepts/transactions.mdx) object | - -#### Options Dictionary - -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `suggestTypes` | bool | `True` | **Default is `True`** - Automatically infers data types for [properties](../../concepts/properties.md). To disable type inference and store all values as strings, explicitly set to `False` | -| `castNumberArraysToVectors` | bool | `False` | When true, converts numeric arrays to vector type | -| `convertNumericValuesToNumbers` | bool | `False` | When true, converts string numbers to number type | -| `capitalizeLabels` | bool | `False` | When true, converts all [labels](../../concepts/labels.md) to uppercase | -| `relationshipType` | str | `None` | Custom [relationship](../../concepts/relationships.md) type for nested objects | -| `returnResult` | bool | `True` | Whether to return the created records | -| `mergeBy` | List[str] | `[]` / omitted | Batch upsert match keys. Empty or omitted with `mergeStrategy` means use all incoming property keys. 
| -| `mergeStrategy` | str | `'append'` | `'append'` adds/updates; `'rewrite'` replaces all properties for matched records. | - -:::info Default Behavior -By default, `suggestTypes` is set to `True` for all write operations (create, upsert, import). This means RushDB automatically infers data types from your values. To store all properties as strings without type inference, you must explicitly set `suggestTypes=False` in the options. -::: - -### Returns - -A `Record` object representing the newly created record. - -### Examples - -#### Basic Record Creation +## `db.records.create()` ```python -# Create a simple record -person = db.records.create( - label="PERSON", - data={ - "name": "John Doe", - "age": 30, - "email": "john@example.com" - } +movie = db.records.create( + label="MOVIE", + data={"title": "Inception", "rating": 8.8, "genre": "sci-fi"} ) - -print(f"Created record with ID: {person.id}") -print(f"Record label: {person.label}") +# → Record { __id, __label, title, rating, genre } ``` -#### Record with Complex Data Types - -```python -# Create a record with various data types -product = db.records.create( - label="PRODUCT", - data={ - "name": "Smartphone X", - "price": 899.99, - "isAvailable": True, - "tags": ["electronics", "smartphone", "new"], - "releaseDate": "2025-03-15T00:00:00Z", - "specifications": { - "dimensions": "150x70x8mm", - "weight": "180g", - "color": "Midnight Blue" - }, - "ratings": [4.7, 4.8, 4.9, 5.0] # Could be converted to a vector - }, - options={ - "returnResult": True, - "suggestTypes": True, - "castNumberArraysToVectors": True - } -) -``` - -#### With Type Control - -When you need precise control over how data types are handled: - -```python -# Create a record with explicit options -customer = db.records.create( - label="customer", # Will be capitalized to "CUSTOMER" - data={ - "id": "C-12345", # Will be stored as string - "name": "Jane Smith", - "joinDate": "2025-01-20T09:30:00Z", # Will be stored as datetime - "loyalty_points": 
"250", # Will be converted to number - "scores": ["95", "87", "92"] # Will be converted to numbers - }, - options={ - "suggestTypes": True, - "convertNumericValuesToNumbers": True, - "capitalizeLabels": True - } -) - -# Access the property types -print(customer.proptypes) -``` - -## Creating Multiple Records - -The `create_many()` method allows you to create multiple records in a single operation, which is more efficient for batch operations. +## `db.records.create_many()` -### Syntax +Flat rows only — no nested objects. For nested data use [`import_json`](./import-data.md). ```python -db.records.create_many( - label: str, - data: Union[Dict[str, Any], List[Dict[str, Any]]], - options: Optional[Dict[str, bool]] = None, - transaction: Optional[Transaction] = None -) -> List[Record] -``` - -### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `label` | str | [Label](../../concepts/labels.md) for all created records | -| `data` | Union[Dict[str, Any], List[Dict[str, Any]]] | List of record data or a single dictionary | -| `options` | Optional[Dict[str, bool]] | Optional configuration parameters | -| `transaction` | Optional[Transaction] | Optional [transaction](../../concepts/transactions.mdx) object | - -### Returns - -A list of `Record` objects representing the newly created records. 
- -### Examples - -#### Creating Multiple Simple Records - -```python -# Create multiple employee records -employees = db.records.create_many( - label="EMPLOYEE", +result = db.records.create_many( + label="ACTOR", data=[ - { - "name": "John Smith", - "position": "Developer", - "department": "Engineering" - }, - { - "name": "Sarah Johnson", - "position": "Product Manager", - "department": "Product" - }, - { - "name": "Michael Chen", - "position": "Data Scientist", - "department": "Data" - } - ], - options={ - "returnResult": True, - "mergeBy": ["name", "department"], # enables batch upsert - "mergeStrategy": "append" - } -) - -# Access the created records -for employee in employees: - print(f"Created {employee.label} record: {employee.id}") -``` - -#### Working with Structured Data - -```python -# Create records with relationships -products_data = [ - { - "name": "Laptop Pro", - "price": "1299.99", # Will be converted to number - "category": "Computers", - "specs": { - "processor": "i9 13th Gen", - "memory": "32GB", - "storage": "1TB SSD" - }, - "inStock": True, - "featureVector": [0.23, 0.45, 0.67, 0.89] # Will be stored as vector - }, - { - "name": "Smartphone Ultra", - "price": "999.99", # Will be converted to number - "category": "Phones", - "specs": { - "processor": "Snapdragon 8 Gen 3", - "memory": "12GB", - "storage": "512GB" - }, - "inStock": False, - "featureVector": [0.33, 0.55, 0.77, 0.99] # Will be stored as vector - } -] - -products = db.records.create_many( - label="product", # Will be capitalized to "PRODUCT" - data=products_data, - options={ - "returnResult": True, - "suggestTypes": True, - "convertNumericValuesToNumbers": True, - "castNumberArraysToVectors": True, - "capitalizeLabels": True, - "relationshipType": "HAS_SPECS", # Custom relationship for nested objects - "mergeBy": ["name"], # Upsert by product name - "mergeStrategy": "rewrite" # Replace product properties fully - } -) -``` - -#### With Nested Objects and Arrays - -RushDB automatically 
handles nested objects and arrays by creating proper [relationships](../../concepts/relationships.md) between [records](../../concepts/records.md): - -```python -# Create a company with employees as nested objects -company_data = { - "name": "Tech Innovations Inc.", - "founded": "2020-01-01T00:00:00Z", - "location": "San Francisco, CA", - "employees": [ - { - "name": "Alice Cooper", - "role": "CEO", - "joinDate": "2020-01-01T00:00:00Z" - }, - { - "name": "Bob Williams", - "role": "CTO", - "joinDate": "2020-02-15T00:00:00Z" - } + {"name": "Leonardo DiCaprio", "country": "USA"}, + {"name": "Ken Watanabe", "country": "Japan"} ] -} - -# This will create a COMPANY record connected to two EMPLOYEE records -# with custom relationship type "EMPLOYS" -result = db.records.create_many( - label="COMPANY", - data=company_data, - options={ - "relationshipType": "EMPLOYS", - "capitalizeLabels": True, - "returnResult": True, - "mergeBy": ["name"], # Company name uniqueness for idempotent import - "mergeStrategy": "append" - } ) +# → SearchResult { data: [...], total: 2 } ``` -## Upserting Records - -The `upsert()` method provides a powerful way to create or update records in a single operation. It attempts to find an existing record based on specified properties and either creates a new one or updates the existing record according to your chosen strategy. +## `db.records.upsert()` -### Syntax +Create-or-update based on matching criteria. 
```python -db.records.upsert( - label: Optional[str], - data: Dict[str, Any], - options: Optional[Dict[str, Any]] = None, - transaction: Optional[Transaction] = None -) -> Record -``` - -### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `label` | Optional[str] | Optional [label](../../concepts/labels.md) for the record | -| `data` | Dict[str, Any] | Record data as key-value pairs | -| `options` | Optional[Dict[str, Any]] | Optional configuration parameters | -| `transaction` | Optional[Transaction] | Optional [transaction](../../concepts/transactions.mdx) object | - -#### Options Dictionary - -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `mergeBy` | List[str] | `[]` | Property names to match on. If empty/undefined, matches on all incoming properties | -| `mergeStrategy` | str | `'append'` | Strategy for updating: `'append'` (add/update properties, keep others) or `'rewrite'` (replace all properties) | -| `suggestTypes` | bool | `True` | **Default is `True`** - Automatically infers data types for [properties](../../concepts/properties.md). To disable type inference and store all values as strings, explicitly set to `False` | -| `castNumberArraysToVectors` | bool | `False` | When true, converts numeric arrays to vector type | -| `convertNumericValuesToNumbers` | bool | `False` | When true, converts string numbers to number type | - -:::info Default Behavior -By default, `suggestTypes` is set to `True` for all write operations including upsert. This means RushDB automatically infers data types from your values. To store all properties as strings without type inference, you must explicitly set `suggestTypes=False` in the options. -::: - -### Returns - -A `Record` object representing the created or updated record. 
- -### Merge Strategies - -#### Append Strategy - -The `append` strategy (default) updates or adds properties while preserving existing ones: - -```python -# Initial create -product = db.records.upsert( - label="Product", - data={ - "sku": "SKU-001", - "name": "Widget", - "price": 10.0, - "category": "Tools" - }, - options={ - "mergeBy": ["sku"], - "mergeStrategy": "append", - "suggestTypes": True - } -) - -# Update price and add stock - name and category are preserved -updated = db.records.upsert( - label="Product", - data={ - "sku": "SKU-001", - "price": 15.0, - "stock": 100 - }, - options={ - "mergeBy": ["sku"], - "mergeStrategy": "append", - "suggestTypes": True - } -) - -print(updated.data) -# { -# "sku": "SKU-001", -# "name": "Widget", # Preserved -# "category": "Tools", # Preserved -# "price": 15.0, # Updated -# "stock": 100 # Added -# } -``` - -#### Rewrite Strategy - -The `rewrite` strategy replaces all properties with the incoming data: - -```python -# Rewrite - removes unspecified fields -rewritten = db.records.upsert( - label="Product", - data={ - "sku": "SKU-001", - "name": "New Widget", - "price": 20.0 - }, - options={ - "mergeBy": ["sku"], - "mergeStrategy": "rewrite", - "suggestTypes": True - } -) - -print(rewritten.data) -# { -# "sku": "SKU-001", -# "name": "New Widget", -# "price": 20.0 -# # category and stock are removed -# } -``` - -### Common Use Cases - -#### Idempotent Data Imports - -```python -from datetime import datetime - -# Can be safely run multiple times without creating duplicates -user = db.records.upsert( - label="User", - data={ - "email": "john@example.com", - "name": "John Doe", - "lastLogin": datetime.now().isoformat() - }, - options={ - "mergeBy": ["email"], - "mergeStrategy": "append", - "suggestTypes": True - } -) -``` - -#### Multi-Tenant Applications - -```python -# Match on both tenant and entity identifiers -setting = db.records.upsert( - label="Setting", - data={ - "tenantId": "tenant-123", - "userId": "user-456", - 
"theme": "dark", - "notifications": True - }, - options={ - "mergeBy": ["tenantId", "userId"], - "mergeStrategy": "append", - "suggestTypes": True - } -) -``` - -#### Configuration Management - -```python -from datetime import datetime - -# Update configuration by key -config = db.records.upsert( - label="Config", - data={ - "key": "api_timeout", - "value": 30000, - "updatedAt": datetime.now().isoformat() - }, - options={ - "mergeBy": ["key"], - "mergeStrategy": "append", - "suggestTypes": True - } -) -``` - -#### Inventory Updates - -```python -from datetime import datetime - -# Update stock while preserving product details -inventory = db.records.upsert( - label="Product", - data={ - "productCode": "PROD-789", - "stock": 50, - "lastRestocked": datetime.now().isoformat() - }, - options={ - "mergeBy": ["productCode"], - "mergeStrategy": "append", - "suggestTypes": True - } -) -``` - -#### Switching Between Strategies - -```python -# Start with append to build up data -doc = db.records.upsert( - label="Document", - data={ - "docId": "DOC-1", - "title": "My Document", - "content": "Initial content", - "version": 1 - }, - options={ - "mergeBy": ["docId"], - "mergeStrategy": "append" - } -) - -# Add more fields with append -doc = db.records.upsert( - label="Document", - data={ - "docId": "DOC-1", - "author": "John Doe", - "tags": ["important", "draft"] - }, - options={ - "mergeBy": ["docId"], - "mergeStrategy": "append" - } -) - -# Clean slate with rewrite -doc = db.records.upsert( - label="Document", - data={ - "docId": "DOC-1", - "title": "Final Document", - "version": 2 - }, - options={ - "mergeBy": ["docId"], - "mergeStrategy": "rewrite" - } +# Match on 'title'; update rating if found, create if not +movie = db.records.upsert( + label="MOVIE", + data={"title": "Inception", "rating": 9.0, "genre": "sci-fi"}, + options={"mergeBy": ["title"], "mergeStrategy": "append"} ) -# Now only docId, title, and version remain ``` -### Matching Behavior +### Merge strategies 
-#### With Specific MergeBy Fields +| Strategy | Behaviour | +|---|---| +| `append` (default) | Add / update incoming fields; preserve all other existing fields | +| `rewrite` | Replace all fields with incoming data; unmentioned fields are removed | -When `mergeBy` contains specific field names, only those fields are used for matching: +### `mergeBy` behaviour -```python -# Matches only on 'email' -user = db.records.upsert( - label="User", - data={ - "email": "user@example.com", - "name": "John", - "age": 30 - }, - options={ - "mergeBy": ["email"], - "mergeStrategy": "append" - } -) -``` +| `mergeBy` value | Match behaviour | +|---|---| +| `['field']` | Match only on listed fields | +| `[]` or omitted | Match on ALL incoming property keys | -#### Without MergeBy (All Properties Match) +## Options -When `mergeBy` is empty or undefined, matching is performed on all incoming properties: +| Option | Default | Description | +|---|---|---| +| `suggestTypes` | `True` | Infer property types automatically | +| `convertNumericValuesToNumbers` | `False` | Convert string numbers to number type | +| `capitalizeLabels` | `False` | Uppercase all inferred label names | +| `relationshipType` | `__RUSHDB__RELATION__DEFAULT__` | Relationship type for nested links | +| `returnResult` | `True` | Return created records in response | +| `mergeBy` | — | Fields to match on for upsert | +| `mergeStrategy` | `append` | `append` or `rewrite` | -```python -# Matches only if ALL properties (email, name, age) match exactly -user = db.records.upsert( - label="User", - data={ - "email": "user@example.com", - "name": "John", - "age": 30 - }, - options={ - "mergeStrategy": "append" - } -) - -# This would create a new record (age doesn't match) -different = db.records.upsert( - label="User", - data={ - "email": "user@example.com", - "name": "John", - "age": 31 - }, - options={ - "mergeStrategy": "append" - } -) -``` - -### Using with Transactions +## With a transaction ```python -# Begin transaction 
-tx = db.tx.begin() - +tx = db.transactions.begin() try: - # Upsert product - product = db.records.upsert( - label="Product", - data={ - "sku": "SKU-001", - "name": "Widget", - "price": 10.0 - }, - options={ - "mergeBy": ["sku"], - "mergeStrategy": "append" - }, + movie = db.records.create( + label="MOVIE", + data={"title": "Inception"}, transaction=tx ) - - # Upsert inventory - inventory = db.records.upsert( - label="Inventory", - data={ - "productSku": "SKU-001", - "quantity": 100, - "warehouse": "A" - }, - options={ - "mergeBy": ["productSku", "warehouse"], - "mergeStrategy": "append" - }, + actor = db.records.create( + label="ACTOR", + data={"name": "Leonardo DiCaprio"}, + transaction=tx + ) + db.relationships.attach( + source=movie, + target=actor, + options={"type": "STARS"}, transaction=tx ) - - # Commit transaction tx.commit() -except Exception as e: - # Rollback on error +except Exception: tx.rollback() - raise e -``` - -### Working with Complex Data Types - -```python -# Upsert with various data types -config = db.records.upsert( - label="Config", - data={ - "configId": "config-1", - "enabled": True, - "maxRetries": 3, - "timeout": 30.5, - "tags": ["production", "critical"], - "vector": [0.1, 0.2, 0.3, 0.4] - }, - options={ - "mergeBy": ["configId"], - "mergeStrategy": "append", - "suggestTypes": True, - "castNumberArraysToVectors": True - } -) - -# Update some values -updated = db.records.upsert( - label="Config", - data={ - "configId": "config-1", - "enabled": False, - "maxRetries": 5 - }, - options={ - "mergeBy": ["configId"], - "mergeStrategy": "append" - } -) -# timeout, tags, and vector are preserved + raise ``` -## Best Practices - -1. **Automatic Type Inference is Enabled by Default**: RushDB automatically infers data types with `suggestTypes=True` for all write operations (create, create_many, upsert). This is the recommended approach for most use cases. Only set `suggestTypes=False` if you explicitly need all values stored as strings. - -2. 
**Batch Operations**: Use `create_many()` for better performance when creating multiple [records](../../concepts/records.md). - -3. **Nested Data**: Use nested objects and arrays to create related records automatically with proper [relationships](../../concepts/relationships.md). - -4. **Transactions**: For operations that need to be atomic, use the optional [transaction](../../concepts/transactions.mdx) parameter. - -5. **Data Validation**: Validate your data on the client side before sending it to RushDB. - -6. **Label Convention**: Consider using uppercase for [labels](../../concepts/labels.md) (e.g., "PERSON" instead of "Person") for consistency with graph database conventions. - -7. **Choose Appropriate MergeBy Fields for Upsert**: Use fields that uniquely identify your records (like `email`, `sku`, `userId`) to ensure proper matching. - -8. **Select the Right Merge Strategy**: Use `append` to preserve existing data and update specific fields; use `rewrite` for complete record replacement. -9. **Use Upsert for Idempotent Operations**: Upsert is ideal for data synchronization and import operations where you want to safely re-run operations without creating duplicates. diff --git a/docs/docs/python-sdk/records/delete-records.md b/docs/docs/python-sdk/records/delete-records.md index 93c6978a..0aacc628 100644 --- a/docs/docs/python-sdk/records/delete-records.md +++ b/docs/docs/python-sdk/records/delete-records.md @@ -4,249 +4,44 @@ sidebar_position: 7 # Delete Records -RushDB Python SDK provides methods for deleting [records](../../concepts/records.md) from your database. You can delete individual records by ID or delete multiple records matching specific criteria. 
- -## Overview - -The delete methods allow you to: -- Delete a single record by ID -- Delete multiple records using search query filters -- Delete records directly from Record objects -- Perform conditional bulk deletions - -## Prerequisites - -Before deleting records, make sure you have initialized the RushDB client with your API token: +## `db.records.delete_by_id()` ```python -from rushdb import RushDB +# Single record +db.records.delete_by_id("movie-123") -db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") -``` +# Multiple records +db.records.delete_by_id(["movie-123", "movie-456"]) -## Deleting a Single Record by ID - -The `delete_by_id()` method allows you to delete a record using its unique identifier. - -### Syntax - -```python -db.records.delete_by_id( - id_or_ids: Union[str, List[str]], - transaction: Optional[Transaction] = None -) -> Dict[str, str] +# From record object +movie.delete() ``` -### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `id_or_ids` | Union[str, List[str]] | Single ID or list of IDs to delete | -| `transaction` | Optional[Transaction] | Optional [transaction](../../concepts/transactions.mdx) object | - -### Returns - -A dictionary with the response data confirming the deletion. 
- -### Examples - -#### Deleting a Single Record - -```python -# First, create or retrieve a record -product = db.records.create( - label="PRODUCT", - data={ - "name": "Discontinued Item", - "price": 19.99 - } -) - -# Delete the record by its ID -response = db.records.delete_by_id(product.id) - -print(f"Deletion response: {response}") -# Example output: {'message': 'Record deleted successfully'} -``` - -#### Deleting Multiple Records by ID - -```python -# Delete multiple records by their IDs -response = db.records.delete_by_id([ - "018e4c71-f35a-7000-89cd-850db63a1e77", - "018e4c75-a2b3-7000-89cd-850db63a1e77", - "018e4c79-c4d5-7000-89cd-850db63a1e77" -]) - -print(f"Deletion response: {response}") -# Example output: {'message': '3 record(s) deleted successfully'} -``` - -## Deleting Records with Query Filters - -The `delete()` method allows you to delete multiple records that match specific criteria. The search query parameters are consistent across all RushDB APIs and follow the same structure as used in [search operations](../../concepts/search/introduction.md). - -### Syntax - -```python -db.records.delete( - query: SearchQuery, - transaction: Optional[Transaction] = None -) -> Dict[str, str] -``` - -### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `query` | SearchQuery | Query to match records for deletion. See [Search Introduction](../../concepts/search/introduction.md) | -| `transaction` | Optional[Transaction] | Optional [transaction](../../concepts/transactions.mdx) object | - -### Returns +## `db.records.delete()` -A dictionary with the response data confirming the deletion. - -### Examples - -#### Basic Query Deletion +Delete all records matching a query. 
```python -# Delete records matching specific criteria -response = db.records.delete({ - "labels": ["PRODUCT"], # See Labels in search: https://docs.rushdb.com/concepts/search/labels - "where": { # See Where clause: https://docs.rushdb.com/concepts/search/where - "price": {"$lt": 10}, - "discontinued": True - } +db.records.delete({ + "labels": ["MOVIE"], + "where": {"rating": {"$lt": 5}} }) - -print(f"Deletion response: {response}") -# Example output: {'message': '5 record(s) deleted successfully'} -``` - -#### Advanced Query Deletion - -```python -# Delete records with complex conditions using $or operator -response = db.records.delete({ - "where": { - "$or": [ # Logical operators as described in Where clause documentation - { - "status": "archived", - "lastModified": {"$lt": "2024-01-01T00:00:00Z"} - }, - { - "status": "inactive", - "isTemporary": True - } - ] - }, - "labels": ["DOCUMENT", "ATTACHMENT"] # Records with either DOCUMENT or ATTACHMENT label -}) - -print(f"Deletion response: {response}") -# Example output: {'message': '12 record(s) deleted successfully'} ``` -## Deleting Records from Record Objects +:::warning +Calling `delete()` without a `where` clause deletes **all** records with the given label. +::: -You can also delete records directly from Record objects. - -### Syntax +## With a transaction ```python -record.delete( - transaction: Optional[Transaction] = None -) -> Dict[str, str] -``` - -### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `transaction` | Optional[Transaction] | Optional [transaction](../../concepts/transactions.mdx) object | - -### Returns - -A dictionary with the response data confirming the deletion. - -### Example - -```python -# Create a record -user = db.records.create( - label="USER", - data={ - "name": "John Doe", - "email": "john@example.com" - } -) - -# Perform operations with the record -# ... 
- -# Delete the record when no longer needed -response = user.delete() - -print(f"Deletion response: {response}") -# Example output: {'message': 'Record deleted successfully'} -``` - -## Working with Transactions - -For operations that need to be atomic, you can use transactions: - -```python -# Start a transaction -tx = db.tx.begin() - +tx = db.transactions.begin() try: - # Create records in the transaction - product1 = db.records.create( - label="PRODUCT", - data={"name": "Item 1", "price": 10.99}, - transaction=tx - ) - - product2 = db.records.create( - label="PRODUCT", - data={"name": "Item 2", "price": 20.99}, - transaction=tx - ) - - # Delete the first product - db.records.delete_by_id( - id_or_ids=product1.id, - transaction=tx - ) - - # Delete other records matching a query - db.records.delete( - query={"labels": ["PRODUCT"], "where": {"discontinued": True}}, - transaction=tx - ) - - # Commit all changes + db.records.delete_by_id("movie-123", transaction=tx) tx.commit() -except Exception as e: - # If any operation fails, roll back all changes +except Exception: tx.rollback() - print(f"Transaction failed: {e}") + raise ``` -## Handling Relationships - -When deleting records, all [relationships](../../concepts/relationships.md) associated with those records are automatically deleted. This ensures database integrity and prevents orphaned relationships. - -## Best Practices -1. **Use IDs for specific deletions** when you know exactly which records to remove. -2. **Use queries for conditional deletions** when you need to delete records based on specific criteria. -3. **Use transactions** when deleting multiple related records to ensure data consistency. -4. **Consider performance** for large-scale deletions by using appropriate filters. -5. **Handle exceptions** properly to manage failed delete operations. -6. **Verify deletions** after bulk operations to ensure the expected records were removed. -7. 
**Use [label filtering](../../concepts/search/labels.md)** to narrow down the scope of deletion operations. -8. **Leverage search operators** from the [Where clause documentation](../../concepts/search/where.md) for precise targeting of records to delete. -9. **Remember that search parameters** are consistent across all RushDB operations, including [find()](../../concepts/search/introduction.md), delete(), and other methods. diff --git a/docs/docs/python-sdk/records/get-records.md b/docs/docs/python-sdk/records/get-records.md index a43d5961..4cb8d1e3 100644 --- a/docs/docs/python-sdk/records/get-records.md +++ b/docs/docs/python-sdk/records/get-records.md @@ -1,490 +1,176 @@ --- -sidebar_position: 4 +sidebar_position: 5 --- # Get Records -The Search API is one of the most powerful features of RushDB, allowing you to find records, navigate relationships, and transform results to exactly match your application's needs. This guide demonstrates how to effectively use the Python SDK to search and query data in your RushDB database. - -## Direct Record Search - -The RushDB Python SDK provides several ways to search for records, from simple lookups to complex queries with filtering, sorting, and pagination. - -### Basic Searching with `find()` - -The most versatile search method is `find()`, which accepts a search query dictionary to filter, sort, and paginate results. 
- ```python -# Basic search for records with the "USER" label result = db.records.find({ - "labels": ["USER"], - "where": { - "isActive": True - }, + "labels": ["MOVIE"], + "where": {"rating": {"$gte": 8}}, "limit": 10, - "orderBy": {"createdAt": "desc"} - + "orderBy": {"rating": "desc"} }) -# Access the returned records -print(f"Found {len(result)} records out of {result.total} total users") +for movie in result: + print(movie.get("title"), movie.get("rating")) -# Iterate over results -for user in result: - print(f"User: {user.get('name', 'Unknown')}") - -# Access specific records -first_user = result[0] if result else None +print(f"{len(result)} shown, {result.total} total") ``` -Search queries support a powerful and flexible syntax for filtering records. For a detailed explanation of all the available operators and capabilities, see the [Where clause documentation](../../concepts/search/where). - -### Finding Records by ID with `find_by_id()` - -When you already know the ID of the record(s) you need: +## `db.records.find_by_id()` ```python -# Find a single record by ID -user = db.records.find_by_id("user-123") +# Single record +movie = db.records.find_by_id("movie-123") -# Find multiple records by ID -users = db.records.find_by_id(["user-123", "user-456", "user-789"]) +# Multiple records +movies = db.records.find_by_id(["movie-123", "movie-456"]) ``` -### Relationship Traversal +## SearchQuery parameters -One of RushDB's most powerful features is the ability to search across relationships between records: +| Parameter | Type | Description | +|---|---|---| +| `labels` | `list[str]` | Filter by one or more labels | +| `where` | `dict` | Field conditions and operators | +| `orderBy` | `dict` | `{"field": "asc" \| "desc"}` | +| `limit` | `int` | Max records to return. 
**Omit when using `aggregate`** | +| `skip` | `int` | Records to skip (pagination offset) | +| `aggregate` | `dict` | Aggregation functions | +| `groupBy` | `list[str]` | Group aggregated results | -```python -# Find all blog posts by users who work at tech companies -result = db.records.find({ - "labels": ["POST"], - "where": { - "USER": { # Traverse to related USER records - "COMPANY": { # Traverse to related COMPANY records - "industry": "Technology" - } - }, - "publishedAt": {"$lte": datetime.now()} # Only published posts - }, - "orderBy": {"publishedAt": "desc"}, - "limit": 20 -}) - -posts = result.data -total = result.total -``` - -For more complex relationship queries, you can specify relationship types and directions: +## Relationship traversal ```python -# Find users who follow specific topics +# MOVIE that has ACTOR named DiCaprio result = db.records.find({ - "labels": ["USER"], + "labels": ["MOVIE"], "where": { - "TOPIC": { - "$relation": { - "type": "FOLLOWS", - "direction": "out" # User -> FOLLOWS -> Topic - }, - "name": {"$in": ["Python", "GraphDB", "RushDB"]} + "ACTOR": { + "name": {"$contains": "DiCaprio"} } } }) -users = result.data -total = result.total -``` - -See the [Where clause documentation](../../concepts/search/where#relationship-queries) for more details on relationship queries. 
- -### Vector Search - -RushDB supports vector similarity searches for AI and machine learning applications: - -```python -# Find documents similar to a query embedding +# With explicit relationship type and direction result = db.records.find({ - "labels": ["DOCUMENT"], + "labels": ["MOVIE"], "where": { - "embedding": { - "$vector": { - "fn": "gds.similarity.cosine", # Similarity function - "query": query_embedding, # Your vector embedding - "threshold": {"$gte": 0.75} # Minimum similarity threshold - } + "ACTOR": { + "$relation": {"type": "STARS_IN", "direction": "in"}, + "country": "USA" } - }, - "limit": 10 -}) - -documents = result.data -total = result.total -``` - -See the [Vector operators documentation](../../concepts/search/where#vector-operators) for more details on vector search capabilities. - -### Field Existence and Type Checking - -RushDB provides operators to check for field existence and data types, which is particularly useful when working with heterogeneous data: - -```python -# Find users who have provided an email but not a phone number -email_only_users = db.records.find({ - "labels": ["USER"], - "where": { - "$and": [ - {"email": {"$exists": True}}, # Must have email - {"phone_number": {"$exists": False}} # Must not have phone number - ] - } -}) - -# Find records where age is actually stored as a number (not string) -proper_age_records = db.records.find({ - "labels": ["USER"], - "where": { - "age": {"$type": "number"} - } -}) - -# Complex query combining type and existence checks -valid_profiles = db.records.find({ - "labels": ["PROFILE"], - "where": { - "$and": [ - {"bio": {"$type": "string"}}, # Bio must be text - {"bio": {"$contains": "developer"}}, # Bio mentions developer - {"skills": {"$exists": True}}, # Skills must exist - {"avatar": {"$exists": False}} # No avatar uploaded yet - ] } }) ``` -The `$exists` operator is useful for: -- Data validation and cleanup -- Finding incomplete profiles -- Filtering by optional fields - -The `$type` 
operator is useful for: -- Working with imported data that might have inconsistent types -- Validating data integrity -- Ensuring type consistency before operations - -See the [Field existence operators documentation](../../concepts/search/where#field-existence-operator) for more details. - -### Pagination and Sorting - -Control the order and volume of results: +## Where operators ```python -# Get the second page of results (20 items per page) -result = db.records.find({ - "labels": ["PRODUCT"], - "where": { - "category": "Electronics" - }, - "skip": 20, # Skip the first 20 results - "limit": 20, # Return 20 results - "orderBy": { - "price": "asc" # Sort by price ascending - } -}) - -products = result.data -total_products = result.total +# Common operators +{"rating": {"$gt": 7, "$lt": 10}} # gt, gte, lt, lte +{"genre": {"$in": ["sci-fi", "drama"]}} # in, nin +{"title": {"$contains": "Inc"}} # contains, startsWith, endsWith +{"sequel": {"$exists": False}} # exists, not exists +{"id": {"$id": "movie-123"}} # match by __id ``` -For more details on pagination and sorting options, see the [Pagination and ordering documentation](../../concepts/search/pagination-order). 
- -### Aggregations - -Transform and aggregate your search results: +## Aggregations ```python -# Calculate sales statistics result = db.records.find({ - "labels": ["ORDER"], - "where": { - "status": "completed", - "createdAt": {"$gte": "2023-01-01T00:00:00Z"} - }, + "labels": ["MOVIE"], "aggregate": { - "totalSales": { - "fn": "sum", - "alias": "$record", - "field": "amount" - }, - "orderCount": { - "fn": "count", - "alias": "$record" - }, - "avgOrderValue": { - "fn": "avg", - "alias": "$record", - "field": "amount" - } + "count": {"fn": "count", "alias": "$record"}, + "avgRating": {"fn": "avg", "alias": "$record", "field": "rating"}, + "titles": {"fn": "collect","alias": "$record", "field": "title"} } + # Do NOT add "limit" when using aggregate }) - -stats = result.data -total = result.total ``` -For comprehensive details on available aggregation functions and usage, see the [Aggregations documentation](../../concepts/search/aggregations). +Aggregation functions: `count` · `sum` · `avg` · `min` · `max` · `collect` -### Grouping Results (groupBy) +:::danger +**Never set `limit` with `aggregate`** — it restricts the record scan and produces wrong sums/averages. +::: -You can group aggregated results with the `groupBy` field at the root of your search query. A group key references an alias plus a property (root alias is implicitly `$record`). The Python SDK follows the same semantics as the core SearchQuery. - -See the dedicated [Grouping guide](../../concepts/search/group-by) for deeper patterns and edge cases. 
+## GroupBy ```python result = db.records.find({ - "labels": ["ORDER"], + "labels": ["MOVIE"], "aggregate": { - "count": {"fn": "count", "alias": "$record"}, - "avgTotal": {"fn": "avg", "field": "total", "alias": "$record"} + "count": {"fn": "count", "alias": "$record"}, + "avgRating": {"fn": "avg", "alias": "$record", "field": "rating"} }, - "groupBy": ["$record.status"], - "orderBy": {"count": "desc"}, - "limit": 1000 + "groupBy": ["$record.genre"], + "orderBy": {"count": "desc"} # late-ordering: ensures correct totals }) - -for row in result: - # Each row represents one status group - print(row['status'], row['count'], row['avgTotal']) ``` -Group by a related alias (declare the alias in where traversal): +## TimeBucket (time-series) ```python result = db.records.find({ - "labels": ["DEPARTMENT"], - "where": { - "PROJECT": {"$alias": "$project"} - }, + "labels": ["MOVIE"], "aggregate": { - "projectCount": {"fn": "count", "alias": "$project"}, - "projects": {"fn": "collect", "field": "name", "alias": "$project", "unique": True} + "month": {"fn": "timeBucket", "field": "releasedAt", "granularity": "month", "alias": "$record"}, + "count": {"fn": "count", "alias": "$record"} }, - "groupBy": ["$record.name"], - "orderBy": {"projectCount": "desc"} + "groupBy": ["month"], + "orderBy": {"month": "asc"} }) ``` -Multiple grouping keys (pivot style): +Granularity values: `"day"` · `"week"` · `"month"` · `"quarter"` · `"year"` · `"hours"` · `"minutes"` · `"seconds"` (use plural forms with `"size"` for custom windows). -```python -result = db.records.find({ - "labels": ["PROJECT"], - "aggregate": {"count": {"fn": "count", "alias": "$record"}}, - "groupBy": ["$record.category", "$record.active"], - "orderBy": {"count": "desc"} -}) -``` - -Notes: -- At least one aggregation is required for `groupBy` to take effect. -- Group keys appear in each result row by their property name (without the alias prefix). 
-- To retain nested arrays while grouping, use `collect` inside `aggregate` and group only on the parent alias. -- `collect` is unique by default; set `"unique": False` to allow duplicates. - -### Searching Within a Record's Context - -You can search for records within the context of a specific record's relationships using the `record_id` parameter: +## Nested collect ```python -# Find all records related to a specific user -result = db.records.find( - search_query={ - "labels": ["POST", "COMMENT"], - "where": { - "isPublished": True - } - }, - record_id="user_123" # Search within this user's context -) - -related_records = result.data -total = result.total - -# Find only posts created by a specific user -result = db.records.find( - search_query={ - "labels": ["POST"], - "orderBy": {"createdAt": "desc"} +result = db.records.find({ + "labels": ["MOVIE"], + "where": { + "ACTOR": {"$alias": "$actor"} }, - record_id="user_123" -) - -user_posts = result.data - -# Search for documents shared with a specific team -result = db.records.find( - search_query={ - "labels": ["DOCUMENT"], - "where": { - "status": "shared", - "category": {"$in": ["proposal", "contract"]} + "aggregate": { + "actors": { + "fn": "collect", + "alias": "$actor", + "aggregate": { + "roles": {"fn": "collect", "alias": "$actor", "field": "role"} + } } - }, - record_id="team_456" -) - -team_documents = result.data -``` - -This is particularly useful when you want to: -- Find all records that have relationships with a specific record -- Search within the scope of a particular entity's connected data -- Implement features like "user's posts", "team's documents", or "company's projects" - -## Return Format and Error Handling - -The `find()` method returns a [`SearchResult`](../python-reference/search-result.md) object that provides list-like access and comprehensive metadata: - -```python -# The method returns a SearchResult object -result = db.records.find({ - "labels": ["USER"], - "limit": 10 -}) - -# 
len(result) = records in this result set (affected by limit) -print(f"Retrieved {len(result)} records in this page") - -# total = all records matching criteria in the entire database -print(f"Total matching records in database: {result.total}") - -# Example: if you have 1,000 users total but limit to 10: -# len(result) = 10 (records returned in this request) -# result.total = 1000 (total users matching your criteria) - -# Iterate over results -for record in result: - print(f"User: {record.get('name')}") - -# Access records by index -first_user = result[0] if result else None - -# Check if there are more records beyond this page -if result.has_more: - print("There are more records available") - -# Handle cases where no records are found -if not result: - print("No records found matching the criteria") - -# Use total count for pagination calculations -pages = (result.total + 9) // 10 # Calculate number of pages (10 per page) -``` - -### Understanding Total vs Length - -It's important to understand the difference between these two key concepts: - -- **`result.total`** - The total number of records in your database that match your search criteria -- **`len(result)`** - The number of records actually returned in this specific request (limited by `limit` parameter) - -```python -# Example: searching users in a database with 10,000 total users -result = db.records.find({ - "labels": ["USER"], - "where": {"active": True}, # Let's say 8,500 users are active - "limit": 25 # But we only want 25 per page + } }) - -print(f"Records in this page: {len(result)}") # Will show: 25 -print(f"Total active users: {result.total}") # Will show: 8,500 -print(f"Has more pages: {result.has_more}") # Will show: True - -# This is useful for building pagination UIs: -current_page = 1 -per_page = 25 -total_pages = (result.total + per_page - 1) // per_page # = 340 pages -print(f"Page {current_page} of {total_pages}") ``` -### Error Handling +Only `fn: "collect"` is valid inside nested `aggregate` 
blocks. -The `find()` method includes built-in error handling that returns an empty SearchResult on exceptions: +## SearchResult ```python -# If an error occurs, the method returns an empty SearchResult instead of raising an exception -result = db.records.find({ - "labels": ["INVALID_LABEL"], - "where": { - "nonexistent_field": "some_value" - } -}) - -# Always returns a SearchResult, even on errors -print(f"Found {len(result)} records") # Will print "Found 0 records" -print(f"Total: {result.total}") # Will print "Total: 0" - -# Safe iteration -for record in result: - print("This won't execute if result is empty") +result = db.records.find({"labels": ["MOVIE"], "limit": 10}) -# Boolean check is safe -if result: - print("This won't execute if result is empty") - -# For more explicit error handling in production code, you may want to validate -# your queries before calling find() or implement additional error checking +len(result) # records returned in this page (≤ limit) +result.total # total records matching in the database +result.has_more # True if more pages remain +result[0] # access by index +for r in result: # iterable + pass ``` -## Search Within Transactions - -All search operations can be performed within transactions for consistency: +## With a transaction ```python -# Begin a transaction -tx = db.tx.begin() - +tx = db.transactions.begin() try: - # Perform search within the transaction - result = db.records.find({ - "labels": ["USER"], - "where": {"is_active": True} - }, transaction=tx) - - # Use the results to make changes - for user in result: - if user.last_login < older_than_3_months: - db.records.update({ - "target": user, - "data": {"is_active": False} - }, transaction=tx) - - # Commit the transaction when done + result = db.records.find({"labels": ["MOVIE"]}, transaction=tx) tx.commit() -except Exception as error: - # Roll back the transaction on error +except Exception: tx.rollback() - raise error + raise ``` -For more details on transactions, see 
the [Transactions documentation](../../python-sdk/transactions). - -## Performance Best Practices - -When working with the Search API, follow these best practices for optimal performance: - -1. **Be Specific with Labels**: Always specify labels to narrow the search scope. -2. **Use Indexed Properties**: Prioritize filtering on properties that have indexes. -3. **Limit Results**: Use pagination to retrieve only the records you need. -4. **Optimize Relationship Traversal**: Avoid deep relationship traversals when possible. -5. **Use Aliases Efficiently**: Define aliases only for records you need to reference in aggregations. -6. **Filter Early**: Apply filters as early as possible in relationship traversals to reduce the amount of data processed. - -## Next Steps -- Explore [filtering with where clauses](../../concepts/search/where) in depth -- Learn about [data aggregation capabilities](../../concepts/search/aggregations) -- Understand [pagination and sorting options](../../concepts/search/pagination-order) -- Discover how to filter by [record labels](../../concepts/search/labels) -- Learn about the [`SearchResult](../python-reference/search-result.md) class returned by find operations -- See how to use [Records API](../../python-sdk/records/create-records.md) for other operations diff --git a/docs/docs/python-sdk/records/import-data.md b/docs/docs/python-sdk/records/import-data.md index 6256d2ce..9dda42d7 100644 --- a/docs/docs/python-sdk/records/import-data.md +++ b/docs/docs/python-sdk/records/import-data.md @@ -4,231 +4,98 @@ sidebar_position: 1 # Import Data -The RushDB Python SDK provides powerful methods for importing data into your database. You can import data from various formats including JSON and CSV, with options to customize how the data is processed and stored. +Pass nested dicts — RushDB walks the structure and links each level as a related record. 
-## Overview - -The import functionality in the Python SDK allows you to: -- Import JSON data structures -- Import CSV data from files or strings -- Control data type inference and handling -- Set default relationship types -- Configure property value handling -- Perform batch upsert (create-or-update) during import using `mergeBy` / `mergeStrategy` - -## Importing CSV Data - -### import_csv() - -Imports records from CSV data into RushDB. - -**Signature:** -```python -def import_csv( - self, - label: str, - data: str, - options: Optional[Dict[str, bool]] = None, - parse_config: Optional[Dict[str, Any]] = None, - transaction: Optional[Transaction] = None -) -> List[Dict[str, Any]] -``` - -**Arguments:** -- `label` (str): Label for all imported records -- `data` (str): CSV data to import as a string -- `options` (Optional[Dict[str, bool]]): Import options - - `suggestTypes` (bool, **default: `True`**): Automatically infers data types for properties. Set to `False` to disable type inference and store all values as strings - - `castNumberArraysToVectors` (bool): When true, converts numeric arrays to vector type - - `convertNumericValuesToNumbers` (bool): When true, converts string numbers to number type - - `capitalizeLabels` (bool): When true, converts all labels to uppercase - - `relationshipType` (str): Default relationship type between nodes - - `returnResult` (bool): When true, returns imported records in response - - `mergeBy` (List[str]): Optional list of property names for matching existing records (upsert). If omitted and `mergeStrategy` provided, all incoming property keys are used. Empty list also means all keys. - - `mergeStrategy` (str): `'append'` (default) to add/update properties preserving others, or `'rewrite'` to replace all existing properties. - -:::info Default Behavior -By default, `suggestTypes` is set to `True` for all import operations (CSV and JSON). This means RushDB automatically infers data types from your values. 
To store all properties as strings without type inference, you must explicitly set `suggestTypes=False` in the options. -::: -- `parse_config` (Optional[Dict[str, Any]]): CSV parsing configuration (PapaParse compatible subset): - - `delimiter` (str): Field delimiter character - - `header` (bool): Treat first row as header row - - `skipEmptyLines` (bool | 'greedy'): Skip empty lines - - `dynamicTyping` (bool): Convert numeric/boolean values automatically - - `quoteChar` (str): Character used for quoting fields - - `escapeChar` (str): Character used for escaping quotes - - `newline` (str): Explicit newline sequence -- `transaction` (Optional[Transaction]): Optional transaction object - -**Returns:** -- `List[Dict[str, Any]]`: Imported records data (if returnResult is True) - -**Example:** ```python -# Import records from CSV string -csv_data = """name,email,age -John Doe,john@example.com,30 -Jane Smith,jane@example.com,25 -Bob Wilson,bob@example.com,45""" - -records = client.records.import_csv( - label="CUSTOMER", - data=csv_data, - options={ - "returnResult": True, - "suggestTypes": True, - "convertNumericValuesToNumbers": True, - "mergeBy": ["email"], # upsert match key - "mergeStrategy": "append" # or "rewrite" +db.records.create_many( + label="MOVIE", + data={ + "title": "Inception", + "rating": 8.8, + "ACTOR": [ + {"name": "Leonardo DiCaprio", "country": "USA"}, + {"name": "Ken Watanabe", "country": "Japan"} + ] } ) - -# Import records from CSV file -with open('employees.csv', 'r') as file: - csv_content = file.read() - -records = client.records.import_csv( - label="EMPLOYEE", - data=csv_content, - options={ - "returnResult": True, - "suggestTypes": True, - "mergeStrategy": "rewrite" # replace properties for matched employees - }, - parse_config={"header": True, "skipEmptyLines": True, "dynamicTyping": True} -) +# MOVIE → ACTOR × 2: all created and linked automatically ``` -## Importing JSON Data +## Flat batch (`create_many`) -### create_many() - -Imports 
records from JSON data into RushDB. - -**Signature:** -```python -def create_many( - self, - label: str, - data: Union[Dict[str, Any], List[Dict[str, Any]]], - options: Optional[Dict[str, Any]] = None, - transaction: Optional[Transaction] = None -) -> List[Dict[str, Any]] -``` +Use `create_many` with a list for flat rows — no nested objects inside items. -**Arguments:** -- `label` (str): Label for the root node(s) -- `data` (Union[Dict[str, Any], List[Dict[str, Any]]]): JSON data to import as dict or find of dicts -- `options` (Optional[Dict[str, Any]]): Import options - - `suggestTypes` (bool, **default: `True`**): Automatically infers data types for properties. Set to `False` to disable type inference and store all values as strings - - `castNumberArraysToVectors` (bool): When true, converts numeric arrays to vector type - - `convertNumericValuesToNumbers` (bool): When true, converts string numbers to number type - - `capitalizeLabels` (bool): When true, converts all labels to uppercase - - `relationshipType` (str): Default relationship type between nodes - - `returnResult` (bool): When true, returns imported records in response - - `mergeBy` (List[str]): Upsert match keys for batch create/import; empty or omitted with `mergeStrategy` means all keys. - - `mergeStrategy` (str): `'append'` (default) or `'rewrite'`. 
-- `transaction` (Optional[Transaction]): Optional transaction object - -**Returns:** -- `List[Dict[str, Any]]`: Imported records data (if returnResult is True) - -**Example:** ```python -# Import a single JSON object -person_data = { - "name": "John Doe", - "age": "30", - "addresses": [ - { - "type": "home", - "street": "123 Main St", - "city": "Anytown" - }, - { - "type": "work", - "street": "456 Business Rd", - "city": "Workville" - } +db.records.create_many( + label="ACTOR", + data=[ + {"name": "Leonardo DiCaprio", "country": "USA"}, + {"name": "Ken Watanabe", "country": "Japan"} ], - "scores": [85, 90, 95], - "active": True -} - -records = client.records.create_many( - label="PERSON", - data=person_data, - options={ - "returnResult": True, - "suggestTypes": True, - "convertNumericValuesToNumbers": True, - "relationshipType": "OWNS" - } -) - -# Import multiple JSON objects -employees_data = [ - { - "name": "Alice Johnson", - "department": "Engineering", - "skills": ["Python", "JavaScript", "AWS"] - }, - { - "name": "Bob Smith", - "department": "Marketing", - "skills": ["SEO", "Content Writing", "Analytics"] - } -] - -records = client.records.create_many( - label="EMPLOYEE", - data=employees_data, - options={ - "returnResult": True, - "suggestTypes": True, - "mergeBy": ["name", "department"], # composite match - "mergeStrategy": "append" - } + options={"suggestTypes": True} ) ``` -## Data Type Handling - -### Automatic Type Inference +## CSV import (`import_csv`) -**By default, `suggestTypes` is set to `True` for all import operations** (import_csv and create_many). 
This means RushDB automatically infers the following data types from your values: +```python +with open("actors.csv") as f: + csv_content = f.read() -- `string`: Text values -- `number`: Numeric values -- `boolean`: `True`/`False` values -- `null`: `None` values -- `datetime`: ISO8601 format strings (e.g., "2025-04-23T10:30:00Z") -- `vector`: Arrays of numbers (when `castNumberArraysToVectors` is `True`) +db.records.import_csv( + label="ACTOR", + data=csv_content, + options={"returnResult": False}, + parse_config={"header": True, "dynamicTyping": True} +) +``` -To disable automatic type inference and store all values as strings, you must **explicitly set `suggestTypes=False`** in your options dictionary. +### `parse_config` options -### Additional Type Conversions +| Option | Default | Description | +|---|---|---| +| `delimiter` | `,` | Column separator | +| `header` | `True` | First row is header | +| `skipEmptyLines` | `True` | Ignore blank rows | +| `dynamicTyping` | `True` | Auto-convert numbers and booleans | +| `quoteChar` | `"` | Quote character | +| `escapeChar` | `"` | Escape character | +| `newline` | auto | Explicit newline sequence | -When `convertNumericValuesToNumbers` is enabled, string values that represent numbers (e.g., '123') will be automatically converted to their numeric equivalents (e.g., 123). 
+## Options -### Array Handling +| Option | Default | Description | +|---|---|---| +| `suggestTypes` | `True` | Infer property types automatically | +| `convertNumericValuesToNumbers` | `False` | Convert string numbers to number type | +| `capitalizeLabels` | `False` | Uppercase all inferred label names | +| `relationshipType` | `__RUSHDB__RELATION__DEFAULT__` | Relationship type for nested links | +| `returnResult` | `True` | Return created records in response | +| `mergeBy` | — | Fields to match on for upsert | +| `mergeStrategy` | `append` | `append` or `rewrite` | -Arrays with consistent data types (e.g., all numbers, all strings) will be handled seamlessly according to their type. However, for inconsistent arrays (e.g., `[1, 'two', None, False]`), all values will be automatically converted to strings to mitigate data loss, and the property type will be stored as `string`. +## Upsert during import -## Graph Construction +```python +# Append — update matched, preserve other fields +db.records.create_many( + label="ACTOR", + data=actors, + options={"mergeBy": ["name"], "mergeStrategy": "append"} +) -When importing nested JSON data, RushDB automatically creates relationships between parent and child nodes. For example, if you import a person with addresses, RushDB will create: +# Rewrite — replace all properties for matched records +db.records.import_csv( + label="ACTOR", + data=csv_content, + options={"mergeBy": ["name"], "mergeStrategy": "rewrite"} +) +``` -1. A node with the "PERSON" label for the person data -2. Nodes with the "ADDRESS" label for each address -3. Relationships from the person to each address (using the default relationship type or the one specified) +## Quick rules recap -This allows you to maintain complex data structures in a graph database format without manually creating the relationships. 
+- `create_many` with a dict or list of dicts: flat or nested JSON +- `import_csv`: CSV string input with `parse_config`; `dynamicTyping` inherits from `options.suggestTypes` when omitted +- Set `returnResult: False` for large imports to improve performance -## Performance Considerations -- For large imports, consider setting `returnResult: False` to improve performance -- Imports are processed in batches for optimal database performance -- Consider using transactions for large imports to ensure data consistency -- For very large datasets (millions of records), consider breaking the import into multiple smaller operations -- For upsert imports, prefer stable unique keys in `mergeBy` to reduce match overhead. diff --git a/docs/docs/python-sdk/records/update-records.md b/docs/docs/python-sdk/records/update-records.md index 72aca381..1035fbda 100644 --- a/docs/docs/python-sdk/records/update-records.md +++ b/docs/docs/python-sdk/records/update-records.md @@ -4,268 +4,54 @@ sidebar_position: 6 # Update Records -RushDB Python SDK provides two main methods for updating [records](../../concepts/records.md): `update()` for partial updates and `set()` for complete replacement of record data. +## `db.records.update()` — partial update -## Overview - -The update methods allow you to: -- Update specific properties while preserving others (`update()`) -- Completely replace record data (`set()`) -- Apply changes either through the RecordsAPI or directly on Record objects - -## Prerequisites - -Before updating records, make sure you have initialized the RushDB client with your API token: +Unspecified fields are preserved. ```python -from rushdb import RushDB - -db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") -``` - -## Updating Records with `update()` - -The `update()` method allows you to modify specific properties of a record while preserving other existing properties. 
- -### Syntax +# Update via record object +movie.update({"rating": 9.0}) -```python -# Using RecordsAPI +# Update by ID db.records.update( - record_id: str, - data: Dict[str, Any], - transaction: Optional[Transaction] = None -) -> Dict[str, str] - -# Using Record object -record.update( - data: Dict[str, Any], - transaction: Optional[Transaction] = None -) -> Dict[str, str] -``` - -### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `record_id` | str | ID of the [record](../../concepts/records.md) to update (when using RecordsAPI) | -| `data` | Dict[str, Any] | Partial record data containing only the properties to update | -| `transaction` | Optional[Transaction] | Optional [transaction](../../concepts/transactions.mdx) object | - -### Returns - -A dictionary with the response data confirming the update. - -### Examples - -#### Using RecordsAPI - -```python -# First, create or retrieve a record -person = db.records.create( - label="PERSON", - data={ - "name": "John Doe", - "age": 30, - "email": "john@example.com", - "active": True - } -) - -# Later, update specific properties using the record's ID -response = db.records.update( - record_id=person.id, - data={ - "age": 31, - "title": "Senior Developer", - "active": False - } + record_id=movie.__id, + data={"rating": 9.0} ) - -# The record now contains both original and updated properties: -# name: "John Doe" (preserved) -# age: 31 (updated) -# email: "john@example.com" (preserved) -# active: False (updated) -# title: "Senior Developer" (added) -``` - -#### Using Record Object - -```python -# If you have a record object, you can update it directly -person = db.records.create( - label="PERSON", - data={ - "name": "Jane Smith", - "age": 28, - "department": "Engineering" - } -) - -# Update the record -response = person.update({ - "age": 29, - "department": "Product", - "role": "Product Manager" -}) - -# The record now contains: -# name: "Jane Smith" (preserved) -# age: 29 (updated) 
-# department: "Product" (updated) -# role: "Product Manager" (added) ``` -## Replacing Records with `set()` - -The `set()` method completely replaces all properties of a record with new data. +## `db.records.set()` — full replacement -### Syntax +All previous fields are removed, then replaced with the new data. ```python -# Using RecordsAPI -db.records.set( - record_id: str, - data: Dict[str, Any], - transaction: Optional[Transaction] = None -) -> Dict[str, str] - -# Using Record object -record.set( - data: Dict[str, Any], - transaction: Optional[Transaction] = None -) -> Dict[str, str] -``` - -### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `record_id` | str | ID of the [record](../../concepts/records.md) to replace (when using RecordsAPI) | -| `data` | Dict[str, Any] | New record data that will completely replace existing properties | -| `transaction` | Optional[Transaction] | Optional [transaction](../../concepts/transactions.mdx) object | - -### Returns - -A dictionary with the response data confirming the replacement. 
- -### Examples - -#### Using RecordsAPI - -```python -# First, create or retrieve a record -product = db.records.create( - label="PRODUCT", - data={ - "name": "Smartphone X", - "price": 899.99, - "category": "Electronics", - "features": ["5G", "Water Resistant"] - } -) +# Set via record object +movie.set({"title": "Inception", "rating": 9.0, "genre": "sci-fi"}) -# Later, completely replace the record data -response = db.records.set( - record_id=product.id, - data={ - "name": "Smartphone X Pro", - "price": 1099.99, - "inStock": True, - "specifications": { - "storage": "256GB", - "color": "Midnight Blue" - } - } +# Set by ID +db.records.set( + record_id=movie.__id, + data={"title": "Inception", "rating": 9.0, "genre": "sci-fi"} ) - -# The record now ONLY contains the new properties: -# name: "Smartphone X Pro" -# price: 1099.99 -# inStock: True -# specifications: { storage: "256GB", color: "Midnight Blue" } -# Note: "category" and "features" properties are removed ``` -#### Using Record Object - -```python -# If you have a record object, you can replace it directly -product = db.records.create( - label="PRODUCT", - data={ - "name": "Laptop Basic", - "price": 699.99, - "category": "Computer" - } -) - -# Replace all record data -response = product.set({ - "name": "Laptop Pro", - "price": 1299.99, - "memory": "16GB", - "processor": "i7" -}) - -# The record now ONLY contains: -# name: "Laptop Pro" -# price: 1299.99 -# memory: "16GB" -# processor: "i7" -# Note: "category" property is removed -``` +## Parameters -## Working with Transactions +| Parameter | Type | Description | +|---|---|---| +| `record_id` | `str` | ID of the record to update | +| `data` | `dict` | Properties to write | +| `transaction` | `Transaction` | Optional transaction | -For operations that need to be atomic, you can use transactions: +## With a transaction ```python -# Start a transaction -tx = db.tx.begin() - +tx = db.transactions.begin() try: - # Update multiple records in the same transaction - 
product1 = db.records.create( - label="PRODUCT", - data={"name": "Item 1", "price": 10.99}, - transaction=tx - ) - - product2 = db.records.create( - label="PRODUCT", - data={"name": "Item 2", "price": 20.99}, - transaction=tx - ) - - # Update first product - db.records.update( - record_id=product1.id, - data={"price": 11.99, "featured": True}, - transaction=tx - ) - - # Replace second product - db.records.set( - record_id=product2.id, - data={"name": "Item 2 Pro", "price": 29.99, "featured": True}, - transaction=tx - ) - - # Commit all changes + db.records.update(record_id=movie.__id, data={"rating": 9.0}, transaction=tx) tx.commit() -except Exception as e: - # If any operation fails, roll back all changes +except Exception: tx.rollback() - print(f"Transaction failed: {e}") + raise ``` -## Best Practices -1. **Use `update()` for partial updates** when you want to preserve existing data. -2. **Use `set()` for complete replacement** when you want to ensure the record only has the properties you specify. -3. **Use Record objects directly** for more concise code when you already have a reference to the record. -4. **Use transactions** when updating multiple related records to ensure data consistency. -5. **Validate data** on the client side before sending update requests. -6. **Handle exceptions** properly to manage failed update operations. diff --git a/docs/docs/python-sdk/relationships.md b/docs/docs/python-sdk/relationships.md index 074adeb2..84b4609f 100644 --- a/docs/docs/python-sdk/relationships.md +++ b/docs/docs/python-sdk/relationships.md @@ -1,817 +1,72 @@ --- -sidebar_position: 4 +sidebar_position: 3 --- # Relationships -[Relationships](../concepts/relationships.md) in RushDB connect records to form a rich, interconnected network of data. The Python SDK provides powerful methods for creating, managing, and traversing relationships between records. 
- -## Overview - -Relationships in RushDB enable you to: -- Connect related records -- Model complex domain relationships -- Query data based on connections -- Build graph-like data structures -- Navigate between connected entities - -## Prerequisites - -Before working with relationships, make sure you have initialized the RushDB client with your API token: - -```python -from rushdb import RushDB - -db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") -``` - -## Creating Records with Relationships - -When creating records, you can automatically establish relationships through nested objects: - -```python -# Create a company with departments and employees -company_data = { - "name": "Acme Inc.", - "founded": "2010-01-15T00:00:00Z", - "departments": [ # This creates relationships to DEPARTMENT records - { - "name": "Engineering", - "employees": [ # This creates relationships to EMPLOYEE records - { - "name": "Alice Chen", - "position": "Senior Developer" - }, - { - "name": "Bob Smith", - "position": "QA Engineer" - } - ] - }, - { - "name": "Marketing", - "employees": [ - { - "name": "Carol Davis", - "position": "Marketing Director" - } - ] - } - ] -} - -# Create the company with all related records -records = db.records.create_many( - label="COMPANY", - data=company_data, - options={ - "relationshipType": "HAS_DEPARTMENT", # Custom relationship type for departments - "returnResult": True - } -) -``` - -## Explicitly Creating Relationships with attach() - -You can also explicitly create relationships between existing records using the `attach()` method: - -### Using RecordsAPI - -```python -# Create two separate records -user = db.records.create( - label="USER", - data={"name": "John Doe", "email": "john@example.com"} -) - -project = db.records.create( - label="PROJECT", - data={"name": "Website Redesign", "deadline": "2025-06-30T00:00:00Z"} -) - -# Create a relationship between them -response = db.records.attach( - source=user.id, - 
target=project.id, - options={ - "type": "MANAGES", # Relationship type - "direction": "out" # Relationship direction (user -> project) - } -) -``` - -### Using Record Objects Directly - -```python -# Create two separate records -team = db.records.create( - label="TEAM", - data={"name": "Frontend Team", "size": 5} -) - -employee = db.records.create( - label="EMPLOYEE", - data={"name": "Alice Johnson", "role": "Developer"} -) - -# Create a relationship from the team to the employee -response = team.attach( - target=employee, - options={ - "type": "INCLUDES", - "direction": "out" # From team to employee - } -) -``` - -## Creating Multiple Relationships at Once - -```python -# Create a manager record -manager = db.records.create( - label="MANAGER", - data={"name": "Sarah Williams", "department": "Engineering"} -) - -# Create multiple employee records -employees = db.records.create_many( - label="EMPLOYEE", - data=[ - {"name": "John Smith", "skills": ["Python", "JavaScript"]}, - {"name": "Jane Brown", "skills": ["Java", "SQL"]}, - {"name": "Mike Davis", "skills": ["React", "TypeScript"]} - ] -) - -# Create relationships from the manager to all employees at once -response = manager.attach( - target=[emp.id for emp in employees], - options={ - "type": "MANAGES", - "direction": "out" - } -) -``` - -## Bulk Relationship Creation by Key Match - -When importing tabular data in separate steps, you can create relationships in bulk by matching a key on the source label to a key on the target label. Use `relationships.create_many` for this. 
- ```python -# Create USER -[:ORDERED]-> ORDER for all pairs where -# USER.id = ORDER.userId and both belong to the same tenant -tenant_id = "ACME" - -db.relationships.create_many( - source={"label": "USER", "key": "id", "where": {"tenantId": tenant_id}}, - target={"label": "ORDER", "key": "userId", "where": {"tenantId": tenant_id}}, - type="ORDERED", - direction="out" # (source) -[:ORDERED]-> (target) -) -``` - -Parameters -- `source`: Dict describing the source side - - `label` (str): Source record label - - `key` (str): Property on the source used for equality match - - `where` (optional, dict): Additional filters for source records; same shape as SearchQuery `where` -- `target`: Dict describing the target side - - `label` (str): Target record label - - `key` (str): Property on the target used for equality match - - `where` (optional, dict): Additional filters for target records; same shape as SearchQuery `where` -- `type` (optional, str): Relationship type. Defaults to the RushDB default type when omitted -- `direction` (optional, str): 'in' or 'out'. Defaults to 'out' -- `transaction` (optional): Include to run the operation atomically - -Notes -- The join condition is always `source[key] = target[key]` combined with any additional `where` constraints. -- `where` follows the same operators as record search (e.g., `{"tenantId": "ACME"}` or `{"tenantId": "ACME"}`). -- This is efficient for connecting data created in separate imports (e.g., users and orders). - -Many-to-many (cartesian) creation - -If you omit `key` on both `source` and `target` you can opt-in to a many-to-many (cartesian) creation by passing `many_to_many=True`. This will create relationships between every matching source and every matching target produced by the provided `where` filters. - -Important safeguards - -- `many_to_many=True` requires non-empty `where` filters for both `source` and `target` to avoid accidentally creating an unbounded cartesian product. 
-- By default (when `many_to_many` is omitted or false) the server requires `source["key"]` and `target["key"]` to be provided and will join using `source[key] = target[key]`. -- Many-to-many operations can create large numbers of relationships and may be expensive; use specific filters and limits in your `where` clauses. - -Example: key-based join (same as above) - -```python -db.relationships.create_many( - source={"label": "USER", "key": "id", "where": {"tenantId": tenant_id}}, - target={"label": "ORDER", "key": "userId", "where": {"tenantId": tenant_id}}, - type="ORDERED", - direction="out" -) -``` - -Example: explicit many-to-many (cartesian) creation — opt-in - -```python -# Create every USER_MTM × TAG_MTM link where tenantId matches -db.relationships.create_many( - source={"label": "USER_MTM", "where": {"tenantId": tenant_id}}, - target={"label": "TAG_MTM", "where": {"tenantId": tenant_id}}, - type="HAS_TAG", - direction="out", - many_to_many=True -) -``` - -### Bulk Relationship Deletion by Key Match - -The API also supports deleting relationships created by a matching condition. Use `relationships.delete_many` with the same arguments as `create_many` to remove relationships in bulk. 
- -Examples mirror the creation API: - -Key-based deletion - -```python -db.relationships.delete_many( - source={"label": "USER", "key": "id", "where": {"tenantId": tenant_id}}, - target={"label": "ORDER", "key": "userId", "where": {"tenantId": tenant_id}}, - type="ORDERED", - direction="out" -) -``` - -Many-to-many deletion - -```python -db.relationships.delete_many( - source={"label": "USER_MTM", "where": {"tenantId": tenant_id}}, - target={"label": "TAG_MTM", "where": {"tenantId": tenant_id}}, - type="HAS_TAG", - direction="out", - many_to_many=True -) -``` - -## Removing Relationships with detach() - -You can remove relationships between records without deleting the records themselves using the `detach()` method: - -### Using RecordsAPI - -```python -# Remove a specific relationship type -db.records.detach( - source=user.id, - target=project.id, - options={ - "typeOrTypes": "MANAGES", - "direction": "out" - } +# Leo acted in Inception +db.records.attach( + source=leo, + target=inception, + options={"type": "ACTED_IN"} ) -# Remove multiple relationship types at once +# Detach db.records.detach( - source=manager.id, - target=employee.id, - options={ - "typeOrTypes": ["MANAGES", "MENTORS"], - "direction": "out" - } + source=leo, + target=inception, + options={"type": "ACTED_IN"} ) ``` -### Using Record Objects Directly +## `attach()` ```python -# Remove a relationship directly from a record object -team.detach( - target=employee, - options={ - "typeOrTypes": "INCLUDES", - "direction": "out" - } -) -``` - -## Finding Related Records - -You can find records based on their relationships: - -```python -# Find all employees of a specific department -result = db.records.find({ - "labels": ["EMPLOYEE"], - "where": { - "DEPARTMENT": { - "$relation": "WORKS_IN", # Relationship type - "$id": department.id # Match the specific related department - } - } -}) - -employees = result.data - -# Find all projects managed by a specific user -result = db.records.find({ - "labels": 
["PROJECT"], - "where": { - "USER": { - "$relation": "MANAGES", - "$id": user.id - } - } -}) - -projects = result.data -``` - -## Using Custom Relationship Types - -By default, RushDB uses a standard relationship type, but you can specify custom types: - -```python -# When creating records with nested objects -company = db.records.create_many( - label="COMPANY", - data={ - "name": "Tech Corp", - "employees": [ - {"name": "Jane Smith", "position": "CTO"}, - {"name": "John Doe", "position": "Lead Developer"} - ] - }, - options={ - "relationshipType": "EMPLOYS" # Custom relationship type - } +# With direction +db.records.attach( + source=movie, + target=actor, + options={"type": "STARS_IN", "direction": "out"} ) -# When explicitly creating relationships +# One-to-many (target list) db.records.attach( - source=mentor.id, - target=mentee.id, - options={ - "type": "MENTORS", - "direction": "out" - } + source=movie, + target=[actor1, actor2, actor3], + options={"type": "STARS_IN"} ) ``` -## Searching and Querying Relationships with RelationsAPI - -The `RelationsAPI` provides dedicated functionality for searching and analyzing relationships directly. This API allows you to query relationships themselves rather than records, giving you insights into the connections within your graph. - -**Important**: The RelationsAPI uses a Record-centric approach. When filtering relationships, you specify properties of the records involved in those relationships, not properties of the relationships themselves. This means the `where` clause contains Record properties to find relationships involving records that match those criteria. 
- -### Overview - -The RelationsAPI enables you to: -- Search for specific relationships based on criteria -- Analyze relationship patterns across your data -- Discover connections between records -- Perform relationship-based analytics -- Monitor relationship types and their usage - -### Accessing the RelationsAPI - -You access the RelationsAPI through the main RushDB client: - -```python -from rushdb import RushDB - -# Initialize the client -db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") - -# Access the relationships API -relationships_api = db.relationships -``` - -### The find() Method - -The `find()` method searches for relationships. It accepts a SearchQuery and returns an API response with the matched relationships. - -#### Method Signature - -```python -async def find( - self, - search_query: Optional[SearchQuery] = None, - transaction: Optional[Union[Transaction, str]] = None, -) -> ApiResponse[List[Relationship]] -``` - -#### Parameters - -- **search_query** (`Optional[SearchQuery]`): Search criteria to filter relationships. This uses the same SearchQuery "where" syntax as record queries, including nested label blocks and the `$relation` operator: - - `where`: Conditions for records involved in relationships. 
Examples: - - Nested related label with relationship filter: `{ "COMPANY": { "$relation": "WORKS_AT", "industry": "Technology" } }` - - Relationship with direction: `{ "POST": { "$relation": { "type": "AUTHORED", "direction": "in" }, "title": {"$contains": "Graph"} } }` - - Base-label filtering via `labels`: limit the primary side you’re describing, e.g., `{"labels": ["USER"], "where": {"name": {"$contains": "John"}, "COMPANY": {"$relation": "WORKS_AT"}}}` - - `limit` (int, optional): Max number of relationships to return - - `skip` (int, optional): Number of relationships to skip (for pagination) -- **transaction** (`Optional[Union[Transaction, str]]`): Optional transaction context - -#### Return Value - -Returns an `ApiResponse` with shape `{ "success": bool, "data": List[Relationship], "total": Optional[int] }`. - -### Basic Relationship Searching - -#### Find All Relationships - -```python -# Get all relationships in the database -response = await db.relationships.find() - -print(f"Success: {response['success']}") -print(f"Total relationships (if provided): {response.get('total')}") -for rel in response["data"][:5]: # Show first 5 - print(f"{rel['sourceId']} -> {rel['targetId']} ({rel['type']})") -``` - -#### Find Relationships with Pagination - -```python -# Get relationships with pagination (limit/skip) -first_page = await db.relationships.find({ - "limit": 50, - "skip": 0 -}) - -# Get next page -second_page = await db.relationships.find({ - "limit": 50, - "skip": 50 -}) -``` - -### Advanced Relationship Queries - -#### Filter by Record Properties - -The RelationsAPI uses the same `where` syntax as record queries. 
Use nested label blocks and `$relation` to describe the connection between entities: - -```python -# Find relationships where the USER side is active in Engineering -engineering_relationships = await db.relationships.find({ - "labels": ["USER"], - "where": { - "isActive": True, - "department": "Engineering" - } -}) - -# Find relationships where the related COMPANY is large and in Technology -tech_company_relationships = await db.relationships.find({ - "labels": ["USER"], - "where": { - "COMPANY": {"industry": "Technology", "employees": {"$gte": 100}} - } -}) - -# Find USER -> PROJECT relationships by type (optionally include direction) -user_project_relationships = await db.relationships.find({ - "labels": ["USER"], - "where": { - "PROJECT": {"$relation": {"type": "WORKS_ON", "direction": "out"}} - } -}) - -# Find relationships involving senior Developer EMPLOYEEs -senior_dev_relationships = await db.relationships.find({ - "labels": ["EMPLOYEE"], - "where": { - "role": "Developer", - "experience": {"$gte": 5}, - "isActive": True - } -}) -``` - -#### Complex Relationship Queries - -```python -# Find relationships involving engineering employees who are active -engineering_relationships = await db.relationships.find({ - "labels": ["EMPLOYEE"], - "where": { - "$and": [ - {"department": "Engineering"}, - {"isActive": True}, - {"role": {"$in": ["Developer", "QA Engineer", "DevOps"]}} - ] - } -}) - -# Find relationships where the base records were created recently -recent_record_relationships = await db.relationships.find({ - "where": { - "createdAt": {"$gte": "2024-01-01T00:00:00Z"} - }, - "limit": 25, - "skip": 0 -}) - -# Find relationships involving MANAGER records with teams >= 5 -manager_relationships = await db.relationships.find({ - "labels": ["MANAGER"], - "where": { - "department": "Engineering", - "teamSize": {"$gte": 5} - } -}) -``` - -### Relationship Analytics and Insights - -#### Count Relationships by Type - -```python -# Get all relationships and analyze 
by type -response = await db.relationships.find() - -# Count by type -type_counts = {} -for rel in response["data"]: - rel_type = rel["type"] - type_counts[rel_type] = type_counts.get(rel_type, 0) + 1 - -print("Relationship types and counts:") -for rel_type, count in sorted(type_counts.items()): - print(f" {rel_type}: {count}") -``` - -#### Find Highly Connected Records - -```python -# Find relationships involving all records first -response = await db.relationships.find() - -# Count outgoing relationships per record -outgoing_counts = {} -for rel in response["data"]: - source_id = rel["sourceId"] - outgoing_counts[source_id] = outgoing_counts.get(source_id, 0) + 1 - -# Find top 10 most connected records -top_connected = sorted(outgoing_counts.items(), key=lambda x: x[1], reverse=True)[:10] -print("Most connected records (outgoing):") -for record_id, count in top_connected: - print(f" {record_id}: {count} relationships") - -# Alternative: Find relationships for specific high-activity records -manager_relationships = await db.relationships.find({ - "labels": ["MANAGER"], - "where": { - "isActive": True, - "teamSize": {"$gte": 10} # Managers with large teams likely have many relationships - } -}) -``` - -### Using Relationships API with Transactions - -The RelationsAPI supports transactions for consistent querying: +## `detach()` ```python -# Start a transaction -tx = db.tx.begin() - -try: - # Query relationships involving COMPANY related via WORKS_AT within the transaction - response = await db.relationships.find({ - "labels": ["USER"], - "where": { - "COMPANY": {"$relation": "WORKS_AT", "department": "Sales"} - } - }, transaction=tx) - - # Perform additional operations in the same transaction - for rel in response["data"]: - # Update related records or create new relationships - pass - - # Commit the transaction - tx.commit() -except Exception as e: - # Roll back on error - tx.rollback() - print(f"Transaction failed: {e}") -``` - -### Pagination Best Practices - 
-When working with large numbers of relationships, use pagination effectively: - -```python -# Process relationships in batches -async def process_all_relationships(batch_size=100): - skip = 0 - processed_count = 0 - - while True: - # Get next batch - response = await db.relationships.find({"limit": batch_size, "skip": skip}) - - relationships = response["data"] - if not relationships: - break # No more relationships - - # Process this batch - for rel in relationships: - # Process individual relationship - processed_count += 1 - print(f"Processing relationship {rel['sourceId']} -> {rel['targetId']}") - - # Move to next batch - skip += batch_size - - print(f"Processed {processed_count} relationships so far...") - - print(f"Finished processing {processed_count} total relationships") - -# Run the batch processor -await process_all_relationships() -``` - -### Performance Considerations - -When using the RelationsAPI: - -1. **Use specific filters**: Apply `where` conditions to reduce the result set -2. **Limit result sizes**: Use pagination to avoid loading too many relationships at once -3. **Filter by relationship type**: Use type filters when you know the specific relationship types you need -4. **Index frequently queried properties**: Ensure properties used in filters are indexed -5. **Combine with record queries**: Use RelationsAPI to discover connections, then use RecordsAPI for detailed record data - -### Error Handling - -```python -try: - relationships = await db.relationships.find({ - "where": {"department": "InvalidDepartment"} - }) -except Exception as e: - print(f"Error querying relationships: {e}") - # Handle the error appropriately +db.records.detach( + source=movie, + target=actor, + options={"type": "STARS_IN"} +) ``` -### Integration with Record Operations - -The RelationsAPI works seamlessly with record operations: - -```python -# 1. 
Discover relationships involving specific types of records -management_rels_resp = await db.relationships.find({ - "labels": ["MANAGER"], - "where": { - "department": "Engineering", - "isActive": True - } -}) - -# 2. Extract record IDs from relationships -manager_ids = [rel["sourceId"] for rel in management_rels_resp["data"]] -employee_ids = [rel["targetId"] for rel in management_rels_resp["data"]] - -# 3. Query the actual records using RecordsAPI for detailed information -managers = await db.records.find_by_id(manager_ids) -employees = await db.records.find_by_id(employee_ids) - -# 4. Combine data for analysis -for rel in management_rels_resp["data"]: - manager = next(m for m in managers if m.id == rel["sourceId"]) # Adjust to your record object shape - employee = next(e for e in employees if e.id == rel["targetId"]) # Adjust as needed - print(f"{manager.name} manages {employee.name}") -``` +## Direction -## Working with Transactions +| Value | Meaning | +|---|---| +| `"out"` | source → target | +| `"in"` | target → source | -For operations that need to be atomic, you can use [transactions](../concepts/transactions.mdx) when creating or modifying relationships: +## With a transaction ```python -# Start a transaction -tx = db.tx.begin() - +tx = db.transactions.begin() try: - # Create records in the transaction - team = db.records.create( - label="TEAM", - data={"name": "Product Team"}, - transaction=tx - ) - - member1 = db.records.create( - label="EMPLOYEE", - data={"name": "Alice"}, - transaction=tx - ) - - member2 = db.records.create( - label="EMPLOYEE", - data={"name": "Bob"}, - transaction=tx - ) - - # Create relationships in the same transaction - team.attach( - target=[member1, member2], - options={"type": "HAS_MEMBER"}, - transaction=tx - ) - - # Commit all changes + db.records.attach(source=movie, target=actor, options={"type": "STARS_IN"}, transaction=tx) tx.commit() -except Exception as e: - # If any operation fails, roll back all changes +except 
Exception: tx.rollback() - print(f"Transaction failed: {e}") + raise ``` -## Best Practices for Working with Relationships - -1. **Use meaningful relationship types** - Choose relationship types that clearly express the connection's nature (e.g., "MANAGES", "BELONGS_TO") - -2. **Consider relationship direction** - Think about which way the relationship should point based on your domain model - -3. **Use nested objects for hierarchical data** - When creating hierarchical data, structure your JSON to reflect the relationships - -4. **Create relationships in transactions** - Use transactions when creating multiple related records to ensure data consistency - -5. **Be consistent with relationship types** - Use the same relationship types for similar connections throughout your application - -6. **Think in graphs** - Approach relationships as a graph model, considering paths between records - -7. **Balance denormalization and relationships** - In some cases, it may be better to duplicate data rather than create complex relationship chains - -8. **Use RelationsAPI for analysis** - Use the dedicated RelationsAPI for relationship analytics and discovery - -9. **Optimize relationship queries** - Use appropriate filters and pagination when querying large numbers of relationships - -10. **Combine APIs effectively** - Use RelationsAPI to discover connections, then RecordsAPI for detailed record data - -## API Reference - -### PaginationParams - -The `PaginationParams` TypedDict defines the structure for pagination options when querying relationships: - -```python -from typing import TypedDict - -class PaginationParams(TypedDict, total=False): - """TypedDict for pagination parameters in relationship queries. - - Defines the structure for pagination options when querying relationships, - allowing for efficient retrieval of large result sets. 
- """ - limit: int # Maximum number of relationships to return in a single request - skip: int # Number of relationships to skip from the beginning of the result set -``` - -#### Parameters - -- **limit** (`int`): Maximum number of relationships to return in a single request - - Default: 100 - - Maximum: 1000 - - Used for controlling the size of result sets and implementing pagination - -- **skip** (`int`): Number of relationships to skip from the beginning of the result set - - Default: 0 - - Used for implementing pagination by skipping already retrieved items - - Useful for getting subsequent pages of results - -#### Usage Example - -```python -# Define pagination parameters -pagination = PaginationParams( - limit=50, # Return at most 50 relationships - skip=100 # Skip the first 100 relationships -) - -# Use with the find method -relationships = await db.relationships.find( - search_query={"where": {"isActive": True}}, - pagination=pagination -) -``` - -### Relationship Object - -When you query relationships using the RelationsAPI, each item in `response["data"]` has the following structure: - -```python -relationship = response["data"][0] - -print(f"Source ID: {relationship['sourceId']}") # ID of the source record -print(f"Source Label: {relationship['sourceLabel']}") # Label of the source record -print(f"Target ID: {relationship['targetId']}") # ID of the target record -print(f"Target Label: {relationship['targetLabel']}") # Label of the target record -print(f"Type: {relationship['type']}") # Relationship type (e.g., "MANAGES") -``` +For traversal in queries, see [Get Records — Relationship traversal](./records/get-records.md#relationship-traversal). 
-## Related Documentation -- [Relationships Concept](../concepts/relationships.md) - Learn more about how relationships work in RushDB -- [Transactions](../concepts/transactions.mdx) - Using transactions for relationship consistency -- [Record Creation](./records/create-records.md) - Creating records with relationships -- [Finding Records](./records/get-records.md) - Search techniques including relationship-based queries diff --git a/docs/docs/python-sdk/transactions.md b/docs/docs/python-sdk/transactions.md index 45256579..3b140b88 100644 --- a/docs/docs/python-sdk/transactions.md +++ b/docs/docs/python-sdk/transactions.md @@ -1,315 +1,57 @@ --- -sidebar_position: 5 +sidebar_position: 6 --- # Transactions -[Transactions](../concepts/transactions.mdx) in RushDB ensure data consistency by grouping multiple operations into a single atomic unit. The Python SDK provides a simple and powerful way to work with transactions, allowing you to perform multiple related operations with guaranteed consistency. +Group writes atomically — all succeed or all roll back. -## Overview - -Transactions in RushDB enable you to: -- Perform multiple operations as a single atomic unit -- Ensure data consistency across related records -- Roll back changes automatically if any operation fails -- Prevent partial updates that could leave your data in an inconsistent state - -## Working with Transactions - -The RushDB Python SDK offers two ways to work with transactions: - -### 1. 
Explicit Transaction Management +## Context manager (idiomatic) ```python -from rushdb import RushDB - -db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") - -# Start a transaction -tx = db.tx.begin() - -try: - # Perform operations within the transaction - product = db.records.create( - label="PRODUCT", - data={"name": "Smartphone X", "price": 999.99}, - transaction=tx - ) - - inventory = db.records.create( - label="INVENTORY", - data={"productId": product.id, "stock": 100}, - transaction=tx - ) - - # Create a relationship between records - product.attach( - target=inventory, - options={"type": "HAS_INVENTORY"}, - transaction=tx - ) - - # Commit the transaction once all operations are successful - tx.commit() - print("Transaction committed successfully") -except Exception as e: - # Roll back the transaction if any operation fails - tx.rollback() - print(f"Transaction rolled back due to error: {e}") +# Auto-commit on success, auto-rollback on exception +with db.transactions.begin() as tx: + leo = db.records.create(label="ACTOR", data={"name": "Leonardo DiCaprio"}, transaction=tx) + inception = db.records.create(label="MOVIE", data={"title": "Inception"}, transaction=tx) + db.records.attach(source=leo, target=inception, options={"type": "ACTED_IN"}, transaction=tx) +# committed automatically — no explicit commit() call needed ``` -### 2. 
Context Manager (with statement) +## Manual commit / rollback ```python -# Using transaction as a context manager +tx = db.transactions.begin() try: - with db.tx.begin() as tx: - # Create an order record - order = db.records.create( - label="ORDER", - data={"orderId": "ORD-12345", "total": 129.99}, - transaction=tx - ) - - # Create order items - item1 = db.records.create( - label="ORDER_ITEM", - data={"productId": "PROD-001", "quantity": 2, "price": 49.99}, - transaction=tx - ) - - item2 = db.records.create( - label="ORDER_ITEM", - data={"productId": "PROD-002", "quantity": 1, "price": 30.01}, - transaction=tx - ) - - # Connect items to the order - order.attach( - target=[item1, item2], - options={"type": "CONTAINS_ITEM"}, - transaction=tx - ) - - # Transaction is automatically committed when the block exits normally -except Exception as e: - # Transaction is automatically rolled back if an exception occurs - print(f"Transaction failed: {e}") -``` - -## Transaction Operations - -The Transaction API provides the following operations: - -### begin() - -Starts a new transaction. - -```python -tx = db.tx.begin() -``` - -### commit() - -Commits all operations in the transaction. - -```python -tx.commit() -``` - -### rollback() - -Rolls back all operations in the transaction. - -```python -tx.rollback() -``` - -## Supported Methods with Transactions - -Most RushDB Python SDK methods support an optional `transaction` parameter. 
Here are some examples: - -### Records API - -```python -# Create a record within a transaction -record = db.records.create( - label="USER", - data={"name": "John Doe"}, - transaction=tx -) - -# Update a record within a transaction -db.records.update( - record_id=record.id, - data={"status": "active"}, - transaction=tx -) - -# Delete a record within a transaction -db.records.delete_by_id( - id_or_ids=record.id, - transaction=tx -) - -# Find records within a transaction -result = db.records.find( - query={"labels": ["USER"]}, - transaction=tx -) - -users = result.data + movie = db.records.create(label="MOVIE", data={"title": "Inception"}, transaction=tx) + actor = db.records.create(label="ACTOR", data={"name": "Leonardo DiCaprio"}, transaction=tx) + db.records.attach(source=movie, target=actor, options={"type": "STARS_IN"}, transaction=tx) + tx.commit() +except Exception: + tx.rollback() + raise ``` -### Relationships +## API -```python -# Create a relationship within a transaction -db.records.attach( - source=user.id, - target=group.id, - options={"type": "BELONGS_TO"}, - transaction=tx -) +| Method | Description | +|---|---| +| `db.transactions.begin(ttl?)` | Start a new transaction | +| `tx.commit()` | Persist all operations | +| `tx.rollback()` | Discard all operations | -# Remove a relationship within a transaction -db.records.detach( - source=user.id, - target=group.id, - options={"typeOrTypes": "BELONGS_TO"}, - transaction=tx -) -``` +## Timeouts -## Complex Transaction Example - -Here's a more complex example showing how to use transactions to ensure data consistency in an e-commerce scenario: +| Setting | Value | +|---|---| +| Default TTL | 5000 ms | +| Maximum TTL | 30000 ms | ```python -def process_order(db, customer_id, items): - # Start a transaction - tx = db.tx.begin() - - try: - # 1. 
Create the order record - order = db.records.create( - label="ORDER", - data={ - "orderDate": datetime.now().isoformat(), - "status": "processing", - "totalAmount": sum(item["price"] * item["quantity"] for item in items) - }, - transaction=tx - ) - - # 2. Retrieve the customer - result = db.records.find( - query={"where": {"id": customer_id}}, - transaction=tx - ) - - if not result: - raise Exception(f"Customer {customer_id} not found") - - customer = result[0] - - # 3. Connect order to customer - customer.attach( - target=order, - options={"type": "PLACED_ORDER"}, - transaction=tx - ) - - # 4. Process each order item - order_items = [] - for item in items: - # 4.1. Check inventory - result = db.records.find( - query={ - "labels": ["INVENTORY"], - "where": {"productId": item["productId"]} - }, - transaction=tx - ) - - if not result or result[0]["stock"] < item["quantity"]: - raise Exception(f"Insufficient stock for product {item['productId']}") - - inventory = result[0] - - # 4.2. Create order item - order_item = db.records.create( - label="ORDER_ITEM", - data={ - "productId": item["productId"], - "quantity": item["quantity"], - "price": item["price"], - "subtotal": item["price"] * item["quantity"] - }, - transaction=tx - ) - - order_items.append(order_item) - - # 4.3. Update inventory - db.records.update( - record_id=inventory[0].id, - data={"stock": inventory[0]["stock"] - item["quantity"]}, - transaction=tx - ) - - # 5. Connect order items to order - order.attach( - target=order_items, - options={"type": "CONTAINS"}, - transaction=tx - ) - - # 6. Update order status - order.update( - data={"status": "confirmed"}, - transaction=tx - ) - - # Commit the transaction - tx.commit() - return {"success": True, "orderId": order.id} - - except Exception as e: - # Roll back the transaction if any step fails - tx.rollback() - return {"success": False, "error": str(e)} +tx = db.transactions.begin(ttl=15000) # 15 s timeout ``` -## Transaction Limitations - -1. 
**Timeouts**: Transactions have a timeout period. Long-running transactions may be automatically aborted. - -2. **Isolation Level**: RushDB uses the underlying Neo4j transaction isolation level, which is READ_COMMITTED. - -3. **Nested Transactions**: Nested transactions are not supported. You should use a single transaction for a set of related operations. - -4. **Transaction Size**: Very large transactions with many operations may impact performance. Consider breaking extremely large operations into smaller batches. - -## Best Practices - -1. **Keep transactions short** - Transaction locks are held until the transaction is committed or rolled back. - -2. **Handle exceptions properly** - Always include exception handling to ensure transactions are properly rolled back. - -3. **Use appropriate scope** - Only include necessary operations in a transaction. - -4. **Consider using the context manager** - The context manager approach guarantees proper transaction handling. - -5. **Avoid long-running transactions** - Long-running transactions can impact system performance. - -6. **Don't mix transactional and non-transactional operations** - Keep all related operations within the transaction. - -7. **Test transaction rollback scenarios** - Ensure your application properly handles transaction failures. 
+## Supported operations -## Related Documentation +`create` · `create_many` · `update` · `set` · `delete` · `delete_by_id` · `attach` · `detach` · `find` -- [Transactions Concept](../concepts/transactions.mdx) - Learn more about how transactions work in RushDB -- [Record Operations](./records/create-records.md) - Record operations supporting transactions -- [Relationships](./relationships.md) - Working with relationships in transactions diff --git a/docs/docs/rest-api/ai/_category_.json b/docs/docs/rest-api/ai/_category_.json new file mode 100644 index 00000000..845c101e --- /dev/null +++ b/docs/docs/rest-api/ai/_category_.json @@ -0,0 +1,10 @@ +{ + "label": "AI & Vectors", + "position": 1, + "collapsed": false, + "collapsible": true, + "link": { + "type": "doc", + "id": "rest-api/ai/overview" + } +} diff --git a/docs/docs/rest-api/ai/advanced-indexing.md b/docs/docs/rest-api/ai/advanced-indexing.md new file mode 100644 index 00000000..4df0bef9 --- /dev/null +++ b/docs/docs/rest-api/ai/advanced-indexing.md @@ -0,0 +1,231 @@ +--- +sidebar_position: 2 +title: Advanced Indexing — BYOV +--- + +# Advanced Indexing — Bring Your Own Vectors + +**External indexes** (BYOV — Bring Your Own Vectors) let you supply pre-computed embedding vectors instead of having the server compute them. Use them when you need: + +- A custom or private model the server cannot access +- Multimodal embeddings (image, audio, document structure) +- Vectors already produced by your ML pipeline +- Reproducible embeddings not tied to the server's active model + +--- + +## Creating an external index + +Pass `"sourceType": "external"` in the create request. 
`dimensions` is **required** because the server never calls an embedding model and cannot infer the vector size: + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Article", + "propertyName": "body", + "sourceType": "external", + "dimensions": 768, + "similarityFunction": "cosine" + }' +``` + +### Example Response + +```json +{ + "data": { + "id": "idx_ext_abc123", + "label": "Article", + "propertyName": "body", + "sourceType": "external", + "similarityFunction": "cosine", + "dimensions": 768, + "status": "awaiting_vectors" + }, + "success": true +} +``` + +An external index starts with status `awaiting_vectors` and transitions to `ready` once at least one vector has been written. + +### External vs managed comparison + +| | Managed | External | +|---|---|---| +| `sourceType` | `"managed"` | `"external"` | +| Initial status | `"pending"` | `"awaiting_vectors"` | +| Who computes embeddings | RushDB server (configured model) | Your application | +| `dimensions` required | No (uses server default) | **Yes** | +| Backfill for existing records | Automatic | Manual via `upsertVectors` or inline writes | + +--- + +## Pushing vectors with `POST /api/v1/ai/indexes/:id/vectors/upsert` + +The bulk upload API — ideal for seeding an index from a dataset or syncing after a batch pipeline. 
+ +```http +POST /api/v1/ai/indexes/:id/vectors/upsert +``` + +### Request Body + +| Field | Type | Required | Description | +|---------|-------|----------|---------------------------------------------------------| +| `items` | array | **yes** | Array of `{ "recordId": string, "vector": number[] }` objects | + +### Example Request + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/indexes/idx_ext_abc123/vectors/upsert \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "items": [ + { "recordId": "rec_001", "vector": [0.1, 0.2, 0.3] }, + { "recordId": "rec_002", "vector": [0.7, 0.8, 0.9] } + ] + }' +``` + +The request is **idempotent** — calling it again with the same `recordId` replaces the stored vector. + +--- + +## Writing vectors at record creation time + +Instead of a two-step create → upsertVectors flow, you can write vectors inline using the `vectors` field on any write endpoint. See [Write Records with Vectors](./write-with-vectors.md) for the full reference. + +```bash +# One step: create record AND write its vector +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Article", + "data": { "title": "Warp drives", "body": "Alcubierre metric..." }, + "vectors": [{ "propertyName": "body", "vector": [0.1, 0.2, 0.3] }] + }' +``` + +--- + +## Disambiguation {#disambiguation} + +When the same `(label, propertyName)` pair is covered by more than one external index (different `similarityFunction` or `dimensions`), specify `similarityFunction` to resolve which index to use: + +```bash +# Two indexes: Article:body/cosine and Article:body/euclidean + +# ✅ Explicit — writes to the cosine index only +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Article", + "data": { "title": "Widget", "body": "..." 
}, + "vectors": [{ "propertyName": "body", "vector": [0.1, 0.9, 0.4], "similarityFunction": "cosine" }] + }' + +# ❌ Missing similarityFunction when two indexes exist → 422 Unprocessable Entity +``` + +### Index signature uniqueness + +Two index policies are considered **identical** (and a second create returns `409 Conflict`) when all five fields match: + +| Field | Effect on uniqueness | +|----------------------|----------------------| +| `label` | ✅ | +| `propertyName` | ✅ | +| `sourceType` | ✅ | +| `similarityFunction` | ✅ | +| `dimensions` | ✅ | + +Changing any one field produces a distinct index and both are allowed to coexist. + +--- + +## Complete BYOV worked example + +```bash +# 1. Create the external index +INDEX_ID=$(curl -s -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"label":"Doc","propertyName":"content","sourceType":"external","dimensions":3,"similarityFunction":"cosine"}' | \ + jq -r '.data.id') + +# 2. Create records with inline vectors +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"label":"Doc","data":{"title":"Alpha","content":"First article"},"vectors":[{"propertyName":"content","vector":[1,0,0]}]}' + +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"label":"Doc","data":{"title":"Beta","content":"Second article"},"vectors":[{"propertyName":"content","vector":[0,1,0]}]}' + +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"label":"Doc","data":{"title":"Gamma","content":"Third article"},"vectors":[{"propertyName":"content","vector":[0,0,1]}]}' + +# 3. 
Search using a pre-computed query vector (closest to Alpha) +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"labels":["Doc"],"propertyName":"content","queryVector":[1,0,0],"limit":3}' +``` + +--- + +## Batch import with `$vectors` + +For bulk seeding, use `POST /api/v1/records/import/json` with a `$vectors` key on each object: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/import/json \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "Doc": [ + { "title": "Alpha", "content": "First article", "$vectors": [{ "propertyName": "content", "vector": [1, 0, 0] }] }, + { "title": "Beta", "content": "Second article", "$vectors": [{ "propertyName": "content", "vector": [0, 1, 0] }] }, + { "title": "Gamma", "content": "Third article", "$vectors": [{ "propertyName": "content", "vector": [0, 0, 1] }] } + ] + }' +``` + +`$vectors` entries are **stripped** from the stored record data — they do not appear as record properties or child records. + +--- + +## Mixing managed and external indexes + +You can have both a managed index and an external index on the same property simultaneously: + +```bash +# Managed — server embeds for full-text search +curl -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"label": "Product", "propertyName": "description"}' + +# External — your custom multimodal model +curl -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Product", + "propertyName": "description", + "sourceType": "external", + "dimensions": 512, + "similarityFunction": "cosine" + }' +``` + +Specify `similarityFunction` in `POST /api/v1/ai/search` to route the query to the intended index. 
diff --git a/docs/docs/rest-api/ai/indexing.md b/docs/docs/rest-api/ai/indexing.md new file mode 100644 index 00000000..2d8c797b --- /dev/null +++ b/docs/docs/rest-api/ai/indexing.md @@ -0,0 +1,279 @@ +--- +sidebar_position: 1 +title: Embedding Indexes +--- + +# Embedding Indexes + +An **embedding index** is a policy that tells RushDB to vectorize a specific string property for a label. Once `status` is `ready`, every record matching that label+property pair is searchable via `POST /api/v1/ai/search`. + +--- + +## How indexes work + +Indexes are scoped to `(label, propertyName)`. `Book:description` and `Article:description` are completely independent — they maintain separate vector stores and never interfere. + +``` +Index policy + label: "Book" + propertyName: "description" + sourceType: "managed" + dimensions: 1536 + status: "ready" + +↓ backfill runs automatically + +Book records get vectors stored on their VALUE relationships: + rel._emb_managed_cosine_1536 = [0.1, 0.2, ...] +``` + +When new records are created or existing records are updated, the index transitions back to `pending` and vectors are recomputed on the next backfill cycle. + +--- + +## List Embedding Indexes + +```http +GET /api/v1/ai/indexes +``` + +Returns all embedding index policies for the project. + +### Example Response + +```json +{ + "data": [ + { + "id": "idx_abc123", + "projectId": "proj_xyz", + "label": "Article", + "propertyName": "description", + "sourceType": "managed", + "similarityFunction": "cosine", + "modelKey": "text-embedding-3-small", + "dimensions": 1536, + "vectorPropertyName": "_emb_managed_cosine_1536", + "enabled": true, + "status": "ready", + "createdAt": "2025-01-10T12:00:00.000Z", + "updatedAt": "2025-01-10T12:05:00.000Z" + } + ], + "success": true +} +``` + +--- + +## Create Embedding Index + +```http +POST /api/v1/ai/indexes +``` + +Creates a new managed embedding index policy scoped to a label. 
The property must exist in the graph and have type `string` (scalar or list). + +### Request Body + +| Field | Type | Required | Description | +|----------------------|--------|----------|-----------------------------------------------------------------------------------------------------------------------| +| `label` | string | **yes** | Label to scope this index to (e.g. `"Article"`, `"Product"`) | +| `propertyName` | string | **yes** | Name of the property to embed (e.g. `"description"`) | +| `sourceType` | string | no | `"managed"` (default) or `"external"`. See [Advanced Indexing](./advanced-indexing.md). | +| `similarityFunction` | string | no | `"cosine"` (default) or `"euclidean"` | +| `dimensions` | number | no | Vector dimensionality. Defaults to server `RUSHDB_EMBEDDING_DIMENSIONS`. **Required** for external indexes. | + +> **Model config is server-side.** The embedding model is set via `RUSHDB_EMBEDDING_MODEL` / `RUSHDB_EMBEDDING_DIMENSIONS` env vars. + +### Example — simplest form + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"label": "Article", "propertyName": "description"}' +``` + +### Example — with explicit parameters + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Article", + "propertyName": "description", + "similarityFunction": "cosine", + "dimensions": 1536 + }' +``` + +### Example Response + +```json +{ + "data": { + "id": "idx_abc123", + "label": "Article", + "propertyName": "description", + "sourceType": "managed", + "similarityFunction": "cosine", + "dimensions": 1536, + "status": "pending" + }, + "success": true +} +``` + +### Index lifecycle + +| Status | Description | +|--------------------|-----------------------------------------------------------------| +| `pending` | Policy created, waiting for 
backfill scheduler | +| `indexing` | Backfill in progress | +| `awaiting_vectors` | External index — waiting for client to push vectors | +| `ready` | All existing records have vectors; search is available | +| `error` | Backfill failed; check server logs for the cause | + +### Error cases + +| Status | When | +|--------|-------------------------------------------------------------------------------------------------| +| `404` | The property does not exist in the project graph | +| `422` | The property exists but is not `string` type | +| `422` | Embedding model is not configured on the server | +| `409` | An index for this `(label, propertyName, sourceType, similarityFunction, dimensions)` tuple already exists | + +--- + +## Delete Embedding Index + +```http +DELETE /api/v1/ai/indexes/:id +``` + +Deletes an embedding index policy. The underlying Neo4j DDL vector index is only dropped when **zero embeddings remain** across the entire project — this avoids unnecessary rebuilds when multiple policies share the same `(dimensions, similarityFunction)`. + +### Example Request + +```bash +curl -X DELETE https://api.rushdb.com/api/v1/ai/indexes/idx_abc123 \ + -H "Authorization: Bearer $RUSHDB_API_KEY" +``` + +### Example Response + +```json +{ "data": { "deleted": true }, "success": true } +``` + +--- + +## Get Embedding Index Stats + +```http +GET /api/v1/ai/indexes/:id/stats +``` + +Returns the current indexing progress — useful for progress monitoring or health checks before running search. + +### Example Response + +```json +{ + "data": { + "totalRecords": 1840, + "indexedRecords": 1234 + }, + "success": true +} +``` + +--- + +## Waiting for an index to become ready + +For managed indexes, backfill runs asynchronously. 
Poll `GET /api/v1/ai/indexes` until `status` is `ready`: + +```bash +# Shell polling loop +while true; do + STATUS=$(curl -s https://api.rushdb.com/api/v1/ai/indexes \ + -H "Authorization: Bearer $RUSHDB_API_KEY" | \ + jq -r '.data[] | select(.id == "idx_abc123") | .status') + echo "Status: $STATUS" + if [ "$STATUS" = "ready" ]; then break; fi + if [ "$STATUS" = "error" ]; then echo "Index entered error state" && exit 1; fi + sleep 3 +done +``` + +Or in JavaScript: + +```javascript +async function waitForIndexReady(apiKey, indexId, timeoutMs = 90_000) { + const deadline = Date.now() + timeoutMs + while (Date.now() < deadline) { + const res = await fetch('https://api.rushdb.com/api/v1/ai/indexes', { + headers: { Authorization: `Bearer ${apiKey}` } + }) + const { data: indexes } = await res.json() + const idx = indexes.find(i => i.id === indexId) + if (idx?.status === 'ready') return + if (idx?.status === 'error') throw new Error('Index entered error state') + await new Promise(r => setTimeout(r, 3_000)) + } + throw new Error('Index did not become ready in time') +} +``` + +--- + +## Multiple indexes on the same property + +You can have more than one index per `(label, propertyName)` pair, provided the signature differs (`sourceType`, `similarityFunction`, or `dimensions`): + +```bash +# Cosine index +curl -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"label": "Product", "propertyName": "description", "similarityFunction": "cosine", "dimensions": 768}' + +# Euclidean index on the same property +curl -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"label": "Product", "propertyName": "description", "similarityFunction": "euclidean", "dimensions": 768}' +``` + +When searching or writing vectors against a property with multiple indexes, specify `similarityFunction` to 
disambiguate. See [Advanced Indexing](./advanced-indexing.md#disambiguation) for details. + +--- + +## Index response shape + +```json +{ + "id": "string", + "projectId": "string", + "label": "string", + "propertyName": "string", + "modelKey": "string", + "sourceType": "managed | external", + "similarityFunction": "cosine | euclidean", + "dimensions": 1536, + "vectorPropertyName": "string", + "enabled": true, + "status": "string", + "createdAt": "string", + "updatedAt": "string" +} +``` + +--- + +## `List` properties + +String array properties are supported. Each item in the array is embedded individually, then mean-pooled into a single vector stored on the relationship. diff --git a/docs/docs/rest-api/ai/overview.md b/docs/docs/rest-api/ai/overview.md new file mode 100644 index 00000000..28195991 --- /dev/null +++ b/docs/docs/rest-api/ai/overview.md @@ -0,0 +1,188 @@ +--- +sidebar_position: 0 +title: Overview +--- + +# AI & Semantic Search + +RushDB is a **self-aware memory layer for agents, humans, and apps**. It continuously understands its own structure — labels, fields, value distributions, relationships — and exposes that knowledge so agents can reason over real data without hallucinating schema details, and apps can retrieve semantically relevant context on demand. + +The AI API covers three capabilities: + +| Capability | Description | +|---|---| +| **Graph Ontology** | Self-describing schema discovery: label names, field types, value ranges, and the relationship map — always up to date | +| **Embedding Indexes** | Per-label vector policies that turn string properties into long-term semantic memory | +| **Semantic Search** | Cosine/euclidean similarity retrieval over indexed properties, for agents and apps alike | + +--- + +## How it fits together + +``` +┌─────────────────────────────────────────────────────┐ +│ Your data (records + relationships) │ +│ │ +│ BOOK { title: "...", description: "..." 
} │ +└────────────────────┬────────────────────────────────┘ + │ + POST /api/v1/ai/indexes + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Embedding index policy │ +│ label: BOOK property: description dims: 1536 │ +│ sourceType: managed | external │ +└────────────────────┬────────────────────────────────┘ + │ + Backfill (managed) / inline vectors (external) + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Vector stored on VALUE relationship │ +│ rel._emb_managed_cosine_1536 = [0.1, 0.2, ...] │ +└────────────────────┬────────────────────────────────┘ + │ + POST /api/v1/ai/search + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Records ranked by similarity score │ +│ result.__score = 0.94 (cosine similarity) │ +└─────────────────────────────────────────────────────┘ +``` + +--- + +## Quick links + +| Topic | Description | +|---|---| +| [Ontology](#graph-ontology) | Schema discovery with `POST /api/v1/ai/ontology/md` and `POST /api/v1/ai/ontology` | +| [Indexing](./indexing.md) | Create and manage managed embedding indexes | +| [Advanced Indexing — BYOV](./advanced-indexing.md) | Bring Your Own Vectors: external indexes, inline writes | +| [Semantic Search](./search.md) | Query by meaning with `POST /api/v1/ai/search` | +| [Writing with Vectors](./write-with-vectors.md) | Attach vectors at create / upsert / importJson time | + +--- + +## Graph Ontology + +The ontology endpoints expose a live snapshot of your database structure — without any manual schema definitions. + +### Get Ontology (Markdown) + +```http +POST /api/v1/ai/ontology/md +``` + +Returns the full schema as compact Markdown — the **recommended format for LLM context injection**: token-efficient, human-readable, and ready to paste into a system prompt or tool result. 
+ +#### Request Body + +| Field | Type | Required | Description | +|----------|------------------|----------|-----------------------------------------------------------------------------| +| `labels` | array of strings | no | Restrict output to specific labels. Omit (or pass `[]`) for the full schema. | + +#### Example Request + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/ontology/md \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{}' +``` + +#### Example Response + +```text +# Graph Ontology + +## Labels + +| Label | Count | +|-----------|------:| +| `Order` | 1840 | +| `User` | 312 | +| `Product` | 95 | + +--- + +## `Order` (1840 records) + +### Properties + +| Property | Type | Values / Range | +|-------------|----------|------------------------------------------| +| `status` | string | `pending`, `paid`, `shipped` (+2 more) | +| `total` | number | `4.99`..`2499.00` | +| `createdAt` | datetime | `2024-01-03`..`2026-02-27` | + +### Relationships + +| Type | Direction | Other Label | +|-------------|-----------|-------------| +| `PLACED_BY` | out | `User` | +| `CONTAINS` | out | `Product` | +``` + +#### Filtered request (single label) + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/ontology/md \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"labels": ["Order"]}' +``` + +Returns only the `Order` section. The underlying cache still covers the full schema — filtering is applied in-memory. + +--- + +### Get Ontology (JSON) + +```http +POST /api/v1/ai/ontology +``` + +Returns the same ontology as a structured JSON array. Each element describes one label. + +#### Request Body + +| Field | Type | Required | Description | +|----------|------------------|----------|--------------------------------------------------------| +| `labels` | array of strings | no | Restrict to specific labels. Omit for the full schema. 
| + +#### Response Schema + +```json +[ + { + "label": "Order", + "count": 1840, + "properties": [ + { "id": "prop_abc123", "name": "status", "type": "string", "values": ["pending", "paid", "shipped"] }, + { "id": "prop_def456", "name": "total", "type": "number", "min": 4.99, "max": 2499.00 } + ], + "relationships": [ + { "label": "User", "type": "PLACED_BY", "direction": "out" }, + { "label": "Product", "type": "CONTAINS", "direction": "out" } + ] + } +] +``` + +- `properties[].id` — pass to `GET /api/v1/properties/:id/values` to enumerate all distinct values +- `properties[].values` — up to 10 samples (string/boolean only) +- `properties[].min` / `.max` — range info (number/datetime only) +- `relationships[].direction` — `out` = this label is source; `in` = this label is target + +--- + +:::note Caching +Both endpoints share a **1-hour cache** on the ProjectNode. First call after TTL expiry triggers a full graph scan; all subsequent calls within the hour are instant. +::: + +:::tip Agent quickstart +Call `POST /api/v1/ai/ontology/md` first in every AI session. Without it, models will hallucinate label and field names. +::: diff --git a/docs/docs/rest-api/ai/search.md b/docs/docs/rest-api/ai/search.md new file mode 100644 index 00000000..a044cbe3 --- /dev/null +++ b/docs/docs/rest-api/ai/search.md @@ -0,0 +1,205 @@ +--- +sidebar_position: 3 +title: Semantic Search +--- + +# Semantic Search + +```http +POST /api/v1/ai/search +``` + +Embeds the supplied query text (or uses a pre-computed vector) and returns the most relevant records by similarity score. The property referenced by `propertyName` must have a `ready` embedding index. + +RushDB performs exact semantic search: candidates are narrowed with label and `where` filters first, then ranked by cosine or euclidean similarity. 
+ +--- + +## Request Body + +| Field | Type | Required | Description | +|----------------------|----------------------------|--------------|-----------------------------------------------------------------------------------------------------| +| `propertyName` | string | **yes** | The indexed property to search against (e.g. `"description"`) | +| `labels` | string or array of strings | **yes** | Label(s) to search within (min 1) | +| `query` | string | conditionally | Free-text query to embed. Required for managed indexes; **not allowed** for external indexes. | +| `queryVector` | array of numbers | conditionally | Pre-computed query vector. Required for external indexes. Also accepted for managed indexes (bypasses server embedding). | +| `similarityFunction` | string | no | `"cosine"` or `"euclidean"`. Required when multiple indexes target the same `(label, propertyName)`. | +| `dimensions` | number | no | Disambiguates when multiple indexes match. Inferred from `queryVector.length` when `queryVector` is supplied. | +| `where` | object | no | Standard RushDB filter expression applied **before** similarity scoring. | +| `skip` | number | no | Pagination offset (default `0`) | +| `limit` | number | no | Maximum results to return (default `20`) | + +--- + +## Result shape + +Results are flat records with `__score` injected alongside your fields, ordered by `__score` descending (closest match first): + +```json +{ + "data": [ + { + "__id": "rec_abc123", + "__label": "Product", + "__score": 0.921, + "description": "Same-day shipping with hassle-free returns policy", + "status": "active" + }, + { + "__id": "rec_def456", + "__label": "Product", + "__score": 0.887, + "description": "Free returns within 30 days, express shipping available", + "status": "featured" + } + ], + "success": true +} +``` + +--- + +## Managed search (query text) + +For a **managed** index, pass `query` — a natural-language string. 
The server embeds it using the same model that was used when building the index, then ranks candidates by similarity. + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "propertyName": "description", + "query": "fast delivery and easy returns", + "labels": ["Product"], + "limit": 5 + }' +``` + +--- + +## External search (query vector) + +For an **external** index, pass `queryVector` — a pre-computed embedding produced by your own model. No text is sent to an embedding model. + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "propertyName": "body", + "queryVector": [0.1, 0.2, 0.3], + "labels": ["Article"], + "limit": 10 + }' +``` + +- `query` is **not allowed** with external indexes — the server has no model to embed it. +- `queryVector` is **not required** for managed indexes but is accepted (bypasses server embedding). +- When `queryVector` is supplied, `dimensions` can be omitted — the server infers it from `queryVector.length`. + +--- + +## Filtering with `where` + +The `where` clause acts as a **prefilter** — only records satisfying the filter are candidates for similarity ranking. All filter operators supported by `POST /api/v1/records/search` are available here. 
+ +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "propertyName": "description", + "query": "wireless headphones", + "labels": ["Product"], + "where": { + "category": { "$eq": "electronics" }, + "inStock": { "$eq": true }, + "price": { "$lt": 100 } + }, + "limit": 20 + }' +``` + +--- + +## Multi-label search + +Pass an array of labels to search across multiple entity types simultaneously: + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "propertyName": "body", + "query": "machine learning trends", + "labels": ["Article", "Post", "Comment"], + "limit": 10 + }' +``` + +Each result carries `__label` so you can tell the entity types apart: + +```json +{ + "data": [ + { "__id": "rec_1", "__label": "Article", "__score": 0.93, "title": "...", "body": "..." }, + { "__id": "rec_2", "__label": "Post", "__score": 0.87, "body": "..." }, + { "__id": "rec_3", "__label": "Comment", "__score": 0.82, "body": "..." } + ], + "success": true +} +``` + +All listed labels must have an embedding index on the same `propertyName`, or the request returns `404` for the missing labels. + +--- + +## Disambiguation + +When two indexes exist for the same `(label, propertyName)`, specify `similarityFunction` to select the target index: + +```bash +# Two indexes: Product:embedding/cosine and Product:embedding/euclidean +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "labels": ["Product"], + "propertyName": "embedding", + "queryVector": [0.1, 0.9, 0.4], + "similarityFunction": "cosine" + }' +``` + +Omitting `similarityFunction` when multiple indexes match returns `422 Unprocessable Entity`. 
+ +--- + +## Pagination + +```bash +# Page 1 +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"propertyName":"description","query":"...", "labels":["Product"],"limit":20,"skip":0}' + +# Page 2 +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{"propertyName":"description","query":"...", "labels":["Product"],"limit":20,"skip":20}' +``` + +--- + +## Error reference + +| HTTP | Cause | +|------|-------| +| `404 Not Found` | No enabled embedding index found for `(label, propertyName)` | +| `422 Unprocessable Entity` | Multiple indexes match and `similarityFunction` was not specified | +| `422 Unprocessable Entity` | `query` text supplied for an external index (server cannot embed it) | +| `422 Unprocessable Entity` | `queryVector` length does not match index `dimensions` | +| `503 Service Unavailable` | Embedding model unavailable (managed indexes only) | diff --git a/docs/docs/rest-api/ai/write-with-vectors.md b/docs/docs/rest-api/ai/write-with-vectors.md new file mode 100644 index 00000000..76a62b37 --- /dev/null +++ b/docs/docs/rest-api/ai/write-with-vectors.md @@ -0,0 +1,232 @@ +--- +sidebar_position: 4 +title: Writing Records with Vectors +--- + +# Writing Records with Vectors + +RushDB lets you attach pre-computed embedding vectors to records **at write time**, eliminating the need for a separate `POST /api/v1/ai/indexes/:id/vectors/upsert` call. Any endpoint that creates or modifies records accepts a `vectors` field (or the `$vectors` key in batch JSON imports). + +This feature requires at least one [external index](./advanced-indexing.md) to exist for the target `(label, propertyName)`. 
+ +--- + +## `vectors` field format + +All write endpoints accept a `vectors` array: + +```json +"vectors": [ + { + "propertyName": "description", + "vector": [0.1, 0.9, 0.4], + "similarityFunction": "cosine" + } +] +``` + +| Field | Type | Required | Description | +|----------------------|------------------|----------|-----------------------------------------------------------| +| `propertyName` | string | **yes** | Property name this vector is associated with | +| `vector` | array of numbers | **yes** | Pre-computed embedding vector | +| `similarityFunction` | string | no | Required when multiple indexes exist on the same property | + +--- + +## `POST /api/v1/records` — create with vectors + +The record is created **and** the vector is written atomically: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Article", + "data": { + "title": "How transformers work", + "body": "Attention is all you need ..." + }, + "vectors": [ + { "propertyName": "body", "vector": [0.1, 0.2, 0.3] } + ] + }' +``` + +--- + +## `POST /api/v1/records` — upsert with vectors + +Upsert is idempotent on the record's natural key (set via `mergeBy`). Include `vectors` to write or replace the stored vector in the same call: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Article", + "data": { "slug": "transformers-101", "title": "Transformers 101", "body": "..." }, + "options": { "mergeBy": ["slug"], "mergeStrategy": "append" }, + "vectors": [{ "propertyName": "body", "vector": [0.1, 0.2, 0.3] }] + }' +``` + +--- + +## `PUT /api/v1/records/:id` — set with vectors + +`PUT` replaces all properties of a record with new values. 
Including `vectors` writes those vectors at the same time: + +```bash +curl -X PUT https://api.rushdb.com/api/v1/records/rec_abc123 \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Product", + "data": { "name": "Widget Pro", "price": 19.99 }, + "vectors": [{ "propertyName": "description", "vector": [0.5, 0.6, 0.7] }] + }' +``` + +--- + +## `POST /api/v1/records/import/json` with `$vectors` + +For bulk ingestion via `importJson`, add a `$vectors` key alongside properties in each JSON object: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/import/json \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "Article": [ + { + "title": "Alpha", + "body": "First article about AI", + "$vectors": [{ "propertyName": "body", "vector": [1, 0, 0] }] + }, + { + "title": "Beta", + "body": "Second article about ML", + "$vectors": [{ "propertyName": "body", "vector": [0, 1, 0] }] + }, + { + "title": "Gamma", + "body": "Third article about DL", + "$vectors": [{ "propertyName": "body", "vector": [0, 0, 1] }] + } + ] + }' +``` + +`$vectors` entries are **stripped** before the record is persisted. 
They: +- **Do not** appear as record properties +- **Do not** create child records +- **Do not** appear in query results + +--- + +## `POST /api/v1/records/import/json` (flat rows) with `vectors` + +When using the flat-rows format (equivalent to `createMany`), provide a top-level `vectors` array indexed by row position: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/import/json \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Product", + "data": [ + { "name": "Alpha", "description": "First product" }, + { "name": "Beta", "description": "Second product" }, + { "name": "Gamma", "description": "Third product" } + ], + "vectors": [ + [{ "propertyName": "description", "vector": [1, 0, 0] }], + [{ "propertyName": "description", "vector": [0, 1, 0] }], + [{ "propertyName": "description", "vector": [0, 0, 1] }] + ] + }' +``` + +### Sparse vectors + +Leave rows without vectors by providing a shorter `vectors` array — any rows beyond `vectors.length` are skipped: + +```bash +# Only row 0 gets a vector; rows 1 and 2 are skipped +curl -X POST https://api.rushdb.com/api/v1/records/import/json \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Product", + "data": [{ "name": "Alpha" }, { "name": "Beta" }, { "name": "Gamma" }], + "vectors": [[{ "propertyName": "description", "vector": [1, 0, 0] }]] + }' +``` + +--- + +## `POST /api/v1/records/import/csv` with `vectors` + +CSV data is a raw string, so per-row vectors are supplied as a separate `vectors` array using the same indexed format. 
Row indices are 0-based and refer to data rows after the header is consumed: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/import/csv \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Product", + "data": "name,description\nAlpha,First product\nBeta,Second product\nGamma,Third product", + "vectors": [ + [{ "propertyName": "description", "vector": [1, 0, 0] }], + [{ "propertyName": "description", "vector": [0, 1, 0] }], + [{ "propertyName": "description", "vector": [0, 0, 1] }] + ] + }' +``` + +The server returns `400 Bad Request` if `vectors.length` exceeds the number of CSV data rows (validated after CSV parsing). + +--- + +## Specifying `similarityFunction` for disambiguation + +When a `(label, propertyName)` has multiple external indexes, include `similarityFunction` in each vector entry to route to the correct index: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Product", + "data": { "name": "Widget" }, + "vectors": [ + { "propertyName": "embedding", "vector": [0.1, 0.9], "similarityFunction": "cosine" } + ] + }' +``` + +Omitting `similarityFunction` when multiple indexes match returns `422 Unprocessable Entity`. + +--- + +## Multiple vectors in one call + +Write vectors for multiple properties in a single request: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "label": "Document", + "data": { "title": "Multi-modal doc", "abstract": "...", "fullText": "..." }, + "vectors": [ + { "propertyName": "abstract", "vector": [0.1, 0.2, 0.3] }, + { "propertyName": "fullText", "vector": [0.7, 0.8, 0.9] } + ] + }' +``` + +Each entry is matched independently against the available external indexes. 
diff --git a/docs/docs/rest-api/introduction.md b/docs/docs/rest-api/introduction.md index 09c9f7fd..3d5e1995 100644 --- a/docs/docs/rest-api/introduction.md +++ b/docs/docs/rest-api/introduction.md @@ -3,81 +3,30 @@ title: Introduction sidebar_position: 0 --- -# RushDB REST API +# REST API -Welcome to the RushDB REST API documentation! The RushDB REST API provides a modern, flexible interface for managing your data, relationships, and metadata in RushDB. Whether you are building applications, automating workflows, or integrating with other systems, the API gives you full control over your graph data with simple HTTP requests. +Base URL: `https://api.rushdb.com/api/v1` +Auth: `Authorization: Bearer YOUR_TOKEN` -## What is RushDB? - -RushDB is an instant, cloud-native database built on top of Neo4j, designed for modern applications and data science/ML operations. It automates data normalization, manages relationships, and features automatic type inference, so you can focus on building features instead of managing data infrastructure. - -## Key Features - -- **Flexible Data Model**: Store structured, semi-structured, and nested data as records and relationships. -- **Relationship Management**: Easily create, query, and manage relationships between records. -- **Batch Operations**: Import and export data in bulk using JSON or CSV. -- **ACID Transactions**: Perform multiple operations atomically for data consistency. -- **Powerful Search**: Query records with advanced filters, ordering, and pagination. -- **Property & Label APIs**: Manage metadata, property types, and record labels. -- **Secure & Scalable**: Built for both cloud and self-hosted deployments, with robust authentication and access control. - -## How to Use the API - -- **Base URL**: The API is available at `https://api.rushdb.com/api/v1` for cloud users, or your custom URL for self-hosted deployments. -- **Authentication**: All endpoints require authentication via a token header. 
Get your API token from the [RushDB dashboard](https://app.rushdb.com). -- **Content-Type**: All requests and responses use JSON unless otherwise specified. - -## API Specifications - -The RushDB API is documented using OpenAPI (Swagger) specification for easy integration and exploration: - -- **Swagger UI**: [Interactive API Documentation](https://api.rushdb.com/api) -- **OpenAPI JSON**: [JSON Schema Specification](https://api.rushdb.com/api-json) -- **OpenAPI YAML**: [YAML Specification](https://api.rushdb.com/api-yaml) - -You can use these specifications to: -- Generate client libraries in your preferred programming language -- Import the API into tools like Postman, Insomnia, or SwaggerHub -- Understand request/response formats with machine-readable schemas - -## Common Use Cases - -- Create, update, and delete records -- Manage relationships between records -- Import/export data in bulk -- Search and filter records with complex queries -- Manage property types and labels -- Use transactions for atomic multi-step operations - -## Getting Started - -1. **Get an API Key**: Sign up at [app.rushdb.com](https://app.rushdb.com) or set up a self-hosted instance. -2. **Read the Endpoint Docs**: Explore the sidebar for detailed documentation on each API endpoint, including request/response formats and examples. -3. **Try It Out**: Use cURL, Postman, or your favorite HTTP client to interact with the API. 
- -## Example: Create a Record - -```http -POST /api/v1/records -Content-Type: application/json -token: RUSHDB_API_KEY - -{ - "label": "Person", - "data": { - "name": "John Doe", - "age": 30, - "email": "john.doe@email.com" - } -} +```bash +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"label":"MOVIE","data":{"title":"Inception","rating":8.8}}' ``` -## Support & Resources +Interactive docs: [Swagger UI](https://api.rushdb.com/api) · [OpenAPI JSON](https://api.rushdb.com/api-json) -- [RushDB Documentation](https://docs.rushdb.com) -- [RushDB Homepage](https://rushdb.com) -- [Community & Support](https://rushdb.com/contact) +## Endpoints + +| Group | Description | +|---|---| +| [Records](./records/create-records) | Create, read, update, delete, search, import, export | +| [Relationships](./relationships) | Attach and detach edges between records | +| [Labels](./labels) | Query which types exist and their counts | +| [Properties](./properties) | Inspect field names, types, and value ranges | +| [Transactions](./transactions) | Atomic multi-step operations | +| [AI & Semantic Search](./ai) | Schema export + vector similarity search | +| [Raw Queries](./raw-queries) | Cypher pass-through (requires a connected Neo4j instance) | ---- -Browse the sidebar to learn more about each API endpoint, best practices, and advanced features! diff --git a/docs/docs/rest-api/labels.md b/docs/docs/rest-api/labels.md index 90176d8a..05e007c1 100644 --- a/docs/docs/rest-api/labels.md +++ b/docs/docs/rest-api/labels.md @@ -1,105 +1,27 @@ --- -sidebar_position: 4 +sidebar_position: 5 --- -# Labels API +# Labels -RushDB provides a Labels API that allows you to retrieve information about the [labels](../concepts/labels.md) used in your records. Labels are a powerful way to categorize and organize [records](../concepts/records.md) in your database. 
+## `POST /api/v1/labels/search` -## Overview +Returns all labels and their record counts. Pass a `where` clause to filter by record properties. -The Labels API allows you to: -- Retrieve all labels used in your project -- Get the count of records with each label -- Filter labels based on record properties +```bash +# All labels +curl -X POST https://api.rushdb.com/api/v1/labels/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{}' -All labels endpoints require authentication using a token header. - -## List Labels - -```http -POST /api/v1/labels/search +# Labels that have records matching a condition +curl -X POST https://api.rushdb.com/api/v1/labels/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"where": {"rating": {"$gte": 8}}}' ``` -Returns a find of all [labels](../concepts/labels.md) in the current project along with the count of records having each label. You can filter the results using the `where` clause. - -### Request Body - -| Field | Type | Description | -|---------|--------|----------------------------------------------------------------------------------------------------------------| -| `where` | Object | Optional [filter criteria](../concepts/search/introduction.md) to narrow down which labeled records to include | - -### Example Request - -```json -{ - "where": { - "country": "USA" - } -} -``` - -This will return labels for all records where the `country` property equals "USA". - -### Response - -```json -{ - "success": true, - "data": { - "Person": 35, - "Company": 12, - "Customer": 24 - } -} -``` - -The response is a map where each key is a label name and each value is the count of records with that label. 
- -## Filtering Labels - -You can use [complex queries](../concepts/search/introduction.md) to filter which labeled records to include: - -### Example with Multiple Conditions - -```json -{ - "where": { - "age": { "$gt": 30 }, - "active": true - } -} -``` - -This will return labels for records where `age` is greater than 30 AND `active` is true. - -### Example with OR Logic - -```json -{ - "where": { - "$or": [ - { "country": "USA" }, - { "country": "Canada" } - ] - } -} -``` - -This will return labels for records where `country` is either "USA" OR "Canada". - -## Working with Labels - -### Best Practices - -1. **Consistent naming conventions**: Use a consistent pattern for [label](../concepts/labels.md) names (e.g., singular nouns, PascalCase) -2. **Meaningful labels**: Choose labels that describe what the record represents, not just its attributes -3. **Hierarchical labeling**: Consider using more specific labels for specialized record types (e.g., "Employee" and "Manager" instead of just "Person") -4. **Multiple labels**: Remember that records can have multiple labels in RushDB, allowing for flexible classification +Response: map of `{ label: count }`. -### Common Use Cases -- **Data organization**: Group related records for easier querying and visualization -- **Access control**: Set permissions based on record labels -- **Conditional processing**: Apply different business logic depending on record types -- **Schema validation**: Enforce data structure based on record labels diff --git a/docs/docs/rest-api/properties.md b/docs/docs/rest-api/properties.md index 057c5a83..9cef4d66 100644 --- a/docs/docs/rest-api/properties.md +++ b/docs/docs/rest-api/properties.md @@ -1,267 +1,52 @@ --- -sidebar_position: 3 +sidebar_position: 4 --- -# Properties API +# Properties -RushDB provides a powerful Properties API that enables you to manage the properties associated with your records. 
This API allows you to find, retrieve, create, update, and delete properties, as well as manage property values. +## `POST /api/v1/properties/search` -## Overview - -The Properties API allows you to: -- List all properties in your project -- Get details about a specific property -- Get distinct values for a property -- Delete properties - -All properties endpoints require authentication using a token header. - -## Property Types - -RushDB supports the following property types: - -| Type | Description | -|------|-------------| -| `string` | Text values | -| `number` | Numeric values | -| `boolean` | True/false values | -| `null` | Null values | -| `datetime` | ISO8601 format datetime values | -| `vector` | Arrays of numbers (for embeddings/vector search) | - -## List Properties - -```http -POST /api/v1/properties/search -``` - -Returns a find of all properties in the current project, with filtering options. - -### Request Body - -| Field | Type | Description | -|-----------|--------|-------------| -| `where` | Object | Optional filter criteria ([learn more](../../concepts/search/where)) | -| `labels` | Array | Optional array of labels to filter records by ([learn more](../../concepts/search/labels)) | - -### Example Request - -```json -{ - "where": { - "type": "string" - } -} -``` - -### Response - -```json -{ - "success": true, - "data": [ - { - "id": "018dfc84-d6cb-7000-89cd-850db63a1e78", - "name": "name", - "type": "string", - "projectId": "018dfc84-d6cb-7000-89cd-850db63a1e76", - "metadata": "" - }, - { - "id": "018dfc84-d6cb-7000-89cd-850db63a1e79", - "name": "email", - "type": "string", - "projectId": "018dfc84-d6cb-7000-89cd-850db63a1e76", - "metadata": "" - } - ] -} -``` - -## Get Property - -```http -GET /api/v1/properties/:propertyId -``` - -Retrieve detailed information about a specific property by its ID. 
- -### Parameters - -| Parameter | Type | Description | -|-------------|--------|-------------| -| `propertyId` | String | The ID of the property to retrieve | - -### Response - -```json -{ - "success": true, - "data": { - "id": "018dfc84-d6cb-7000-89cd-850db63a1e78", - "name": "name", - "type": "string", - "projectId": "018dfc84-d6cb-7000-89cd-850db63a1e76", - "metadata": "" - } -} -``` - -## Get Property Values - -```http -POST /api/v1/properties/:propertyId/values -``` - -Retrieves distinct values for a specific property across all records using SearchQuery filtering. - -### Parameters - -| Parameter | Type | Description | -|-------------|--------|-------------| -| `propertyId` | String | The ID of the property | - -### Request Body - -The request body supports SearchQuery parameters along with value-specific filtering: - -| Field | Type | Description | -|-----------|--------|-------------| -| `where` | Object | Optional. SearchQuery filter criteria ([learn more](../../concepts/search/where)) | -| `labels` | Array | Optional array of labels to filter records by ([learn more](../../concepts/search/labels)) | -| `skip` | Number | Optional. Number of values to skip (default: 0) | -| `limit` | Number | Optional. Maximum number of values to return (default: 100) | -| `query` | String | Optional. Filter values by this text string | -| `orderBy` | String | Optional. 
Sort direction (`asc` or `desc`) | - -### Example Request - -```http -POST /api/v1/properties/018dfc84-d6cb-7000-89cd-850db63a1e78/values -Content-Type: application/json - -{ - "where": { - "status": "active" - }, - "query": "jo", - "orderBy": "asc", - "skip": 0, - "limit": 10 -} -``` - -### Response - -```json -{ - "success": true, - "data": { - "values": ["John", "Johnny", "Jon"], - "min": null, - "max": null, - "type": "string" - } -} -``` - -For numeric properties, the response includes minimum and maximum values: - -```json -{ - "success": true, - "data": { - "values": [18, 19, 20, 21], - "min": 18, - "max": 21, - "type": "number" - } -} -``` - -## Delete Property - -```http -DELETE /api/v1/properties/:propertyId +```bash +curl -X POST https://api.rushdb.com/api/v1/properties/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"where": {"type": "string"}}' ``` -Deletes a property from all records. - -### Parameters - -| Parameter | Type | Description | -|-------------|--------|-------------| -| `propertyId` | String | The ID of the property to delete | +## `GET /api/v1/properties/:propertyId` -### Response - -```json -{ - "success": true, - "data": { - "message": "Property (018dfc84-d6cb-7000-89cd-850db63a1e78) has been successfully deleted." - } -} +```bash +curl https://api.rushdb.com/api/v1/properties/prop-123 \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -## Value Handling - -### Single Values +## `POST /api/v1/properties/:propertyId/values` -Single values are stored directly: +Returns distinct values for a property — useful for filter UIs. 
-```json -{ - "name": "John Doe", - "age": 30, - "active": true -} +```bash +curl -X POST https://api.rushdb.com/api/v1/properties/prop-123/values \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"query": "sci", "orderBy": "asc", "limit": 100}' ``` -### Multiple Values +| Field | Type | Description | +|---|---|---| +| `query` | `string` | Filter values containing this text | +| `orderBy` | `"asc" \| "desc"` | Sort direction | +| `skip` | `number` | Pagination offset | +| `limit` | `number` | Max values to return | -Arrays can store multiple values of the same type: - -```json -{ - "tags": ["important", "urgent", "follow-up"], - "scores": [85, 90, 95] -} -``` +## `DELETE /api/v1/properties/:propertyId` -### Value Separators +:::warning +Deletes the property and removes it from **all records** in the database. +::: -When updating properties, you can use value separators to split a string into multiple values: - -```json -{ - "name": "tags", - "type": "string", - "value": "important,urgent,follow-up", - "valueSeparator": "," -} +```bash +curl -X DELETE https://api.rushdb.com/api/v1/properties/prop-123 \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -This will result in an array of values: `["important", "urgent", "follow-up"]`. - -## Property Metadata - -Properties can have optional metadata, which can be used to store additional information about the property. This is useful for storing things like property descriptions, validation rules, or display preferences. - -```json -{ - "name": "email", - "type": "string", - "metadata": "{\"description\":\"User's email address\",\"required\":true,\"unique\":true}" -} -``` - -Metadata is stored as a JSON string and can contain any valid JSON data. - -## Best Practices -1. **Use consistent naming**: Follow a consistent naming convention for property names -2. **Set appropriate types**: Use the correct type for each property to facilitate operations like sorting and filtering -3. 
**Use metadata**: Leverage the metadata field to add useful information about your properties -4. **Batch updates**: When updating property values across many records, use the batch update endpoint -5. **Consider relationships**: For complex data models, consider using relationships between records instead of deeply nested property structures diff --git a/docs/docs/rest-api/raw-queries.md b/docs/docs/rest-api/raw-queries.md index c1c40906..829dab70 100644 --- a/docs/docs/rest-api/raw-queries.md +++ b/docs/docs/rest-api/raw-queries.md @@ -1,33 +1,39 @@ --- -sidebar_position: 7 +sidebar_position: 8 --- # Raw Queries -> **Important (cloud-only):** This endpoint is available only on the RushDB managed cloud service or when your project is connected to a custom database through RushDB Cloud. It is not available for self-hosted or local-only deployments — attempting to use it against a non-cloud instance will fail. +:::warning Requires a connected Neo4j instance +This endpoint is only available when your project is connected to your own Neo4j database. Connecting a custom Neo4j instance is available on the free tier — see the RushDB dashboard to set it up. +::: ### REST API -Endpoint: POST /query/raw - -Body: +```http +POST /api/v1/query/raw +``` -```json -{ - "query": "MATCH (n:Person) RETURN n LIMIT $limit", - "params": { "limit": 10 } -} +```bash +curl -X POST https://api.rushdb.com/api/v1/query/raw \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "query": "MATCH (n:Person) RETURN n LIMIT $limit", + "params": { "limit": 10 } + }' ``` Response: raw Neo4j driver result object. 
### Real-world example: employees at a company -Request body: - -```json -{ - "query": "MATCH (c:Company { name: $company })<-[:EMPLOYS]-(p:Person) RETURN p { .name, .email, company: c.name } AS employee ORDER BY p.name LIMIT $limit", - "params": { "company": "Acme Corp", "limit": 50 } -} +```bash +curl -X POST https://api.rushdb.com/api/v1/query/raw \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "query": "MATCH (c:Company { name: $company })<-[:EMPLOYS]-(p:Person) RETURN p { .name, .email, company: c.name } AS employee ORDER BY p.name LIMIT $limit", + "params": { "company": "Acme Corp", "limit": 50 } + }' ``` diff --git a/docs/docs/rest-api/records/_category_.json b/docs/docs/rest-api/records/_category_.json index eeb03a2a..530e80f3 100644 --- a/docs/docs/rest-api/records/_category_.json +++ b/docs/docs/rest-api/records/_category_.json @@ -1,5 +1,5 @@ { - "label": "Records API", - "position": 1, + "label": "Records", + "position": 2, "collapsible": false } diff --git a/docs/docs/rest-api/records/create-records.md b/docs/docs/rest-api/records/create-records.md index 20c63f71..c909bce9 100644 --- a/docs/docs/rest-api/records/create-records.md +++ b/docs/docs/rest-api/records/create-records.md @@ -4,432 +4,101 @@ sidebar_position: 1 # Create Records -RushDB provides multiple ways to create records via its REST API. You can create single [records](../../concepts/records.md), control how your data is processed, and work with transactions for data consistency. - -## Overview - -The create records endpoints allow you to: -- Create a single record with properties and a label -- Upsert records (create or update based on matching criteria) -- Control data type inference and other formatting options -- Create records within transactions for data consistency - -All create record endpoints require authentication using a token header. 
- -## Create / Upsert a Record - -```http -POST /api/v1/records -``` - -This endpoint creates a record with the provided label and data. If `options.mergeBy` and/or `options.mergeStrategy` are supplied, it performs an upsert (create-or-update) instead of a plain create. - -### Request Body - -| Field | Type | Description | -|-------------|--------|-------------| -| `label` | String | Label for the new record | -| `data` | Object | Object containing property name/value pairs | -| `options` | Object | Optional configuration parameters (including upsert) | - -#### Options Object (Create & Upsert) - -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `suggestTypes` | Boolean | `true` | **Default is `true`** - Automatically infers data types for properties. To disable type inference and store all values as strings, explicitly set to `false` | -| `castNumberArraysToVectors` | Boolean | `false` | When true, converts numeric arrays to vector type | -| `convertNumericValuesToNumbers` | Boolean | `false` | When true, converts string numbers to number type | -| `mergeBy` | Array of Strings | `[]` / omitted | Upsert match keys. If omitted and `mergeStrategy` present, all incoming keys are used. Empty array means use all keys. | -| `mergeStrategy` | String | `'append'` | Upsert behavior when match found: `'append'` (add/update, keep others) or `'rewrite'` (replace all existing properties). Providing either this or `mergeBy` triggers upsert flow. | - -:::info Default Behavior -By default, `suggestTypes` is set to `true` for all write operations (create, upsert, import). This means RushDB automatically infers data types from your values. To store all properties as strings without type inference, you must explicitly set `suggestTypes: false` in the options. 
-::: - -### Example Create Request (no upsert) - -```json -{ - "label": "Person", - "data": { - "name": "John Doe", - "age": "30", - "isActive": true, - "skills": ["JavaScript", "Python", "SQL"], - "joinDate": "2025-04-23T10:30:00Z", - "score": 92.5 - }, - "options": { - "suggestTypes": true, - "convertNumericValuesToNumbers": true - } -} -``` - -### Response (Create) - -```json -{ - "__id": "018e4c71-f35a-7000-89cd-850db63a1e77", - "__label": "Person", - "__proptypes": { - "name": "string", - "age": "number", - "isActive": "boolean", - "skills": "string", - "joinDate": "datetime", - "score": "number" - }, - "name": "John Doe", - "age": 30, - "isActive": true, - "skills": ["JavaScript", "Python", "SQL"], - "joinDate": "2025-04-23T10:30:00Z", - "score": 92.5 -} -``` - -## Property-Based Approach - -If you need precise control over property types and values, you can use the property-based approach: - -```http -POST /api/v1/records +## `POST /api/v1/records` + +```bash +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "MOVIE", + "data": {"title": "Inception", "rating": 8.8, "genre": "sci-fi"}, + "options": {"suggestTypes": true} + }' +``` + +### Request body + +| Field | Type | Description | +|---|---|---| +| `label` | string | Label for the new record | +| `data` | object | Property key-value pairs | +| `options` | object | See options table below | + +### Options + +| Option | Default | Description | +|---|---|---| +| `suggestTypes` | `true` | Infer property types automatically | +| `convertNumericValuesToNumbers` | `false` | Convert string numbers to number type | +| `capitalizeLabels` | `false` | Uppercase all inferred label names | +| `relationshipType` | `__RUSHDB__RELATION__DEFAULT__` | Relationship type for nested links | +| `returnResult` | `false` | Return the created record in the response | +| `mergeBy` | — | Fields to match on for upsert | +| 
`mergeStrategy` | `append` | `append` or `rewrite` | + +## Upsert (create or update) + +Supply `mergeBy` and/or `mergeStrategy` in `options` to trigger upsert behavior. + +```bash +# Match on 'title'; append/update rating if found +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "MOVIE", + "data": {"title": "Inception", "rating": 9.0}, + "options": {"mergeBy": ["title"], "mergeStrategy": "append"} + }' +``` + +| `mergeBy` value | Match behaviour | +|---|---| +| `["field"]` | Match only on listed fields | +| `[]` or omitted | Match on ALL incoming property keys | + +| Strategy | Behaviour | +|---|---| +| `append` (default) | Add/update incoming fields; preserve all other existing fields | +| `rewrite` | Replace all fields; unmentioned fields are removed | + +## Precise type control (properties array) + +```bash +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "MOVIE", + "properties": [ + {"name": "title", "type": "string", "value": "Inception"}, + {"name": "rating", "type": "number", "value": 8.8}, + {"name": "genres", "type": "string", "value": "sci-fi,thriller", "valueSeparator": ","}, + {"name": "releasedAt", "type": "datetime", "value": "2010-07-16T00:00:00Z"} + ] + }' +``` + +## With a transaction + +```bash +# 1. Begin a transaction +TX_ID=$(curl -s -X POST https://api.rushdb.com/api/v1/tx \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"ttl": 10000}' | jq -r '.data.id') + +# 2. Create records using the transaction header +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -H "X-Transaction-Id: $TX_ID" \ + -d '{"label": "MOVIE", "data": {"title": "Inception"}}' + +# 3. 
Commit +curl -X POST https://api.rushdb.com/api/v1/tx/$TX_ID/commit \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -### Request Body - -| Field | Type | Description | -|-------------|--------|-------------| -| `label` | String | Label for the new record | -| `properties` | Array | Array of property objects defining record data with explicit types | - -#### Property Object - -| Field | Type | Description | -|-----------|--------|-------------| -| `name` | String | The property name | -| `type` | String | The data type for the property ('string', 'number', 'boolean', 'datetime', etc.) | -| `value` | Any | The value of the property | -| `valueSeparator` | String | Optional separator to split string values into arrays | - -### Example Request - -```json -{ - "label": "Person", - "properties": [ - { - "name": "name", - "type": "string", - "value": "John Doe" - }, - { - "name": "age", - "type": "number", - "value": 30 - }, - { - "name": "isActive", - "type": "boolean", - "value": true - }, - { - "name": "skills", - "type": "string", - "value": "JavaScript,Python,SQL", - "valueSeparator": "," - }, - { - "name": "joinDate", - "type": "datetime", - "value": "2025-04-23T10:30:00Z" - }, - { - "name": "scores", - "type": "number", - "value": "85,90,95", - "valueSeparator": "," - } - ] -} -``` - -### Response - -```json -{ - "__id": "018e4c71-f35a-7000-89cd-850db63a1e77", - "__label": "Person", - "__proptypes": { - "name": "string", - "age": "number", - "isActive": "boolean", - "skills": "string", - "joinDate": "datetime", - "scores": "number" - }, - "name": "John Doe", - "age": 30, - "isActive": true, - "skills": ["JavaScript", "Python", "SQL"], - "joinDate": "2025-04-23T10:30:00Z", - "scores": [85, 90, 95] -} -``` - -## Upserting Via POST /api/v1/records - -Previously upsert required a dedicated endpoint (`/records/upsert`). Upsert is now unified into `POST /api/v1/records`. 
The legacy endpoint will continue to function for backward compatibility but new integrations should prefer the unified create/upsert endpoint. - -### Upsert Request Body - -| Field | Type | Description | -|-------------|--------|-------------| -| `label` | String | Optional label for the record | -| `data` | Object | Object containing property name/value pairs | -| `options` | Object | Configuration parameters including merge behavior (see Options table) | - -#### Upsert-Specific Options Highlights - -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `mergeBy` | Array of Strings | `[]` / omitted | Property names to match on. Empty or omitted with mergeStrategy provided falls back to all incoming keys. | -| `mergeStrategy` | String | `'append'` | `'append'` adds/updates provided properties; `'rewrite'` replaces all properties (unmentioned ones removed). | -| `suggestTypes` | Boolean | `true` | **Default is `true`** - Automatically infers data types for properties. To disable type inference and store all values as strings, explicitly set to `false` | -| `castNumberArraysToVectors` | Boolean | `false` | When true, converts numeric arrays to vector type | -| `convertNumericValuesToNumbers` | Boolean | `false` | When true, converts string numbers to number type | - -:::info Default Behavior -By default, `suggestTypes` is set to `true` for all write operations. This means RushDB automatically infers data types from your values during upsert operations. To store all properties as strings without type inference, you must explicitly set `suggestTypes: false` in the options. 
-::: - -### Merge Strategies - -#### Append Strategy -When using `mergeStrategy: 'append'`, the upsert operation: -- Adds new properties from the incoming data -- Updates existing properties with new values -- Preserves existing properties not included in the incoming data - -#### Rewrite Strategy -When using `mergeStrategy: 'rewrite'`, the upsert operation: -- Replaces all existing properties with the incoming data -- Removes properties not included in the incoming data -- Essentially performs a complete replacement of the record's properties - -### Example Upsert Requests - -#### Create or Update with Append Strategy (primary key: sku) - -```json -{ - "label": "Product", - "data": { - "sku": "SKU-001", - "name": "Laptop Pro", - "price": 1299.99, - "category": "Electronics" - }, - "options": { - "mergeBy": ["sku"], - "mergeStrategy": "append", - "suggestTypes": true - } -} -``` - -If a product with `sku: "SKU-001"` exists, this will update its properties while keeping any other existing properties. If it doesn't exist, a new product record will be created. - -#### Subsequent Update Preserving Fields (append) - -```json -{ - "label": "Product", - "data": { - "sku": "SKU-001", - "price": 1199.99, - "stock": 50 - }, - "options": { - "mergeBy": ["sku"], - "mergeStrategy": "append", - "suggestTypes": true - } -} -``` - -This updates the price and adds a stock field, while preserving the existing `name` and `category` properties. - -#### Update with Rewrite Strategy (full replacement) - -```json -{ - "label": "Product", - "data": { - "sku": "SKU-001", - "name": "Laptop Pro v2", - "price": 1399.99 - }, - "options": { - "mergeBy": ["sku"], - "mergeStrategy": "rewrite", - "suggestTypes": true - } -} -``` - -This replaces all properties of the product, removing `category` and `stock` fields from the previous example. 
- -#### Upsert with Multiple Match Fields - -```json -{ - "label": "User", - "data": { - "email": "user@example.com", - "tenantId": "tenant-123", - "name": "John Doe", - "role": "admin" - }, - "options": { - "mergeBy": ["email", "tenantId"], - "mergeStrategy": "append", - "suggestTypes": true - } -} -``` - -This matches on both `email` and `tenantId`, useful for multi-tenant applications. - -#### Upsert Without Explicit MergeBy (all keys become match fingerprint) - -```json -{ - "label": "Setting", - "data": { - "key": "theme", - "value": "dark", - "userId": "user-123" - }, - "options": { - "mergeStrategy": "append", - "suggestTypes": true - } -} -``` - -When `mergeBy` is empty or omitted, the match is performed on all properties in the incoming data. A record will only be updated if all property values match exactly. - -### Response (Upsert) - -```json -{ - "__id": "018e4c71-f35a-7000-89cd-850db63a1e77", - "__label": "Product", - "__proptypes": { - "sku": "string", - "name": "string", - "price": "number", - "category": "string" - }, - "sku": "SKU-001", - "name": "Laptop Pro", - "price": 1299.99, - "category": "Electronics" -} -``` - -### Use Cases - -The upsert operation is particularly useful for: - -- **Idempotent data imports**: Safely re-run imports without creating duplicates -- **User profile updates**: Update user information while preserving unmodified fields -- **Inventory management**: Update product stock levels while maintaining product details -- **Configuration management**: Update settings by key while preserving other settings -- **Multi-tenant applications**: Match records by tenant-specific identifiers -- **Data synchronization**: Keep external data sources in sync with your graph database - -### Best Practices - -- **Choose the right merge strategy**: Use `append` when you want to preserve existing data, `rewrite` when you need a clean slate -- **Use specific mergeBy fields**: Define clear unique identifiers for better performance and 
predictability (email, sku, externalId, tenantId+userId compound, etc.) -- **Consider multi-field matching**: For multi-tenant or complex scenarios, use multiple fields in `mergeBy` -- **Handle edge cases**: When `mergeBy` is empty, ensure your data structure supports matching on all fields -- **Use with transactions**: Combine upsert with [transactions](../../concepts/transactions.mdx) for atomic multi-record operations - -## Working with Multiple Records and Complex Data - -For batch operations and working with multiple records or complex data structures, please refer to the [Import Data documentation](./import-data.md). The Import Data API provides dedicated endpoints for: - -- Batch creation of multiple records in a single request -- Importing JSON or CSV data -- Creating nested record hierarchies -- Handling arrays of objects as linked records -- Setting relationship types between records -- Processing complex object graphs with automatic type inference - -The Import Data API is optimized for performance when working with large datasets or complex structures. It offers additional configuration options and better throughput for batch operations. - -## Creating Records in Transactions - -To ensure data consistency when creating multiple related [records](../../concepts/records.md), you can use [transactions](../../concepts/transactions.mdx): - -1. Create a transaction: -```http -POST /api/v1/tx -``` - -2. Use the returned transaction ID in your create record requests: -```http -POST /api/v1/records -Token: $RUSHDB_API_KEY -X-Transaction-Id: $YOUR_TRANSACTION_ID -``` - -3. 
Commit the transaction when all operations are successful: -```http -POST /api/v1/tx/YOUR_TRANSACTION_ID/commit -``` - -Or roll back if there's an error: -```http -POST /api/v1/tx/YOUR_TRANSACTION_ID/rollback -``` - -## Data Type Handling - -RushDB supports the following [property](../../concepts/properties.md) types: - -- `string`: Text values -- `number`: Numeric values -- `boolean`: True/false values -- `null`: Null values -- `datetime`: ISO8601 format strings (e.g., "2025-04-23T10:30:00Z") -- `vector`: Arrays of numbers (when `castNumberArraysToVectors` is true) - -### Automatic Type Inference - -**By default, `suggestTypes` is set to `true` for all write operations** (create, upsert, import). This means RushDB automatically infers data types from your values: -- Numeric values become `number` type -- `true`/`false` become `boolean` type -- ISO8601 strings become `datetime` type -- `null` becomes `null` type -- All other values become `string` type - -To disable automatic type inference and store all values as strings, you must **explicitly set `suggestTypes: false`** in your request options. - -### Additional Type Conversions - -When `convertNumericValuesToNumbers` is enabled, string values that represent numbers (e.g., '30') will be converted to their numeric equivalents (e.g., 30). - -When `castNumberArraysToVectors` is enabled, numeric arrays will be stored as `vector` type instead of `number` arrays. 
- -## Best Practices -- Use the default approach for typical use cases and when automatic type inference is desired -- Use the property-based approach when precise control over [property](../../concepts/properties.md) types is required -- Use the [Import Data API](./import-data.md) for batch operations and creating multiple records -- Use [transactions](../../concepts/transactions.mdx) when creating related records to ensure data consistency -- Validate data on the client side before sending it to the API diff --git a/docs/docs/rest-api/records/delete-records.md b/docs/docs/rest-api/records/delete-records.md index 0a7b3e1a..1b9eeaa5 100644 --- a/docs/docs/rest-api/records/delete-records.md +++ b/docs/docs/rest-api/records/delete-records.md @@ -1,126 +1,28 @@ --- -sidebar_position: 3 +sidebar_position: 7 --- # Delete Records -RushDB provides efficient APIs for deleting records from your database. This capability allows you to remove individual records by ID or delete multiple records at once using search query filters. +## `DELETE /api/v1/records/:entityId` -## Overview - -The delete endpoints allow you to: -- Delete a single record by ID -- Delete multiple records using [SearchQuery capabilities](../../concepts/search/introduction) -- Perform conditional bulk deletions -- Safely remove records with proper authentication - -All delete operations require authentication using a bearer token and handle relationships appropriately. - -## Delete a Single Record - -```http -DELETE /api/v1/records/{entityId} -``` - -This endpoint deletes a specific record identified by its unique ID. 
- -### Path Parameters - -| Parameter | Type | Description | -|------------|--------|-------------| -| `entityId` | String | The unique identifier of the record to delete | - -### Response - -```json -{ - "success": true, - "data": { - "message": "Record deleted successfully" - } -} -``` - -## Delete Multiple Records - -```http -POST /api/v1/records/delete -``` - -This endpoint deletes multiple records that match the specified search criteria. - -### Request Body - -You can use search parameters to filter the data you want to delete: - -| Field | Type | Description | -|-----------|--------|-------------| -| `where` | Object | Filter conditions for records ([learn more](../../concepts/search/where)) | -| `labels` | Array | Optional array of labels to filter records by ([learn more](../../concepts/search/labels)) | - -### Example Request - -```json -{ - "where": { - "age": { "$lt": 18 }, - "status": "inactive" - }, - "labels": ["USER"] -} -``` - -### Response - -```json -{ - "success": true, - "data": { - "message": "25 record(s) deleted successfully" - } -} +```bash +curl -X DELETE https://api.rushdb.com/api/v1/records/movie-123 \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -## Bulk Deletion with Complex Queries +## `POST /api/v1/records/delete` -For more advanced deletion scenarios, you can use the full power of RushDB's search query system: +Delete all records matching a query. -```json -{ - "where": { - "$or": [ - { "status": "archived", "lastModified": { "$lt": "2024-01-01" } }, - { "status": "deleted", "isTemporary": true } - ] - }, - "labels": ["DOCUMENT", "ATTACHMENT"] -} +```bash +curl -X POST https://api.rushdb.com/api/v1/records/delete \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"labels": ["MOVIE"], "where": {"rating": {"$lt": 5}}}' ``` -## Handling Relationships - -When deleting records, all relationships associated with those records are automatically deleted. 
This ensures database integrity and prevents orphaned relationships. - -## Delete Operation Safety - -RushDB implements several safeguards for delete operations: - -1. **Authentication**: All delete operations require a valid authentication token -2. **Authorization**: Users can only delete records in projects they have access to -3. **Validation**: Input data is validated before processing -4. **Transactions**: Delete operations are performed within transactions for data consistency -5. **Partial Failure Handling**: If a deletion affects multiple records and some operations fail, all changes are rolled back - -## Performance Considerations - -- For large-scale deletions, RushDB processes operations in batches -- Complex query conditions may increase processing time -- Consider using [label filtering](../../concepts/search/labels) to narrow down records before deletion -- For very large datasets, consider multiple smaller delete operations - -## Related Documentation +:::warning +Omitting `where` deletes **all** records with the given label. +::: -- [Search Introduction](../../concepts/search/introduction) -- [Where Clause](../../concepts/search/where) -- [Labels](../../concepts/search/labels) -- [Record Relationships](../../concepts/relationships) diff --git a/docs/docs/rest-api/records/export-data.md b/docs/docs/rest-api/records/export-data.md index e91c1e50..8fdb96ff 100644 --- a/docs/docs/rest-api/records/export-data.md +++ b/docs/docs/rest-api/records/export-data.md @@ -1,51 +1,36 @@ --- -sidebar_position: 2 +sidebar_position: 8 --- # Export Data -RushDB provides efficient APIs for exporting your database records in different formats. This capability allows you to retrieve and analyze your data externally or integrate it with other systems. 
- -## Overview - -The export endpoints allow you to: -- Export data in CSV format -- Filter and query the data to be exported using [SearchQuery capabilities](../../concepts/search/introduction) -- Order results as needed -- Handle large exports efficiently through pagination - -All export endpoints require authentication using a bearer token. - -## Export CSV Data +Export records as CSV. Accepts the same `where` / `orderBy` / `skip` / `limit` / `labels` parameters as a search query. ```http POST /api/v1/records/export/csv ``` -This endpoint exports data in CSV format with headers in the first row. - ### Request Body -You can send search parameters to filter the data you want to export: - -| Field | Type | Description | -|-----------|--------|-------------| -| `where` | Object | Filter conditions for records ([learn more](../../concepts/search/where)) | -| `orderBy` | String or Object | Sorting criteria ([learn more](../../concepts/search/pagination-order)) | -| `skip` | Number | Number of records to skip for pagination ([learn more](../../concepts/search/pagination-order)) | -| `limit` | Number | Maximum number of records to return (up to 1000) | -| `labels` | Array | Optional array of labels to filter records by ([learn more](../../concepts/search/labels)) | +| Field | Type | Description | +|-----------|------------------|--------------------------------------------------| +| `where` | object | Filter conditions | +| `orderBy` | string or object | Sort criteria | +| `skip` | number | Pagination offset | +| `limit` | number | Max records to return (up to 1000) | +| `labels` | array of strings | Restrict to specific labels | ### Example Request -```json -{ - "where": { - "age": { "$gt": 25 } - }, - "orderBy": { "name": "asc" }, - "limit": 1000 -} +```bash +curl -X POST https://api.rushdb.com/api/v1/records/export/csv \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "where": {"age": {"$gt": 25}}, + "orderBy": 
{"name": "asc"}, + "limit": 1000 + }' ``` ### Response @@ -54,43 +39,10 @@ You can send search parameters to filter the data you want to export: { "success": true, "data": { - "fileContent": "id,label,name,age,email\n018dfc84-d6cb-7000-89cd-850db63a1e77,PERSON,John Doe,30,john@example.com\n018dfc84-d78c-7000-89cd-85db63d6a120,PERSON,Jane Smith,28,jane@example.com", + "fileContent": "id,label,name,age,email\n018dfc84...,PERSON,John Doe,30,john@example.com", "dateTime": "2025-04-23T10:15:32.123Z" } } ``` -The `fileContent` field contains the CSV data string that can be saved directly to a file. - -## Data Processing - -When exporting data, RushDB: - -1. **Filters**: Applies any specified filters to select records using the [where clause](../../concepts/search/where) -2. **Sorts**: Orders records based on the `orderBy` parameter as described in [pagination and order](../../concepts/search/pagination-order) -3. **Paginates**: Processes data in efficient batches using [pagination capabilities](../../concepts/search/pagination-order) -4. **Transforms**: Converts internal data structures to CSV format -5. 
**Cleans**: Removes internal system properties before returning data - -## Performance Considerations - -- Exports process data in batches of 1000 records for optimal performance -- For large datasets, consider using pagination parameters (`skip` and `limit`) as described in the [pagination documentation](../../concepts/search/pagination-order) -- Complex queries may increase processing time -- RushDB automatically handles large exports by chunking the data retrieval -- Consider using [label filtering](../../concepts/search/labels) to narrow down the data scope before exporting - -## Working with Exported Data - -The exported CSV can be: -- Imported into spreadsheet software -- Processed by data analysis tools -- Used for backups and data archiving -- Imported into other databases - -## Related Documentation - -- [Search Introduction](../../concepts/search/introduction) -- [Where Clause](../../concepts/search/where) -- [Labels](../../concepts/search/labels) -- [Pagination and Order](../../concepts/search/pagination-order) +`fileContent` is a CSV string with headers on the first row. System properties are stripped automatically. diff --git a/docs/docs/rest-api/records/get-records.md b/docs/docs/rest-api/records/get-records.md index fb81b034..1249d782 100644 --- a/docs/docs/rest-api/records/get-records.md +++ b/docs/docs/rest-api/records/get-records.md @@ -1,271 +1,104 @@ --- -sidebar_position: 7 +sidebar_position: 5 --- # Get Records -RushDB provides flexible APIs for retrieving records from your database. This capability allows you to access individual records by ID or retrieve multiple records using powerful search queries. 
+## `GET /api/v1/records/:entityId` -## Overview - -The record retrieval endpoints allow you to: -- Get a single record by its ID -- Search for multiple records using [SearchQuery capabilities](../../concepts/search/introduction) -- Filter, sort, and paginate results -- Retrieve records with related data - -All record retrieval operations require authentication using a bearer token. - -## Get a Single Record - -```http -GET /api/v1/records/{entityId} -``` - -This endpoint retrieves a specific record identified by its unique ID. - -### Path Parameters - -| Parameter | Type | Description | -|------------|--------|-------------| -| `entityId` | String | The unique identifier of the record to retrieve | - -### Response - -```json -{ - "success": true, - "data": { - "id": "018e4c71-5f20-7db2-b0b1-e7e681542af9", - "label": "PERSON", - "name": "John Doe", - "age": 30, - "email": "john@example.com" - } -} -``` - -## Search for Records - -```http -POST /api/v1/records/search +```bash +curl https://api.rushdb.com/api/v1/records/movie-123 \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -This endpoint searches for records that match the specified criteria, with support for filtering, pagination, and sorting. 
- -### Request Body - -You can use search parameters to filter the data you want to retrieve: - -| Field | Type | Description | -|-----------|------------------|----------------------------------------------------------------------------------------------| -| `where` | `Object` | Filter conditions for records ([learn more](../../concepts/search/where)) | -| `orderBy` | `String` or `Object` | Sorting criteria ([learn more](../../concepts/search/pagination-order)) | -| `skip` | `Number` | Number of records to skip for pagination ([learn more](../../concepts/search/pagination-order)) | -| `limit` | `Number` | Maximum number of records to return (default: 1000) | -| `labels` | `Array` | Optional array of labels to filter records by ([learn more](../../concepts/search/labels)) | -| `aggregate` | `Object` | Optional aggregation map ([learn more](../../concepts/search/aggregations)) | -| `groupBy` | `Array` | Optional grouping keys (e.g. `["$record.status"]`) applied with aggregations | - -### Example Request - -```json -{ - "where": { - "age": { "$gt": 25 } - }, - "orderBy": { "name": "asc" }, - "skip": 0, - "limit": 50, - "labels": ["PERSON"] -} +## `POST /api/v1/records/search` + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "labels": ["MOVIE"], + "where": {"rating": {"$gte": 8}}, + "orderBy": {"rating": "desc"}, + "limit": 10 + }' ``` -### Response - -```json -{ - "success": true, - "data": { - "data": [ - { - "id": "018e4c71-5f20-7db2-b0b1-e7e681542af9", - "label": "PERSON", - "name": "John Doe", - "age": 30, - "email": "john@example.com" - }, - { - "id": "018e4c71-6a38-7db2-b0b1-e7e681542c13", - "label": "PERSON", - "name": "Jane Smith", - "age": 28, - "email": "jane@example.com" +### Request body + +| Field | Type | Description | +|---|---|---| +| `labels` | `string[]` | Filter by one or more labels | +| `where` | `object` | Field conditions and 
operators | +| `orderBy` | `object` | `{"field": "asc" \| "desc"}` | +| `limit` | `number` | Max records. **Omit when using `aggregate`** | +| `skip` | `number` | Records to skip (pagination) | +| `aggregate` | `object` | Aggregation functions | +| `groupBy` | `string[]` | Group aggregated results | + +### Relationship traversal + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "labels": ["MOVIE"], + "where": { + "ACTOR": { + "$relation": {"type": "STARS_IN", "direction": "in"}, + "country": "USA" } - // ... more records - ], - "total": 125 - } -} + } + }' ``` -## Search Related Records - -```http -POST /api/v1/records/{entityId}/search +### Aggregations + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "labels": ["MOVIE"], + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "avgRating": {"fn": "avg", "alias": "$record", "field": "rating"} + } + }' ``` -This endpoint searches for records that are related to a specific record, identified by its ID. - -### Path Parameters - -| Parameter | Type | Description | -|------------|--------|-------------| -| `entityId` | String | The unique identifier of the record to search from | - -### Request Body - -The request body is the same as for the regular search endpoint, allowing you to filter, paginate, and sort the related records. - -### Example Request - -```json -{ - "where": { - "status": "active" - }, - "orderBy": { "createdAt": "desc" }, - "limit": 20 -} -``` - -### Response - -```json -{ - "success": true, - "data": { - "data": [ - { - "id": "018e4c71-7b42-7db2-b0b1-e7e681543d21", - "label": "DOCUMENT", - "title": "Project Plan", - "status": "active", - "createdAt": "2025-04-12T10:30:15Z" - }, - // ... 
more records - ], - "total": 8 - } -} -``` - -## Advanced Filtering - -RushDB supports complex filtering through the `where` clause, allowing you to create sophisticated queries: - -```json -{ - "where": { - "$or": [ - { "status": "active", "priority": { "$gte": 2 } }, - { "status": "pending", "deadline": { "$lt": "2025-06-01" } } - ], - "assignedTo": { "$ne": null } - }, - "orderBy": [ - { "priority": "desc" }, - { "deadline": "asc" } - ], - "limit": 100 -} +:::danger +**Never set `limit` with `aggregate`** — it restricts the record scan and produces wrong totals. +::: + +### GroupBy + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "labels": ["MOVIE"], + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "avgRating": {"fn": "avg", "alias": "$record", "field": "rating"} + }, + "groupBy": ["$record.genre"], + "orderBy": {"count": "desc"} + }' ``` -### Field Existence and Type Checking +## `POST /api/v1/records/:entityId/search` -You can check for field existence and data types: +Contextual search within a specific record's relationships: -```json -{ - "where": { - "$and": [ - { "email": { "$exists": true } }, - { "phoneNumber": { "$exists": false } }, - { "age": { "$type": "number" } } - ] - } -} +```bash +curl -X POST https://api.rushdb.com/api/v1/records/movie-123/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"labels": ["ACTOR"], "where": {"country": "USA"}}' ``` -This query finds records that have an email address, don't have a phone number, and where age is stored as a number. - -See the [Where Clause documentation](../../concepts/search/where) for a complete reference of available operators. - -## Grouping & Aggregations - -Use `aggregate` together with `groupBy` to transform raw record search into aggregated row sets. 
- -Comprehensive details: [Grouping guide](../../concepts/search/group-by) - -Example: Count deals per stage. -```json -{ - "labels": ["HS_DEAL"], - "aggregate": { - "count": { "fn": "count", "alias": "$record" }, - "avgAmount": { "fn": "avg", "field": "amount", "alias": "$record" } - }, - "groupBy": ["$record.dealstage"], - "orderBy": { "count": "desc" }, - "limit": 1000 -} -``` - -Group by a related record property (declare alias in traversal): -```json -{ - "labels": ["DEPARTMENT"], - "where": { - "PROJECT": { "$alias": "$project" } - }, - "aggregate": { - "projectCount": { "fn": "count", "alias": "$project" }, - "projects": { "fn": "collect", "field": "name", "alias": "$project", "unique": true } - }, - "groupBy": ["$record.name"], - "orderBy": { "projectCount": "desc" } -} -``` - -Multiple grouping keys (pivot style): -```json -{ - "labels": ["PROJECT"], - "aggregate": { "count": { "fn": "count", "alias": "$record" } }, - "groupBy": ["$record.category", "$record.active"], - "orderBy": { "count": "desc" } -} -``` - -Rules: -- At least one aggregation is required for `groupBy` to have an effect. -- Each `groupBy` element uses syntax `.`; root alias is `$record`. -- Output contains one object per distinct combination of group keys plus aggregation outputs. -- Aggregated `collect` arrays are unique by default; set `"unique": false` to allow duplicates. -- To emulate hierarchical drill-down, group only at the parent layer and use nested `collect` for children. 
- -## Performance Considerations - -- Use appropriate `limit` values to control response size and query performance -- When working with large datasets, use pagination (`skip` and `limit`) as described in [pagination documentation](../../concepts/search/pagination-order) -- Complex query conditions may increase processing time -- Use [label filtering](../../concepts/search/labels) to narrow down the search scope before applying other filters -- For frequently accessed records, consider optimizing query patterns - -## Related Documentation -- [Search Introduction](../../concepts/search/introduction) -- [Where Clause](../../concepts/search/where) -- [Labels](../../concepts/search/labels) -- [Pagination and Order](../../concepts/search/pagination-order) -- [Record Relationships](../../concepts/relationships) diff --git a/docs/docs/rest-api/records/import-data.md b/docs/docs/rest-api/records/import-data.md index 296a1b36..c0eaf16d 100644 --- a/docs/docs/rest-api/records/import-data.md +++ b/docs/docs/rest-api/records/import-data.md @@ -4,232 +4,80 @@ sidebar_position: 1 # Import Data -RushDB provides powerful and flexible APIs for importing data into your database. You can import data in various formats including JSON and CSV, with options to customize how the data is processed and stored. - -## Overview - -The import endpoints allow you to: -- Import JSON data -- Import CSV data -- Control data type inference and handling -- Set default relationship types -- Configure property value handling -- Perform batch upsert (create-or-update) using `mergeBy` / `mergeStrategy` on import options - -All import endpoints require authentication using a token header. - -## Nested Data Processing - -When importing nested JSON data structures, RushDB automatically processes and organizes your data using a breadth-first search (BFS) algorithm. This approach efficiently: - -1. 
**Traverses hierarchical structures**: Processes your JSON tree level by level, ensuring proper parent-child relationships -2. **Optimizes object normalization**: Converts nested objects into separate records with appropriate relationships -3. **Preserves data integrity**: Maintains the original structure and relationships between your data elements - -For example, when importing a nested object like a person with embedded address information, the BFS algorithm will: -- Create a separate record for the person -- Create separate records for embedded objects (addresses) -- Establish relationships between parent and child records -- Apply proper labels derived from the JSON structure -- Set up property nodes with appropriate type inference - -For more details on how RushDB manages data storage and the underlying data import mechanism, see [Storage - Data Import Mechanism](../../concepts/storage#data-import-mechanism). - -## Import JSON Data - -```http -POST /api/v1/records/import/json +## `POST /api/v1/records/import/json` + +Pass nested JSON — RushDB walks the structure and creates linked records automatically. 
+ +```bash +curl -X POST https://api.rushdb.com/api/v1/records/import/json \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "MOVIE", + "data": { + "title": "Inception", + "rating": 8.8, + "ACTOR": [ + {"name": "Leonardo DiCaprio", "country": "USA"}, + {"name": "Ken Watanabe", "country": "Japan"} + ] + }, + "options": {"suggestTypes": true} + }' ``` -### Request Body - -| Field | Type | Description | -|-----------|--------|-------------| -| `data` | Object or Array | JSON data to import | -| `label` | String | Label for the root node(s) | -| `options` | Object | Optional configuration parameters | - -#### Options Object - -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `suggestTypes` | Boolean | `true` | **Default is `true`** - Automatically infers data types for properties. To disable type inference and store all values as strings, explicitly set to `false` | -| `castNumberArraysToVectors` | Boolean | `false` | When true, converts numeric arrays to vector type | -| `convertNumericValuesToNumbers` | Boolean | `false` | When true, converts string numbers to number type | -| `capitalizeLabels` | Boolean | `false` | When true, converts all labels to uppercase | -| `relationshipType` | String | `__RUSHDB__RELATION__DEFAULT__` | Default relationship type between nodes | -| `returnResult` | Boolean | `false` | When true, returns imported records in response | -| `mergeBy` | Array of Strings | `[]` / omitted | Upsert match keys for batch import. Empty or omitted (with mergeStrategy present) means all incoming property keys per record. | -| `mergeStrategy` | String | `'append'` | Upsert behavior: `'append'` adds/updates provided properties; `'rewrite'` replaces all existing properties. Providing either this or `mergeBy` triggers upsert path. | - -:::info Default Behavior -By default, `suggestTypes` is set to `true` for all import operations (JSON and CSV). 
This means RushDB automatically infers data types from your values. To store all properties as strings without type inference, you must explicitly set `suggestTypes: false` in the options. -::: - -### Example Request (Batch Upsert Import) - -```json -{ - "label": "Product", - "data": [ - { "sku": "SKU-001", "name": "Gadget", "price": 99.99 }, - { "sku": "SKU-002", "name": "Widget", "price": 149.99 } - ], - "options": { - "suggestTypes": true, - "mergeBy": ["sku"], - "mergeStrategy": "append", - "returnResult": true - } -} -``` - -If later you send: - -```json -{ - "label": "Product", - "data": [ - { "sku": "SKU-001", "price": 89.99 }, - { "sku": "SKU-002", "price": 139.99, "category": "Tools" } - ], - "options": { - "mergeBy": ["sku"], - "mergeStrategy": "append" - } -} +### Options + +| Option | Default | Description | +|---|---|---| +| `suggestTypes` | `true` | Infer property types automatically | +| `convertNumericValuesToNumbers` | `false` | Convert string numbers to number type | +| `capitalizeLabels` | `false` | Uppercase all inferred label names | +| `relationshipType` | `__RUSHDB__RELATION__DEFAULT__` | Relationship type for nested links | +| `returnResult` | `false` | Return created records in the response | +| `mergeBy` | — | Fields to match on for upsert | +| `mergeStrategy` | `append` | `append` or `rewrite` | + +## `POST /api/v1/records/import/csv` + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/import/csv \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "ACTOR", + "data": "name,country\nLeonardo DiCaprio,USA\nKen Watanabe,Japan", + "options": {"suggestTypes": true}, + "parseConfig": {"header": true, "dynamicTyping": true} + }' ``` -SKU-001 price updates; SKU-002 price updates and category is added; all other properties preserved. - -Using `"mergeStrategy": "rewrite"` would replace properties entirely for each matched record (unmentioned fields removed). 
- -```json -{ - "label": "Person", - "data": { - "name": "John Doe", - "age": "30", - "addresses": [ - { - "type": "home", - "street": "123 Main St", - "city": "Anytown" - }, - { - "type": "work", - "street": "456 Business Rd", - "city": "Workville" - } +### `parseConfig` options + +| Option | Default | Description | +|---|---|---| +| `delimiter` | `,` | Column separator | +| `header` | `true` | First row is header | +| `skipEmptyLines` | `true` | Ignore blank rows | +| `dynamicTyping` | `true` | Auto-convert numbers and booleans | +| `quoteChar` | `"` | Quote character | +| `escapeChar` | `"` | Escape character | +| `newline` | auto | Explicit newline sequence | + +## Upsert during import + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/import/json \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "ACTOR", + "data": [ + {"name": "Leonardo DiCaprio", "country": "USA"}, + {"name": "Ken Watanabe", "country": "Japan"} ], - "scores": [85, 90, 95], - "active": true - }, - "options": { - "suggestTypes": true, - "convertNumericValuesToNumbers": true, - "relationshipType": "OWNS" - } -} -``` - -### Response - -```json -{ - "success": true, - "data": true -} -``` - -If `returnResult: true` is specified in options, the response will include the imported records: - -```json -{ - "success": true, - "data": [ - { - "__id": "018dfc84-d6cb-7000-89cd-850db63a1e77", - "__label": "Person", - "__proptypes": { ... }, - "name": "John Doe", - "age": 30, - // Additional properties... - } - // Additional records... 
- ] -} + "options": {"mergeBy": ["name"], "mergeStrategy": "append"} + }' ``` -## Import CSV Data - -```http -POST /api/v1/records/import/csv -``` - -### Request Body - -| Field | Type | Description | -|-----------|--------|-------------| -| `data` | String | CSV data as a string | -| `label` | String | Label for the nodes | -| `options` | Object | Optional configuration parameters (same as JSON import) | - -CSV files must have headers in the first row. - -### Example Request - -```json -{ - "label": "Customer", - "data": "name,email,age\nJohn Doe,john@example.com,30\nJane Smith,jane@example.com,25", - "options": { - "suggestTypes": true, - "convertNumericValuesToNumbers": true - } -} -``` - -### Response - -Same as JSON import. - -## Data Transformation Process - -When importing data, RushDB processes your data through the following steps: - -1. **Parsing**: Converts your input format (JSON/CSV) into internal structures -2. **Type Inference**: If `suggestTypes` is enabled, analyzes values to determine appropriate data types -3. **Graph Construction**: Creates nodes and relationships based on your data structure -4. **Validation**: Checks against workspace limits -5. **Storage**: Inserts data into the database in optimized batches - -## Data Type Handling - -### Automatic Type Inference - -**By default, `suggestTypes` is set to `true` for all import operations** (JSON and CSV). This means RushDB automatically infers the following data types from your values: - -- `string`: Text values -- `number`: Numeric values -- `boolean`: `true`/`false` values -- `null`: Null values -- `datetime`: ISO8601 format strings (e.g., "2025-04-23T10:30:00Z") -- `vector`: Arrays of numbers (when `castNumberArraysToVectors` is true) - -To disable automatic type inference and store all values as strings, you must **explicitly set `suggestTypes: false`** in your request options. 
- -### Additional Type Conversions - -When `convertNumericValuesToNumbers` is enabled, string values that represent numbers (e.g., '123') will be automatically converted to their numeric equivalents (e.g., 123). - -### Array Handling - -Arrays with consistent data types (e.g., all numbers, all strings) will be handled seamlessly according to their type. However, for inconsistent arrays (e.g., `[1, 'two', null, false]`), all values will be automatically converted to strings to mitigate data loss, and the property type will be stored as `string`. - -## Performance Considerations -- Imports are processed in chunks of 1000 records for optimal performance -- For large imports (>25MB), consider splitting into multiple requests -- Setting `returnResult: false` is recommended for large imports to improve performance -- Batch upsert performance depends on match selectivity; prefer stable unique or near-unique keys in `mergeBy`. diff --git a/docs/docs/rest-api/records/update-records.md b/docs/docs/rest-api/records/update-records.md index 95528fae..28faf708 100644 --- a/docs/docs/rest-api/records/update-records.md +++ b/docs/docs/rest-api/records/update-records.md @@ -4,180 +4,32 @@ sidebar_position: 6 # Update Records -RushDB offers powerful methods to update existing records in your database. You can update record properties and labels through the REST API. +## `PATCH /api/v1/records/:entityId` — partial update -## Overview +Unspecified fields are preserved. -The update endpoints allow you to: -- Update specific properties while preserving others (PATCH) -- Completely replace record data (PUT) - -All update endpoints require authentication using a token header. - -## Update Record (PATCH) - -The PATCH method allows you to update specific properties of a record while preserving other existing properties. 
- -```http -PATCH /api/v1/records/{entityId} -``` - -### Path Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `entityId` | String | The unique identifier of the record to update | - -### Request Body - -| Field | Type | Description | -|-------|------|-------------| -| `label` | String | (Optional) New label for the record | -| `properties` | Array | Array of property objects to update or add | - -#### Property Object - -| Field | Type | Description | -|-------|------|-------------| -| `key` | String | Property name | -| `value` | Any | Property value | -| `type` | String | (Optional) Data type of the property | - -### Example Request - -```json -{ - "label": "Person", - "properties": [ - { - "key": "name", - "value": "John Smith" - }, - { - "key": "age", - "value": 32, - "type": "number" - }, - { - "key": "active", - "value": true, - "type": "boolean" - } - ] -} -``` - -### Response - -```json -{ - "id": "018dfc84-d6cb-7000-89cd-850db63a1e77", - "label": "Person", - "name": "John Smith", - "age": 32, - "email": "john@example.com", // Preserved from existing record - "active": true, - "_rushdb_properties_meta": { - // Metadata about properties - } -} -``` - -### How PATCH Works - -When you use PATCH to update a record: -1. The system first retrieves the current record data -2. Merges your new properties with the existing properties -3. Updates only the specified properties while preserving any properties not included in your request -4. Returns the complete updated record - -This makes PATCH ideal for updating specific fields without having to resend all record data. - -## Replace Record (PUT) - -The PUT method allows you to completely replace a record's data. 
- -```http -PUT /api/v1/records/{entityId} +```bash +curl -X PATCH https://api.rushdb.com/api/v1/records/movie-123 \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"data": {"rating": 9.0}}' ``` -### Path Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `entityId` | String | The unique identifier of the record to update | - -### Request Body - -Same as PATCH method, but all existing properties not included in the request will be removed. - -### Example Request - -```json -{ - "label": "Customer", - "properties": [ - { - "key": "name", - "value": "John Smith" - }, - { - "key": "age", - "value": 32 - } - ] -} -``` +## `PUT /api/v1/records/:entityId` — full replacement -### Response +All previous fields are removed, then replaced with the new data. -```json -{ - "id": "018dfc84-d6cb-7000-89cd-850db63a1e77", - "label": "Customer", - "name": "John Smith", - "age": 32, - "_rushdb_properties_meta": { - // Metadata about properties - } -} +```bash +curl -X PUT https://api.rushdb.com/api/v1/records/movie-123 \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"label": "MOVIE", "data": {"title": "Inception", "rating": 9.0, "genre": "sci-fi"}}' ``` -### How PUT Works - -When you use PUT to update a record: -1. The specified properties completely replace the existing record properties -2. Any properties not included in your request will be removed -3. The operation returns the new state of the record - -This makes PUT ideal when you want to ensure the record only has the exact properties you specify. 
- -## Error Handling - -Update operations may return the following error responses: +## Request body -| Status Code | Description | -|-------------|-------------| -| 400 | Bad Request - Invalid input format | -| 401 | Unauthorized - Authentication required | -| 403 | Forbidden - Insufficient permissions | -| 404 | Not Found - Record does not exist | -| 500 | Server Error - Processing failed | - -### Example Error Response - -```json -{ - "success": false, - "message": "Record with id '018dfc84-d6cb-7000-89cd-850db63a1e77' not found", - "statusCode": 404 -} -``` - -## Best Practices +| Field | Type | Description | +|---|---|---| +| `label` | `string` | (Optional) New label for the record | +| `data` | `object` | Properties to write | -1. **Use PATCH for partial updates** when you want to preserve existing data -2. **Use PUT for complete replacement** when you want to ensure the record only has the properties you specify -3. **Include property types** when you want to ensure proper data type conversion -4. **Check for 404 errors** when updating records that might not exist -5. **Retrieve current properties** with GET before updating to understand the record's current state diff --git a/docs/docs/rest-api/relationships.md b/docs/docs/rest-api/relationships.md index 43b8c1fd..804d5aaf 100644 --- a/docs/docs/rest-api/relationships.md +++ b/docs/docs/rest-api/relationships.md @@ -1,640 +1,102 @@ --- -sidebar_position: 5 +sidebar_position: 3 --- -# Relationships API +# Relationships -RushDB provides a powerful Relationships API that enables you to manage connections between [records](../concepts/records.md). This API allows you to create, retrieve, update, and delete [relationships](../concepts/relationships.md) between any records in your database. 
+## `POST /api/v1/records/:entityId/relationships` — attach -## Overview +```bash +# Single target +curl -X POST https://api.rushdb.com/api/v1/records/$MOVIE_ID/relationships \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"targetIds": "$ACTOR_ID", "type": "STARS_IN", "direction": "out"}' -The Relationships API allows you to: -- Create relationships between records -- Retrieve relationships for a specific record -- Search relationships across your entire database -- Delete specific or all relationships between records -- Specify relationship types and directions - -All relationships endpoints require authentication using a token header. - -## Create Many Relationships (by key match) - -```http -POST /api/v1/relationships/create-many -``` - -Creates relationships in bulk by matching a property from source-labeled records to a property from target-labeled records. - -### Request Body - -| Field | Type | Description | -|-----------------|--------|---------------------------------------------------------------------------------------------------| -| `source` | Object | Source selector: `{ label: string; key?: string; where?: object }` — `key` is required unless using `manyToMany` | -| `target` | Object | Target selector: `{ label: string; key?: string; where?: object }` — `key` is required unless using `manyToMany` | -| `type` | String | Optional. Relationship type to create. Defaults to `__RUSHDB__RELATION__DEFAULT__` | -| `direction` | String | Optional. Relationship direction: `in` or `out`. Defaults to `out` | -| `manyToMany` | Boolean| Optional. When true, allows creating a cartesian product between matched source and target sets. Requires non-empty `where` on both sides. | - -The matching condition is always `source[key] = target[key]`, combined with optional `where` filters on each side. -The `where` objects follow the standard SearchQuery `where` syntax used across the platform. 
- -Notes on many-to-many/cartesian creation - -- If `manyToMany` is set to `true`, the server will not require `source.key`/`target.key` and will create relationships between every matching source and every matching target produced by the `where` filters. -- `manyToMany=true` requires non-empty `where` filters for both `source` and `target` to avoid accidental unbounded cartesian products. -- When `manyToMany` is not provided or is false, `source.key` and `target.key` are required and the server joins with `source[key] = target[key]`. - -### Example Request - -```json -{ - "source": { "label": "USER", "key": "id", "where": { "tenantId": "ACME" } }, - "target": { "label": "ORDER", "key": "userId", "where": { "tenantId": "ACME" } }, - "type": "ORDERED", - "direction": "out" -} -``` - -### Response - -```json -{ - "success": true, - "data": { - "message": "Relations have been successfully created" - } -} - -## Delete Many Relationships (by key match or filters) - -```http -POST /api/v1/relationships/delete-many -``` - -Deletes relationships in bulk that match the provided source/target selectors. The request body mirrors the create-many API. - -### Request Body - -| Field | Type | Description | -|-----------------|--------|---------------------------------------------------------------------------------------------------| -| `source` | Object | Source selector: `{ label: string; key?: string; where?: object }` — `key` is required unless using `manyToMany` | -| `target` | Object | Target selector: `{ label: string; key?: string; where?: object }` — `key` is required unless using `manyToMany` | -| `type` | String | Optional. Relationship type to delete. If omitted, matches any type | -| `direction` | String | Optional. Relationship direction: `in` or `out`. Defaults to `out` | -| `manyToMany` | Boolean| Optional. When true, allows matching every source to every target produced by the `where` filters. Requires non-empty `where` on both sides. 
| - -### Example Request - -```json -{ - "source": { "label": "USER", "key": "id", "where": { "tenantId": "ACME" } }, - "target": { "label": "ORDER", "key": "userId", "where": { "tenantId": "ACME" } }, - "type": "ORDERED", - "direction": "out" -} -``` - -### Response - -```json -{ - "success": true, - "data": { - "message": "Relations have been successfully deleted" - } -} -``` -``` - -## Create Relationship - -```http -POST /api/v1/records/:entityId/relationships -``` - -Creates one or more [relationships](../concepts/relationships.md) from a source record to one or more target records. - -### Parameters - -| Parameter | Type | Description | -|------------|--------|-----------------------------| -| `entityId` | String | The ID of the source record | - -### Request Body - -| Field | Type | Description | -|-------------|-----------------|-------------------------------------------------------------------------------------------| -| `targetIds` | String or Array | ID(s) of target record(s) to create relationship(s) with | -| `type` | String | Optional. The type of relationship to create. Defaults to `__RUSHDB__RELATION__DEFAULT__` | -| `direction` | String | Optional. Direction of the relationship: `in` or `out`. Defaults to `out` | - -### Example Request - Single Target - -```json -{ - "targetIds": "018e4c71-f35a-7000-89cd-850db63a1e78", - "type": "WORKS_FOR" -} -``` - -### Example Request - Multiple Targets - -```json -{ - "targetIds": [ - "018e4c71-f35a-7000-89cd-850db63a1e78", - "018e4c71-f35a-7000-89cd-850db63a1e79" - ], - "type": "KNOWS", - "direction": "out" -} -``` - -### Response - -```json -{ - "success": true, - "data": { - "message": "Relations to Record 018e4c71-f35a-7000-89cd-850db63a1e77 have been successfully created" - } -} -``` - -## Get Record Relationships - -```http -GET /api/v1/records/:entityId/relationships -``` - -Retrieves all relationships for a specific [record](../concepts/records.md). 
- -### Parameters - -| Parameter | Type | Description | -|------------|--------|---------------------------------------------------------------------| -| `entityId` | String | The ID of the record | -| `skip` | Number | Optional. Number of relationships to skip (default: 0) | -| `limit` | Number | Optional. Maximum number of relationships to return (default: 1000) | - -### Response - -```json -{ - "success": true, - "data": { - "total": 3, - "data": [ - { - "sourceId": "018e4c71-f35a-7000-89cd-850db63a1e77", - "sourceLabel": "Person", - "targetId": "018e4c71-f35a-7000-89cd-850db63a1e78", - "targetLabel": "Company", - "type": "WORKS_FOR" - }, - { - "sourceId": "018e4c71-f35a-7000-89cd-850db63a1e77", - "sourceLabel": "Person", - "targetId": "018e4c71-f35a-7000-89cd-850db63a1e79", - "targetLabel": "Person", - "type": "KNOWS" - }, - { - "sourceId": "018e4c71-f35a-7000-89cd-850db63a1e80", - "sourceLabel": "Department", - "targetId": "018e4c71-f35a-7000-89cd-850db63a1e77", - "targetLabel": "Person", - "type": "HAS_MEMBER" - } - ] - } -} -``` - -## Delete Relationships - -```http -PUT /api/v1/records/:entityId/relationships -``` - -Deletes one or more relationships from a source record to one or more target records. - -### Parameters - -| Parameter | Type | Description | -|------------|--------|-----------------------------| -| `entityId` | String | The ID of the source record | - -### Request Body - -| Field | Type | Description | -|---------------|-----------------|--------------------------------------------------------------------------------------------------------------| -| `targetIds` | String or Array | ID(s) of target record(s) to delete relationship(s) with | -| `typeOrTypes` | String or Array | Optional. Type(s) of relationships to delete. If omitted, deletes relationships of any type | -| `direction` | String | Optional. Direction of the relationship: `in` or `out`. 
If omitted, deletes relationships in both directions | - -### Example Request - Delete All Relationship Types - -```json -{ - "targetIds": "018e4c71-f35a-7000-89cd-850db63a1e78" -} -``` - -### Example Request - Delete Specific Relationship Types - -```json -{ - "targetIds": [ - "018e4c71-f35a-7000-89cd-850db63a1e78", - "018e4c71-f35a-7000-89cd-850db63a1e79" - ], - "typeOrTypes": ["KNOWS", "WORKS_FOR"], - "direction": "out" -} -``` - -### Response - -```json -{ - "success": true, - "data": { - "message": "Relations to Record 018e4c71-f35a-7000-89cd-850db63a1e77 have been successfully deleted" - } -} -``` - -## Search Relationships - -```http -POST /api/v1/relationships/search -``` - -Searches for [relationships](../concepts/relationships.md) across your database with optional filtering. - -### Request Body - -| Field | Type | Description | -|---------|--------|----------------------------------------------------------------------------------------------| -| `where` | Object | Optional [filter criteria](../concepts/search/where.md) to search for specific relationships | - -### Query Parameters - -| Parameter | Type | Description | -|-----------|--------|-----------------------------------------------------------------------------------------------------------------| -| `skip` | Number | Optional. Number of relationships to skip for [pagination](../concepts/search/pagination-order.md) (default: 0) | -| `limit` | Number | Optional. 
Maximum number of relationships to return (default: 1000) | - -### Example Request - Filter by Record Properties - -```json -{ - "where": { - "sourceRecord": { - "name": "John Doe" - }, - "targetRecord": { - "name": "Acme Inc" - } - } -} -``` - -### Response - -```json -{ - "success": true, - "data": { - "total": 1, - "data": [ - { - "sourceId": "018e4c71-f35a-7000-89cd-850db63a1e77", - "sourceLabel": "Person", - "targetId": "018e4c71-f35a-7000-89cd-850db63a1e78", - "targetLabel": "Company", - "type": "WORKS_FOR" - } - ] - } -} -``` - -## Relationship Directionality - -RushDB supports three types of [relationship](../concepts/relationships.md) directionality: - -1. **Outgoing relationships (`direction: "out"`)**: - The source record points to the target record: `(source)-[relationship]->(target)` - -2. **Incoming relationships (`direction: "in"`)**: - The target record points to the source record: `(source)<-[relationship]-(target)` - -3. **Undirected relationships (no direction specified)**: - The relationship has no specific direction: `(source)-[relationship]-(target)` - -## Best Practices - -1. **Use meaningful relationship types**: Choose relationship types that clearly describe the connection between [records](../concepts/records.md) -2. **Consider directionality**: Choose the right direction for your relationships based on your domain model -3. **Use relationship metadata**: When your use case requires it, store additional information about relationships -4. **Use consistent naming**: Establish naming conventions for relationship types (e.g., uppercase with underscores) -5. **Mind performance**: For highly connected records, paginate relationships with the `skip` and `limit` parameters - - - ---- - -# Relationships - -RushDB provides dedicated endpoints to create, read, update, and delete relationships between records. These endpoints allow you to build complex graph structures and model real-world relationships in your data. 
- -## Overview - -The relationship management endpoints enable you to: -- Create relationships between records -- List relationships for a record -- Remove specific relationships -- Search across all relationships -- Manage relationship types and directions - -All relationship endpoints require authentication using a bearer token. - -## Create Relationship - -Create one or more relationships between records. - -```http -POST /api/v1/records/{entityId}/relationships -``` - -### Path Parameters - -| Parameter | Type | Description | -|------------|--------|-----------------------------------| -| `entityId` | String | Source record identifier (UUIDv7) | - -### Request Body - -| Field | Type | Description | -|-------------|-----------------|------------------------------------------------------------------------------------| -| `targetIds` | String or Array | Target record identifier(s). Cannot be empty or contain empty strings | -| `type` | String | (Optional) Relationship type. Cannot be an empty string | -| `direction` | String | (Optional) Relationship direction. Must be either "in" or "out". Defaults to "out" | - -### Example Request - -```json -{ - "targetIds": ["018dfc84-d6cb-7000-89cd-850db63a1e78"], - "type": "FOLLOWS", - "direction": "out" -} -``` - -#### Creating Multiple Relationships - -You can create multiple relationships in a single request by passing an array of target IDs: - -```json -{ - "targetIds": [ - "018dfc84-d6cb-7000-89cd-850db63a1e78", - "018dfc84-d6cb-7000-89cd-850db63a1e79" - ], - "type": "FOLLOWS", - "direction": "out" -} -``` - -### Response - -```json -{ - "message": "Relations created successfully" -} -``` - -## List Relationships - -Retrieve relationships for a specific record. 
- -```http -GET /api/v1/records/{entityId}/relationships -``` - -### Path Parameters - -| Parameter | Type | Description | -|------------|--------|----------------------------| -| `entityId` | String | Record identifier (UUIDv7) | - -### Query Parameters - -| Parameter | Type | Description | Default | -|-----------|--------|------------------------------------------------------|---------| -| `skip` | Number | (Optional) Number of relationships to skip | 0 | -| `limit` | Number | (Optional) Maximum number of relationships to return | 1000 | - -### Example Response - -```json -{ - "data": [ - { - "sourceId": "018dfc84-d6cb-7000-89cd-850db63a1e77", - "sourceLabel": "Person", - "targetId": "018dfc84-d6cb-7000-89cd-850db63a1e78", - "targetLabel": "Person", - "type": "FOLLOWS" - } - ], - "total": 1 -} -``` - -## Remove Relationship - -Remove one or more relationships between records. - -```http -PUT /api/v1/records/{entityId}/relationships +# Multiple targets +curl -X POST https://api.rushdb.com/api/v1/records/$MOVIE_ID/relationships \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"targetIds": ["$ACTOR1_ID", "$ACTOR2_ID"], "type": "STARS_IN"}' ``` -### Path Parameters - -| Parameter | Type | Description | -|------------|--------|-----------------------------------| -| `entityId` | String | Source record identifier (UUIDv7) | - -### Request Body +| Field | Type | Description | +|---|---|---| +| `targetIds` | `string \| string[]` | Target record ID(s) | +| `type` | `string` | Relationship type | +| `direction` | `"in" \| "out"` | Direction from source to target (`out` = default) | -| Field | Type | Description | -|---------------|-----------------|--------------------------------------------------------------------------------| -| `targetIds` | String or Array | Target record identifier(s). Cannot be empty or contain empty strings | -| `typeOrTypes` | String or Array | (Optional) One or more relationship type(s) to remove. 
Cannot be empty strings | -| `direction` | String | (Optional) Filter relationships by direction: "in" or "out" | +## `PUT /api/v1/records/:entityId/relationships` — detach -### Example Request - Single Type - -```json -{ - "targetIds": ["018dfc84-d6cb-7000-89cd-850db63a1e78"], - "typeOrTypes": "FOLLOWS", - "direction": "out" -} +```bash +curl -X PUT https://api.rushdb.com/api/v1/records/$MOVIE_ID/relationships \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"targetIds": "$ACTOR_ID", "typeOrTypes": ["STARS_IN"]}' ``` -### Example Request - Multiple Types +## `GET /api/v1/records/:entityId/relationships` — list -```json -{ - "targetIds": ["018dfc84-d6cb-7000-89cd-850db63a1e78"], - "typeOrTypes": ["FOLLOWS", "LIKES"], - "direction": "out" -} +```bash +curl "https://api.rushdb.com/api/v1/records/$MOVIE_ID/relationships?limit=50" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -### Response +## `POST /api/v1/relationships/create-many` — bulk create by key match -```json -{ - "message": "Relations removed successfully" -} +```bash +curl -X POST https://api.rushdb.com/api/v1/relationships/create-many \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "source": {"label": "MOVIE", "key": "id"}, + "target": {"label": "ACTOR", "key": "movieId"}, + "type": "STARS_IN", + "direction": "out" + }' ``` -## Search Relations - -```http -POST /api/v1/relationships/search -``` - -This endpoint searches for [relationships](../concepts/relationships.md) between records based on specified criteria. 
- -### Request Body +| Field | Type | Description | +|---|---|---| +| `source` | `{label, key?, where?}` | Source selector | +| `target` | `{label, key?, where?}` | Target selector | +| `type` | `string` | Relationship type | +| `direction` | `string` | `in` or `out` | +| `manyToMany` | `boolean` | Cartesian product mode — requires non-empty `where` on both sides | -The request body follows the standard [search parameters](../concepts/search/introduction.md) format. +## `POST /api/v1/relationships/delete-many` — bulk delete -### Query Parameters +Same shape as `create-many`. Omit `type` to delete any type. -| Parameter | Type | Description | -|-----------|--------|-------------------------------------------------------------------------------------------------------| -| `skip` | Number | Number of relationships to skip for [pagination](../concepts/search/pagination-order.md) (default: 0) | -| `limit` | Number | Maximum number of relationships to return (default: 1000) | - -### Response - -```json -{ - "success": true, - "data": { - "data": [ - // relationships matching the search criteria - ], - "total": 42 - } -} -``` - -## Search Relationships - -Search across all [relationships](../concepts/relationships.md) in the project. This endpoint allows you to query relationships with powerful filtering options. 
- -```http -POST /api/v1/relationships/search +```bash +curl -X POST https://api.rushdb.com/api/v1/relationships/delete-many \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "source": {"label": "MOVIE", "key": "id"}, + "target": {"label": "ACTOR", "key": "movieId"}, + "type": "STARS_IN" + }' ``` -### Query Parameters - -| Parameter | Type | Description | Default | -|-----------|--------|-----------------------------------------------------------------------------------------------------|---------| -| `skip` | Number | (Optional) Number of relationships to skip for [pagination](../concepts/search/pagination-order.md) | 0 | -| `limit` | Number | (Optional) Maximum number of relationships to return | 1000 | - -### Request Body - -The search endpoint accepts a SearchDto object with the following fields: - -| Field | Type | Description | -|-----------|--------|----------------------------------------------------------------------------------------------------| -| `where` | Object | (Optional) [Filter criteria](../concepts/search/where.md) for the search | -| `orderBy` | Object | (Optional) [Sorting criteria](../concepts/search/pagination-order.md#sorting-records-with-orderby) | -| `labels` | Array | (Optional) Filter by [record labels](../concepts/search/labels.md) | +:::warning +Setting `manyToMany: true` without `where` filters on both sides creates an unbounded cartesian product. 
+::: -### Example Request - With Filters +## `POST /api/v1/relationships/search` -```json -{ - "where": { - "sourceLabel": "Person", - "type": "FOLLOWS" - }, - "orderBy": { - "type": "ASC" - }, - "limit": 10 -} +```bash +curl -X POST https://api.rushdb.com/api/v1/relationships/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"where": {"sourceRecord": {"title": "Inception"}}}' ``` -### Response - -```json -{ - "data": [ - { - "sourceId": "018dfc84-d6cb-7000-89cd-850db63a1e77", - "sourceLabel": "Person", - "targetId": "018dfc84-d6cb-7000-89cd-850db63a1e78", - "targetLabel": "Person", - "type": "FOLLOWS" - } - ], - "total": 1 -} -``` - -## Relationship Types - -RushDB supports several relationship configurations: - -### Default Relationship -If no type is specified when creating a relationship, it uses the default type `__RUSHDB__RELATION__DEFAULT__`. This relationship type is useful for simple connections where semantic meaning isn't required. - -### Custom Types -You can define custom relationship types to represent specific semantic meanings in your data model. For example: -- `FOLLOWS` for social connections -- `BELONGS_TO` for hierarchical relationships -- `WORKS_FOR` for organizational relationships - -### Bidirectional Relationships -While relationships have a direction, you can create bidirectional relationships by: -1. Creating two relationships with opposite directions -2. Querying relationships without specifying direction - -### Relationship Properties -Relationships can have properties attached to them, which is useful for storing metadata about the connection, such as: -- Timestamps (when the relationship was established) -- Weights or strengths -- Additional context - -## Validation - -The API enforces the following validation rules: +## Direction -1. `targetIds` cannot be empty or contain empty strings -2. `type` and `typeOrTypes` cannot be empty strings when provided -3. 
`direction` must be either "in" or "out" when provided -4. Record IDs must be valid UUIDv7 strings -5. Source and target records must exist in the database +| Value | Meaning | +|---|---| +| `"out"` (default) | source → target | +| `"in"` | target → source | -## Best Practices -1. **Use meaningful relationship types** that describe the semantic connection between records -2. **Consider directionality** when designing your data model - choose directions that make semantic sense -3. **Batch relationship operations** when creating or modifying many relationships at once -4. **Use pagination** when retrieving large sets of relationships to improve performance -5. **Validate record existence** before creating relationships -6. **Index important relationship types** that are frequently queried -7. **Use consistent naming conventions** for relationship types (e.g., uppercase with underscores) -8. **Document relationship types** and their meanings in your application diff --git a/docs/docs/rest-api/transactions.md b/docs/docs/rest-api/transactions.md index da504408..b5423125 100644 --- a/docs/docs/rest-api/transactions.md +++ b/docs/docs/rest-api/transactions.md @@ -1,223 +1,87 @@ --- -sidebar_position: 5 +sidebar_position: 6 --- -# Transactions API +# Transactions -RushDB provides a powerful Transactions API that allows you to perform multiple database operations atomically. This ensures data consistency by either committing all operations or rolling back all changes if an error occurs. +## `POST /api/v1/tx` — begin -## Overview - -Transactions in RushDB: -- Allow multiple operations to be executed as a single atomic unit -- Provide ACID (Atomicity, Consistency, Isolation, Durability) guarantees -- Automatically rollback after timeout to prevent hanging transactions -- Can be explicitly committed or rolled back - -## Transaction Lifecycle - -1. **Create** a transaction to get a transaction ID -2. **Use** the transaction ID in subsequent API requests -3. 
**Commit** the transaction to make changes permanent, or **Rollback** to discard changes -4. If neither committed nor rolled back within the TTL (Time To Live), the transaction will automatically rollback - -## API Endpoints - -### Create Transaction - -Creates a new transaction and returns a transaction ID. - -```http -POST /api/v1/tx -``` - -#### Request Body - -| Field | Type | Description | -|-------|--------|-------------| -| `ttl` | Number | Optional. Time to live in milliseconds. Default: 5000ms. Maximum: 30000ms (30 seconds). | - -#### Example Request - -```json -{ - "ttl": 10000 -} -``` - -#### Response - -```json -{ - "success": true, - "data": { - "id": "018e5c31-f35a-7000-89cd-850db63a1e77" - } -} -``` - -### Get Transaction - -Check if a transaction exists. - -```http -GET /api/v1/tx/:txId +```bash +TX_ID=$(curl -s -X POST https://api.rushdb.com/api/v1/tx \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"ttl": 10000}' | jq -r '.data.id') ``` -#### Parameters - -| Parameter | Type | Description | -|-----------|--------|-------------| -| `txId` | String | The transaction ID | +| Field | Type | Description | +|---|---|---| +| `ttl` | `number` | TTL in ms. Default: 5000. Max: 30000 | -#### Response +## `GET /api/v1/tx/:txId` — check existence -```json -{ - "success": true, - "data": { - "id": "018e5c31-f35a-7000-89cd-850db63a1e77" - } -} +```bash +curl https://api.rushdb.com/api/v1/tx/$TX_ID \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -### Commit Transaction +## `POST /api/v1/tx/:txId/commit` -Commits all changes made within the transaction, making them permanent in the database. 
- -```http -POST /api/v1/tx/:txId/commit +```bash +curl -X POST https://api.rushdb.com/api/v1/tx/$TX_ID/commit \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -#### Parameters - -| Parameter | Type | Description | -|-----------|--------|-------------| -| `txId` | String | The transaction ID | +## `POST /api/v1/tx/:txId/rollback` -#### Response - -```json -{ - "success": true, - "data": { - "message": "Transaction (018e5c31-f35a-7000-89cd-850db63a1e77) has been successfully committed." - } -} +```bash +curl -X POST https://api.rushdb.com/api/v1/tx/$TX_ID/rollback \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -### Rollback Transaction +## Use with requests — `X-Transaction-Id` header -Discards all changes made within the transaction. +Pass the transaction ID as a header on any create, update, delete, or relationship endpoint: -```http -POST /api/v1/tx/:txId/rollback +```bash +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -H "X-Transaction-Id: $TX_ID" \ + -d '{"label": "MOVIE", "data": {"title": "Inception"}}' ``` -#### Parameters - -| Parameter | Type | Description | -|-----------|--------|-------------| -| `txId` | String | The transaction ID | - -#### Response - -```json -{ - "success": true, - "data": { - "message": "Transaction (018e5c31-f35a-7000-89cd-850db63a1e77) has been rolled back." - } -} +## Full example + +```bash +# 1. Begin +TX_ID=$(curl -s -X POST https://api.rushdb.com/api/v1/tx \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"ttl": 10000}' | jq -r '.data.id') + +# 2. 
Create records +MOVIE_ID=$(curl -s -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -H "X-Transaction-Id: $TX_ID" \ + -d '{"label": "MOVIE", "data": {"title": "Inception"}, "options": {"returnResult": true}}' | jq -r '.data.__id') + +ACTOR_ID=$(curl -s -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -H "X-Transaction-Id: $TX_ID" \ + -d '{"label": "ACTOR", "data": {"name": "Leonardo DiCaprio"}, "options": {"returnResult": true}}' | jq -r '.data.__id') + +# 3. Link +curl -X POST https://api.rushdb.com/api/v1/records/$MOVIE_ID/relationships \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -H "X-Transaction-Id: $TX_ID" \ + -d '{"targetIds": "'$ACTOR_ID'", "type": "STARS_IN"}' + +# 4. Commit +curl -X POST https://api.rushdb.com/api/v1/tx/$TX_ID/commit \ + -H "Authorization: Bearer $RUSHDB_API_KEY" ``` -## Using Transactions with Other APIs - -To use a transaction with other API endpoints, include the transaction ID in the `X-Transaction-Id` header. - -### Example -```http -POST /api/v1/records -Content-Type: application/json -token: RUSHDB_API_KEY -X-Transaction-Id: 018e5c31-f35a-7000-89cd-850db63a1e77 - -{ - "label": "Person", - "properties": [ - { - "name": "name", - "type": "string", - "value": "John Doe" - } - ] -} -``` - -## Transaction Timeout - -Transactions have a timeout mechanism to prevent hanging transactions: - -- Default timeout: 5 seconds (5000ms) -- Maximum timeout: 30 seconds (30000ms) -- If a transaction isn't committed or rolled back within its TTL, it will be automatically rolled back - -## Best Practices - -1. **Keep transactions short**: Long-running transactions can lead to resource contention. -2. 
**Set appropriate TTL**: Choose a TTL that gives your operations enough time to complete, but not so long that resources are unnecessarily tied up. -3. **Always commit or rollback**: Explicitly commit or rollback transactions rather than relying on automatic timeout. -4. **Error handling**: Implement proper error handling in your client code to rollback transactions if operations fail. -5. **Avoid unnecessary transactions**: For single operations, you don't need to use transactions. - -## Transaction Example Workflow - -```javascript -// 1. Create a transaction -const createTxResponse = await fetch('https://api.rushdb.com/api/v1/tx', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'token': 'RUSHDB_API_KEY' - }, - body: JSON.stringify({ ttl: 10000 }) -}); - -const { data: { id: txId } } = await createTxResponse.json(); - -try { - // 2. Perform operations within the transaction - await fetch('https://api.rushdb.com/api/v1/records', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'token': 'RUSHDB_API_KEY', - 'X-Transaction-Id': txId - }, - body: JSON.stringify({ - label: 'Person', - properties: [ - { name: 'name', type: 'string', value: 'John Doe' } - ] - }) - }); - - // 3. Commit the transaction if all operations succeeded - await fetch(`https://api.rushdb.com/api/v1/tx/${txId}/commit`, { - method: 'POST', - headers: { - 'token': 'RUSHDB_API_KEY' - } - }); -} catch (error) { - // 4. 
Rollback the transaction if any operation failed - await fetch(`https://api.rushdb.com/api/v1/tx/${txId}/rollback`, { - method: 'POST', - headers: { - 'token': 'RUSHDB_API_KEY' - } - }); - throw error; -} -``` diff --git a/docs/docs/tutorials/_category_.json b/docs/docs/tutorials/_category_.json index f2abe1a4..37b30f8b 100644 --- a/docs/docs/tutorials/_category_.json +++ b/docs/docs/tutorials/_category_.json @@ -1,6 +1,9 @@ { "label": "Tutorials", - "position": 2, "collapsed": false, - "collapsible": false + "collapsible": false, + "link": { + "type": "doc", + "id": "tutorials/index" + } } diff --git a/docs/docs/tutorials/agent-safe-query-planning.mdx b/docs/docs/tutorials/agent-safe-query-planning.mdx new file mode 100644 index 00000000..b20f8457 --- /dev/null +++ b/docs/docs/tutorials/agent-safe-query-planning.mdx @@ -0,0 +1,386 @@ +--- +sidebar_position: 22 +title: "Agent-Safe Query Planning with Ontology First" +description: A repeatable agent pattern — ontology first, query spec second, constrained execution, and failure recovery when labels or fields are wrong. +tags: [MCP, Agents, SearchQuery, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Agent-Safe Query Planning with Ontology First + +LLMs that interact with databases without schema grounding make mistakes that are hard to catch: they invent label names, assume property shapes, and produce queries that return zero results or subtly wrong ones. + +The fix is a disciplined execution loop: load the schema first, learn the query spec second, then build queries constrained to what actually exists. This tutorial teaches that loop in code and in the MCP server. 
+ +--- + +## The guarded execution loop + +```mermaid +flowchart TD + A([Start]) --> B[Load ontology\ngetOntologyMarkdown] + B --> C[Load query spec\ngetSearchQuerySpec] + C --> D{Labels and properties\nknown?} + D -- No --> E[Re-check ontology\nRefine label names] + E --> D + D -- Yes --> F[Build query from\nvalidated params] + F --> G[Execute query\nfindRecords] + G --> H{Results\nnon-empty?} + H -- No --> I[Widen filter\nCheck $exists / values] + I --> F + H -- Yes --> J([Return results]) +``` + +Every agent session that touches RushDB should follow this shape. + +--- + +## Step 1: Load and ground on the ontology + +At the start of every agent session, fetch the ontology and store the exact label and property names observed. + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +// The agent calls this at the start of every session +async function loadOntology() { + const md = await db.ai.getOntologyMarkdown() + // Pass this markdown to the LLM as part of its system context + return md +} + +const schemaContext = await loadOntology() +// console.log(schemaContext) — shows all labels, properties, types +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + + +def load_ontology() -> str: + """Returns ontology markdown for injecting into agent system prompt.""" + return db.ai.get_ontology_markdown() + + +schema_context = load_ontology() +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +# Fetch markdown ontology for LLM context +curl -s -X POST "$BASE/ai/ontology/md" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{}' +``` + + + + +--- + +## Step 2: Validate label names before querying + +When the agent generates a query, validate that every label in `labels` and `where` appears in the ontology before executing. 
+ + +```typescript +type OntologyLabel = { name: string; properties: string[] } + +function extractLabels(ontology: any): Set<string> { + // Ontology structure varies — read the keys from getOntology() result + const labelSet = new Set<string>() + if (Array.isArray(ontology)) { + for (const entry of ontology) { + if (entry.label) labelSet.add(entry.label) + } + } + return labelSet +} + +async function safeFind(query: Parameters<typeof db.records.find>[0]) { + const ontologyResult = await db.ai.getOntology() + const knownLabels = extractLabels(ontologyResult) + + // Validate labels array + const requestedLabels = query.labels ?? [] + const unknownLabels = requestedLabels.filter(l => !knownLabels.has(l)) + if (unknownLabels.length > 0) { + throw new Error( + `Unknown labels: ${unknownLabels.join(', ')}. Known labels: ${[...knownLabels].join(', ')}` + ) + } + + return db.records.find(query) +} + +// Usage +try { + const result = await safeFind({ + labels: ['CUSTOMER'], // validated against ontology + where: { status: 'active' }, + limit: 10 + }) + console.log(result.data) +} catch (err) { + console.error(err) + // Agent: re-check ontology, pick correct label, retry +} +``` + + + + +```python +def extract_labels(ontology) -> set: + labels = set() + if isinstance(ontology, list): + for entry in ontology: + if "label" in entry: + labels.add(entry["label"]) + return labels + + +def safe_find(query: dict) -> object: + ontology = db.ai.get_ontology() + known_labels = extract_labels(ontology) + + requested_labels = query.get("labels", []) + unknown = [l for l in requested_labels if l not in known_labels] + if unknown: + raise ValueError( + f"Unknown labels: {unknown}. 
Known: {list(known_labels)}" + ) + return db.records.find(query) + + +try: + result = safe_find({"labels": ["CUSTOMER"], "where": {"status": "active"}, "limit": 10}) +except ValueError as e: + print(e) + # Agent: re-check ontology, pick correct label, retry +``` + + + + +```bash +# Get ontology to verify label before querying +LABELS=$(curl -s -X POST "$BASE/ai/ontology" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{}' | jq '[.[].label]') +echo "Known labels: $LABELS" + +# Then query with validated label name +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["CUSTOMER"],"where":{"status":"active"},"limit":10}' +``` + + + + +--- + +## Step 3: Handle zero-result queries without hallucinating + +When a query returns zero results, the agent should widen the filter — not invent records or claim they exist. + + + + +```typescript +async function queryWithFallback(label: string, filter: Record) { + // Try full filter + let result = await db.records.find({ + labels: [label], + where: filter, + limit: 10 + }) + + if (result.data.length === 0) { + console.warn('Zero results with full filter. 
Checking each condition…') + + // Binary-search the filter to find which clause eliminates results + for (const key of Object.keys(filter)) { + const narrower = Object.fromEntries( + Object.entries(filter).filter(([k]) => k !== key) + ) + const partial = await db.records.find({ + labels: [label], + where: narrower, + limit: 1 + }) + if (partial.total > 0) { + console.warn(`Filter key "${key}" with value "${filter[key]}" eliminates all results`) + // Check what values actually exist + const dist = await db.records.find({ + labels: [label], + aggregate: { + count: { fn: 'count', alias: '$record' }, + [key]: `$record.${key}` + }, + groupBy: [key, 'count'], + orderBy: { count: 'desc' }, + limit: 10 + }) + console.log(`Actual "${key}" values:`, dist.data) + break + } + } + } + + return result +} +``` + + + + +```python +def query_with_fallback(label: str, filter_dict: dict) -> object: + result = db.records.find({"labels": [label], "where": filter_dict, "limit": 10}) + + if not result.data: + print("Zero results. 
Diagnosing filter…") + for key in list(filter_dict.keys()): + partial_where = {k: v for k, v in filter_dict.items() if k != key} + partial = db.records.find({"labels": [label], "where": partial_where, "limit": 1}) + if partial.total > 0: + print(f'Key "{key}" = "{filter_dict[key]}" eliminates all results') + dist = db.records.find({ + "labels": [label], + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + key: f"$record.{key}" + }, + "groupBy": [key, "count"], + "orderBy": {"count": "desc"}, + "limit": 10 + }) + print(f'Actual "{key}" values:', dist.data) + break + + return result +``` + + + + +```bash +# If query returns empty, enumerate actual values of the suspect field +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["CUSTOMER"], + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "status": "$record.status" + }, + "groupBy": ["status", "count"], + "orderBy": {"count": "desc"} + }' +``` + + + + +--- + +## Step 4: Use the MCP query builder prompt in agent sessions + +RushDB's MCP server provides a `getQueryBuilderPrompt` tool that returns a system prompt enforcing ontology-first behavior. Inject it into your agent's system message. + +**In Claude or Cursor:** + +``` +Use the getQueryBuilderPrompt tool to load your operating instructions before making any queries. 
+``` + +**In code:** + + + + +```typescript +// For agents that call the MCP server programmatically +// The query builder prompt is returned by the getQueryBuilderPrompt MCP tool +// Inject it as system context alongside the ontology markdown + +const systemPrompt = [ + queryBuilderPrompt, // from getQueryBuilderPrompt MCP tool + '', + '## Current Schema', + schemaContext // from getOntologyMarkdown +].join('\n') +``` + + + + +```python +# Same pattern in Python — build system context from both sources +system_prompt = "\n".join([ + query_builder_prompt, # from getQueryBuilderPrompt MCP tool + "", + "## Current Schema", + schema_context # from get_ontology_markdown() +]) +``` + + + + +```bash +# No MCP server call from shell — use the REST ontology endpoint +# and combine with your own system prompt in the LLM call +``` + + + + +--- + +## The five rules for agent-safe queries + +These rules prevent the most common agent mistakes: + +1. **Always call `getOntologyMarkdown` first** — never assume label names from memory or conversation history +2. **Use only labels that appear in the ontology** — invented labels return zero results silently +3. **Use only property names that appear for those labels** — unknown properties in `where` are ignored without error, producing misleading results +4. **Enumerate categorical values before filtering** — never guess status/type/category strings +5. **Test direction before building traversal queries** — a wrong `direction` returns zero results instead of an error + +--- + +## Production caveat + +Ontology grounding is a first-call overhead: one `getOntologyMarkdown` request per agent session. For high-throughput agents that execute many queries per session, cache the ontology for the session duration and invalidate it if a query returns unexpected zero results (which may indicate a schema change mid-session). 
+ +--- + +## Next steps + +- [Discovery Queries](./discovery-queries.mdx) — interactive schema exploration in code +- [MCP Quickstart for Real Operators](./mcp-operator-quickstart.mdx) — the same loop via MCP tools +- [Building Team Memory](./building-team-memory.mdx) — a real knowledge base for agents to query diff --git a/docs/docs/tutorials/ai-semantic-search.mdx b/docs/docs/tutorials/ai-semantic-search.mdx new file mode 100644 index 00000000..679e38b6 --- /dev/null +++ b/docs/docs/tutorials/ai-semantic-search.mdx @@ -0,0 +1,382 @@ +--- +sidebar_position: 5 +title: Semantic Search in 5 Minutes +description: Create embedding indexes, wait for backfill, and run your first semantic search query in TypeScript, Python, or REST. +tags: [AI, Search] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Semantic Search in 5 Minutes + +This tutorial walks you through the full semantic search workflow: +1. Push some records +2. Create an embedding index on a text property +3. Poll until the index is ready +4. Run semantic search +5. Run semantic search with a filter + +Prerequisites: a running RushDB instance with `RUSHDB_EMBEDDING_MODEL` configured (or RushDB Cloud with AI enabled). 
+ +--- + +## Step 1: Push records + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('RUSHDB_API_KEY') + +await db.records.importJson({ + label: 'Article', + data: [ + { + title: 'Intro to Machine Learning', + description: 'A beginner guide to supervised learning, neural networks, and model evaluation.', + tags: ['ml', 'beginner'] + }, + { + title: 'Graph Databases Explained', + description: 'How graph databases store relationships and why they outperform SQL for connected data.', + tags: ['databases', 'graphs'] + }, + { + title: 'Climate Science Overview', + description: 'Current research on global warming, carbon cycles, and renewable energy policy.', + tags: ['science', 'climate'] + } + ] +}) +``` + + + + +```python +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY") + +db.records.import_json({ + "label": "Article", + "data": [ + { + "title": "Intro to Machine Learning", + "description": "A beginner guide to supervised learning, neural networks, and model evaluation.", + "tags": ["ml", "beginner"] + }, + { + "title": "Graph Databases Explained", + "description": "How graph databases store relationships and why they outperform SQL for connected data.", + "tags": ["databases", "graphs"] + }, + { + "title": "Climate Science Overview", + "description": "Current research on global warming, carbon cycles, and renewable energy policy.", + "tags": ["science", "climate"] + } + ] +}) +``` + + + + +```http +POST /api/v1/records/import/json +Content-Type: application/json +token: YOUR_API_KEY + +{ + "label": "Article", + "data": [ + { + "title": "Intro to Machine Learning", + "description": "A beginner guide to supervised learning, neural networks, and model evaluation.", + "tags": ["ml", "beginner"] + }, + { + "title": "Graph Databases Explained", + "description": "How graph databases store relationships and why they outperform SQL for connected data.", + "tags": ["databases", "graphs"] + }, + { + "title": "Climate Science 
Overview", + "description": "Current research on global warming, carbon cycles, and renewable energy policy.", + "tags": ["science", "climate"] + } + ] +} +``` + + + + +--- + +## Step 2: Create an embedding index + +Tell RushDB to vectorize the `description` field on `Article` records. + + + + +```typescript +const { data: index } = await db.ai.indexes.create({ + label: 'Article', + propertyName: 'description' +}) + +console.log(index.id, index.status) // e.g. 'idx_abc123', 'pending' +``` + + + + +```python +response = db.ai.indexes.create({ + "label": "Article", + "propertyName": "description" +}) +index = response.data +print(index["id"], index["status"]) # e.g. 'idx_abc123', 'pending' +``` + + + + +```http +POST /api/v1/ai/indexes +Content-Type: application/json +token: YOUR_API_KEY + +{ + "label": "Article", + "propertyName": "description" +} +``` + +Response: +```json +{ + "data": { + "id": "idx_abc123", + "label": "Article", + "propertyName": "description", + "status": "pending" + }, + "success": true +} +``` + + + + +> Attempting to create a duplicate `(label, propertyName)` pair returns `409 Conflict`. + +--- + +## Step 3: Wait for the index to become ready + +Backfill is asynchronous. Poll `stats` until `indexedRecords === totalRecords`. 
+ + + + +```typescript +async function waitForIndex(indexId: string, intervalMs = 2000) { + while (true) { + const { data: stats } = await db.ai.indexes.stats(indexId) + console.log(`${stats.indexedRecords} / ${stats.totalRecords} embedded`) + if (stats.indexedRecords >= stats.totalRecords && stats.totalRecords > 0) break + await new Promise(r => setTimeout(r, intervalMs)) + } +} + +await waitForIndex(index.id) +``` + + + + +```python +import time + +def wait_for_index(index_id: str, interval: float = 2.0): + while True: + stats = db.ai.indexes.stats(index_id).data + print(f"{stats['indexedRecords']} / {stats['totalRecords']} embedded") + if stats["indexedRecords"] >= stats["totalRecords"] > 0: + break + time.sleep(interval) + +wait_for_index(index["id"]) +``` + + + + +```http +GET /api/v1/ai/indexes/idx_abc123/stats +token: YOUR_API_KEY +``` + +```json +{ "data": { "totalRecords": 3, "indexedRecords": 3 }, "success": true } +``` + +Poll until `indexedRecords === totalRecords`. + + + + +--- + +## Step 4: Semantic search + +RushDB always narrows candidates to the current project before ranking them by vector similarity. 
+ + + + +```typescript +const { data: results } = await db.ai.search({ + propertyName: 'description', + query: 'neural networks and deep learning', + labels: ['Article'], + limit: 3 +}) + +for (const result of results) { + console.log(`[${result.__score.toFixed(3)}] ${result.title}`) +} +// [0.921] Intro to Machine Learning +// [0.743] Graph Databases Explained +// [0.612] Climate Science Overview +``` + + + + +```python +response = db.ai.search({ + "propertyName": "description", + "query": "neural networks and deep learning", + "labels": ["Article"], + "limit": 3 +}) + +for result in response.data: + print(f"[{result['__score']:.3f}] {result['title']}") +# [0.921] Intro to Machine Learning +# [0.743] Graph Databases Explained +# [0.612] Climate Science Overview +``` + + + + +```http +POST /api/v1/ai/search +Content-Type: application/json +token: YOUR_API_KEY + +{ + "propertyName": "description", + "query": "neural networks and deep learning", + "labels": ["Article"], + "limit": 3 +} +``` + +```json +{ + "data": [ + { "__id": "rec_1", "__label": "Article", "__score": 0.921, "title": "Intro to Machine Learning", "description": "..." }, + { "__id": "rec_2", "__label": "Article", "__score": 0.743, "title": "Graph Databases Explained", "description": "..." }, + { "__id": "rec_3", "__label": "Article", "__score": 0.612, "title": "Climate Science Overview", "description": "..." } + ], + "success": true +} +``` + + + + +--- + +## Step 5: Semantic search with filter + +Adding a `where` clause narrows the project-scoped candidate set further before cosine similarity ranking. 
+ + + + +```typescript +const { data: results } = await db.ai.search({ + propertyName: 'description', + query: 'renewable energy and climate', + labels: ['Article'], + where: { + tags: { $in: ['science', 'climate'] } + }, + limit: 5 +}) + +for (const result of results) { + console.log(`[${result.__score.toFixed(3)}] ${result.title}`) +} +// Only Articles with tags science or climate are considered +``` + + + + +```python +response = db.ai.search({ + "propertyName": "description", + "query": "renewable energy and climate", + "labels": ["Article"], + "where": { + "tags": {"$in": ["science", "climate"]} + }, + "limit": 5 +}) + +for result in response.data: + print(f"[{result['__score']:.3f}] {result['title']}") +``` + + + + +```http +POST /api/v1/ai/search +Content-Type: application/json +token: YOUR_API_KEY + +{ + "propertyName": "description", + "query": "renewable energy and climate", + "labels": ["Article"], + "where": { + "tags": { "$in": ["science", "climate"] } + }, + "limit": 5 +} +``` + + + + +--- + +## Next steps + +- **Inject schema context into an LLM**: [TypeScript AI docs](../typescript-sdk/ai.md) | [Python AI docs](../python-sdk/ai.md) +- **REST API reference**: [REST AI docs](../rest-api/ai.md) +- **Multiple labels**: Pass 2+ labels in `labels` to search across all specified labels. diff --git a/docs/docs/tutorials/audit-trails.mdx b/docs/docs/tutorials/audit-trails.mdx new file mode 100644 index 00000000..60a58f0d --- /dev/null +++ b/docs/docs/tutorials/audit-trails.mdx @@ -0,0 +1,476 @@ +--- +sidebar_position: 24 +title: "Audit Trails with Immutable Events and Derived State" +description: Log business events as immutable records separate from current state so teams can reconstruct what happened, not just what is true now. 
+tags: [Lineage, Audit, Transactions, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Audit Trails with Immutable Events and Derived State + +Current-state records are useful for answering "what is true now?". They are not useful for answering "what happened, and when, and who did it?" + +An audit trail separates the two concerns. Events are immutable — append-only records that capture intent, actor, and timestamp. The current state record is derived from events but is mutable. Both live in the graph; only events form the log. + +--- + +## Graph shape + +```mermaid +graph LR + ENTITY[ENTITY
Current state] -->|HAS_EVENT| EVENT_1[EVENT<br/>created]
    ENTITY -->|HAS_EVENT| EVENT_2[EVENT<br/>status_changed]
    ENTITY -->|HAS_EVENT| EVENT_3[EVENT
field_updated] + EVENT_2 -->|PERFORMED_BY| ACTOR[USER or SERVICE] + EVENT_3 -->|PERFORMED_BY| ACTOR +``` + +| Label | What it represents | +|---|---| +| `ENTITY` | The mutable record reflecting current state | +| `EVENT` | An immutable fact: what changed, when, and who triggered it | +| `ACTOR` | The user or service that performed the action | + +--- + +## Step 1: Create an entity with its first event atomically + +Use a transaction to guarantee entity creation and the corresponding `created` event land together or not at all. + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +async function createOrderWithAudit( + orderData: Record, + actorId: string +) { + const tx = await db.tx.begin() + + try { + const order = await db.records.create( + { label: 'ORDER', data: { ...orderData, status: 'pending' } }, + tx + ) + + const event = await db.records.create( + { + label: 'EVENT', + data: { + type: 'created', + actorId, + entityId: order.__id, + occurredAt: new Date().toISOString(), + snapshot: JSON.stringify(orderData) + } + }, + tx + ) + + await db.records.attach( + { source: order, target: event, options: { type: 'HAS_EVENT', direction: 'out' } }, + tx + ) + + await db.tx.commit(tx) + return order + } catch (err) { + await db.tx.rollback(tx) + throw err + } +} + +const order = await createOrderWithAudit( + { customerId: 'cust-42', amount: 299.99, currency: 'USD' }, + 'user-101' +) +``` + + + + +```python +import json, os +from datetime import datetime, timezone +from rushdb import RushDB + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + + +def create_order_with_audit(order_data: dict, actor_id: str): + tx = db.transactions.begin() + try: + order = db.records.create("ORDER", {**order_data, "status": "pending"}, transaction=tx) + + event = db.records.create("EVENT", { + "type": "created", + "actorId": actor_id, + "entityId": order.id, + "occurredAt": 
datetime.now(timezone.utc).isoformat(), + "snapshot": json.dumps(order_data) + }, transaction=tx) + + db.records.attach(order.id, event.id, {"type": "HAS_EVENT", "direction": "out"}, transaction=tx) + + db.transactions.commit(tx) + return order + except Exception as e: + db.transactions.rollback(tx) + raise + + +order = create_order_with_audit( + {"customerId": "cust-42", "amount": 299.99, "currency": "USD"}, + "user-101" +) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +# Begin transaction +TX_ID=$(curl -s -X POST "$BASE/tx" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + | jq -r '.data.id') + +# Create order +ORDER_RESP=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d '{"label":"ORDER","data":{"customerId":"cust-42","amount":299.99,"currency":"USD","status":"pending"}}') +ORDER_ID=$(echo "$ORDER_RESP" | jq -r '.data.__id') + +# Create event +EVENT_RESP=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d "{\"label\":\"EVENT\",\"data\":{\"type\":\"created\",\"actorId\":\"user-101\",\"entityId\":\"$ORDER_ID\",\"occurredAt\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}}") +EVENT_ID=$(echo "$EVENT_RESP" | jq -r '.data.__id') + +# Link +curl -s -X POST "$BASE/records/$ORDER_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d "{\"targets\":[\"$EVENT_ID\"],\"options\":{\"type\":\"HAS_EVENT\",\"direction\":\"out\"}}" + +# Commit +curl -s -X POST "$BASE/tx/$TX_ID/commit" \ + -H "$H" -H "Authorization: Bearer $TOKEN" +``` + + + + +--- + +## Step 2: Record a state change with its audit event + +When the order status changes, update the entity and append a new EVENT — all in one transaction. 
+ + + + +```typescript +async function changeStatus( + orderId: string, + newStatus: string, + actorId: string, + reason?: string +) { + // Read current state first (outside transaction — read-then-write pattern) + const current = await db.records.find({ + labels: ['ORDER'], + where: { __id: orderId } + }) + const prevStatus = current.data[0]?.status + + const tx = await db.tx.begin() + try { + await db.records.update(orderId, { status: newStatus }, tx) + + const event = await db.records.create( + { + label: 'EVENT', + data: { + type: 'status_changed', + actorId, + entityId: orderId, + from: prevStatus, + to: newStatus, + reason: reason ?? null, + occurredAt: new Date().toISOString() + } + }, + tx + ) + + await db.records.attach( + { + source: current.data[0], + target: event, + options: { type: 'HAS_EVENT', direction: 'out' } + }, + tx + ) + + await db.tx.commit(tx) + } catch (err) { + await db.tx.rollback(tx) + throw err + } +} + +await changeStatus(order.__id, 'shipped', 'service-fulfillment', 'Dispatched from warehouse') +``` + + + + +```python +def change_status(order_id: str, new_status: str, actor_id: str, reason: str | None = None): + current = db.records.find({"labels": ["ORDER"], "where": {"__id": order_id}}) + prev_status = current.data[0].data.get("status") if current.data else None + + tx = db.transactions.begin() + try: + db.records.update(order_id, {"status": new_status}, transaction=tx) + + event = db.records.create("EVENT", { + "type": "status_changed", + "actorId": actor_id, + "entityId": order_id, + "from": prev_status, + "to": new_status, + "reason": reason, + "occurredAt": datetime.now(timezone.utc).isoformat() + }, transaction=tx) + + db.records.attach( + current.data[0].id, + event.id, + {"type": "HAS_EVENT", "direction": "out"}, + transaction=tx + ) + db.transactions.commit(tx) + except Exception as e: + db.transactions.rollback(tx) + raise + + +change_status(order.id, "shipped", "service-fulfillment", "Dispatched from warehouse") +``` + + + 
+ +```bash +TX_ID=$(curl -s -X POST "$BASE/tx" \ + -H "$H" -H "Authorization: Bearer $TOKEN" | jq -r '.data.id') + +# Update entity +curl -s -X PATCH "$BASE/records/$ORDER_ID" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d '{"status":"shipped"}' + +# Create event +EVENT_RESP=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d "{\"label\":\"EVENT\",\"data\":{\"type\":\"status_changed\",\"from\":\"pending\",\"to\":\"shipped\",\"actorId\":\"service-fulfillment\",\"entityId\":\"$ORDER_ID\",\"occurredAt\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}}") +EVENT_ID=$(echo "$EVENT_RESP" | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$ORDER_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d "{\"targets\":[\"$EVENT_ID\"],\"options\":{\"type\":\"HAS_EVENT\",\"direction\":\"out\"}}" + +curl -s -X POST "$BASE/tx/$TX_ID/commit" \ + -H "$H" -H "Authorization: Bearer $TOKEN" +``` + + + + +--- + +## Step 3: Query the full event history + +Retrieve all events for an entity ordered by occurrence time. 
+ + + + +```typescript +const history = await db.records.find({ + labels: ['EVENT'], + where: { + ORDER: { + $relation: { type: 'HAS_EVENT', direction: 'in' }, + __id: order.__id + } + }, + orderBy: { occurredAt: 'asc' } +}) + +for (const event of history.data) { + console.log(`[${event.occurredAt}] ${event.type} by ${event.actorId}`) + if (event.from) { + console.log(` ${event.from} → ${event.to}`) + } +} +``` + + + + +```python +history = db.records.find({ + "labels": ["EVENT"], + "where": { + "ORDER": { + "$relation": {"type": "HAS_EVENT", "direction": "in"}, + "__id": order.id + } + }, + "orderBy": {"occurredAt": "asc"} +}) + +for event in history.data: + print(f"[{event.data.get('occurredAt')}] {event.data.get('type')} by {event.data.get('actorId')}") + if event.data.get("from"): + print(f" {event.data['from']} → {event.data['to']}") +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"EVENT\"], + \"where\": { + \"ORDER\": { + \"\$relation\": {\"type\": \"HAS_EVENT\", \"direction\": \"in\"}, + \"__id\": \"$ORDER_ID\" + } + }, + \"orderBy\": {\"occurredAt\": \"asc\"} + }" +``` + + + + +--- + +## Step 4: Aggregate events for a compliance report + +Count events by type and actor across a time window. 
+ + + + +```typescript +const complianceReport = await db.records.find({ + labels: ['EVENT'], + where: { + occurredAt: { $gte: '2025-01-01', $lte: '2025-03-31' }, + type: 'status_changed' + }, + aggregate: { + count: { fn: 'count', alias: '$record' }, + actorId: '$record.actorId' + }, + groupBy: ['actorId', 'count'], + orderBy: { count: 'desc' } +}) + +console.log('State changes by actor (Q1):') +for (const row of complianceReport.data) { + console.log(` ${row.actorId}: ${row.count}`) +} +``` + + + + +```python +report = db.records.find({ + "labels": ["EVENT"], + "where": { + "occurredAt": {"$gte": "2025-01-01", "$lte": "2025-03-31"}, + "type": "status_changed" + }, + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "actorId": "$record.actorId" + }, + "groupBy": ["actorId", "count"], + "orderBy": {"count": "desc"} +}) + +print("State changes by actor (Q1):") +for row in report.data: + print(f" {row.data.get('actorId')}: {row.data.get('count')}") +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["EVENT"], + "where": { + "occurredAt": {"$gte": "2025-01-01", "$lte": "2025-03-31"}, + "type": "status_changed" + }, + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "actorId": "$record.actorId" + }, + "groupBy": ["actorId", "count"], + "orderBy": {"count": "desc"} + }' +``` + + + + +--- + +## Design rules for immutable audit trails + +1. **Never update or delete EVENT records** — they are the immutable log; treat them as write-once +2. **Always write entity update + event in a single transaction** — no partial audit trails +3. **Store `from` and `to` on every state-change event** — makes reconstruction possible without replaying all prior events +4. **Store `actorId` on every event** — automated services have service IDs, not just human users +5. 
**Store `occurredAt` as an ISO 8601 string** — enables `$gte`/`$lte` filtering on dates + +--- + +## Production caveat + +Audit trails grow with every write. For high-write systems (thousands of events per hour), plan for periodic archival of old events. A safe pattern: copy events older than 90 days into a separate RushDB project (`archive-{year}`) and mark them as archived in the source project. This preserves queryability while keeping the primary project lean. + +--- + +## Next steps + +- [Versioning Records Without Losing Queryability](./versioning-records.mdx) — complement to audit trails +- [Compliance and Retention Patterns](./compliance-retention.mdx) — expiration, archival, and redaction +- [Temporal Graphs](./temporal-graphs.mdx) — point-in-time reconstruction from state chains diff --git a/docs/docs/tutorials/building-team-memory.mdx b/docs/docs/tutorials/building-team-memory.mdx new file mode 100644 index 00000000..abaa276e --- /dev/null +++ b/docs/docs/tutorials/building-team-memory.mdx @@ -0,0 +1,563 @@ +--- +sidebar_position: 21 +title: "Building Team Memory for Product and Support Workflows" +description: Ingest tickets, docs, decisions, incidents, and feature requests into a connected graph so your team can retrieve context instead of isolated documents. +tags: [Memory, Agents, AI Search, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Building Team Memory for Product and Support Workflows + +A team accumulates context constantly: decisions made in Slack, bugs filed in Linear, docs written in Notion, post-mortems filed after incidents. That knowledge is scattered across tools and largely unretrievable — until someone needs it. + +This tutorial shows how to build a connected team memory graph in RushDB where tickets, decisions, incidents, docs, and feature requests are first-class nodes, linked by causal and referential relationships. 
Once the graph exists, you can retrieve connected context instead of isolated documents. + +--- + +## Graph shape + +```mermaid +graph LR + TICKET[TICKET
Bug / support request] -->|CAUSED_BY| INCIDENT + TICKET -->|RESOLVED_BY| DECISION + DECISION -->|DOCUMENTED_IN| DOC + FEATURE[FEATURE_REQUEST] -->|TRACKED_IN| TICKET + INCIDENT -->|TRIGGERED| ALERT + DOC -->|REFERENCES| DOC + DECISION -->|SUPERSEDES| DECISION +``` + +The key labels are: + +| Label | What it represents | +|---|---| +| `TICKET` | A bug report, support ticket, or task | +| `DECISION` | An architectural or product decision, ADR-style | +| `INCIDENT` | A production incident or outage | +| `DOC` | A piece of documentation, runbook, post-mortem, or RFC | +| `FEATURE_REQUEST` | A request from customers, research, or internal feedback | +| `ALERT` | A monitoring alert that triggered during an incident | + +--- + +## Step 1: Ingest existing tickets and docs + +Start with a bulk import. Shape each entry before writing — assign `status`, `category`, and `createdAt` while the structure is fresh. + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) 
+ +await db.records.importJson({ + label: 'TICKET', + data: [ + { + externalId: 'TICKET-1001', + title: 'Login fails when SSO is enabled', + status: 'open', + category: 'auth', + severity: 'high', + source: 'linear', + createdAt: '2025-03-01' + }, + { + externalId: 'TICKET-1002', + title: 'Dashboard crashes on date range filter', + status: 'resolved', + category: 'ui', + severity: 'medium', + source: 'linear', + createdAt: '2025-03-05' + } + ] +}) + +await db.records.importJson({ + label: 'DOC', + data: [ + { + externalId: 'DOC-007', + title: 'SSO Integration Architecture', + docType: 'adr', + status: 'accepted', + createdAt: '2025-01-10' + }, + { + externalId: 'DOC-201', + title: 'Dashboard Date Filter Postmortem', + docType: 'postmortem', + status: 'published', + createdAt: '2025-03-07' + } + ] +}) +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +db.records.import_json({ + "label": "TICKET", + "data": [ + { + "externalId": "TICKET-1001", + "title": "Login fails when SSO is enabled", + "status": "open", + "category": "auth", + "severity": "high", + "source": "linear", + "createdAt": "2025-03-01" + }, + { + "externalId": "TICKET-1002", + "title": "Dashboard crashes on date range filter", + "status": "resolved", + "category": "ui", + "severity": "medium", + "source": "linear", + "createdAt": "2025-03-05" + } + ] +}) + +db.records.import_json({ + "label": "DOC", + "data": [ + { + "externalId": "DOC-007", + "title": "SSO Integration Architecture", + "docType": "adr", + "status": "accepted", + "createdAt": "2025-01-10" + }, + { + "externalId": "DOC-201", + "title": "Dashboard Date Filter Postmortem", + "docType": "postmortem", + "status": "published", + "createdAt": "2025-03-07" + } + ] +}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +curl -s -X POST "$BASE/records/import/json" \ + -H "$H" 
-H "Authorization: Bearer $TOKEN" \ + -d '{ + "label": "TICKET", + "data": [ + {"externalId":"TICKET-1001","title":"Login fails when SSO is enabled","status":"open","category":"auth","severity":"high","source":"linear","createdAt":"2025-03-01"}, + {"externalId":"TICKET-1002","title":"Dashboard crashes on date range filter","status":"resolved","category":"ui","severity":"medium","source":"linear","createdAt":"2025-03-05"} + ] + }' +``` + + + + +--- + +## Step 2: Link related records + +After ingestion, fetch the records by their external IDs and attach causal and reference relationships. + + + + +```typescript +// Fetch records by externalId +const [ticketResult, docResult] = await Promise.all([ + db.records.find({ labels: ['TICKET'], where: { externalId: 'TICKET-1002' } }), + db.records.find({ labels: ['DOC'], where: { externalId: 'DOC-201' } }) +]) + +const ticket = ticketResult.data[0] +const doc = docResult.data[0] + +// TICKET --RESOLVED_BY--> DOC (postmortem) +await db.records.attach({ + source: ticket, + target: doc, + options: { type: 'RESOLVED_BY', direction: 'out' } +}) + +// Also link the SSO ticket to the SSO ADR doc +const [ssoTicket, ssoDoc] = await Promise.all([ + db.records.find({ labels: ['TICKET'], where: { externalId: 'TICKET-1001' } }), + db.records.find({ labels: ['DOC'], where: { externalId: 'DOC-007' } }) +]) + +await db.records.attach({ + source: ssoTicket.data[0], + target: ssoDoc.data[0], + options: { type: 'REFERENCES', direction: 'out' } +}) +``` + + + + +```python +ticket_result = db.records.find({"labels": ["TICKET"], "where": {"externalId": "TICKET-1002"}}) +doc_result = db.records.find({"labels": ["DOC"], "where": {"externalId": "DOC-201"}}) + +ticket = ticket_result.data[0] +doc = doc_result.data[0] + +db.records.attach(ticket.id, doc.id, {"type": "RESOLVED_BY", "direction": "out"}) + +# SSO ticket → SSO ADR +sso_ticket = db.records.find({"labels": ["TICKET"], "where": {"externalId": "TICKET-1001"}}).data[0] +sso_doc = 
db.records.find({"labels": ["DOC"], "where": {"externalId": "DOC-007"}}).data[0] + +db.records.attach(sso_ticket.id, sso_doc.id, {"type": "REFERENCES", "direction": "out"}) +``` + + + + +```bash +# Fetch ticket and doc IDs first +TICKET_ID=$(curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["TICKET"],"where":{"externalId":"TICKET-1002"}}' \ + | jq -r '.data[0].__id') + +DOC_ID=$(curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["DOC"],"where":{"externalId":"DOC-201"}}' \ + | jq -r '.data[0].__id') + +# Attach relationship +curl -s -X POST "$BASE/records/$TICKET_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$DOC_ID\"],\"options\":{\"type\":\"RESOLVED_BY\",\"direction\":\"out\"}}" +``` + + + + +--- + +## Step 3: Query connected context around a ticket + +Now retrieve all context relevant to an open ticket in one query. + + + + +```typescript +// All open auth tickets that REFERENCES an ADR +const authTicketsWithDocs = await db.records.find({ + labels: ['TICKET'], + where: { + status: 'open', + category: 'auth', + DOC: { + $relation: { type: 'REFERENCES', direction: 'out' } + } + } +}) + +for (const ticket of authTicketsWithDocs.data) { + console.log(`${ticket.externalId}: ${ticket.title}`) +} +``` + + + + +```python +auth_tickets_with_docs = db.records.find({ + "labels": ["TICKET"], + "where": { + "status": "open", + "category": "auth", + "DOC": { + "$relation": {"type": "REFERENCES", "direction": "out"} + } + } +}) + +for ticket in auth_tickets_with_docs.data: + print(f"{ticket.data.get('externalId')}: {ticket.data.get('title')}") +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["TICKET"], + "where": { + "status": "open", + "category": "auth", + "DOC": { + "$relation": {"type": "REFERENCES", "direction": "out"} + } + } + }' +``` + + + + +--- 
+ +## Step 4: Semantic search over team knowledge + +Enable semantic search on the `title` property of TICKET and DOC records to find related context when the exact terms are unknown. + + + + +```typescript +// First create an index — run once, not on every query +await db.ai.indexes.create({ + label: 'TICKET', + propertyName: 'title' +}) + +await db.ai.indexes.create({ + label: 'DOC', + propertyName: 'title' +}) + +// Semantic search across both TICKET and DOC titles +const related = await db.ai.search({ + query: 'authentication failure after config change', + propertyName: 'title', + labels: ['TICKET', 'DOC'] +}) + +for (const item of related.data) { + console.log(`[${item.__labels}] ${item.title} — score: ${item.__score.toFixed(3)}`) +} +``` + + + + +```python +# Create indexes once +db.ai.indexes.create({"label": "TICKET", "propertyName": "title"}) +db.ai.indexes.create({"label": "DOC", "propertyName": "title"}) + +# Search +related = db.ai.search({ + "query": "authentication failure after config change", + "propertyName": "title", + "labels": ["TICKET", "DOC"] +}) + +for item in related.data: + print(f"[{item.__labels}] {item.data.get('title')} — score: {item.__score:.3f}") +``` + + + + +```bash +# Create indexes (run once) +curl -s -X POST "$BASE/ai/indexes" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"TICKET","propertyName":"title"}' + +curl -s -X POST "$BASE/ai/indexes" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"DOC","propertyName":"title"}' + +# Search +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "query": "authentication failure after config change", + "propertyName": "title", + "labels": ["TICKET", "DOC"] + }' +``` + + + + +--- + +## Step 5: Retrieve connected context for an agent prompt + +When an agent needs to answer "what do we know about the SSO bug?", retrieve connected nodes and assemble a compact context block. 
+
+
+
+
+```typescript
+async function getTicketContext(externalId: string): Promise<string> {
+  // 1. Find the ticket
+  const ticketResult = await db.records.find({
+    labels: ['TICKET'],
+    where: { externalId }
+  })
+  const ticket = ticketResult.data[0]
+  if (!ticket) return 'Ticket not found.'
+
+  // 2. Find directly referenced docs
+  const docs = await db.records.find({
+    labels: ['DOC'],
+    where: {
+      TICKET: {
+        $relation: { type: 'REFERENCES', direction: 'in' },
+        __id: ticket.__id
+      }
+    }
+  })
+
+  // 3. Find semantically similar tickets
+  const similarTickets = await db.ai.search({
+    query: ticket.title as string,
+    propertyName: 'title',
+    labels: ['TICKET'],
+    where: { status: 'resolved' },
+    limit: 3
+  })
+
+  // 4. Assemble context
+  const lines: string[] = [
+    `## Ticket: ${ticket.externalId} — ${ticket.title}`,
+    `Status: ${ticket.status} | Category: ${ticket.category} | Severity: ${ticket.severity}`,
+    '',
+    '### Referenced Documents'
+  ]
+  for (const doc of docs.data) {
+    lines.push(`- [${doc.docType}] ${doc.title}`)
+  }
+  lines.push('')
+  lines.push('### Similar Resolved Tickets')
+  for (const t of similarTickets.data) {
+    lines.push(`- ${t.externalId}: ${t.title} (score: ${t.__score.toFixed(2)})`)
+  }
+
+  return lines.join('\n')
+}
+
+const context = await getTicketContext('TICKET-1001')
+console.log(context)
+```
+
+
+
+
+```python
+def get_ticket_context(external_id: str) -> str:
+    ticket_result = db.records.find({
+        "labels": ["TICKET"],
+        "where": {"externalId": external_id}
+    })
+    if not ticket_result.data:
+        return "Ticket not found."
+ ticket = ticket_result.data[0] + + docs = db.records.find({ + "labels": ["DOC"], + "where": { + "TICKET": { + "$relation": {"type": "REFERENCES", "direction": "in"}, + "__id": ticket.id + } + } + }) + + similar = db.ai.search({ + "query": ticket.data.get("title", ""), + "propertyName": "title", + "labels": ["TICKET"], + "where": {"status": "resolved"}, + "limit": 3 + }) + + lines = [ + f"## Ticket: {ticket.data['externalId']} — {ticket.data['title']}", + f"Status: {ticket.data.get('status')} | Category: {ticket.data.get('category')}", + "", + "### Referenced Documents" + ] + for doc in docs.data: + lines.append(f"- [{doc.data.get('docType')}] {doc.data.get('title')}") + lines.append("") + lines.append("### Similar Resolved Tickets") + for t in similar.data: + lines.append(f"- {t.data.get('externalId')}: {t.data.get('title')} (score: {t.__score:.2f})") + + return "\n".join(lines) + + +print(get_ticket_context("TICKET-1001")) +``` + + + + +```bash +# Get ticket +TICKET_ID=$(curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["TICKET"],"where":{"externalId":"TICKET-1001"}}' \ + | jq -r '.data[0].__id') + +# Get referenced docs +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"labels\":[\"DOC\"],\"where\":{\"TICKET\":{\"\$relation\":{\"type\":\"REFERENCES\",\"direction\":\"in\"},\"__id\":\"$TICKET_ID\"}}}" + +# Semantic similar +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"query":"Login fails when SSO is enabled","propertyName":"title","labels":["TICKET"],"where":{"status":"resolved"},"limit":3}' +``` + + + + +--- + +## What to add next + +Once the base graph is established, enrich it incrementally: + +- **Webhooks from Linear/GitHub/PagerDuty** — auto-create TICKET and INCIDENT records on event (see [Event-Driven Ingestion](./event-driven-ingestion.mdx)) +- **Decisions as first-class nodes** — when a team makes a decision, 
create a DECISION record and link it to the TICKET or INCIDENT it resolved +- **FEATURE_REQUEST linked to TICKET** — when a customer request drives a bug fix or feature, link the FEATURE_REQUEST to the TICKET so the team can see customer impact upstream of every change + +--- + +## Production caveat + +Team memory graphs grow unboundedly unless pruned. Define a retention policy for closed tickets and resolved incidents older than a threshold (90 days, 1 year) and archive them with a status update rather than deleting — relationships to other nodes remain valid for historical queries even when the original record is archived. + +--- + +## Next steps + +- [Agent-Safe Query Planning with Ontology First](./agent-safe-query-planning.mdx) — run agents over this memory graph safely +- [Episodic Memory for Multi-Step Agents](./episodic-memory.mdx) — session-scoped context alongside persistent team memory +- [Data Lineage](./data-lineage.mdx) — trace decisions back to source data diff --git a/docs/docs/tutorials/byoc-vs-managed.mdx b/docs/docs/tutorials/byoc-vs-managed.mdx new file mode 100644 index 00000000..6fb2f8bd --- /dev/null +++ b/docs/docs/tutorials/byoc-vs-managed.mdx @@ -0,0 +1,278 @@ +--- +sidebar_position: 38 +title: "BYOC vs Managed vs Self-Hosted" +description: Compare RushDB's three deployment models — Managed, BYOC (Bring Your Own Cloud), and Self-Hosted — with a feature matrix, decision guide, and step-by-step migration path. +tags: [Deployment, BYOC, Neo4j, Self-Hosted] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# BYOC vs Managed vs Self-Hosted + +RushDB offers three deployment models. This page explains the trade-offs so you can pick the right architecture from the start — or understand what's involved in moving between them. 
+ +--- + +## Three deployment models + +| | **Managed** | **BYOC** | **Self-Hosted** | +|---|---|---|---| +| **Who runs RushDB** | RushDB Cloud | RushDB Cloud | You | +| **Your Neo4j instance** | RushDB-managed | Yours (Aura or local) | Yours | +| **Data residency** | RushDB infrastructure | Your chosen region | Your infrastructure | +| **Setup time** | < 1 minute | ~15 minutes | 30–60 minutes | +| **Ops burden** | None | Minimal | Full | +| **Raw Cypher access** | ✗ | ✓ | ✓ | +| **Custom embedding endpoint** | ✗ | ✗ | ✓ | +| **Managed billing** | RushDB plans | RushDB plans + Neo4j costs | Infrastructure costs only | +| **Backups** | Automatic | Your responsibility | Your responsibility | +| **SLA / uptime** | RushDB SLA | RushDB SLA (RushDB layer) | DIY | +| **Compliance (SOC 2, HIPAA)** | Contact sales | Achievable | Achievable | + +--- + +## When to choose each + +### Managed — start here + +**Choose Managed when:** +- You need to be working in minutes with no infrastructure setup. +- Your data residency and compliance requirements are flexible. +- You'd rather not think about Neo4j capacity planning. +- You're prototyping, running a hobby project, or in early-stage. + +```js +// Nothing to configure — just use the cloud API key +import RushDB from '@rushdb/javascript-sdk' +const db = new RushDB('rbk_xxxxxxxx') +``` + +--- + +### BYOC — bring your own Neo4j + +**Choose BYOC when:** +- You already have a Neo4j Aura instance (or plan to). +- You need raw Cypher access for reporting or migrations. +- You want your graph data to stay within your cloud account or region. +- You want RushDB's API layer managed for you but want control over the graph store. + +BYOC gives you a RushDB project that connects to your Neo4j credentials. See [Connect an Aura Instance](./connect-aura-instance) for the full setup guide. 
+ +```js +// Same SDK interface — BYOC is transparent to application code +import RushDB from '@rushdb/javascript-sdk' +const db = new RushDB('rbk_your_byoc_project_key') +``` + +**Feature unlocked — raw Cypher:** +```bash +curl -X POST https://api.rushdb.com/api/v1/query/raw \ + -H "Authorization: Bearer rbk_your_byoc_project_key" \ + -H "Content-Type: application/json" \ + -d '{"query": "MATCH (n) RETURN count(n) AS total"}' +``` + +--- + +### Self-Hosted — full control + +**Choose Self-Hosted when:** +- Your data must not leave your own infrastructure. +- You need custom embedding endpoints (open-source models, private model servers). +- You want to customize deployment topology (multi-region, air-gapped, private VPC). +- Regulatory requirements mandate on-premises hosting. + +See the [Deployment guide](./deployment) for the complete Docker Compose walkthrough. + +```js +import RushDB from '@rushdb/javascript-sdk' +const db = new RushDB( + process.env.RUSHDB_API_KEY, + { url: process.env.RUSHDB_API_URL } // points to your own instance +) +``` + +--- + +## Feature gate reference + +The following features are gated by deployment model: + +| Feature | Managed | BYOC | Self-Hosted | +|---|---|---|---| +| `POST /api/v1/query/raw` (raw Cypher) | ✗ | ✓ | ✓ | +| Custom embedding base URL | ✗ | ✗ | ✓ | +| External embedding indexes (BYOV) | ✓ | ✓ | ✓ | +| Dashboard access | ✓ | ✓ | ✓ | +| API key management | ✓ | ✓ | ✓ | +| Rate limiter configuration | ✗ | ✗ | ✓ | + +--- + +## Migrating between models + +### Managed → BYOC + +Managed and BYOC share the same RushDB API layer. There is no direct data export between Neo4j instances since BYOC uses *your* Neo4j that you provision separately. The migration flow is: + +**1. 
Document your data shape** + +Use `getOntologyMarkdown()` to capture your record labels, property names, and relationship types before migrating: + + + + +```js +const ontology = await db.getOntologyMarkdown() +// Save this as a snapshot of your schema +console.log(ontology) +``` + + + + +```python +ontology = db.get_ontology_markdown() +print(ontology) +``` + + + + +**2. Export your data** + +Use `records.find()` with pagination to export records by label: + + + + +```js +const PAGE_SIZE = 500 +let skip = 0 +const allRecords = [] + +while (true) { + const { data, total } = await db.records.find({ + labels: ['Product'], + skip, + limit: PAGE_SIZE + }) + allRecords.push(...data) + if (allRecords.length >= total) break + skip += PAGE_SIZE +} + +// Serialize for re-import +const payload = allRecords.map(r => ({ + __label: 'Product', + ...r +})) +``` + + + + +```python +PAGE_SIZE = 500 +skip = 0 +all_records = [] + +while True: + result = db.records.find( + labels=['Product'], + skip=skip, + limit=PAGE_SIZE + ) + all_records.extend(result.data) + if len(all_records) >= result.total: + break + skip += PAGE_SIZE +``` + + + + +**3. Create a BYOC project** + +In the [RushDB Dashboard](https://app.rushdb.com), create a new project using "Use my own Neo4j instance" and follow the [BYOC setup guide](./connect-aura-instance). + +**4. Re-import into the BYOC project** + +Point your SDK at the new API key and re-ingest: + + + + +```js +const byocDb = new RushDB('rbk_byoc_project_key') + +await byocDb.records.importJson({ + label: 'Product', + data: payload +}) +``` + + + + +```python +byoc_db = RushDB('rbk_byoc_project_key') + +byoc_db.records.import_json( + label='Product', + data=payload +) +``` + + + + +--- + +### BYOC / Managed → Self-Hosted + +Self-hosted uses the same REST API surface as the cloud. The migration steps are: + +1. [Deploy your self-hosted instance](./deployment) with `RUSHDB_SELF_HOSTED=true`. +2. 
Create a project and API key via the self-hosted dashboard at `http://your-host:3000`. +3. Export records from the cloud using the pagination pattern above. +4. Point the SDK at your self-hosted URL and re-import. + +```js +const selfHostedDb = new RushDB( + process.env.SELFHOSTED_API_KEY, + { url: 'https://rushdb.your-company.com/api/v1' } +) +``` + +--- + +### Self-Hosted → Managed + +The same export/re-import pattern applies. In addition, you can leverage raw Cypher on self-hosted to bulk-export complex relationship graphs before re-ingesting them via `importJson` on managed. + +--- + +## Quick decision tree + +``` +Do you need data to stay on your own infrastructure? + └─ Yes → Self-Hosted + +Do you already have a Neo4j instance, or need raw Cypher? + └─ Yes → BYOC + +Starting fresh with no ops requirements? + └─ Yes → Managed +``` + +--- + +## Next steps + +- **Get started with Managed** — [Quickstart](../get-started/quickstart) +- **Set up BYOC** — [Connect an Aura Instance](./connect-aura-instance) +- **Deploy self-hosted** — [Deployment](./deployment) +- **Manage projects post-deploy** — [Self-Hosted Project Setup](./self-hosted-project-setup) diff --git a/docs/docs/tutorials/byov-external-embeddings.mdx b/docs/docs/tutorials/byov-external-embeddings.mdx new file mode 100644 index 00000000..3a68c24f --- /dev/null +++ b/docs/docs/tutorials/byov-external-embeddings.mdx @@ -0,0 +1,308 @@ +--- +sidebar_position: 35 +title: "Bring Your Own Vectors (BYOV) — External Embeddings" +description: Use your own embedding model to generate vectors and store them in RushDB, then search with queryVector instead of query text. +tags: [AI, BYOV, Embeddings, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Bring Your Own Vectors (BYOV) — External Embeddings + +By default, RushDB generates embeddings for you using the configured server-side embedding model. 
With BYOV you generate the vectors yourself — using any model, any provider, even a locally fine-tuned one — and push them alongside your records. RushDB stores and indexes them, and you search with a pre-computed `queryVector` instead of a raw query string. + +Use BYOV when: +- You have a fine-tuned or domain-specific embedding model +- Your compliance requirements prohibit sending raw text to a third-party embedding API +- You want to co-locate embedding cost with your own infrastructure + +--- + +## How it differs from managed embeddings + +| | Managed | BYOV (external) | +|---|---|---| +| Who generates vectors | RushDB (server-side) | You (client-side) | +| Search parameter | `query: "text"` | `queryVector: number[]` | +| Index `sourceType` | `managed` (default) | `external` | +| Shorthand | — | `external: true` | +| Dimensions | Set by server `RUSHDB_EMBEDDING_DIMENSIONS` | Set per index by you | + +Both index types can coexist on the same label and property. + +--- + +## Step 1: Create an external embedding index + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) 
+
+await db.ai.indexes.create({
+  label: 'ARTICLE',
+  propertyName: 'body',
+  external: true, // shorthand for sourceType: 'external'
+  similarityFunction: 'cosine',
+  dimensions: 1536 // must match your model's output dimensions
+})
+```
+
+
+
+
+```python
+from rushdb import RushDB
+import os
+
+db = RushDB(os.environ['RUSHDB_API_KEY'], base_url='https://api.rushdb.com/api/v1')
+
+db.ai.indexes.create({
+    'label': 'ARTICLE',
+    'propertyName': 'body',
+    'external': True, # shorthand for sourceType: 'external'
+    'similarityFunction': 'cosine',
+    'dimensions': 1536
+})
+```
+
+
+
+
+```bash
+curl -X POST https://api.rushdb.com/api/v1/ai/indexes \
+  -H "Authorization: Bearer $RUSHDB_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "label": "ARTICLE",
+    "propertyName": "body",
+    "sourceType": "external",
+    "similarityFunction": "cosine",
+    "dimensions": 1536
+  }'
+```
+
+
+
+
+An external index starts in `awaiting_vectors` status — it has no backfill work to do because RushDB never calls your embedding provider. It becomes `ready` as soon as vectors are stored.
+
+---
+
+## Step 2: Push records with inline vectors
+
+Use the `$vectors` key inside each record in an `importJson` batch to deliver embeddings alongside the data in one call.
+
+
+
+
+```typescript
+// Your embedding function — swap with any provider
+async function embed(text: string): Promise<number[]> {
+  const res = await fetch('https://api.openai.com/v1/embeddings', {
+    method: 'POST',
+    headers: { 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`, 'Content-Type': 'application/json' },
+    body: JSON.stringify({ model: 'text-embedding-3-small', input: text })
+  })
+  const json = await res.json()
+  return json.data[0].embedding
+}
+
+const articles = [
+  { title: 'Intro to Graph Databases', body: 'Graph databases store data as nodes and edges...' },
+  { title: 'Vector Search Explained', body: 'Vector search finds semantically similar documents...'
} +] + +// Generate embeddings in parallel +const withVectors = await Promise.all( + articles.map(async (article) => ({ + ...article, + $vectors: [{ propertyName: 'body', vector: await embed(article.body) }] + })) +) + +await db.records.importJson({ label: 'ARTICLE', data: withVectors }) +``` + + + + +```python +import openai +import os + +openai_client = openai.OpenAI(api_key=os.environ['OPENAI_API_KEY']) + +def embed(text: str) -> list[float]: + resp = openai_client.embeddings.create(model='text-embedding-3-small', input=text) + return resp.data[0].embedding + +articles = [ + {'title': 'Intro to Graph Databases', 'body': 'Graph databases store data as nodes and edges...'}, + {'title': 'Vector Search Explained', 'body': 'Vector search finds semantically similar documents...'} +] + +with_vectors = [ + {**article, '$vectors': [{'propertyName': 'body', 'vector': embed(article['body'])}]} + for article in articles +] + +db.records.import_json({'label': 'ARTICLE', 'data': with_vectors}) +``` + + + + +```bash +# Generate embedding via OpenAI and push to RushDB in one pipeline +VECTOR=$(curl -s https://api.openai.com/v1/embeddings \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model":"text-embedding-3-small","input":"Graph databases store data as nodes and edges"}' \ + | jq '.data[0].embedding') + +curl -X POST https://api.rushdb.com/api/v1/records/import/json \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d "{\"label\":\"ARTICLE\",\"data\":[{\"title\":\"Intro to Graph Databases\",\"body\":\"Graph databases store data as nodes and edges\",\"\$vectors\":[{\"propertyName\":\"body\",\"vector\":$VECTOR}]}]}" +``` + + + + +--- + +## Step 3: Search with a pre-computed query vector + +Instead of `query: "text"` you pass `queryVector: number[]`. RushDB infers `dimensions` from the vector length. 
+ + + + +```typescript +const queryVector = await embed('how do graph databases handle relationships?') + +const results = await db.ai.search({ + propertyName: 'body', + queryVector, // dimensions inferred from queryVector.length + labels: ['ARTICLE'], + limit: 5 +}) + +for (const article of results.data) { + console.log(`${article.title} — score: ${article.__score.toFixed(3)}`) +} +``` + + + + +```python +query_vector = embed('how do graph databases handle relationships?') + +results = db.ai.search({ + 'propertyName': 'body', + 'queryVector': query_vector, # dimensions inferred from vector length + 'labels': ['ARTICLE'], + 'limit': 5 +}) + +for article in results.data: + print(f"{article.data.get('title')} — score: {article.data.get('__score'):.3f}") +``` + + + + +```bash +QUERY_VECTOR=$(curl -s https://api.openai.com/v1/embeddings \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model":"text-embedding-3-small","input":"how do graph databases handle relationships?"}' \ + | jq '.data[0].embedding') + +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d "{\"propertyName\":\"body\",\"queryVector\":$QUERY_VECTOR,\"labels\":[\"ARTICLE\"],\"limit\":5}" +``` + + + + +--- + +## Combining `queryVector` with structured filters + +`queryVector` works with `where` exactly like `query` — the `where` clause executes first, then semantic ranking runs over the filtered set. 
+ + + + +```typescript +const queryVector = await embed('reducing database latency') + +const results = await db.ai.search({ + propertyName: 'body', + queryVector, + labels: ['ARTICLE'], + where: { + status: 'published', + category: 'infrastructure', + publishedAt: { $gte: '2025-01-01' } + }, + limit: 10 +}) +``` + + + + +```python +results = db.ai.search({ + 'propertyName': 'body', + 'queryVector': embed('reducing database latency'), + 'labels': ['ARTICLE'], + 'where': { + 'status': 'published', + 'category': 'infrastructure', + 'publishedAt': {'$gte': '2025-01-01'} + }, + 'limit': 10 +}) +``` + + + + +--- + +## Disambiguation: two indexes on the same property + +If you create both a `cosine` and `euclidean` index on the same label+property, specify `similarityFunction` in the `$vectors` entry to target the right one. + +```typescript +$vectors: [ + { propertyName: 'body', similarityFunction: 'cosine', vector: cosineEmbedding }, + { propertyName: 'body', similarityFunction: 'euclidean', vector: euclideanEmbedding } +] +``` + +| Vector array entries | Matching indexes | Outcome | +|---|---|---| +| 1 entry, no `similarityFunction` | 1 match | OK — writes to that index | +| 1 entry, no `similarityFunction` | 2 matches | `422 Ambiguous — specify similarityFunction` | +| 1 entry, no `similarityFunction` | 0 matches | `404 No external index found` | +| 1 entry, with `similarityFunction` | — | Matched by both dimension + function | + +--- + +## Next steps + +- [Hybrid Retrieval](./hybrid-retrieval.mdx) — `where` filters + semantic ranking in one call +- [GraphRAG](./graphrag.mdx) — use BYOV embeddings in a retrieval-augmented generation pipeline with graph traversal +- [RAG Evaluation](./rag-evaluation.mdx) — benchmark precision before and after swapping embedding models diff --git a/docs/docs/tutorials/choosing-relationship-types.mdx b/docs/docs/tutorials/choosing-relationship-types.mdx new file mode 100644 index 00000000..aeee70bf --- /dev/null +++ 
b/docs/docs/tutorials/choosing-relationship-types.mdx @@ -0,0 +1,375 @@ +--- +sidebar_position: 15 +title: "Choosing Relationship Types That Age Well" +description: When to use generic nesting-driven edges versus explicit typed relationships, and how that choice affects readability, search, and analytics downstream. +tags: [Graph Modeling, Concepts, Relationships] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Choosing Relationship Types That Age Well + +Every time you attach two records in RushDB, you pick a relationship type. This choice feels minor in the moment and becomes significant three months later — when another developer reads your graph, or when a query needs to traverse it in a direction you didn't anticipate. + +This tutorial covers the tradeoffs clearly so you can make the right call up front. + +--- + +## Two strategies + +### Strategy A: Generic nesting + +You let RushDB's automatic nesting create edges by importing JSON with deeply nested objects. The resulting relationship type is a generated name derived from the parent label. + +```typescript +await db.records.importJson({ + label: 'ORDER', + data: [{ + total: 149.00, + PRODUCT: [{ name: 'Lens Cap 58mm', price: 12.99 }] + }] +}) +``` + +This creates `ORDER` → `PRODUCT` records connected by an edge RushDB names after the child label. The relationship type exists but carries no semantic meaning beyond the structural parent-child edge. 
+ +### Strategy B: Explicit typed relationships + +You create records independently and attach them with a named relationship type: + +```typescript +const order = await db.records.create({ label: 'ORDER', data: { total: 149.00 } }) +const product = await db.records.create({ label: 'PRODUCT', data: { name: 'Lens Cap 58mm', price: 12.99 } }) + +await db.records.attach({ + source: order, + target: product, + options: { type: 'CONTAINS', direction: 'out' } +}) +``` + +The edge now carries meaning: an `ORDER` *contains* a `PRODUCT`. + +--- + +## When to use each strategy + +```mermaid +graph TD + Q1{Will you traverse this\nrelationship by type?} + Q1 -->|Yes| EXP[Use explicit typed relationship] + Q1 -->|No| Q2{Does direction matter\nfor future queries?} + Q2 -->|Yes| EXP + Q2 -->|No| Q3{Are there multiple\nedge meanings between\nthe same two labels?} + Q3 -->|Yes| EXP + Q3 -->|No| GEN[Generic nesting is fine] +``` + +Use **explicit typed relationships** when: + +- You need to filter traversal by edge type (`$relation: { type: 'AUTHORED' }`) +- Two records can be connected in multiple ways (a user can both `AUTHORED` and `REVIEWED` a document) +- The direction of the relationship matters for different query perspectives +- You want the relationship to be self-documenting for other developers or agents reading the ontology + +Use **generic nesting** when: + +- The structure is purely hierarchical and the relationship type adds no meaning +- You are ingesting denormalized data (CSV rows, API responses) and the parent-child is implicit +- Speed of ingest matters more than traversal precision + +--- + +## The readability problem with generic edges + +Consider a graph where `USER` records are linked to `DOCUMENT` records. With generic edges: + +```typescript +const result = await db.records.find({ + labels: ['DOCUMENT'], + where: { + USER: { name: 'Lena Müller' } + } +}) +``` + +This query returns documents connected to Lena — but connected *how*? Did she write them? 
Read them? Approve them? The graph cannot answer that question. + +With typed relationships, the intent is explicit: + + + + +```typescript +// Documents Lena authored +const authored = await db.records.find({ + labels: ['DOCUMENT'], + where: { + USER: { + $alias: '$author', + $relation: { type: 'AUTHORED', direction: 'in' }, + name: 'Lena Müller' + } + } +}) + +// Documents Lena reviewed +const reviewed = await db.records.find({ + labels: ['DOCUMENT'], + where: { + USER: { + $alias: '$reviewer', + $relation: { type: 'REVIEWED', direction: 'in' }, + name: 'Lena Müller' + } + } +}) + +// Documents Lena authored OR reviewed +const involved = await db.records.find({ + labels: ['DOCUMENT'], + where: { + $or: [ + { USER: { $relation: { type: 'AUTHORED', direction: 'in' }, name: 'Lena Müller' } }, + { USER: { $relation: { type: 'REVIEWED', direction: 'in' }, name: 'Lena Müller' } } + ] + } +}) +``` + + + + +```python +# Documents authored +authored = db.records.find({ + "labels": ["DOCUMENT"], + "where": { + "USER": { + "$alias": "$author", + "$relation": {"type": "AUTHORED", "direction": "in"}, + "name": "Lena Müller" + } + } +}) + +# Documents reviewed +reviewed = db.records.find({ + "labels": ["DOCUMENT"], + "where": { + "USER": { + "$alias": "$reviewer", + "$relation": {"type": "REVIEWED", "direction": "in"}, + "name": "Lena Müller" + } + } +}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +# Documents authored by Lena +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["DOCUMENT"], + "where": { + "USER": { + "$relation": {"type": "AUTHORED", "direction": "in"}, + "name": "Lena Müller" + } + } + }' +``` + + + + +--- + +## The analytics problem with generic edges + +Generic edges make dimensional aggregations ambiguous. 
With typed edges you can count distinct relationship types as separate metrics: + + + + +```typescript +const userStats = await db.records.find({ + labels: ['USER'], + where: { + DOCUMENT: { + $alias: '$authored', + $relation: { type: 'AUTHORED', direction: 'out' } + } + }, + aggregate: { + userName: '$record.name', + documentsAuthored: { fn: 'count', alias: '$authored', unique: true } + }, + groupBy: ['userName', 'documentsAuthored'], + orderBy: { documentsAuthored: 'desc' }, + limit: 20 +}) +``` + + + + +```python +user_stats = db.records.find({ + "labels": ["USER"], + "where": { + "DOCUMENT": { + "$alias": "$authored", + "$relation": {"type": "AUTHORED", "direction": "out"} + } + }, + "aggregate": { + "userName": "$record.name", + "documentsAuthored": {"fn": "count", "alias": "$authored", "unique": True} + }, + "groupBy": ["userName", "documentsAuthored"], + "orderBy": {"documentsAuthored": "desc"}, + "limit": 20 +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["USER"], + "where": { + "DOCUMENT": { + "$alias": "$authored", + "$relation": {"type": "AUTHORED", "direction": "out"} + } + }, + "aggregate": { + "userName": "$record.name", + "documentsAuthored": {"fn": "count", "alias": "$authored", "unique": true} + }, + "groupBy": ["userName", "documentsAuthored"], + "orderBy": {"documentsAuthored": "desc"}, + "limit": 20 + }' +``` + + + + +--- + +## Migrating from generic to typed edges + +If you imported data with generic nesting and now need typed relationships: + +1. Query the existing records using the generic traversal to find both endpoint IDs. +2. Attach new typed relationships between them. +3. Optionally detach the generic edges if your queries no longer need them. 
+ + + + +```typescript +// Find all USER-DOCUMENT pairs connected by any edge +const pairs = await db.records.find({ + labels: ['DOCUMENT'], + where: { + USER: { $alias: '$user' } + }, + aggregate: { + documentId: '$record.__id', + userId: '$user.__id' + } +}) + +// Re-attach with typed relationship +for (const pair of pairs.data) { + await db.records.attach({ + source: { __id: pair.userId as string }, + target: { __id: pair.documentId as string }, + options: { type: 'AUTHORED', direction: 'out' } + }) +} +``` + + + + +```python +pairs = db.records.find({ + "labels": ["DOCUMENT"], + "where": {"USER": {"$alias": "$user"}}, + "aggregate": {"documentId": "$record.__id", "userId": "$user.__id"} +}) + +for pair in pairs.data: + db.records.attach( + pair["userId"], + pair["documentId"], + {"type": "AUTHORED", "direction": "out"} + ) +``` + + + + +```bash +# Fetch pairs, then loop to attach (illustrative — use SDK for bulk work) +PAIRS=$(curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["DOCUMENT"],"where":{"USER":{"$alias":"$user"}},"aggregate":{"documentId":"$record.__id","userId":"$user.__id"}}') + +echo "$PAIRS" | jq -c '.data[]' | while read -r pair; do + USER_ID=$(echo "$pair" | jq -r '.userId') + DOC_ID=$(echo "$pair" | jq -r '.documentId') + curl -s -X POST "$BASE/records/$USER_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$DOC_ID\"],\"options\":{\"type\":\"AUTHORED\",\"direction\":\"out\"}}" +done +``` + + + + +--- + +## Naming conventions that age well + +Relationship types that age well share a few properties: + +- **Verb-first**: `AUTHORED`, `CONTAINS`, `DEPENDS_ON`, `ASSIGNED_TO` — not `USER_DOCUMENT` or `LINK` +- **Direction-aware**: the verb should read correctly in the `out` direction: USER --`AUTHORED`--> DOCUMENT +- **Domain-specific**: prefer business verbs (`PURCHASED`, `APPROVED`) over generic ones (`HAS`, `RELATED_TO`) +- **Uppercase**: consistent with Neo4j 
conventions and easier to scan in query code + +Avoid: +- `HAS` — does not indicate what the relationship means +- `LINKED` — directionally ambiguous +- `REL_USER_DOCUMENT` — table-join style naming + +--- + +## Production caveat + +You cannot rename or change a relationship type after records are attached using it. If you need to change a relationship type at scale, you must query the existing edges, attach new ones, and detach the old ones as a bulk operation. Plan your types before ingestion, not after. + +--- + +## Next steps + +- [Modeling Hierarchies, Networks, and Feedback Loops](./modeling-hierarchies.mdx) — three common graph shapes and how to query each +- [Thinking in Graphs: From Tables to Traversals](./thinking-in-graphs.mdx) — the full mental model shift +- [SearchQuery Deep Dive](./searchquery-advanced-patterns.mdx) — `$relation`, `$alias`, and aggregation patterns diff --git a/docs/docs/tutorials/configuring-dashboard.md b/docs/docs/tutorials/configuring-dashboard.md index f22ecdb0..5db5bb18 100644 --- a/docs/docs/tutorials/configuring-dashboard.md +++ b/docs/docs/tutorials/configuring-dashboard.md @@ -1,5 +1,8 @@ --- -sidebar_position: 2 +title: Get API Key +description: Register for RushDB, create a project, and generate your first API token to start building. +sidebar_position: 1 +tags: [Getting Started] --- # Get API Key diff --git a/docs/docs/tutorials/connect-aura-instance.mdx b/docs/docs/tutorials/connect-aura-instance.mdx new file mode 100644 index 00000000..598af7b8 --- /dev/null +++ b/docs/docs/tutorials/connect-aura-instance.mdx @@ -0,0 +1,173 @@ +--- +sidebar_position: 34 +title: "Connecting a Neo4j Aura Instance (BYOC)" +description: Use your own Neo4j or Aura database as the RushDB data store — keep data in your cloud account while using the full RushDB API. 
+tags: [Deployment, BYOC, Neo4j, Aura] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Connecting a Neo4j Aura Instance (BYOC) + +**BYOC** (Bring Your Own Cloud) means RushDB's API and query layer runs on RushDB infrastructure while your Neo4j or Aura instance is the actual data store. Your graph data never leaves your cloud account. + +This is available on every plan including Free. + +--- + +## How it works + +```mermaid +graph LR + App[Your application] -->|REST / SDK| RushDB[RushDB API] + RushDB -->|Bolt protocol| Aura[Your Neo4j Aura instance] + Aura -->|Graph data| Aura +``` + +RushDB stores: +- **In your Neo4j/Aura** — all record nodes, relationship edges, and vector embeddings +- **In RushDB's Postgres** — project config, API keys, embedding index metadata, and billing records only + +--- + +## Step 1: Get your Neo4j Aura connection details + +1. Open [console.neo4j.io](https://console.neo4j.io) and select your instance. +2. Click **Connect** → **Drivers** to find your connection URI. It will look like: + ``` + neo4j+s://xxxxxxxx.databases.neo4j.io + ``` +3. Note the **username** (typically `neo4j`) and the **password** you set when creating the instance. + +:::tip Free Aura tier +Neo4j AuraDB Free (1 GB) is sufficient for development and small production workloads. Create one at [console.neo4j.io](https://console.neo4j.io) if you do not already have an account. +::: + +--- + +## Step 2: Create a BYOC project in the RushDB dashboard + +1. Sign in to [app.rushdb.com](https://app.rushdb.com) (or your self-hosted dashboard). +2. Click **New Project**. +3. Enable **Use my own Neo4j instance** (toggle in the project creation dialog). +4. Enter your connection details: + - **Connection URI** — `neo4j+s://xxxxxxxx.databases.neo4j.io` + - **Username** — `neo4j` + - **Password** — your Aura password +5. Click **Verify Connection**. RushDB performs a lightweight Bolt handshake to confirm credentials. +6. 
Click **Create Project**. + +RushDB creates the project entry in its own Postgres, but **all graph writes will go to your Aura instance**. + +--- + +## Step 3: Get your API key + +After the project is created, go to the **API Keys** tab and copy the generated key. This key authenticates requests to the RushDB API and is stored encrypted — RushDB never stores it in plaintext. + +--- + +## Step 4: Verify the connection + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('YOUR_API_KEY') +// For self-hosted RushDB: +// const db = new RushDB('YOUR_API_KEY', { url: 'https://your-rushdb-host/api/v1' }) + +await db.records.create({ label: 'CONNECTION_TEST', data: { ok: true } }) +const result = await db.records.find({ labels: ['CONNECTION_TEST'] }) +console.log(result.total) // 1 — written to your Aura instance +``` + + + + +```python +from rushdb import RushDB + +db = RushDB('YOUR_API_KEY', base_url='https://api.rushdb.com/api/v1') + +db.records.create('CONNECTION_TEST', {'ok': True}) +result = db.records.find({'labels': ['CONNECTION_TEST']}) +print(result.total) # 1 — written to your Aura instance +``` + + + + +```bash +# Create a test record +curl -X POST https://api.rushdb.com/api/v1/records \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"label":"CONNECTION_TEST","data":{"ok":true}}' + +# Read it back +curl -X POST https://api.rushdb.com/api/v1/records/search \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"labels":["CONNECTION_TEST"]}' +``` + + + + +--- + +## Step 5: Validate with a raw Cypher query (optional) + +BYOC and self-hosted projects have access to `POST /api/v1/query/raw`. Use it to confirm data is landing in Aura directly. 
+
+```bash
+curl -X POST https://api.rushdb.com/api/v1/query/raw \
+  -H "Authorization: Bearer $RUSHDB_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "MATCH (n) RETURN labels(n) AS labels, count(n) AS count ORDER BY count DESC LIMIT 20"
+  }'
+```
+
+You can also open the Aura console's built-in Browser and run the same Cypher query directly to confirm the same nodes appear.
+
+---
+
+## BYOC with a self-hosted RushDB instance
+
+If you are running RushDB on your own infrastructure and want to point it at an Aura instance, set the Neo4j env vars in your `docker-compose.yml` to your Aura URI instead of `bolt://neo4j:7687`:
+
+```yaml
+environment:
+  NEO4J_URL: neo4j+s://xxxxxxxx.databases.neo4j.io:7687
+  NEO4J_USERNAME: neo4j
+  NEO4J_PASSWORD: your-aura-password
+```
+
+See [Self-Hosting RushDB](./deployment.mdx) for the complete Docker Compose setup.
+
+---
+
+## What changes with BYOC vs managed
+
+| | Managed (default) | BYOC |
+|---|---|---|
+| Data location | RushDB-managed Neo4j | Your Aura / Neo4j |
+| Raw Cypher access | No | Yes — `POST /query/raw` |
+| Billing | KU-based | KU-based (same) |
+| Wipe / restore | Via RushDB dashboard | Full Neo4j backup tools available |
+| SLA for the graph | RushDB SLA | Your Neo4j Aura SLA |
+
+See [BYOC vs Managed vs Self-Hosted](./byoc-vs-managed.mdx) for a full comparison. 
+ +--- + +## Next steps + +- [BYOC vs Managed vs Self-Hosted](./byoc-vs-managed.mdx) — choose the right topology +- [Self-Hosting RushDB](./deployment.mdx) — run both RushDB and the graph on your own infra +- [Project Setup After Deployment](./self-hosted-project-setup.mdx) — configure embedding models and team access per project diff --git a/docs/docs/tutorials/customer-360.mdx b/docs/docs/tutorials/customer-360.mdx new file mode 100644 index 00000000..48703240 --- /dev/null +++ b/docs/docs/tutorials/customer-360.mdx @@ -0,0 +1,484 @@ +--- +sidebar_position: 26 +title: "Customer 360 as a Connected Graph" +description: Model users, accounts, subscriptions, invoices, touchpoints, and support interactions as a connected graph so customer context becomes retrievable instead of siloed. +tags: [Domain Blueprint, Relationships, SearchQuery, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Customer 360 as a Connected Graph + +Customer context typically lives in five or more separated systems: CRM, billing, support, product analytics, and marketing automation. Getting a complete picture of one customer requires manual cross-referencing across all of them. + +A connected graph collapses that. Put every customer-related record into RushDB — accounts, subscriptions, invoices, touchpoints, support tickets, feature usage — and you can answer complex, cross-domain questions with one query. 
+ +--- + +## Graph shape + +```mermaid +graph LR + USER[USER] -->|BELONGS_TO| ACCOUNT[ACCOUNT] + ACCOUNT -->|HAS_SUBSCRIPTION| SUBSCRIPTION[SUBSCRIPTION] + SUBSCRIPTION -->|HAS_INVOICE| INVOICE[INVOICE] + USER -->|CREATED| TICKET[SUPPORT TICKET] + ACCOUNT -->|HAS_TOUCHPOINT| TOUCHPOINT[TOUCHPOINT] + ACCOUNT -->|USES_FEATURE| FEATURE_USAGE[FEATURE_USAGE] +``` + +| Label | What it represents | +|---|---| +| `USER` | An individual user with login/profile data | +| `ACCOUNT` | An organization or company | +| `SUBSCRIPTION` | An active or cancelled plan | +| `INVOICE` | A billing invoice | +| `TOUCHPOINT` | A marketing or sales interaction (email, call, demo) | +| `SUPPORT_TICKET` | An inbound support request | +| `FEATURE_USAGE` | A log of which product features are used and how often | + +--- + +## Step 1: Ingest account and user data + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +// Create account +const account = await db.records.create({ + label: 'ACCOUNT', + data: { + name: 'Acme Corp', + plan: 'enterprise', + region: 'EU', + mrr: 1200, + createdAt: '2024-01-15' + } +}) + +// Create users and link them to the account +await db.records.importJson({ + label: 'USER', + data: [ + { email: 'alice@acme.com', name: 'Alice', role: 'admin', lastActiveAt: '2025-03-01' }, + { email: 'bob@acme.com', name: 'Bob', role: 'member', lastActiveAt: '2025-02-20' } + ] +}) + +const users = await db.records.find({ + labels: ['USER'], + where: { email: { $in: ['alice@acme.com', 'bob@acme.com'] } } +}) + +for (const user of users.data) { + await db.records.attach({ + source: user, + target: account, + options: { type: 'BELONGS_TO', direction: 'out' } + }) +} +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +account = db.records.create("ACCOUNT", { + "name": "Acme Corp", + "plan": "enterprise", + "region": "EU", 
+ "mrr": 1200, + "createdAt": "2024-01-15" +}) + +db.records.import_json({ + "label": "USER", + "data": [ + {"email": "alice@acme.com", "name": "Alice", "role": "admin", "lastActiveAt": "2025-03-01"}, + {"email": "bob@acme.com", "name": "Bob", "role": "member", "lastActiveAt": "2025-02-20"} + ] +}) + +users = db.records.find({ + "labels": ["USER"], + "where": {"email": {"$in": ["alice@acme.com", "bob@acme.com"]}} +}) + +for user in users.data: + db.records.attach(user.id, account.id, {"type": "BELONGS_TO", "direction": "out"}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +ACCOUNT_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"ACCOUNT","data":{"name":"Acme Corp","plan":"enterprise","region":"EU","mrr":1200,"createdAt":"2024-01-15"}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/import/json" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"USER","data":[{"email":"alice@acme.com","name":"Alice","role":"admin"},{"email":"bob@acme.com","name":"Bob","role":"member"}]}' +``` + + + + +--- + +## Step 2: Add subscription and invoice history + + + + +```typescript +const subscription = await db.records.create({ + label: 'SUBSCRIPTION', + data: { + plan: 'enterprise', + status: 'active', + startedAt: '2024-01-15', + renewsAt: '2026-01-15', + seats: 25 + } +}) + +await db.records.attach({ + source: account, + target: subscription, + options: { type: 'HAS_SUBSCRIPTION', direction: 'out' } +}) + +// Monthly invoices +await db.records.importJson({ + label: 'INVOICE', + data: [ + { month: '2025-01', amount: 1200, status: 'paid', paidAt: '2025-01-05' }, + { month: '2025-02', amount: 1200, status: 'paid', paidAt: '2025-02-05' }, + { month: '2025-03', amount: 1200, status: 'overdue', paidAt: null } + ] +}) + +const invoices = await db.records.find({ labels: ['INVOICE'] }) +for (const invoice of invoices.data) { + await 
db.records.attach({ + source: subscription, + target: invoice, + options: { type: 'HAS_INVOICE', direction: 'out' } + }) +} +``` + + + + +```python +subscription = db.records.create("SUBSCRIPTION", { + "plan": "enterprise", + "status": "active", + "startedAt": "2024-01-15", + "renewsAt": "2026-01-15", + "seats": 25 +}) + +db.records.attach(account.id, subscription.id, {"type": "HAS_SUBSCRIPTION", "direction": "out"}) + +db.records.import_json({ + "label": "INVOICE", + "data": [ + {"month": "2025-01", "amount": 1200, "status": "paid", "paidAt": "2025-01-05"}, + {"month": "2025-02", "amount": 1200, "status": "paid", "paidAt": "2025-02-05"}, + {"month": "2025-03", "amount": 1200, "status": "overdue", "paidAt": None} + ] +}) + +invoices = db.records.find({"labels": ["INVOICE"]}) +for invoice in invoices.data: + db.records.attach(subscription.id, invoice.id, {"type": "HAS_INVOICE", "direction": "out"}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/import/json" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"INVOICE","data":[{"month":"2025-01","amount":1200,"status":"paid"},{"month":"2025-03","amount":1200,"status":"overdue"}]}' +``` + + + + +--- + +## Step 3: Query accounts with overdue invoices + +Find enterprise accounts that have at least one overdue invoice — the churn-risk signal. 
+ + + + +```typescript +const overdueAccounts = await db.records.find({ + labels: ['ACCOUNT'], + where: { + plan: 'enterprise', + SUBSCRIPTION: { + $relation: { type: 'HAS_SUBSCRIPTION', direction: 'out' }, + INVOICE: { + $relation: { type: 'HAS_INVOICE', direction: 'out' }, + status: 'overdue' + } + } + } +}) + +console.log(`Accounts with overdue invoices: ${overdueAccounts.total}`) +for (const acct of overdueAccounts.data) { + console.log(` ${acct.name} — MRR: ${acct.mrr}`) +} +``` + + + + +```python +overdue_accounts = db.records.find({ + "labels": ["ACCOUNT"], + "where": { + "plan": "enterprise", + "SUBSCRIPTION": { + "$relation": {"type": "HAS_SUBSCRIPTION", "direction": "out"}, + "INVOICE": { + "$relation": {"type": "HAS_INVOICE", "direction": "out"}, + "status": "overdue" + } + } + } +}) + +print(f"Accounts with overdue invoices: {overdue_accounts.total}") +for acct in overdue_accounts.data: + print(f" {acct.data.get('name')} — MRR: {acct.data.get('mrr')}") +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["ACCOUNT"], + "where": { + "plan": "enterprise", + "SUBSCRIPTION": { + "$relation": {"type": "HAS_SUBSCRIPTION", "direction": "out"}, + "INVOICE": { + "$relation": {"type": "HAS_INVOICE", "direction": "out"}, + "status": "overdue" + } + } + } + }' +``` + + + + +--- + +## Step 4: MRR by region aggregate + + + + +```typescript +const mrrByRegion = await db.records.find({ + labels: ['ACCOUNT'], + where: { SUBSCRIPTION: { $relation: { type: 'HAS_SUBSCRIPTION', direction: 'out' }, status: 'active' } }, + aggregate: { + totalMrr: { fn: 'sum', field: 'mrr', alias: '$record' }, + region: '$record.region' + }, + groupBy: ['region', 'totalMrr'], + orderBy: { totalMrr: 'desc' } +}) + +for (const row of mrrByRegion.data) { + console.log(`${row.region}: $${row.totalMrr}`) +} +``` + + + + +```python +mrr_by_region = db.records.find({ + "labels": ["ACCOUNT"], + "where": { + "SUBSCRIPTION": { 
+ "$relation": {"type": "HAS_SUBSCRIPTION", "direction": "out"}, + "status": "active" + } + }, + "aggregate": { + "totalMrr": {"fn": "sum", "field": "mrr", "alias": "$record"}, + "region": "$record.region" + }, + "groupBy": ["region", "totalMrr"], + "orderBy": {"totalMrr": "desc"} +}) + +for row in mrr_by_region.data: + print(f"{row.data.get('region')}: ${row.data.get('totalMrr')}") +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["ACCOUNT"], + "aggregate": { + "totalMrr": {"fn": "sum", "field": "mrr", "alias": "$record"}, + "region": "$record.region" + }, + "groupBy": ["region", "totalMrr"], + "orderBy": {"totalMrr": "desc"} + }' +``` + + + + +--- + +## Step 5: Full context for a support ticket + +When a support ticket comes in, retrieve the full account context automatically — plan, MRR, open invoices, and recent touchpoints — so the agent or support rep starts with the complete picture. + + + + +```typescript +async function getAccountContext(accountId: string) { + const [accountResult, subscriptionResult, overdueInvoices, recentTickets] = await Promise.all([ + db.records.find({ labels: ['ACCOUNT'], where: { __id: accountId } }), + db.records.find({ + labels: ['SUBSCRIPTION'], + where: { + ACCOUNT: { $relation: { type: 'HAS_SUBSCRIPTION', direction: 'in' }, __id: accountId }, + status: 'active' + } + }), + db.records.find({ + labels: ['INVOICE'], + where: { + SUBSCRIPTION: { + $relation: { type: 'HAS_INVOICE', direction: 'in' }, + ACCOUNT: { $relation: { type: 'HAS_SUBSCRIPTION', direction: 'in' }, __id: accountId } + }, + status: 'overdue' + } + }), + db.records.find({ + labels: ['SUPPORT_TICKET'], + where: { + USER: { + $relation: { type: 'CREATED', direction: 'in' }, + ACCOUNT: { $relation: { type: 'BELONGS_TO', direction: 'in' }, __id: accountId } + } + }, + orderBy: { createdAt: 'desc' }, + limit: 5 + }) + ]) + + return { + account: accountResult.data[0], + activeSubscription: 
subscriptionResult.data[0], + overdueCount: overdueInvoices.total, + recentTickets: recentTickets.data + } +} +``` + + + + +```python +from concurrent.futures import ThreadPoolExecutor + + +def get_account_context(account_id: str) -> dict: + with ThreadPoolExecutor(max_workers=4) as executor: + fut_acct = executor.submit(db.records.find, {"labels": ["ACCOUNT"], "where": {"__id": account_id}}) + fut_sub = executor.submit(db.records.find, {"labels": ["SUBSCRIPTION"], "where": {"ACCOUNT": {"$relation": {"type": "HAS_SUBSCRIPTION", "direction": "in"}, "__id": account_id}, "status": "active"}}) + fut_inv = executor.submit(db.records.find, {"labels": ["INVOICE"], "where": {"SUBSCRIPTION": {"$relation": {"type": "HAS_INVOICE", "direction": "in"}, "ACCOUNT": {"$relation": {"type": "HAS_SUBSCRIPTION", "direction": "in"}, "__id": account_id}}, "status": "overdue"}}) + fut_tick = executor.submit(db.records.find, {"labels": ["SUPPORT_TICKET"], "where": {"USER": {"$relation": {"type": "CREATED", "direction": "in"}, "ACCOUNT": {"$relation": {"type": "BELONGS_TO", "direction": "in"}, "__id": account_id}}}, "orderBy": {"createdAt": "desc"}, "limit": 5}) + + return { + "account": fut_acct.result().data[0].data if fut_acct.result().data else None, + "activeSubscription": fut_sub.result().data[0].data if fut_sub.result().data else None, + "overdueCount": fut_inv.result().total, + "recentTickets": [t.data for t in fut_tick.result().data] + } +``` + + + + +```bash +# Account overdue invoices check +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"INVOICE\"], + \"where\": { + \"SUBSCRIPTION\": { + \"\$relation\": {\"type\": \"HAS_INVOICE\", \"direction\": \"in\"}, + \"ACCOUNT\": { + \"\$relation\": {\"type\": \"HAS_SUBSCRIPTION\", \"direction\": \"in\"}, + \"__id\": \"$ACCOUNT_ID\" + } + }, + \"status\": \"overdue\" + } + }" +``` + + + + +--- + +## Production caveat + +Customer graphs grow with every activity event. 
Feature usage logs and touchpoints can reach millions of records for enterprise accounts. Use `limit` and `orderBy` on time-sorted fields (`occurredAt`, `createdAt`) to bound retrieval, and aggregate feature usage counts rather than surfacing every raw log entry. + +--- + +## Next steps + +- [Incident Response Graphs](./incident-response.mdx) — add operational context to account graphs +- [Hybrid Retrieval](./hybrid-retrieval.mdx) — semantic search within a specific account's records +- [Building a Graph-Backed API Layer](./graph-backed-api.mdx) — expose this graph through a production API diff --git a/docs/docs/tutorials/data-lineage.mdx b/docs/docs/tutorials/data-lineage.mdx new file mode 100644 index 00000000..d56c2176 --- /dev/null +++ b/docs/docs/tutorials/data-lineage.mdx @@ -0,0 +1,601 @@ +--- +sidebar_position: 12 +title: "End-to-End Data Lineage: From Source to Answer" +description: Model imported records, transformation steps, derived summaries, and final outputs so every answer can be traced back to its upstream source. +tags: [Lineage, Audit, Graph Modeling, Governance] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# End-to-End Data Lineage: From Source to Answer + +When a dashboard shows a number, a sales rep acts on it. When a model generates a recommendation, an engineer ships it. 
Lineage answers the question that always follows: *where did this come from?* + +This tutorial models a data pipeline as a graph: + +- **SOURCE** nodes represent raw data origins (database dumps, API responses, uploaded files) +- **ARTIFACT** nodes represent transformed or derived outputs (cleaned datasets, enriched records, summaries) +- **PIPELINE_RUN** nodes capture when and how a transformation happened +- **ANSWER** nodes represent final outputs — responses, reports, or decisions — along with the artifacts they drew from + +Every node is connected by typed relationships, so any output can be traced back to its origin in a single traversal query. + +--- + +## Lineage graph shape + +```mermaid +graph LR + SRC1[SOURCE: crm-export-2025-03-01] -->|FEEDS| PR1[PIPELINE_RUN: enrich-contacts-v2] + SRC2[SOURCE: billing-snapshot-Q1] -->|FEEDS| PR1 + PR1 -->|PRODUCED| ART1[ARTIFACT: contacts-enriched-20250301] + ART1 -->|FEEDS| PR2[PIPELINE_RUN: generate-summary-2025-Q1] + PR2 -->|PRODUCED| ART2[ARTIFACT: q1-contact-summary] + ART2 -->|CITED_BY| ANS1[ANSWER: Q1 retention report] +``` + +--- + +## Step 1: Register source records + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('RUSHDB_API_KEY') + +const crmExport = await db.records.create({ + label: 'SOURCE', + data: { + name: 'crm-export-2025-03-01', + origin: 'Salesforce', + format: 'csv', + rowCount: 14200, + capturedAt: '2025-03-01T00:00:00Z', + checksum: 'sha256:abc123' + } +}) + +const billingSnapshot = await db.records.create({ + label: 'SOURCE', + data: { + name: 'billing-snapshot-Q1', + origin: 'Stripe', + format: 'json', + rowCount: 3800, + capturedAt: '2025-03-31T23:59:59Z', + checksum: 'sha256:def456' + } +}) +``` + + + + +```python +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") + +crm_export = db.records.create("SOURCE", { + "name": "crm-export-2025-03-01", + "origin": "Salesforce", + "format": "csv", + 
"rowCount": 14200, + "capturedAt": "2025-03-01T00:00:00Z", + "checksum": "sha256:abc123" +}) + +billing_snapshot = db.records.create("SOURCE", { + "name": "billing-snapshot-Q1", + "origin": "Stripe", + "format": "json", + "rowCount": 3800, + "capturedAt": "2025-03-31T23:59:59Z", + "checksum": "sha256:def456" +}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +CRM_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"SOURCE","data":{"name":"crm-export-2025-03-01","origin":"Salesforce","format":"csv","rowCount":14200,"capturedAt":"2025-03-01T00:00:00Z"}}' \ + | jq -r '.data.__id') + +BILLING_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"SOURCE","data":{"name":"billing-snapshot-Q1","origin":"Stripe","format":"json","rowCount":3800,"capturedAt":"2025-03-31T23:59:59Z"}}' \ + | jq -r '.data.__id') +``` + + + + +--- + +## Step 2: Record a pipeline run + +A `PIPELINE_RUN` captures the job identity, code version, and run metadata. It connects upstream sources to downstream artifacts. 
+ + + + +```typescript +const enrichRun = await db.records.create({ + label: 'PIPELINE_RUN', + data: { + runId: 'enrich-contacts-v2-20250301', + pipelineName: 'enrich-contacts', + version: 'v2.4.1', + startedAt: '2025-03-01T01:00:00Z', + finishedAt: '2025-03-01T01:47:23Z', + status: 'success', + triggeredBy: 'scheduler' + } +}) + +// Link sources into this run +await Promise.all([ + db.records.attach({ source: crmExport, target: enrichRun, options: { type: 'FEEDS' } }), + db.records.attach({ source: billingSnapshot, target: enrichRun, options: { type: 'FEEDS' } }), +]) +``` + + + + +```python +enrich_run = db.records.create("PIPELINE_RUN", { + "runId": "enrich-contacts-v2-20250301", + "pipelineName": "enrich-contacts", + "version": "v2.4.1", + "startedAt": "2025-03-01T01:00:00Z", + "finishedAt": "2025-03-01T01:47:23Z", + "status": "success", + "triggeredBy": "scheduler" +}) + +db.records.attach(crm_export.id, enrich_run.id, {"type": "FEEDS"}) +db.records.attach(billing_snapshot.id, enrich_run.id, {"type": "FEEDS"}) +``` + + + + +```bash +RUN_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PIPELINE_RUN","data":{"runId":"enrich-contacts-v2-20250301","pipelineName":"enrich-contacts","version":"v2.4.1","status":"success"}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$CRM_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$RUN_ID\"],\"options\":{\"type\":\"FEEDS\"}}" + +curl -s -X POST "$BASE/records/$BILLING_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$RUN_ID\"],\"options\":{\"type\":\"FEEDS\"}}" +``` + + + + +--- + +## Step 3: Register the produced artifact + + + + +```typescript +const enrichedArtifact = await db.records.create({ + label: 'ARTIFACT', + data: { + name: 'contacts-enriched-20250301', + type: 'dataset', + rowCount: 13940, + storagePath: 's3://data-lake/enriched/contacts-2025-03-01.parquet', + createdAt: 
'2025-03-01T01:47:23Z', + schema: 'contacts-enriched-v3' + } +}) + +await db.records.attach({ + source: enrichRun, + target: enrichedArtifact, + options: { type: 'PRODUCED' } +}) +``` + + + + +```python +enriched = db.records.create("ARTIFACT", { + "name": "contacts-enriched-20250301", + "type": "dataset", + "rowCount": 13940, + "storagePath": "s3://data-lake/enriched/contacts-2025-03-01.parquet", + "createdAt": "2025-03-01T01:47:23Z", + "schema": "contacts-enriched-v3" +}) + +db.records.attach(enrich_run.id, enriched.id, {"type": "PRODUCED"}) +``` + + + + +```bash +ART_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"ARTIFACT","data":{"name":"contacts-enriched-20250301","type":"dataset","rowCount":13940,"schema":"contacts-enriched-v3"}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$RUN_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$ART_ID\"],\"options\":{\"type\":\"PRODUCED\"}}" +``` + + + + +--- + +## Step 4: Chain another pipeline run on top + +A second pipeline reads the enriched artifact and produces a summary. 
+ + + + +```typescript +const summaryRun = await db.records.create({ + label: 'PIPELINE_RUN', + data: { + runId: 'summary-q1-20250401', + pipelineName: 'generate-summary', + version: 'v1.2.0', + startedAt: '2025-04-01T08:00:00Z', + finishedAt: '2025-04-01T08:11:42Z', + status: 'success', + triggeredBy: 'manual' + } +}) + +const summaryArtifact = await db.records.create({ + label: 'ARTIFACT', + data: { + name: 'q1-contact-summary', + type: 'report', + storagePath: 's3://reports/q1-2025/contact-summary.json', + createdAt: '2025-04-01T08:11:42Z' + } +}) + +await Promise.all([ + db.records.attach({ source: enrichedArtifact, target: summaryRun, options: { type: 'FEEDS' } }), + db.records.attach({ source: summaryRun, target: summaryArtifact, options: { type: 'PRODUCED' } }), +]) +``` + + + + +```python +summary_run = db.records.create("PIPELINE_RUN", { + "runId": "summary-q1-20250401", + "pipelineName": "generate-summary", + "version": "v1.2.0", + "status": "success", + "triggeredBy": "manual" +}) + +summary_artifact = db.records.create("ARTIFACT", { + "name": "q1-contact-summary", + "type": "report", + "storagePath": "s3://reports/q1-2025/contact-summary.json", + "createdAt": "2025-04-01T08:11:42Z" +}) + +db.records.attach(enriched.id, summary_run.id, {"type": "FEEDS"}) +db.records.attach(summary_run.id, summary_artifact.id, {"type": "PRODUCED"}) +``` + + + + +```bash +SUMRUN_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PIPELINE_RUN","data":{"runId":"summary-q1-20250401","pipelineName":"generate-summary","version":"v1.2.0","status":"success"}}' \ + | jq -r '.data.__id') + +SUMART_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"ARTIFACT","data":{"name":"q1-contact-summary","type":"report"}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$ART_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d 
"{\"targets\":[\"$SUMRUN_ID\"],\"options\":{\"type\":\"FEEDS\"}}" + +curl -s -X POST "$BASE/records/$SUMRUN_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$SUMART_ID\"],\"options\":{\"type\":\"PRODUCED\"}}" +``` + + + + +--- + +## Step 5: Link an answer to the artifacts it used + +An `ANSWER` is the final output — an LLM response, a dashboard stat, or a filed report — linked to the artifacts it drew from. + + + + +```typescript +const answer = await db.records.create({ + label: 'ANSWER', + data: { + answerId: 'report-q1-retention', + type: 'retention-report', + generatedAt: '2025-04-02T09:00:00Z', + generatedBy: 'analytics-agent-v3', + content: 'Q1 2025: churn rate dropped 4.2% YoY driven by enterprise segment growth.' + } +}) + +await db.records.attach({ + source: summaryArtifact, + target: answer, + options: { type: 'CITED_BY' } +}) +``` + + + + +```python +answer = db.records.create("ANSWER", { + "answerId": "report-q1-retention", + "type": "retention-report", + "generatedAt": "2025-04-02T09:00:00Z", + "generatedBy": "analytics-agent-v3", + "content": "Q1 2025: churn rate dropped 4.2% YoY." +}) + +db.records.attach(summary_artifact.id, answer.id, {"type": "CITED_BY"}) +``` + + + + +```bash +ANS_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"ANSWER","data":{"answerId":"report-q1-retention","type":"retention-report","generatedAt":"2025-04-02T09:00:00Z"}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$SUMART_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$ANS_ID\"],\"options\":{\"type\":\"CITED_BY\"}}" +``` + + + + +--- + +## Step 6: Trace an answer back to its raw sources + +Given an answer ID, walk the full lineage chain to recover all upstream sources. 
+ + + + +```typescript +// Find all sources two hops upstream from a given answer +const lineage = await db.records.find({ + labels: ['SOURCE'], + where: { + PIPELINE_RUN: { + $alias: '$run', + $relation: { type: 'FEEDS', direction: 'out' }, + ARTIFACT: { + $alias: '$artifact', + $relation: { type: 'PRODUCED', direction: 'in' }, + PIPELINE_RUN: { + $alias: '$run2', + ARTIFACT: { + $alias: '$finalArt', + $relation: { type: 'PRODUCED', direction: 'in' }, + ANSWER: { + answerId: 'report-q1-retention' + } + } + } + } + } + }, + aggregate: { + sourceName: '$record.name', + origin: '$record.origin', + capturedAt: '$record.capturedAt', + checksum: '$record.checksum', + runId: '$run.runId', + finalArtifact: '$finalArt.name' + } +}) +``` + + + + +```python +lineage = db.records.find({ + "labels": ["SOURCE"], + "where": { + "PIPELINE_RUN": { + "$alias": "$run", + "$relation": {"type": "FEEDS", "direction": "out"}, + "ARTIFACT": { + "$alias": "$artifact", + "$relation": {"type": "PRODUCED", "direction": "in"}, + "PIPELINE_RUN": { + "ARTIFACT": { + "$alias": "$finalArt", + "$relation": {"type": "PRODUCED", "direction": "in"}, + "ANSWER": { + "answerId": "report-q1-retention" + } + } + } + } + } + }, + "aggregate": { + "sourceName": "$record.name", + "origin": "$record.origin", + "capturedAt": "$record.capturedAt", + "runId": "$run.runId" + } +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["SOURCE"], + "where": { + "PIPELINE_RUN": { + "$alias": "$run", + "$relation": {"type": "FEEDS", "direction": "out"}, + "ARTIFACT": { + "$relation": {"type": "PRODUCED", "direction": "in"}, + "PIPELINE_RUN": { + "ARTIFACT": { + "$alias": "$finalArt", + "$relation": {"type": "PRODUCED", "direction": "in"}, + "ANSWER": {"answerId": "report-q1-retention"} + } + } + } + } + }, + "aggregate": { + "sourceName": "$record.name", + "origin": "$record.origin", + "capturedAt": "$record.capturedAt" + } + }' +``` + + + 
+ +--- + +## Step 7: Find all failed pipeline runs and their downstream artifacts + + + + +```typescript +const failedDownstream = await db.records.find({ + labels: ['ARTIFACT'], + where: { + PIPELINE_RUN: { + $alias: '$run', + $relation: { type: 'PRODUCED', direction: 'in' }, + status: 'failed' + } + }, + aggregate: { + artifactName: '$record.name', + artifactType: '$record.type', + failedRunId: '$run.runId', + failedAt: '$run.finishedAt' + }, + orderBy: { failedAt: 'desc' } +}) +``` + + + + +```python +failed_downstream = db.records.find({ + "labels": ["ARTIFACT"], + "where": { + "PIPELINE_RUN": { + "$alias": "$run", + "$relation": {"type": "PRODUCED", "direction": "in"}, + "status": "failed" + } + }, + "aggregate": { + "artifactName": "$record.name", + "failedRunId": "$run.runId", + "failedAt": "$run.finishedAt" + }, + "orderBy": {"failedAt": "desc"} +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["ARTIFACT"], + "where": { + "PIPELINE_RUN": { + "$alias": "$run", + "$relation": {"type": "PRODUCED", "direction": "in"}, + "status": "failed" + } + }, + "aggregate": { + "artifactName": "$record.name", + "failedRunId": "$run.runId", + "failedAt": "$run.finishedAt" + }, + "orderBy": {"failedAt": "desc"} + }' +``` + + + + +--- + +## Production caveat + +Multi-hop lineage queries will scan the entire reachable subgraph unless you scope them. Always filter by a narrow property on the starting label — `answerId`, `runId`, or a date range on `capturedAt` — before the traversal starts. Deep chains (more than four hops) can be expensive. For very long chains, consider materializing intermediate lineage summaries as ARTIFACT metadata instead of relying on traversal alone. 
+ +--- + +## Next steps + +- [Audit Trails with Immutable Events](./audit-trails.mdx) — separate event log from current state for reconstructible history +- [Versioning Records Without Losing Queryability](./record-versioning.mdx) — keeping historical state queryable alongside current state +- [RushDB as a Memory Layer](./memory-layer.mdx) — using the same EPISODE + REFERENCE pattern for agent memory diff --git a/docs/docs/tutorials/deployment.md b/docs/docs/tutorials/deployment.md deleted file mode 100644 index bf8d7fe9..00000000 --- a/docs/docs/tutorials/deployment.md +++ /dev/null @@ -1,552 +0,0 @@ ---- -title: Deployment -description: Learn how to deploy RushDB -sidebar_position: 5 ---- - -# Deployment Guide - -This guide provides comprehensive instructions for deploying RushDB in various environments. Choose the deployment option that best suits your needs. - -## Deployment Options - -RushDB offers two primary deployment options: - -1. **RushDB Cloud (Managed Service)** - The simplest option with zero setup -2. **Self-Hosted RushDB** - Full control over your infrastructure with multiple deployment methods - -## Option 1: RushDB Cloud (Managed Service) - -The easiest way to start using RushDB is through the managed cloud service. - -### Features -- Zero setup required -- Free tier available -- Fully managed infrastructure -- Automatic updates and maintenance -- Professional support - -### Getting Started with RushDB Cloud -1. Sign up at [app.rushdb.com](https://app.rushdb.com) -2. Create a new project -3. Get your API token from the dashboard -4. Start using RushDB APIs via SDKs or REST - -## Option 2: Self-Hosted RushDB - -Self-hosting gives you complete control over your RushDB deployment and data. - -### Prerequisites - -Before deploying RushDB, ensure you have: - -1. 
**Neo4j Instance**: - - Minimum version: `5.25.1` - - Required plugins: - - `apoc-core` (installed and enabled) - - `graph-data-science` (required for vector search capabilities) - - Can be self-hosted or using Neo4j Aura cloud service - -2. **For Docker Deployment**: - - Docker Engine 20.10.0+ - - Docker Compose 2.0.0+ (if using Docker Compose) - - Minimum 2GB RAM for the container - -3. **For AWS Deployment**: - - AWS account with necessary permissions - - Terraform 1.0.0+ installed locally - -### Option 2A: Docker Container Deployment - -The simplest way to self-host RushDB is using Docker. - -#### Basic Docker Run Command - -```bash -docker run -p 3000:3000 \ ---name rushdb \ --e NEO4J_URL='neo4j+s://your-neo4j-instance.databases.neo4j.io' \ --e NEO4J_USERNAME='neo4j' \ --e NEO4J_PASSWORD='your-password' \ -rushdb/platform -``` - -#### Docker Compose Deployment - -Create a `docker-compose.yml` file: - -```yaml -version: '3.8' -services: - rushdb: - image: rushdb/platform - container_name: rushdb - ports: - - "3000:3000" - environment: - - NEO4J_URL=neo4j+s://your-neo4j-instance.databases.neo4j.io - - NEO4J_USERNAME=neo4j - - NEO4J_PASSWORD=your-password - # Add additional environment variables as needed -``` - -Then run: - -```bash -docker-compose up -d -``` - -#### All-in-One Docker Compose Deployment (with Neo4j) - -For development or testing environments, you can run both RushDB and Neo4j together: -
-docker-compose.yml - -```yaml -version: '3.8' -services: - rushdb: - image: rushdb/platform - container_name: rushdb - depends_on: - neo4j: - condition: service_healthy - ports: - - "3000:3000" - environment: - - NEO4J_URL=bolt://neo4j - - NEO4J_USERNAME=neo4j - - NEO4J_PASSWORD=password - # Add additional environment variables as needed - neo4j: - image: neo4j:5.25.1 - healthcheck: - test: [ "CMD-SHELL", "wget --no-verbose --tries=1 --spider localhost:7474 || exit 1" ] - interval: 5s - retries: 30 - start_period: 10s - ports: - - "7474:7474" - - "7687:7687" - environment: - - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes - - NEO4J_AUTH=neo4j/password - - NEO4J_PLUGINS=["apoc", "graph-data-science"] - volumes: - - neo4j-plugins:/var/lib/neo4j/plugins - - neo4j-data:/data - - neo4j-logs:/logs - - neo4j-conf:/var/lib/neo4j/conf - -volumes: - neo4j-plugins: - neo4j-data: - neo4j-logs: - neo4j-conf: -``` -
- -### Option 2B: AWS Deployment with Terraform - -For production-grade deployments, RushDB can be deployed to AWS using Terraform. - -#### Terraform Deployment Steps - -1. **Prepare Your Environment** - - Clone the RushDB repository or create a new directory for your Terraform configuration. - -2. **Create Terraform Configuration File** - - Create a `main.tf` file with the following content (adjust as needed): - -
-rushdb-terraform.tf - -```hcl -terraform { - required_providers { - aws = { - source = "hashicorp/aws" - version = "~> 4.0" - } - } -} - -# Configure AWS provider -provider "aws" { - region = "us-east-1" # Change to your preferred region -} - -# Use default VPC and subnets -data "aws_vpc" "default" { - default = true -} - -data "aws_subnets" "all" { - filter { - name = "vpc-id" - values = [data.aws_vpc.default.id] - } -} - -# IAM role for ECS task execution -resource "aws_iam_role" "ecs_task_execution_role" { - name = "rushdb-ecs-task-execution-role" - - assume_role_policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Action = "sts:AssumeRole" - Effect = "Allow" - Principal = { - Service = "ecs-tasks.amazonaws.com" - } - } - ] - }) -} - -resource "aws_iam_role_policy_attachment" "ecs_task_execution_policy" { - role = aws_iam_role.ecs_task_execution_role.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" -} - -resource "aws_iam_role_policy_attachment" "cloudwatch_logs_access" { - role = aws_iam_role.ecs_task_execution_role.name - policy_arn = "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess" -} - -# CloudWatch log group for application logs -resource "aws_cloudwatch_log_group" "rushdb_logs" { - name = "/ecs/rushdb" - retention_in_days = 30 - - tags = { - Name = "rushdb-logs" - Environment = "production" - } -} - -# Security group for RushDB -resource "aws_security_group" "rushdb_sg" { - name = "rushdb-security-group" - description = "Allow traffic for RushDB" - vpc_id = data.aws_vpc.default.id - - ingress { - from_port = 0 - to_port = 0 - protocol = "-1" - self = true - cidr_blocks = ["0.0.0.0/0"] - } - - egress { - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - } -} - -# ECS cluster -resource "aws_ecs_cluster" "rushdb_cluster" { - name = "rushdb-ecs-cluster" -} - -# Task execution role -resource "aws_iam_role" "ecs_task_execution_role" { - name = 
"rushdb-ecs-task-execution-role" - - assume_role_policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Action = "sts:AssumeRole" - Effect = "Allow" - Principal = { - Service = "ecs-tasks.amazonaws.com" - } - } - ] - }) -} - -resource "aws_iam_role_policy_attachment" "ecs_task_execution_policy" { - role = aws_iam_role.ecs_task_execution_role.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" -} - -# ECS task definition -resource "aws_ecs_task_definition" "rushdb_task" { - family = "rushdb-task-definition" - network_mode = "awsvpc" - requires_compatibilities = ["FARGATE"] - cpu = "1024" - memory = "2048" - execution_role_arn = aws_iam_role.ecs_task_execution_role.arn - - container_definitions = jsonencode([{ - name = "rushdb" - image = "rushdb/platform:latest" - essential = true - - environment = [ - { name = "NEO4J_URL", value = "neo4j+s://your-neo4j-instance.databases.neo4j.io" }, - { name = "NEO4J_USERNAME", value = "neo4j" }, - { name = "NEO4J_PASSWORD", value = "your-password" }, - { name = "RUSHDB_SELF_HOSTED", value = "true" }, - { name = "RUSHDB_AES_256_ENCRYPTION_KEY", value = "your-32-character-encryption-key" } - ] - - portMappings = [{ - containerPort = 3000 - hostPort = 3000 - protocol = "tcp" - }] - - logConfiguration = { - logDriver = "awslogs" - options = { - "awslogs-group" = aws_cloudwatch_log_group.rushdb_logs.name - "awslogs-region" = "us-east-1" - "awslogs-stream-prefix" = "ecs-rushdb" - } - } - }]) -} - -# ECS service -resource "aws_ecs_service" "rushdb_service" { - name = "rushdb-ecs-service" - cluster = aws_ecs_cluster.rushdb_cluster.id - task_definition = aws_ecs_task_definition.rushdb_task.arn - desired_count = 1 - launch_type = "FARGATE" - - network_configuration { - subnets = data.aws_subnets.all.ids - security_groups = [aws_security_group.rushdb_sg.id] - assign_public_ip = true - } - - depends_on = [ - aws_cloudwatch_log_group.rushdb_logs - ] -} - -# Output the CloudWatch log group 
for easy access -output "cloudwatch_log_group" { - value = aws_cloudwatch_log_group.rushdb_logs.name - description = "CloudWatch log group name for RushDB application logs" -} - -# Output the service URL -output "rushdb_public_ip_note" { - value = "Check the ECS service in AWS Console for the public IP address" - description = "Note about accessing RushDB service" -} -``` - -
- -3. **Initialize Terraform** - -```bash -terraform init -``` - -4. **Plan Deployment** - -```bash -terraform plan -out=tfplan -``` - -5. **Apply the Configuration** - -```bash -terraform apply tfplan -``` - -6. **Access Your RushDB Service** - -After deployment completes, Terraform will output information about your deployment including the CloudWatch log group name. - -#### Viewing Application Logs - -To view your RushDB application logs: - -1. **Using AWS Console**: - - Go to CloudWatch in the AWS Console - - Navigate to "Log groups" - - Find the log group `/ecs/rushdb` - - Click on it to view log streams with prefix `ecs-rushdb` - -2. **Using AWS CLI**: - ```bash - # List log streams - aws logs describe-log-streams --log-group-name "/ecs/rushdb" - - # View recent logs - aws logs tail "/ecs/rushdb" --follow - ``` - -#### Advanced AWS Deployment with Load Balancer and SSL - -For a production deployment with a load balancer and SSL: - -1. Modify the Terraform configuration to include an Application Load Balancer -2. Add Route53 DNS records -3. Configure SSL certificates using ACM - -For a complete example with these features, refer to the `main.tf` in the RushDB repository. 
- -## Environment Variables - -The following environment variables can be used to configure your RushDB deployment: - -### Required Environment Variables - -| Variable | Description | Example | -|----------|-------------|---------| -| `NEO4J_URL` | Connection string for Neo4j database | `neo4j+s://your-instance.databases.neo4j.io` or `bolt://localhost:7687` | -| `NEO4J_USERNAME` | Username for Neo4j database | `neo4j` | -| `NEO4J_PASSWORD` | Password for Neo4j database | `your-password` | - -### Core Application Settings - -| Variable | Description | Default | Required | -|----------|----------------------------------------------|---------|----------| -| `RUSHDB_PORT` | Port for the application server | `3000` | No | -| `RUSHDB_AES_256_ENCRYPTION_KEY` | 32-character key for token encryption | `32SymbolStringForTokenEncryption` | Yes, for production | -| `RUSHDB_DASHBOARD_URL` | URL for dashboard access | `/` | No | -| `RUSHDB_SELF_HOSTED` | Whether running in self-hosted mode | `true` | No | -| `RUSHDB_SERVE_STATIC` | Whether to serve static files (Dashboard UI) | `true` | No | - -### Authentication Settings - -| Variable | Description | Default | Required | -|----------|-------------|---------|----------| -| `RUSHDB_LOGIN` | Admin username | `admin` | No | -| `RUSHDB_PASSWORD` | Admin password | `password` | Yes, for production | -| `RUSHDB_ALLOWED_LOGINS` | List of allowed login usernames | `[]` (all allowed) | No | - -### Rate Limiting - -| Variable | Description | Default | Required | -|----------|-------------|---------|----------| -| `RATE_LIMITER_REQUESTS_LIMIT` | Max requests within time frame | `100` | No | -| `RATE_LIMITER_TTL` | Time frame for rate limiting (ms) | `1000` | No | - -### OAuth and Authentication - -| Variable | Description | Required | -|----------|-------------|----------| -| `GOOGLE_CLIENT_ID` | Google OAuth client ID | For Google auth | -| `GOOGLE_SECRET` | Google OAuth secret | For Google auth | -| `GH_CLIENT_ID` | GitHub OAuth client 
ID | For GitHub auth | -| `GH_SECRET` | GitHub OAuth secret | For GitHub auth | -| `SERVICE_CAPTCHA_KEY` | CAPTCHA service private key | For CAPTCHA | - -### Email Configuration - -| Variable | Description | Required | -|----------|-------------|----------| -| `MAIL_HOST` | Email service host | For email | -| `MAIL_USER` | Email service username | For email | -| `MAIL_PASSWORD` | Email service password | For email | -| `MAIL_FROM` | Default "from" email address | For email | - -## CLI Commands - -RushDB provides CLI commands for managing users in self-hosted installations: - -### Create a New User - -```bash -rushdb create-user -``` - -Example: -```bash -rushdb create-user admin@example.com securepassword123 -``` - -### Update User Password - -```bash -rushdb update-password -``` - -Example: -```bash -rushdb update-password admin@example.com newsecurepassword456 -``` - -## Security Best Practices - -When deploying RushDB to production, follow these security best practices: - -1. **Change default credentials**: - - Change `RUSHDB_LOGIN` and `RUSHDB_PASSWORD` - - Use a strong, unique `RUSHDB_AES_256_ENCRYPTION_KEY` - -2. **Secure your Neo4j database**: - - Use strong passwords - - Limit network access to the database - - Use encrypted connections where possible - -3. **Use HTTPS**: - - Configure SSL/TLS on your load balancer - - Redirect HTTP to HTTPS - -4. 
**Set up proper monitoring and logging**: - - Monitor API usage - - Set up alerts for unusual activity - -## System Requirements - -### Minimum Specifications - -- **CPU**: 1 vCPU (2+ recommended for production) -- **Memory**: 1GB RAM (2GB+ recommended for production) -- **Storage**: 1GB for RushDB (excluding Neo4j storage requirements) -- **Neo4j Requirements**: Refer to [Neo4j system requirements](https://neo4j.com/docs/operations-manual/current/installation/requirements/) - -### Recommended Production Specifications - -- **CPU**: 2+ vCPUs -- **Memory**: 4GB+ RAM -- **Storage**: SSD storage for both RushDB and Neo4j -- **Network**: Low-latency connection between RushDB and Neo4j - -## Troubleshooting - -### Common Issues - -1. **Connection Issues to Neo4j**: - - Ensure Neo4j instance is running and accessible - - Verify credentials and connection string format - - Check network connectivity and firewall settings - -2. **Authentication Failures**: - - Verify admin credentials are correctly set - - Check encryption key length (must be 32 characters) - -3. **Performance Issues**: - - Monitor resource utilization - - Consider scaling up resources or optimizing Neo4j queries - -### Getting Help - -If you encounter problems with your RushDB deployment: - -1. Check the RushDB logs for error messages -2. Visit the [RushDB documentation](https://docs.rushdb.com) -3. Submit an issue on the [RushDB GitHub repository](https://github.com/rush-db/rushdb) - -## Conclusion - -Following this guide, you should have successfully deployed RushDB in your chosen environment. Whether you're using the managed cloud service or self-hosting, RushDB provides a powerful database solution for modern applications. 
diff --git a/docs/docs/tutorials/deployment.mdx b/docs/docs/tutorials/deployment.mdx new file mode 100644 index 00000000..7eccb5e2 --- /dev/null +++ b/docs/docs/tutorials/deployment.mdx @@ -0,0 +1,280 @@ +--- +sidebar_position: 3 +title: "Self-Hosting RushDB" +description: Deploy RushDB on your own infrastructure with Docker Compose, connect your own Neo4j instance, and configure embedding support. +tags: [Deployment, Self-Hosted, Docker] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Self-Hosting RushDB + +RushDB ships as a single Docker image that includes the API server and the dashboard. You bring Neo4j and a Postgres instance (for project/token management), configure a handful of environment variables, and you have a fully functional RushDB instance with no usage limits and no billing. + +--- + +## Prerequisites + +- Docker and Docker Compose installed +- A running or accessible **Neo4j 5+** instance (or let Docker Compose start one for you) +- A running **PostgreSQL 14+** instance (or let Docker Compose start one) +- Optional: an OpenAI-compatible embedding API if you want semantic search + +--- + +## Option A: Full stack with Docker Compose (recommended) + +This Compose file starts RushDB, Neo4j, and Postgres together. Copy it to an empty directory and run `docker compose up -d`. 
+ +```yaml +# docker-compose.yml +version: '3.9' + +services: + neo4j: + image: neo4j:5 + environment: + NEO4J_AUTH: neo4j/rushdb-password + NEO4J_PLUGINS: '["apoc"]' + ports: + - "7474:7474" # Neo4j browser (optional) + - "7687:7687" # Bolt protocol + volumes: + - neo4j_data:/data + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:7474 || exit 1"] + interval: 10s + retries: 10 + + postgres: + image: postgres:16-alpine + environment: + POSTGRES_DB: rushdb + POSTGRES_USER: rushdb + POSTGRES_PASSWORD: rushdb-pg-password + volumes: + - pg_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U rushdb"] + interval: 5s + retries: 10 + + rushdb: + image: rushdb/rushdb:latest + depends_on: + neo4j: + condition: service_healthy + postgres: + condition: service_healthy + ports: + - "3000:3000" + environment: + RUSHDB_SELF_HOSTED: "true" + RUSHDB_LOGIN: admin + RUSHDB_PASSWORD: change-me-in-production + + # Encryption key — must be exactly 32 characters + RUSHDB_AES_256_ENCRYPTION_KEY: "32-char-key-change-in-production" + + # Neo4j + NEO4J_URL: bolt://neo4j:7687 + NEO4J_USERNAME: neo4j + NEO4J_PASSWORD: rushdb-password + + # Postgres + SQL_DB_TYPE: postgres + SQL_DB_URL: postgresql://rushdb:rushdb-pg-password@postgres:5432/rushdb + + # Embedding (optional — remove these 4 lines to disable vector indexes) + RUSHDB_EMBEDDING_BASE_URL: https://api.openai.com/v1 + RUSHDB_EMBEDDING_API_KEY: sk-... + RUSHDB_EMBEDDING_MODEL: text-embedding-3-small + RUSHDB_EMBEDDING_DIMENSIONS: "1536" + +volumes: + neo4j_data: + pg_data: +``` + +```bash +docker compose up -d +``` + +RushDB will be available at `http://localhost:3000`. + +--- + +## Option B: RushDB against an existing Neo4j instance + +If you already have Neo4j running (or an Aura instance), skip the `neo4j` service and point `NEO4J_URL` directly at it. 
+ +```yaml +rushdb: + image: rushdb/rushdb:latest + ports: + - "3000:3000" + environment: + RUSHDB_SELF_HOSTED: "true" + RUSHDB_LOGIN: admin + RUSHDB_PASSWORD: change-me-in-production + RUSHDB_AES_256_ENCRYPTION_KEY: "32-char-key-change-in-production" + + # Point at your existing Neo4j / Aura instance + NEO4J_URL: bolt+s://xxxxxxxx.databases.neo4j.io:7687 + NEO4J_USERNAME: neo4j + NEO4J_PASSWORD: your-aura-password + + SQL_DB_TYPE: postgres + SQL_DB_URL: postgresql://rushdb:rushdb-pg-password@postgres:5432/rushdb +``` + +See [Connecting an Aura Instance](./connect-aura-instance.mdx) for the full BYOC walkthrough. + +--- + +## Environment variable reference + +### Required + +| Variable | Default in image | Description | +|---|---|---| +| `RUSHDB_SELF_HOSTED` | `true` | Must be `"true"` to enable self-hosted mode and create the default admin account | +| `RUSHDB_LOGIN` | `admin` | Dashboard login username | +| `RUSHDB_PASSWORD` | `password` | Dashboard login password — **change this** | +| `RUSHDB_AES_256_ENCRYPTION_KEY` | `32SymbolStringForTokenEncryption` | Exactly 32-character key used to encrypt API tokens at rest — **change this** | +| `NEO4J_URL` | — | Bolt URL of your Neo4j instance | +| `NEO4J_USERNAME` | `neo4j` | Neo4j username | +| `NEO4J_PASSWORD` | `password` | Neo4j password | +| `SQL_DB_URL` | — | PostgreSQL connection string | +| `SQL_DB_TYPE` | `postgres` | Database driver — only `postgres` supported | + +### Optional + +| Variable | Default | Description | +|---|---|---| +| `RUSHDB_PORT` | `3000` | Port the API server listens on | +| `RUSHDB_SERVE_STATIC` | `true` | Serve the built dashboard from the same process | +| `RUSHDB_EMBEDDING_BASE_URL` | `https://api.openai.com/v1` | Base URL of any OpenAI-compatible embeddings endpoint | +| `RUSHDB_EMBEDDING_API_KEY` | — | Bearer token for the embedding provider | +| `RUSHDB_EMBEDDING_MODEL` | — | Model identifier (e.g. `text-embedding-3-small`). 
**Omit to disable vector indexes entirely** | + | `RUSHDB_EMBEDDING_DIMENSIONS` | — | Must match the model's actual output dimensions | + | `RUSHDB_EMBEDDING_BATCH_SIZE` | `500` | Records per embedding backfill batch | + | `RUSHDB_EMBEDDING_MAX_RUNTIME_MS` | `50000` | Max ms the backfill scheduler spends per index per tick | + | `RATE_LIMITER_REQUESTS_LIMIT` | `100` | Max requests per time window | + | `RATE_LIMITER_TTL` | `1000` | Rate limiter window in milliseconds | + +--- + +## First boot + +1. Open `http://localhost:3000` — you will see the RushDB dashboard login screen. +2. Sign in with the `RUSHDB_LOGIN` / `RUSHDB_PASSWORD` you configured. +3. Create your first project: click **New Project**, give it a name, and save. +4. Copy the generated API key from the **API Keys** tab. +5. Test the connection: + +```bash +curl http://localhost:3000/api/v1/records/search \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"labels":["TEST"]}' +# → {"data":[],"total":0} +``` + +--- + +## Connecting your SDK to a self-hosted instance + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('YOUR_API_KEY', { + url: 'http://localhost:3000/api/v1' +}) +``` + + + + +```python +from rushdb import RushDB + +db = RushDB('YOUR_API_KEY', base_url='http://localhost:3000/api/v1') +``` + + + + +```bash +export RUSHDB_API_KEY="YOUR_API_KEY" +export RUSHDB_BASE="http://localhost:3000/api/v1" + +curl "$RUSHDB_BASE/records/search" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"labels":["TEST"]}' +``` + + + + +--- + +## SQLite for single-node deployments + +If you don't want to run a Postgres container, RushDB supports SQLite for the SQL layer (users, projects, tokens). 
Set `SQL_DB_TYPE=sqlite` and omit `SQL_DB_URL`: + +```yaml +rushdb: + image: rushdb/rushdb:latest + environment: + SQL_DB_TYPE: sqlite + SQL_DB_PATH: /data/rushdb.db # optional — defaults to ./rushdb.db + # ...other vars + volumes: + - rushdb_data:/data +``` + +:::note +SQLite is fine for a single RushDB container. Use Postgres for multi-replica deployments or anywhere you need concurrent writes from more than one process. +::: + +--- + +## CLI commands + +RushDB exposes CLI commands for user management inside the container — useful for scripted provisioning or password rotation: + +```bash +# Create a new user +docker exec rushdb rushdb create-user admin@example.com securepassword123 + +# Update an existing user's password +docker exec rushdb rushdb update-password admin@example.com newsecurepassword456 +``` + +--- + +## Production hardening checklist + +| Item | Action | +|---|---| +| `RUSHDB_PASSWORD` | Set a strong password — never leave `password` | +| `RUSHDB_AES_256_ENCRYPTION_KEY` | Generate a random 32-character string | +| Neo4j password | Change from the Compose default | +| TLS | Terminate TLS at a reverse proxy (nginx, Caddy, Traefik) in front of port 3000 | +| Postgres | Back up the `rushdb` Postgres database — it stores projects, tokens, and billing records | +| Neo4j | Enable Neo4j backups for your graph data | +| Embedding key | Rotate the embedding provider API key independently of the RushDB key | + +--- + +## Next steps + +- [Project Setup After Deployment](./self-hosted-project-setup.mdx) — create projects, invite team members, configure per-project embedding models +- [Connecting an Aura Instance](./connect-aura-instance.mdx) — use an existing Neo4j Aura database as the RushDB data store +- [BYOC vs Managed vs Self-hosted](./byoc-vs-managed.mdx) — choose the right deployment topology for your use case diff --git a/docs/docs/tutorials/discovery-queries.mdx b/docs/docs/tutorials/discovery-queries.mdx new file mode 100644 index 00000000..b7e383a4 
--- /dev/null +++ b/docs/docs/tutorials/discovery-queries.mdx @@ -0,0 +1,506 @@ +--- +sidebar_position: 20 +title: "Discovery Queries: Exploring an Unknown Schema" +description: A practical workflow for exploring a RushDB project you did not design — using ontology tools, label listing, and progressive query refinement before building reliable retrieval. +tags: [MCP, SearchQuery, Schema, Exploration] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Discovery Queries: Exploring an Unknown Schema + +When you inherit a RushDB project — from a teammate, a data import, or a legacy integration — your immediate problem is: what is in here and how is it connected? + +This tutorial walks a systematic exploration workflow that works in code, in the MCP server, and in the REST API. It teaches how to trust your findings and build reliable queries from scratch without guessing at label names or property keys. + +--- + +## Why schema discovery matters + +RushDB does not enforce a fixed schema. Labels and properties are inferred from what you insert. That means: + +- Labels can vary in casing (`Article`, `ARTICLE`, `article`) depending on who wrote the ingest code +- Properties may be absent on some records +- Relationship types may be present in one direction but not the other +- You cannot assume anything from type names alone — verify before querying + +The ontology tools give you a live view of what exists. + +--- + +## Step 1: Get the ontology + +The ontology describes every label and property that exists in your project, including property types and relationship paths. + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) 
+ +// Returns labels, properties, and their types in structured JSON +const ontology = await db.ai.getOntology() +console.log(JSON.stringify(ontology, null, 2)) + +// Returns the same data formatted as compact Markdown — useful for pasting to an LLM +const markdown = await db.ai.getOntologyMarkdown() +console.log(markdown) +``` + + + + +```python +from rushdb import RushDB +import os, json + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +ontology = db.ai.get_ontology() +print(json.dumps(ontology, indent=2)) + +markdown = db.ai.get_ontology_markdown() +print(markdown) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +# JSON ontology +curl -s -X POST "$BASE/ai/ontology" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{}' + +# Markdown ontology +curl -s -X POST "$BASE/ai/ontology/md" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{}' +``` + + + + +Read the output carefully before writing any queries. Note every label name exactly as it appears — casing is significant. + +--- + +## Step 2: Sample records from each label + +After identifying labels, pull a small sample from each to see real data shapes. 
+ + + + +```typescript +// Sample 3 records from each label you found +const labels = ['CUSTOMER', 'ORDER', 'PRODUCT'] // exact names from ontology + +for (const label of labels) { + const sample = await db.records.find({ + labels: [label], + limit: 3 + }) + console.log(`\n=== ${label} (${sample.total} total) ===`) + console.log(JSON.stringify(sample.data, null, 2)) +} +``` + + + + +```python +labels = ["CUSTOMER", "ORDER", "PRODUCT"] # exact names from ontology + +for label in labels: + sample = db.records.find({ + "labels": [label], + "limit": 3 + }) + print(f"\n=== {label} ({sample.total} total) ===") + for r in sample.data: + print(r.data) +``` + + + + +```bash +for LABEL in CUSTOMER ORDER PRODUCT; do + echo "=== $LABEL ===" + curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"labels\":[\"$LABEL\"],\"limit\":3}" +done +``` + + + + +--- + +## Step 3: Discover distinct values for key properties + +Before filtering, check what values actually exist for categorical fields. Use `aggregate` + `groupBy` to enumerate distinct values. 
+ + + + +```typescript +// Find all distinct statuses on ORDER records +const statusCounts = await db.records.find({ + labels: ['ORDER'], + aggregate: { + count: { fn: 'count', alias: '$record' }, + status: '$record.status' + }, + groupBy: ['status', 'count'], + orderBy: { count: 'desc' } +}) + +console.log('ORDER statuses:') +for (const row of statusCounts.data) { + console.log(` ${row.status}: ${row.count}`) +} +``` + + + + +```python +status_counts = db.records.find({ + "labels": ["ORDER"], + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "status": "$record.status" + }, + "groupBy": ["status", "count"], + "orderBy": {"count": "desc"} +}) + +print("ORDER statuses:") +for row in status_counts.data: + print(f" {row.data.get('status')}: {row.data.get('count')}") +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["ORDER"], + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "status": "$record.status" + }, + "groupBy": ["status", "count"], + "orderBy": {"count": "desc"} + }' +``` + + + + +Do this for every categorical field you encounter. This prevents you from querying `status: 'active'` when the actual values are `ACTIVE`, `Active`, or `1`. + +--- + +## Step 4: Verify a relationship exists before traversing it + +Ontology shows you that a relationship path exists. Before building a traversal query, confirm direction and label names by searching for records that have a specific relationship. + + + + +```typescript +// Test: do CUSTOMER records have outbound PLACED_ORDER relationships to ORDER? 
+const customers = await db.records.find({ + labels: ['CUSTOMER'], + where: { + ORDER: { + $relation: { type: 'PLACED_ORDER', direction: 'out' } + } + }, + limit: 1 +}) + +if (customers.data.length === 0) { + // Try reverse direction or different type name + const reverse = await db.records.find({ + labels: ['CUSTOMER'], + where: { + ORDER: { + $relation: { type: 'PLACED_ORDER', direction: 'in' } + } + }, + limit: 1 + }) + console.log('Reverse direction hit:', reverse.data.length > 0) +} +``` + + + + +```python +customers = db.records.find({ + "labels": ["CUSTOMER"], + "where": { + "ORDER": { + "$relation": {"type": "PLACED_ORDER", "direction": "out"} + } + }, + "limit": 1 +}) + +if not customers.data: + reverse = db.records.find({ + "labels": ["CUSTOMER"], + "where": { + "ORDER": { + "$relation": {"type": "PLACED_ORDER", "direction": "in"} + } + }, + "limit": 1 + }) + print("Reverse direction hit:", len(reverse.data) > 0) +``` + + + + +```bash +# Test outbound direction +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["CUSTOMER"], + "where": { + "ORDER": { + "$relation": {"type": "PLACED_ORDER", "direction": "out"} + } + }, + "limit": 1 + }' +``` + + + + +--- + +## Step 5: Identify nullable properties with `$exists` + +When a property appears in some records but not others, use `$exists: false` to find records missing it and `$exists: true` to confirm presence before filtering. + + + + +```typescript +// How many CUSTOMER records are missing an email property? +const missingEmail = await db.records.find({ + labels: ['CUSTOMER'], + where: { email: { $exists: false } }, + aggregate: { count: { fn: 'count', alias: '$record' } }, + groupBy: ['count'] +}) + +// How many have email? 
+const hasEmail = await db.records.find({ + labels: ['CUSTOMER'], + where: { email: { $exists: true } }, + aggregate: { count: { fn: 'count', alias: '$record' } }, + groupBy: ['count'] +}) + +console.log(`Missing email: ${missingEmail.data[0]?.count ?? 0}`) +console.log(`Has email: ${hasEmail.data[0]?.count ?? 0}`) +``` + + + + +```python +missing_email = db.records.find({ + "labels": ["CUSTOMER"], + "where": {"email": {"$exists": False}}, + "aggregate": {"count": {"fn": "count", "alias": "$record"}}, + "groupBy": ["count"] +}) + +has_email = db.records.find({ + "labels": ["CUSTOMER"], + "where": {"email": {"$exists": True}}, + "aggregate": {"count": {"fn": "count", "alias": "$record"}}, + "groupBy": ["count"] +}) + +print(f"Missing email: {missing_email.data[0]['count'] if missing_email.data else 0}") +print(f"Has email: {has_email.data[0]['count'] if has_email.data else 0}") +``` + + + + +```bash +# Missing email +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["CUSTOMER"],"where":{"email":{"$exists":false}},"aggregate":{"count":{"fn":"count","alias":"$record"}},"groupBy":["count"]}' + +# Has email +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["CUSTOMER"],"where":{"email":{"$exists":true}},"aggregate":{"count":{"fn":"count","alias":"$record"}},"groupBy":["count"]}' +``` + + + + +--- + +## Step 6: Build the reliable query bottom-up + +After the above steps you know: + +- exact label names and their counts +- real property names and value shapes +- which categorical values are in use +- which properties may be absent +- relationship type names and their directions + +Now build your query incrementally, adding one filter at a time and verifying results at each step. 
+ + + + +```typescript +// Step 1: verify base label works +const base = await db.records.find({ labels: ['ORDER'], limit: 1 }) +console.log('Base query:', base.total) + +// Step 2: add first filter +const withStatus = await db.records.find({ + labels: ['ORDER'], + where: { status: 'shipped' }, // confirmed from Step 3 + limit: 1 +}) +console.log('With status filter:', withStatus.total) + +// Step 3: add traversal +const withCustomer = await db.records.find({ + labels: ['ORDER'], + where: { + status: 'shipped', + CUSTOMER: { + $relation: { type: 'PLACED_ORDER', direction: 'in' }, + region: 'EU' + } + }, + limit: 1 +}) +console.log('With customer traversal:', withCustomer.total) + +// Step 4: switch to production query +const result = await db.records.find({ + labels: ['ORDER'], + where: { + status: { $in: ['shipped', 'delivered'] }, + CUSTOMER: { + $relation: { type: 'PLACED_ORDER', direction: 'in' }, + region: 'EU' + } + }, + orderBy: { createdAt: 'desc' }, + limit: 50 +}) +``` + + + + +```python +# Incremental query building +base = db.records.find({"labels": ["ORDER"], "limit": 1}) +print("Base query:", base.total) + +with_status = db.records.find({ + "labels": ["ORDER"], + "where": {"status": "shipped"}, + "limit": 1 +}) +print("With status filter:", with_status.total) + +with_customer = db.records.find({ + "labels": ["ORDER"], + "where": { + "status": "shipped", + "CUSTOMER": { + "$relation": {"type": "PLACED_ORDER", "direction": "in"}, + "region": "EU" + } + }, + "limit": 1 +}) +print("With customer traversal:", with_customer.total) +``` + + + + +```bash +# Incremental: base +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["ORDER"],"limit":1}' + +# Add status filter +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["ORDER"],"where":{"status":"shipped"},"limit":1}' + +# Add traversal +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H 
"Authorization: Bearer $TOKEN" \ + -d '{"labels":["ORDER"],"where":{"status":"shipped","CUSTOMER":{"$relation":{"type":"PLACED_ORDER","direction":"in"},"region":"EU"}},"limit":1}' +``` + + + + +--- + +## MCP server workflow equivalent + +If you are using the MCP server with Claude or Cursor, follow the same pattern via tool calls: + +1. Call `getOntologyMarkdown` — read and understand the output before anything else +2. Call `getSearchQuerySpec` — load the canonical query structure for the LLM's context +3. Use `findRecords` with `limit: 3` for sampling, not `limit: 1000` +4. Use aggregate `groupBy` queries to enumerate categorical values +5. Add traversal filters one at a time, verifying counts match expectations + +The `getQueryBuilderPrompt` tool returns a system prompt you can paste into any LLM session to enforce the ontology-first discipline described in this tutorial. + +--- + +## Production caveat + +Ontology output reflects the current state of the database. In a live system it changes over time. If you are building an automated pipeline that depends on a specific label or property, add an existence check at startup and fail fast if the expected ontology elements are absent. Discovery workflows are interactive; production code should be explicit. 
+ +--- + +## Next steps + +- [Agent-Safe Query Planning with Ontology First](./agent-safe-query-planning.mdx) — automate the discovery loop in an agent +- [MCP Quickstart for Real Operators](./mcp-operator-quickstart.mdx) — the same workflow via the MCP server +- [Thinking in Graphs](./thinking-in-graphs.mdx) — mental model for working with connected data diff --git a/docs/docs/tutorials/episodic-memory.mdx b/docs/docs/tutorials/episodic-memory.mdx new file mode 100644 index 00000000..579e2def --- /dev/null +++ b/docs/docs/tutorials/episodic-memory.mdx @@ -0,0 +1,520 @@ +--- +sidebar_position: 23 +title: "Episodic Memory for Multi-Step Agents" +description: Store goals, intermediate observations, tool outputs, and decisions as linked records so long-running agents can resume with context instead of stateless prompts. +tags: [Agents, Memory, Relationships, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Episodic Memory for Multi-Step Agents + +A stateless agent that restarts a task from scratch every session wastes context. A long-running agent that can write its observations and decisions to a graph database — and read them back on resume — can pick up where it left off, avoid repeating work, and build compound reasoning over time. + +This tutorial shows the episodic memory pattern: one GOAL record per task, linked STEP records capturing each intermediate action, and linked OBSERVATION records for tool outputs and discoveries. + +--- + +## Graph shape + +```mermaid +graph TD + GOAL[GOAL
Top-level task] -->|HAS_STEP| STEP_1[STEP: search docs] + GOAL -->|HAS_STEP| STEP_2[STEP: draft answer] + GOAL -->|HAS_STEP| STEP_3[STEP: verify claim] + STEP_1 -->|PRODUCED| OBS_1[OBSERVATION
Tool output] + STEP_2 -->|PRODUCED| OBS_2[OBSERVATION
Draft text] + STEP_3 -->|DEPENDS_ON| STEP_1 +``` + +| Label | What it represents | +|---|---| +| `GOAL` | The agent's top-level objective for a session | +| `STEP` | A single action the agent took (tool call, reasoning step, decision) | +| `OBSERVATION` | The result or output of a step (tool output, search result, partial answer) | + +--- + +## Step 1: Create a goal record when a session starts + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +async function startSession(taskDescription: string, agentId: string) { + const goal = await db.records.create({ + label: 'GOAL', + data: { + task: taskDescription, + agentId, + status: 'in_progress', + startedAt: new Date().toISOString() + } + }) + return goal +} + +const goal = await startSession( + 'Research RushDB graph query patterns and summarize the top 5', + 'agent-001' +) +console.log('Session goal ID:', goal.__id) +``` + + + + +```python +from rushdb import RushDB +import os +from datetime import datetime, timezone + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + + +def start_session(task: str, agent_id: str): + goal = db.records.create("GOAL", { + "task": task, + "agentId": agent_id, + "status": "in_progress", + "startedAt": datetime.now(timezone.utc).isoformat() + }) + return goal + + +goal = start_session( + "Research RushDB graph query patterns and summarize the top 5", + "agent-001" +) +print("Session goal ID:", goal.id) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +GOAL_RESP=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "label": "GOAL", + "data": { + "task": "Research graph query patterns", + "agentId": "agent-001", + "status": "in_progress", + "startedAt": "2025-04-01T10:00:00Z" + } + }') + +GOAL_ID=$(echo "$GOAL_RESP" | jq -r '.data.__id') +echo "Goal ID: $GOAL_ID" +``` + + 
+ + +--- + +## Step 2: Record each step as the agent executes it + +Each agent action — tool call, web search, or LLM reasoning step — becomes a STEP record linked to the GOAL. + + + + +```typescript +async function recordStep( + goalId: string, + stepType: string, + action: string, + index: number +) { + const step = await db.records.create({ + label: 'STEP', + data: { + type: stepType, + action, + index, + status: 'executing', + startedAt: new Date().toISOString() + } + }) + + // Fetch the goal record and link + const goalResult = await db.records.find({ + labels: ['GOAL'], + where: { __id: goalId } + }) + + await db.records.attach({ + source: goalResult.data[0], + target: step, + options: { type: 'HAS_STEP', direction: 'out' } + }) + + return step +} + +const step1 = await recordStep( + goal.__id, + 'tool_call', + 'Search documentation: graph traversal patterns', + 1 +) +``` + + + + +```python +def record_step(goal_id: str, step_type: str, action: str, index: int): + step = db.records.create("STEP", { + "type": step_type, + "action": action, + "index": index, + "status": "executing", + "startedAt": datetime.now(timezone.utc).isoformat() + }) + + goal_result = db.records.find({"labels": ["GOAL"], "where": {"__id": goal_id}}) + db.records.attach( + goal_result.data[0].id, + step.id, + {"type": "HAS_STEP", "direction": "out"} + ) + return step + + +step1 = record_step(goal.id, "tool_call", "Search docs: graph traversal patterns", 1) +``` + + + + +```bash +STEP_RESP=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "label": "STEP", + "data": { + "type": "tool_call", + "action": "Search docs: graph traversal patterns", + "index": 1, + "status": "executing", + "startedAt": "2025-04-01T10:01:00Z" + } + }') +STEP_ID=$(echo "$STEP_RESP" | jq -r '.data.__id') + +# Link step to goal +curl -s -X POST "$BASE/records/$GOAL_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d 
"{\"targets\":[\"$STEP_ID\"],\"options\":{\"type\":\"HAS_STEP\",\"direction\":\"out\"}}" +``` + + + + +--- + +## Step 3: Store observations from tool outputs + +When a tool returns results, store the output as an OBSERVATION linked to the STEP that produced it. + + + + +```typescript +async function recordObservation( + stepId: string, + content: string, + observationType: string +) { + const obs = await db.records.create({ + label: 'OBSERVATION', + data: { + content, + type: observationType, + recordedAt: new Date().toISOString() + } + }) + + const stepResult = await db.records.find({ + labels: ['STEP'], + where: { __id: stepId } + }) + + await db.records.attach({ + source: stepResult.data[0], + target: obs, + options: { type: 'PRODUCED', direction: 'out' } + }) + + // Mark step complete + await db.records.update(stepId, { status: 'completed' }) + + return obs +} + +const obs1 = await recordObservation( + step1.__id, + 'Found 3 relevant tutorials: thinking-in-graphs, modeling-hierarchies, temporal-graphs', + 'search_result' +) +``` + + + + +```python +def record_observation(step_id: str, content: str, obs_type: str): + obs = db.records.create("OBSERVATION", { + "content": content, + "type": obs_type, + "recordedAt": datetime.now(timezone.utc).isoformat() + }) + + step_result = db.records.find({"labels": ["STEP"], "where": {"__id": step_id}}) + db.records.attach( + step_result.data[0].id, + obs.id, + {"type": "PRODUCED", "direction": "out"} + ) + + db.records.update(step_id, {"status": "completed"}) + return obs + + +obs1 = record_observation( + step1.id, + "Found 3 relevant tutorials: thinking-in-graphs, modeling-hierarchies, temporal-graphs", + "search_result" +) +``` + + + + +```bash +OBS_RESP=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "label": "OBSERVATION", + "data": { + "content": "Found 3 relevant tutorials", + "type": "search_result", + "recordedAt": "2025-04-01T10:02:00Z" + } + }') +OBS_ID=$(echo "$OBS_RESP" 
| jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$STEP_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$OBS_ID\"],\"options\":{\"type\":\"PRODUCED\",\"direction\":\"out\"}}" + +curl -s -X PATCH "$BASE/records/$STEP_ID" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"status":"completed"}' +``` + + + + +--- + +## Step 4: Resume a session and retrieve accumulated context + +When the agent resumes — or when a new session needs to continue where a prior one left off — retrieve the full context for the GOAL. + + + + +```typescript +async function resumeSession(goalId: string) { + // Get the goal + const goalResult = await db.records.find({ + labels: ['GOAL'], + where: { __id: goalId } + }) + const goal = goalResult.data[0] + + // Get all steps in order + const steps = await db.records.find({ + labels: ['STEP'], + where: { + GOAL: { + $relation: { type: 'HAS_STEP', direction: 'in' }, + __id: goalId + } + }, + orderBy: { index: 'asc' } + }) + + // Get all observations across all steps + const observations = await db.records.find({ + labels: ['OBSERVATION'], + where: { + STEP: { + $relation: { type: 'PRODUCED', direction: 'in' }, + GOAL: { + $relation: { type: 'HAS_STEP', direction: 'in' }, + __id: goalId + } + } + } + }) + + return { + goal: { task: goal.task, status: goal.status }, + completedSteps: steps.data.filter(s => s.status === 'completed').length, + totalSteps: steps.total, + observations: observations.data.map(o => o.content) + } +} + +const context = await resumeSession(goal.__id) +console.log('Resumed context:', context) +``` + + + + +```python +def resume_session(goal_id: str) -> dict: + goal_result = db.records.find({"labels": ["GOAL"], "where": {"__id": goal_id}}) + goal_rec = goal_result.data[0] + + steps = db.records.find({ + "labels": ["STEP"], + "where": { + "GOAL": { + "$relation": {"type": "HAS_STEP", "direction": "in"}, + "__id": goal_id + } + }, + "orderBy": {"index": "asc"} + }) + + observations = 
db.records.find({ + "labels": ["OBSERVATION"], + "where": { + "STEP": { + "$relation": {"type": "PRODUCED", "direction": "in"}, + "GOAL": { + "$relation": {"type": "HAS_STEP", "direction": "in"}, + "__id": goal_id + } + } + } + }) + + completed = sum(1 for s in steps.data if s.data.get("status") == "completed") + return { + "goal": {"task": goal_rec.data.get("task"), "status": goal_rec.data.get("status")}, + "completedSteps": completed, + "totalSteps": steps.total, + "observations": [o.data.get("content") for o in observations.data] + } + + +context = resume_session(goal.id) +print(context) +``` + + + + +```bash +# Retrieve steps for a goal +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"STEP\"], + \"where\": { + \"GOAL\": { + \"\$relation\": {\"type\": \"HAS_STEP\", \"direction\": \"in\"}, + \"__id\": \"$GOAL_ID\" + } + }, + \"orderBy\": {\"index\": \"asc\"} + }" +``` + + + + +--- + +## Step 5: Mark the goal complete + +When the task finishes, update the GOAL record with its outcome. + + + + +```typescript +await db.records.update(goal.__id, { + status: 'completed', + completedAt: new Date().toISOString(), + summary: 'Identified 5 top query patterns: traversal, aggregation, temporal, hierarchical, semantic' +}) +``` + + + + +```python +from datetime import datetime, timezone + +db.records.update(goal.id, { + "status": "completed", + "completedAt": datetime.now(timezone.utc).isoformat(), + "summary": "Identified 5 top query patterns" +}) +``` + + + + +```bash +curl -s -X PATCH "$BASE/records/$GOAL_ID" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"status":"completed","completedAt":"2025-04-01T11:00:00Z","summary":"Identified 5 top patterns"}' +``` + + + + +--- + +## When to use episodic memory vs. 
team memory + +| Pattern | Use case | +|---|---| +| Episodic memory (this tutorial) | Per-session agent context: goals, steps, tool outputs | +| Team memory ([Building Team Memory](./building-team-memory.mdx)) | Shared persistent knowledge: tickets, docs, decisions | +| Fact memory ([RushDB as a Memory Layer](./memory-layer.mdx)) | Long-lived facts about entities across all sessions | + +These patterns compose: an agent can write episodic steps to its own GOAL graph while also reading from shared team memory for background knowledge. + +--- + +## Production caveat + +Episodic records accumulate quickly during active agent use. Define a cleanup policy: archive GOAL records after N days, compress older OBSERVATION content (store summaries instead of full tool outputs), or use a separate RushDB project for ephemeral agent memory versus persistent team knowledge. + +--- + +## Next steps + +- [RushDB as a Memory Layer](./memory-layer.mdx) — long-lived facts and entity profiles +- [Building Team Memory](./building-team-memory.mdx) — shared knowledge graph for agents +- [Agent-Safe Query Planning](./agent-safe-query-planning.mdx) — grounded query execution guard diff --git a/docs/docs/tutorials/event-driven-ingestion.mdx b/docs/docs/tutorials/event-driven-ingestion.mdx new file mode 100644 index 00000000..5ee0f154 --- /dev/null +++ b/docs/docs/tutorials/event-driven-ingestion.mdx @@ -0,0 +1,416 @@ +--- +sidebar_position: 29 +title: "Event-Driven Ingestion from Webhooks and Queues" +description: Handle partial, repeated, and out-of-order events from webhooks or message queues without corrupting connected graph state. +tags: [Ingestion, Transactions, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Event-Driven Ingestion from Webhooks and Queues + +Webhooks and message queues deliver events at least once — occasionally more than once, occasionally out of order. 
Naive insertion on every delivery duplicates records, splits relationship graphs, and corrupts aggregates. + +This tutorial shows idempotent ingestion patterns: look up before you write, write only once, always link atomically. + +--- + +## The fundamental problem + +```mermaid +sequenceDiagram + participant Queue + participant Worker + participant RushDB + + Queue->>Worker: event (first delivery) + Worker->>RushDB: create record + Queue->>Worker: event (duplicate delivery) + Worker->>RushDB: create duplicate record ← problem +``` + +The solution is **find-then-create**: search for the record by its natural key before inserting. If it already exists, update it. If it does not, create it. Do both inside a transaction so no two workers race to create the same record simultaneously. + +--- + +## Step 1: Idempotent upsert of a single event + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +interface OrderEvent { + orderId: string + customerId: string + status: string + totalUsd: number + createdAt: string +} + +async function upsertOrder(event: OrderEvent): Promise { + const tx = await db.tx.begin() + try { + const existing = await db.records.find({ + labels: ['ORDER'], + where: { orderId: event.orderId } + }) + + let orderId: string + + if (existing.data.length > 0) { + // Already exists — update mutable fields only + await db.records.update(existing.data[0].__id, { status: event.status }, tx) + orderId = existing.data[0].__id + } else { + // First delivery — create + const record = await db.records.create({ label: 'ORDER', data: event }, tx) + orderId = record.__id + } + + await db.tx.commit(tx) + return orderId + } catch (err) { + await db.tx.rollback(tx) + throw err + } +} +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +def upsert_order(event: dict) -> str: + tx = db.transactions.begin() + 
try: + existing = db.records.find({ + "labels": ["ORDER"], + "where": {"orderId": event["orderId"]} + }) + + if existing.data: + db.records.update(existing.data[0].id, {"status": event["status"]}, transaction=tx) + order_id = existing.data[0].id + else: + record = db.records.create("ORDER", event, transaction=tx) + order_id = record.id + + db.transactions.commit(tx) + return order_id + except Exception as e: + db.transactions.rollback(tx) + raise +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +ORDER_ID="ORD-9001" + +# Check if record already exists +EXISTING=$(curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"labels\":[\"ORDER\"],\"where\":{\"orderId\":\"$ORDER_ID\"}}") + +COUNT=$(echo "$EXISTING" | jq '.total') + +if [ "$COUNT" -eq 0 ]; then + # Create + curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"label\":\"ORDER\",\"data\":{\"orderId\":\"$ORDER_ID\",\"status\":\"received\",\"totalUsd\":129.99}}" +else + # Update status only + RECORD_ID=$(echo "$EXISTING" | jq -r '.data[0].__id') + curl -s -X PATCH "$BASE/records/$RECORD_ID" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"status":"processing"}' +fi +``` + + + + +:::tip Always use a natural key +Pick a field that is unique and immutable per event — `orderId`, `eventId`, `messageId`. Never use a mutable field like `status` or `updatedAt` as the deduplication key. +::: + +--- + +## Step 2: Idempotent upsert with relationship creation + +Events often carry implicit relationships. A `checkout.completed` webhook references both a customer ID and the order itself. Link them atomically. 
+ + + + +```typescript +interface CheckoutEvent { + orderId: string + customerId: string + status: string + totalUsd: number + createdAt: string +} + +async function handleCheckout(event: CheckoutEvent): Promise { + const tx = await db.tx.begin() + try { + // Upsert ORDER + const existingOrders = await db.records.find({ + labels: ['ORDER'], + where: { orderId: event.orderId } + }) + + let order: { __id: string; [key: string]: unknown } + if (existingOrders.data.length > 0) { + order = existingOrders.data[0] + await db.records.update(order.__id, { status: event.status }, tx) + } else { + order = await db.records.create( + { label: 'ORDER', data: { orderId: event.orderId, status: event.status, totalUsd: event.totalUsd, createdAt: event.createdAt } }, + tx + ) + } + + // Resolve CUSTOMER — must exist prior to checkout + const customers = await db.records.find({ + labels: ['CUSTOMER'], + where: { customerId: event.customerId } + }) + if (customers.data.length === 0) throw new Error(`CUSTOMER ${event.customerId} not found`) + + // Idempotent attach: only link if not already linked + const alreadyLinked = await db.records.find({ + labels: ['CUSTOMER'], + where: { + customerId: event.customerId, + ORDER: { + $relation: { type: 'PLACED', direction: 'out' }, + orderId: event.orderId + } + } + }) + + if (alreadyLinked.data.length === 0) { + await db.records.attach({ + source: customers.data[0], + target: order, + options: { type: 'PLACED', direction: 'out' } + }, tx) + } + + await db.tx.commit(tx) + } catch (err) { + await db.tx.rollback(tx) + throw err + } +} +``` + + + + +```python +def handle_checkout(event: dict) -> None: + tx = db.transactions.begin() + try: + existing = db.records.find({"labels": ["ORDER"], "where": {"orderId": event["orderId"]}}) + + if existing.data: + order = existing.data[0] + db.records.update(order.id, {"status": event["status"]}, transaction=tx) + else: + order = db.records.create("ORDER", { + "orderId": event["orderId"], + "status": 
event["status"], + "totalUsd": event["totalUsd"], + "createdAt": event["createdAt"] + }, transaction=tx) + + customers = db.records.find({"labels": ["CUSTOMER"], "where": {"customerId": event["customerId"]}}) + if not customers.data: + raise ValueError(f"CUSTOMER {event['customerId']} not found") + + already_linked = db.records.find({ + "labels": ["CUSTOMER"], + "where": { + "customerId": event["customerId"], + "ORDER": { + "$relation": {"type": "PLACED", "direction": "out"}, + "orderId": event["orderId"] + } + } + }) + + if not already_linked.data: + db.records.attach(customers.data[0].id, order.id, {"type": "PLACED", "direction": "out"}, transaction=tx) + + db.transactions.commit(tx) + except Exception: + db.transactions.rollback(tx) + raise +``` + + + + +--- + +## Step 3: Bulk ingestion from a queue batch + +Message queues often deliver events in batches. Use `importJson` for the record layer, then link in a second pass. + + + + +```typescript +interface PageviewEvent { + sessionId: string + url: string + referrer?: string + duration: number + timestamp: string +} + +async function flushPageviews(events: PageviewEvent[]): Promise { + if (events.length === 0) return + + // Deduplicate by sessionId+url+timestamp before writing + const unique = new Map() + for (const e of events) { + unique.set(`${e.sessionId}:${e.url}:${e.timestamp}`, e) + } + + await db.records.importJson({ + label: 'PAGEVIEW', + data: Array.from(unique.values()) + }) +} +``` + + + + +```python +def flush_pageviews(events: list[dict]) -> None: + if not events: + return + + # Deduplicate before writing + seen = {} + for e in events: + key = f"{e['sessionId']}:{e['url']}:{e['timestamp']}" + seen[key] = e + + db.records.import_json({"label": "PAGEVIEW", "data": list(seen.values())}) +``` + + + + +```bash +# Batch import via import/json endpoint +curl -s -X POST "$BASE/records/import/json" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "label": "PAGEVIEW", + "data": [ + 
{"sessionId":"sess-1","url":"/pricing","duration":45,"timestamp":"2025-03-01T10:00:00Z"}, + {"sessionId":"sess-2","url":"/docs","duration":120,"timestamp":"2025-03-01T10:01:00Z"} + ] + }' +``` + + + + +--- + +## Step 4: Out-of-order event handling + +Some pipelines deliver events out of chronological order. Guard against overwriting a later state with an earlier one. + + + + +```typescript +async function applyOrderStatus(orderId: string, newStatus: string, eventAt: string): Promise { + const result = await db.records.find({ + labels: ['ORDER'], + where: { orderId } + }) + + if (result.data.length === 0) { + await db.records.create({ label: 'ORDER', data: { orderId, status: newStatus, lastEventAt: eventAt } }) + return + } + + const existing = result.data[0] + // Reject stale events — only advance state if this event is newer + if (existing.lastEventAt && eventAt <= (existing.lastEventAt as string)) { + console.log(`Skipping stale event for ${orderId}: ${eventAt} <= ${existing.lastEventAt}`) + return + } + + await db.records.update(existing.__id, { status: newStatus, lastEventAt: eventAt }) +} +``` + + + + +```python +def apply_order_status(order_id: str, new_status: str, event_at: str) -> None: + result = db.records.find({"labels": ["ORDER"], "where": {"orderId": order_id}}) + + if not result.data: + db.records.create("ORDER", {"orderId": order_id, "status": new_status, "lastEventAt": event_at}) + return + + existing = result.data[0] + last_event_at = existing.data.get("lastEventAt") + if last_event_at and event_at <= last_event_at: + print(f"Skipping stale event for {order_id}: {event_at} <= {last_event_at}") + return + + db.records.update(existing.id, {"status": new_status, "lastEventAt": event_at}) +``` + + + + +--- + +## Production checklist + +| Concern | Practice | +|---|---| +| Duplicate events | Find by natural key before creating | +| Duplicate edges | Check relationship exists before `attach()` | +| Out-of-order state | Compare `lastEventAt` timestamps 
before updating | +| Partial failures | Wrap multi-record writes in a transaction | +| Large queue spikes | Use `importJson` for batches; don't loop `create()` | + +--- + +## Next steps + +- [Audit Trails](./audit-trails.mdx) — append events to records without mutating them +- [Versioning Records](./versioning-records.mdx) — maintain a history of every state transition +- [Supply Chain Traceability](./supply-chain-traceability.mdx) — event-sourced causal chains across graph hops diff --git a/docs/docs/tutorials/explainable-results.mdx b/docs/docs/tutorials/explainable-results.mdx new file mode 100644 index 00000000..a69d4f3a --- /dev/null +++ b/docs/docs/tutorials/explainable-results.mdx @@ -0,0 +1,369 @@ +--- +sidebar_position: 33 +title: "Explainable Results" +description: Pair raw search results with related evidence, aggregate summaries, and traversal paths so users and agents can understand why a result was returned. +tags: [Search, AI, SearchQuery, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Explainable Results + +A result without context forces users to trust it blindly. An explainable result shows *why* it was returned: which fields matched, which related records reinforce it, and what aggregate signal supports it. + +This tutorial shows three patterns for building explainable results on top of RushDB: + +1. **Field-level match explanation** — which fields matched the query +2. **Evidence assembly** — related records that corroborate the result +3. **Score + signal summary** — pairing semantic `__score` with structured signals + +--- + +## Pattern 1: Field-level match explanation + +After structured retrieval, compute which fields matched the query and return them alongside the record. + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) 
+ +interface MatchedField { + field: string + value: unknown + queryValue: unknown +} + +function explainMatch(record: Record, query: Record): MatchedField[] { + const matches: MatchedField[] = [] + for (const [field, queryValue] of Object.entries(query)) { + if (field.startsWith('$') || typeof queryValue === 'object') continue + if (record[field] !== undefined) { + matches.push({ field, value: record[field], queryValue }) + } + } + return matches +} + +async function searchWithExplanation(where: Record) { + const result = await db.records.find({ labels: ['ARTICLE'], where }) + + return result.data.map(record => ({ + id: record.__id, + title: record.title, + matchedFields: explainMatch(record as Record, where) + })) +} + +// Usage +const results = await searchWithExplanation({ + status: 'published', + category: 'engineering' +}) +// Each result includes: +// { id, title, matchedFields: [{ field: 'status', value: 'published', queryValue: 'published' }, ...] } +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +def explain_match(record_data: dict, query: dict) -> list[dict]: + matches = [] + for field, query_value in query.items(): + if field.startswith("$") or isinstance(query_value, dict): + continue + if field in record_data: + matches.append({"field": field, "value": record_data[field], "queryValue": query_value}) + return matches + +def search_with_explanation(where: dict) -> list[dict]: + result = db.records.find({"labels": ["ARTICLE"], "where": where}) + return [ + { + "id": a.id, + "title": a.data.get("title"), + "matchedFields": explain_match(a.data, where) + } + for a in result.data + ] +``` + + + + +--- + +## Pattern 2: Evidence assembly via graph traversal + +After retrieving a primary result, traverse related records to assemble corroborating evidence. 
+ + + + +```typescript +interface ExplainedResult { + id: string + title: string + score?: number + evidence: { + author: { name: string; affiliation?: string } | null + relatedTopics: string[] + citedBy: number + } +} + +async function assembleEvidence(articleId: string): Promise { + const [authorResult, citedByResult, topicResult] = await Promise.all([ + db.records.find({ + labels: ['AUTHOR'], + where: { + ARTICLE: { + $relation: { type: 'AUTHORED_BY', direction: 'in' }, + __id: articleId + } + } + }), + db.records.find({ + labels: ['ARTICLE'], + aggregate: { count: { fn: 'count', alias: '$record' } }, + where: { + ARTICLE: { + $relation: { type: 'CITES', direction: 'out' }, + __id: articleId + } + } + }), + db.records.find({ + labels: ['TOPIC'], + where: { + ARTICLE: { + $relation: { type: 'COVERS', direction: 'in' }, + __id: articleId + } + } + }) + ]) + + return { + author: authorResult.data[0] + ? { name: authorResult.data[0].name as string, affiliation: authorResult.data[0].affiliation as string } + : null, + relatedTopics: topicResult.data.map(t => t.name as string), + citedBy: (citedByResult.data[0]?.count as number) ?? 
0 + } +} + +async function explainedSemanticSearch(userQuery: string): Promise { + const results = await db.ai.search({ + query: userQuery, + propertyName: 'content', + labels: ['ARTICLE'], + where: { status: 'published' }, + limit: 5 + }) + + return Promise.all( + results.data.map(async article => ({ + id: article.__id, + title: article.title as string, + score: article.__score as number, + evidence: await assembleEvidence(article.__id) + })) + ) +} +``` + + + + +```python +from concurrent.futures import ThreadPoolExecutor + +def assemble_evidence(article_id: str) -> dict: + def get_author(): + r = db.records.find({ + "labels": ["AUTHOR"], + "where": {"ARTICLE": {"$relation": {"type": "AUTHORED_BY", "direction": "in"}, "__id": article_id}} + }) + return {"name": r.data[0].data.get("name")} if r.data else None + + def get_cited_by(): + r = db.records.find({ + "labels": ["ARTICLE"], + "aggregate": {"count": {"fn": "count", "alias": "$record"}}, + "where": {"ARTICLE": {"$relation": {"type": "CITES", "direction": "out"}, "__id": article_id}} + }) + return r.data[0].data.get("count", 0) if r.data else 0 + + def get_topics(): + r = db.records.find({ + "labels": ["TOPIC"], + "where": {"ARTICLE": {"$relation": {"type": "COVERS", "direction": "in"}, "__id": article_id}} + }) + return [t.data.get("name") for t in r.data] + + with ThreadPoolExecutor(max_workers=3) as pool: + author_f = pool.submit(get_author) + cited_f = pool.submit(get_cited_by) + topics_f = pool.submit(get_topics) + + return { + "author": author_f.result(), + "citedBy": cited_f.result(), + "relatedTopics": topics_f.result() + } + +def explained_semantic_search(user_query: str) -> list[dict]: + results = db.ai.search({ + "query": user_query, + "propertyName": "content", + "labels": ["ARTICLE"], + "where": {"status": "published"}, + "limit": 5 + }) + + return [ + { + "id": a.id, + "title": a.data.get("title"), + "score": a.data.get("__score"), + "evidence": assemble_evidence(a.id) + } + for a in results.data 
+ ] +``` + + + + +--- + +## Pattern 3: Score + structured signal summary + +For agent contexts, produce a brief natural-language explanation rather than raw data. + + + + +```typescript +function buildExplanationText(result: { + title: string + score: number + evidence: { author: { name: string } | null; relatedTopics: string[]; citedBy: number } +}): string { + const parts: string[] = [] + + parts.push(`"${result.title}" is a strong match (relevance: ${(result.score * 100).toFixed(0)}%).`) + + if (result.evidence.author) { + parts.push(`Written by ${result.evidence.author.name}.`) + } + + if (result.evidence.relatedTopics.length > 0) { + parts.push(`Topics: ${result.evidence.relatedTopics.slice(0, 3).join(', ')}.`) + } + + if (result.evidence.citedBy > 0) { + parts.push(`Cited by ${result.evidence.citedBy} other article${result.evidence.citedBy === 1 ? '' : 's'}.`) + } + + return parts.join(' ') +} + +// Example output: +// "Reducing Latency in Distributed Systems" is a strong match (relevance: 87%). +// Written by Jane Smith. Topics: distributed systems, latency, caching. +// Cited by 12 other articles. +``` + + + + +```python +def build_explanation_text(result: dict) -> str: + parts = [] + score_pct = int((result.get("score") or 0) * 100) + parts.append(f'"{result["title"]}" is a strong match (relevance: {score_pct}%).') + + evidence = result.get("evidence", {}) + if evidence.get("author"): + parts.append(f'Written by {evidence["author"]["name"]}.') + + topics = evidence.get("relatedTopics", []) + if topics: + parts.append(f'Topics: {", ".join(topics[:3])}.') + + cited_by = evidence.get("citedBy", 0) + if cited_by > 0: + label = "article" if cited_by == 1 else "articles" + parts.append(f'Cited by {cited_by} other {label}.') + + return " ".join(parts) +``` + + + + +--- + +## Pattern 4: REST-only evidence pipeline + +If you are building an agent or backend service without an SDK, assemble evidence with sequential REST calls. 
+ +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="$RUSHDB_API_KEY" +H='Content-Type: application/json' + +# 1. Semantic search for the primary result +RESULTS=$(curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"query":"reducing latency","propertyName":"content","labels":["ARTICLE"],"limit":1}') + +ARTICLE_ID=$(echo "$RESULTS" | jq -r '.data[0].__id') +SCORE=$(echo "$RESULTS" | jq -r '.data[0].__score') +TITLE=$(echo "$RESULTS" | jq -r '.data[0].title') + +# 2. Fetch author evidence +AUTHOR=$(curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"labels\":[\"AUTHOR\"],\"where\":{\"ARTICLE\":{\"__id\":\"$ARTICLE_ID\",\"\$relation\":{\"type\":\"AUTHORED_BY\",\"direction\":\"in\"}}}}" \ + | jq -r '.data[0].name // "unknown"') + +echo "Result: $TITLE (score: $SCORE) — Author: $AUTHOR" +``` + +--- + +## When to use explainability + +| Context | Recommended patterns | +|---|---| +| User-facing search results | Field match highlights + related topic tags | +| AI agent tool responses | Score + structured summary text | +| Compliance / audit surfaces | Full evidence assembly with provenance links | +| Debug / development | Field-level match + raw score logging | + +--- + +## Production caveat + +Evidence assembly makes one additional query per result per evidence type. For 10 results and 3 evidence types, that is 30 extra queries. Cache evidence for repeated result IDs within the same session, or pre-join commonly needed evidence fields during ingestion. 
+ +--- + +## Next steps + +- [Search UX Patterns](./search-ux-patterns.mdx) — structured and semantic search for the end user +- [Hybrid Retrieval](./hybrid-retrieval.mdx) — `where` filter + semantic scoring in one call +- [Research Knowledge Graph](./research-knowledge-graph.mdx) — citation and co-authorship graphs to power evidence traversal diff --git a/docs/docs/tutorials/graph-backed-api.mdx b/docs/docs/tutorials/graph-backed-api.mdx new file mode 100644 index 00000000..b45e86ac --- /dev/null +++ b/docs/docs/tutorials/graph-backed-api.mdx @@ -0,0 +1,545 @@ +--- +sidebar_position: 18 +title: "Building a Graph-Backed API Layer" +description: Expose RushDB through an application API with query translation, safe filtering, and response shaping patterns for production use. +tags: [Integration, API, TypeScript, Python, Architecture] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Building a Graph-Backed API Layer + +RushDB gives your application a graph database with a REST and SDK interface. Most production apps do not expose RushDB directly to the client — they expose a purpose-built API that translates user intent into RushDB queries, enforces access rules, and shapes the response. + +This tutorial shows the patterns that work. + +--- + +## Why a translation layer? + +- **Access control**: the API enforces which records a user can query, narrowing `where` clauses with tenant or permission filters before calling RushDB +- **Schema stability**: client contracts are stable even if your graph schema evolves +- **Response shaping**: the API assembles multi-query responses into clean payloads before returning them +- **Query safety**: user input is mapped to a validated query structure — not passed through to RushDB directly + +--- + +## Pattern 1: Accept user params, produce a validated SearchQuery + +Never pass user-supplied JSON directly to `records.find`. 
Always construct the SearchQuery from validated inputs. + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' +import type { SearchQuery } from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +type ArticleSearchParams = { + q?: string + category?: string + authorId?: string + since?: string + page?: number + pageSize?: number +} + +function buildArticleQuery(params: ArticleSearchParams, tenantId: string): SearchQuery { + const where: Record = { + // Always scope to tenant — never trust user to supply this + tenantId + } + + if (params.category) { + where.category = params.category + } + + if (params.authorId) { + where.authorId = params.authorId + } + + if (params.since) { + // Validate date format before using it + const date = new Date(params.since) + if (isNaN(date.getTime())) throw new Error('Invalid since date') + where.publishedAt = { $gte: params.since } + } + + if (params.q) { + // Text search via $contains — not arbitrary Cypher + where.title = { $contains: params.q } + } + + return { + labels: ['ARTICLE'], + where, + orderBy: { publishedAt: 'desc' }, + skip: ((params.page ?? 0) * (params.pageSize ?? 20)), + limit: Math.min(params.pageSize ?? 20, 100) // cap at 100 + } +} + +// Usage in a route handler (e.g. Hono, Express, Next.js) +export async function getArticles(req: Request): Promise { + const tenantId = getTenantId(req) // from session/JWT + const url = new URL(req.url) + + const params: ArticleSearchParams = { + q: url.searchParams.get('q') ?? undefined, + category: url.searchParams.get('category') ?? undefined, + authorId: url.searchParams.get('authorId') ?? undefined, + since: url.searchParams.get('since') ?? undefined, + page: Number(url.searchParams.get('page') ?? 0), + pageSize: Number(url.searchParams.get('pageSize') ?? 
20) + } + + const query = buildArticleQuery(params, tenantId) + const result = await db.records.find(query) + + return Response.json({ + articles: result.data.map(r => ({ + id: r.__id, + title: r.title, + category: r.category, + publishedAt: r.publishedAt + })), + total: result.total, + page: params.page, + pageSize: params.pageSize + }) +} +``` + + + + +```python +from rushdb import RushDB +from typing import Optional +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + + +def build_article_query( + tenant_id: str, + q: Optional[str] = None, + category: Optional[str] = None, + author_id: Optional[str] = None, + since: Optional[str] = None, + page: int = 0, + page_size: int = 20, +) -> dict: + where: dict = {"tenantId": tenant_id} + + if category: + where["category"] = category + if author_id: + where["authorId"] = author_id + if since: + where["publishedAt"] = {"$gte": since} + if q: + where["title"] = {"$contains": q} + + return { + "labels": ["ARTICLE"], + "where": where, + "orderBy": {"publishedAt": "desc"}, + "skip": page * min(page_size, 100), + "limit": min(page_size, 100), + } + + +# Usage in a FastAPI handler +# from fastapi import FastAPI, Request +# app = FastAPI() +# +# @app.get("/articles") +# def get_articles(q: str = None, category: str = None, page: int = 0): +# tenant_id = get_tenant_id(request) +# query = build_article_query(tenant_id, q=q, category=category, page=page) +# result = db.records.find(query) +# return {"articles": result.data, "total": result.total} +``` + + + + +```bash +# The shell equivalent is building the query JSON and posting it +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' +TENANT_ID="tenant-42" +CATEGORY="infrastructure" + +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"ARTICLE\"], + \"where\": { + \"tenantId\": \"$TENANT_ID\", + \"category\": \"$CATEGORY\" + }, + 
\"orderBy\": {\"publishedAt\": \"desc\"}, + \"limit\": 20 + }" +``` + + + + +--- + +## Pattern 2: Multi-query response assembly + +Some API responses require data from more than one query. Run them concurrently and assemble the response. + + + + +```typescript +// GET /projects/:id — returns project + recent tasks + active members +export async function getProjectDetail(projectId: string, tenantId: string) { + const [projectResult, tasksResult, membersResult] = await Promise.all([ + db.records.find({ + labels: ['PROJECT'], + where: { __id: projectId, tenantId } + }), + db.records.find({ + labels: ['TASK'], + where: { + PROJECT: { + $relation: { type: 'CONTAINS', direction: 'in' }, + __id: projectId + }, + status: { $in: ['open', 'in_progress'] } + }, + orderBy: { dueDate: 'asc' }, + limit: 10 + }), + db.records.find({ + labels: ['MEMBER'], + where: { + PROJECT: { + $relation: { type: 'ASSIGNED_TO', direction: 'out' }, + __id: projectId + }, + isActive: true + } + }) + ]) + + const project = projectResult.data[0] + if (!project) return null + + return { + project: { + id: project.__id, + name: project.name, + status: project.status + }, + openTasks: tasksResult.data.map(t => ({ + id: t.__id, + title: t.title, + dueDate: t.dueDate, + status: t.status + })), + members: membersResult.data.map(m => ({ + id: m.__id, + name: m.name, + role: m.role + })) + } +} +``` + + + + +```python +from concurrent.futures import ThreadPoolExecutor, as_completed + + +def get_project_detail(project_id: str, tenant_id: str): + def fetch_project(): + return db.records.find({ + "labels": ["PROJECT"], + "where": {"__id": project_id, "tenantId": tenant_id} + }) + + def fetch_tasks(): + return db.records.find({ + "labels": ["TASK"], + "where": { + "PROJECT": { + "$relation": {"type": "CONTAINS", "direction": "in"}, + "__id": project_id + }, + "status": {"$in": ["open", "in_progress"]} + }, + "orderBy": {"dueDate": "asc"}, + "limit": 10 + }) + + def fetch_members(): + return db.records.find({ + 
"labels": ["MEMBER"], + "where": { + "PROJECT": { + "$relation": {"type": "ASSIGNED_TO", "direction": "out"}, + "__id": project_id + }, + "isActive": True + } + }) + + with ThreadPoolExecutor(max_workers=3) as executor: + fut_project = executor.submit(fetch_project) + fut_tasks = executor.submit(fetch_tasks) + fut_members = executor.submit(fetch_members) + + project_result = fut_project.result() + tasks_result = fut_tasks.result() + members_result = fut_members.result() + + if not project_result.data: + return None + + return { + "project": project_result.data[0].data, + "openTasks": [t.data for t in tasks_result.data], + "members": [m.data for m in members_result.data] + } +``` + + + + +```bash +PROJECT_ID="proj-123" +TENANT_ID="tenant-42" + +# Run concurrently with background processes +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"labels\":[\"PROJECT\"],\"where\":{\"__id\":\"$PROJECT_ID\",\"tenantId\":\"$TENANT_ID\"}}" & + +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"labels\":[\"TASK\"],\"where\":{\"PROJECT\":{\"__id\":\"$PROJECT_ID\"},\"status\":{\"\$in\":[\"open\",\"in_progress\"]}},\"limit\":10,\"orderBy\":{\"dueDate\":\"asc\"}}" & + +wait +``` + + + + +--- + +## Pattern 3: Aggregate endpoints + +Expose KPI endpoints that return counts, sums, and distributions — not raw records. + + + + +```typescript +// GET /analytics/tasks-by-status?tenantId=...&projectId=... +export async function getTaskStatusBreakdown(tenantId: string, projectId?: string) { + const projectFilter = projectId + ? 
{ PROJECT: { __id: projectId, $relation: { type: 'CONTAINS', direction: 'in' } } } + : {} + + const result = await db.records.find({ + labels: ['TASK'], + where: { + tenantId, + ...projectFilter + }, + aggregate: { + count: { fn: 'count', alias: '$record' }, + status: '$record.status' + }, + groupBy: ['status', 'count'], + orderBy: { count: 'desc' } + }) + + return { + breakdown: result.data, + total: result.total + } +} +``` + + + + +```python +def get_task_status_breakdown(tenant_id: str, project_id: str | None = None): + where: dict = {"tenantId": tenant_id} + if project_id: + where["PROJECT"] = { + "__id": project_id, + "$relation": {"type": "CONTAINS", "direction": "in"} + } + + result = db.records.find({ + "labels": ["TASK"], + "where": where, + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "status": "$record.status" + }, + "groupBy": ["status", "count"], + "orderBy": {"count": "desc"} + }) + + return {"breakdown": result.data, "total": result.total} +``` + + + + +```bash +TENANT_ID="tenant-42" + +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"TASK\"], + \"where\": {\"tenantId\": \"$TENANT_ID\"}, + \"aggregate\": { + \"count\": {\"fn\": \"count\", \"alias\": \"\$record\"}, + \"status\": \"\$record.status\" + }, + \"groupBy\": [\"status\", \"count\"], + \"orderBy\": {\"count\": \"desc\"} + }" +``` + + + + +--- + +## Pattern 4: Safe delete with preview + +Always offer a count-first preview before executing a delete, especially for bulk operations. + + + + +```typescript +type DeletePreview = { count: number; dryRun: true } + +async function previewDelete(tenantId: string, filter: Record): Promise { + const result = await db.records.find({ + labels: ['ARTICLE'], + where: { tenantId, ...filter }, + aggregate: { count: { fn: 'count', alias: '$record' } }, + groupBy: ['count'] + }) + return { count: result.data[0]?.count as number ?? 
0, dryRun: true } +} + +async function executeDelete(tenantId: string, filter: Record) { + return db.records.deleteMany({ + labels: ['ARTICLE'], + where: { tenantId, ...filter } + }) +} + +// API handler +export async function deleteArticlesHandler(req: Request): Promise { + const { filter, confirm } = await req.json() + const tenantId = getTenantId(req) + + if (!confirm) { + const preview = await previewDelete(tenantId, filter) + return Response.json({ preview }) + } + + await executeDelete(tenantId, filter) + return Response.json({ deleted: true }) +} +``` + + + + +```python +def delete_articles(tenant_id: str, filter_: dict, confirm: bool = False): + where = {"tenantId": tenant_id, **filter_} + + if not confirm: + # Preview + preview = db.records.find({ + "labels": ["ARTICLE"], + "where": where, + "aggregate": {"count": {"fn": "count", "alias": "$record"}}, + "groupBy": ["count"] + }) + count = preview.data[0]["count"] if preview.data else 0 + return {"dryRun": True, "count": count} + + db.records.delete({"labels": ["ARTICLE"], "where": where}) + return {"deleted": True} +``` + + + + +```bash +TENANT_ID="tenant-42" + +# Preview: count matching records first +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"ARTICLE\"], + \"where\": {\"tenantId\": \"$TENANT_ID\", \"status\": \"archived\"}, + \"aggregate\": {\"count\": {\"fn\": \"count\", \"alias\": \"\$record\"}}, + \"groupBy\": [\"count\"] + }" + +# Execute delete only after confirming the count +curl -s -X DELETE "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"ARTICLE\"], + \"where\": {\"tenantId\": \"$TENANT_ID\", \"status\": \"archived\"} + }" +``` + + + + +--- + +## Response shaping conventions + +The patterns above follow these conventions: + +1. **Never return `__id` as the external ID field name** — map it to `id` in the response shape +2. 
**Never return internal RushDB metadata** (`__labels`, `__proptypes`) in public API responses +3. **Always cap `limit`** at a maximum page size (100 or less) even if the user passes a higher value +4. **Always inject tenant scope** from the session token, never from user-supplied parameters +5. **Aggregate responses** return structured objects, not raw RushDB result arrays + +--- + +## Production caveat + +SearchQuery `where` clauses support arbitrary nesting and traversal. In a public-facing API, always validate the filter shape that users supply — never spread user-provided objects directly into `where` without validation. An attacker who can inject arbitrary `$or` or traversal keys into your query can scan records they should not see. Build your `where` object programmatically from validated, typed parameters. + +--- + +## Next steps + +- [Hybrid Retrieval: Structured Filters Plus Semantic Search](./hybrid-retrieval.mdx) — combining filter and vector search in one handler +- [Semantic Search for Multi-Tenant Products](./semantic-search-multitenant.mdx) — tenant isolation at the storage layer +- [Query Optimization and KU Efficiency](./query-optimization.mdx) — measuring and reducing compute cost diff --git a/docs/docs/tutorials/graphrag.mdx b/docs/docs/tutorials/graphrag.mdx new file mode 100644 index 00000000..be4f3032 --- /dev/null +++ b/docs/docs/tutorials/graphrag.mdx @@ -0,0 +1,430 @@ +--- +sidebar_position: 36 +title: "GraphRAG — Graph-Enriched Retrieval Augmented Generation" +description: Retrieve chunks semantically, then traverse the knowledge graph to assemble author, topic, and source provenance as richer LLM context. +tags: [AI, RAG, GraphRAG, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# GraphRAG — Graph-Enriched Retrieval Augmented Generation + +Standard RAG retrieves the top-k most similar text chunks and pastes them into a prompt. 
GraphRAG does the same retrieval step, then traverses the knowledge graph to collect related entities — authors, topics, source documents, citations — and includes them as structured context. The LLM call is identical; the difference is what you put in the context window. + +``` +Flat RAG context GraphRAG context +───────────────────── ────────────────────────────────── +Chunk 1 text Chunk 1 text +Chunk 2 text └─ from: architecture.md +Chunk 3 text └─ author: Jane Smith (Platform) + └─ topics: distributed systems, caching + Chunk 2 text + └─ from: api-reference.md + └─ cited by: 3 other documents +``` + +--- + +## Graph shape + +```mermaid +graph LR + SOURCE[SOURCE
Document / URL] -->|HAS_CHUNK| CHUNK[CHUNK
text, chunk_index] + AUTHOR[AUTHOR] -->|AUTHORED| SOURCE + TOPIC[TOPIC] -->|COVERS| CHUNK + CHUNK -->|RELATED_TO| CHUNK2[CHUNK
related chunk] +``` + +| Label | What it represents | +|---|---| +| `SOURCE` | A document, web page, or data export (the original source) | +| `CHUNK` | A text fragment from a source, with overlap | +| `AUTHOR` | A person or team that authored the source | +| `TOPIC` | A concept tag associated with a chunk | + +--- + +## Step 1: Ingest sources and chunks + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' +import fs from 'fs' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +const CHUNK_SIZE = 500 +const CHUNK_OVERLAP = 80 + +function chunkText(text: string): string[] { + const chunks: string[] = [] + let start = 0 + while (start < text.length) { + chunks.push(text.slice(start, Math.min(start + CHUNK_SIZE, text.length)).trim()) + start += CHUNK_SIZE - CHUNK_OVERLAP + } + return chunks.filter(Boolean) +} + +interface SourceMeta { filename: string; author: string; topics: string[] } + +async function ingestSource(meta: SourceMeta, content: string) { + const tx = await db.tx.begin() + try { + // Create the SOURCE record + const source = await db.records.create({ + label: 'SOURCE', + data: { filename: meta.filename, ingestedAt: new Date().toISOString() } + }, tx) + + // Create AUTHOR if not already present (find-then-create) + const existingAuthor = await db.records.find({ labels: ['AUTHOR'], where: { name: meta.author } }) + const author = existingAuthor.data.length > 0 + ? 
existingAuthor.data[0] + : await db.records.create({ label: 'AUTHOR', data: { name: meta.author } }, tx) + + await db.records.attach({ source: author, target: source, options: { type: 'AUTHORED', direction: 'out' } }, tx) + + // Create chunks and link to source + const texts = chunkText(content) + const chunks = await db.records.importJson({ + label: 'CHUNK', + data: texts.map((text, i) => ({ text, chunkIndex: i, sourceFile: meta.filename })) + }) // Note: importJson doesn't support tx — do in a follow-up pass for link + + // Link chunks to source (outside transaction — importJson is atomic on its own) + await db.tx.commit(tx) + + // Attach chunks to source record after importJson committed + const chunkRecords = await db.records.find({ + labels: ['CHUNK'], + where: { sourceFile: meta.filename }, + orderBy: { chunkIndex: 'asc' } + }) + + for (const chunk of chunkRecords.data) { + await db.records.attach({ source, target: chunk, options: { type: 'HAS_CHUNK', direction: 'out' } }) + } + + // Attach topics + for (const topicName of meta.topics) { + const existingTopic = await db.records.find({ labels: ['TOPIC'], where: { name: topicName } }) + const topic = existingTopic.data.length > 0 + ? 
existingTopic.data[0] + : await db.records.create({ label: 'TOPIC', data: { name: topicName } }) + for (const chunk of chunkRecords.data) { + await db.records.attach({ source: topic, target: chunk, options: { type: 'COVERS', direction: 'out' } }) + } + } + + console.log(`Ingested ${texts.length} chunks from ${meta.filename}`) + } catch (err) { + await db.tx.rollback(tx) + throw err + } +} + +// Example usage +await ingestSource( + { filename: 'architecture.md', author: 'Jane Smith', topics: ['distributed systems', 'caching'] }, + fs.readFileSync('./docs/architecture.md', 'utf8') +) +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ['RUSHDB_API_KEY'], base_url='https://api.rushdb.com/api/v1') + +CHUNK_SIZE = 500 +CHUNK_OVERLAP = 80 + +def chunk_text(text: str) -> list[str]: + chunks, start = [], 0 + while start < len(text): + chunks.append(text[start:start + CHUNK_SIZE].strip()) + start += CHUNK_SIZE - CHUNK_OVERLAP + return [c for c in chunks if c] + +def ingest_source(filename: str, author: str, topics: list[str], content: str): + # Create SOURCE + source = db.records.create('SOURCE', { + 'filename': filename, + 'ingestedAt': '2025-01-01T00:00:00Z' + }) + + # Find or create AUTHOR + existing = db.records.find({'labels': ['AUTHOR'], 'where': {'name': author}}) + author_rec = existing.data[0] if existing.data else db.records.create('AUTHOR', {'name': author}) + db.records.attach(author_rec.id, source.id, {'type': 'AUTHORED', 'direction': 'out'}) + + # Create chunks + texts = chunk_text(content) + db.records.import_json({ + 'label': 'CHUNK', + 'data': [{'text': t, 'chunkIndex': i, 'sourceFile': filename} for i, t in enumerate(texts)] + }) + + # Link chunks to source + chunk_records = db.records.find({ + 'labels': ['CHUNK'], + 'where': {'sourceFile': filename}, + 'orderBy': {'chunkIndex': 'asc'} + }) + for chunk in chunk_records.data: + db.records.attach(source.id, chunk.id, {'type': 'HAS_CHUNK', 'direction': 'out'}) + + # Attach 
topics to all chunks
+    for topic_name in topics:
+        existing_topic = db.records.find({'labels': ['TOPIC'], 'where': {'name': topic_name}})
+        topic = existing_topic.data[0] if existing_topic.data else db.records.create('TOPIC', {'name': topic_name})
+        for chunk in chunk_records.data:
+            db.records.attach(topic.id, chunk.id, {'type': 'COVERS', 'direction': 'out'})
+
+    print(f'Ingested {len(texts)} chunks from {filename}')
+```
+
+
+
+
+---
+
+## Step 2: Create an embedding index on chunks
+
+
+
+
+```typescript
+await db.ai.indexes.create({ label: 'CHUNK', propertyName: 'text' })
+
+// Poll until ready
+let stats = await db.ai.indexes.stats('your-index-id')
+while (stats.data.indexedRecords < stats.data.totalRecords) {
+  await new Promise(r => setTimeout(r, 3000))
+  stats = await db.ai.indexes.stats('your-index-id')
+}
+console.log('Index ready')
+```
+
+
+
+
+```python
+import time
+
+index = db.ai.indexes.create({'label': 'CHUNK', 'propertyName': 'text'})
+index_id = index['id']
+
+while True:
+    stats = db.ai.indexes.stats(index_id)
+    if stats['data']['indexedRecords'] >= stats['data']['totalRecords']:
+        break
+    time.sleep(3)
+print('Index ready')
+```
+
+
+
+
+---
+
+## Step 3: GraphRAG retrieval — chunks + graph context
+
+
+
+
+```typescript
+interface ChunkWithContext {
+  text: string
+  source: string
+  author: string | null
+  topics: string[]
+  score: number
+}
+
+async function graphRagRetrieve(userQuery: string, k = 5): Promise<ChunkWithContext[]> {
+  // 1. Semantic search for top-k chunks
+  const results = await db.ai.search({
+    query: userQuery,
+    propertyName: 'text',
+    labels: ['CHUNK'],
+    limit: k
+  })
+
+  // 2. 
Enrich each chunk with graph context in parallel + return Promise.all( + results.data.map(async (chunk) => { + const [sourceResult, topicResult] = await Promise.all([ + db.records.find({ + labels: ['SOURCE'], + where: { + CHUNK: { + $relation: { type: 'HAS_CHUNK', direction: 'out' }, + __id: chunk.__id + } + } + }), + db.records.find({ + labels: ['TOPIC'], + where: { + CHUNK: { + $relation: { type: 'COVERS', direction: 'out' }, + __id: chunk.__id + } + } + }) + ]) + + const source = sourceResult.data[0] + const authorResult = source + ? await db.records.find({ + labels: ['AUTHOR'], + where: { + SOURCE: { + $relation: { type: 'AUTHORED', direction: 'out' }, + __id: source.__id + } + } + }) + : { data: [] } + + return { + text: chunk.text as string, + source: source?.filename as string ?? 'unknown', + author: authorResult.data[0]?.name as string ?? null, + topics: topicResult.data.map(t => t.name as string), + score: chunk.__score as number + } + }) + ) +} + +function buildGraphRagPrompt(userQuery: string, chunks: ChunkWithContext[]): string { + const contextBlocks = chunks.map((c, i) => [ + `[${i + 1}] (score: ${c.score.toFixed(2)}, source: ${c.source}, author: ${c.author ?? 'unknown'})`, + `Topics: ${c.topics.join(', ') || 'none'}`, + c.text + ].join('\n')).join('\n\n---\n\n') + + return [ + 'You are a helpful assistant. 
Answer using the provided context.', + 'Context:', + contextBlocks, + '', + `Question: ${userQuery}` + ].join('\n') +} + +// Full pipeline +const chunks = await graphRagRetrieve('How does the caching layer handle invalidation?') +const prompt = buildGraphRagPrompt('How does the caching layer handle invalidation?', chunks) + +// Pass prompt to your LLM of choice +console.log(prompt) +``` + + + + +```python +from concurrent.futures import ThreadPoolExecutor + +def graph_rag_retrieve(user_query: str, k: int = 5) -> list[dict]: + results = db.ai.search({ + 'query': user_query, + 'propertyName': 'text', + 'labels': ['CHUNK'], + 'limit': k + }) + + def enrich(chunk): + chunk_id = chunk.id + + source_result = db.records.find({ + 'labels': ['SOURCE'], + 'where': {'CHUNK': {'$relation': {'type': 'HAS_CHUNK', 'direction': 'out'}, '__id': chunk_id}} + }) + topic_result = db.records.find({ + 'labels': ['TOPIC'], + 'where': {'CHUNK': {'$relation': {'type': 'COVERS', 'direction': 'out'}, '__id': chunk_id}} + }) + + source = source_result.data[0] if source_result.data else None + author_name = None + if source: + author_result = db.records.find({ + 'labels': ['AUTHOR'], + 'where': {'SOURCE': {'$relation': {'type': 'AUTHORED', 'direction': 'out'}, '__id': source.id}} + }) + author_name = author_result.data[0].data.get('name') if author_result.data else None + + return { + 'text': chunk.data.get('text'), + 'source': source.data.get('filename') if source else 'unknown', + 'author': author_name, + 'topics': [t.data.get('name') for t in topic_result.data], + 'score': chunk.data.get('__score') + } + + with ThreadPoolExecutor(max_workers=5) as pool: + return list(pool.map(enrich, results.data)) + +def build_graph_rag_prompt(user_query: str, chunks: list[dict]) -> str: + blocks = [] + for i, c in enumerate(chunks, 1): + blocks.append( + f"[{i}] (score: {c['score']:.2f}, source: {c['source']}, author: {c['author'] or 'unknown'})\n" + f"Topics: {', '.join(c['topics']) or 'none'}\n" + 
f"{c['text']}" + ) + context = '\n\n---\n\n'.join(blocks) + return f"You are a helpful assistant. Answer using the provided context.\n\nContext:\n{context}\n\nQuestion: {user_query}" + +chunks = graph_rag_retrieve('How does the caching layer handle invalidation?') +prompt = build_graph_rag_prompt('How does the caching layer handle invalidation?', chunks) +print(prompt) +``` + + + + +--- + +## GraphRAG vs flat RAG — what changes in the prompt + +**Flat RAG prompt fragment:** +``` +[1] The cache layer uses a time-based TTL of 300 seconds. Stale entries + are invalidated on next read by comparing the stored timestamp... +``` + +**GraphRAG prompt fragment:** +``` +[1] (score: 0.91, source: architecture.md, author: Jane Smith) +Topics: distributed systems, caching +The cache layer uses a time-based TTL of 300 seconds. Stale entries +are invalidated on next read by comparing the stored timestamp... +``` + +The LLM now knows *where this knowledge came from*, *who wrote it*, and *what domain it belongs to*. This enables citation-aware answers and reduces hallucination on ambiguous questions. + +--- + +## Production caveat + +Each retrieved chunk triggers two additional queries for source and topic enrichment. For `k=10` that is 20 extra roundtrips. Run enrichment in parallel (as above) and cache per-chunk context if the same chunk appears across multiple queries in a session. 
+ +--- + +## Next steps + +- [BYOV External Embeddings](./byov-external-embeddings.mdx) — supply your own vectors instead of relying on managed embeddings +- [Multi-Source RAG](./rag-multi-source.mdx) — combine PDFs, web pages, and database exports in one semantic search +- [RAG Evaluation](./rag-evaluation.mdx) — measure precision and recall before deploying GraphRAG to production diff --git a/docs/docs/tutorials/hybrid-retrieval.mdx b/docs/docs/tutorials/hybrid-retrieval.mdx new file mode 100644 index 00000000..a766f483 --- /dev/null +++ b/docs/docs/tutorials/hybrid-retrieval.mdx @@ -0,0 +1,555 @@ +--- +sidebar_position: 19 +title: "Hybrid Retrieval: Filters Plus Semantic Search" +description: Combine structured where-clause filtering with vector semantic search to narrow candidates by business constraints, then rank by relevance. +tags: [AI, Semantic Search, Retrieval, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Hybrid Retrieval: Filters Plus Semantic Search + +Pure semantic search ranks by similarity to a query embedding. Pure keyword or filter search matches exact values. Hybrid retrieval combines both: narrowing the candidate set by business constraints first, then ranking by semantic relevance within that set. + +RushDB supports hybrid retrieval natively — `db.ai.search` accepts a `where` clause that is applied before the similarity ranking step. + +--- + +## How it works + +`db.ai.search` runs structured filtering and semantic ranking in one call: + +1. The `where` clause restricts which records are eligible (tenant, date range, category, status, etc.) +2. The `query` string (or `queryVector`, see below) is compared against embeddings on the specified `propertyName` +3. Results are returned ordered by `__score` — a 0–1 cosine similarity value + +This means you get tenant isolation and time-scoping for free, without a separate pre-filter step. 
+ +--- + +## Prerequisites + +Before calling `db.ai.search`, a managed embedding index must exist and be ready for the label and property you want to search. See [Semantic Search for Multi-Tenant Products](./semantic-search-multitenant.mdx) for setup instructions. + +--- + +## Step 1: Ingest records + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +await db.records.importJson({ + label: 'ARTICLE', + data: [ + { + tenantId: 'acme', + title: 'Deploying Postgres on Kubernetes', + body: 'A practical guide to running stateful workloads on K8s using StatefulSets and persistent volumes.', + category: 'infrastructure', + publishedAt: '2025-01-15', + status: 'published' + }, + { + tenantId: 'acme', + title: 'Backpressure Patterns in Event Queues', + body: 'How to handle consumer lag and apply backpressure in Kafka and RabbitMQ setups.', + category: 'messaging', + publishedAt: '2025-02-10', + status: 'published' + }, + { + tenantId: 'acme', + title: 'Zero-Downtime Schema Migrations', + body: 'Techniques for running database migrations without taking down production traffic.', + category: 'infrastructure', + publishedAt: '2025-03-01', + status: 'draft' + } + ] +}) +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +db.records.import_json({ + "label": "ARTICLE", + "data": [ + { + "tenantId": "acme", + "title": "Deploying Postgres on Kubernetes", + "body": "A practical guide to running stateful workloads on K8s using StatefulSets and persistent volumes.", + "category": "infrastructure", + "publishedAt": "2025-01-15", + "status": "published" + }, + { + "tenantId": "acme", + "title": "Backpressure Patterns in Event Queues", + "body": "How to handle consumer lag and apply backpressure in Kafka and RabbitMQ setups.", + "category": "messaging", + "publishedAt": "2025-02-10", + "status": "published" + }, + { + 
"tenantId": "acme", + "title": "Zero-Downtime Schema Migrations", + "body": "Techniques for running database migrations without taking down production traffic.", + "category": "infrastructure", + "publishedAt": "2025-03-01", + "status": "draft" + } + ] +}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +curl -s -X POST "$BASE/records/import/json" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "label": "ARTICLE", + "data": [ + { + "tenantId": "acme", + "title": "Deploying Postgres on Kubernetes", + "body": "A practical guide to running stateful workloads on K8s using StatefulSets and persistent volumes.", + "category": "infrastructure", + "publishedAt": "2025-01-15", + "status": "published" + } + ] + }' +``` + + + + +--- + +## Step 2: Semantic search with no filters (baseline) + +Start with pure semantic search to verify the index is working. + + + + +```typescript +const results = await db.ai.search({ + query: 'stateful storage in container orchestration', + propertyName: 'body', + labels: ['ARTICLE'] +}) + +for (const article of results.data) { + console.log(`${article.title} — score: ${article.__score.toFixed(3)}`) +} +``` + + + + +```python +results = db.ai.search({ + "query": "stateful storage in container orchestration", + "propertyName": "body", + "labels": ["ARTICLE"] +}) + +for article in results.data: + print(f"{article.data.get('title')} — score: {article.__score:.3f}") +``` + + + + +```bash +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "query": "stateful storage in container orchestration", + "propertyName": "body", + "labels": ["ARTICLE"] + }' +``` + + + + +--- + +## Step 3: Add structural filters (hybrid mode) + +Narrow to published articles within a given tenant. The `where` clause runs before the similarity step — only eligible records are ranked. 
+ + + + +```typescript +const hybridResults = await db.ai.search({ + query: 'stateful storage in container orchestration', + propertyName: 'body', + labels: ['ARTICLE'], + where: { + tenantId: 'acme', + status: 'published' + } +}) + +for (const article of hybridResults.data) { + console.log(`[${article.category}] ${article.title} — score: ${article.__score.toFixed(3)}`) +} +``` + + + + +```python +hybrid_results = db.ai.search({ + "query": "stateful storage in container orchestration", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": { + "tenantId": "acme", + "status": "published" + } +}) + +for article in hybrid_results.data: + print(f"[{article.data.get('category')}] {article.data.get('title')} — score: {article.__score:.3f}") +``` + + + + +```bash +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "query": "stateful storage in container orchestration", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": { + "tenantId": "acme", + "status": "published" + } + }' +``` + + + + +The draft migration article is excluded even if it has a high semantic similarity score, because `status: 'published'` removes it from the candidate set before scoring. + +--- + +## Step 4: Add date range and category filters + +Compound filters are plain `where` clauses. Combine them freely — the semantics are AND by default. 
+ + + + +```typescript +const filtered = await db.ai.search({ + query: 'handling operational failures', + propertyName: 'body', + labels: ['ARTICLE'], + where: { + tenantId: 'acme', + status: 'published', + category: 'infrastructure', + publishedAt: { $gte: '2025-01-01' } + }, + limit: 5 +}) +``` + + + + +```python +filtered = db.ai.search({ + "query": "handling operational failures", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": { + "tenantId": "acme", + "status": "published", + "category": "infrastructure", + "publishedAt": {"$gte": "2025-01-01"} + }, + "limit": 5 +}) +``` + + + + +```bash +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "query": "handling operational failures", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": { + "tenantId": "acme", + "status": "published", + "category": "infrastructure", + "publishedAt": {"$gte": "2025-01-01"} + }, + "limit": 5 + }' +``` + + + + +--- + +## Step 5: Paginate semantic results + +Use `skip` and `limit` for result pagination. Consistent ordering is guaranteed as long as the same `query` and `where` are used. 
+ + + + +```typescript +async function searchArticles(query: string, tenantId: string, page = 0, pageSize = 10) { + const results = await db.ai.search({ + query, + propertyName: 'body', + labels: ['ARTICLE'], + where: { tenantId, status: 'published' }, + skip: page * pageSize, + limit: pageSize + }) + + return { + results: results.data.map(r => ({ + id: r.__id, + title: r.title, + score: r.__score + })), + page, + pageSize + } +} + +const page0 = await searchArticles('container storage performance', 'acme') +const page1 = await searchArticles('container storage performance', 'acme', 1) +``` + + + + +```python +def search_articles(query: str, tenant_id: str, page: int = 0, page_size: int = 10): + results = db.ai.search({ + "query": query, + "propertyName": "body", + "labels": ["ARTICLE"], + "where": {"tenantId": tenant_id, "status": "published"}, + "skip": page * page_size, + "limit": page_size + }) + return { + "results": [ + {"id": r.id, "title": r.data.get("title"), "score": r.__score} + for r in results.data + ], + "page": page, + "pageSize": page_size + } + +page0 = search_articles("container storage performance", "acme") +page1 = search_articles("container storage performance", "acme", page=1) +``` + + + + +```bash +# Page 0 +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"query":"container storage performance","propertyName":"body","labels":["ARTICLE"],"where":{"tenantId":"acme","status":"published"},"skip":0,"limit":10}' + +# Page 1 +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"query":"container storage performance","propertyName":"body","labels":["ARTICLE"],"where":{"tenantId":"acme","status":"published"},"skip":10,"limit":10}' +``` + + + + +--- + +## Step 6: Filtering by relationship (graph-aware hybrid retrieval) + +Combine semantic search with a graph traversal filter. Here, only articles tagged with a specific TAG record are eligible for ranking. 
+ + + + +```typescript +// Only search articles tagged "kubernetes" +const taggedResults = await db.ai.search({ + query: 'storage class and persistent volumes', + propertyName: 'body', + labels: ['ARTICLE'], + where: { + tenantId: 'acme', + status: 'published', + TAG: { + $relation: { type: 'TAGGED_WITH', direction: 'out' }, + slug: 'kubernetes' + } + } +}) +``` + + + + +```python +tagged_results = db.ai.search({ + "query": "storage class and persistent volumes", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": { + "tenantId": "acme", + "status": "published", + "TAG": { + "$relation": {"type": "TAGGED_WITH", "direction": "out"}, + "slug": "kubernetes" + } + } +}) +``` + + + + +```bash +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "query": "storage class and persistent volumes", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": { + "tenantId": "acme", + "status": "published", + "TAG": { + "$relation": {"type": "TAGGED_WITH", "direction": "out"}, + "slug": "kubernetes" + } + } + }' +``` + + + + +--- + +## Step 7: External vectors (BYOV) + +If you generate embeddings outside RushDB (e.g., with your own model), pass `queryVector` instead of `query`. RushDB skips embedding generation and uses your vector directly. 
+ + + + +```typescript +// queryVector must match the dimensions and similarity function of the index +const externalEmbedding: number[] = await myEmbeddingModel.embed('container storage performance') + +const externalResults = await db.ai.search({ + queryVector: externalEmbedding, + propertyName: 'body', + labels: ['ARTICLE'], + where: { tenantId: 'acme', status: 'published' } +}) +``` + + + + +```python +external_embedding = my_embedding_model.embed("container storage performance") + +external_results = db.ai.search({ + "queryVector": external_embedding, + "propertyName": "body", + "labels": ["ARTICLE"], + "where": {"tenantId": "acme", "status": "published"} +}) +``` + + + + +```bash +# Pass queryVector instead of query — vector must match index dimensions +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "queryVector": [0.12, -0.45, 0.03, ...], + "propertyName": "body", + "labels": ["ARTICLE"], + "where": {"tenantId": "acme", "status": "published"} + }' +``` + + + + +--- + +## When to use pure search vs. hybrid + +| Scenario | Approach | +|---|---| +| Open-ended exploration ("find anything about X") | Pure semantic search, no `where` | +| Tenant-scoped retrieval | Always add `tenantId` to `where` | +| Time-bounded retrieval (last 30 days) | Add date filter to `where` | +| Category/status filtering before ranking | Add structured fields to `where` | +| Graph-relationship scoping (tagged with, authored by) | Add relationship traversal to `where` | +| Custom embedding model | Use `queryVector` instead of `query` | + +--- + +## Performance characteristics + +- The `where` clause runs as a graph filter before the vector similarity step. A highly selective filter (eliminating most records) makes semantic search faster because fewer records are ranked. +- The `limit` parameter controls how many top-ranked records are returned. Use values between 5–50 for typical UX use cases. 
+- `skip` is available for pagination but deep pagination over large result sets (skip > 500) may perform worse than re-querying with an updated `where` filter. + +--- + +## Next steps + +- [Semantic Search for Multi-Tenant Products](./semantic-search-multitenant.mdx) — index setup and polling +- [Building a Graph-Backed API Layer](./graph-backed-api.mdx) — integrate hybrid retrieval into a production handler +- [Temporal Graphs](./temporal-graphs.mdx) — retrieve current-state records before semantic ranking diff --git a/docs/docs/tutorials/importing-data.md b/docs/docs/tutorials/importing-data.md index 8d4051bc..06830750 100644 --- a/docs/docs/tutorials/importing-data.md +++ b/docs/docs/tutorials/importing-data.md @@ -1,7 +1,8 @@ --- title: Importing data from external sources description: Learn how to import your data to RushDB -sidebar_position: 6 +sidebar_position: 3 +tags: [Data, Getting Started] --- # Importing data from external sources diff --git a/docs/docs/tutorials/importing-from-mongodb.md b/docs/docs/tutorials/importing-from-mongodb.md new file mode 100644 index 00000000..77e4270c --- /dev/null +++ b/docs/docs/tutorials/importing-from-mongodb.md @@ -0,0 +1,418 @@ +--- +title: Importing from MongoDB +description: A step-by-step guide to migrating MongoDB collections to RushDB — nested documents, embedded arrays, upsert, and change streams +sidebar_position: 4 +tags: [Data, MongoDB, Getting Started] +--- + +# Importing from MongoDB + +This tutorial walks through moving MongoDB data into RushDB. 
It covers: + +- **One-shot bulk import** — dump a collection and push it in one call +- **Embedded documents and arrays** — how `importJson` handles nesting automatically +- **Incremental sync with upsert** — run the same script on a schedule without creating duplicates +- **Cross-collection references** — link documents by ObjectId using `relationships.createMany` +- **Change streams** — react to live MongoDB writes and mirror them into RushDB + +--- + +## Prerequisites + +```bash +npm install @rushdb/javascript-sdk mongodb dotenv +``` + +```env +RUSHDB_API_KEY=your_rushdb_key +MONGO_URI=mongodb+srv://user:pass@cluster.mongodb.net/acme +``` + +--- + +## Why `importJson` instead of `createMany`? + +MongoDB documents are rarely flat. A typical `users` document might look like: + +```json +{ + "_id": "64f1a...", + "name": "Ada Lovelace", + "email": "ada@example.com", + "address": { "city": "London", "country": "UK" }, + "orders": [ + { "_id": "64f2b...", "total": 149.99, "status": "shipped" }, + { "_id": "64f2c...", "total": 29.99, "status": "delivered" } + ] +} +``` + +`records.createMany` only accepts flat rows. `records.importJson` handles nested objects and arrays by recursively creating child records and linking them in the graph. The key you use for nested arrays becomes the **label** of the child records. + +--- + +## 1. One-shot bulk import + +The simplest path: dump a collection, reshape each document, push everything to RushDB. + +```ts +import RushDB from '@rushdb/javascript-sdk' +import { MongoClient } from 'mongodb' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +async function bulkImport() { + const mongo = await MongoClient.connect(process.env.MONGO_URI!) + const mdb = mongo.db() + + // ── 1. Fetch ──────────────────────────────────────────────── + const users = await mdb.collection('users').find({}).toArray() + + // ── 2. 
Reshape ────────────────────────────────────────────── + // • Convert _id (ObjectId) to a plain string + // • Name child-array keys after the label you want in RushDB + const payload = users.map(u => ({ + mongoId: String(u._id), + name: u.name, + email: u.email, + city: u.address?.city, + country: u.address?.country, + // Each element of 'Order' array becomes an Order record linked to this User + Order: (u.orders ?? []).map((o: any) => ({ + mongoId: String(o._id), + total: o.total, + status: o.status + })) + })) + + // ── 3. Import ─────────────────────────────────────────────── + await db.records.importJson({ + label: 'User', + data: payload, + options: { suggestTypes: true, returnResult: false } + }) + + console.log(`Imported ${users.length} users with their orders`) + await mongo.close() +} + +bulkImport().catch(console.error) +``` + +After this runs, RushDB contains: +- One `User` record per MongoDB user document +- One `Order` record per embedded order, automatically linked to its parent user + +--- + +## 2. Incremental sync with upsert + +Add `mergeBy` and `mergeStrategy` to make subsequent runs idempotent. The script can run on a cron without creating duplicates. + +```ts +async function incrementalSync() { + const mongo = await MongoClient.connect(process.env.MONGO_URI!) + const mdb = mongo.db() + + // Only fetch documents updated in the last hour + const since = new Date(Date.now() - 60 * 60 * 1000) + const users = await mdb.collection('users') + .find({ updatedAt: { $gte: since } }) + .toArray() + + if (!users.length) { + console.log('No updates since', since.toISOString()) + await mongo.close() + return + } + + const payload = users.map(u => ({ + mongoId: String(u._id), + name: u.name, + email: u.email, + city: u.address?.city, + country: u.address?.country, + Order: (u.orders ?? 
[]).map((o: any) => ({ + mongoId: String(o._id), + total: o.total, + status: o.status + })) + })) + + await db.records.importJson({ + label: 'User', + data: payload, + options: { + suggestTypes: true, + mergeBy: ['mongoId'], // match existing records by mongoId + mergeStrategy: 'append' // update changed fields, keep everything else + } + }) + + console.log(`Synced ${users.length} updated users`) + await mongo.close() +} +``` + +:::info mergeStrategy options +- **`append`** — adds/updates provided fields, keeps any others already in RushDB. Best for incremental enrichment. +- **`rewrite`** — replaces all own properties with the incoming set. Best when RushDB should be an exact mirror of the source. +::: + +--- + +## 3. Cross-collection references + +When orders live in a separate collection and reference users by `userId` (an ObjectId), use the "import then link" pattern. + +```ts +async function importWithReferences() { + const mongo = await MongoClient.connect(process.env.MONGO_URI!) + const mdb = mongo.db() + + const users = await mdb.collection('users').find({}).toArray() + const orders = await mdb.collection('orders').find({}).toArray() + + // 1) Import Users + await db.records.createMany({ + label: 'User', + data: users.map(u => ({ + mongoId: String(u._id), + name: u.name, + email: u.email + })), + options: { suggestTypes: true, mergeBy: ['mongoId'], mergeStrategy: 'append' } + }) + + // 2) Import Orders — preserve the reference as userMongoId for the join + await db.records.createMany({ + label: 'Order', + data: orders.map(o => ({ + mongoId: String(o._id), + userMongoId: String(o.userId), // foreign-key reference stored as plain string + total: o.total, + status: o.status, + createdAt: o.createdAt?.toISOString() + })), + options: { suggestTypes: true, mergeBy: ['mongoId'], mergeStrategy: 'append' } + }) + + // 3) Link: User -[:PLACED]-> Order by joining mongoId = userMongoId + await db.relationships.createMany({ + source: { label: 'User', key: 'mongoId' }, + 
target: { label: 'Order', key: 'userMongoId' }, + type: 'PLACED', + direction: 'out' + }) + + console.log('Import and link complete') + await mongo.close() +} +``` + +:::tip Why store the reference as a string? +RushDB joins on property value equality. MongoDB's `ObjectId` must be converted to `String()` before storing so the join `User.mongoId = Order.userMongoId` works correctly — both sides must be the same type. +::: + +--- + +## 4. Deeply nested collections + +If your documents have multi-level nesting (e.g. orders containing line items containing product info), nest the keys accordingly. `importJson` handles arbitrary depth. + +```ts +const payload = orders.map(o => ({ + mongoId: String(o._id), + total: o.total, + // 'LineItem' becomes the child label; each item gets its own record + LineItem: (o.items ?? []).map((item: any) => ({ + mongoId: String(item._id), + quantity: item.qty, + unitPrice: item.price, + // 'Product' becomes a grandchild record under LineItem + Product: item.product ? [{ + mongoId: String(item.product._id), + name: item.product.name, + sku: item.product.sku + }] : [] + })) +})) + +await db.records.importJson({ + label: 'Order', + data: payload, + options: { suggestTypes: true, mergeBy: ['mongoId'], mergeStrategy: 'append' } +}) +``` + +This produces the graph: `Order → LineItem → Product`. + +--- + +## 5. Change streams (real-time sync) + +MongoDB change streams let you mirror writes into RushDB as they happen, without polling. + +```ts +import RushDB from '@rushdb/javascript-sdk' +import { MongoClient, ChangeStreamDocument } from 'mongodb' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +async function watchCollection() { + const mongo = await MongoClient.connect(process.env.MONGO_URI!) 
+ const collection = mongo.db().collection('users') + + const stream = collection.watch([], { fullDocument: 'updateLookup' }) + + stream.on('change', async (event: ChangeStreamDocument) => { + if (event.operationType === 'insert' || event.operationType === 'replace') { + const doc = (event as any).fullDocument + await db.records.upsert({ + label: 'User', + data: { + mongoId: String(doc._id), + name: doc.name, + email: doc.email, + city: doc.address?.city + }, + options: { suggestTypes: true, mergeBy: ['mongoId'], mergeStrategy: 'append' } + }) + } + + if (event.operationType === 'update') { + const doc = (event as any).fullDocument + if (!doc) return // fullDocument is null when not using updateLookup pipeline + await db.records.upsert({ + label: 'User', + data: { + mongoId: String(doc._id), + name: doc.name, + email: doc.email, + city: doc.address?.city + }, + options: { suggestTypes: true, mergeBy: ['mongoId'], mergeStrategy: 'append' } + }) + } + + if (event.operationType === 'delete') { + // Optional: remove the record from RushDB when deleted from MongoDB + const mongoId = String((event as any).documentKey._id) + await db.records.delete({ labels: ['User'], where: { mongoId } }) + } + }) + + stream.on('error', err => console.error('Change stream error:', err)) + console.log('Watching users collection for changes...') +} + +watchCollection().catch(console.error) +``` + +:::note Replica set required +Change streams require MongoDB to be running as a replica set (or MongoDB Atlas). They are not available on standalone `mongod` instances. +::: + +--- + +## 6. Batching large collections + +For collections with millions of documents, process in batches to avoid memory pressure and respect API rate limits. + +```ts +async function importLargeCollection(batchSize = 500) { + const mongo = await MongoClient.connect(process.env.MONGO_URI!) 
+ const collection = mongo.db().collection('products') + + let skip = 0 + let imported = 0 + + while (true) { + const batch = await collection + .find({}) + .skip(skip) + .limit(batchSize) + .toArray() + + if (!batch.length) break + + const payload = batch.map(p => ({ + mongoId: String(p._id), + sku: p.sku, + name: p.name, + price: p.price, + category: p.category, + tags: p.tags // string[] — will be AI-indexed if an embedding index exists + })) + + await db.records.createMany({ + label: 'Product', + data: payload, + options: { suggestTypes: true, mergeBy: ['mongoId'], mergeStrategy: 'append' } + }) + + imported += batch.length + skip += batchSize + console.log(`Imported ${imported} products`) + } + + await mongo.close() +} +``` + +--- + +## 7. Python equivalent + +```python +from pymongo import MongoClient +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY") +mongo = MongoClient("mongodb+srv://...") +mdb = mongo["acme"] + +# Fetch and reshape +users = list(mdb["users"].find({})) +payload = [ + { + "mongoId": str(u["_id"]), + "name": u.get("name"), + "email": u.get("email"), + "Order": [ + {"mongoId": str(o["_id"]), "total": o["total"], "status": o["status"]} + for o in u.get("orders", []) + ], + } + for u in users +] + +# Upsert into RushDB +db.records.import_json( + label="User", + data=payload, + options={"suggestTypes": True, "mergeBy": ["mongoId"], "mergeStrategy": "append"}, +) + +print(f"Imported {len(users)} users") +mongo.close() +``` + +--- + +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---|---|---| +| `createMany` throws "not a flat object" | Document has embedded objects or arrays | Use `records.importJson` instead | +| Child records created with wrong label | Nested array key name not matching desired label | Rename the key in the reshape step (e.g. 
rename `orders` → `Order`) | +| Duplicate records after re-import | `mergeBy` not set | Add `mergeBy: ['mongoId']` to options | +| Join not linking records | ObjectId not converted to string | Ensure both sides use `String(objectId)` | +| Change stream `fullDocument` is null | `updateLookup` not enabled or update is partial | Use `{ fullDocument: 'updateLookup' }` in the watch options | +| Import dies on large collections | Memory exhausted on `.toArray()` | Use cursor pagination with `.skip()` / `.limit()` batching | + +## See also + +- [Importing data from external sources](./importing-data) — HubSpot, Postgres, Firebase, Airtable, Notion +- TypeScript SDK: [Import Data](../typescript-sdk/records/import-data) · [Relationships](../typescript-sdk/relationships) +- Python SDK: [Import Data](../python-sdk/records/import-data) · [Relationships](../python-sdk/relationships) diff --git a/docs/docs/tutorials/index.mdx b/docs/docs/tutorials/index.mdx new file mode 100644 index 00000000..877b360d --- /dev/null +++ b/docs/docs/tutorials/index.mdx @@ -0,0 +1,15 @@ +--- +title: Tutorials +description: Step-by-step guides to build real things with RushDB — from deployment to AI-powered search and RAG pipelines. +sidebar_position: 0 +hide_title: true +hide_table_of_contents: true +hide_breadcrumbs: true +force_container: true +pagination_next: null +pagination_prev: null +--- + +import TutorialsIndex from '@site/src/components/TutorialsIndex'; + + diff --git a/docs/docs/tutorials/local-setup.md b/docs/docs/tutorials/local-setup.md index e4589132..325095a9 100644 --- a/docs/docs/tutorials/local-setup.md +++ b/docs/docs/tutorials/local-setup.md @@ -1,5 +1,8 @@ --- -sidebar_position: 4 +title: Local Setup +description: Spin up a local RushDB instance with Docker Compose — no repository clone required. 
+sidebar_position: 2 +tags: [Getting Started, Deployment] --- # Local Setup @@ -86,8 +89,9 @@ services: - NEO4J_URL=bolt://neo4j - NEO4J_USERNAME=neo4j - NEO4J_PASSWORD=password + - SQL_DB_TYPE=sqlite # SQLite is the default; no extra service needed neo4j: - image: neo4j:5.25.1 + image: neo4j:2026.01.4 healthcheck: test: [ "CMD-SHELL", "wget --no-verbose --tries=1 --spider localhost:7474 || exit 1" ] interval: 5s @@ -99,7 +103,7 @@ services: environment: - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes - NEO4J_AUTH=neo4j/password - - NEO4J_PLUGINS=["apoc", "graph-data-science"] + - NEO4J_PLUGINS=["apoc"] volumes: - neo4j-plugins:/var/lib/neo4j/plugins - neo4j-data:/data @@ -114,6 +118,10 @@ volumes: ```
+:::info +The default `SQL_DB_TYPE=sqlite` stores user/workspace/project data in a local `rushdb.db` file inside the container. For persistent storage across container restarts, either mount a volume to the path specified by `SQL_DB_PATH` or switch to an external PostgreSQL instance. +::: + 2. Start the environment: ```bash @@ -204,6 +212,19 @@ Before running the container, ensure you provide the following required environm - **Important**: Change this to a secure value in production. - **Default**: `password` +#### 5. `SQL_DB_TYPE` +- **Description**: The SQL database engine used for storing dashboard entities (users, workspaces, projects, tokens). +- **Values**: `sqlite` (default, zero-config) or `postgres` (external PostgreSQL) +- **Default**: `sqlite` + +#### 6. `SQL_DB_PATH` +- **Description**: Path to the SQLite database file. Only used when `SQL_DB_TYPE=sqlite`. +- **Default**: `./rushdb.db` + +#### 7. `SQL_DB_URL` +- **Description**: PostgreSQL connection string. Required when `SQL_DB_TYPE=postgres`. +- **Example**: `postgresql://user:password@localhost:5432/rushdb` + ## Working with the RushDB CLI The RushDB Docker image includes a command-line interface (CLI) that you can access from the running container. diff --git a/docs/docs/tutorials/mcp-operator-quickstart.mdx b/docs/docs/tutorials/mcp-operator-quickstart.mdx new file mode 100644 index 00000000..e7a3cdd2 --- /dev/null +++ b/docs/docs/tutorials/mcp-operator-quickstart.mdx @@ -0,0 +1,268 @@ +--- +sidebar_position: 13 +title: "MCP Quickstart for Real Operators: Claude, Cursor, and VS Code" +description: Go beyond installation — learn the operator workflow for grounded, hallucination-resistant agent queries with the RushDB MCP server. +tags: [MCP, Agents, Tooling, Claude, Cursor] +--- + +# MCP Quickstart for Real Operators: Claude, Cursor, and VS Code + +The [MCP quickstart](../mcp-server/quickstart.mdx) covers installation in three lines. 
This tutorial covers what happens after the config file is saved. + +Most MCP tutorials stop at "it works." Real operators need to know: + +- which tool to call first — and why +- how to avoid schema hallucinations when an LLM invents field names +- how to write queries that are grounded in actual data +- what failure modes look like and how to recover from them + +--- + +## Prerequisites + +- RushDB project with at least one label and a few records +- RushDB MCP server installed and running (see [quickstart](../mcp-server/quickstart.mdx)) +- Claude Desktop, Cursor, or VS Code with Copilot agent mode + +--- + +## The core operator loop + +Every productive session with the RushDB MCP server follows three mandatory steps before asking any data question. + +``` +Step 0: getOntologyMarkdown +Step 1: getSearchQuerySpec (if building a query) +Step 2: execute the query with findRecords +``` + +This loop exists because LLMs hallucinate schema. Without seeing the ontology first, a Claude session might assume a label called `User` instead of `CUSTOMER`, or a field called `createdAt` instead of `created_at`. The ontology call costs one round trip and prevents every class of hallucination downstream. + +--- + +## Step 0: Start every session with ontology + +Ask Claude, Cursor, or VS Code Copilot: + +> "Call `getOntologyMarkdown` and summarize the labels, properties, and relationships." + +What you get back includes: + +- Exact label names (e.g. `CUSTOMER`, `ORDER`, `PRODUCT`) — case-sensitive +- Property names with their types and value ranges +- Cross-label relationships with direction (e.g. `CUSTOMER --PLACED--> ORDER`) + +**Never skip this.** It is the ground truth for everything else in the session. + +You can scope it to specific labels when your graph is large: + +> "Call `getOntologyMarkdown` with labels `['ORDER', 'CUSTOMER']`." 
+ +This maps to the tool call: + +```json +{ + "tool": "getOntologyMarkdown", + "input": { + "labels": ["ORDER", "CUSTOMER"] + } +} +``` + +--- + +## Step 1: Get the query spec before writing queries + +Before you or the LLM starts writing a `findRecords` query, fetch the SearchQuery spec: + +> "Call `getSearchQuerySpec`." + +This returns the full SearchQuery schema the query must conform to, including `where`, `aggregate`, `groupBy`, `orderBy`, `limit`, `skip`, and all filter operators (`$in`, `$gte`, `$contains`, `$exists`, `$relation`, `$alias`, etc.). + +For complex queries, have the LLM use both the ontology and the spec together: + +> "Using the ontology you fetched and the SearchQuery spec, write a query that finds all shipped orders for customers in Germany." + +The LLM will construct something like: + +```json +{ + "labels": ["ORDER"], + "where": { + "status": "shipped", + "CUSTOMER": { + "country": "DE" + } + }, + "orderBy": { "placedAt": "desc" }, + "limit": 25 +} +``` + +Then execute it: + +> "Run `findRecords` with the query you just wrote." + +--- + +## Step 2: Execute with `findRecords` + +`findRecords` accepts the full SearchQuery and returns matching records. Example prompt: + +> "Find the 10 most recently placed orders with status 'shipped' where the customer email contains 'example.com'. Include the order ID, placedAt date, and total." + +The MCP tool call: + +```json +{ + "tool": "findRecords", + "input": { + "labels": ["ORDER"], + "where": { + "status": "shipped", + "CUSTOMER": { + "email": { "$contains": "example.com" } + } + }, + "orderBy": { "placedAt": "desc" }, + "limit": 10, + "aggregate": { + "orderId": "$record.__id", + "placedAt": "$record.placedAt", + "total": "$record.total" + } + } +} +``` + +--- + +## Practical operator prompts + +### Exploring an unfamiliar project for the first time + +``` +1. Call getOntologyMarkdown. +2. List all label names, their record counts, and the relationships between them. +3. 
Which label has the most records? +4. What property holds the primary identifier for each label? +``` + +### Generating a targeted search query + +``` +1. Call getOntologyMarkdown. +2. Call getSearchQuerySpec. +3. Write a findRecords query that returns all TASK records assigned to + engineers in the DEPARTMENT labeled "Platform", ordered by dueDate ascending. +4. Run the query. +``` + +### Semantic search via MCP + +For projects with embedding indexes: + +``` +1. Call getOntologyMarkdown. +2. Call findEmbeddingIndexes to confirm which labels have AI indexes. +3. Call semanticSearch with query "cloud cost optimization", + propertyName "description", labels ["TASK"], limit 10. +``` + +The underlying tool call for step 3: + +```json +{ + "tool": "semanticSearch", + "input": { + "query": "cloud cost optimization", + "propertyName": "description", + "labels": ["TASK"], + "limit": 10 + } +} +``` + +Semantic search always operates within the current project. There is no cross-project search. + +### Attaching a relationship between two records + +``` +1. Call getOntologyMarkdown. +2. Find the record IDs for the two records you want to connect: + - getRecord for the source + - findRecords with a narrow where clause for the target +3. Call attachRelation with those IDs and a relationship type from the ontology. +``` + +### Deleting records safely + +Always preview before deleting: + +``` +1. Call findRecords with the same where clause you plan to use for deletion. +2. Confirm the record count and IDs look correct. +3. Then call bulkDeleteRecords with that where clause. +``` + +`bulkDeleteRecords` takes the same SearchQuery shape as `findRecords` — it deletes all records matching the filter. + +--- + +## Failure modes and recovery + +### "I don't see any records" + +Most common causes, in order: + +1. Wrong label name — label names are case-sensitive. Call `getOntologyMarkdown` to confirm exact casing. +2. Wrong field name — field names are case-sensitive. 
Check the ontology's properties list. +3. Wrong relational traversal key — in `where`, relationship traversal uses the related label name directly (e.g. `CUSTOMER`, not `customer` or `customerId`). + +Recovery prompt: + +> "The last findRecords returned 0 results. Call getOntologyMarkdown, show me the exact label and field names, then rewrite the query." + +### "The query returned unexpected results" + +Ask Claude to explain the query: + +> "Explain why this query would return records that match X but not Y, given this ontology." + +Then narrow the filter and retry. + +### "I got a query error" + +Pass the error back to the LLM: + +> "findRecords returned error: [paste error]. Given the SearchQuery spec, what is wrong with this query? Fix and retry." + +--- + +## Enable discovery-first behavior automatically + +To lock in Step 0 behavior for every session, activate the `rushdb.queryBuilder` prompt. + +In Claude Desktop: + +> "Call `getQueryBuilderPrompt` and apply the result as your system message for this conversation." + +In agents/API contexts: use `ListPrompts` to fetch `rushdb.queryBuilder`, then prepend it to the system message. + +This makes the LLM call ontology before every query automatically, without you needing to ask. + +--- + +## Production caveat + +The MCP server runs `findRecords` queries inside your project's data scope. It cannot access records from other projects. If an agent query returns fewer results than expected, the first thing to check is whether the API key is scoped to the correct project. + +For write operations (`createRecord`, `attachRelation`, `bulkDeleteRecords`), always confirm record IDs before executing — recovery from accidental bulk writes requires manual work. 
+ +--- + +## Next steps + +- [MCP tools reference](../mcp-server/tools.mdx) — full tool list with input schemas +- [Agent-Safe Query Planning with Ontology First](./agent-safe-query-planning.mdx) — building repeatable agent patterns with failure recovery +- [Hybrid Retrieval: Structured Filters Plus Semantic Search](./hybrid-retrieval.mdx) — combining semantic search with graph traversal diff --git a/docs/docs/tutorials/memory-layer.mdx b/docs/docs/tutorials/memory-layer.mdx new file mode 100644 index 00000000..f7e5ddc2 --- /dev/null +++ b/docs/docs/tutorials/memory-layer.mdx @@ -0,0 +1,591 @@ +--- +sidebar_position: 11 +title: "RushDB as a Memory Layer: Facts, Episodes, and References" +description: Model persistent facts, episodic interactions, and linked reference material as a graph so agents and applications can retrieve and reason over connected context. +tags: [Agents, AI, Memory, Graph Modeling] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# RushDB as a Memory Layer: Facts, Episodes, and References + +Stateless LLM calls forget everything between turns. Retrieval from flat vector stores returns similar chunks without regard for how pieces of information connect. Neither approach gives an agent the ability to reason over **connected context** — who said what, when, in response to what, tied to which entities they already know. 
+ +RushDB can act as a structured memory layer that stores and links three kinds of information: + +| Memory type | What it stores | Example | +|---|---|---| +| **Fact** | Durable properties of entities | Customer's plan tier, user's language preference, a product's spec | +| **Episode** | Time-stamped interactions between entities | A support conversation, a search session, a tool call result | +| **Reference** | External documents or data linked into the graph | A knowledge-base article, a policy document, an API response | + +Connecting all three lets an agent answer: *"What did we decide last time this user asked about pricing, and what docs were cited in that conversation?"* + +--- + +## Graph shape + +```mermaid +graph LR + U[ENTITY: user-42] -->|HAD_EPISODE| E1[EPISODE: chat-2025-03-15] + E1 -->|CITED| REF1[REFERENCE: pricing-v3.md] + E1 -->|LED_TO| FACT1[FACT: plan=enterprise] + U -->|HAS_FACT| FACT1 + U -->|HAS_FACT| FACT2[FACT: language=de] + REF1 -->|PART_OF| KB[ENTITY: knowledge-base] +``` + +The graph stores facts once and links episodes back to them. New facts can supersede old ones without deleting evidence. 
+ +--- + +## Step 1: Store durable facts about an entity + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('RUSHDB_API_KEY') + +// Create a user entity +const user = await db.records.create({ + label: 'ENTITY', + data: { entityId: 'user-42', type: 'user', name: 'Lena Müller' } +}) + +// Attach standalone facts that can be updated independently +const planFact = await db.records.create({ + label: 'FACT', + data: { + key: 'plan', + value: 'enterprise', + setAt: new Date().toISOString(), + source: 'billing-api' + } +}) + +const langFact = await db.records.create({ + label: 'FACT', + data: { + key: 'language', + value: 'de', + setAt: new Date().toISOString(), + source: 'profile' + } +}) + +await Promise.all([ + db.records.attach({ source: user, target: planFact, options: { type: 'HAS_FACT' } }), + db.records.attach({ source: user, target: langFact, options: { type: 'HAS_FACT' } }), +]) +``` + + + + +```python +from datetime import datetime, timezone +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") + +now = datetime.now(timezone.utc).isoformat() + +user = db.records.create("ENTITY", { + "entityId": "user-42", + "type": "user", + "name": "Lena Müller" +}) + +plan_fact = db.records.create("FACT", { + "key": "plan", + "value": "enterprise", + "setAt": now, + "source": "billing-api" +}) + +lang_fact = db.records.create("FACT", { + "key": "language", + "value": "de", + "setAt": now, + "source": "profile" +}) + +db.records.attach(user.id, plan_fact.id, {"type": "HAS_FACT"}) +db.records.attach(user.id, lang_fact.id, {"type": "HAS_FACT"}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +USER_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"ENTITY","data":{"entityId":"user-42","type":"user","name":"Lena Müller"}}' \ + | jq -r '.data.__id') + +FACT_ID=$(curl -s 
-X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"label\":\"FACT\",\"data\":{\"key\":\"plan\",\"value\":\"enterprise\",\"setAt\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"source\":\"billing-api\"}}" \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$USER_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$FACT_ID\"],\"options\":{\"type\":\"HAS_FACT\"}}" +``` + + + + +--- + +## Step 2: Record an episodic interaction + +Each conversation, tool call, or workflow run becomes an EPISODE node. This allows replaying history without re-embedding full transcripts. + + + + +```typescript +const episode = await db.records.create({ + label: 'EPISODE', + data: { + episodeId: 'chat-2025-03-15', + type: 'support-chat', + startedAt: '2025-03-15T09:00:00Z', + summary: 'User asked about upgrading from enterprise to unlimited plan. Agent cited pricing doc.', + outcome: 'resolved' + } +}) + +// Link the episode to the user +await db.records.attach({ + source: user, + target: episode, + options: { type: 'HAD_EPISODE' } +}) + +// Record that a fact was changed during this episode +await db.records.attach({ + source: episode, + target: planFact, + options: { type: 'LED_TO' } +}) +``` + + + + +```python +episode = db.records.create("EPISODE", { + "episodeId": "chat-2025-03-15", + "type": "support-chat", + "startedAt": "2025-03-15T09:00:00Z", + "summary": "User asked about upgrading. 
Agent cited pricing doc.", + "outcome": "resolved" +}) + +db.records.attach(user.id, episode.id, {"type": "HAD_EPISODE"}) +db.records.attach(episode.id, plan_fact.id, {"type": "LED_TO"}) +``` + + + + +```bash +EPISODE_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"EPISODE","data":{"episodeId":"chat-2025-03-15","type":"support-chat","startedAt":"2025-03-15T09:00:00Z","outcome":"resolved"}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$USER_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$EPISODE_ID\"],\"options\":{\"type\":\"HAD_EPISODE\"}}" +``` + + + + +--- + +## Step 3: Link reference material + +Reference nodes represent external documents, policy pages, or API responses that were cited during an episode. Link them to the episode so provenance is preserved. + + + + +```typescript +// Create or find the knowledge-base root +const kb = await db.records.create({ + label: 'ENTITY', + data: { entityId: 'kb-main', type: 'knowledge-base', name: 'Product Knowledge Base' } +}) + +// Create a reference document +const pricingRef = await db.records.create({ + label: 'REFERENCE', + data: { + referenceId: 'pricing-v3', + title: 'Pricing and Plans (v3)', + url: 'https://docs.example.com/pricing', + version: '3.0', + updatedAt: '2025-02-01T00:00:00Z' + } +}) + +await Promise.all([ + db.records.attach({ source: pricingRef, target: kb, options: { type: 'PART_OF' } }), + db.records.attach({ source: episode, target: pricingRef, options: { type: 'CITED' } }), +]) +``` + + + + +```python +kb = db.records.create("ENTITY", { + "entityId": "kb-main", + "type": "knowledge-base", + "name": "Product Knowledge Base" +}) + +pricing_ref = db.records.create("REFERENCE", { + "referenceId": "pricing-v3", + "title": "Pricing and Plans (v3)", + "url": "https://docs.example.com/pricing", + "version": "3.0", + "updatedAt": "2025-02-01T00:00:00Z" +}) + +db.records.attach(pricing_ref.id, kb.id, 
{"type": "PART_OF"}) +db.records.attach(episode.id, pricing_ref.id, {"type": "CITED"}) +``` + + + + +```bash +REF_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"REFERENCE","data":{"referenceId":"pricing-v3","title":"Pricing and Plans (v3)","version":"3.0"}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$EPISODE_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$REF_ID\"],\"options\":{\"type\":\"CITED\"}}" +``` + + + + +--- + +## Step 4: Retrieve connected context for an agent turn + +When the user returns, retrieve all three memory types in one query so the agent starts from full context. + + + + +```typescript +// Retrieve this user's current facts +const facts = await db.records.find({ + labels: ['FACT'], + where: { + ENTITY: { + $alias: '$user', + $relation: { type: 'HAS_FACT', direction: 'in' }, + entityId: 'user-42' + } + }, + aggregate: { + key: '$record.key', + value: '$record.value', + setAt: '$record.setAt', + source: '$record.source' + } +}) + +// Retrieve the 5 most recent episodes +const recentEpisodes = await db.records.find({ + labels: ['EPISODE'], + where: { + ENTITY: { + $relation: { type: 'HAD_EPISODE', direction: 'in' }, + entityId: 'user-42' + } + }, + orderBy: { startedAt: 'desc' }, + limit: 5 +}) + +// Retrieve references cited in the most recent episode +const citedRefs = await db.records.find({ + labels: ['REFERENCE'], + where: { + EPISODE: { + $relation: { type: 'CITED', direction: 'in' }, + episodeId: recentEpisodes.data[0]?.episodeId + } + } +}) +``` + + + + +```python +# Current facts +facts = db.records.find({ + "labels": ["FACT"], + "where": { + "ENTITY": { + "$alias": "$user", + "$relation": {"type": "HAS_FACT", "direction": "in"}, + "entityId": "user-42" + } + }, + "aggregate": { + "key": "$record.key", + "value": "$record.value", + "setAt": "$record.setAt" + } +}) + +# Recent episodes +recent = db.records.find({ + "labels": 
["EPISODE"], + "where": { + "ENTITY": { + "$relation": {"type": "HAD_EPISODE", "direction": "in"}, + "entityId": "user-42" + } + }, + "orderBy": {"startedAt": "desc"}, + "limit": 5 +}) + +# References from most recent episode +if recent.data: + latest_id = recent.data[0].data.get("episodeId") + cited = db.records.find({ + "labels": ["REFERENCE"], + "where": { + "EPISODE": { + "$relation": {"type": "CITED", "direction": "in"}, + "episodeId": latest_id + } + } + }) +``` + + + + +```bash +# Fetch current facts for user-42 +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["FACT"], + "where": { + "ENTITY": { + "$relation": {"type": "HAS_FACT", "direction": "in"}, + "entityId": "user-42" + } + } + }' + +# Fetch 5 most recent episodes +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["EPISODE"], + "where": { + "ENTITY": { + "$relation": {"type": "HAD_EPISODE", "direction": "in"}, + "entityId": "user-42" + } + }, + "orderBy": {"startedAt": "desc"}, + "limit": 5 + }' +``` + + + + +--- + +## Step 5: Update a fact without deleting history + +When a fact changes, create a new FACT node, link it with `HAS_FACT`, and optionally mark the old one superseded. This preserves the history chain. 
+ + + + +```typescript +// New episode triggers a plan upgrade +const upgradeEpisode = await db.records.create({ + label: 'EPISODE', + data: { + episodeId: 'chat-2025-03-20', + type: 'billing', + startedAt: '2025-03-20T14:00:00Z', + summary: 'User upgraded from enterprise to unlimited.', + outcome: 'resolved' + } +}) + +await db.records.attach({ + source: user, + target: upgradeEpisode, + options: { type: 'HAD_EPISODE' } +}) + +// Create updated fact +const newPlanFact = await db.records.create({ + label: 'FACT', + data: { + key: 'plan', + value: 'unlimited', + setAt: new Date().toISOString(), + source: 'billing-api' + } +}) + +await Promise.all([ + db.records.attach({ source: user, target: newPlanFact, options: { type: 'HAS_FACT' } }), + db.records.attach({ source: upgradeEpisode, target: newPlanFact, options: { type: 'LED_TO' } }), + // Mark old fact as superseded (optional but enables temporal queries) + db.records.update(planFact.__id, { supersededAt: new Date().toISOString() }), +]) +``` + + + + +```python +upgrade_episode = db.records.create("EPISODE", { + "episodeId": "chat-2025-03-20", + "type": "billing", + "startedAt": "2025-03-20T14:00:00Z", + "outcome": "resolved" +}) + +db.records.attach(user.id, upgrade_episode.id, {"type": "HAD_EPISODE"}) + +new_plan = db.records.create("FACT", { + "key": "plan", + "value": "unlimited", + "setAt": datetime.now(timezone.utc).isoformat(), + "source": "billing-api" +}) + +db.records.attach(user.id, new_plan.id, {"type": "HAS_FACT"}) +db.records.attach(upgrade_episode.id, new_plan.id, {"type": "LED_TO"}) + +# Mark old fact superseded +db.records.update(plan_fact.id, {"supersededAt": datetime.now(timezone.utc).isoformat()}) +``` + + + + +```bash +NEW_FACT_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"label\":\"FACT\",\"data\":{\"key\":\"plan\",\"value\":\"unlimited\",\"setAt\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\",\"source\":\"billing-api\"}}" \ + | jq -r '.data.__id') + +curl 
-s -X POST "$BASE/records/$USER_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$NEW_FACT_ID\"],\"options\":{\"type\":\"HAS_FACT\"}}" + +# Mark old fact superseded +curl -s -X PATCH "$BASE/records/$FACT_ID" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"supersededAt\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}" +``` + + + + +--- + +## Step 6: Retrieve only current (non-superseded) facts + + + + +```typescript +const currentFacts = await db.records.find({ + labels: ['FACT'], + where: { + supersededAt: { $exists: false }, + ENTITY: { + $relation: { type: 'HAS_FACT', direction: 'in' }, + entityId: 'user-42' + } + } +}) +``` + + + + +```python +current_facts = db.records.find({ + "labels": ["FACT"], + "where": { + "supersededAt": {"$exists": False}, + "ENTITY": { + "$relation": {"type": "HAS_FACT", "direction": "in"}, + "entityId": "user-42" + } + } +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["FACT"], + "where": { + "supersededAt": {"$exists": false}, + "ENTITY": { + "$relation": {"type": "HAS_FACT", "direction": "in"}, + "entityId": "user-42" + } + } + }' +``` + + + + +--- + +## Production caveat + +History-preserving fact graphs grow unbounded. Apply a retention policy: periodically archive `EPISODE` nodes older than your SLA window, and prune `FACT` nodes where `supersededAt` is more than N days old. Use `db.records.delete` with a `where` filter rather than record-by-record deletion for scale. 
+ +--- + +## Next steps + +- [Episodic Memory for Multi-Step Agents](./episodic-memory-agents.mdx) — storing intermediate observations, tool outputs, and decisions in a resume-capable graph +- [End-to-End Data Lineage](./data-lineage.mdx) — applying the same Event + Reference pattern to data pipeline provenance +- [Semantic Search for Multi-Tenant Products](./semantic-search-multitenant.mdx) — combining graph facts with vector retrieval diff --git a/docs/docs/tutorials/modeling-hierarchies.mdx b/docs/docs/tutorials/modeling-hierarchies.mdx new file mode 100644 index 00000000..94a22e88 --- /dev/null +++ b/docs/docs/tutorials/modeling-hierarchies.mdx @@ -0,0 +1,596 @@ +--- +sidebar_position: 16 +title: "Modeling Hierarchies, Networks, and Feedback Loops" +description: Three common graph shapes — trees, many-to-many networks, and cyclic systems — with guidance on how to query each without flattening away meaning. +tags: [Graph Modeling, Concepts, SearchQuery, Traversal] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Modeling Hierarchies, Networks, and Feedback Loops + +Not all graphs are the same shape. A file system is a tree. A social network is many-to-many. A supply chain with feedback has cycles. Each shape has different query patterns, different failure modes, and different production constraints. + +This tutorial covers all three so you can recognize which shape your domain needs and choose the right query approach from the start. 
+ +--- + +## Shape 1: Hierarchies (trees) + +**Example domain:** Organizational chart — COMPANY → DIVISION → DEPARTMENT → TEAM → EMPLOYEE + +```mermaid +graph TD + CO[COMPANY: Acme Corp] -->|HAS_DIVISION| DIV[DIVISION: Engineering] + DIV -->|HAS_DEPARTMENT| DEPT[DEPARTMENT: Platform] + DEPT -->|HAS_TEAM| T1[TEAM: Infra] + T1 -->|MEMBER_OF| E1[EMPLOYEE: Lena] + T1 -->|MEMBER_OF| E2[EMPLOYEE: Marco] +``` + +### Ingesting the tree + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('RUSHDB_API_KEY') + +const company = await db.records.create({ label: 'COMPANY', data: { name: 'Acme Corp' } }) +const division = await db.records.create({ label: 'DIVISION', data: { name: 'Engineering' } }) +const dept = await db.records.create({ label: 'DEPARTMENT', data: { name: 'Platform', budget: 2000000 } }) +const teamInfra = await db.records.create({ label: 'TEAM', data: { name: 'Infra', size: 6 } }) +const lena = await db.records.create({ label: 'EMPLOYEE', data: { name: 'Lena Müller', role: 'Lead SRE', level: 'L5' } }) +const marco = await db.records.create({ label: 'EMPLOYEE', data: { name: 'Marco Rossi', role: 'Engineer', level: 'L4' } }) + +await db.records.attach({ source: company, target: division, options: { type: 'HAS_DIVISION' } }) +await db.records.attach({ source: division, target: dept, options: { type: 'HAS_DEPARTMENT' } }) +await db.records.attach({ source: dept, target: teamInfra, options: { type: 'HAS_TEAM' } }) +await db.records.attach({ source: teamInfra, target: lena, options: { type: 'MEMBER_OF', direction: 'in' } }) +await db.records.attach({ source: teamInfra, target: marco, options: { type: 'MEMBER_OF', direction: 'in' } }) +``` + + + + +```python +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") + +company = db.records.create("COMPANY", {"name": "Acme Corp"}) +division = db.records.create("DIVISION", {"name": "Engineering"}) +dept = db.records.create("DEPARTMENT", 
{"name": "Platform", "budget": 2000000}) +team_infra = db.records.create("TEAM", {"name": "Infra", "size": 6}) +lena = db.records.create("EMPLOYEE", {"name": "Lena Müller", "role": "Lead SRE", "level": "L5"}) +marco = db.records.create("EMPLOYEE", {"name": "Marco Rossi", "role": "Engineer", "level": "L4"}) + +db.records.attach(company.id, division.id, {"type": "HAS_DIVISION"}) +db.records.attach(division.id, dept.id, {"type": "HAS_DEPARTMENT"}) +db.records.attach(dept.id, team_infra.id, {"type": "HAS_TEAM"}) +db.records.attach(team_infra.id, lena.id, {"type": "MEMBER_OF", "direction": "in"}) +db.records.attach(team_infra.id, marco.id, {"type": "MEMBER_OF", "direction": "in"}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +COMPANY_ID=$(curl -s -X POST "$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"COMPANY","data":{"name":"Acme Corp"}}' | jq -r '.data.__id') +DIV_ID=$(curl -s -X POST "$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"DIVISION","data":{"name":"Engineering"}}' | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$COMPANY_ID/relations" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$DIV_ID\"],\"options\":{\"type\":\"HAS_DIVISION\"}}" +``` + + + + +### Querying the tree: all employees with their full org path + + + + +```typescript +const headcount = await db.records.find({ + labels: ['EMPLOYEE'], + where: { + TEAM: { + $alias: '$team', + $relation: { type: 'MEMBER_OF', direction: 'out' }, + DEPARTMENT: { + $alias: '$dept', + DIVISION: { + $alias: '$div', + COMPANY: { name: 'Acme Corp' } + } + } + } + }, + aggregate: { + employeeName: '$record.name', + role: '$record.role', + teamName: '$team.name', + deptName: '$dept.name', + divisionName: '$div.name' + }, + orderBy: { employeeName: 'asc' } +}) +``` + + + + +```python +headcount = db.records.find({ + "labels": ["EMPLOYEE"], + "where": { + "TEAM": { + 
"$alias": "$team", + "$relation": {"type": "MEMBER_OF", "direction": "out"}, + "DEPARTMENT": { + "$alias": "$dept", + "DIVISION": { + "$alias": "$div", + "COMPANY": {"name": "Acme Corp"} + } + } + } + }, + "aggregate": { + "employeeName": "$record.name", + "role": "$record.role", + "teamName": "$team.name", + "deptName": "$dept.name" + }, + "orderBy": {"employeeName": "asc"} +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["EMPLOYEE"], + "where": { + "TEAM": { + "$alias": "$team", + "$relation": {"type": "MEMBER_OF", "direction": "out"}, + "DEPARTMENT": {"DIVISION": {"COMPANY": {"name": "Acme Corp"}}} + } + }, + "aggregate": {"employeeName": "$record.name", "teamName": "$team.name"} + }' +``` + + + + +### Tree query: headcount per department + + + + +```typescript +const deptHeadcount = await db.records.find({ + labels: ['DEPARTMENT'], + where: { + TEAM: { + EMPLOYEE: { $alias: '$emp', $relation: { type: 'MEMBER_OF', direction: 'out' } } + }, + DIVISION: { COMPANY: { name: 'Acme Corp' } } + }, + aggregate: { + deptName: '$record.name', + headcount: { fn: 'count', alias: '$emp', unique: true } + }, + groupBy: ['deptName', 'headcount'], + orderBy: { headcount: 'desc' } +}) +``` + + + + +```python +dept_headcount = db.records.find({ + "labels": ["DEPARTMENT"], + "where": { + "TEAM": { + "EMPLOYEE": {"$alias": "$emp", "$relation": {"type": "MEMBER_OF", "direction": "out"}} + }, + "DIVISION": {"COMPANY": {"name": "Acme Corp"}} + }, + "aggregate": { + "deptName": "$record.name", + "headcount": {"fn": "count", "alias": "$emp", "unique": True} + }, + "groupBy": ["deptName", "headcount"], + "orderBy": {"headcount": "desc"} +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["DEPARTMENT"], + "where": { + "TEAM": {"EMPLOYEE": {"$alias": "$emp", "$relation": {"type": "MEMBER_OF", "direction": "out"}}}, + 
"DIVISION": {"COMPANY": {"name": "Acme Corp"}} + }, + "aggregate": { + "deptName": "$record.name", + "headcount": {"fn": "count", "alias": "$emp", "unique": true} + }, + "groupBy": ["deptName", "headcount"], + "orderBy": {"headcount": "desc"} + }' +``` + + + + +--- + +## Shape 2: Many-to-many networks + +**Example domain:** Research graph — AUTHOR ↔ PAPER ↔ TOPIC, PAPER → PAPER (citations) + +```mermaid +graph LR + A1[AUTHOR: Lena] -->|CO_AUTHORED| P1[PAPER: GNN Scaling] + A2[AUTHOR: Marco] -->|CO_AUTHORED| P1 + P1 -->|COVERS| T1[TOPIC: Graph Neural Networks] + P1 -->|COVERS| T2[TOPIC: Distributed Systems] + P1 -->|CITES| P2[PAPER: Attention is All You Need] +``` + +### Ingesting the network + + + + +```typescript +const lenaAuthor = await db.records.create({ label: 'AUTHOR', data: { name: 'Lena Müller', hIndex: 14 } }) +const marcoAuthor = await db.records.create({ label: 'AUTHOR', data: { name: 'Marco Rossi', hIndex: 9 } }) +const topicGNN = await db.records.create({ label: 'TOPIC', data: { name: 'Graph Neural Networks' } }) +const topicDistrib = await db.records.create({ label: 'TOPIC', data: { name: 'Distributed Systems' } }) +const paper1 = await db.records.create({ label: 'PAPER', data: { title: 'GNN Scaling Strategies', year: 2024, citations: 87 } }) +const paper2 = await db.records.create({ label: 'PAPER', data: { title: 'Attention is All You Need', year: 2017, citations: 90000 } }) + +await Promise.all([ + db.records.attach({ source: lenaAuthor, target: paper1, options: { type: 'CO_AUTHORED' } }), + db.records.attach({ source: marcoAuthor, target: paper1, options: { type: 'CO_AUTHORED' } }), + db.records.attach({ source: paper1, target: topicGNN, options: { type: 'COVERS' } }), + db.records.attach({ source: paper1, target: topicDistrib, options: { type: 'COVERS' } }), + db.records.attach({ source: paper1, target: paper2, options: { type: 'CITES' } }), +]) +``` + + + + +```python +lena_author = db.records.create("AUTHOR", {"name": "Lena Müller", "hIndex": 
14}) +marco_author = db.records.create("AUTHOR", {"name": "Marco Rossi", "hIndex": 9}) +topic_gnn = db.records.create("TOPIC", {"name": "Graph Neural Networks"}) +paper1 = db.records.create("PAPER", {"title": "GNN Scaling Strategies", "year": 2024, "citations": 87}) +paper2 = db.records.create("PAPER", {"title": "Attention is All You Need", "year": 2017, "citations": 90000}) + +db.records.attach(lena_author.id, paper1.id, {"type": "CO_AUTHORED"}) +db.records.attach(marco_author.id, paper1.id, {"type": "CO_AUTHORED"}) +db.records.attach(paper1.id, topic_gnn.id, {"type": "COVERS"}) +db.records.attach(paper1.id, paper2.id, {"type": "CITES"}) +``` + + + + +```bash +LENA_ID=$(curl -s -X POST "$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"AUTHOR","data":{"name":"Lena Müller","hIndex":14}}' | jq -r '.data.__id') +PAPER_ID=$(curl -s -X POST "$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PAPER","data":{"title":"GNN Scaling Strategies","year":2024}}' | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$LENA_ID/relations" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$PAPER_ID\"],\"options\":{\"type\":\"CO_AUTHORED\"}}" +``` + + + + +### Network query: co-authors on a topic + + + + +```typescript +const coAuthors = await db.records.find({ + labels: ['AUTHOR'], + where: { + PAPER: { + $alias: '$paper', + $relation: { type: 'CO_AUTHORED', direction: 'out' }, + TOPIC: { name: 'Graph Neural Networks' } + } + }, + aggregate: { + authorName: '$record.name', + hIndex: '$record.hIndex', + paperCount: { fn: 'count', alias: '$paper', unique: true } + }, + groupBy: ['authorName', 'hIndex', 'paperCount'], + orderBy: { paperCount: 'desc' } +}) +``` + + + + +```python +co_authors = db.records.find({ + "labels": ["AUTHOR"], + "where": { + "PAPER": { + "$alias": "$paper", + "$relation": {"type": "CO_AUTHORED", "direction": "out"}, + "TOPIC": {"name": "Graph Neural Networks"} + } + }, + "aggregate": { + 
"authorName": "$record.name", + "hIndex": "$record.hIndex", + "paperCount": {"fn": "count", "alias": "$paper", "unique": True} + }, + "groupBy": ["authorName", "hIndex", "paperCount"], + "orderBy": {"paperCount": "desc"} +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["AUTHOR"], + "where": { + "PAPER": { + "$alias": "$paper", + "$relation": {"type": "CO_AUTHORED", "direction": "out"}, + "TOPIC": {"name": "Graph Neural Networks"} + } + }, + "aggregate": { + "authorName": "$record.name", + "paperCount": {"fn": "count", "alias": "$paper", "unique": true} + }, + "groupBy": ["authorName", "paperCount"], + "orderBy": {"paperCount": "desc"} + }' +``` + + + + +--- + +## Shape 3: Cyclic systems (dependency graphs) + +**Example domain:** Package dependency graph — PACKAGE depends on other PACKAGEs through transitive chains. + +```mermaid +graph LR + PKG_A[PACKAGE: app-core] -->|DEPENDS_ON| PKG_B[PACKAGE: auth-lib] + PKG_A -->|DEPENDS_ON| PKG_C[PACKAGE: data-client] + PKG_B -->|DEPENDS_ON| PKG_D[PACKAGE: crypto-utils] + PKG_C -->|DEPENDS_ON| PKG_D +``` + +SearchQuery does not expose arbitrary-depth recursive traversal. Instead, scope multi-hop traversal explicitly to the depth your product requires. For blast-radius analysis (which packages are affected if `crypto-utils` has a CVE?), traverse up to a known safe depth. 
+ +### Ingesting the dependency graph + + + + +```typescript +const appCore = await db.records.create({ label: 'PACKAGE', data: { name: 'app-core', version: '2.1.0' } }) +const authLib = await db.records.create({ label: 'PACKAGE', data: { name: 'auth-lib', version: '1.4.2' } }) +const dataClient = await db.records.create({ label: 'PACKAGE', data: { name: 'data-client', version: '3.0.1' } }) +const cryptoUtils = await db.records.create({ label: 'PACKAGE', data: { name: 'crypto-utils', version: '0.9.8' } }) + +await Promise.all([ + db.records.attach({ source: appCore, target: authLib, options: { type: 'DEPENDS_ON' } }), + db.records.attach({ source: appCore, target: dataClient, options: { type: 'DEPENDS_ON' } }), + db.records.attach({ source: authLib, target: cryptoUtils, options: { type: 'DEPENDS_ON' } }), + db.records.attach({ source: dataClient, target: cryptoUtils, options: { type: 'DEPENDS_ON' } }), +]) +``` + + + + +```python +app_core = db.records.create("PACKAGE", {"name": "app-core", "version": "2.1.0"}) +auth_lib = db.records.create("PACKAGE", {"name": "auth-lib", "version": "1.4.2"}) +data_client = db.records.create("PACKAGE", {"name": "data-client", "version": "3.0.1"}) +crypto_utils = db.records.create("PACKAGE", {"name": "crypto-utils", "version": "0.9.8"}) + +db.records.attach(app_core.id, auth_lib.id, {"type": "DEPENDS_ON"}) +db.records.attach(app_core.id, data_client.id, {"type": "DEPENDS_ON"}) +db.records.attach(auth_lib.id, crypto_utils.id, {"type": "DEPENDS_ON"}) +db.records.attach(data_client.id, crypto_utils.id, {"type": "DEPENDS_ON"}) +``` + + + + +```bash +CORE_ID=$(curl -s -X POST "$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PACKAGE","data":{"name":"app-core","version":"2.1.0"}}' | jq -r '.data.__id') +AUTH_ID=$(curl -s -X POST "$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PACKAGE","data":{"name":"auth-lib","version":"1.4.2"}}' | jq -r '.data.__id') +CRYPTO_ID=$(curl -s -X POST 
"$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PACKAGE","data":{"name":"crypto-utils","version":"0.9.8"}}' | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$CORE_ID/relations" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$AUTH_ID\"],\"options\":{\"type\":\"DEPENDS_ON\"}}" +curl -s -X POST "$BASE/records/$AUTH_ID/relations" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$CRYPTO_ID\"],\"options\":{\"type\":\"DEPENDS_ON\"}}" +``` + + + + +### Cyclic query: two-hop blast radius for a vulnerable package + +Find all packages that depend on `crypto-utils` directly (hop 1) or through one intermediate package (hop 2): + + + + +```typescript +// Hop 1: direct dependents +const direct = await db.records.find({ + labels: ['PACKAGE'], + where: { + PACKAGE: { + $alias: '$dep', + $relation: { type: 'DEPENDS_ON', direction: 'out' }, + name: 'crypto-utils' + } + }, + aggregate: { + packageName: '$record.name', + version: '$record.version', + hop: { fn: 'count', alias: '$dep', unique: true } + }, + groupBy: ['packageName', 'version', 'hop'] +}) + +// Hop 2: packages whose dependencies depend on crypto-utils +const indirect = await db.records.find({ + labels: ['PACKAGE'], + where: { + PACKAGE: { + $alias: '$mid', + $relation: { type: 'DEPENDS_ON', direction: 'out' }, + PACKAGE: { + $relation: { type: 'DEPENDS_ON', direction: 'out' }, + name: 'crypto-utils' + } + } + }, + aggregate: { + packageName: '$record.name', + version: '$record.version', + via: '$mid.name' + } +}) +``` + + + + +```python +# Direct dependents of crypto-utils +direct = db.records.find({ + "labels": ["PACKAGE"], + "where": { + "PACKAGE": { + "$alias": "$dep", + "$relation": {"type": "DEPENDS_ON", "direction": "out"}, + "name": "crypto-utils" + } + }, + "aggregate": { + "packageName": "$record.name", + "version": "$record.version" + } +}) + +# Two-hop: packages that depend on a package that depends on crypto-utils +indirect = 
db.records.find({ + "labels": ["PACKAGE"], + "where": { + "PACKAGE": { + "$alias": "$mid", + "$relation": {"type": "DEPENDS_ON", "direction": "out"}, + "PACKAGE": { + "$relation": {"type": "DEPENDS_ON", "direction": "out"}, + "name": "crypto-utils" + } + } + }, + "aggregate": { + "packageName": "$record.name", + "via": "$mid.name" + } +}) +``` + + + + +```bash +# Direct dependents +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["PACKAGE"], + "where": { + "PACKAGE": { + "$relation": {"type": "DEPENDS_ON", "direction": "out"}, + "name": "crypto-utils" + } + }, + "aggregate": {"packageName": "$record.name", "version": "$record.version"} + }' +``` + + + + +--- + +## Comparison of the three shapes + +| Shape | Key property | Query pattern | Ambush | +|---|---|---|---| +| Tree | Single parent per node | Top-down with full-path aggregate | Deep trees require explicit hop count | +| Many-to-many | Nodes can appear in multiple relationships | Aggregation by relationship type | Fan-out can be large without limit | +| Cyclic | Loops are possible | Explicit depth bounds | Unbounded traversal is not supported | + +--- + +## Production caveat + +Each shape has a fan-out risk. In trees, deep hierarchies multiply candidates at every hop. In networks, highly-connected hubs (an author with 200 papers) inflate traversal cost. In cyclic graphs, even a two-hop traversal can cover thousands of paths in large dependency graphs. + +Apply `limit` conservatively and filter early on high-selectivity properties (e.g. `name`, `status`, `version`). Measure response times before and after adding hops to your query. + +--- + +## Next steps + +- [Choosing Relationship Types That Age Well](./choosing-relationship-types.mdx) — when generic vs. 
typed edges matter +- [Temporal Graphs: Modeling State and Event Time Together](./temporal-graphs.mdx) — adding time dimension to any of these shapes +- [SearchQuery Deep Dive](./searchquery-advanced-patterns.mdx) — `$relation`, `$alias`, and `collect` patterns diff --git a/docs/docs/tutorials/query-optimization.mdx b/docs/docs/tutorials/query-optimization.mdx new file mode 100644 index 00000000..77badb5a --- /dev/null +++ b/docs/docs/tutorials/query-optimization.mdx @@ -0,0 +1,361 @@ +--- +sidebar_position: 31 +title: "Query Optimization" +description: Shape SearchQuery, traversal breadth, aggregation strategy, and batch patterns to reduce compute cost and improve throughput. +tags: [Performance, SearchQuery, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Query Optimization + +RushDB executes every `find()` call as a Cypher query against Neo4j. The shape of your `SearchQuery` directly controls how much of the graph is traversed and how many graph operations are charged. This tutorial identifies the dominant cost patterns and shows how to reshape queries to reduce them. + +--- + +## Cost drivers + +| Factor | Impact | +|---|---| +| Number of `where` traversal hops | Multiplied graph scan per additional hop | +| `fn: "collect"` in aggregates | Materializes all matched records into a list | +| Missing `labels` filter | Scans all record types | +| High `limit` without indexed field ordering | Full sort on unindexed data | +| Per-record enrichment inside loops | O(n) queries for n results | +| `$nin` / `$in` with large arrays | Evaluated against every candidate record | + +--- + +## Tip 1: Always specify `labels` + +Without `labels`, RushDB scans every record node in your project. 
+ + + + +```typescript +// ❌ Avoid — scans all labels +const slow = await db.records.find({ + where: { status: 'active' } +}) + +// ✅ Better — restricts scan to one label +const fast = await db.records.find({ + labels: ['USER'], + where: { status: 'active' } +}) +``` + + + + +```python +# ❌ Avoid +slow = db.records.find({"where": {"status": "active"}}) + +# ✅ Better +fast = db.records.find({"labels": ["USER"], "where": {"status": "active"}}) +``` + + + + +```bash +# ✅ Always pass labels +curl -s -X POST "$BASE/records/search" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["USER"],"where":{"status":"active"}}' +``` + + + + +--- + +## Tip 2: Filter at the deepest hop + +Push the most selective filter as deep into the traversal as possible. This prunes the graph early and avoids materializing large intermediate sets. + + + + +```typescript +// ❌ Broad: fetches all orders, then filters by status at outer level +const broad = await db.records.find({ + labels: ['CUSTOMER'], + where: { + status: 'active', // customer filter + ORDER: { $relation: { type: 'PLACED', direction: 'out' } } + // No filter on ORDER itself — all orders are traversed + } +}) + +// ✅ Narrow: prune at the ORDER hop with totalUsd filter +const narrow = await db.records.find({ + labels: ['CUSTOMER'], + where: { + status: 'active', + ORDER: { + $relation: { type: 'PLACED', direction: 'out' }, + totalUsd: { $gte: 500 } // filter at the ORDER hop + } + } +}) +``` + + + + +```python +# ✅ Push filter to the deepest hop +result = db.records.find({ + "labels": ["CUSTOMER"], + "where": { + "status": "active", + "ORDER": { + "$relation": {"type": "PLACED", "direction": "out"}, + "totalUsd": {"$gte": 500} + } + } +}) +``` + + + + +--- + +## Tip 3: Limit traversal depth + +Every additional hop multiplies the graph surface explored. Two hops is usually fine; three or more should be benchmarked at production volume. 
+ + + + +```typescript +// Three-hop traversal — verify this performs at your data volume +const threeHop = await db.records.find({ + labels: ['TEAM'], + where: { + USER: { + $relation: { type: 'MEMBER_OF', direction: 'in' }, + ORDER: { + $relation: { type: 'PLACED', direction: 'out' }, + PRODUCT: { + $relation: { type: 'CONTAINS', direction: 'out' }, + sku: 'PROD-001' + } + } + } + } +}) + +// ✅ Equivalent two-step approach: first resolve products, then query teams +const orders = await db.records.find({ + labels: ['ORDER'], + where: { + PRODUCT: { + $relation: { type: 'CONTAINS', direction: 'out' }, + sku: 'PROD-001' + } + } +}) + +const orderIds = orders.data.map(o => o.__id) + +const teams = await db.records.find({ + labels: ['TEAM'], + where: { + USER: { + $relation: { type: 'MEMBER_OF', direction: 'in' }, + ORDER: { + $relation: { type: 'PLACED', direction: 'out' }, + __id: { $in: orderIds } + } + } + } +}) +``` + + + + +```python +orders = db.records.find({ + "labels": ["ORDER"], + "where": { + "PRODUCT": { + "$relation": {"type": "CONTAINS", "direction": "out"}, + "sku": "PROD-001" + } + } +}) + +order_ids = [o.id for o in orders.data] + +teams = db.records.find({ + "labels": ["TEAM"], + "where": { + "USER": { + "$relation": {"type": "MEMBER_OF", "direction": "in"}, + "ORDER": { + "$relation": {"type": "PLACED", "direction": "out"}, + "__id": {"$in": order_ids} + } + } + } +}) +``` + + + + +--- + +## Tip 4: Prefer `count` aggregates over `collect` + +`fn: "collect"` materializes every matched record ID into a list before returning. `fn: "count"` just increments a counter. Use `collect` only when you need the actual IDs. 
+ + + + +```typescript +// ❌ Expensive: collects all record IDs just to check if any exist +const collected = await db.records.find({ + labels: ['ORDER'], + where: { status: 'pending' }, + aggregate: { ids: { fn: 'collect', alias: '$record' } } +}) + +// ✅ Cheap: count is all you need +const counted = await db.records.find({ + labels: ['ORDER'], + where: { status: 'pending' }, + aggregate: { count: { fn: 'count', alias: '$record' } } +}) +``` + + + + +```python +# ✅ Prefer count over collect unless IDs are needed +result = db.records.find({ + "labels": ["ORDER"], + "where": {"status": "pending"}, + "aggregate": {"count": {"fn": "count", "alias": "$record"}} +}) +``` + + + + +--- + +## Tip 5: Batch writes instead of serial `create()` loops + +A loop calling `create()` once per record makes one HTTP round-trip per write. `importJson` sends all records in a single request. + + + + +```typescript +const records = Array.from({ length: 500 }, (_, i) => ({ + eventId: `EVT-${i}`, + type: 'click', + ts: new Date().toISOString() +})) + +// ❌ 500 round-trips +for (const r of records) { + await db.records.create({ label: 'EVENT', data: r }) +} + +// ✅ 1 round-trip +await db.records.importJson({ label: 'EVENT', data: records }) +``` + + + + +```python +records = [{"eventId": f"EVT-{i}", "type": "click", "ts": "2025-01-01T00:00:00Z"} for i in range(500)] + +# ✅ Single call +db.records.import_json({"label": "EVENT", "data": records}) +``` + + + + +```bash +# Build the data array with jq then send in one call +PAYLOAD=$(jq -n '[range(500)] | map({"eventId": ("EVT-" + tostring), "type":"click"})') +curl -s -X POST "$BASE/records/import/json" \ + -H "Content-Type: application/json" -H "Authorization: Bearer $TOKEN" \ + -d "{\"label\":\"EVENT\",\"data\":$PAYLOAD}" +``` + + + + +--- + +## Tip 6: Cap `limit` in API endpoints + +Never pass user-controlled `limit` values directly. Cap at a safe maximum. 
+ + + + +```typescript +const MAX_LIMIT = 100 + +function buildQuery(params: { limit?: number; skip?: number }): object { + return { + labels: ['ARTICLE'], + limit: Math.min(params.limit ?? 20, MAX_LIMIT), + skip: params.skip ?? 0 + } +} +``` + + + + +```python +MAX_LIMIT = 100 + +def build_query(params: dict) -> dict: + return { + "labels": ["ARTICLE"], + "limit": min(params.get("limit", 20), MAX_LIMIT), + "skip": params.get("skip", 0) + } +``` + + + + +--- + +## Quick-reference checklist + +| Check | Why it matters | +|---|---| +| `labels` always set | Avoids full graph scan | +| Most selective filter at deepest hop | Prunes intermediate node sets early | +| Traversal depth ≤ 2 hops | Three+ hops need production benchmarking | +| `fn: "collect"` replaced with `fn: "count"` where possible | `collect` materializes all matched IDs | +| Bulk writes via `importJson` | One round-trip instead of N | +| `limit` capped server-side | Prevents runaway result sets | + +--- + +## Next steps + +- [Testing SearchQuery](./testing-searchquery.mdx) — verify query correctness across TS, Python, and REST +- [Discovery Queries](./discovery-queries.mdx) — explore unknown schemas before committing to traversal patterns +- [Hybrid Retrieval](./hybrid-retrieval.mdx) — structured filter + semantic ranking patterns diff --git a/docs/docs/tutorials/rag-evaluation.mdx b/docs/docs/tutorials/rag-evaluation.mdx new file mode 100644 index 00000000..40040317 --- /dev/null +++ b/docs/docs/tutorials/rag-evaluation.mdx @@ -0,0 +1,595 @@ +--- +sidebar_position: 40 +title: "RAG Evaluation" +description: Measure Precision@k and Recall@k for your retrieval pipeline, detect score drift after model updates, and add a CI regression gate that fails on quality drops. 
+tags: [RAG, AI, Embeddings, Testing]
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# RAG Evaluation
+
+Retrieval quality degrades silently: a new embedding model bumps up dimensions, a chunk-size change reduces recall, or an index backfill problem leaves some records un-vectorized. This tutorial builds a lightweight evaluation harness that measures **Precision@k** and **Recall@k** for your RushDB vector search pipeline — and plugs into CI so regressions are caught before they reach production.
+
+---
+
+## Concepts
+
+| Metric | Definition | Formula |
+|---|---|---|
+| **Precision@k** | Of the k results returned, how many are relevant? | `\|retrieved ∩ relevant\| / k` |
+| **Recall@k** | Of all relevant items, how many appear in the top k? | `\|retrieved ∩ relevant\| / \|relevant\|` |
+
+A well-tuned RAG retriever should maintain **Precision@5 ≥ 0.6** and **Recall@5 ≥ 0.5** for your domain. The exact thresholds depend on your corpus — establish a baseline first, then track drift.
+
+---
+
+## Step 1: Build a ground-truth dataset
+
+Create a small set of evaluation queries paired with expected top record IDs. Aim for 20–50 queries covering your key topic areas.
+ + + + +```js +// eval-dataset.js +export const evalDataset = [ + { + query: 'how to set up a self-hosted RushDB instance', + expectedIds: [ + 'record-id-docker-guide', + 'record-id-env-vars', + 'record-id-first-boot' + ] + }, + { + query: 'connecting a Neo4j Aura database', + expectedIds: [ + 'record-id-aura-setup', + 'record-id-bolt-url' + ] + }, + { + query: 'external embedding indexes BYOV', + expectedIds: [ + 'record-id-byov-intro', + 'record-id-external-index', + 'record-id-vectors-key' + ] + } + // Add 20–50 entries for statistical significance +] +``` + + + + +```python +# eval_dataset.py +EVAL_DATASET = [ + { + 'query': 'how to set up a self-hosted RushDB instance', + 'expected_ids': [ + 'record-id-docker-guide', + 'record-id-env-vars', + 'record-id-first-boot' + ] + }, + { + 'query': 'connecting a Neo4j Aura database', + 'expected_ids': [ + 'record-id-aura-setup', + 'record-id-bolt-url' + ] + }, + { + 'query': 'external embedding indexes BYOV', + 'expected_ids': [ + 'record-id-byov-intro', + 'record-id-external-index', + 'record-id-vectors-key' + ] + } +] +``` + + + + +### Bootstrapping expected IDs + +If you don't have ground-truth IDs yet, run a baseline search, manually review the results, and promote the relevant ones to your dataset: + + + + +```js +import RushDB from '@rushdb/javascript-sdk' +const db = new RushDB(process.env.RUSHDB_API_KEY) + +// Run a query and inspect results to build ground truth +const hits = await db.ai.search({ + query: 'how to set up a self-hosted instance', + labels: ['DOC_CHUNK'], + propertyName: 'text', + limit: 10 +}) + +hits.forEach(h => console.log(h.__id, h.__score?.toFixed(3), h.text.slice(0, 80))) +// Manually note which IDs are relevant → add to evalDataset +``` + + + + +```python +import os +from rushdb import RushDB + +db = RushDB(os.environ['RUSHDB_API_KEY']) + +hits = db.ai.search( + query='how to set up a self-hosted instance', + labels=['DOC_CHUNK'], + property_name='text', + limit=10 +) + +for h in hits: + 
print(h.__id, f'{h.__score:.3f}', h.text[:80]) +# Manually note which IDs are relevant → add to EVAL_DATASET +``` + + + + +--- + +## Step 2: Implement the metrics + + + + +```js +// metrics.js +export function precisionAtK(retrievedIds, relevantIds, k) { + const topK = retrievedIds.slice(0, k) + const relevantSet = new Set(relevantIds) + const hits = topK.filter(id => relevantSet.has(id)).length + return hits / k +} + +export function recallAtK(retrievedIds, relevantIds, k) { + if (relevantIds.length === 0) return 1 + const topK = retrievedIds.slice(0, k) + const relevantSet = new Set(relevantIds) + const hits = topK.filter(id => relevantSet.has(id)).length + return hits / relevantIds.length +} + +export function meanMetric(values) { + return values.reduce((sum, v) => sum + v, 0) / values.length +} +``` + + + + +```python +# metrics.py +def precision_at_k(retrieved_ids: list[str], relevant_ids: list[str], k: int) -> float: + top_k = retrieved_ids[:k] + relevant_set = set(relevant_ids) + hits = sum(1 for id_ in top_k if id_ in relevant_set) + return hits / k + +def recall_at_k(retrieved_ids: list[str], relevant_ids: list[str], k: int) -> float: + if not relevant_ids: + return 1.0 + top_k = retrieved_ids[:k] + relevant_set = set(relevant_ids) + hits = sum(1 for id_ in top_k if id_ in relevant_set) + return hits / len(relevant_ids) + +def mean_metric(values: list[float]) -> float: + return sum(values) / len(values) if values else 0.0 +``` + + + + +--- + +## Step 3: Run the evaluation + + + + +```js +// evaluate.js +import RushDB from '@rushdb/javascript-sdk' +import { evalDataset } from './eval-dataset.js' +import { precisionAtK, recallAtK, meanMetric } from './metrics.js' + +const db = new RushDB(process.env.RUSHDB_API_KEY) +const K = 5 + +async function evaluate() { + const precisions = [] + const recalls = [] + + for (const { query, expectedIds } of evalDataset) { + const hits = await db.ai.search({ + query, + labels: ['DOC_CHUNK'], + propertyName: 'text', + limit: 
K + }) + + const retrievedIds = hits.map(h => h.__id) + precisions.push(precisionAtK(retrievedIds, expectedIds, K)) + recalls.push(recallAtK(retrievedIds, expectedIds, K)) + } + + const results = { + k: K, + queries: evalDataset.length, + meanPrecision: meanMetric(precisions), + meanRecall: meanMetric(recalls), + perQuery: evalDataset.map((item, i) => ({ + query: item.query, + precision: precisions[i], + recall: recalls[i] + })) + } + + console.log(`\nEvaluation Results (k=${K})`) + console.log(`Mean Precision@${K}: ${results.meanPrecision.toFixed(3)}`) + console.log(`Mean Recall@${K}: ${results.meanRecall.toFixed(3)}`) + + return results +} + +export { evaluate } +``` + + + + +```python +# evaluate.py +import os +from rushdb import RushDB +from eval_dataset import EVAL_DATASET +from metrics import precision_at_k, recall_at_k, mean_metric + +db = RushDB(os.environ['RUSHDB_API_KEY']) +K = 5 + +def evaluate(): + precisions = [] + recalls = [] + + for item in EVAL_DATASET: + hits = db.ai.search( + query=item['query'], + labels=['DOC_CHUNK'], + property_name='text', + limit=K + ) + retrieved_ids = [h.__id for h in hits] + precisions.append(precision_at_k(retrieved_ids, item['expected_ids'], K)) + recalls.append(recall_at_k(retrieved_ids, item['expected_ids'], K)) + + results = { + 'k': K, + 'queries': len(EVAL_DATASET), + 'mean_precision': mean_metric(precisions), + 'mean_recall': mean_metric(recalls), + 'per_query': [ + { + 'query': EVAL_DATASET[i]['query'], + 'precision': precisions[i], + 'recall': recalls[i] + } + for i in range(len(EVAL_DATASET)) + ] + } + + print(f"\nEvaluation Results (k={K})") + print(f"Mean Precision@{K}: {results['mean_precision']:.3f}") + print(f"Mean Recall@{K}: {results['mean_recall']:.3f}") + + return results +``` + + + + +--- + +## Step 4: Score drift detection + +Track `__score` for a fixed set of queries to detect when your embedding model or index quality changes. 
+ + + + +```js +// score-drift.js +import fs from 'node:fs' + +const SNAPSHOT_FILE = './eval-score-snapshot.json' + +async function captureScoreSnapshot(db, queries) { + const snapshot = {} + for (const query of queries) { + const hits = await db.ai.search({ + query, + labels: ['DOC_CHUNK'], + propertyName: 'text', + limit: 5 + }) + snapshot[query] = hits.map(h => ({ id: h.__id, score: h.__score })) + } + fs.writeFileSync(SNAPSHOT_FILE, JSON.stringify(snapshot, null, 2)) + console.log('Score snapshot saved.') + return snapshot +} + +function compareSnapshots(baseline, current, driftThreshold = 0.05) { + const drifts = [] + for (const query of Object.keys(baseline)) { + if (!current[query]) continue + const baseScores = Object.fromEntries(baseline[query].map(r => [r.id, r.score])) + for (const { id, score } of current[query]) { + if (id in baseScores) { + const delta = Math.abs(score - baseScores[id]) + if (delta > driftThreshold) { + drifts.push({ query, id, baseline: baseScores[id], current: score, delta }) + } + } + } + } + return drifts +} + +export { captureScoreSnapshot, compareSnapshots } +``` + + + + +```python +# score_drift.py +import json, os + +SNAPSHOT_FILE = './eval_score_snapshot.json' + +def capture_score_snapshot(db, queries: list[str]) -> dict: + snapshot = {} + for query in queries: + hits = db.ai.search( + query=query, + labels=['DOC_CHUNK'], + property_name='text', + limit=5 + ) + snapshot[query] = [{'id': h.__id, 'score': h.__score} for h in hits] + + with open(SNAPSHOT_FILE, 'w') as f: + json.dump(snapshot, f, indent=2) + print('Score snapshot saved.') + return snapshot + +def compare_snapshots(baseline: dict, current: dict, drift_threshold: float = 0.05) -> list: + drifts = [] + for query, base_results in baseline.items(): + if query not in current: + continue + base_scores = {r['id']: r['score'] for r in base_results} + for result in current[query]: + id_ = result['id'] + if id_ in base_scores: + delta = abs(result['score'] - 
base_scores[id_]) + if delta > drift_threshold: + drifts.append({ + 'query': query, + 'id': id_, + 'baseline': base_scores[id_], + 'current': result['score'], + 'delta': delta + }) + return drifts +``` + + + + +--- + +## Step 5: CI regression test + +Plug the evaluation harness into your CI pipeline so merges that degrade retrieval quality are blocked. + + + + +```js +// eval.ci.js — run with: node eval.ci.js +import { evaluate } from './evaluate.js' + +const PRECISION_THRESHOLD = 0.60 +const RECALL_THRESHOLD = 0.50 + +const results = await evaluate() + +const passed = + results.meanPrecision >= PRECISION_THRESHOLD && + results.meanRecall >= RECALL_THRESHOLD + +if (!passed) { + console.error(`\n❌ Retrieval regression detected!`) + console.error(` Precision@${results.k}: ${results.meanPrecision.toFixed(3)} (threshold: ${PRECISION_THRESHOLD})`) + console.error(` Recall@${results.k}: ${results.meanRecall.toFixed(3)} (threshold: ${RECALL_THRESHOLD})`) + + // Surface worst-performing queries + const worst = [...results.perQuery] + .sort((a, b) => a.precision - b.precision) + .slice(0, 3) + console.error('\nWorst-performing queries:') + worst.forEach(q => console.error(` "${q.query}" → P=${q.precision.toFixed(2)}, R=${q.recall.toFixed(2)}`)) + + process.exit(1) +} + +console.log(`\n✓ Retrieval quality OK (P@${results.k}=${results.meanPrecision.toFixed(3)}, R@${results.k}=${results.meanRecall.toFixed(3)})`) +process.exit(0) +``` + + + + +```python +# eval_ci.py — run with: python eval_ci.py +import sys +from evaluate import evaluate + +PRECISION_THRESHOLD = 0.60 +RECALL_THRESHOLD = 0.50 + +results = evaluate() + +passed = ( + results['mean_precision'] >= PRECISION_THRESHOLD and + results['mean_recall'] >= RECALL_THRESHOLD +) + +if not passed: + print('\n❌ Retrieval regression detected!') + print(f" Precision@{results['k']}: {results['mean_precision']:.3f} (threshold: {PRECISION_THRESHOLD})") + print(f" Recall@{results['k']}: {results['mean_recall']:.3f} (threshold: 
{RECALL_THRESHOLD})") + + worst = sorted(results['per_query'], key=lambda q: q['precision'])[:3] + print('\nWorst-performing queries:') + for q in worst: + print(f" \"{q['query']}\" → P={q['precision']:.2f}, R={q['recall']:.2f}") + + sys.exit(1) + +print(f"\n✓ Retrieval quality OK (P@{results['k']}={results['mean_precision']:.3f}, R@{results['k']}={results['mean_recall']:.3f})") +sys.exit(0) +``` + + + + +### GitHub Actions integration + +```yaml +# .github/workflows/rag-eval.yml +name: RAG Evaluation + +on: + pull_request: + paths: + - 'src/**' # Adjust to your source paths + +jobs: + eval: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install dependencies + run: npm ci + + - name: Run RAG evaluation + env: + RUSHDB_API_KEY: ${{ secrets.RUSHDB_API_KEY }} + run: node eval.ci.js +``` + +--- + +## Step 6: Track results over time + +Save evaluation results as JSON artifacts to build a history of retrieval quality. + + + + +```js +import { evaluate } from './evaluate.js' +import fs from 'node:fs' + +const results = await evaluate() + +const entry = { + timestamp: new Date().toISOString(), + commit: process.env.GITHUB_SHA ?? 
'local', + ...results +} + +// Append to a JSONL history file +fs.appendFileSync('./eval-history.jsonl', JSON.stringify(entry) + '\n') +console.log('Results saved to eval-history.jsonl') +``` + + + + +```python +import json, os +from datetime import datetime, timezone +from evaluate import evaluate + +results = evaluate() + +entry = { + 'timestamp': datetime.now(timezone.utc).isoformat(), + 'commit': os.environ.get('GITHUB_SHA', 'local'), + **results +} + +with open('./eval_history.jsonl', 'a') as f: + f.write(json.dumps(entry) + '\n') +print('Results saved to eval_history.jsonl') +``` + + + + +--- + +## Interpreting results + +| Signal | Likely cause | Action | +|---|---|---| +| Low Precision@k (< 0.4) | Too many off-topic results | Reduce `limit`, tighten `where` filters, or increase score threshold | +| Low Recall@k (< 0.4) | Missing relevant chunks | Check index status (`ready`?), increase `limit`, review chunking strategy | +| Score drift across a PR | Embedding model update | Review BYOV or model config change; re-evaluate thresholds | +| Specific query clusters failing | Coverage gap in corpus | Ingest additional source material for those topics | +| Single label dominating results | Label imbalance | Balance corpus or search labels individually then merge | + +--- + +## Tips + +- **Start with k=5 or k=10.** Going higher inflates recall trivially and makes the metric less useful for catching real degradation. +- **Weight your queries.** If some queries are business-critical, run a weighted mean rather than a simple average. +- **Keep ground truth in version control.** It becomes part of your specification — changes to expected IDs should be deliberate. +- **Re-validate ground truth after major corpus updates.** Expected IDs may no longer exist after a data migration. 
+ +--- + +## Next steps + +- [RAG Reranking](./rag-reranking) — improve Precision@k by adding a second retrieval stage +- [Multi-Source RAG](./rag-multi-source) — extend evaluation to cross-label retrieval +- [GraphRAG](./graphrag) — measure enrichment quality alongside base retrieval metrics diff --git a/docs/docs/tutorials/rag-multi-source.mdx b/docs/docs/tutorials/rag-multi-source.mdx new file mode 100644 index 00000000..f183a3b5 --- /dev/null +++ b/docs/docs/tutorials/rag-multi-source.mdx @@ -0,0 +1,680 @@ +--- +sidebar_position: 39 +title: "Multi-Source RAG" +description: Ingest PDFs, web pages, and database records as distinct labels, then search across all sources in a single vector query with source-aware citations. +tags: [RAG, AI, Embeddings, Search] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Multi-Source RAG + +Real retrieval pipelines rarely draw from a single data source. This tutorial shows how to ingest PDFs, web pages, and database summaries as distinct record labels, then search across all of them in a single vector query — with source-aware citations in the final answer. + +--- + +## Architecture overview + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ PDF Chunks │ │ Web Chunks │ │ DB Summary │ +│ PDF_CHUNK │ │ WEB_CHUNK │ │ DB_SUMMARY │ +└──────┬───────┘ └──────┬───────┘ └──────┬───────┘ + │ │ │ + └─────────────────┼─────────────────┘ + │ + ai.search across + all three labels + │ + LLM synthesis + with citations +``` + +Each source type lands in its own RushDB label. This keeps data organized and lets you filter by source when needed, while a single `ai.search` call lets you retrieve relevantly from all of them simultaneously. + +--- + +## Prerequisites + +- RushDB project with an embedding index configured (cloud managed or self-hosted with `RUSHDB_EMBEDDING_MODEL` set). +- A PDF parsing library (we'll use `pdf-parse` for Node.js / `pypdf` for Python). 
+- An LLM client for the synthesis step. + +--- + +## Step 1: Ingest PDFs as `PDF_CHUNK` + +Chunk the PDF text and store each chunk as a separate record. Keep metadata (`source`, `page`) so you can cite the document later. + + + + +```js +import RushDB from '@rushdb/javascript-sdk' +import pdfParse from 'pdf-parse' +import fs from 'node:fs' + +const db = new RushDB(process.env.RUSHDB_API_KEY) + +function chunkText(text, size = 600, overlap = 80) { + const chunks = [] + let start = 0 + while (start < text.length) { + chunks.push(text.slice(start, start + size)) + start += size - overlap + } + return chunks +} + +async function ingestPdf(filePath, docTitle) { + const buffer = fs.readFileSync(filePath) + const { text } = await pdfParse(buffer) + const chunks = chunkText(text) + + await db.records.importJson({ + label: 'PDF_CHUNK', + data: chunks.map((chunk, i) => ({ + text: chunk, + source: docTitle, + chunkIndex: i, + sourceType: 'pdf' + })) + }) + + console.log(`Ingested ${chunks.length} chunks from "${docTitle}"`) +} + +await ingestPdf('./docs/annual-report.pdf', 'Annual Report 2024') +await ingestPdf('./docs/product-spec.pdf', 'Product Spec v3') +``` + + + + +```python +import os +from rushdb import RushDB +from pypdf import PdfReader + +db = RushDB(os.environ['RUSHDB_API_KEY']) + +def chunk_text(text: str, size: int = 600, overlap: int = 80) -> list[str]: + chunks = [] + start = 0 + while start < len(text): + chunks.append(text[start:start + size]) + start += size - overlap + return chunks + +def ingest_pdf(file_path: str, doc_title: str): + reader = PdfReader(file_path) + full_text = '\n'.join(page.extract_text() or '' for page in reader.pages) + chunks = chunk_text(full_text) + + db.records.import_json( + label='PDF_CHUNK', + data=[ + { + 'text': chunk, + 'source': doc_title, + 'chunkIndex': i, + 'sourceType': 'pdf' + } + for i, chunk in enumerate(chunks) + ] + ) + print(f'Ingested {len(chunks)} chunks from "{doc_title}"') + 
+ingest_pdf('./docs/annual-report.pdf', 'Annual Report 2024') +ingest_pdf('./docs/product-spec.pdf', 'Product Spec v3') +``` + + + + +--- + +## Step 2: Ingest web pages as `WEB_CHUNK` + +Fetch and chunk web content the same way. Add a `url` field for citation. + + + + +```js +import { JSDOM } from 'jsdom' + +async function ingestWebPage(url) { + const res = await fetch(url) + const html = await res.text() + const dom = new JSDOM(html) + const text = dom.window.document.body.textContent ?? '' + const chunks = chunkText(text.replace(/\s+/g, ' ').trim()) + + await db.records.importJson({ + label: 'WEB_CHUNK', + data: chunks.map((chunk, i) => ({ + text: chunk, + url, + chunkIndex: i, + sourceType: 'web' + })) + }) + + console.log(`Ingested ${chunks.length} chunks from ${url}`) +} + +await ingestWebPage('https://docs.rushdb.com/get-started/quickstart') +await ingestWebPage('https://docs.rushdb.com/tutorials/graphrag') +``` + + + + +```python +import httpx +from bs4 import BeautifulSoup + +def ingest_web_page(url: str): + response = httpx.get(url, follow_redirects=True) + soup = BeautifulSoup(response.text, 'html.parser') + text = ' '.join(soup.get_text().split()) + chunks = chunk_text(text) + + db.records.import_json( + label='WEB_CHUNK', + data=[ + { + 'text': chunk, + 'url': url, + 'chunkIndex': i, + 'sourceType': 'web' + } + for i, chunk in enumerate(chunks) + ] + ) + print(f'Ingested {len(chunks)} chunks from {url}') + +ingest_web_page('https://docs.rushdb.com/get-started/quickstart') +ingest_web_page('https://docs.rushdb.com/tutorials/graphrag') +``` + + + + +--- + +## Step 3: Ingest database summaries as `DB_SUMMARY` + +For structured data, generate a natural-language summary per entity and store it as a record. This makes relational data searchable via vector similarity. 
+ + + + +```js +// Example: products from a SQL database +async function ingestDbSummaries(products) { + await db.records.importJson({ + label: 'DB_SUMMARY', + data: products.map(p => ({ + text: `${p.name}: ${p.description}. Category: ${p.category}. Price: $${p.price}. In stock: ${p.stock}.`, + entityId: p.id, + entityType: 'product', + sourceType: 'database' + })) + }) + console.log(`Ingested ${products.length} DB summaries`) +} + +const products = await fetchProductsFromDatabase() +await ingestDbSummaries(products) +``` + + + + +```python +def ingest_db_summaries(products: list[dict]): + db.records.import_json( + label='DB_SUMMARY', + data=[ + { + 'text': ( + f"{p['name']}: {p['description']}. " + f"Category: {p['category']}. " + f"Price: ${p['price']}. " + f"In stock: {p['stock']}." + ), + 'entityId': p['id'], + 'entityType': 'product', + 'sourceType': 'database' + } + for p in products + ] + ) + print(f'Ingested {len(products)} DB summaries') + +products = fetch_products_from_database() +ingest_db_summaries(products) +``` + + + + +--- + +## Step 4: Create embedding indexes + +Create one embedding index per label. Each index covers the `text` property. + + + + +```js +const labels = ['PDF_CHUNK', 'WEB_CHUNK', 'DB_SUMMARY'] + +for (const label of labels) { + const { id } = await db.embeddings.createIndex({ + label, + propertyName: 'text' + }) + console.log(`Created index for ${label}: ${id}`) +} +``` + + + + +```python +labels = ['PDF_CHUNK', 'WEB_CHUNK', 'DB_SUMMARY'] + +for label in labels: + result = db.embeddings.create_index(label=label, property_name='text') + print(f'Created index for {label}: {result.id}') +``` + + + + +The indexes backfill in the background. 
Poll status if you need to wait: + + + + +```js +import { setTimeout } from 'timers/promises' + +async function waitForIndex(indexId) { + while (true) { + const { status } = await db.embeddings.getIndex(indexId) + if (status === 'ready') break + console.log(`Index ${indexId}: ${status}`) + await setTimeout(3000) + } +} +``` + + + + +```python +import time + +def wait_for_index(index_id: str): + while True: + index = db.embeddings.get_index(index_id) + if index.status == 'ready': + break + print(f'Index {index_id}: {index.status}') + time.sleep(3) +``` + + + + +--- + +## Step 5: Cross-source search + +A single `ai.search` call across multiple labels returns results ranked by vector similarity regardless of source type. + + + + +```js +async function searchAllSources(query, limit = 8) { + const results = await db.ai.search({ + query, + labels: ['PDF_CHUNK', 'WEB_CHUNK', 'DB_SUMMARY'], + propertyName: 'text', + limit + }) + return results +} + +const hits = await searchAllSources('how does the pricing model work?') +``` + + + + +```python +def search_all_sources(query: str, limit: int = 8): + return db.ai.search( + query=query, + labels=['PDF_CHUNK', 'WEB_CHUNK', 'DB_SUMMARY'], + property_name='text', + limit=limit + ) + +hits = search_all_sources('how does the pricing model work?') +``` + + + + +### Source-specific retrieval + +You can also query a single source when you know which to target: + + + + +```js +// Only search the PDF corpus +const pdfHits = await db.ai.search({ + query: 'annual revenue breakdown', + labels: ['PDF_CHUNK'], + propertyName: 'text', + limit: 5 +}) + +// Only search web content +const webHits = await db.ai.search({ + query: 'quickstart guide', + labels: ['WEB_CHUNK'], + propertyName: 'text', + limit: 5 +}) +``` + + + + +```python +pdf_hits = db.ai.search( + query='annual revenue breakdown', + labels=['PDF_CHUNK'], + property_name='text', + limit=5 +) + +web_hits = db.ai.search( + query='quickstart guide', + labels=['WEB_CHUNK'], + 
property_name='text', + limit=5 +) +``` + + + + +--- + +## Step 6: Score filtering and de-duplication + +Drop low-confidence results and remove near-duplicate chunks before sending to the LLM. + + + + +```js +function deduplicateChunks(hits, scoreThreshold = 0.70, similarityThreshold = 0.95) { + // Drop low-score results + const filtered = hits.filter(h => (h.__score ?? 0) >= scoreThreshold) + + // Remove near-duplicates: skip a chunk if its text is too similar to one already kept + const kept = [] + for (const hit of filtered) { + const isDuplicate = kept.some(k => jaccardSimilarity(k.text, hit.text) >= similarityThreshold) + if (!isDuplicate) kept.push(hit) + } + return kept +} + +// Simple Jaccard similarity on word sets +function jaccardSimilarity(a, b) { + const setA = new Set(a.toLowerCase().split(/\s+/)) + const setB = new Set(b.toLowerCase().split(/\s+/)) + const intersection = [...setA].filter(w => setB.has(w)).length + const union = new Set([...setA, ...setB]).size + return intersection / union +} +``` + + + + +```python +def jaccard_similarity(a: str, b: str) -> float: + set_a = set(a.lower().split()) + set_b = set(b.lower().split()) + if not set_a or not set_b: + return 0.0 + return len(set_a & set_b) / len(set_a | set_b) + +def deduplicate_chunks( + hits: list, + score_threshold: float = 0.70, + similarity_threshold: float = 0.95 +) -> list: + filtered = [h for h in hits if (getattr(h, '__score', 0) or 0) >= score_threshold] + kept = [] + for hit in filtered: + is_dup = any( + jaccard_similarity(k.text, hit.text) >= similarity_threshold + for k in kept + ) + if not is_dup: + kept.append(hit) + return kept +``` + + + + +--- + +## Step 7: Build source-aware citations + +Format the context for the LLM prompt with numbered citations tied to each source type. 
+ + + + +```js +function buildContextWithCitations(hits) { + const citations = [] + const contextLines = [] + + hits.forEach((hit, i) => { + const ref = i + 1 + const sourceLabel = hit.__label + + let citation + if (sourceLabel === 'PDF_CHUNK') { + citation = `[${ref}] PDF: ${hit.source} (chunk ${hit.chunkIndex})` + } else if (sourceLabel === 'WEB_CHUNK') { + citation = `[${ref}] Web: ${hit.url}` + } else { + citation = `[${ref}] Database: ${hit.entityType} ID ${hit.entityId}` + } + + citations.push(citation) + contextLines.push(`[${ref}] ${hit.text}`) + }) + + return { + context: contextLines.join('\n\n'), + citationBlock: citations.join('\n') + } +} +``` + + + + +```python +def build_context_with_citations(hits: list) -> dict: + citations = [] + context_lines = [] + + for i, hit in enumerate(hits): + ref = i + 1 + label = getattr(hit, '__label', '') + + if label == 'PDF_CHUNK': + citation = f'[{ref}] PDF: {hit.source} (chunk {hit.chunkIndex})' + elif label == 'WEB_CHUNK': + citation = f'[{ref}] Web: {hit.url}' + else: + citation = f'[{ref}] Database: {hit.entityType} ID {hit.entityId}' + + citations.append(citation) + context_lines.append(f'[{ref}] {hit.text}') + + return { + 'context': '\n\n'.join(context_lines), + 'citation_block': '\n'.join(citations) + } +``` + + + + +--- + +## Step 8: Synthesize with the LLM + + + + +```js +import OpenAI from 'openai' + +const openai = new OpenAI() + +async function answerWithSources(question) { + const rawHits = await searchAllSources(question, 12) + const hits = deduplicateChunks(rawHits) + const { context, citationBlock } = buildContextWithCitations(hits) + + const prompt = `You are a helpful assistant. Answer the question below using ONLY the provided context. +After your answer, list the sources you used as [1], [2], etc. 
+ +Context: +${context} + +Question: ${question} + +Answer:` + + const completion = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + messages: [{ role: 'user', content: prompt }] + }) + + return { + answer: completion.choices[0].message.content, + citations: citationBlock + } +} + +const { answer, citations } = await answerWithSources('What is the refund policy?') +console.log(answer) +console.log('\nSources:\n' + citations) +``` + + + + +```python +from openai import OpenAI + +openai = OpenAI() + +def answer_with_sources(question: str) -> dict: + raw_hits = search_all_sources(question, limit=12) + hits = deduplicate_chunks(raw_hits) + ctx = build_context_with_citations(hits) + + prompt = f"""You are a helpful assistant. Answer the question below using ONLY the provided context. +After your answer, list the sources you used as [1], [2], etc. + +Context: +{ctx['context']} + +Question: {question} + +Answer:""" + + completion = openai.chat.completions.create( + model='gpt-4o-mini', + messages=[{'role': 'user', 'content': prompt}] + ) + + return { + 'answer': completion.choices[0].message.content, + 'citations': ctx['citation_block'] + } + +result = answer_with_sources('What is the refund policy?') +print(result['answer']) +print('\nSources:\n' + result['citations']) +``` + + + + +--- + +## Full pipeline + +The complete flow in one place: + +``` +ingestPdf() → PDF_CHUNK records +ingestWebPage() → WEB_CHUNK records +ingestDbSummaries()→ DB_SUMMARY records + ↓ + db.embeddings.createIndex() × 3 + ↓ + db.ai.search(labels: all three) + ↓ + deduplicateChunks(hits) + ↓ + buildContextWithCitations(hits) + ↓ + LLM.chat(prompt + context) + ↓ + answer + numbered citations +``` + +--- + +## Tips + +- **Adjust chunk size per source type.** PDFs may need smaller chunks (400–500 chars) for precision; web pages can tolerate 800–1000 chars for better context. 
+- **Filter by `sourceType` when the query implies a source.** If the user asks "show me in the docs…", restrict labels to `['WEB_CHUNK']` to reduce noise. +- **Add a `retrievedAt` timestamp** to web chunks to detect stale content and trigger re-ingestion. +- **Use `where` filters alongside** `ai.search` to scope by date, author, or any other metadata field. + +--- + +## Next steps + +- [RAG Evaluation](./rag-evaluation) — measure precision@k and recall@k across your pipeline +- [RAG Reranking](./rag-reranking) — two-stage retrieval with cross-encoder scoring +- [GraphRAG](./graphrag) — enrich chunks with graph context before synthesis diff --git a/docs/docs/tutorials/rag-pipeline.mdx b/docs/docs/tutorials/rag-pipeline.mdx new file mode 100644 index 00000000..af2d3768 --- /dev/null +++ b/docs/docs/tutorials/rag-pipeline.mdx @@ -0,0 +1,371 @@ +--- +sidebar_position: 6 +title: RAG Pipeline in Minutes +description: Chunk Markdown files, store them in RushDB, and build a retrieval-augmented generation pipeline in TypeScript, Python, or REST. +tags: [AI, RAG] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# RAG Pipeline in Minutes + +This tutorial builds a minimal but complete RAG pipeline: + +1. Read a folder of Markdown files +2. Split each file into overlapping chunks +3. Push chunks into RushDB (with source metadata) +4. Create an embedding index on the chunk text +5. Retrieve the top-K relevant chunks for a query +6. Pass them to an LLM as context + +Prerequisites: a running RushDB instance with `RUSHDB_EMBEDDING_MODEL` configured, or RushDB Cloud with AI enabled. + +--- + +## The docs folder + +For this example, assume you have a local folder `./docs` with a few Markdown files: + +``` +docs/ + architecture.md + api-reference.md + deployment.md +``` + +Each file is a few hundred lines. The chunking step below splits them into overlapping windows so no context is lost at boundaries. 
+ +--- + +## Step 1: Chunk and ingest + + + + +```python +import os +import re +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") + +DOCS_DIR = "./docs" +CHUNK_SIZE = 400 # characters +CHUNK_OVERLAP = 80 # characters + + +def chunk_text(text: str, size: int, overlap: int) -> list[str]: + chunks, start = [], 0 + while start < len(text): + end = min(start + size, len(text)) + chunks.append(text[start:end].strip()) + start += size - overlap + return [c for c in chunks if c] + + +records = [] +for filename in os.listdir(DOCS_DIR): + if not filename.endswith(".md"): + continue + path = os.path.join(DOCS_DIR, filename) + with open(path) as f: + content = f.read() + + # Extract first heading as title + match = re.search(r"^#\s+(.+)", content, re.MULTILINE) + title = match.group(1) if match else filename + + for i, chunk in enumerate(chunk_text(content, CHUNK_SIZE, CHUNK_OVERLAP)): + records.append({ + "source": filename, + "title": title, + "chunk_index": i, + "text": chunk, + }) + +db.records.import_json({"label": "CHUNK", "data": records}) +print(f"Ingested {len(records)} chunks from {DOCS_DIR}") +``` + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' +import fs from 'fs' +import path from 'path' + +const db = new RushDB('RUSHDB_API_KEY') + +const DOCS_DIR = './docs' +const CHUNK_SIZE = 400 +const CHUNK_OVERLAP = 80 + +function chunkText(text: string, size: number, overlap: number): string[] { + const chunks: string[] = [] + let start = 0 + while (start < text.length) { + const end = Math.min(start + size, text.length) + const chunk = text.slice(start, end).trim() + if (chunk) chunks.push(chunk) + start += size - overlap + } + return chunks +} + +const records: object[] = [] + +for (const filename of fs.readdirSync(DOCS_DIR)) { + if (!filename.endsWith('.md')) continue + const content = fs.readFileSync(path.join(DOCS_DIR, filename), 'utf-8') + const titleMatch = content.match(/^#\s+(.+)/m) + const title 
= titleMatch?.[1] ?? filename + + chunkText(content, CHUNK_SIZE, CHUNK_OVERLAP).forEach((text, i) => { + records.push({ source: filename, title, chunk_index: i, text }) + }) +} + +await db.records.importJson({ label: 'CHUNK', data: records }) +console.log(`Ingested ${records.length} chunks from ${DOCS_DIR}`) +``` + + + + +Assemble your chunks in any language, then POST them in a single batch: + +```bash +curl -X POST https://api.rushdb.com/api/v1/records/import/json \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "CHUNK", + "data": [ + { + "source": "architecture.md", + "title": "Architecture Overview", + "chunk_index": 0, + "text": "RushDB stores every record as a node in a property graph..." + }, + { + "source": "architecture.md", + "title": "Architecture Overview", + "chunk_index": 1, + "text": "Relationships between nested objects are created automatically..." + }, + { + "source": "deployment.md", + "title": "Deployment Guide", + "chunk_index": 0, + "text": "You can run RushDB with Docker using the official image..." + } + ] + }' +``` + + + + +--- + +## Step 2: Create an embedding index + +Index the `text` property so RushDB can run semantic search against it. + + + + +```python +db.ai.indexes.create({ + "label": "CHUNK", + "propertyName": "text" +}) +print("Embedding index created — RushDB is backfilling in the background") +``` + + + + +```typescript +await db.ai.indexes.create({ + label: 'CHUNK', + propertyName: 'text' +}) +console.log('Embedding index created — backfilling in background') +``` + + + + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/indexes \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "label": "CHUNK", + "propertyName": "text" + }' +``` + + + + +Backfilling runs asynchronously. Poll `GET /api/v1/ai/indexes` (or `db.ai.indexes.find()`) and wait until `status` is `ready` before running searches. 
For small corpora this usually takes under a minute. + +--- + +## Step 3: Retrieve relevant chunks + + + + +```python +query = "How does RushDB handle nested JSON objects?" + +result = db.ai.search({ + "propertyName": "text", + "query": query, + "labels": ["CHUNK"], + "limit": 5 +}) + +context_chunks = [r["text"] for r in result["data"]] +print(f"Retrieved {len(context_chunks)} chunks for: {query!r}") +for i, r in enumerate(result["data"]): + print(f"\n[{i+1}] (score {r.get('__score', 0):.3f})") + print(r["text"][:200] + "…") +``` + + + + +```typescript +const query = 'How does RushDB handle nested JSON objects?' + +const { data: chunks } = await db.ai.search({ + propertyName: 'text', + query, + labels: ['CHUNK'], + limit: 5 +}) + +console.log(`Retrieved ${chunks.length} chunks for: "${query}"`) +chunks.forEach((chunk, i) => { + console.log(`\n[${i + 1}] score: ${chunk.__score.toFixed(3)}`) + console.log(chunk.text.slice(0, 200) + '…') +}) +``` + + + + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "propertyName": "text", + "query": "How does RushDB handle nested JSON objects?", + "labels": ["CHUNK"], + "limit": 5 + }' +``` + + + + +--- + +## Step 4: Generate an answer + +Pass the retrieved chunks as context to any LLM. Example using the OpenAI Python SDK: + +```python +from openai import OpenAI + +client = OpenAI() + +context = "\n\n---\n\n".join(context_chunks) + +response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + { + "role": "system", + "content": "Answer using only the context provided. Be concise." + }, + { + "role": "user", + "content": f"Context:\n{context}\n\nQuestion: {query}" + } + ] +) + +print(response.choices[0].message.content) +``` + +RushDB is the retrieval layer — any LLM or framework (LangChain, LlamaIndex, Vercel AI SDK) slots in at this step. 
+ +--- + +## Filtering by source + +If you want to scope retrieval to a specific file, add a `where` clause: + + + + +```python +result = db.ai.search({ + "propertyName": "text", + "query": "docker compose environment variables", + "labels": ["CHUNK"], + "where": { "source": { "$endsWith": "deployment.md" } }, + "limit": 5 +}) +``` + + + + +```typescript +const { data } = await db.ai.search({ + propertyName: 'text', + query: 'docker compose environment variables', + labels: ['CHUNK'], + where: { source: { $endsWith: 'deployment.md' } }, + limit: 5 +}) +``` + + + + +```bash +curl -X POST https://api.rushdb.com/api/v1/ai/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "propertyName": "text", + "query": "docker compose environment variables", + "labels": ["CHUNK"], + "where": { "source": { "$endsWith": "deployment.md" } }, + "limit": 5 + }' +``` + + + + +The `where` prefilter runs on the graph layer before semantic scoring — so you narrow candidates without sacrificing recall within the target file. + +--- + +## What's next + +- Add more metadata fields (author, date, section heading) — they're queryable without any schema changes +- Use `$startsWith` or `$in` on `source` to search across a subset of files +- Combine with [transactions](../typescript-sdk/transactions) to atomically re-ingest a file when it changes +- Replace the fixed-size chunker with a semantic splitter (split on headings, paragraphs, or sentences) diff --git a/docs/docs/tutorials/rag-reranking.mdx b/docs/docs/tutorials/rag-reranking.mdx new file mode 100644 index 00000000..2f57ae46 --- /dev/null +++ b/docs/docs/tutorials/rag-reranking.mdx @@ -0,0 +1,459 @@ +--- +sidebar_position: 41 +title: "RAG Reranking" +description: Improve retrieval precision with two-stage search — over-fetch candidates with vector similarity, then rerank with LLM scoring or Reciprocal Rank Fusion before sending to the LLM. 
+tags: [RAG, AI, Embeddings, Search] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# RAG Reranking + +Vector similarity retrieval is fast but imprecise: cosine distance is a proxy for relevance, not a direct measure of it. Reranking adds a second, slower, but more accurate scoring stage that re-orders the initial candidates before they reach the LLM. + +This tutorial covers two complementary techniques: + +- **LLM-based reranking** — ask the model to score each candidate for relevance +- **Reciprocal Rank Fusion (RRF)** — combine results from multiple retrieval strategies without needing a trained cross-encoder + +--- + +## Two-stage retrieval pattern + +``` +Stage 1 (fast, broad): db.ai.search → top-N candidates (N = 20–50) + │ +Stage 2 (slow, precise): reranker → top-k final results (k = 3–5) + │ + LLM synthesis → answer +``` + +The key insight is to **over-fetch** in stage 1 (retrieve more candidates than you'll ultimately use) so the reranker has enough material to work with. + +--- + +## Approach 1: LLM-based reranking + +Ask the LLM to score each candidate passage for relevance to the query on a 0–10 scale, then sort descending. This is simple to implement, costs a small number of tokens per candidate, and works well out of the box. 
+ + + + +```js +import RushDB from '@rushdb/javascript-sdk' +import OpenAI from 'openai' + +const db = new RushDB(process.env.RUSHDB_API_KEY) +const openai = new OpenAI() + +// Stage 1: over-fetch candidates +async function retrieveCandidates(query, candidateCount = 25) { + return db.ai.search({ + query, + labels: ['DOC_CHUNK'], + propertyName: 'text', + limit: candidateCount + }) +} + +// Stage 2: LLM reranking +async function rerankWithLlm(query, candidates, topK = 5) { + // Score all candidates in parallel (batching keeps cost low) + const scored = await Promise.all( + candidates.map(async (candidate) => { + const prompt = `Rate how relevant the following passage is to the query on a scale of 0 to 10. +Respond with ONLY a single integer (0–10). No explanation. + +Query: ${query} + +Passage: ${candidate.text} + +Relevance score:` + + const completion = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + messages: [{ role: 'user', content: prompt }], + max_tokens: 5, + temperature: 0 + }) + + const raw = completion.choices[0].message.content?.trim() ?? '0' + const score = parseInt(raw, 10) + return { ...candidate, rerankScore: isNaN(score) ? 
0 : score } + }) + ) + + return scored + .sort((a, b) => b.rerankScore - a.rerankScore) + .slice(0, topK) +} + +// Combined pipeline +async function retrieveAndRerank(query, topK = 5) { + const candidates = await retrieveCandidates(query, 25) + const reranked = await rerankWithLlm(query, candidates, topK) + return reranked +} + +const topChunks = await retrieveAndRerank('how does billing work for BYOC projects?') +topChunks.forEach(c => console.log(`[${c.rerankScore}/10] ${c.text.slice(0, 80)}...`)) +``` + + + + +```python +import os +from concurrent.futures import ThreadPoolExecutor +from rushdb import RushDB +from openai import OpenAI + +db = RushDB(os.environ['RUSHDB_API_KEY']) +openai = OpenAI() + +def retrieve_candidates(query: str, candidate_count: int = 25) -> list: + return db.ai.search( + query=query, + labels=['DOC_CHUNK'], + property_name='text', + limit=candidate_count + ) + +def score_candidate(query: str, candidate) -> dict: + prompt = f"""Rate how relevant the following passage is to the query on a scale of 0 to 10. +Respond with ONLY a single integer (0–10). No explanation. 
+ +Query: {query} + +Passage: {candidate.text} + +Relevance score:""" + + completion = openai.chat.completions.create( + model='gpt-4o-mini', + messages=[{'role': 'user', 'content': prompt}], + max_tokens=5, + temperature=0 + ) + raw = (completion.choices[0].message.content or '0').strip() + try: + score = int(raw) + except ValueError: + score = 0 + return {'record': candidate, 'rerank_score': score} + +def rerank_with_llm(query: str, candidates: list, top_k: int = 5) -> list: + with ThreadPoolExecutor(max_workers=10) as executor: + scored = list(executor.map(lambda c: score_candidate(query, c), candidates)) + + scored.sort(key=lambda x: x['rerank_score'], reverse=True) + return [item['record'] for item in scored[:top_k]] + +def retrieve_and_rerank(query: str, top_k: int = 5) -> list: + candidates = retrieve_candidates(query, 25) + return rerank_with_llm(query, candidates, top_k) + +top_chunks = retrieve_and_rerank('how does billing work for BYOC projects?') +for chunk in top_chunks: + print(f'{chunk.text[:80]}...') +``` + + + + +--- + +## Approach 2: Reciprocal Rank Fusion (RRF) + +RRF merges ranked result lists from multiple retrieval strategies without needing scores to be on the same scale. It's particularly useful when combining: + +- Vector similarity (semantic) +- Keyword/property filters (exact) +- Different vector indexes (e.g., title vs body) + +**RRF formula:** + +``` +RRF(d) = Σ 1 / (k + rank_r(d)) for each result list r +``` + +Where `k = 60` is the standard smoothing constant and `r` iterates over result lists. 
+ + + + +```js +// rrf.js + +/** + * Reciprocal Rank Fusion + * @param {string[][]} rankedLists - Arrays of record IDs, each ordered by rank + * @param {number} k - Smoothing constant (default: 60) + * @returns {string[]} Merged ranking by RRF score descending + */ +function rrfMerge(rankedLists, k = 60) { + const scores = new Map() + + for (const list of rankedLists) { + list.forEach((id, index) => { + const rank = index + 1 // 1-based + scores.set(id, (scores.get(id) ?? 0) + 1 / (k + rank)) + }) + } + + return [...scores.entries()] + .sort((a, b) => b[1] - a[1]) + .map(([id]) => id) +} + +// Example: combine semantic search on body text with title-focused search +async function hybridSearch(query, topK = 5) { + const [bodyHits, titleHits] = await Promise.all([ + db.ai.search({ + query, + labels: ['DOC_CHUNK'], + propertyName: 'text', + limit: 20 + }), + db.ai.search({ + query, + labels: ['DOC_CHUNK'], + propertyName: 'title', + limit: 20 + }) + ]) + + const bodyIds = bodyHits.map(h => h.__id) + const titleIds = titleHits.map(h => h.__id) + + const mergedIds = rrfMerge([bodyIds, titleIds]).slice(0, topK) + + // Hydrate with original records (preserve all fields) + const recordMap = Object.fromEntries( + [...bodyHits, ...titleHits].map(h => [h.__id, h]) + ) + return mergedIds.map(id => recordMap[id]).filter(Boolean) +} + +export { rrfMerge, hybridSearch } +``` + + + + +```python +# rrf.py +from concurrent.futures import ThreadPoolExecutor + +def rrf_merge(ranked_lists: list[list[str]], k: int = 60) -> list[str]: + """Reciprocal Rank Fusion over multiple ranked ID lists.""" + scores: dict[str, float] = {} + for ranked in ranked_lists: + for rank, id_ in enumerate(ranked, start=1): + scores[id_] = scores.get(id_, 0.0) + 1 / (k + rank) + return sorted(scores, key=lambda id_: scores[id_], reverse=True) + +def hybrid_search(db, query: str, top_k: int = 5) -> list: + with ThreadPoolExecutor(max_workers=2) as executor: + body_future = executor.submit( + db.ai.search, + 
query=query, labels=['DOC_CHUNK'], property_name='text', limit=20 + ) + title_future = executor.submit( + db.ai.search, + query=query, labels=['DOC_CHUNK'], property_name='title', limit=20 + ) + body_hits = body_future.result() + title_hits = title_future.result() + + body_ids = [h.__id for h in body_hits] + title_ids = [h.__id for h in title_hits] + + merged_ids = rrf_merge([body_ids, title_ids])[:top_k] + + record_map = {h.__id: h for h in [*body_hits, *title_hits]} + return [record_map[id_] for id_ in merged_ids if id_ in record_map] +``` + + + + +--- + +## Approach 3: Combined pipeline (RRF → LLM rerank) + +For maximum precision, use RRF to merge multiple retrieval strategies, then LLM-rerank the merged candidates: + + + + +```js +async function precisionPipeline(query, finalTopK = 5) { + // Stage 1a: broad semantic search + const semanticHits = await db.ai.search({ + query, + labels: ['DOC_CHUNK'], + propertyName: 'text', + limit: 20 + }) + + // Stage 1b: keyword-filtered candidates + const keywordHits = await db.records.find({ + labels: ['DOC_CHUNK'], + where: { + text: { $contains: query.split(' ').slice(0, 3).join(' ') } + }, + limit: 20 + }) + + // Stage 2: RRF merge + const semanticIds = semanticHits.map(h => h.__id) + const keywordIds = (keywordHits.data ?? []).map(h => h.__id) + const mergedIds = rrfMerge([semanticIds, keywordIds]).slice(0, 25) + + // Hydrate merged + const allRecords = [...semanticHits, ...(keywordHits.data ?? 
[])] + const recordMap = Object.fromEntries(allRecords.map(r => [r.__id, r])) + const merged = mergedIds.map(id => recordMap[id]).filter(Boolean) + + // Stage 3: LLM rerank + return rerankWithLlm(query, merged, finalTopK) +} +``` + + + + +```python +def precision_pipeline(db, openai_client, query: str, final_top_k: int = 5) -> list: + # Stage 1a: semantic search + semantic_hits = db.ai.search( + query=query, labels=['DOC_CHUNK'], property_name='text', limit=20 + ) + + # Stage 1b: keyword filter + words = query.split()[:3] + keyword_result = db.records.find( + labels=['DOC_CHUNK'], + where={'text': {'$contains': ' '.join(words)}}, + limit=20 + ) + keyword_hits = keyword_result.data if keyword_result else [] + + # Stage 2: RRF merge + semantic_ids = [h.__id for h in semantic_hits] + keyword_ids = [h.__id for h in keyword_hits] + merged_ids = rrf_merge([semantic_ids, keyword_ids])[:25] + + all_records = [*semantic_hits, *keyword_hits] + record_map = {r.__id: r for r in all_records} + merged = [record_map[id_] for id_ in merged_ids if id_ in record_map] + + # Stage 3: LLM rerank + return rerank_with_llm(query, merged, final_top_k) +``` + + + + +--- + +## Cost and latency trade-offs + +| Strategy | Latency | Cost | Precision gain | +|---|---|---|---| +| Vector only (baseline) | ~50–200 ms | $ | — | +| RRF (multi-index) | ~100–400 ms | $ | Low–Medium | +| LLM rerank (gpt-4o-mini) | +500–2000 ms | $$ | Medium–High | +| LLM rerank (gpt-4o) | +1000–4000 ms | $$$ | High | +| RRF + LLM rerank | +600–2500 ms | $$ | High | + +**Practical guidance:** +- Use **vector only** for real-time type-ahead or high-volume search where P@5 > 0.6 is already achieved. +- Use **RRF** when you have multiple meaningful retrieval signals (titles, bodies, semantic, exact match) and want free precision gains. +- Use **LLM rerank** when you need the highest possible precision for low-traffic, high-stakes queries (support tickets, legal research, medical Q&A). 
+- Use **RRF + LLM rerank** for regulated domains or when evaluation shows vector-only Precision@5 < 0.55. + +--- + +## Caching rerank scores + +Reranker calls are expensive. Cache results for identical (query, candidate-set) pairs to avoid re-scoring on repeated queries. + + + + +```js +const rerankCache = new Map() + +async function cachedRerank(query, candidates, topK = 5) { + // Cache key: query + sorted candidate IDs + const cacheKey = query + '|' + candidates.map(c => c.__id).sort().join(',') + + if (rerankCache.has(cacheKey)) { + return rerankCache.get(cacheKey) + } + + const result = await rerankWithLlm(query, candidates, topK) + rerankCache.set(cacheKey, result) + return result +} +``` + + + + +```python +import hashlib + +_rerank_cache: dict[str, list] = {} + +def cached_rerank(query: str, candidates: list, top_k: int = 5) -> list: + ids_key = ','.join(sorted(c.__id for c in candidates)) + cache_key = hashlib.md5(f'{query}|{ids_key}'.encode()).hexdigest() + + if cache_key in _rerank_cache: + return _rerank_cache[cache_key] + + result = rerank_with_llm(query, candidates, top_k) + _rerank_cache[cache_key] = result + return result +``` + + + + +--- + +## Full pipeline summary + +``` +User query + │ + ▼ +db.ai.search(limit=20..50) ← Stage 1: fast broad retrieval + │ + ├── optional: db.records.find(keyword filter) + │ + ▼ +rrfMerge([semantic, keyword]) ← Stage 2: fuse result lists + │ + ▼ +rerankWithLlm(merged, topK=5) ← Stage 3: precise reordering + │ + ▼ +buildPrompt(topChunks) + │ + ▼ +LLM.chat(prompt) ← Final answer with citations +``` + +--- + +## Next steps + +- [RAG Evaluation](./rag-evaluation) — measure the Precision@k impact of adding reranking +- [Multi-Source RAG](./rag-multi-source) — apply RRF across PDF, web, and database labels +- [GraphRAG](./graphrag) — add graph-enriched context alongside reranked chunks diff --git a/docs/docs/tutorials/research-knowledge-graph.mdx 
b/docs/docs/tutorials/research-knowledge-graph.mdx new file mode 100644 index 00000000..d46caf05 --- /dev/null +++ b/docs/docs/tutorials/research-knowledge-graph.mdx @@ -0,0 +1,451 @@ +--- +sidebar_position: 27 +title: "Research Knowledge Graph: Papers, Authors, Topics, Citations" +description: Build a scholarly graph supporting citation traversal, topical clustering, and author-centric discovery for research workflows. +tags: [Domain Blueprint, AI Search, Relationships, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Research Knowledge Graph: Papers, Authors, Topics, Citations + +Academic research is inherently graph-shaped. A paper has authors. Authors belong to institutions. Papers cite other papers. Papers cover topics. Topics overlap. A flat document store loses all of that structure. + +This tutorial builds a scholarly knowledge graph that enables citation traversal, co-author discovery, topical clustering, and semantic retrieval. + +--- + +## Graph shape + +```mermaid +graph LR + PAPER[PAPER] -->|AUTHORED_BY| AUTHOR[AUTHOR] + AUTHOR -->|AFFILIATED_WITH| INSTITUTION[INSTITUTION] + PAPER -->|COVERS| TOPIC[TOPIC] + PAPER -->|CITES| PAPER + TOPIC -->|RELATED_TO| TOPIC +``` + +| Label | What it represents | +|---|---| +| `PAPER` | A research paper with title, abstract, year, DOI | +| `AUTHOR` | A researcher or collaborator | +| `INSTITUTION` | University, lab, or company | +| `TOPIC` | A subject area or keyword cluster | + +--- + +## Step 1: Ingest papers and authors + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) 
+ +await db.records.importJson({ + label: 'PAPER', + data: [ + { + doi: '10.1000/xyz001', + title: 'Graph Databases for Scientific Knowledge Representation', + abstract: 'This paper surveys the use of graph databases in representing and querying scientific knowledge, including citation networks, ontologies, and experimental results.', + year: 2023, + venue: 'VLDB', + citationCount: 47 + }, + { + doi: '10.1000/xyz002', + title: 'Neural Retrieval Augmentation with Knowledge Graphs', + abstract: 'We propose a retrieval augmentation framework that combines dense vector search with structured graph traversal to improve factual precision in language model outputs.', + year: 2024, + venue: 'NeurIPS', + citationCount: 112 + }, + { + doi: '10.1000/xyz003', + title: 'Scalable Graph Construction from Unstructured Text', + abstract: 'A pipeline for extracting entities and relationships from scientific text and constructing queryable knowledge graphs at scale.', + year: 2024, + venue: 'ACL', + citationCount: 29 + } + ] +}) + +await db.records.importJson({ + label: 'AUTHOR', + data: [ + { name: 'Dr. Yuki Tanaka', email: 'y.tanaka@uni.edu', hIndex: 18 }, + { name: 'Prof. Lena Müller', email: 'l.muller@institute.de', hIndex: 34 }, + { name: 'Dr. 
Carlos Reyes', email: 'c.reyes@lab.com', hIndex: 12 } + ] +}) + +await db.records.importJson({ + label: 'TOPIC', + data: [ + { name: 'graph databases', category: 'systems' }, + { name: 'knowledge representation', category: 'ai' }, + { name: 'retrieval augmented generation', category: 'nlp' }, + { name: 'information extraction', category: 'nlp' } + ] +}) +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +db.records.import_json({ + "label": "PAPER", + "data": [ + { + "doi": "10.1000/xyz001", + "title": "Graph Databases for Scientific Knowledge Representation", + "abstract": "This paper surveys the use of graph databases in representing and querying scientific knowledge.", + "year": 2023, "venue": "VLDB", "citationCount": 47 + }, + { + "doi": "10.1000/xyz002", + "title": "Neural Retrieval Augmentation with Knowledge Graphs", + "abstract": "We propose a retrieval augmentation framework combining dense vector search with graph traversal.", + "year": 2024, "venue": "NeurIPS", "citationCount": 112 + }, + { + "doi": "10.1000/xyz003", + "title": "Scalable Graph Construction from Unstructured Text", + "abstract": "A pipeline for extracting entities and relationships from scientific text.", + "year": 2024, "venue": "ACL", "citationCount": 29 + } + ] +}) + +db.records.import_json({ + "label": "AUTHOR", + "data": [ + {"name": "Dr. Yuki Tanaka", "email": "y.tanaka@uni.edu", "hIndex": 18}, + {"name": "Prof. Lena Müller", "email": "l.muller@institute.de", "hIndex": 34}, + {"name": "Dr. 
Carlos Reyes", "email": "c.reyes@lab.com", "hIndex": 12} + ] +}) + +db.records.import_json({ + "label": "TOPIC", + "data": [ + {"name": "graph databases", "category": "systems"}, + {"name": "knowledge representation", "category": "ai"}, + {"name": "retrieval augmented generation", "category": "nlp"}, + {"name": "information extraction", "category": "nlp"} + ] +}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +curl -s -X POST "$BASE/records/import/json" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PAPER","data":[{"doi":"10.1000/xyz001","title":"Graph Databases for Scientific Knowledge Representation","year":2023,"venue":"VLDB","citationCount":47},{"doi":"10.1000/xyz002","title":"Neural Retrieval Augmentation with Knowledge Graphs","year":2024,"venue":"NeurIPS","citationCount":112}]}' +``` + + + + +--- + +## Step 2: Build the relationship graph + + + + +```typescript +// Fetch all records +const [papers, authors, topics] = await Promise.all([ + db.records.find({ labels: ['PAPER'] }), + db.records.find({ labels: ['AUTHOR'] }), + db.records.find({ labels: ['TOPIC'] }) +]) + +const paperMap = Object.fromEntries(papers.data.map(p => [p.doi, p])) +const authorMap = Object.fromEntries(authors.data.map(a => [a.email, a])) +const topicMap = Object.fromEntries(topics.data.map(t => [t.name, t])) + +// Paper xyz001: authored by Yuki Tanaka and Lena Müller +await db.records.attach({ source: paperMap['10.1000/xyz001'], target: authorMap['y.tanaka@uni.edu'], options: { type: 'AUTHORED_BY', direction: 'out' } }) +await db.records.attach({ source: paperMap['10.1000/xyz001'], target: authorMap['l.muller@institute.de'], options: { type: 'AUTHORED_BY', direction: 'out' } }) + +// Paper xyz002: authored by Lena Müller and Carlos Reyes, cites xyz001 +await db.records.attach({ source: paperMap['10.1000/xyz002'], target: authorMap['l.muller@institute.de'], options: { type: 'AUTHORED_BY', 
direction: 'out' } }) +await db.records.attach({ source: paperMap['10.1000/xyz002'], target: authorMap['c.reyes@lab.com'], options: { type: 'AUTHORED_BY', direction: 'out' } }) +await db.records.attach({ source: paperMap['10.1000/xyz002'], target: paperMap['10.1000/xyz001'], options: { type: 'CITES', direction: 'out' } }) + +// Topics +await db.records.attach({ source: paperMap['10.1000/xyz001'], target: topicMap['graph databases'], options: { type: 'COVERS', direction: 'out' } }) +await db.records.attach({ source: paperMap['10.1000/xyz002'], target: topicMap['retrieval augmented generation'], options: { type: 'COVERS', direction: 'out' } }) +await db.records.attach({ source: paperMap['10.1000/xyz002'], target: topicMap['knowledge representation'], options: { type: 'COVERS', direction: 'out' } }) +``` + + + + +```python +papers = db.records.find({"labels": ["PAPER"]}) +authors = db.records.find({"labels": ["AUTHOR"]}) +topics = db.records.find({"labels": ["TOPIC"]}) + +paper_map = {p.data["doi"]: p for p in papers.data} +author_map = {a.data["email"]: a for a in authors.data} +topic_map = {t.data["name"]: t for t in topics.data} + +db.records.attach(paper_map["10.1000/xyz001"].id, author_map["y.tanaka@uni.edu"].id, {"type": "AUTHORED_BY", "direction": "out"}) +db.records.attach(paper_map["10.1000/xyz001"].id, author_map["l.muller@institute.de"].id, {"type": "AUTHORED_BY", "direction": "out"}) + +db.records.attach(paper_map["10.1000/xyz002"].id, author_map["l.muller@institute.de"].id, {"type": "AUTHORED_BY", "direction": "out"}) +db.records.attach(paper_map["10.1000/xyz002"].id, author_map["c.reyes@lab.com"].id, {"type": "AUTHORED_BY", "direction": "out"}) +db.records.attach(paper_map["10.1000/xyz002"].id, paper_map["10.1000/xyz001"].id, {"type": "CITES", "direction": "out"}) + +db.records.attach(paper_map["10.1000/xyz001"].id, topic_map["graph databases"].id, {"type": "COVERS", "direction": "out"}) +db.records.attach(paper_map["10.1000/xyz002"].id, 
topic_map["retrieval augmented generation"].id, {"type": "COVERS", "direction": "out"}) +``` + + + + +```bash +# Get paper IDs, then attach +P1=$(curl -s -X POST "$BASE/records/search" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["PAPER"],"where":{"doi":"10.1000/xyz001"}}' | jq -r '.data[0].__id') +P2=$(curl -s -X POST "$BASE/records/search" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["PAPER"],"where":{"doi":"10.1000/xyz002"}}' | jq -r '.data[0].__id') + +# P2 cites P1 +curl -s -X POST "$BASE/records/$P2/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$P1\"],\"options\":{\"type\":\"CITES\",\"direction\":\"out\"}}" +``` + + + + +--- + +## Step 3: Citation traversal queries + + + + +```typescript +// Papers that cite xyz001 +const citingPapers = await db.records.find({ + labels: ['PAPER'], + where: { + PAPER: { + $relation: { type: 'CITES', direction: 'out' }, + doi: '10.1000/xyz001' + } + }, + orderBy: { citationCount: 'desc' } +}) + +// All papers by Lena Müller +const mullerPapers = await db.records.find({ + labels: ['PAPER'], + where: { + AUTHOR: { + $relation: { type: 'AUTHORED_BY', direction: 'out' }, + email: 'l.muller@institute.de' + } + }, + orderBy: { year: 'desc' } +}) + +// Co-authors of Lena Müller (authors who share a paper) +const coAuthors = await db.records.find({ + labels: ['AUTHOR'], + where: { + PAPER: { + $relation: { type: 'AUTHORED_BY', direction: 'in' }, + AUTHOR: { + $relation: { type: 'AUTHORED_BY', direction: 'out' }, + email: 'l.muller@institute.de' + } + }, + email: { $ne: 'l.muller@institute.de' } // exclude Lena herself + } +}) + +console.log('Co-authors:', coAuthors.data.map(a => a.name)) +``` + + + + +```python +# Papers citing xyz001 +citing = db.records.find({ + "labels": ["PAPER"], + "where": { + "PAPER": { + "$relation": {"type": "CITES", "direction": "out"}, + "doi": "10.1000/xyz001" + } + }, + "orderBy": {"citationCount": "desc"} +}) + +# All papers by Lena Müller 
+muller_papers = db.records.find({ + "labels": ["PAPER"], + "where": { + "AUTHOR": { + "$relation": {"type": "AUTHORED_BY", "direction": "out"}, + "email": "l.muller@institute.de" + } + }, + "orderBy": {"year": "desc"} +}) + +# Co-authors +co_authors = db.records.find({ + "labels": ["AUTHOR"], + "where": { + "PAPER": { + "$relation": {"type": "AUTHORED_BY", "direction": "in"}, + "AUTHOR": { + "$relation": {"type": "AUTHORED_BY", "direction": "out"}, + "email": "l.muller@institute.de" + } + }, + "email": {"$ne": "l.muller@institute.de"} + } +}) +print("Co-authors:", [a.data.get("name") for a in co_authors.data]) +``` + + + + +```bash +# Papers by specific author +AUTHOR_ID=$(curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["AUTHOR"],"where":{"email":"l.muller@institute.de"}}' \ + | jq -r '.data[0].__id') + +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"labels\":[\"PAPER\"],\"where\":{\"AUTHOR\":{\"\$relation\":{\"type\":\"AUTHORED_BY\",\"direction\":\"out\"},\"__id\":\"$AUTHOR_ID\"}},\"orderBy\":{\"year\":\"desc\"}}" +``` + + + + +--- + +## Step 4: Semantic search over abstracts + +Enable semantic search to retrieve papers by conceptual relevance rather than keyword matching. 
+ + + + +```typescript +// Create index on abstract (run once) +await db.ai.indexes.create({ + label: 'PAPER', + propertyName: 'abstract' +}) + +// Poll until ready (simplified) +let ready = false +while (!ready) { + const stats = await db.ai.indexes.stats('PAPER') + ready = stats.data.indexedRecords === stats.data.totalRecords + if (!ready) await new Promise(r => setTimeout(r, 2000)) +} + +// Semantic search — hybrid: conceptual query + structured year filter +const results = await db.ai.search({ + query: 'combining structured graphs with neural retrieval', + propertyName: 'abstract', + labels: ['PAPER'], + where: { year: { $gte: 2023 } }, + limit: 5 +}) + +for (const paper of results.data) { + console.log(`[${paper.year}] ${paper.title} — score: ${paper.__score.toFixed(3)}`) +} +``` + + + + +```python +import time + +db.ai.indexes.create({"label": "PAPER", "propertyName": "abstract"}) + +while True: + stats = db.ai.indexes.stats("PAPER") + if stats.data.get("indexedRecords") == stats.data.get("totalRecords"): + break + time.sleep(2) + +results = db.ai.search({ + "query": "combining structured graphs with neural retrieval", + "propertyName": "abstract", + "labels": ["PAPER"], + "where": {"year": {"$gte": 2023}}, + "limit": 5 +}) + +for paper in results.data: + print(f"[{paper.data.get('year')}] {paper.data.get('title')} — score: {paper.__score:.3f}") +``` + + + + +```bash +# Create index +curl -s -X POST "$BASE/ai/indexes" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PAPER","propertyName":"abstract"}' + +# Search +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"query":"combining structured graphs with neural retrieval","propertyName":"abstract","labels":["PAPER"],"where":{"year":{"$gte":2023}},"limit":5}' +``` + + + + +--- + +## Production caveat + +Citation graphs become highly connected over time. Deep traversal queries (papers that cite papers that cite papers) fan out exponentially. 
All queries in these examples traverse at most two hops. Design your search queries to bound depth by using intermediate label filters rather than chaining open-ended relationship traversals. + +--- + +## Next steps + +- [Hybrid Retrieval](./hybrid-retrieval.mdx) — filter + semantic rank in one call +- [Modeling Hierarchies, Networks, and Feedback Loops](./modeling-hierarchies.mdx) — structural patterns for citation networks +- [Data Lineage](./data-lineage.mdx) — trace derived knowledge back to source papers diff --git a/docs/docs/tutorials/reusable-search-query.mdx b/docs/docs/tutorials/reusable-search-query.mdx index bc3de694..76734a12 100644 --- a/docs/docs/tutorials/reusable-search-query.mdx +++ b/docs/docs/tutorials/reusable-search-query.mdx @@ -1,91 +1,233 @@ --- -sidebar_position: 5 +sidebar_position: 4 title: Reusable SearchQuery -description: Harness the power of RushDB's fractal API architecture through its reusable SearchQuery pattern +description: Learn the canonical SearchQuery shape reused across records, properties, labels, relationships, and values +tags: [Search, Patterns, Querying] --- -import Tabs from '@theme/Tabs'; +import Tabs from '@site/src/components/LanguageTabs'; import TabItem from '@theme/TabItem'; # Reusable SearchQuery -One of the most powerful concepts behind RushDB is its "fractal" API architecture with a self-aware design that's exposed through a consistent, easy-to-understand interface. This design philosophy allows you to use the same query structure across different aspects of your graph database, creating a highly flexible and intuitive developer experience. +RushDB uses one core query object shape across multiple APIs. 
-## The Power of Consistency +You can reuse the same query logic across five perspectives on data: -At the heart of RushDB's API design is the SearchQuery pattern - a standardized way to query your data that remains consistent regardless of what entity you're working with: records, relationships, labels, or properties. This consistent approach brings several powerful benefits: +- Records +- Properties +- Labels +- Relationships +- Values -- **Reduced learning curve**: Learn the query pattern once, apply it everywhere -- **Predictable API usage**: No need to learn different filtering paradigms for different entity types -- **Code reusability**: Reuse query logic across different parts of your application -- **Self-discoverability**: The graph intrinsically knows its structure and exposes it consistently +Once you learn SearchQuery once, you can apply it almost everywhere. -## SearchQuery Structure +## Canonical SearchQuery Shape -The SearchQuery object provides a standardized way to filter, sort, and paginate results: +```typescript +type SearchQuery = { + labels?: string[] + where?: Record + aggregate?: Record + groupBy?: string[] + orderBy?: 'asc' | 'desc' | Record + limit?: number + skip?: number +} +``` + +Supported by: + +- `records.find` / `/api/v1/records/search` +- `records.delete` / `/api/v1/records/delete` +- `relationships.find` / `/api/v1/relationships/search` +- `labels.find` / `/api/v1/labels/search` +- `properties.find` / `/api/v1/properties/search` + +And paired with values exploration via `/api/v1/properties/:id/values`. + +## Why It Matters + +- Reduced learning curve +- Reusable filtering logic across endpoints +- Predictable behavior for filtering, aggregation, and pagination +- Easier AI-assisted query generation because the shape stays stable + +## One Intent, Five Perspectives + +SearchQuery's ultimate feature is intent reuse. 
+ +You can express one business question and view it from different perspectives: + +- Records: return matching entities +- Properties: discover which fields participate in that slice +- Labels: discover which entity types match +- Relationships: inspect graph links within the same slice +- Values: enumerate canonical values for a chosen property in that slice + +This means your app can move from listing to discovery to analytics without changing mental model. + +## Where Clause Essentials + +### Primitive and operator filters ```typescript -interface SearchQuery { - // Filter by record labels - labels?: string[]; +where: { + status: { $in: ['active', 'pending'] }, + amount: { $gte: 1000, $lt: 5000 }, + name: { $contains: 'acme' }, + isArchived: { $ne: true } +} +``` - // Filter by property values and relationships - where?: WhereClause; +### Logical composition - // Maximum number of records to return (default: 100) - limit?: number; +```typescript +where: { + $or: [ + { status: 'active' }, + { + $and: [ + { status: 'pending' }, + { priority: { $gte: 8 } } + ] + } + ] +} +``` - // Number of records to skip (for pagination) - skip?: number; +### Relationship traversal - // Sorting configuration - orderBy?: OrderByClause; +In SearchQuery, relationship traversal uses label keys directly. 
- // Data aggregation and transformation - aggregate?: AggregateClause; +```typescript +where: { + DEPARTMENT: { + $alias: '$department', + name: 'Engineering', + PROJECT: { + $alias: '$project', + EMPLOYEE: { + $alias: '$employee', + role: { $in: ['Developer', 'Lead'] } + } + } + } } ``` -## Fractal API in Action +Important: + +- Use the label key itself (`DEPARTMENT`, `PROJECT`, `EMPLOYEE`) to traverse +- Use `$alias` to reference traversed nodes in `aggregate` and `groupBy` +- Do not use unsupported traversal fields such as `$label`, `$as`, `$through`, `$of` -The power of RushDB's fractal API design becomes apparent when you see the same query structure used across different endpoints: +### Date range rule -### 1. Searching Records +For range operators on datetimes, use component objects. + +```typescript +where: { + createdAt: { + $gte: { $year: 2025, $month: 1, $day: 1 }, + $lt: { $year: 2026, $month: 1, $day: 1 } + } +} +``` + +## Aggregate and GroupBy + +SearchQuery supports both per-record aggregation and grouped analytics. 
+ +### Per-record flat aggregation + +```typescript +const rows = await db.records.find({ + labels: ['PROJECT'], + where: { + EMPLOYEE: { $alias: '$employee' } + }, + aggregate: { + projectName: '$record.name', + headcount: { fn: 'count', alias: '$employee', unique: true }, + totalSalary: { fn: 'sum', field: 'salary', alias: '$employee' } + }, + limit: 100 +}) +``` + +### Dimensional groupBy + +```typescript +const byStatus = await db.records.find({ + labels: ['ORDER'], + aggregate: { + count: { fn: 'count', alias: '$record' }, + revenue: { fn: 'sum', field: 'total', alias: '$record' } + }, + groupBy: ['$record.status'], + orderBy: { revenue: 'desc' } +}) +``` + +### Self-group (single-row KPI) + +```typescript +const totals = await db.records.find({ + labels: ['ORDER'], + aggregate: { + totalRevenue: { fn: 'sum', field: 'total', alias: '$record' } + }, + groupBy: ['totalRevenue'], + orderBy: { totalRevenue: 'asc' } +}) +``` + +## Critical Limit Rules + +- Do not use `limit` in self-group KPI queries +- Do not use `limit` in dimensional groupBy unless intentionally asking for top N groups +- `limit` is valid for listing/browsing and per-record flat aggregation +- For self-group KPI queries, include `orderBy` on the aggregated key to force full-scan aggregation behavior + +## Reusable Patterns Across APIs + +### 1. 
Search records ```typescript -// Find all active PRODUCT records with price between $10-$50 const products = await db.records.find({ labels: ['PRODUCT'], where: { active: true, - price: { $gte: 10, $lte: 50 } + price: { $gte: 10, $lte: 50 }, + tags: { $in: ['featured'] } }, orderBy: { price: 'asc' }, - limit: 20 -}); + limit: 20, + skip: 0 +}) ``` ```python -# Find all active PRODUCT records with price between $10-$50 result = db.records.find({ "labels": ["PRODUCT"], "where": { "active": True, - "price": {"$gte": 10, "$lte": 50} + "price": {"$gte": 10, "$lte": 50}, + "tags": {"$in": ["featured"]} }, "orderBy": {"price": "asc"}, - "limit": 20 + "limit": 20, + "skip": 0 }) - products = result.data ``` - + ```bash -# Find all active PRODUCT records with price between $10-$50 curl -X POST "https://api.rushdb.com/api/v1/records/search" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ @@ -96,33 +238,33 @@ curl -X POST "https://api.rushdb.com/api/v1/records/search" \ "price": { "$gte": 10, "$lte": 50 - } + }, + "tags": { "$in": ["featured"] } }, "orderBy": {"price": "asc"}, - "limit": 20 + "limit": 20, + "skip": 0 }' ``` -### 2. Deleting Records with the Same Query Structure +### 2. Delete records with the same where logic ```typescript -// Delete discontinued products with zero inventory await db.records.delete({ labels: ['PRODUCT'], where: { discontinued: true, inventory: 0 } -}); +}) ``` ```python -# Delete discontinued products with zero inventory db.records.delete({ "labels": ["PRODUCT"], "where": { @@ -132,9 +274,8 @@ db.records.delete({ }) ``` - + ```bash -# Delete discontinued products with zero inventory curl -X PUT "https://api.rushdb.com/api/v1/records/delete" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ @@ -149,38 +290,32 @@ curl -X PUT "https://api.rushdb.com/api/v1/records/delete" \ -### 3. Searching Relationships +### 3. 
Search relationships ```typescript -// Find all CREATED relationships by users in the admin group const createdRelationships = await db.relationships.find({ where: { - groups: { $in: ["admin"] }, - $relation: { - type: "CREATED" - } + type: 'CREATED', + weight: { $gte: 0.5 } }, limit: 50 -}); +}) ``` ```python -# Find all CREATED relationships by users in the admin group created_relationships = db.relationships.find({ "where": { - "groups": { "$in": ["admin"] }, - "$relation": { - "type": "CREATED" - } + "type": "CREATED", + "weight": {"$gte": 0.5} }, "limit": 50 }) ``` - + ```bash # Find all CREATED relationships by users in the admin group curl -X POST "https://api.rushdb.com/api/v1/relationships/search" \ @@ -188,10 +323,8 @@ curl -X POST "https://api.rushdb.com/api/v1/relationships/search" \ -H "Content-Type: application/json" \ -d '{ "where": { - "groups": { "$in": ["admin"] }, - "$relation": { - "type": "CREATED" - } + "type": "CREATED", + "weight": { "$gte": 0.5 } }, "limit": 50 }' @@ -199,22 +332,20 @@ curl -X POST "https://api.rushdb.com/api/v1/relationships/search" \ -### 4. Discovering Labels +### 4. Find labels by data constraints ```typescript -// Find all labels used on records in North America region const labels = await db.labels.find({ where: { - region: { $in: ["US", "CA", "MX"] } + region: { $in: ['US', 'CA', 'MX'] } } -}); +}) ``` ```python -# Find all labels used on records in North America region labels = db.labels.find({ "where": { "region": {"$in": ["US", "CA", "MX"]} @@ -222,7 +353,7 @@ labels = db.labels.find({ }) ``` - + ```bash # Find all labels used on records in North America region curl -X POST "https://api.rushdb.com/api/v1/labels/search" \ @@ -237,203 +368,75 @@ curl -X POST "https://api.rushdb.com/api/v1/labels/search" \ -### 5. Exploring Properties +### 5. 
Discover properties for a label ```typescript -// Get all string properties used on PRODUCT records const productProps = await db.properties.find({ - labels: ["PRODUCT"], + labels: ['PRODUCT'], where: { - // ... + type: 'number' } -}); +}) ``` ```python -# Get all string properties used on PRODUCT records product_props = db.properties.find({ "labels": ["PRODUCT"], "where": { - // ... + "type": "number" } }) ``` - + ```bash -# Get all string properties used on PRODUCT records curl -X POST "https://api.rushdb.com/api/v1/properties/search" \ -H "Authorization: Bearer $RUSHDB_API_KEY" \ -H "Content-Type: application/json" \ -d '{ "labels": ["PRODUCT"], "where": { - // ... + "type": "number" } }' ``` -## APIs Supporting SearchQuery - -RushDB provides consistent SearchQuery capabilities across multiple API endpoints: - -| API Endpoint | Description | Documentation | -|--------------------------------------|--------------------------------------|---------------------------------------------------------| -| `/api/v1/records/search` | Search for records | [Records API](../rest-api/records/get-records.md) | -| `/api/v1/records/delete` | Delete records using search criteria | [Delete Records](../rest-api/records/delete-records.md) | -| `/api/v1/relationships/search` | Search for relationships | [Relationships API](../rest-api/relationships.md) | -| `/api/v1/labels/search` | Search for labels | [Labels API](../rest-api/labels.md) | -| `/api/v1/properties/search` | Search for properties | [Properties API](../rest-api/properties.md) | -| `/api/v1/properties/:id/values` | Get property values with filtering | [Properties API](../rest-api/properties.md) | - -## Powerful Use Cases - -### Dynamic Filtering in Catalog Applications - -With RushDB's fractal API design, building dynamic filtering interfaces for catalog or marketplace applications becomes dramatically simplified. The more you filter records (and simultaneously filter properties), the more precise your results become. 
- -```mermaid -flowchart TD - A[Initial Query: No Filters] --> B[Get All Records] - A --> C[Get All Properties] - B --> D[Start Applying Filters] - C --> D - D --> E[Refined Records] - D --> F[Available Properties Update] - E --> G[Further Filter Refinement] - F --> G - G --> H[Final Results and Options] -``` - -To implement this pattern: - -1. Fetch all records and all available properties (no filters applied) -2. As users select filters, apply the same SearchQuery to both the records and properties endpoints -3. The filtered properties API will return only properties that exist in the remaining record set -4. Update your UI to display only filter options that are still relevant - - - -```typescript -// User selects a category filter -const filterQuery = { - where: { category: "electronics" } -}; - -// Get filtered products -const products = await db.records.find({ - labels: ["PRODUCT"], - ...filterQuery -}); - -// Get available properties for the remaining product set -const availableProperties = await db.properties.find(filterQuery); - -// Generate dynamic filters based on available properties -const dynamicFilters = generateFiltersFromProperties(availableProperties); -``` - - -```python -# User selects a category filter -filter_query = { - "where": {"category": "electronics"} -} - -# Get filtered products -result = db.records.find({ - "labels": ["PRODUCT"], - **filter_query -}) - -products = result.data - -# Get available properties for the remaining product set -available_properties = db.properties.find(filter_query) - -# Generate dynamic filters based on available properties -dynamic_filters = generate_filters_from_properties(available_properties) -``` - - - -### AI-Powered Data Analytics Without ETL - -RushDB's fractal API design makes it exceptionally well-suited for AI workflows and RAG (Retrieval Augmented Generation) systems. 
By importing raw JSON data and allowing RushDB to automatically recognize and index structures, you can eliminate traditional ETL processes. - -```mermaid -graph TD - A[Raw JSON Dataset] --> B[RushDB Import] - B --> C[Automatic Topology Detection] - C --> D[Labels Generated] - C --> E[Properties Mapped] - C --> F[Relationships Discovered] - D --> G[AI Agent Explores Dataset] - E --> G - F --> G - G --> H[Dynamic Query Generation] - H --> I[Insight Extraction] - I --> J[Visualization/Report] -``` - -This allows AI agents to: +### 6. Explore values for a selected property -1. Explore available data structure without predefined schemas -2. Dynamically generate queries based on discovered patterns -3. Refetch and recalculate results on-the-fly as new insights emerge -4. Perform complex aggregations without manual data preparation +Use values as the final perspective when you need canonical filter options for UI facets, prompts, and dynamic query builders. -For example, an AI agent could: +Typical flow: - - -```typescript -// Discovery phase: Explore available labels -const availableLabels = await db.labels.find({}); -console.log("Discovered entity types:", Object.keys(availableLabels)); +1. Use `properties.find` to locate a property and get its id +2. Call `/api/v1/properties/:id/values` for value discovery in the same data context +3. Feed returned canonical values back into your next SearchQuery (`$in`, `$nin`, exact match) -// Explore properties of a specific label -const personProperties = await db.properties.find({ - labels: ["PERSON"], - where: { - // ... 
- } -}); +## Common Pitfalls to Avoid -// Generate insights based on discovered structure -const insightQuery = generateQueryFromDiscoveredStructure(personProperties); -const results = await db.records.find(insightQuery); -``` - - -```python -# Discovery phase: Explore available labels -available_labels = db.labels.find({}) -print("Discovered entity types:", list(available_labels.keys())) +- Using `limit` on KPI/self-group aggregate queries +- Forgetting `alias` on function-based aggregate entries +- Using traversal keys like `$label` instead of real label keys (`EMPLOYEE`, `PROJECT`) +- Treating `groupBy` self-group keys as property paths instead of aggregate key names +- Using plain date strings with `$gt`/`$gte`/`$lt`/`$lte` instead of datetime component objects -# Explore properties of a specific label -person_properties = db.properties.find({ - "labels": ["PERSON"], - "where": { - // ... - } -}) +## API Reference Links -# Generate insights based on discovered structure -insight_query = generate_query_from_discovered_structure(person_properties) -result = db.records.find(insight_query) -results = result.data -``` - - +| Endpoint | Docs | +|---|---| +| `/api/v1/records/search` | [Records API](../rest-api/records/get-records.md) | +| `/api/v1/records/delete` | [Delete Records](../rest-api/records/delete-records.md) | +| `/api/v1/relationships/search` | [Relationships API](../rest-api/relationships.md) | +| `/api/v1/labels/search` | [Labels API](../rest-api/labels.md) | +| `/api/v1/properties/search` | [Properties API](../rest-api/properties.md) | +| `/api/v1/properties/:id/values` | [Properties API](../rest-api/properties.md) | -## Conclusion +## Next Step -RushDB's fractal API design with the reusable SearchQuery pattern represents a significant advancement in database interaction. 
By maintaining a consistent query structure across different entities and operations, RushDB enables developers to build more intuitive, flexible, and powerful applications with less code and cognitive overhead. +If you want advanced examples (nested collect, multi-hop traversal, time buckets, KPI patterns), continue with the deep-dive tutorial: -This design philosophy reflects a deep understanding of how developers work with data, ensuring that once you learn the SearchQuery pattern, you can apply that knowledge universally throughout your application's interaction with RushDB. +[SearchQuery Deep Dive: Advanced Patterns](./searchquery-advanced-patterns.mdx) diff --git a/docs/docs/tutorials/search-ux-patterns.mdx b/docs/docs/tutorials/search-ux-patterns.mdx new file mode 100644 index 00000000..f7f360ba --- /dev/null +++ b/docs/docs/tutorials/search-ux-patterns.mdx @@ -0,0 +1,440 @@ +--- +sidebar_position: 30 +title: "Search UX Patterns" +description: Combine structured filters, semantic ranking, and contextual fields to build explainable, user-facing search experiences on top of RushDB. +tags: [Search, AI, SearchQuery, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Search UX Patterns + +Most user-facing search combines two things: + +- **Structured filtering** — only show results matching the user's explicit constraints (role, date range, status) +- **Relevance ranking** — sort by how closely each result matches the user's intent in natural language + +RushDB provides both surfaces: `records.find()` for structured queries and `ai.search()` for semantic ranking. The patterns below show how to combine them, surface enough context for explainable results, and paginate reliably. + +--- + +## Pattern 1: Instant structured search + +For search over categorical or exact fields, `records.find()` with a `where` clause is immediate. 
+ + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +interface ArticleSearchParams { + category?: string + authorId?: string + publishedAfter?: string + limit?: number + skip?: number +} + +async function searchArticles(params: ArticleSearchParams) { + const where: Record = { status: 'published' } + + if (params.category) where.category = params.category + if (params.authorId) where.AUTHOR = { $relation: { type: 'AUTHORED_BY', direction: 'out' }, authorId: params.authorId } + if (params.publishedAfter) where.publishedAt = { $gte: params.publishedAfter } + + const result = await db.records.find({ + labels: ['ARTICLE'], + where, + orderBy: { publishedAt: 'desc' }, + limit: params.limit ?? 20, + skip: params.skip ?? 0 + }) + + return { + items: result.data.map(a => ({ + id: a.__id, + title: a.title, + category: a.category, + publishedAt: a.publishedAt + })), + total: result.total + } +} +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +def search_articles(params: dict) -> dict: + where = {"status": "published"} + + if params.get("category"): + where["category"] = params["category"] + if params.get("authorId"): + where["AUTHOR"] = { + "$relation": {"type": "AUTHORED_BY", "direction": "out"}, + "authorId": params["authorId"] + } + if params.get("publishedAfter"): + where["publishedAt"] = {"$gte": params["publishedAfter"]} + + result = db.records.find({ + "labels": ["ARTICLE"], + "where": where, + "orderBy": {"publishedAt": "desc"}, + "limit": params.get("limit", 20), + "skip": params.get("skip", 0) + }) + + return { + "items": [{"id": a.id, "title": a.data.get("title"), "category": a.data.get("category")} for a in result.data], + "total": result.total + } +``` + + + + +```bash +curl -s -X POST "https://api.rushdb.com/api/v1/records/search" \ + -H "Content-Type: application/json" \ + -H 
"Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "labels": ["ARTICLE"], + "where": {"status": "published", "category": "engineering"}, + "orderBy": {"publishedAt": "desc"}, + "limit": 20, + "skip": 0 + }' +``` + + + + +--- + +## Pattern 2: Semantic search with structured post-filter + +Use `ai.search()` when the user types a free-text query. The `where` clause filters before semantic ranking, so only records matching all structural constraints are scored. + + + + +```typescript +interface SemanticSearchParams { + query: string + category?: string + publishedAfter?: string + limit?: number +} + +async function semanticArticleSearch(params: SemanticSearchParams) { + const searchParams: Parameters[0] = { + query: params.query, + propertyName: 'content', + labels: ['ARTICLE'], + limit: params.limit ?? 10 + } + + if (params.category || params.publishedAfter) { + const where: Record = { status: 'published' } + if (params.category) where.category = params.category + if (params.publishedAfter) where.publishedAt = { $gte: params.publishedAfter } + searchParams.where = where + } + + const result = await db.ai.search(searchParams) + + return result.data.map(a => ({ + id: a.__id, + title: a.title, + score: a.__score, // 0–1 cosine similarity + category: a.category + })) +} +``` + + + + +```python +def semantic_article_search(params: dict) -> list[dict]: + search_params = { + "query": params["query"], + "propertyName": "content", + "labels": ["ARTICLE"], + "limit": params.get("limit", 10) + } + + where = {"status": "published"} + if params.get("category"): + where["category"] = params["category"] + if params.get("publishedAfter"): + where["publishedAt"] = {"$gte": params["publishedAfter"]} + if len(where) > 1: + search_params["where"] = where + + result = db.ai.search(search_params) + + return [ + {"id": a.id, "title": a.data.get("title"), "score": a.data.get("__score"), "category": a.data.get("category")} + for a in result.data + ] +``` + + + + +```bash +curl -s -X POST 
"https://api.rushdb.com/api/v1/ai/search" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -d '{ + "query": "how to reduce latency in distributed systems", + "propertyName": "content", + "labels": ["ARTICLE"], + "where": {"status": "published", "category": "engineering"}, + "limit": 10 + }' +``` + + + + +--- + +## Pattern 3: Contextual result enrichment + +Raw records often lack display context. Enrich results with related data after retrieval — author name, category breadcrumb, or related tags. + + + + +```typescript +async function enrichedSearch(userQuery: string) { + // 1. Semantic ranking + const results = await db.ai.search({ + query: userQuery, + propertyName: 'content', + labels: ['ARTICLE'], + limit: 10 + }) + + // 2. Enrich in parallel — fetch author names via relationship traversal + const enriched = await Promise.all( + results.data.map(async (article) => { + const authorResult = await db.records.find({ + labels: ['AUTHOR'], + where: { + ARTICLE: { + $relation: { type: 'AUTHORED_BY', direction: 'in' }, + __id: article.__id + } + } + }) + + return { + id: article.__id, + title: article.title, + score: article.__score, + author: authorResult.data[0]?.name ?? 
null, + category: article.category + } + }) + ) + + return enriched +} +``` + + + + +```python +from concurrent.futures import ThreadPoolExecutor + +def enriched_search(user_query: str) -> list[dict]: + results = db.ai.search({ + "query": user_query, + "propertyName": "content", + "labels": ["ARTICLE"], + "limit": 10 + }) + + def enrich(article): + author_result = db.records.find({ + "labels": ["AUTHOR"], + "where": { + "ARTICLE": { + "$relation": {"type": "AUTHORED_BY", "direction": "in"}, + "__id": article.id + } + } + }) + return { + "id": article.id, + "title": article.data.get("title"), + "score": article.data.get("__score"), + "author": author_result.data[0].data.get("name") if author_result.data else None + } + + with ThreadPoolExecutor(max_workers=5) as pool: + return list(pool.map(enrich, results.data)) +``` + + + + +:::warning Enrichment at result time vs query time +Per-result enrichment (a sub-query per item) multiplies network calls. For 10 results, that is 10 additional queries. Prefer storing denormalized display fields on the primary record where possible, and reserve traversal enrichment for fields that must be kept in sync with the related record. +::: + +--- + +## Pattern 4: Pagination with stable ordering + +Pagination is only stable when results are ordered by an immutable field. Avoid paginating over semantic search — scores change with model updates. Paginate structured queries instead. 
+ + + + +```typescript +interface Page { + items: T[] + total: number + hasNext: boolean +} + +async function paginatedArticles(category: string, page: number, pageSize = 20): Promise> { + const result = await db.records.find({ + labels: ['ARTICLE'], + where: { status: 'published', category }, + orderBy: { publishedAt: 'desc' }, + limit: pageSize, + skip: page * pageSize + }) + + return { + items: result.data.map(a => ({ id: a.__id, title: a.title, publishedAt: a.publishedAt })), + total: result.total, + hasNext: (page + 1) * pageSize < result.total + } +} +``` + + + + +```python +def paginated_articles(category: str, page: int, page_size: int = 20) -> dict: + result = db.records.find({ + "labels": ["ARTICLE"], + "where": {"status": "published", "category": category}, + "orderBy": {"publishedAt": "desc"}, + "limit": page_size, + "skip": page * page_size + }) + + return { + "items": [{"id": a.id, "title": a.data.get("title")} for a in result.data], + "total": result.total, + "hasNext": (page + 1) * page_size < result.total + } +``` + + + + +--- + +## Pattern 5: Zero-results fallback with semantic widening + +If a structured query returns nothing, fall back to semantic search without the structural constraints — then surface a "Did you mean?" style hint. 
+ + + + +```typescript +async function searchWithFallback(query: string, category: string) { + // Try structured first + const structured = await db.records.find({ + labels: ['ARTICLE'], + where: { status: 'published', category, title: { $contains: query } }, + limit: 10 + }) + + if (structured.total > 0) { + return { results: structured.data, mode: 'exact' as const } + } + + // Widen to semantic across all categories + const semantic = await db.ai.search({ + query, + propertyName: 'content', + labels: ['ARTICLE'], + where: { status: 'published' }, + limit: 5 + }) + + return { results: semantic.data, mode: 'semantic' as const } +} +``` + + + + +```python +def search_with_fallback(query: str, category: str) -> dict: + structured = db.records.find({ + "labels": ["ARTICLE"], + "where": {"status": "published", "category": category, "title": {"$contains": query}}, + "limit": 10 + }) + + if structured.total > 0: + return {"results": structured.data, "mode": "exact"} + + semantic = db.ai.search({ + "query": query, + "propertyName": "content", + "labels": ["ARTICLE"], + "where": {"status": "published"}, + "limit": 5 + }) + + return {"results": semantic.data, "mode": "semantic"} +``` + + + + +--- + +## Choosing between structured and semantic search + +| Situation | Use | +|---|---| +| User selects filters from UI facets | `records.find()` with `where` | +| User types a free-text query box | `ai.search()` | +| User types AND applies filters | `ai.search()` with `where` | +| Result count matters (pagination) | `records.find()` — `ai.search()` does not return `total` | +| Query is empty / filters only | `records.find()` — skip semantic scoring | +| Exact match required | `records.find()` | + +--- + +## Next steps + +- [Hybrid Retrieval](./hybrid-retrieval.mdx) — two-phase structured-filter + semantic-rank for AI pipelines +- [Explainable Results](./explainable-results.mdx) — surface evidence for every search result +- [Query Optimization](./query-optimization.mdx) — shape 
queries for throughput and cost efficiency diff --git a/docs/docs/tutorials/searchquery-advanced-patterns.mdx b/docs/docs/tutorials/searchquery-advanced-patterns.mdx new file mode 100644 index 00000000..34c2a5ba --- /dev/null +++ b/docs/docs/tutorials/searchquery-advanced-patterns.mdx @@ -0,0 +1,321 @@ +--- +sidebar_position: 5 +title: "SearchQuery Deep Dive: Advanced Patterns" +description: Build confidence with advanced SearchQuery patterns through realistic RushDB examples +tags: [Search, Deep Dive, Aggregation, Relationships] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# SearchQuery Deep Dive: Advanced Patterns + +This tutorial is a practical companion to [Reusable SearchQuery](./reusable-search-query.mdx). + +You will compose production-style queries that RushDB teams and power users rely on every day: + +- label-based traversal with aliases +- per-record and grouped aggregations +- nested collect structures +- relationship matching and direction filters +- time-series bucketing + +## Real-World Scenario + +Assume a graph with these labels and relationships: + +- `COMPANY` +- `DEPARTMENT` +- `PROJECT` +- `EMPLOYEE` +- `TASK` + +Typical topology: + +- `COMPANY` -> `DEPARTMENT` +- `DEPARTMENT` -> `PROJECT` +- `PROJECT` -> `EMPLOYEE` +- `PROJECT` -> `TASK` + +All examples below use canonical SearchQuery behavior: + +- traversal key is the related label itself (for example `PROJECT`) +- every function-based aggregate includes `alias` +- self-group KPI queries omit `limit` + +## 1) Reusable Where Blocks + +Start by building filter fragments you can reuse across endpoints. 
+ + + +```typescript +const activeEngineeringProjects = { + labels: ['PROJECT'], + where: { + status: { $in: ['active', 'planned'] }, + budget: { $gte: 100000 }, + DEPARTMENT: { + name: 'Engineering' + } + }, + orderBy: { budget: 'desc' }, + limit: 25 +} + +const projects = await db.records.find(activeEngineeringProjects) +``` + + +```python +active_engineering_projects = { + "labels": ["PROJECT"], + "where": { + "status": {"$in": ["active", "planned"]}, + "budget": {"$gte": 100000}, + "DEPARTMENT": { + "name": "Engineering" + } + }, + "orderBy": {"budget": "desc"}, + "limit": 25 +} + +projects = db.records.find(active_engineering_projects) +``` + + + +## 2) Relationship Traversal with Aliases + +Use aliases to aggregate related entities without flattening your schema. + +```typescript +const projectComp = await db.records.find({ + labels: ['PROJECT'], + where: { + DEPARTMENT: { + $alias: '$department', + COMPANY: { + $alias: '$company', + region: { $in: ['US', 'CA'] } + } + }, + EMPLOYEE: { + $alias: '$employee', + employmentType: { $in: ['full_time', 'contract'] } + } + }, + aggregate: { + projectId: '$record.$id', + projectName: '$record.name', + departmentName: '$department.name', + companyName: '$company.name', + headcount: { fn: 'count', alias: '$employee', unique: true }, + avgSalary: { fn: 'avg', field: 'salary', alias: '$employee', precision: 0 } + }, + limit: 100 +}) +``` + +## 3) Dimensional GroupBy + +Use dimensional grouping for distribution questions such as "how many projects per status?". 
+ +```typescript +const statusBreakdown = await db.records.find({ + labels: ['PROJECT'], + where: { + DEPARTMENT: { name: 'Engineering' } + }, + aggregate: { + count: { fn: 'count', alias: '$record' }, + avgBudget: { fn: 'avg', field: 'budget', alias: '$record', precision: 2 } + }, + groupBy: ['$record.status'], + orderBy: { count: 'desc' } +}) +``` + +Notes: + +- dimensional keys must be property refs like `$record.status` +- omit `limit` to return full distribution + +## 4) Self-Group KPIs (Single-Row Totals) + +Use self-group mode for whole-dataset KPIs. + +```typescript +const kpi = await db.records.find({ + labels: ['TASK'], + where: { + completed: true, + completedAt: { + $gte: { $year: 2025, $month: 1, $day: 1 }, + $lt: { $year: 2026, $month: 1, $day: 1 } + } + }, + aggregate: { + completedCount: { fn: 'count', alias: '$record' }, + totalHours: { fn: 'sum', field: 'hours', alias: '$record' }, + avgHours: { fn: 'avg', field: 'hours', alias: '$record', precision: 2 } + }, + groupBy: ['completedCount', 'totalHours', 'avgHours'], + orderBy: { totalHours: 'asc' } +}) +``` + +Why this shape: + +- self-group uses aggregate key names in `groupBy` +- `orderBy` on an aggregated key ensures correct full-scan aggregation behavior +- no `limit` + +## 5) Nested Collect for Hierarchies + +Use nested `collect` to return structured trees for UI rendering. 
+ +```typescript +const hierarchy = await db.records.find({ + labels: ['COMPANY'], + where: { + foundedAt: { $lt: { $year: 2015 } }, + DEPARTMENT: { + $alias: '$department', + PROJECT: { + $alias: '$project', + EMPLOYEE: { + $alias: '$employee', + isActive: true + } + } + } + }, + aggregate: { + companyName: '$record.name', + departments: { + fn: 'collect', + alias: '$department', + aggregate: { + name: '$department.name', + projects: { + fn: 'collect', + alias: '$project', + orderBy: { budget: 'desc' }, + aggregate: { + name: '$project.name', + topContributors: { + fn: 'collect', + alias: '$employee', + orderBy: { salary: 'desc' }, + limit: 3 + } + } + } + } + } + }, + limit: 20 +}) +``` + +Nested rule: + +- inside a collect aggregate block, nested aggregations should also use `fn: 'collect'` + +## 6) Relationship Matching and Direction + +Use `$relation` when you need to constrain traversal edge type and direction. + + + +```typescript +const authoredPosts = await db.records.find({ + labels: ['USER'], + where: { + POST: { + $alias: '$post', + $relation: { type: 'AUTHORED', direction: 'out' }, + title: { $contains: 'searchquery' } + } + }, + aggregate: { + userName: '$record.name', + authoredPosts: { fn: 'count', alias: '$post', unique: true } + }, + limit: 50 +}) +``` + + +```python +authored_posts = db.records.find({ + "labels": ["USER"], + "where": { + "POST": { + "$alias": "$post", + "$relation": {"type": "AUTHORED", "direction": "out"}, + "title": {"$contains": "searchquery"} + } + }, + "aggregate": { + "userName": "$record.name", + "authoredPosts": {"fn": "count", "alias": "$post", "unique": True} + }, + "limit": 50 +}) +``` + + + +## 7) Time Bucketing for Trends + +Use `timeBucket` to build trend charts directly from query output. 
+ +```typescript +const monthlyCompletions = await db.records.find({ + labels: ['TASK'], + where: { + completed: true + }, + aggregate: { + month: { fn: 'timeBucket', field: 'completedAt', granularity: 'month', alias: '$record' }, + count: { fn: 'count', alias: '$record' } + }, + groupBy: ['month'], + orderBy: { month: 'asc' } +}) +``` + +## 8) Adoption Workflow for Teams + +A high-signal workflow when introducing SearchQuery to a team: + +1. Start with `records.find` listing queries. +2. Move the same `where` block to `records.delete` for controlled cleanups. +3. Add aliases and per-record aggregates. +4. Promote to dimensional `groupBy` for dashboards. +5. Convert to self-group KPI queries for single-row metrics. +6. Introduce nested `collect` when API responses must map directly to UI trees. + +## 9) Validation Checklist + +Before shipping a query: + +- `groupBy` exists only when `aggregate` exists +- each `fn` aggregate has `alias` +- traversal uses label keys, not custom traversal operators +- date ranges use component objects when comparing +- KPI/self-group queries do not use `limit` +- self-group queries order by an aggregated key + +## Wrap-Up + +You now have practical patterns for composing SearchQuery in real applications. + +Recommended next practice: + +- clone one of your existing list queries +- add aliases and per-record aggregates +- then progressively transform it into dimensional and self-group analytics diff --git a/docs/docs/tutorials/self-hosted-project-setup.mdx b/docs/docs/tutorials/self-hosted-project-setup.mdx new file mode 100644 index 00000000..9334202e --- /dev/null +++ b/docs/docs/tutorials/self-hosted-project-setup.mdx @@ -0,0 +1,164 @@ +--- +sidebar_position: 37 +title: "Project Setup After Deployment" +description: Create projects, configure per-project embedding models, invite team members, and verify SDK connectivity on a self-hosted RushDB instance. 
+tags: [Deployment, Self-Hosted, Configuration] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Project Setup After Deployment + +Once RushDB is running (see [Self-Hosting RushDB](./deployment.mdx)), the next steps are: + +1. Create a project +2. Generate an API key +3. Optionally configure per-project embedding settings +4. Test SDK connectivity +5. Invite team members + +--- + +## Step 1: Sign in to the dashboard + +Open `http://your-host:3000` (or whatever host and port you configured). Sign in with the `RUSHDB_LOGIN` / `RUSHDB_PASSWORD` you set in your environment. + +--- + +## Step 2: Create a project + +1. Click **New Project** in the top-left panel. +2. Give the project a name (e.g. `production`, `staging`, `my-app`). +3. Choose the data store: + - **Managed (default)** — RushDB uses the Neo4j instance configured in server env vars + - **My own Neo4j / Aura** — Toggle on and enter your connection URI and credentials. See [Connecting an Aura Instance](./connect-aura-instance.mdx) for the full walkthrough. +4. Click **Create**. + +Each project is isolated: records, relationships, and embedding indexes in one project are never visible from another. + +--- + +## Step 3: Generate an API key + +1. Open the project and navigate to the **API Keys** tab. +2. Click **New API Key**, enter a label (e.g. `backend-service`), and save. +3. Copy the key — it is only shown once. + +Store the key in an environment variable or secret manager. Never commit it to source control. + +--- + +## Step 4: Test connectivity from each SDK surface + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!, { + url: 'http://your-host:3000/api/v1' +}) + +const result = await db.records.find({ labels: ['_PING_TEST'] }) +console.log('Connected. 
Total records:', result.total) +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB( + os.environ['RUSHDB_API_KEY'], + base_url='http://your-host:3000/api/v1' +) + +result = db.records.find({'labels': ['_PING_TEST']}) +print('Connected. Total records:', result.total) +``` + + + + +```bash +curl -s http://your-host:3000/api/v1/records/search \ + -H "Authorization: Bearer $RUSHDB_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"labels":["_PING_TEST"]}' | jq . +``` + + + + +--- + +## Step 5: Switching between cloud and self-hosted without code changes + +Keep the API URL in an environment variable so you can toggle between RushDB Cloud and your self-hosted instance without changing application code. + + + + +```typescript +const db = new RushDB(process.env.RUSHDB_API_KEY!, { + url: process.env.RUSHDB_API_URL ?? 'https://api.rushdb.com/api/v1' +}) +// Cloud: RUSHDB_API_URL=https://api.rushdb.com/api/v1 +// Self-hosted: RUSHDB_API_URL=http://your-host:3000/api/v1 +``` + + + + +```python +import os +from rushdb import RushDB + +db = RushDB( + os.environ['RUSHDB_API_KEY'], + base_url=os.environ.get('RUSHDB_API_URL', 'https://api.rushdb.com/api/v1') +) +``` + + + + +--- + +## Step 6: Configure an embedding model per project (optional) + +The server-level `RUSHDB_EMBEDDING_MODEL` env var sets the default embedding model for all projects. If you want to override it per project: + +1. Open the project → **Settings** → **Embedding**. +2. Select or enter a model identifier (e.g. `text-embedding-3-large`). +3. Enter the dimensions that match the model. +4. Save. + +Records ingested into this project will use the per-project model for backfill. Other projects on the same server continue using their own settings. + +:::note +Per-project embedding configuration is only relevant for **managed** embedding indexes (where RushDB calls the embedding provider). 
For BYOV (external) indexes, you supply the vectors yourself — no server-side model is involved. +::: + +--- + +## Step 7: Invite team members (cloud) + +On RushDB Cloud: +1. Open **Workspace** → **Team**. +2. Click **Invite Member** and enter the email address. +3. Assign a role: **Admin** or **Member**. + +On self-hosted RushDB the team member feature is available depending on your plan configuration. Contact support if you need multi-user access on a self-hosted instance. + +--- + +## Next steps + +- [Self-Hosting RushDB](./deployment.mdx) — deploy with Docker Compose +- [Connecting an Aura Instance](./connect-aura-instance.mdx) — use your own Neo4j as the data store +- [BYOC vs Managed vs Self-Hosted](./byoc-vs-managed.mdx) — compare deployment topologies diff --git a/docs/docs/tutorials/semantic-search-multitenant.mdx b/docs/docs/tutorials/semantic-search-multitenant.mdx new file mode 100644 index 00000000..0101057a --- /dev/null +++ b/docs/docs/tutorials/semantic-search-multitenant.mdx @@ -0,0 +1,629 @@ +--- +sidebar_position: 14 +title: "Semantic Search for Multi-Tenant Products" +description: Build tenant-safe semantic retrieval using RushDB's project-scoped prefilter and exact cosine similarity ranking — without global index assumptions. +tags: [AI, Search, Multi-Tenant, Architecture] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Semantic Search for Multi-Tenant Products + +Vector search in multi-tenant products has one rule that trumps all others: a user in tenant A must never see results from tenant B, even if tenant B has a more semantically similar document. + +RushDB's semantic search enforces this at the storage layer. Every project is an isolated namespace. When you call `db.ai.search()`, the search is **always scoped to your project**. There is no global vector index, no shared ANN pool, no cross-project leakage. 
+ +This tutorial shows you exactly how the scoping works, how to add structured filters on top of semantic ranking, and how to build a multi-tenant search endpoint that is correct by construction. + +--- + +## How project-scoped search works + +```mermaid +graph LR + subgraph Project A + A1[ARTICLE: ML Intro] --> V1[(vector)] + A2[ARTICLE: Graph DBs] --> V2[(vector)] + end + subgraph Project B + B1[ARTICLE: Climate] --> V3[(vector)] + end + Q[query: 'machine learning'] --> SA[Semantic Search: Project A] + SA -->|candidates from Project A only| RANK[Exact cosine ranking] + RANK --> RESULT[ML Intro ★0.94\nGraph DBs ★0.71] +``` + +The prefilter step narrows candidates to records in the current project (via a Cypher MATCH/WHERE clause) before any similarity computation runs. Exact cosine similarity is then applied to the prefiltered set. + +This means: +- Tenant isolation is guaranteed by the query engine, not by application-layer filtering +- Adding a `where` clause narrows the candidate set further but does not change the isolation guarantee +- Results always carry a `__score` (0–1) indicating cosine similarity + +--- + +## Step 1: Ingest multi-tenant content + +In a real multi-tenant product, each tenant has its own RushDB project and its own API key. For this tutorial, the "tenant isolation" is the project itself. 
+ + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +// Each tenant has their own project key +const db = new RushDB('TENANT_PROJECT_API_KEY') + +await db.records.importJson({ + label: 'ARTICLE', + data: [ + { + title: 'Reducing Infrastructure Costs with Spot Instances', + body: 'A practical guide to lowering cloud bills using preemptible compute.', + category: 'infrastructure', + authorId: 'u-101', + publishedAt: '2025-01-15' + }, + { + title: 'Query Planner Internals', + body: 'How modern databases choose execution plans and when they get it wrong.', + category: 'databases', + authorId: 'u-202', + publishedAt: '2025-02-10' + }, + { + title: 'Incident Response Automation', + body: 'Automating runbooks, page routing, and post-incident review with LLMs.', + category: 'operations', + authorId: 'u-101', + publishedAt: '2025-03-01' + } + ] +}) +``` + + + + +```python +from rushdb import RushDB + +db = RushDB("TENANT_PROJECT_API_KEY", base_url="https://api.rushdb.com/api/v1") + +db.records.import_json({ + "label": "ARTICLE", + "data": [ + { + "title": "Reducing Infrastructure Costs with Spot Instances", + "body": "A practical guide to lowering cloud bills using preemptible compute.", + "category": "infrastructure", + "authorId": "u-101", + "publishedAt": "2025-01-15" + }, + { + "title": "Query Planner Internals", + "body": "How modern databases choose execution plans when they get it wrong.", + "category": "databases", + "authorId": "u-202", + "publishedAt": "2025-02-10" + }, + { + "title": "Incident Response Automation", + "body": "Automating runbooks, page routing, and post-incident review with LLMs.", + "category": "operations", + "authorId": "u-101", + "publishedAt": "2025-03-01" + } + ] +}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="TENANT_PROJECT_API_KEY" +H='Content-Type: application/json' + +curl -s -X POST "$BASE/records/import/json" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "label": "ARTICLE", + 
"data": [ + {"title": "Reducing Infrastructure Costs with Spot Instances", "body": "A practical guide to lowering cloud bills.", "category": "infrastructure", "authorId": "u-101"}, + {"title": "Query Planner Internals", "body": "How databases choose execution plans.", "category": "databases", "authorId": "u-202"}, + {"title": "Incident Response Automation", "body": "Automating runbooks with LLMs.", "category": "operations", "authorId": "u-101"} + ] + }' +``` + + + + +--- + +## Step 2: Create an embedding index + +Semantic search requires an embedding index on the property you want to search. Create it once per label/property combination. + + + + +```typescript +const index = await db.ai.indexes.create({ + label: 'ARTICLE', + propertyName: 'body', + sourceType: 'managed' // RushDB embeds automatically +}) + +console.log('Index status:', index.data.status) +// status will be 'pending' or 'indexing' initially +``` + + + + +```python +index = db.ai.indexes.create({ + "label": "ARTICLE", + "propertyName": "body", + "sourceType": "managed" +}) + +print("Index status:", index.data["status"]) +``` + + + + +```bash +curl -s -X POST "$BASE/ai/indexes" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"ARTICLE","propertyName":"body","sourceType":"managed"}' +``` + + + + +--- + +## Step 3: Poll until the index is ready + +Backfilling vectors takes time proportional to record count. Poll the stats endpoint until `indexedRecords` equals `totalRecords`. 
+ + + + +```typescript +async function waitForIndex(indexId: string, intervalMs = 3000): Promise { + while (true) { + const stats = await db.ai.indexes.stats(indexId) + const { totalRecords, indexedRecords } = stats.data + console.log(`Indexed ${indexedRecords} / ${totalRecords}`) + if (indexedRecords >= totalRecords) break + await new Promise(r => setTimeout(r, intervalMs)) + } +} + +await waitForIndex(index.data.id) +``` + + + + +```python +import time + +def wait_for_index(index_id: str, interval: float = 3.0) -> None: + while True: + stats = db.ai.indexes.stats(index_id) + total = stats.data["totalRecords"] + indexed = stats.data["indexedRecords"] + print(f"Indexed {indexed} / {total}") + if indexed >= total: + break + time.sleep(interval) + +wait_for_index(index.data["id"]) +``` + + + + +```bash +INDEX_ID="" + +while true; do + STATS=$(curl -s "$BASE/ai/indexes/$INDEX_ID/stats" \ + -H "Authorization: Bearer $TOKEN") + TOTAL=$(echo "$STATS" | jq '.data.totalRecords') + INDEXED=$(echo "$STATS" | jq '.data.indexedRecords') + echo "Indexed $INDEXED / $TOTAL" + [ "$INDEXED" -ge "$TOTAL" ] && break + sleep 3 +done +``` + + + + +--- + +## Step 4: Basic semantic search + +No `where` filter — returns the top matches across all ARTICLE records in this project, ranked by cosine similarity. 
+ + + + +```typescript +const results = await db.ai.search({ + query: 'how to reduce cloud spending', + propertyName: 'body', + labels: ['ARTICLE'], + limit: 5 +}) + +for (const result of results.data) { + console.log(`${result.__score.toFixed(3)} ${result.title}`) +} +``` + + + + +```python +results = db.ai.search({ + "query": "how to reduce cloud spending", + "propertyName": "body", + "labels": ["ARTICLE"], + "limit": 5 +}) + +for r in results.data: + print(f"{r['__score']:.3f} {r['title']}") +``` + + + + +```bash +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "query": "how to reduce cloud spending", + "propertyName": "body", + "labels": ["ARTICLE"], + "limit": 5 + }' | jq '.data[] | {score: .__score, title: .title}' +``` + + + + +--- + +## Step 5: Scope results with structured filters + +Add a `where` clause to narrow candidates before cosine ranking. This is the correct pattern for per-user or per-category search in a product. + + + + +```typescript +// Only search articles written by a specific author +const authorResults = await db.ai.search({ + query: 'automation and reliability', + propertyName: 'body', + labels: ['ARTICLE'], + where: { + authorId: 'u-101' + }, + limit: 5 +}) + +// Only search articles in a specific category +const categoryResults = await db.ai.search({ + query: 'database performance', + propertyName: 'body', + labels: ['ARTICLE'], + where: { + category: { $in: ['databases', 'infrastructure'] } + }, + limit: 10 +}) + +// Date-bounded search +const recentResults = await db.ai.search({ + query: 'incident response', + propertyName: 'body', + labels: ['ARTICLE'], + where: { + publishedAt: { + $gte: { $year: 2025, $month: 2, $day: 1 } + } + }, + limit: 10 +}) +``` + + + + +```python +# Scoped to one author +author_results = db.ai.search({ + "query": "automation and reliability", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": {"authorId": "u-101"}, + "limit": 5 +}) + +# Scoped to 
categories +category_results = db.ai.search({ + "query": "database performance", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": {"category": {"$in": ["databases", "infrastructure"]}}, + "limit": 10 +}) + +# Date-bounded +recent_results = db.ai.search({ + "query": "incident response", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": {"publishedAt": {"$gte": {"$year": 2025, "$month": 2, "$day": 1}}}, + "limit": 10 +}) +``` + + + + +```bash +# Author-scoped +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "query": "automation and reliability", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": {"authorId": "u-101"}, + "limit": 5 + }' + +# Category-scoped +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "query": "database performance", + "propertyName": "body", + "labels": ["ARTICLE"], + "where": {"category": {"$in": ["databases","infrastructure"]}}, + "limit": 10 + }' +``` + + + + +--- + +## Step 6: Paginate over semantic results + +Use `skip` to page through ranked results. 
+ + + + +```typescript +async function searchPage(query: string, page: number, pageSize = 10) { + return db.ai.search({ + query, + propertyName: 'body', + labels: ['ARTICLE'], + skip: page * pageSize, + limit: pageSize + }) +} + +const page0 = await searchPage('cloud infrastructure', 0) +const page1 = await searchPage('cloud infrastructure', 1) +``` + + + + +```python +def search_page(query: str, page: int, page_size: int = 10): + return db.ai.search({ + "query": query, + "propertyName": "body", + "labels": ["ARTICLE"], + "skip": page * page_size, + "limit": page_size + }) + +page0 = search_page("cloud infrastructure", 0) +page1 = search_page("cloud infrastructure", 1) +``` + + + + +```bash +# Page 0 +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"query":"cloud infrastructure","propertyName":"body","labels":["ARTICLE"],"skip":0,"limit":10}' + +# Page 1 +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"query":"cloud infrastructure","propertyName":"body","labels":["ARTICLE"],"skip":10,"limit":10}' +``` + + + + +--- + +## Step 7: Using external vectors (BYOV) + +If you manage your own embeddings, create an external index and push vectors inline at write time. 
+ + + + +```typescript +// Create an external index (dimensions must match your model) +await db.ai.indexes.create({ + label: 'ARTICLE', + propertyName: 'body', + sourceType: 'external', + dimensions: 1536, + similarityFunction: 'cosine' +}) + +// Write a record with an inline vector +await db.records.create({ + label: 'ARTICLE', + data: { + title: 'Distributed Tracing at Scale', + body: 'How to instrument services for end-to-end trace collection.', + category: 'observability', + $vectors: [ + { + propertyName: 'body', + vector: [/* 1536-dimension float array from your embedding model */] + } + ] + } +}) + +// Search with an external query vector +const vectorResults = await db.ai.search({ + queryVector: [/* same 1536-dimension array */], + propertyName: 'body', + labels: ['ARTICLE'], + sourceType: 'external', + limit: 5 +}) +``` + + + + +```python +# Create external index +db.ai.indexes.create({ + "label": "ARTICLE", + "propertyName": "body", + "sourceType": "external", + "dimensions": 1536, + "similarityFunction": "cosine" +}) + +# Write record with inline vector +db.records.create("ARTICLE", { + "title": "Distributed Tracing at Scale", + "body": "How to instrument services for trace collection.", + "category": "observability", + "$vectors": [ + { + "propertyName": "body", + "vector": [/* 1536-dimension float array */] + } + ] +}) + +# Search with query vector +results = db.ai.search({ + "queryVector": [/* same 1536-dim array */], + "propertyName": "body", + "labels": ["ARTICLE"], + "sourceType": "external", + "limit": 5 +}) +``` + + + + +```bash +# Create external index +curl -s -X POST "$BASE/ai/indexes" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"ARTICLE","propertyName":"body","sourceType":"external","dimensions":1536,"similarityFunction":"cosine"}' + +# Search with query vector +curl -s -X POST "$BASE/ai/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d 
'{"queryVector":[0.1,0.2,...],"propertyName":"body","labels":["ARTICLE"],"sourceType":"external","limit":5}' +``` + + + + +--- + +## The multi-tenant API endpoint pattern + +Here is a minimal Node.js handler that exposes project-scoped semantic search for a product: + +```typescript +// POST /api/search +import RushDB from '@rushdb/javascript-sdk' + +export async function searchHandler(req: Request): Promise { + const { query, category, page = 0, pageSize = 10 } = await req.json() + + // Each tenant authenticates with their project API key + // Never hard-code the key — pull it from the tenant's session/config + const tenantApiKey = getTenantApiKey(req) + const db = new RushDB(tenantApiKey) + + const where: Record = {} + if (category) where.category = category + + const results = await db.ai.search({ + query, + propertyName: 'body', + labels: ['ARTICLE'], + where: Object.keys(where).length > 0 ? where : undefined, + skip: page * pageSize, + limit: pageSize + }) + + return Response.json({ + results: results.data.map(r => ({ + id: r.__id, + title: r.title, + score: r.__score, + category: r.category + })), + total: results.total + }) +} +``` + +Because `RushDB(tenantApiKey)` is project-scoped, there is no application-layer filtering needed. The isolation guarantee comes from the storage layer. + +--- + +## Production caveat + +Project-scoped isolation is enforced at the API key level. If you accidentally reuse the same API key across tenants (for example by sharing a single project for all tenants), no isolation exists. Each tenant must have a separate project with a separate API key. The recommended architecture is one RushDB project per tenant. 
+ +--- + +## Next steps + +- [Hybrid Retrieval: Structured Filters Plus Semantic Search](./hybrid-retrieval.mdx) — combining semantic ranking with deeper graph traversal +- [RAG Pipeline in Minutes](./rag-pipeline.mdx) — adding an LLM generation step on top of retrieval +- [Semantic Search reference](../typescript-sdk/ai/search.md) — full parameter reference diff --git a/docs/docs/tutorials/supply-chain-traceability.mdx b/docs/docs/tutorials/supply-chain-traceability.mdx new file mode 100644 index 00000000..f5cfdde8 --- /dev/null +++ b/docs/docs/tutorials/supply-chain-traceability.mdx @@ -0,0 +1,463 @@ +--- +sidebar_position: 28 +title: "Supply Chain Traceability and Recall Analysis" +description: Model suppliers, batches, products, shipments, and incidents so teams can answer upstream-impact and downstream-blast-radius questions for recalls. +tags: [Domain Blueprint, Relationships, SearchQuery, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Supply Chain Traceability and Recall Analysis + +When a product defect is discovered, two questions need immediate answers: + +1. **Upstream impact**: which raw materials, batches, and suppliers contributed to the affected product? +2. **Downstream blast radius**: which shipments, orders, and customers received products from this batch? + +These questions cross multiple hops in a causal chain. A graph database handles them naturally. A set of JOIN-heavy relational tables does not — not at the speed required during a recall incident. + +--- + +## Graph shape + +```mermaid +graph LR + SUPPLIER[SUPPLIER] -->|SUPPLIED| BATCH[BATCH
Raw material lot] + BATCH -->|USED_IN| PRODUCTION_RUN[PRODUCTION_RUN] + PRODUCTION_RUN -->|PRODUCED| PRODUCT[PRODUCT] + PRODUCT -->|INCLUDED_IN| SHIPMENT[SHIPMENT] + SHIPMENT -->|DELIVERED_TO| CUSTOMER[CUSTOMER] + INCIDENT[INCIDENT] -->|CAUSED_BY| BATCH +``` + +| Label | What it represents | +|---|---| +| `SUPPLIER` | A vendor or raw material source | +| `BATCH` | A specific lot of raw material or component | +| `PRODUCTION_RUN` | A manufacturing run that consumed batches | +| `PRODUCT` | A finished product unit or SKU | +| `SHIPMENT` | A delivery fulfillment | +| `CUSTOMER` | A receiving customer or distribution center | +| `INCIDENT` | A quality or safety report | + +--- + +## Step 1: Ingest supply chain records + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) + +await db.records.importJson({ + label: 'SUPPLIER', + data: [ + { name: 'Chem Solutions Ltd', country: 'DE', approved: true }, + { name: 'Alloy Partners Inc', country: 'US', approved: true } + ] +}) + +await db.records.importJson({ + label: 'BATCH', + data: [ + { lotId: 'LOT-2025-001', material: 'polymer-Z', producedAt: '2025-01-10', quantity: 5000 }, + { lotId: 'LOT-2025-002', material: 'polymer-Z', producedAt: '2025-01-17', quantity: 4800 }, + { lotId: 'LOT-2025-003', material: 'alloy-X', producedAt: '2025-01-20', quantity: 2000 } + ] +}) + +await db.records.importJson({ + label: 'PRODUCTION_RUN', + data: [ + { runId: 'RUN-A1', startedAt: '2025-02-01', completedAt: '2025-02-03', facilityId: 'FAC-Berlin' }, + { runId: 'RUN-A2', startedAt: '2025-02-05', completedAt: '2025-02-07', facilityId: 'FAC-Berlin' } + ] +}) + +await db.records.importJson({ + label: 'PRODUCT', + data: [ + { sku: 'PROD-001', name: 'Widget Alpha', serialRange: 'WA-10001:WA-11000' }, + { sku: 'PROD-002', name: 'Widget Beta', serialRange: 'WB-20001:WB-20500' } + ] +}) + +await db.records.importJson({ + label: 'SHIPMENT', + data: [ + { trackingId: 'SHIP-4001', 
shippedAt: '2025-02-15', status: 'delivered' }, + { trackingId: 'SHIP-4002', shippedAt: '2025-02-18', status: 'in_transit' } + ] +}) +``` + + + + +```python +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +db.records.import_json({ + "label": "BATCH", + "data": [ + {"lotId": "LOT-2025-001", "material": "polymer-Z", "producedAt": "2025-01-10", "quantity": 5000}, + {"lotId": "LOT-2025-002", "material": "polymer-Z", "producedAt": "2025-01-17", "quantity": 4800}, + {"lotId": "LOT-2025-003", "material": "alloy-X", "producedAt": "2025-01-20", "quantity": 2000} + ] +}) + +db.records.import_json({ + "label": "PRODUCTION_RUN", + "data": [ + {"runId": "RUN-A1", "startedAt": "2025-02-01", "completedAt": "2025-02-03", "facilityId": "FAC-Berlin"}, + {"runId": "RUN-A2", "startedAt": "2025-02-05", "completedAt": "2025-02-07", "facilityId": "FAC-Berlin"} + ] +}) + +db.records.import_json({ + "label": "PRODUCT", + "data": [ + {"sku": "PROD-001", "name": "Widget Alpha", "serialRange": "WA-10001:WA-11000"}, + {"sku": "PROD-002", "name": "Widget Beta", "serialRange": "WB-20001:WB-20500"} + ] +}) + +db.records.import_json({ + "label": "SHIPMENT", + "data": [ + {"trackingId": "SHIP-4001", "shippedAt": "2025-02-15", "status": "delivered"}, + {"trackingId": "SHIP-4002", "shippedAt": "2025-02-18", "status": "in_transit"} + ] +}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +curl -s -X POST "$BASE/records/import/json" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"BATCH","data":[{"lotId":"LOT-2025-001","material":"polymer-Z","producedAt":"2025-01-10","quantity":5000},{"lotId":"LOT-2025-002","material":"polymer-Z","producedAt":"2025-01-17","quantity":4800}]}' +``` + + + + +--- + +## Step 2: Link the supply chain + + + + +```typescript +const [batches, runs, products, shipments] = await Promise.all([ + db.records.find({ 
labels: ['BATCH'] }), + db.records.find({ labels: ['PRODUCTION_RUN'] }), + db.records.find({ labels: ['PRODUCT'] }), + db.records.find({ labels: ['SHIPMENT'] }) +]) + +const batchMap = Object.fromEntries(batches.data.map(b => [b.lotId, b])) +const runMap = Object.fromEntries(runs.data.map(r => [r.runId, r])) +const productMap = Object.fromEntries(products.data.map(p => [p.sku, p])) + +// LOT-001 USED_IN RUN-A1 +await db.records.attach({ source: batchMap['LOT-2025-001'], target: runMap['RUN-A1'], options: { type: 'USED_IN', direction: 'out' } }) +// LOT-002 USED_IN RUN-A2 +await db.records.attach({ source: batchMap['LOT-2025-002'], target: runMap['RUN-A2'], options: { type: 'USED_IN', direction: 'out' } }) + +// RUN-A1 PRODUCED PROD-001 +await db.records.attach({ source: runMap['RUN-A1'], target: productMap['PROD-001'], options: { type: 'PRODUCED', direction: 'out' } }) +// RUN-A2 PRODUCED PROD-002 +await db.records.attach({ source: runMap['RUN-A2'], target: productMap['PROD-002'], options: { type: 'PRODUCED', direction: 'out' } }) + +// Products INCLUDED_IN shipments +await db.records.attach({ source: productMap['PROD-001'], target: shipments.data[0], options: { type: 'INCLUDED_IN', direction: 'out' } }) +await db.records.attach({ source: productMap['PROD-002'], target: shipments.data[1], options: { type: 'INCLUDED_IN', direction: 'out' } }) +``` + + + + +```python +batches = db.records.find({"labels": ["BATCH"]}) +runs = db.records.find({"labels": ["PRODUCTION_RUN"]}) +products = db.records.find({"labels": ["PRODUCT"]}) +shipments = db.records.find({"labels": ["SHIPMENT"]}) + +batch_map = {b.data["lotId"]: b for b in batches.data} +run_map = {r.data["runId"]: r for r in runs.data} +product_map = {p.data["sku"]: p for p in products.data} + +db.records.attach(batch_map["LOT-2025-001"].id, run_map["RUN-A1"].id, {"type": "USED_IN", "direction": "out"}) +db.records.attach(batch_map["LOT-2025-002"].id, run_map["RUN-A2"].id, {"type": "USED_IN", "direction": "out"}) 
+db.records.attach(run_map["RUN-A1"].id, product_map["PROD-001"].id, {"type": "PRODUCED", "direction": "out"}) +db.records.attach(run_map["RUN-A2"].id, product_map["PROD-002"].id, {"type": "PRODUCED", "direction": "out"}) +db.records.attach(product_map["PROD-001"].id, shipments.data[0].id, {"type": "INCLUDED_IN","direction": "out"}) +db.records.attach(product_map["PROD-002"].id, shipments.data[1].id, {"type": "INCLUDED_IN","direction": "out"}) +``` + + + + +```bash +# Fetch IDs then link +BATCH_ID=$(curl -s -X POST "$BASE/records/search" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["BATCH"],"where":{"lotId":"LOT-2025-001"}}' | jq -r '.data[0].__id') +RUN_ID=$(curl -s -X POST "$BASE/records/search" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["PRODUCTION_RUN"],"where":{"runId":"RUN-A1"}}' | jq -r '.data[0].__id') + +curl -s -X POST "$BASE/records/$BATCH_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$RUN_ID\"],\"options\":{\"type\":\"USED_IN\",\"direction\":\"out\"}}" +``` + + + + +--- + +## Step 3: Downstream blast radius — all shipments from a contaminated batch + +Given a defective batch, find every shipment that could contain affected product. 
+ + + + +```typescript +const affectedShipments = await db.records.find({ + labels: ['SHIPMENT'], + where: { + PRODUCT: { + $relation: { type: 'INCLUDED_IN', direction: 'in' }, + PRODUCTION_RUN: { + $relation: { type: 'PRODUCED', direction: 'in' }, + BATCH: { + $relation: { type: 'USED_IN', direction: 'in' }, + lotId: 'LOT-2025-001' + } + } + } + } +}) + +console.log(`Affected shipments: ${affectedShipments.total}`) +for (const shipment of affectedShipments.data) { + console.log(` ${shipment.trackingId} — ${shipment.status}`) +} +``` + + + + +```python +affected_shipments = db.records.find({ + "labels": ["SHIPMENT"], + "where": { + "PRODUCT": { + "$relation": {"type": "INCLUDED_IN", "direction": "in"}, + "PRODUCTION_RUN": { + "$relation": {"type": "PRODUCED", "direction": "in"}, + "BATCH": { + "$relation": {"type": "USED_IN", "direction": "in"}, + "lotId": "LOT-2025-001" + } + } + } + } +}) + +print(f"Affected shipments: {affected_shipments.total}") +for s in affected_shipments.data: + print(f" {s.data.get('trackingId')} — {s.data.get('status')}") +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["SHIPMENT"], + "where": { + "PRODUCT": { + "$relation": {"type": "INCLUDED_IN", "direction": "in"}, + "PRODUCTION_RUN": { + "$relation": {"type": "PRODUCED", "direction": "in"}, + "BATCH": { + "$relation": {"type": "USED_IN", "direction": "in"}, + "lotId": "LOT-2025-001" + } + } + } + } + }' +``` + + + + +--- + +## Step 4: Upstream impact — which batches contributed to an affected product? + +Given a known-bad product SKU, trace back to every batch that could have contributed. 
+ + + + +```typescript +const sourceBatches = await db.records.find({ + labels: ['BATCH'], + where: { + PRODUCTION_RUN: { + $relation: { type: 'USED_IN', direction: 'in' }, + PRODUCT: { + $relation: { type: 'PRODUCED', direction: 'out' }, + sku: 'PROD-001' + } + } + } +}) + +console.log(`Source batches for PROD-001: ${sourceBatches.total}`) +for (const batch of sourceBatches.data) { + console.log(` ${batch.lotId} — ${batch.material} — qty: ${batch.quantity}`) +} +``` + + + + +```python +source_batches = db.records.find({ + "labels": ["BATCH"], + "where": { + "PRODUCTION_RUN": { + "$relation": {"type": "USED_IN", "direction": "in"}, + "PRODUCT": { + "$relation": {"type": "PRODUCED", "direction": "out"}, + "sku": "PROD-001" + } + } + } +}) + +print(f"Source batches for PROD-001: {source_batches.total}") +for batch in source_batches.data: + print(f" {batch.data.get('lotId')} — {batch.data.get('material')}") +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["BATCH"], + "where": { + "PRODUCTION_RUN": { + "$relation": {"type": "USED_IN", "direction": "in"}, + "PRODUCT": { + "$relation": {"type": "PRODUCED", "direction": "out"}, + "sku": "PROD-001" + } + } + } + }' +``` + + + + +--- + +## Step 5: Incident report — log a quality incident linked to a batch + + + + +```typescript +const incident = await db.records.create({ + label: 'INCIDENT', + data: { + title: 'Polymer degradation detected in lot LOT-2025-001', + severity: 'critical', + reportedAt: new Date().toISOString(), + status: 'open' + } +}) + +const batchResult = await db.records.find({ + labels: ['BATCH'], + where: { lotId: 'LOT-2025-001' } +}) + +await db.records.attach({ + source: incident, + target: batchResult.data[0], + options: { type: 'CAUSED_BY', direction: 'out' } +}) +``` + + + + +```python +from datetime import datetime, timezone + +incident = db.records.create("INCIDENT", { + "title": "Polymer degradation detected in lot 
LOT-2025-001", + "severity": "critical", + "reportedAt": datetime.now(timezone.utc).isoformat(), + "status": "open" +}) + +batch_result = db.records.find({"labels": ["BATCH"], "where": {"lotId": "LOT-2025-001"}}) +db.records.attach(incident.id, batch_result.data[0].id, {"type": "CAUSED_BY", "direction": "out"}) +``` + + + + +```bash +INCIDENT_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"INCIDENT","data":{"title":"Polymer degradation in lot LOT-2025-001","severity":"critical","status":"open","reportedAt":"2025-03-15T09:00:00Z"}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$INCIDENT_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$BATCH_ID\"],\"options\":{\"type\":\"CAUSED_BY\",\"direction\":\"out\"}}" +``` + + + + +--- + +## Production caveat + +Real supply chains are many-to-many: a production run may consume dozens of batches; a shipment may contain hundreds of products. The traversal queries above traverse exactly three hops. Adding more hops increases execution cost proportionally. If your supply chain has more than three levels of indirection between primary inputs and customer delivery, benchmark traversal performance against your production data volume before deploying to incident response workflows. 
+ +--- + +## Next steps + +- [Data Lineage](./data-lineage.mdx) — end-to-end causal chain from sources to answers +- [Audit Trails](./audit-trails.mdx) — append immutable events to supply chain records +- [Incident Response Graphs](./incident-response.mdx) — operational incident root cause analysis diff --git a/docs/docs/tutorials/temporal-graphs.mdx b/docs/docs/tutorials/temporal-graphs.mdx new file mode 100644 index 00000000..ba5d6167 --- /dev/null +++ b/docs/docs/tutorials/temporal-graphs.mdx @@ -0,0 +1,555 @@ +--- +sidebar_position: 17 +title: "Temporal Graphs: Modeling State and Event Time Together" +description: Represent durable entities alongside time-stamped events so you can answer both current-state and historical questions without losing lineage. +tags: [Graph Modeling, Events, Time-Series, Transactions] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Temporal Graphs: Modeling State and Event Time Together + +Most business questions have two modes: + +- **Current state**: what is a customer's plan right now? +- **History**: what changed, when, and what caused it? + +A flat record answers the first question. It cannot answer the second — a single `PATCH` replaces what was there before and leaves no trace. + +A temporal graph models both. The entity holds its current state. Events describe each change, when it happened, and what triggered it. Relationships between entities and events let you answer both questions with the same query engine. 
+ +--- + +## The pattern + +```mermaid +graph LR + U[ENTITY: user-88] -->|HAS_STATE| S1[STATE: plan=pro, since 2025-01-10] + S1 -->|SUPERSEDED_BY| S2[STATE: plan=enterprise, since 2025-03-20] + U -->|EXPERIENCED| EV1[EVENT: plan_upgraded, 2025-03-20] + EV1 -->|TRIGGERED_BY| ACT[ENTITY: billing-agent] + EV1 -->|RESULTED_IN| S2 +``` + +- `STATE` nodes carry the actual field values with a `since` timestamp +- `EVENT` nodes record what happened and when, without storing the full state +- The `SUPERSEDED_BY` chain lets you reconstruct history at any point in time + +--- + +## Step 1: Create the entity with initial state + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('RUSHDB_API_KEY') + +// Create the entity +const user = await db.records.create({ + label: 'ENTITY', + data: { + entityId: 'user-88', + type: 'customer', + name: 'Priya Kapoor', + email: 'priya@example.com' + } +}) + +// Create initial state node +const state1 = await db.records.create({ + label: 'STATE', + data: { + plan: 'pro', + monthlyLimit: 50000, + since: '2025-01-10T00:00:00Z', + isCurrent: true + } +}) + +await db.records.attach({ + source: user, + target: state1, + options: { type: 'HAS_STATE' } +}) +``` + + + + +```python +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") + +user = db.records.create("ENTITY", { + "entityId": "user-88", + "type": "customer", + "name": "Priya Kapoor", + "email": "priya@example.com" +}) + +state1 = db.records.create("STATE", { + "plan": "pro", + "monthlyLimit": 50000, + "since": "2025-01-10T00:00:00Z", + "isCurrent": True +}) + +db.records.attach(user.id, state1.id, {"type": "HAS_STATE"}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +USER_ID=$(curl -s -X POST "$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d 
'{"label":"ENTITY","data":{"entityId":"user-88","type":"customer","name":"Priya Kapoor"}}' \ + | jq -r '.data.__id') + +STATE1_ID=$(curl -s -X POST "$BASE/records" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"STATE","data":{"plan":"pro","monthlyLimit":50000,"since":"2025-01-10T00:00:00Z","isCurrent":true}}' \ + | jq -r '.data.__id') + +curl -s -X POST "$BASE/records/$USER_ID/relations" -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$STATE1_ID\"],\"options\":{\"type\":\"HAS_STATE\"}}" +``` + + + + +--- + +## Step 2: Record a state change as an event + +Use a transaction to atomically create the new state and the event, then update the old state in the same operation. + + + + +```typescript +import { Transaction } from '@rushdb/javascript-sdk' + +const tx = await db.tx.begin() + +try { + // Mark old state as no longer current + await db.records.update(state1.__id, { isCurrent: false }, tx) + + // Create new state node + const state2 = await db.records.create( + { + label: 'STATE', + data: { + plan: 'enterprise', + monthlyLimit: 500000, + since: '2025-03-20T14:00:00Z', + isCurrent: true + } + }, + tx + ) + + // Create the event + const event1 = await db.records.create( + { + label: 'EVENT', + data: { + type: 'plan_upgraded', + occurredAt: '2025-03-20T14:00:00Z', + previousPlan: 'pro', + newPlan: 'enterprise', + triggeredBy: 'billing-agent' + } + }, + tx + ) + + await Promise.all([ + // Chain old state to new state + db.records.attach({ source: state1, target: state2, options: { type: 'SUPERSEDED_BY' } }, tx), + // Link entity to new state + db.records.attach({ source: user, target: state2, options: { type: 'HAS_STATE' } }, tx), + // Link entity to event + db.records.attach({ source: user, target: event1, options: { type: 'EXPERIENCED' } }, tx), + // Link event to resulting state + db.records.attach({ source: event1, target: state2, options: { type: 'RESULTED_IN' } }, tx), + ]) + + await db.tx.commit(tx) +} catch (err) { + await 
db.tx.rollback(tx) + throw err +} +``` + + + + +```python +tx = db.transactions.begin() + +try: + # Mark old state + db.records.update(state1.id, {"isCurrent": False}, tx) + + # New state + state2 = db.records.create("STATE", { + "plan": "enterprise", + "monthlyLimit": 500000, + "since": "2025-03-20T14:00:00Z", + "isCurrent": True + }, transaction=tx) + + # Event + event1 = db.records.create("EVENT", { + "type": "plan_upgraded", + "occurredAt": "2025-03-20T14:00:00Z", + "previousPlan": "pro", + "newPlan": "enterprise", + "triggeredBy": "billing-agent" + }, transaction=tx) + + db.records.attach(state1.id, state2.id, {"type": "SUPERSEDED_BY"}, transaction=tx) + db.records.attach(user.id, state2.id, {"type": "HAS_STATE"}, transaction=tx) + db.records.attach(user.id, event1.id, {"type": "EXPERIENCED"}, transaction=tx) + db.records.attach(event1.id, state2.id, {"type": "RESULTED_IN"}, transaction=tx) + + db.transactions.commit(tx) +except Exception: + db.transactions.rollback(tx) + raise +``` + + + + +```bash +# Begin transaction +TX_ID=$(curl -s -X POST "$BASE/tx" \ + -H "$H" -H "Authorization: Bearer $TOKEN" | jq -r '.data.id') + +# Mark old state not current +curl -s -X PATCH "$BASE/records/$STATE1_ID" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d '{"isCurrent":false}' + +# Create new state in transaction +STATE2_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d '{"label":"STATE","data":{"plan":"enterprise","monthlyLimit":500000,"since":"2025-03-20T14:00:00Z","isCurrent":true}}' \ + | jq -r '.data.__id') + +# Create event in transaction +EVENT_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d '{"label":"EVENT","data":{"type":"plan_upgraded","occurredAt":"2025-03-20T14:00:00Z","newPlan":"enterprise"}}' \ + | jq -r '.data.__id') + +# Chain relationships +curl -s -X POST 
"$BASE/records/$STATE1_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -H "x-transaction-id: $TX_ID" \ + -d "{\"targets\":[\"$STATE2_ID\"],\"options\":{\"type\":\"SUPERSEDED_BY\"}}" + +# Commit +curl -s -X POST "$BASE/tx/$TX_ID/commit" \ + -H "Authorization: Bearer $TOKEN" +``` + + + + +--- + +## Step 3: Query current state + + + + +```typescript +const currentState = await db.records.find({ + labels: ['STATE'], + where: { + isCurrent: true, + ENTITY: { + $relation: { type: 'HAS_STATE', direction: 'in' }, + entityId: 'user-88' + } + }, + limit: 1 +}) +``` + + + + +```python +current_state = db.records.find({ + "labels": ["STATE"], + "where": { + "isCurrent": True, + "ENTITY": { + "$relation": {"type": "HAS_STATE", "direction": "in"}, + "entityId": "user-88" + } + }, + "limit": 1 +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["STATE"], + "where": { + "isCurrent": true, + "ENTITY": { + "$relation": {"type": "HAS_STATE", "direction": "in"}, + "entityId": "user-88" + } + }, + "limit": 1 + }' +``` + + + + +--- + +## Step 4: Query full event history for an entity + + + + +```typescript +const history = await db.records.find({ + labels: ['EVENT'], + where: { + ENTITY: { + $relation: { type: 'EXPERIENCED', direction: 'in' }, + entityId: 'user-88' + } + }, + orderBy: { occurredAt: 'asc' } +}) +``` + + + + +```python +history = db.records.find({ + "labels": ["EVENT"], + "where": { + "ENTITY": { + "$relation": {"type": "EXPERIENCED", "direction": "in"}, + "entityId": "user-88" + } + }, + "orderBy": {"occurredAt": "asc"} +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["EVENT"], + "where": { + "ENTITY": { + "$relation": {"type": "EXPERIENCED", "direction": "in"}, + "entityId": "user-88" + } + }, + "orderBy": {"occurredAt": "asc"} + }' +``` + + + + +--- + +## Step 5: Reconstruct state at a 
point in time + +Find the state that was current on a given date by selecting the most recent `STATE` node whose `since` timestamp falls on or before that date — ordering by `since` descending with `limit: 1` does the work, so the `SUPERSEDED_BY` chain is not needed for this query. + + + + +```typescript +// What was user-88's state on 2025-02-15? +const stateOnDate = await db.records.find({ + labels: ['STATE'], + where: { + since: { + $lte: { $year: 2025, $month: 2, $day: 15 } + }, + ENTITY: { + $relation: { type: 'HAS_STATE', direction: 'in' }, + entityId: 'user-88' + } + }, + orderBy: { since: 'desc' }, + limit: 1 +}) +// Returns the state that was active as of Feb 15 (the most recent one before that date) +``` + + + + +```python +state_on_date = db.records.find({ + "labels": ["STATE"], + "where": { + "since": {"$lte": {"$year": 2025, "$month": 2, "$day": 15}}, + "ENTITY": { + "$relation": {"type": "HAS_STATE", "direction": "in"}, + "entityId": "user-88" + } + }, + "orderBy": {"since": "desc"}, + "limit": 1 +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["STATE"], + "where": { + "since": {"$lte": {"$year": 2025, "$month": 2, "$day": 15}}, + "ENTITY": { + "$relation": {"type": "HAS_STATE", "direction": "in"}, + "entityId": "user-88" + } + }, + "orderBy": {"since": "desc"}, + "limit": 1 + }' +``` + + + + +--- + +## Step 6: Aggregate events by type over a time window + + + + +```typescript +const upgrades = await db.records.find({ + labels: ['EVENT'], + where: { + type: 'plan_upgraded', + occurredAt: { + $gte: { $year: 2025, $month: 1, $day: 1 }, + $lt: { $year: 2026, $month: 1, $day: 1 } + } + }, + aggregate: { + eventCount: { fn: 'count', alias: '$record' }, + firstUpgrade: { fn: 'min', field: 'occurredAt', alias: '$record' }, + lastUpgrade: { fn: 'max', field: 'occurredAt', alias: '$record' } + }, + groupBy: ['eventCount', 'firstUpgrade', 'lastUpgrade'] +}) +``` + + + + +```python +upgrades = db.records.find({ + "labels": ["EVENT"], + "where": { + "type": 
"plan_upgraded", + "occurredAt": { + "$gte": {"$year": 2025, "$month": 1, "$day": 1}, + "$lt": {"$year": 2026, "$month": 1, "$day": 1} + } + }, + "aggregate": { + "eventCount": {"fn": "count", "alias": "$record"}, + "firstUpgrade": {"fn": "min", "field": "occurredAt", "alias": "$record"}, + "lastUpgrade": {"fn": "max", "field": "occurredAt", "alias": "$record"} + }, + "groupBy": ["eventCount", "firstUpgrade", "lastUpgrade"] +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["EVENT"], + "where": { + "type": "plan_upgraded", + "occurredAt": { + "$gte": {"$year": 2025, "$month": 1, "$day": 1}, + "$lt": {"$year": 2026, "$month": 1, "$day": 1} + } + }, + "aggregate": { + "eventCount": {"fn": "count", "alias": "$record"} + }, + "groupBy": ["eventCount"] + }' +``` + + + + +--- + +## When to use this pattern vs. simple field updates + +| Scenario | Approach | +|---|---| +| Only current state matters | Update the field in-place with `PATCH` | +| You need to know what changed and when | Temporal graph with EVENT nodes | +| You need to reconstruct state at any past date | STATE chain with `since` and `isCurrent` | +| Compliance requires immutable audit trail | Append-only STATE + EVENT nodes; never delete | + +--- + +## Production caveat + +State and event chains grow indefinitely. Set a retention horizon: archive or delete `STATE` nodes where `isCurrent = false` and `since` is older than your compliance window. Use `db.records.delete` with a `where` filter for bulk archival. Keep at least one historical `STATE` per entity per quarter if you need YoY comparisons. 
+ +--- + +## Next steps + +- [Audit Trails with Immutable Events and Derived State](./audit-trails.mdx) — extending this pattern for compliance use cases +- [RushDB as a Memory Layer](./memory-layer.mdx) — using the same FACT/EPISODE pattern for agent memory +- [SearchQuery Deep Dive](./searchquery-advanced-patterns.mdx) — date filter syntax and aggregation patterns diff --git a/docs/docs/tutorials/testing-searchquery.mdx b/docs/docs/tutorials/testing-searchquery.mdx new file mode 100644 index 00000000..088a8a34 --- /dev/null +++ b/docs/docs/tutorials/testing-searchquery.mdx @@ -0,0 +1,335 @@ +--- +sidebar_position: 32 +title: "Testing SearchQuery Across TypeScript, Python, and REST" +description: Write parity-driven tests that prove one query intent behaves identically across every RushDB surface. +tags: [Testing, SearchQuery, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Testing SearchQuery Across TypeScript, Python, and REST + +A SearchQuery written in TypeScript must return the same records as the same query written in Python or issued as a raw REST call. This is called **query parity**. Parity tests catch SDK bugs, version drift, and silent behavioral differences before they reach production. + +This tutorial shows how to structure parity-driven tests, common failure modes, and a repeatable test harness you can adapt to any record type. + +--- + +## The parity contract + +Every `records.find()` call maps to an underlying `POST /records/search`. The contract is: + +> Given the same SearchQuery body, every SDK surface and the REST API must return the same `data[]` and `total`. + +The three variables that can break parity: +1. **Serialization** — an SDK may serialize a field name differently than the REST contract expects +2. **Default values** — an SDK may silently inject defaults (e.g. `limit: 20`) that the raw query did not include +3. 
**Case sensitivity** — label names, relationship types, and field names are case-sensitive + +--- + +## Step 1: Set up a shared test fixture + +All parity tests should run against live data created in a dedicated test project. + + + + +```typescript +// tests/fixtures/setup.ts +import RushDB from '@rushdb/javascript-sdk' + +export const db = new RushDB(process.env.RUSHDB_TEST_API_KEY!) + +export async function seedTestData() { + const result = await db.records.importJson({ + label: 'PRODUCT', + data: [ + { sku: 'P-001', name: 'Widget Alpha', category: 'widgets', price: 29.99, inStock: true }, + { sku: 'P-002', name: 'Widget Beta', category: 'widgets', price: 49.99, inStock: false }, + { sku: 'P-003', name: 'Gadget One', category: 'gadgets', price: 99.00, inStock: true } + ] + }) + return result +} + +export async function teardownTestData() { + await db.records.deleteMany({ + labels: ['PRODUCT'], + where: { sku: { $in: ['P-001', 'P-002', 'P-003'] } } + }) +} +``` + + + + +```python +# tests/fixtures/setup.py +from rushdb import RushDB +import os + +db = RushDB(os.environ["RUSHDB_TEST_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +def seed_test_data(): + return db.records.import_json({ + "label": "PRODUCT", + "data": [ + {"sku": "P-001", "name": "Widget Alpha", "category": "widgets", "price": 29.99, "inStock": True}, + {"sku": "P-002", "name": "Widget Beta", "category": "widgets", "price": 49.99, "inStock": False}, + {"sku": "P-003", "name": "Gadget One", "category": "gadgets", "price": 99.00, "inStock": True} + ] + }) + +def teardown_test_data(): + db.records.delete_many({ + "labels": ["PRODUCT"], + "where": {"sku": {"$in": ["P-001", "P-002", "P-003"]}} + }) +``` + + + + +--- + +## Step 2: Write a parity test + +Compare results from the TypeScript SDK against a raw REST call to verify the same payload produces the same results. 
+ + + + +```typescript +// tests/parity/product-search.test.ts +import { describe, beforeAll, afterAll, it, expect } from '@jest/globals' +import { db, seedTestData, teardownTestData } from '../fixtures/setup' + +describe('SearchQuery parity — PRODUCT', () => { + beforeAll(seedTestData) + afterAll(teardownTestData) + + const query = { + labels: ['PRODUCT'], + where: { category: 'widgets', inStock: true }, + orderBy: { price: 'asc' as const } + } + + it('SDK find() returns in-stock widgets ordered by price', async () => { + const result = await db.records.find(query) + expect(result.total).toBe(1) + expect(result.data[0].sku).toBe('P-001') + }) + + it('REST POST /records/search returns same result as SDK', async () => { + const response = await fetch('https://api.rushdb.com/api/v1/records/search', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${process.env.RUSHDB_TEST_API_KEY}` + }, + body: JSON.stringify(query) + }) + const json = await response.json() + expect(json.total).toBe(1) + expect(json.data[0].sku).toBe('P-001') + }) + + it('SDK and REST return the same __id for the same record', async () => { + const [sdkResult, restResponse] = await Promise.all([ + db.records.find(query), + fetch('https://api.rushdb.com/api/v1/records/search', { + method: 'POST', + headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${process.env.RUSHDB_TEST_API_KEY}` }, + body: JSON.stringify(query) + }).then(r => r.json()) + ]) + expect(sdkResult.data[0].__id).toBe(restResponse.data[0].__id) + }) +}) +``` + + + + +```python +# tests/parity/test_product_search.py +import pytest +import requests +import os +from tests.fixtures.setup import db, seed_test_data, teardown_test_data + +QUERY = { + "labels": ["PRODUCT"], + "where": {"category": "widgets", "inStock": True}, + "orderBy": {"price": "asc"} +} + +@pytest.fixture(scope="module", autouse=True) +def test_data(): + seed_test_data() + yield + teardown_test_data() + 
+def test_sdk_returns_in_stock_widgets(): + result = db.records.find(QUERY) + assert result.total == 1 + assert result.data[0].data["sku"] == "P-001" + +def test_rest_returns_same_result(): + resp = requests.post( + "https://api.rushdb.com/api/v1/records/search", + json=QUERY, + headers={"Authorization": f"Bearer {os.environ['RUSHDB_TEST_API_KEY']}"} + ) + body = resp.json() + assert body["total"] == 1 + assert body["data"][0]["sku"] == "P-001" + +def test_sdk_and_rest_return_same_id(): + sdk_result = db.records.find(QUERY) + rest_result = requests.post( + "https://api.rushdb.com/api/v1/records/search", + json=QUERY, + headers={"Authorization": f"Bearer {os.environ['RUSHDB_TEST_API_KEY']}"} + ).json() + assert sdk_result.data[0].id == rest_result["data"][0]["__id"] +``` + + + + +```bash +# Smoke-test via REST — assert total is 1 and first sku is P-001 +RESULT=$(curl -s -X POST "https://api.rushdb.com/api/v1/records/search" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $RUSHDB_TEST_API_KEY" \ + -d '{"labels":["PRODUCT"],"where":{"category":"widgets","inStock":true},"orderBy":{"price":"asc"}}') + +TOTAL=$(echo "$RESULT" | jq '.total') +SKU=$(echo "$RESULT" | jq -r '.data[0].sku') + +[ "$TOTAL" = "1" ] && echo "✅ total=1" || echo "❌ total=$TOTAL" +[ "$SKU" = "P-001" ] && echo "✅ sku=P-001" || echo "❌ sku=$SKU" +``` + + + + +--- + +## Step 3: Test aggregation parity + +Aggregate queries are the most likely to diverge between SDK versions. Test them explicitly. 
+ + + + +```typescript +it('aggregate count by category returns correct totals', async () => { + const result = await db.records.find({ + labels: ['PRODUCT'], + aggregate: { + count: { fn: 'count', alias: '$record' }, + category: '$record.category' + }, + groupBy: ['category', 'count'], + orderBy: { count: 'desc' } + }) + + // widgets has 2 records, gadgets has 1 + expect(result.data[0].category).toBe('widgets') + expect(result.data[0].count).toBe(2) + expect(result.data[1].category).toBe('gadgets') + expect(result.data[1].count).toBe(1) +}) +``` + + + + +```python +def test_aggregate_count_by_category(): + result = db.records.find({ + "labels": ["PRODUCT"], + "aggregate": { + "count": {"fn": "count", "alias": "$record"}, + "category": "$record.category" + }, + "groupBy": ["category", "count"], + "orderBy": {"count": "desc"} + }) + assert result.data[0].data["category"] == "widgets" + assert result.data[0].data["count"] == 2 +``` + + + + +--- + +## Step 4: Test transaction parity + +Verify that transactional writes and rollbacks behave identically across surfaces. 
+ + + + +```typescript +it('rolled-back create does not persist the record', async () => { + const tx = await db.tx.begin() + try { + await db.records.create({ label: 'PRODUCT', data: { sku: 'ROLLBACK-TEST', category: 'test' } }, tx) + await db.tx.rollback(tx) + } catch { + await db.tx.rollback(tx) + } + + const result = await db.records.find({ + labels: ['PRODUCT'], + where: { sku: 'ROLLBACK-TEST' } + }) + expect(result.total).toBe(0) +}) +``` + + + + +```python +def test_rollback_does_not_persist(): + tx = db.transactions.begin() + try: + db.records.create("PRODUCT", {"sku": "ROLLBACK-TEST", "category": "test"}, transaction=tx) + db.transactions.rollback(tx) + except Exception: + db.transactions.rollback(tx) + + result = db.records.find({"labels": ["PRODUCT"], "where": {"sku": "ROLLBACK-TEST"}}) + assert result.total == 0 +``` + + + + +--- + +## Common parity failure modes + +| Symptom | Likely cause | +|---|---| +| SDK returns 0, REST returns N | Label name casing mismatch (`article` vs `ARTICLE`) | +| REST returns 0, SDK returns N | SDK is injecting a default `where` clause | +| Results differ between SDK versions | SDK updated default `limit` or sort order | +| `total` correct but `data[]` differs | `orderBy` field not specified — sort is non-deterministic | +| Python returns float, REST returns int | Python SDK deserializing numeric field differently | +| Transaction test flaky | Two tests sharing the same natural key collide across runs | + +--- + +## Next steps + +- [Query Optimization](./query-optimization.mdx) — reduce the cost of the queries you've confirmed are correct +- [Discovery Queries](./discovery-queries.mdx) — explore the schema before writing parity tests +- [Explainable Results](./explainable-results.mdx) — surface evidence alongside test-verified search results diff --git a/docs/docs/tutorials/thinking-in-graphs.mdx b/docs/docs/tutorials/thinking-in-graphs.mdx new file mode 100644 index 00000000..6fce9a1e --- /dev/null +++ 
b/docs/docs/tutorials/thinking-in-graphs.mdx @@ -0,0 +1,617 @@ +--- +sidebar_position: 10 +title: "Thinking in Graphs: From Tables to Traversals" +description: Map the same product, customer, and order dataset from relational and document mental models into RushDB's graph model, then translate common business questions into multi-hop queries. +tags: [Concepts, Graph Modeling, SearchQuery] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Thinking in Graphs: From Tables to Traversals + +Most developers arrive at a graph database carrying mental models built from SQL tables or JSON documents. Both are useful starting points. Neither maps directly to the query patterns that make graphs worth using. + +This tutorial uses a realistic e-commerce scenario — customers, orders, products, and reviews — to show the same data across three mental models: + +1. Relational (normalized tables) +2. Document (nested JSON) +3. Graph (labeled nodes + typed relationships) + +Then it translates five common business questions into RushDB queries so you can see exactly where the graph model pays off. + +--- + +## The scenario + +A minimal e-commerce platform has: + +- Customers who place orders +- Orders that contain line items referencing products +- Products that belong to categories +- Customers who write reviews for products + +--- + +## Three mental models for the same data + +### Relational model + +``` +customers(id, name, email) +products(id, name, category_id, price) +categories(id, name) +orders(id, customer_id, placed_at, status) +order_items(order_id, product_id, quantity, unit_price) +reviews(id, customer_id, product_id, rating, body) +``` + +In SQL you answer "which customers reviewed a product they never ordered?" with a NOT EXISTS subquery across three joins. The query is correct but the intent is buried in JOIN columns. 
+ +### Document model + +```json +{ + "customerId": "c1", + "name": "Lena Müller", + "orders": [ + { + "orderId": "o1", + "status": "shipped", + "items": [ + { "productId": "p1", "qty": 2 } + ] + } + ], + "reviews": [ + { "productId": "p2", "rating": 5 } + ] +} +``` + +Documents are fast for loading a single customer's full history. The problem appears at the edges: "which products from the same category did this customer's network also buy?" requires post-processing across multiple documents. + +### Graph model + +The same data becomes labeled nodes connected by typed, directed relationships: + +```mermaid +graph LR + C1[CUSTOMER: Lena Müller] -->|PLACED| O1[ORDER: o1] + O1 -->|CONTAINS| P1[PRODUCT: Lens Cap] + P1 -->|IN_CATEGORY| CAT[CATEGORY: Photography] + C1 -->|WROTE_REVIEW| R1[REVIEW: ★5] + R1 -->|ABOUT| P2[PRODUCT: Camera Bag] + P2 -->|IN_CATEGORY| CAT +``` + +The graph stores **relationships as first-class data**, which is why multi-hop questions become natural instead of awkward. 
+ +--- + +## Ingesting the dataset + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('RUSHDB_API_KEY') + +// Categories +const [photography, audio] = await Promise.all([ + db.records.create({ label: 'CATEGORY', data: { name: 'Photography' } }), + db.records.create({ label: 'CATEGORY', data: { name: 'Audio' } }), +]) + +// Products +const [lensCap, cameraBag, headphones] = await Promise.all([ + db.records.create({ label: 'PRODUCT', data: { name: 'Lens Cap 58mm', price: 12.99 } }), + db.records.create({ label: 'PRODUCT', data: { name: 'Camera Bag Pro', price: 89.00 } }), + db.records.create({ label: 'PRODUCT', data: { name: 'Studio Headphones', price: 149.00 } }), +]) + +// Link products to categories +await Promise.all([ + db.records.attach({ source: lensCap, target: photography, options: { type: 'IN_CATEGORY' } }), + db.records.attach({ source: cameraBag, target: photography, options: { type: 'IN_CATEGORY' } }), + db.records.attach({ source: headphones, target: audio, options: { type: 'IN_CATEGORY' } }), +]) + +// Customers +const [lena, marco] = await Promise.all([ + db.records.create({ label: 'CUSTOMER', data: { name: 'Lena Müller', email: 'lena@example.com' } }), + db.records.create({ label: 'CUSTOMER', data: { name: 'Marco Rossi', email: 'marco@example.com' } }), +]) + +// Orders +const order1 = await db.records.create({ label: 'ORDER', data: { status: 'shipped', placedAt: '2025-01-10' } }) +await db.records.attach({ source: lena, target: order1, options: { type: 'PLACED' } }) +await db.records.attach({ source: order1, target: lensCap, options: { type: 'CONTAINS' } }) + +const order2 = await db.records.create({ label: 'ORDER', data: { status: 'delivered', placedAt: '2025-02-14' } }) +await db.records.attach({ source: marco, target: order2, options: { type: 'PLACED' } }) +await db.records.attach({ source: order2, target: cameraBag, options: { type: 'CONTAINS' } }) + +// Reviews +const review1 = await db.records.create({ 
label: 'REVIEW', data: { rating: 5, body: 'Perfect fit.' } }) +await db.records.attach({ source: lena, target: review1, options: { type: 'WROTE_REVIEW' } }) +await db.records.attach({ source: review1, target: cameraBag, options: { type: 'ABOUT' } }) +``` + + + + +```python +from rushdb import RushDB + +db = RushDB("RUSHDB_API_KEY", base_url="https://api.rushdb.com/api/v1") + +# Categories +photography = db.records.create("CATEGORY", {"name": "Photography"}) +audio = db.records.create("CATEGORY", {"name": "Audio"}) + +# Products +lens_cap = db.records.create("PRODUCT", {"name": "Lens Cap 58mm", "price": 12.99}) +camera_bag = db.records.create("PRODUCT", {"name": "Camera Bag Pro", "price": 89.00}) +headphones = db.records.create("PRODUCT", {"name": "Studio Headphones", "price": 149.00}) + +# Link products to categories +db.records.attach(lens_cap.id, photography.id, {"type": "IN_CATEGORY"}) +db.records.attach(camera_bag.id, photography.id, {"type": "IN_CATEGORY"}) +db.records.attach(headphones.id, audio.id, {"type": "IN_CATEGORY"}) + +# Customers +lena = db.records.create("CUSTOMER", {"name": "Lena Müller", "email": "lena@example.com"}) +marco = db.records.create("CUSTOMER", {"name": "Marco Rossi", "email": "marco@example.com"}) + +# Orders +order1 = db.records.create("ORDER", {"status": "shipped", "placedAt": "2025-01-10"}) +db.records.attach(lena.id, order1.id, {"type": "PLACED"}) +db.records.attach(order1.id, lens_cap.id, {"type": "CONTAINS"}) + +order2 = db.records.create("ORDER", {"status": "delivered", "placedAt": "2025-02-14"}) +db.records.attach(marco.id, order2.id, {"type": "PLACED"}) +db.records.attach(order2.id, camera_bag.id, {"type": "CONTAINS"}) + +# Reviews +review1 = db.records.create("REVIEW", {"rating": 5, "body": "Perfect fit."}) +db.records.attach(lena.id, review1.id, {"type": "WROTE_REVIEW"}) +db.records.attach(review1.id, camera_bag.id, {"type": "ABOUT"}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" 
+H='Content-Type: application/json' + +# Create category +PHOTO_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"CATEGORY","data":{"name":"Photography"}}' \ + | jq -r '.data.__id') + +# Create product +LENS_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"PRODUCT","data":{"name":"Lens Cap 58mm","price":12.99}}' \ + | jq -r '.data.__id') + +# Link product to category +curl -s -X POST "$BASE/records/$LENS_ID/relations" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{\"targets\":[\"$PHOTO_ID\"],\"options\":{\"type\":\"IN_CATEGORY\"}}" +``` + + + + +--- + +## Five business questions, translated + +### Q1: Which orders has a given customer placed? + +**Relational intuition:** `SELECT * FROM orders WHERE customer_id = 'c1'` + +**Graph query:** + + + + +```typescript +const results = await db.records.find({ + labels: ['ORDER'], + where: { + CUSTOMER: { + $alias: '$customer', + $relation: { type: 'PLACED', direction: 'in' }, + email: 'lena@example.com' + } + }, + orderBy: { placedAt: 'desc' } +}) +``` + + + + +```python +results = db.records.find({ + "labels": ["ORDER"], + "where": { + "CUSTOMER": { + "$alias": "$customer", + "$relation": {"type": "PLACED", "direction": "in"}, + "email": "lena@example.com" + } + }, + "orderBy": {"placedAt": "desc"} +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["ORDER"], + "where": { + "CUSTOMER": { + "$alias": "$customer", + "$relation": {"type": "PLACED", "direction": "in"}, + "email": "lena@example.com" + } + }, + "orderBy": {"placedAt": "desc"} + }' +``` + + + + +### Q2: Which products did a customer purchase, grouped by category? + +Three hops: CUSTOMER → ORDER → PRODUCT → CATEGORY. 
+ + + + +```typescript +const results = await db.records.find({ + labels: ['PRODUCT'], + where: { + ORDER: { + $alias: '$order', + $relation: { type: 'CONTAINS', direction: 'in' }, + CUSTOMER: { + email: 'lena@example.com' + } + }, + CATEGORY: { + $alias: '$cat' + } + }, + aggregate: { + productName: '$record.name', + price: '$record.price', + categoryName: '$cat.name', + orderedAt: '$order.placedAt' + } +}) +``` + + + + +```python +results = db.records.find({ + "labels": ["PRODUCT"], + "where": { + "ORDER": { + "$alias": "$order", + "$relation": {"type": "CONTAINS", "direction": "in"}, + "CUSTOMER": { + "email": "lena@example.com" + } + }, + "CATEGORY": { + "$alias": "$cat" + } + }, + "aggregate": { + "productName": "$record.name", + "price": "$record.price", + "categoryName": "$cat.name", + "orderedAt": "$order.placedAt" + } +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["PRODUCT"], + "where": { + "ORDER": { + "$alias": "$order", + "$relation": {"type": "CONTAINS", "direction": "in"}, + "CUSTOMER": {"email": "lena@example.com"} + }, + "CATEGORY": {"$alias": "$cat"} + }, + "aggregate": { + "productName": "$record.name", + "categoryName": "$cat.name" + } + }' +``` + + + + +### Q3: Which products received 5-star reviews but have not been ordered yet? 
+ + + + +```typescript +// First, find product IDs that appear in orders +const orderedResults = await db.records.find({ + labels: ['PRODUCT'], + where: { + ORDER: { $relation: { type: 'CONTAINS', direction: 'in' } } + }, + aggregate: { id: '$record.__id' } +}) + +const orderedIds = orderedResults.data.map((r: any) => r.id) + +// Then find 5-star reviewed products NOT in that set +const unorderedHighRated = await db.records.find({ + labels: ['PRODUCT'], + where: { + __id: { $nin: orderedIds }, + REVIEW: { + $relation: { type: 'ABOUT', direction: 'in' }, + rating: 5 + } + } +}) +``` + + + + +```python +# Products that appear in orders +ordered = db.records.find({ + "labels": ["PRODUCT"], + "where": { + "ORDER": {"$relation": {"type": "CONTAINS", "direction": "in"}} + }, + "aggregate": {"id": "$record.__id"} +}) +ordered_ids = [r["id"] for r in ordered.data] + +# 5-star reviewed products NOT in ordered set +unordered_high = db.records.find({ + "labels": ["PRODUCT"], + "where": { + "__id": {"$nin": ordered_ids}, + "REVIEW": { + "$relation": {"type": "ABOUT", "direction": "in"}, + "rating": 5 + } + } +}) +``` + + + + +```bash +# Step 1: collect ordered product IDs (use jq to extract) +ORDERED_IDS=$(curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"labels":["PRODUCT"],"where":{"ORDER":{"$relation":{"type":"CONTAINS","direction":"in"}}},"aggregate":{"id":"$record.__id"}}' \ + | jq '[.data[].id]') + +# Step 2: query unordered 5-star products +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"PRODUCT\"], + \"where\": { + \"__id\": {\"\$nin\": $ORDERED_IDS}, + \"REVIEW\": { + \"\$relation\": {\"type\": \"ABOUT\", \"direction\": \"in\"}, + \"rating\": 5 + } + } + }" +``` + + + + +### Q4: How many orders per customer, with average order recency? 
+ + + + +```typescript +const summary = await db.records.find({ + labels: ['CUSTOMER'], + where: { + ORDER: { + $alias: '$order', + $relation: { type: 'PLACED', direction: 'out' } + } + }, + aggregate: { + customerName: '$record.name', + orderCount: { fn: 'count', alias: '$order', unique: true }, + lastOrderDate: { fn: 'max', field: 'placedAt', alias: '$order' } + }, + groupBy: ['customerName', 'orderCount', 'lastOrderDate'], + orderBy: { orderCount: 'desc' } +}) +``` + + + + +```python +summary = db.records.find({ + "labels": ["CUSTOMER"], + "where": { + "ORDER": { + "$alias": "$order", + "$relation": {"type": "PLACED", "direction": "out"} + } + }, + "aggregate": { + "customerName": "$record.name", + "orderCount": {"fn": "count", "alias": "$order", "unique": True}, + "lastOrderDate": {"fn": "max", "field": "placedAt", "alias": "$order"} + }, + "groupBy": ["customerName", "orderCount", "lastOrderDate"], + "orderBy": {"orderCount": "desc"} +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["CUSTOMER"], + "where": { + "ORDER": { + "$alias": "$order", + "$relation": {"type": "PLACED", "direction": "out"} + } + }, + "aggregate": { + "customerName": "$record.name", + "orderCount": {"fn": "count", "alias": "$order", "unique": true}, + "lastOrderDate": {"fn": "max", "field": "placedAt", "alias": "$order"} + }, + "groupBy": ["customerName", "orderCount", "lastOrderDate"], + "orderBy": {"orderCount": "desc"} + }' +``` + + + + +### Q5: Which products in the Photography category have been both ordered and reviewed? 
+ + + + +```typescript +const results = await db.records.find({ + labels: ['PRODUCT'], + where: { + CATEGORY: { name: 'Photography' }, + ORDER: { $relation: { type: 'CONTAINS', direction: 'in' } }, + REVIEW: { $alias: '$review', $relation: { type: 'ABOUT', direction: 'in' } } + }, + aggregate: { + productName: '$record.name', + reviewCount: { fn: 'count', alias: '$review', unique: true }, + avgRating: { fn: 'avg', field: 'rating', alias: '$review', precision: 1 } + }, + groupBy: ['productName', 'reviewCount', 'avgRating'], + orderBy: { avgRating: 'desc' } +}) +``` + + + + +```python +results = db.records.find({ + "labels": ["PRODUCT"], + "where": { + "CATEGORY": {"name": "Photography"}, + "ORDER": {"$relation": {"type": "CONTAINS", "direction": "in"}}, + "REVIEW": { + "$alias": "$review", + "$relation": {"type": "ABOUT", "direction": "in"} + } + }, + "aggregate": { + "productName": "$record.name", + "reviewCount": {"fn": "count", "alias": "$review", "unique": True}, + "avgRating": {"fn": "avg", "field": "rating", "alias": "$review", "precision": 1} + }, + "groupBy": ["productName", "reviewCount", "avgRating"], + "orderBy": {"avgRating": "desc"} +}) +``` + + + + +```bash +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{ + "labels": ["PRODUCT"], + "where": { + "CATEGORY": {"name": "Photography"}, + "ORDER": {"$relation": {"type": "CONTAINS", "direction": "in"}}, + "REVIEW": {"$alias": "$review", "$relation": {"type": "ABOUT", "direction": "in"}} + }, + "aggregate": { + "productName": "$record.name", + "reviewCount": {"fn": "count", "alias": "$review", "unique": true}, + "avgRating": {"fn": "avg", "field": "rating", "alias": "$review", "precision": 1} + }, + "groupBy": ["productName", "reviewCount", "avgRating"], + "orderBy": {"avgRating": "desc"} + }' +``` + + + + +--- + +## What changed between the mental models + +| Concern | Relational | Document | Graph | +|---|---|---|---| +| Multi-hop traversal | Multi-join SQL | 
Iteration across documents | Single query with nested `where` | +| Adding a new relationship type | New foreign key column or join table | Schema migration or array append | New `attach` call, zero schema changes | +| Querying along a new path | Rewrite query or add index | Rewrite aggregation logic | Extend existing `where` block | +| Aggregate along path | GROUP BY with joins | Map-reduce | Per-hop aggregate in same query | + +--- + +## Production caveat + +Relationship traversal queries become expensive when each hop fans out to thousands of related records. Before deploying traversal-heavy queries in production, scope them aggressively with `limit` on the leaf label and property filters that eliminate most candidates early. The [`SearchQuery Deep Dive`](./searchquery-advanced-patterns.mdx) tutorial covers aggregation and traversal optimization in more detail. + +--- + +## Next steps + +- [Choosing Relationship Types That Age Well](./choosing-relationship-types.mdx) — when to use generic edges versus typed relationships +- [SearchQuery Deep Dive](./searchquery-advanced-patterns.mdx) — aggregation, collect, and groupBy patterns +- [RushDB as a Memory Layer](./memory-layer.mdx) — using the same graph primitives for agent memory diff --git a/docs/docs/tutorials/versioning-records.mdx b/docs/docs/tutorials/versioning-records.mdx new file mode 100644 index 00000000..b5ecfbd3 --- /dev/null +++ b/docs/docs/tutorials/versioning-records.mdx @@ -0,0 +1,385 @@ +--- +sidebar_position: 25 +title: "Versioning Records Without Losing Queryability" +description: Compare in-place mutation, append-only versions, and hybrid versioning approaches — and how to query latest state while preserving historical analysis. 
+tags: [Lineage, Versioning, Transactions, TypeScript, Python] +--- + +import Tabs from '@site/src/components/LanguageTabs'; +import TabItem from '@theme/TabItem'; + +# Versioning Records Without Losing Queryability + +Any mutable record in your system has a versioning question: what do you do when it changes and you still need to answer questions about the past? + +Three approaches work in RushDB: + +1. **In-place mutation** — update the record; accept that history is lost +2. **Append-only versions** — create a new VERSION record on every change; link with `CURRENT_VERSION` and `PREVIOUS_VERSION` +3. **Hybrid** — in-place mutation for queryable mutable state plus an append-only EVENT log for history + +This tutorial shows all three and explains when to use each. + +--- + +## Approach 1: In-place mutation (PATCH) + +`db.records.update` sends a `PATCH` — it merges your new fields over the existing record. This is the simplest approach and the one to default to when history is not required. + + + + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB(process.env.RUSHDB_API_KEY!) 
+ +// Create a document +const doc = await db.records.create({ + label: 'DOCUMENT', + data: { + title: 'System Design Guide', + body: 'Initial draft content.', + version: 1, + updatedAt: new Date().toISOString() + } +}) + +// Update in place — history is overwritten +await db.records.update(doc.__id, { + body: 'Revised content with better examples.', + version: 2, + updatedAt: new Date().toISOString() +}) + +// Full replace — use set() for PUT semantics +await db.records.set(doc.__id, { + title: 'System Design Guide v2', + body: 'Complete rewrite.', + version: 3, + updatedAt: new Date().toISOString() +}) +``` + + + + +```python +from rushdb import RushDB +import os +from datetime import datetime, timezone + +db = RushDB(os.environ["RUSHDB_API_KEY"], base_url="https://api.rushdb.com/api/v1") + +doc = db.records.create("DOCUMENT", { + "title": "System Design Guide", + "body": "Initial draft content.", + "version": 1, + "updatedAt": datetime.now(timezone.utc).isoformat() +}) + +# Partial update (PATCH) +db.records.update(doc.id, { + "body": "Revised content with better examples.", + "version": 2, + "updatedAt": datetime.now(timezone.utc).isoformat() +}) + +# Full replace (PUT) +db.records.set(doc.id, { + "title": "System Design Guide v2", + "body": "Complete rewrite.", + "version": 3, + "updatedAt": datetime.now(timezone.utc).isoformat() +}) +``` + + + + +```bash +BASE="https://api.rushdb.com/api/v1" +TOKEN="RUSHDB_API_KEY" +H='Content-Type: application/json' + +DOC_ID=$(curl -s -X POST "$BASE/records" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"label":"DOCUMENT","data":{"title":"System Design Guide","body":"Initial draft.","version":1}}' \ + | jq -r '.data.__id') + +# Partial update (PATCH) +curl -s -X PATCH "$BASE/records/$DOC_ID" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"body":"Revised content.","version":2}' + +# Full set (PUT) +curl -s -X PUT "$BASE/records/$DOC_ID" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d '{"title":"System 
Design Guide v2","body":"Complete rewrite.","version":3}' +``` + + + + +**When to use:** simple records with no history requirement — settings, profiles, catalog items. + +--- + +## Approach 2: Append-only versions (VERSION chain) + +Every change creates a new VERSION record. A `CURRENT_VERSION` edge from the root DOCUMENT always points to the latest version. `SUPERSEDED_BY` links the chain. + +```mermaid +graph LR + DOC[DOCUMENT] -->|CURRENT_VERSION| V3[VERSION v3] + V3 -->|SUPERSEDED_BY| V2[VERSION v2] + V2 -->|SUPERSEDED_BY| V1[VERSION v1] +``` + + + + +```typescript +async function createDocumentWithVersion( + title: string, + body: string, + authorId: string +) { + const tx = await db.tx.begin() + try { + const doc = await db.records.create( + { label: 'DOCUMENT', data: { title, authorId } }, + tx + ) + + const v1 = await db.records.create( + { + label: 'VERSION', + data: { + versionNumber: 1, + body, + authorId, + createdAt: new Date().toISOString(), + isCurrent: true + } + }, + tx + ) + + await db.records.attach( + { source: doc, target: v1, options: { type: 'CURRENT_VERSION', direction: 'out' } }, + tx + ) + + await db.tx.commit(tx) + return { doc, version: v1 } + } catch (err) { + await db.tx.rollback(tx) + throw err + } +} + +async function addVersion(docId: string, newBody: string, authorId: string) { + // Get current version + const currentResult = await db.records.find({ + labels: ['VERSION'], + where: { + DOCUMENT: { + $relation: { type: 'CURRENT_VERSION', direction: 'in' }, + __id: docId + }, + isCurrent: true + } + }) + const current = currentResult.data[0] + + const tx = await db.tx.begin() + try { + // Demote old current + await db.records.update(current.__id, { isCurrent: false }, tx) + + // Create new version + const docResult = await db.records.find({ labels: ['DOCUMENT'], where: { __id: docId } }) + const newVersion = await db.records.create( + { + label: 'VERSION', + data: { + versionNumber: (current.versionNumber as number) + 1, + body: 
newBody, + authorId, + createdAt: new Date().toISOString(), + isCurrent: true + } + }, + tx + ) + + // Move CURRENT_VERSION edge + await db.records.detach( + { + source: docResult.data[0], + target: current, + options: { type: 'CURRENT_VERSION' } + }, + tx + ) + await db.records.attach( + { source: docResult.data[0], target: newVersion, options: { type: 'CURRENT_VERSION', direction: 'out' } }, + tx + ) + + // Chain to previous + await db.records.attach( + { source: newVersion, target: current, options: { type: 'SUPERSEDED_BY', direction: 'out' } }, + tx + ) + + await db.tx.commit(tx) + return newVersion + } catch (err) { + await db.tx.rollback(tx) + throw err + } +} + +const { doc } = await createDocumentWithVersion( + 'Architecture Overview', + 'First draft.', + 'user-5' +) + +const v2 = await addVersion(doc.__id, 'Improved with diagrams.', 'user-5') +``` + + + + +```python +def create_document_with_version(title: str, body: str, author_id: str): + tx = db.transactions.begin() + try: + doc = db.records.create("DOCUMENT", {"title": title, "authorId": author_id}, transaction=tx) + v1 = db.records.create("VERSION", { + "versionNumber": 1, + "body": body, + "authorId": author_id, + "createdAt": datetime.now(timezone.utc).isoformat(), + "isCurrent": True + }, transaction=tx) + db.records.attach(doc.id, v1.id, {"type": "CURRENT_VERSION", "direction": "out"}, transaction=tx) + db.transactions.commit(tx) + return doc, v1 + except Exception: + db.transactions.rollback(tx) + raise + + +def add_version(doc_id: str, new_body: str, author_id: str): + current_result = db.records.find({ + "labels": ["VERSION"], + "where": { + "DOCUMENT": { + "$relation": {"type": "CURRENT_VERSION", "direction": "in"}, + "__id": doc_id + }, + "isCurrent": True + } + }) + current = current_result.data[0] + + tx = db.transactions.begin() + try: + db.records.update(current.id, {"isCurrent": False}, transaction=tx) + new_version = db.records.create("VERSION", { + "versionNumber": 
current.data["versionNumber"] + 1, + "body": new_body, + "authorId": author_id, + "createdAt": datetime.now(timezone.utc).isoformat(), + "isCurrent": True + }, transaction=tx) + + doc_result = db.records.find({"labels": ["DOCUMENT"], "where": {"__id": doc_id}}) + db.records.detach(doc_result.data[0].id, current.id, {"type": "CURRENT_VERSION"}, transaction=tx) + db.records.attach(doc_result.data[0].id, new_version.id, {"type": "CURRENT_VERSION", "direction": "out"}, transaction=tx) + db.records.attach(new_version.id, current.id, {"type": "SUPERSEDED_BY", "direction": "out"}, transaction=tx) + + db.transactions.commit(tx) + return new_version + except Exception: + db.transactions.rollback(tx) + raise +``` + + + + +```bash +# Query current version of a document +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"VERSION\"], + \"where\": { + \"DOCUMENT\": { + \"\$relation\": {\"type\": \"CURRENT_VERSION\", \"direction\": \"in\"}, + \"__id\": \"$DOC_ID\" + }, + \"isCurrent\": true + } + }" + +# Query full version history (ordered by version number) +curl -s -X POST "$BASE/records/search" \ + -H "$H" -H "Authorization: Bearer $TOKEN" \ + -d "{ + \"labels\": [\"VERSION\"], + \"where\": { + \"DOCUMENT\": { + \"\$relation\": {\"type\": \"CURRENT_VERSION\", \"direction\": \"in\"}, + \"__id\": \"$DOC_ID\" + } + }, + \"orderBy\": {\"versionNumber\": \"desc\"} + }" +``` + + + + +**When to use:** documents, configurations, contracts, or any record where historical content must be retrievable and comparable. + +--- + +## Approach 3: Hybrid (mutable state + immutable events) + +Keep the entity's current state queryable in one record while logging all changes as immutable EVENT records. This is the pattern from [Audit Trails](./audit-trails.mdx) and [Temporal Graphs](./temporal-graphs.mdx). 
Choose this when: + +- You need efficient current-state queries (no traversal to find latest version) +- You need a history log (who changed what, when) +- You do not need to serve the full historical content on demand + +--- + +## Choosing the right approach + +| Requirement | Best approach | +|---|---| +| No history needed | In-place mutation (PATCH/PUT) | +| Full historical content retrieval | Append-only VERSION chain | +| Audit log only (who / when / what changed) | Hybrid: mutable state + EVENT log | +| Point-in-time query (what was the state at T) | Append-only VERSION or Temporal STATE chain | +| High write throughput | In-place mutation | + +--- + +## Production caveat + +Append-only VERSION chains grow linearly with edit frequency. If documents are edited frequently (wiki-style), consider capping the chain at N versions and archiving older ones to a separate project or record-level storage. Always benchmark query performance against version count: traversals over a 500-version chain behave differently than traversals over a 10-version chain. 
+ +--- + +## Next steps + +- [Audit Trails](./audit-trails.mdx) — immutable event log alongside mutable state +- [Temporal Graphs](./temporal-graphs.mdx) — point-in-time reconstruction +- [Compliance and Retention Patterns](./compliance-retention.mdx) — archival and deletion diff --git a/docs/docs/typescript-sdk/ai/_category_.json b/docs/docs/typescript-sdk/ai/_category_.json new file mode 100644 index 00000000..0fbfdffd --- /dev/null +++ b/docs/docs/typescript-sdk/ai/_category_.json @@ -0,0 +1,10 @@ +{ + "label": "AI & Vectors", + "position": 1, + "collapsed": false, + "collapsible": true, + "link": { + "type": "doc", + "id": "typescript-sdk/ai/overview" + } +} diff --git a/docs/docs/typescript-sdk/ai/advanced-indexing.md b/docs/docs/typescript-sdk/ai/advanced-indexing.md new file mode 100644 index 00000000..514d5eae --- /dev/null +++ b/docs/docs/typescript-sdk/ai/advanced-indexing.md @@ -0,0 +1,245 @@ +--- +sidebar_position: 2 +title: Advanced Indexing — BYOV +--- + +# Advanced Indexing — Bring Your Own Vectors + +**External indexes** (BYOV — Bring Your Own Vectors) let you supply pre-computed embedding vectors instead of having the server compute them. Use them when you need: + +- A custom or private model the server cannot access +- Multimodal embeddings (image, audio, document structure) +- Vectors already produced by your ML pipeline +- Reproducible embeddings not tied to the server's active model + +--- + +## Creating an external index + +Pass `external: true` (shorthand) **or** `sourceType: 'external'` (explicit). 
Both are equivalent:
+
+```typescript
+// ── shorthand ────────────────────────────────────────────────
+const { data: extIndex } = await db.ai.indexes.create({
+  label: 'Article',
+  propertyName: 'body',
+  external: true,
+  dimensions: 768,
+  similarityFunction: 'cosine',
+})
+// extIndex.sourceType === 'external'
+// extIndex.status === 'awaiting_vectors'
+
+// ── explicit ─────────────────────────────────────────────────
+const { data: extIndex } = await db.ai.indexes.create({
+  label: 'Article',
+  propertyName: 'body',
+  sourceType: 'external',
+  dimensions: 768,
+  similarityFunction: 'cosine',
+})
+```
+
+An external index starts with status `awaiting_vectors` and transitions to `ready` once at least one vector has been written.
+
+> Because the server never calls an embedding model, `dimensions` is **required** for external indexes.
+
+### External vs managed comparison
+
+| | Managed | External |
+|---|---|---|
+| `sourceType` | `'managed'` | `'external'` |
+| Initial status | `'pending'` | `'awaiting_vectors'` |
+| Who computes embeddings | RushDB server (via configured model) | Your application |
+| `dimensions` | Optional (uses server default) | **Required** |
+| Backfill for existing records | Automatic | Manual via `upsertVectors` or inline writes |
+
+---
+
+## Pushing vectors with `upsertVectors`
+
+`db.ai.indexes.upsertVectors()` is the bulk upload API — ideal for seeding an index from a dataset or syncing after a batch pipeline.
+ +```typescript +db.ai.indexes.upsertVectors( + indexId: string, + payload: { items: Array<{ recordId: string; vector: number[] }> } +): Promise> +``` + +```typescript +const { data: records } = await db.records.find( + { where: { __label: 'Article' } } +) + +const myEmbedder = new MyEmbeddingModel() +const items = await Promise.all( + records.map(async record => ({ + recordId: record.__id, + vector: await myEmbedder.embed(record.body) + })) +) + +await db.ai.indexes.upsertVectors(extIndex.id, { items }) +``` + +The request is idempotent — calling it again with the same `recordId` **replaces** the stored vector. + +--- + +## Writing vectors at record creation time + +Instead of a two-step create → upsertVectors flow, you can write vectors inline using the `vectors` parameter on any write operation. The server resolves the correct external index automatically. + +See [Write Operations with Vectors](./write-with-vectors.md) for the full reference. + +```typescript +// One-step: create record AND write its vector +const { data: record } = await db.records.create({ + label: 'Article', + data: { title: 'Warp drives', body: 'Alcubierre metric...' }, + vectors: [{ propertyName: 'body', vector: myVec }] +}) +``` + +--- + +## Disambiguation {#disambiguation} + +When the same `(label, propertyName)` pair is covered by more than one external index (different `similarityFunction` or `dimensions`), RushDB cannot determine which index to use without extra information. 
+ +Specify `similarityFunction` to resolve the ambiguity: + +```typescript +// Two indexes on Product:embedding — cosine and euclidean +await db.ai.indexes.create({ + label: 'Product', propertyName: 'embedding', external: true, + similarityFunction: 'cosine', dimensions: 768, +}) +await db.ai.indexes.create({ + label: 'Product', propertyName: 'embedding', external: true, + similarityFunction: 'euclidean', dimensions: 768, +}) + +// ✅ explicit — writes to the cosine index only +await db.records.create({ + label: 'Product', + data: { name: 'Widget' }, + vectors: [{ + propertyName: 'embedding', + vector: vec, + similarityFunction: 'cosine', // <-- required when ambiguous + }] +}) + +// ✅ explicit — searches the euclidean index only +await db.ai.search({ + label: 'Product', + propertyName: 'embedding', + queryVector: vec, + similarityFunction: 'euclidean', // <-- required when ambiguous +}) + +// ❌ omitting similarityFunction when two indexes exist → 422 Unprocessable Entity +await db.records.create({ + label: 'Product', + data: { name: 'Gadget' }, + vectors: [{ propertyName: 'embedding', vector: vec }], +}) +``` + +### Index signature uniqueness + +Two index policies are considered **identical** (and a second `create` returns `409 Conflict`) when all five fields match: + +| Field | Effect on uniqueness | +|---|---| +| `label` | ✅ | +| `propertyName` | ✅ | +| `sourceType` | ✅ | +| `similarityFunction` | ✅ | +| `dimensions` | ✅ | + +Changing any one field produces a distinct index and both are allowed to coexist. + +--- + +## Complete BYOV worked example + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('your-api-key') + +// 1. Create the external index +const { data: idx } = await db.ai.indexes.create({ + label: 'Doc', + propertyName: 'content', + external: true, + dimensions: 3, + similarityFunction: 'cosine', +}) + +// 2. 
Create records + write inline vectors (one round trip per record) +const articles = [ + { title: 'Alpha', content: 'First article', vector: [1, 0, 0] }, + { title: 'Beta', content: 'Second article', vector: [0, 1, 0] }, + { title: 'Gamma', content: 'Third article', vector: [0, 0, 1] }, +] + +for (const { title, content, vector } of articles) { + await db.records.create({ + label: 'Doc', + data: { title, content }, + vectors: [{ propertyName: 'content', vector }], + }) +} + +// 3. Search using a pre-computed query vector +const { data: results } = await db.ai.search({ + label: 'Doc', + propertyName: 'content', + queryVector: [1, 0, 0], // closest to Alpha + limit: 3, +}) + +console.log(results[0].title) // 'Alpha' +console.log(results[0].__score) // ~1.0 +``` + +--- + +## Batch import with `$vectors` + +For bulk seeding, use `records.importJson()` with a `$vectors` key on each object: + +```typescript +await db.records.importJson({ + "Doc": [ + { title: "Alpha", content: "First article", "$vectors": [{ propertyName: "content", vector: [1, 0, 0] }] }, + { title: "Beta", content: "Second article", "$vectors": [{ propertyName: "content", vector: [0, 1, 0] }] }, + { title: "Gamma", content: "Third article", "$vectors": [{ propertyName: "content", vector: [0, 0, 1] }] }, + ] +}) +``` + +`$vectors` entries are **stripped** from the stored record data — they only drive the vector write and do **not** appear as child records or extra properties. 
+ +--- + +## Mixing managed and external indexes + +You can have both a managed index and an external index on the same property simultaneously: + +```typescript +// Managed — server embeds for full-text search +await db.ai.indexes.create({ label: 'Product', propertyName: 'description' }) + +// External — your custom multimodal model +await db.ai.indexes.create({ + label: 'Product', propertyName: 'description', + external: true, dimensions: 512, similarityFunction: 'cosine', +}) +``` + +Specifying `similarityFunction` in `db.ai.search()` routes the query to the intended index. diff --git a/docs/docs/typescript-sdk/ai/indexing.md b/docs/docs/typescript-sdk/ai/indexing.md new file mode 100644 index 00000000..ec07eb12 --- /dev/null +++ b/docs/docs/typescript-sdk/ai/indexing.md @@ -0,0 +1,219 @@ +--- +sidebar_position: 1 +title: Embedding Indexes +--- + +# Embedding Indexes + +An **embedding index** is a policy that tells RushDB to vectorize a specific string property for a label. Once `status` is `ready`, every record matching that label+property pair is searchable via `db.ai.search()`. + +--- + +## How indexes work + +Indexes are scoped to `(label, propertyName)`. "Book:description" and "Article:description" are completely independent — they maintain separate vector stores and never interfere. + +``` +Index policy + label: "Book" + propertyName: "description" + sourceType: "managed" + dimensions: 1536 + status: "ready" + +↓ backfill runs automatically + +Book records get vectors stored on their VALUE relationships: + rel._emb_managed_cosine_1536 = [0.1, 0.2, ...] +``` + +When new records are created or existing records are updated, the index transitions back to `pending` and vectors are recomputed on the next backfill cycle. + +--- + +## `db.ai.indexes.find()` + +List all embedding index policies for the current project. 
+ +```typescript +const { data: indexes } = await db.ai.indexes.find() +/* +[ + { + id: "01jb...", + label: "Book", + propertyName: "description", + sourceType: "managed", + similarityFunction: "cosine", + dimensions: 1536, + status: "ready", + modelKey: "text-embedding-3-small", + ... + } +] +*/ +``` + +--- + +## `db.ai.indexes.create()` + +Create a new managed embedding index for a string property. + +```typescript +db.ai.indexes.create(params: { + label: string + propertyName: string + sourceType?: 'managed' | 'external' + similarityFunction?: 'cosine' | 'euclidean' // default: 'cosine' + dimensions?: number // default: server RUSHDB_EMBEDDING_DIMENSIONS +}): Promise> +``` + +```typescript +// Simplest form — uses server-configured model and dimensions +const { data: index } = await db.ai.indexes.create({ + label: 'Book', + propertyName: 'description' +}) + +console.log(index.status) // 'pending' → backfill starts immediately +``` + +```typescript +// With explicit parameters +const { data: index } = await db.ai.indexes.create({ + label: 'Article', + propertyName: 'body', + similarityFunction: 'cosine', + dimensions: 1536 +}) +``` + +> Attempting to create a duplicate `(label, propertyName, sourceType, similarityFunction, dimensions)` tuple returns `409 Conflict`. + +### Index lifecycle + +| Status | Description | +|---|---| +| `pending` | Policy created, waiting for backfill scheduler | +| `indexing` | Backfill in progress | +| `awaiting_vectors` | External index — waiting for client to push vectors | +| `ready` | All existing records have vectors, search is available | +| `error` | Backfill failed; check server logs for the cause | + +--- + +## `db.ai.indexes.stats(id)` + +Returns the fill rate for an index — useful for progress monitoring or health checks. 
+ +```typescript +db.ai.indexes.stats(id: string): Promise> +``` + +```typescript +const { data: stats } = await db.ai.indexes.stats(index.id) +console.log(`${stats.indexedRecords} / ${stats.totalRecords} records indexed`) +``` + +```typescript +type EmbeddingIndexStats = { + totalRecords: number + indexedRecords: number +} +``` + +--- + +## `db.ai.indexes.delete(id)` + +Remove an embedding index policy and its scoped vector data. + +```typescript +await db.ai.indexes.delete(index.id) +``` + +The underlying Neo4j DDL vector index is only dropped when **zero embeddings remain** across the entire project. This avoids unnecessary index rebuilds when multiple policies share the same `(dimensions, similarityFunction)` combination. + +--- + +## Response type + +```typescript +type EmbeddingIndex = { + id: string + projectId: string + /** Neo4j label this index is scoped to (e.g. "Book"). */ + label: string + propertyName: string + modelKey: string + sourceType: 'managed' | 'external' + similarityFunction: 'cosine' | 'euclidean' + dimensions: number + vectorPropertyName: string // internal Neo4j property name for the vector + enabled: boolean + status: string + createdAt: string + updatedAt: string +} +``` + +--- + +## Waiting for an index to become ready + +For managed indexes, backfill runs asynchronously. 
Poll `db.ai.indexes.find()` until status is `ready`: + +```typescript +async function waitForIndexReady( + db: RushDB, + indexId: string, + timeoutMs = 90_000 +): Promise { + const deadline = Date.now() + timeoutMs + while (Date.now() < deadline) { + const { data: indexes } = await db.ai.indexes.find() + const idx = indexes.find(i => i.id === indexId) + if (idx?.status === 'ready') return + if (idx?.status === 'error') throw new Error('Index entered error state') + await new Promise(r => setTimeout(r, 3_000)) + } + throw new Error('Index did not become ready in time') +} + +const { data: index } = await db.ai.indexes.create({ label: 'Book', propertyName: 'description' }) +await waitForIndexReady(db, index.id) +// now safe to call db.ai.search(...) +``` + +--- + +## Multiple indexes on the same property + +You can have more than one index per `(label, propertyName)` pair, provided the signature differs: + +```typescript +// Same label + property, different similarity function +await db.ai.indexes.create({ + label: 'Product', + propertyName: 'description', + similarityFunction: 'cosine', + dimensions: 768 +}) + +await db.ai.indexes.create({ + label: 'Product', + propertyName: 'description', + similarityFunction: 'euclidean', + dimensions: 768 +}) +``` + +When performing a search or writing inline vectors against a property with multiple indexes, specify `similarityFunction` to disambiguate. See [Advanced Indexing — BYOV](./advanced-indexing.md#disambiguation) for details. + +--- + +## `List` properties + +String array properties are supported. Each item in the array is embedded individually, then mean-pooled into a single vector stored on the relationship. 
diff --git a/docs/docs/typescript-sdk/ai/overview.md b/docs/docs/typescript-sdk/ai/overview.md new file mode 100644 index 00000000..80406562 --- /dev/null +++ b/docs/docs/typescript-sdk/ai/overview.md @@ -0,0 +1,188 @@ +--- +sidebar_position: 0 +title: Overview +--- + +# AI & Semantic Search + +RushDB is a **self-aware memory layer for agents, humans, and apps**. It continuously understands its own structure — labels, fields, value distributions, relationships — and exposes that knowledge so that agents can reason over real data without hallucinating schema details, and apps can retrieve semantically relevant context on demand. + +The `db.ai` namespace covers three capabilities: + +| Capability | Description | +|---|---| +| **Graph Ontology** | Self-describing schema discovery: label names, field types, value ranges, and the relationship map — always up to date | +| **Embedding Indexes** | Per-label vector policies that turn string properties into long-term semantic memory | +| **Semantic Search** | Cosine/euclidean similarity retrieval over indexed properties, for agents and apps alike | + +--- + +## How it fits together + +``` +┌─────────────────────────────────────────────────────┐ +│ Your data (records + relationships) │ +│ │ +│ BOOK { title: "...", description: "..." } │ +└────────────────────┬────────────────────────────────┘ + │ + db.ai.indexes.create() + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Embedding index policy │ +│ label: BOOK property: description dims: 1536 │ +│ sourceType: managed | external │ +└────────────────────┬────────────────────────────────┘ + │ + Backfill (managed) / inline vectors (external) + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Vector stored on VALUE relationship │ +│ rel._emb_managed_cosine_1536 = [0.1, 0.2, ...] 
│ +└────────────────────┬────────────────────────────────┘ + │ + db.ai.search({ query / queryVector }) + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ SemanticSearchResult[] — records ranked by score │ +│ result.__score = 0.94 (cosine similarity) │ +└─────────────────────────────────────────────────────┘ +``` + +--- + +## Quick links + +| Topic | Description | +|---|---| +| [Ontology](./overview.md#graph-ontology) | Schema discovery with `getOntology` / `getOntologyMarkdown` | +| [Indexing](./indexing.md) | Create and manage managed embedding indexes | +| [Advanced indexing — BYOV](./advanced-indexing.md) | Bring Your Own Vectors: external indexes, inline writes | +| [Semantic search](./search.md) | Query by meaning with `db.ai.search()` | +| [Writing with vectors](./write-with-vectors.md) | Attach vectors at create / upsert / importJson time | + +--- + +## Graph Ontology + +The ontology endpoints expose a live snapshot of your database structure — without any manual schema definitions. + +### `db.ai.getOntologyMarkdown()` + +Returns the full schema as compact Markdown — the **recommended format for LLM context injection**. + +```typescript +db.ai.getOntologyMarkdown( + params?: { labels?: string[] }, + transaction?: Transaction | string +): Promise> +``` + +```typescript +// Inject into LLM at session start +const { data: schema } = await db.ai.getOntologyMarkdown() +const messages = [ + { role: 'system', content: `You are a data assistant.\n\n${schema}` }, + { role: 'user', content: 'How many paid orders are there?' } +] + +// Scope to specific labels +const { data: orderSchema } = await db.ai.getOntologyMarkdown({ labels: ['Order'] }) +``` + +
+Example output + +```text +# Graph Ontology + +## Labels + +| Label | Count | +|-----------|------:| +| `Order` | 1840 | +| `User` | 312 | +| `Product` | 95 | + +--- + +## `Order` (1840 records) + +### Properties + +| Property | Type | Values / Range | +|-------------|----------|------------------------------------------| +| `status` | string | `pending`, `paid`, `shipped` (+2 more) | +| `total` | number | `4.99`..`2499.00` | +| `createdAt` | datetime | `2024-01-03`..`2026-02-27` | + +### Relationships + +| Type | Direction | Other Label | +|-------------|-----------|-------------| +| `PLACED_BY` | out | `User` | +| `CONTAINS` | out | `Product` | +``` + +
+ +--- + +### `db.ai.getOntology()` + +Returns the same ontology as a structured JSON array — useful for schema UIs, auto-complete, or looking up property IDs for `db.properties.values()`. + +```typescript +db.ai.getOntology( + params?: { labels?: string[] }, + transaction?: Transaction | string +): Promise<ApiResponse<OntologyItem[]>> +``` + +```typescript +// List all labels with counts +const { data: ontology } = await db.ai.getOntology() +for (const item of ontology) { + console.log(`${item.label}: ${item.count} records`) +} + +// Get property ID for value enumeration +const { data: [bookSchema] } = await db.ai.getOntology({ labels: ['Book'] }) +const genreProp = bookSchema.properties.find(p => p.name === 'genre') +const { data: genres } = await db.properties.values({ id: genreProp.id }) +``` + +```typescript +type OntologyItem = { + label: string + count: number + properties: OntologyProperty[] + relationships: OntologyRelationship[] +} + +type OntologyProperty = { + id: string // use with db.properties.values() + name: string + type: string // 'string' | 'number' | 'boolean' | 'datetime' + values?: Array<string | boolean> // up to 10 samples (string/boolean only) + min?: number | string // number/datetime only + max?: number | string +} + +type OntologyRelationship = { + label: string + type: string + direction: 'in' | 'out' +} +``` + +:::note Caching +Both methods share a **1-hour cache** per project. The first call after TTL expiry triggers a full graph scan; all subsequent calls within the hour are instant. +::: + +:::tip Agent quickstart +Call `db.ai.getOntologyMarkdown()` first in every AI session. Without it, models will hallucinate field and label names.
+::: diff --git a/docs/docs/typescript-sdk/ai/search.md b/docs/docs/typescript-sdk/ai/search.md new file mode 100644 index 00000000..d2821461 --- /dev/null +++ b/docs/docs/typescript-sdk/ai/search.md @@ -0,0 +1,198 @@ +--- +sidebar_position: 3 +title: Semantic Search +--- + +# Semantic Search + +`db.ai.search()` performs semantic vector search across records that have an associated embedding index. + +--- + +## Signature + +```typescript +db.ai.search(params: { + /** Neo4j label to search within (e.g. "Book"). */ + label: string | string[] + + /** Property the target embedding index is scoped to. */ + propertyName: string + + /** Natural-language query — used by managed indexes. */ + query?: string + + /** Pre-computed query vector — used by external indexes. */ + queryVector?: number[] + + /** Override the similarity function when multiple indexes match. */ + similarityFunction?: 'cosine' | 'euclidean' + + /** Override dimensions when multiple indexes match. */ + dimensions?: number + + /** Prefilter: only return records that also satisfy this where clause. */ + where?: WhereClause + + /** Maximum results to return. */ + limit?: number + + /** Results to skip (for pagination). */ + skip?: number +}): Promise<ApiResponse<SemanticSearchResult[]>> +``` + +--- + +## Result type + +```typescript +type SemanticSearchResult = { + /** Similarity score: higher is better. */ + __score: number + /** Record ID. */ + __id: string + /** Record label. */ + __label: string + /** All record properties are returned at the top level. */ + [key: string]: unknown +} +``` + +Results are always ordered by `__score` descending — closest match first. + +--- + +## Managed search (query text) + +For a **managed** index, pass `query` (a natural-language string). The server embeds it using the same model that was used when building the index, then ranks the prefiltered candidates by similarity.
+ +```typescript +const { data: results } = await db.ai.search({ + label: 'Book', + propertyName: 'description', + query: 'space exploration and interstellar travel', + limit: 5, +}) + +results.forEach(r => { + console.log(`[${r.__score.toFixed(4)}] ${r.title}`) +}) +``` + +--- + +## External search (query vector) + +For an **external** index, pass `queryVector` — a pre-computed embedding produced by your own model. No text is sent to the server. + +```typescript +const myEmbedder = new MyEmbeddingModel() +const vec = await myEmbedder.embed('space exploration') + +const { data: results } = await db.ai.search({ + label: 'Article', + propertyName: 'body', + queryVector: vec, + limit: 10, +}) +``` + +- `query` is **not allowed** with external indexes — the server has no model to embed it. +- `queryVector` is **not required** for managed indexes but is accepted (bypasses server embedding). + +### Dimension inference + +When `queryVector` is supplied you can omit `dimensions` — the server infers it from `queryVector.length`: + +```typescript +// dimensions is optional when queryVector is given +await db.ai.search({ + label: 'Product', + propertyName: 'embedding', + queryVector: [0.1, 0.9, 0.4], // length 3 → dimensions inferred as 3 +}) +``` + +--- + +## Filtering with `where` + +The `where` clause acts as a **prefilter** — only records that satisfy the filter are candidates for similarity ranking. RushDB already scopes search to the current project, and `where` adds your application-level constraints before scoring. + +```typescript +const { data: results } = await db.ai.search({ + label: 'Product', + propertyName: 'description', + query: 'wireless headphones', + where: { + category: { $eq: 'electronics' }, + inStock: { $eq: true }, + price: { $lt: 100 }, + }, + limit: 20, +}) +``` + +All `WhereClause` operators supported by `db.records.find()` are available here. 
+ +--- + +## Multi-label search + +Pass an array of labels to search across multiple entity types simultaneously: + +```typescript +const { data: results } = await db.ai.search({ + label: ['Article', 'Post', 'Comment'], + propertyName: 'body', + query: 'machine learning trends', + limit: 10, +}) + +// Each result carries __label so you can tell them apart +results.forEach(r => console.log(r.__label, r.__score, r.title ?? r.text)) +``` + +All listed labels must have an embedding index on the same `propertyName`, or the request will return `404` for the missing labels. + +--- + +## Disambiguation + +When two indexes exist for the same `(label, propertyName)`, you must specify `similarityFunction` (and optionally `dimensions`) to select the target index: + +```typescript +// Two indexes: Product:embedding/cosine and Product:embedding/euclidean +await db.ai.search({ + label: 'Product', + propertyName: 'embedding', + queryVector: vec, + similarityFunction: 'cosine', // required — otherwise 422 Unprocessable Entity +}) +``` + +--- + +## Pagination + +```typescript +const PAGE = 20 + +// Page 1 +const { data: page1 } = await db.ai.search({ ..., limit: PAGE, skip: 0 }) +// Page 2 +const { data: page2 } = await db.ai.search({ ..., limit: PAGE, skip: PAGE }) +``` + +--- + +## Error reference + +| HTTP | Cause | +|---|---| +| `404 Not Found` | No enabled embedding index found for `(label, propertyName)` | +| `422 Unprocessable Entity` | Multiple indexes match and `similarityFunction` was not specified | +| `422 Unprocessable Entity` | `query` text supplied for an external index (server cannot embed it) | +| `422 Unprocessable Entity` | Vector length does not match index `dimensions` | +| `503 Service Unavailable` | Embedding model unavailable (managed indexes only) | diff --git a/docs/docs/typescript-sdk/ai/write-with-vectors.md b/docs/docs/typescript-sdk/ai/write-with-vectors.md new file mode 100644 index 00000000..5e0ac532 --- /dev/null +++ 
b/docs/docs/typescript-sdk/ai/write-with-vectors.md @@ -0,0 +1,344 @@ +--- +sidebar_position: 4 +title: Writing Records with Vectors +--- + +# Writing Records with Vectors + +RushDB lets you attach pre-computed embedding vectors to records **at write time**, eliminating the need for a separate `upsertVectors` call. Any operation that creates or modifies records supports this through the `vectors` parameter (or the `$vectors` key in batch imports). + +This feature requires at least one [external index](./advanced-indexing.md) to exist for the target `(label, propertyName)`. + +--- + +## `vectors` parameter + +All write operations accept a `vectors` array: + +```typescript +type VectorEntry = { + /** Property name this vector is associated with. */ + propertyName: string + /** Pre-computed embedding vector. */ + vector: number[] + /** Required when multiple indexes exist on the same property. */ + similarityFunction?: 'cosine' | 'euclidean' +} +``` + +--- + +## `records.create()` with vectors + +```typescript +const { data: record } = await db.records.create({ + label: 'Article', + data: { + title: 'How transformers work', + body: 'Attention is all you need ...', + }, + vectors: [ + { propertyName: 'body', vector: myEmbed('Attention is all you need ...') } + ], +}) + +console.log(record.__id) // record is created AND vector is written atomically +``` + +--- + +## `records.upsert()` with vectors + +`upsert` is idempotent on the record's slug (natural key). Passing `vectors` writes (or replaces) the stored vector for each `propertyName` in the same call: + +```typescript +// First call — creates the record + writes vector +const { data: r1 } = await db.records.upsert({ + label: 'Article', + data: { slug: 'transformers-101', title: 'Transformers 101', body: '...' 
}, + vectors: [{ propertyName: 'body', vector: v1 }], +}) + +// Second call — same slug → updates the title/body + replaces the vector +const { data: r2 } = await db.records.upsert({ + label: 'Article', + data: { slug: 'transformers-101', title: 'Transformers 101 (revised)', body: 'Updated ...' }, + vectors: [{ propertyName: 'body', vector: v2 }], +}) + +console.log(r1.__id === r2.__id) // true — same record +``` + +--- + +## `records.set()` with vectors + +`set` replaces all properties of a record with new values. Including `vectors` writes those vectors at the same time: + +```typescript +// Find or create the record first +const { data: rec } = await db.records.create({ + label: 'Product', + data: { name: 'Widget', price: 9.99 }, +}) + +// Full replace — data AND vector updated together +await db.records.set(rec.__id, { + data: { name: 'Widget Pro', price: 19.99 }, + vectors: [{ propertyName: 'description', vector: newVec }], +}) +``` + +--- + +## `records.importJson()` with `$vectors` + +For bulk ingestion, add a `$vectors` key alongside properties in each JSON object. The format is the same as the `VectorEntry` array: + +```typescript +await db.records.importJson({ + "Article": [ + { + title: "Alpha", + body: "First article about AI", + "$vectors": [{ propertyName: "body", vector: [1, 0, 0] }] + }, + { + title: "Beta", + body: "Second article about ML", + "$vectors": [{ propertyName: "body", vector: [0, 1, 0] }] + }, + { + title: "Gamma", + body: "Third article about DL", + "$vectors": [{ propertyName: "body", vector: [0, 0, 1] }] + }, + ] +}) +``` + +Important: `$vectors` entries are stripped before the record is persisted. They: +- **Do not** appear as record properties +- **Do not** create child records +- **Do not** appear in query results + +--- + +## `records.createMany()` with vectors + +`createMany` is optimised for flat (CSV-like) rows. 
Use the top-level `vectors` parameter — an array indexed by row position — to attach a vector to each record without nesting arrays inside your flat data: + +```typescript +await db.records.createMany({ + label: 'Product', + data: [ + { name: 'Alpha', description: 'First product' }, + { name: 'Beta', description: 'Second product' }, + { name: 'Gamma', description: 'Third product' }, + ], + vectors: [ + [{ propertyName: 'description', vector: [1, 0, 0] }], // row 0 + [{ propertyName: 'description', vector: [0, 1, 0] }], // row 1 + [{ propertyName: 'description', vector: [0, 0, 1] }], // row 2 + ], + options: { returnResult: true }, +}) +``` + +### Sparse vectors + +Leave rows without vectors by providing a shorter `vectors` array (any unspecified trailing rows are skipped): + +```typescript +await db.records.createMany({ + label: 'Product', + data: [{ name: 'Alpha' }, { name: 'Beta' }, { name: 'Gamma' }], + // only row 0 gets a vector; rows 1 and 2 are skipped + vectors: [[{ propertyName: 'description', vector: myVec }]], +}) +``` + +### Validation + +The SDK throws synchronously if `vectors.length > data.length`: + +```typescript +// ❌ Throws: "vectors length (3) exceeds the number of data rows (2)" +db.records.createMany({ + label: 'Product', + data: [{ name: 'A' }, { name: 'B' }], + vectors: [ + [{ propertyName: 'description', vector: [1, 0, 0] }], + [{ propertyName: 'description', vector: [0, 1, 0] }], + [{ propertyName: 'description', vector: [0, 0, 1] }], // no row 2 + ], +}) +``` + +--- + +## `records.importCsv()` with vectors + +CSV data is a raw string, so per-row vectors are supplied as a separate `vectors` parameter using the same indexed-array format as `createMany`. Row indices are 0-based and refer to data rows after the header is consumed. 
+ +```typescript +const csv = `name,description +Alpha,First product +Beta,Second product +Gamma,Third product` + +await db.records.importCsv({ + label: 'Product', + data: csv, + vectors: [ + [{ propertyName: 'description', vector: [1, 0, 0] }], // csv row 0 + [{ propertyName: 'description', vector: [0, 1, 0] }], // csv row 1 + [{ propertyName: 'description', vector: [0, 0, 1] }], // csv row 2 + ], + options: { returnResult: true }, +}) +``` + +### Sparse vectors + +Same sparse pattern as `createMany` — any rows beyond `vectors.length` get no vector: + +```typescript +await db.records.importCsv({ + label: 'Product', + data: csv, + // only the first row gets a vector + vectors: [[{ propertyName: 'description', vector: myVec }]], +}) +``` + +### Validation + +The server returns `400 Bad Request` if `vectors.length` exceeds the number of data rows (validated after CSV parsing). The client does not know the row count before sending since CSV is a raw string. + +``` +400 Bad Request: vectors length (5) exceeds the number of CSV data rows (3) +``` + +--- + +## Specifying `similarityFunction` for disambiguation + +When a single `(label, propertyName)` has multiple external indexes registered (e.g. one cosine and one euclidean), you must include `similarityFunction` in each `VectorEntry` so the server can route the write to the correct index: + +```typescript +// Write to the cosine index +await db.records.create({ + label: 'Product', + data: { name: 'Widget' }, + vectors: [ + { propertyName: 'embedding', vector: vec, similarityFunction: 'cosine' } + ], +}) +``` + +Omitting `similarityFunction` when multiple indexes match returns `422 Unprocessable Entity`. + +--- + +## Multiple vectors in one call + +You can write vectors for multiple properties or indexes in a single operation: + +```typescript +await db.records.create({ + label: 'Document', + data: { title: 'Multi-modal doc', abstract: '...', fullText: '...' 
}, + vectors: [ + { propertyName: 'abstract', vector: abstractVec }, + { propertyName: 'fullText', vector: fullTextVec }, + ], +}) +``` + +Each entry is matched independently against the available external indexes. + +--- + +## Complete worked example + +```typescript +import RushDB from '@rushdb/javascript-sdk' + +const db = new RushDB('your-api-key') +const emb = new YourEmbeddingModel() + +// 1. Create an external index once (idempotent via 409 Conflict) +const { data: idx } = await db.ai.indexes.create({ + label: 'Article', + propertyName: 'body', + external: true, + dimensions: 768, + similarityFunction: 'cosine', +}).catch(e => e.status === 409 ? db.ai.indexes.find() : Promise.reject(e)) + +// 2. Create records from your pipeline, embedding as you go +const docs = [ + { title: 'Alpha', body: 'First doc' }, + { title: 'Beta', body: 'Second doc' }, +] + +for (const doc of docs) { + await db.records.create({ + label: 'Article', + data: doc, + vectors: [{ propertyName: 'body', vector: await emb.embed(doc.body) }], + }) +} + +// 3. Search +const queryVec = await emb.embed('first document') +const { data } = await db.ai.search({ + label: 'Article', + propertyName: 'body', + queryVector: queryVec, + limit: 3, +}) +console.log(data[0].title) // 'Alpha' +``` + +--- + +## Inline vectors vs. 
`upsertVectors` + +| | Inline `vectors` | `db.ai.indexes.upsertVectors()` | +|---|---|---| +| **Round trips** | 1 (write + vector together) | 2+ (write, then upload) | +| **Use case** | Streaming ingestion, real-time pipeline | Batch backfill, dataset migration | +| **Idempotency** | Depends on the write operation used | Always idempotent per `recordId` | +| **Availability** | `create`, `upsert`, `set`, `createMany`, `importCsv`, `importJson` | Standalone call on any existing records | +| **Multi-record** | `createMany` or `importCsv` with indexed `vectors[][]`, `importJson` with `$vectors` per item | Single bulk payload | + +For streaming pipelines that produce records one-by-one or in small batches, inline vectors are simpler and more efficient. For seeding an index from a large existing dataset, `upsertVectors` is the right choice. + +--- + +## Per-row vs. per-item vector formats + +| Method | Vector syntax | Notes | +|---|---|---| +| `create` | `vectors: VectorEntry[]` | single record | +| `upsert` | `vectors: VectorEntry[]` | single record, idempotent | +| `set` | `vectors: VectorEntry[]` | single record, full replace | +| `importJson` | `"$vectors": VectorEntry[]` inside each item | nested in data object | +| `createMany` | `vectors: VectorEntry[][]` (indexed) | `vectors[i]` → `data[i]` | +| `importCsv` | `vectors: VectorEntry[][]` (indexed) | `vectors[i]` → CSV row `i` | + +`importJson` uses the `$vectors` in-item style because JSON items can themselves be nested objects with their own structure. `createMany` and `importCsv` use the external indexed array style because the data they carry is flat — no room for nested arrays inside a flat record. 
+ +--- + +## Error conditions + +| Error | Cause | Method | +|---|---|---| +| `404 Not Found` | No external index exists for `(label, propertyName)` | all | +| `422 Unprocessable Entity` | `vector.length` does not match `index.dimensions` | all | +| `422 Unprocessable Entity` | Multiple indexes match and `similarityFunction` was not specified | all | +| `400 Bad Request` | `vectors.length` exceeds number of CSV data rows | `importCsv` | +| Client `Error` | `vectors.length` exceeds `data.length` | `createMany` (thrown synchronously) | diff --git a/docs/docs/typescript-sdk/introduction.md b/docs/docs/typescript-sdk/introduction.md index 671d787c..f69cad3a 100644 --- a/docs/docs/typescript-sdk/introduction.md +++ b/docs/docs/typescript-sdk/introduction.md @@ -1,195 +1,85 @@ --- -sidebar_position: 1 +sidebar_position: 0 title: Introduction --- -# RushDB TypeScript/JavaScript SDK +# TypeScript / JavaScript SDK -Welcome to the comprehensive guide on working with the RushDB SDK. This SDK provides a modern, flexible interface for managing your data, relationships, and metadata in RushDB through JavaScript and TypeScript applications. +Push JSON, query by value or meaning, traverse graphs — from Node.js or the browser. -## What is RushDB SDK? - -The RushDB JavaScript/TypeScript SDK is a powerful client library that lets you interact with RushDB's features directly from your JavaScript or TypeScript applications. Whether you're building web applications, server backends, or automation scripts, this SDK gives you full access to RushDB's capabilities with an intuitive, type-safe API. 
- -## Highlights - -- **✨ No Configuration Needed**: Plug-and-play design requires minimal setup to get started -- **🤖 Automatic Type Inference**: Enjoy seamless type safety with automatic TypeScript inference -- **↔️ Isomorphic Architecture**: Fully compatible with both server and browser environments -- **🏋️ Zero Dependencies**: Lightweight (just 6.9KB gzipped) and efficient with no external dependencies - -## Getting Started - -### Installation - -To begin using RushDB SDK, add it to your project with your preferred package manager: +## Install ```bash -# Using npm npm install @rushdb/javascript-sdk - -# Using yarn -yarn add @rushdb/javascript-sdk - -# Using pnpm -pnpm add @rushdb/javascript-sdk +# or: yarn add @rushdb/javascript-sdk | pnpm add @rushdb/javascript-sdk ``` -### Quick Setup - -After installation, create an instance of the RushDB SDK in your project: +## Connect ```typescript -import RushDB from '@rushdb/javascript-sdk'; +import RushDB from '@rushdb/javascript-sdk' -const db = new RushDB('RUSHDB_API_KEY'); +const db = new RushDB('RUSHDB_API_KEY') ``` -Replace `RUSHDB_API_KEY` with your actual API token from the [RushDB Dashboard](https://app.rushdb.com/). +Get your API token from the [RushDB Dashboard](https://app.rushdb.com/). -### Usage Example +## First write ```typescript -import RushDB from '@rushdb/javascript-sdk' - -// Setup SDK -const db = new RushDB("RUSHDB_API_KEY"); - -// Push any data, and RushDB will automatically flatten it into Records -// and establish relationships between them accordingly. 
+// Nested objects become linked records automatically await db.records.importJson({ - label: "COMPANY", + label: 'MOVIE', data: { - name: 'Google LLC', - address: '1600 Amphitheatre Parkway, Mountain View, CA 94043, USA', - foundedAt: '1998-09-04T00:00:00.000Z', - rating: 4.9, - DEPARTMENT: [{ - name: 'Research & Development', - description: 'Innovating and creating advanced technologies for AI, cloud computing, and consumer devices.', - // Nested relationships are automatically created - PROJECT: [{ - name: 'Bard AI', - // ... more properties - }] - }] + title: 'Inception', + rating: 8.8, + genre: 'sci-fi', + ACTOR: [ + { name: 'Leonardo DiCaprio', country: 'USA' }, + { name: 'Ken Watanabe', country: 'Japan' } + ] } }) - -// Find Records by specific criteria -const employees = await db.records.find({ - labels: ['EMPLOYEE'], - where: { - position: { $contains: 'AI' } - } -}) -``` - -## SDK Configuration Options - -The RushDB SDK is designed to be flexible and configurable. When initializing the SDK, you can provide configuration options to customize its behavior. 
- -### Constructor Parameters - -```typescript -const db = new RushDB(token, config); -``` - -**Parameters:** - -- `token` (`string`): Your API token from the RushDB Dashboard -- `config` (`SDKConfig`): Optional configuration object - -### Configuration Object (`SDKConfig`) - -The configuration object allows you to customize the SDK's behavior and connection details: - -```typescript -type SDKConfig = { - httpClient?: HttpClientInterface; - timeout?: number; - logger?: Logger; - options?: { - allowForceDelete?: boolean; - } -} & ApiConnectionConfig; +// Created: MOVIE → ACTOR × 2 (relationships wired automatically) ``` -Where `ApiConnectionConfig` is either: +## First read ```typescript -{ - host?: string; - port?: number; - protocol?: string; -} -``` - -Or: - -```typescript -{ - url?: string; -} +const { data: movies, total } = await db.records.find({ + labels: ['MOVIE'], + where: { rating: { $gte: 8 } }, + orderBy: { rating: 'desc' } +}) ``` -### Configuration Options Explained - -- **Connection settings**: - - `url`: The complete URL to the RushDB API (e.g., `https://api.rushdb.com/api/v1`) - - **OR** the individual components: - - `host`: The domain name or IP address (e.g., `api.rushdb.com/api/v1`) - - `port`: The port number (defaults to 80 for HTTP, 443 for HTTPS) - - `protocol`: Either `http` or `https` (defaults to `https`) - -- **Advanced options**: - - `timeout`: Request timeout in milliseconds (default: 30000) - - `httpClient`: Custom HTTP client implementation - - `logger`: Custom logging function - - `options.allowForceDelete`: When set to `true`, allows deleting all records without specifying criteria (defaults to `false` for safety) - -### Example with Configuration +## Configuration ```typescript -import RushDB from '@rushdb/javascript-sdk'; - const db = new RushDB('RUSHDB_API_KEY', { - url: 'http://localhost:3000/api/v1', - timeout: 5000, - options: { - allowForceDelete: false - } -}); -``` - -## SDK Architecture - -The RushDB SDK uses a 
consistent approach for accessing the RushDB API instance across all SDK components. Classes like `Transaction`, `DBRecordInstance`, `DBRecordsArrayInstance` and `Model` all use the static `RushDB.getInstance()` method to obtain the API instance, ensuring a uniform pattern throughout the SDK. - -This architecture provides several benefits: - -1. **Simplified Access**: Components can access the API without managing dependencies -2. **Consistency**: All components use the same mechanism to access API methods -3. **Cleaner Code**: Removes the need for inheritance from a base proxy class - -Example of the implementation pattern: - -```typescript -// Internal implementation example -async someMethod(param: string): Promise { - const instance = RushDB.getInstance() // Get the RushDB instance - return await instance.someApi.someMethod(param) // Use the instance to make API calls -} + url: 'http://localhost:3000/api/v1', // or use host/port/protocol + timeout: 5000 // default: 30000ms +}) ``` -## Next Steps - -To continue learning about the RushDB TypeScript SDK, explore these related sections: - -- [Working with Records](../typescript-sdk/records/create-records.md) -- [Managing Relationships](../typescript-sdk/relationships) -- [Working with Properties](../typescript-sdk/properties) -- [Working with Labels](../typescript-sdk/labels) -- [Working with Transactions](../typescript-sdk/transactions) - -Before you begin exploring the SDK features, make sure you have a valid API token. If you haven't set up your RushDB account yet, follow our guide to [registering on the dashboard and generating an API token](../get-started/quick-tutorial.mdx). 
+| Option | Default | Description | +|---|---|---| +| `url` | — | Full API URL (alternative to host/port/protocol) | +| `host` | — | Domain or IP | +| `port` | 80 / 443 | Port number | +| `protocol` | `https` | `http` or `https` | +| `timeout` | `30000` | Request timeout in ms | +| `httpClient` | — | Custom HTTP client — required for Edge / Cloudflare Workers | +| `logger` | — | Custom logging function | +| `options.allowForceDelete` | `false` | Must be `true` to delete all records without criteria (safety gate) | + +## Namespaces + +| Namespace | Use | +|---|---| +| `db.records` | Create, find, update, delete records | +| `db.relationships` | Attach and detach edges | +| `db.tx` | Transactions | +| `db.labels` | List labels and counts | +| `db.properties` | Inspect field names, types, value ranges | +| `db.ai` | Schema export + semantic search | diff --git a/docs/docs/typescript-sdk/labels.md b/docs/docs/typescript-sdk/labels.md index 31ed337f..0886dc66 100644 --- a/docs/docs/typescript-sdk/labels.md +++ b/docs/docs/typescript-sdk/labels.md @@ -1,158 +1,26 @@ --- -sidebar_position: 2 +sidebar_position: 5 --- # Labels -The RushDB TypeScript SDK provides a simple interface for working with [labels](../concepts/labels.md) in your database. Labels in RushDB help categorize and organize [records](../concepts/records.md), functioning similarly to table names in relational databases but with the flexibility of graph databases. +List which labels exist in your project and how many records each has. 
-## Labels Overview +## `db.labels.find()` -Labels in RushDB: -- Provide a way to categorize and organize records -- Enable efficient querying across similar types of records -- Each record has exactly one user-defined label (e.g., `User`, `Product`, `Car`) -- Are case-sensitive (e.g., "User" and "user" are treated as different labels) -- Function similarly to table names in relational databases but with graph database flexibility - -## Labels API - -The SDK provides label-related methods through the `labels` object: - -```typescript -// Access the labels API -const labels = db.labels; -``` - -The Labels API is built on the powerful [SearchQuery](../concepts/search/introduction.md) interface, which enables you to use the same querying capabilities that are available throughout the RushDB search API. This means you can leverage complex filters, logical operators, and comparison operators when working with labels. - -### Find Labels - -Searches for labels based on the provided query parameters and returns label names with their record counts: +Returns `{ [label]: count }` for records matching the optional filter. ```typescript -const response = await db.labels.find({ - // Optional: Any search parameters to filter labels - // Similar to record search queries - where: { - // You can filter by record properties that have specific labels - name: "John" - }, - // Other search parameters like skip, limit, etc. 
-}); - -// Response contains labels with their counts -console.log(response.data); -/* Example output: -{ - "User": 125 -} -*/ +// All labels +const { data } = await db.labels.find() +// { MOVIE: 84, ACTOR: 312, DIRECTOR: 47 } + +// Labels for records where rating > 8 +const { data } = await db.labels.find({ + where: { rating: { $gt: 8 } } +}) +// { MOVIE: 21 } ``` -## Using Labels with Records - -When creating or updating records, you need to specify a label: - -```typescript -// Create a record with the "User" label -const user = await db.records.create({ - label: "User", - data: { - name: "John Doe", - email: "john.doe@example.com" - } -}); - -// Find all records with the "User" label -const users = await db.records.find({ - labels: ["User"] -}); -``` - -## Filtering Labels - -The labels API leverages the powerful [`SearchQuery`](../concepts/search/introduction.md) interface, allowing you to use the same advanced querying capabilities that are available throughout the RushDB search API. You can use complex queries to filter which labeled records to include: - -### Example with Multiple Conditions - -```typescript -const response = await db.labels.find({ - where: { - age: { $gt: 30 }, - active: true - } -}); -``` - -This will return labels for records where `age` is greater than 30 AND `active` is true. - -### Example with OR Logic - -```typescript -const response = await db.labels.find({ - where: { - $or: [ - { country: "USA" }, - { country: "Canada" } - ] - } -}); -``` - -This will return labels for records where `country` is either "USA" OR "Canada". 
- -### Advanced Query Operators - -Since the Labels API uses the [`SearchQuery`](../concepts/search/introduction.md) interface, you can use all the query operators available in the [RushDB search API](../concepts/search/introduction.md): - -```typescript -const response = await db.labels.find({ - where: { - // String operators - name: { $contains: "Smith" }, - email: { $endsWith: "@example.com" }, - - // Numeric operators - age: { $gt: 18, $lt: 65 }, - score: { $gte: 4.5 }, - - // Array operators - tags: { $in: ["premium", "verified"] }, - - // Negation - status: { $ne: "inactive" } - } -}); -``` - -## Label Requirements and Limitations - -- **Single Custom Label**: Each record can have only one custom label at a time -- **Required Field**: A custom label is required for each record -- **Case-Sensitive**: Labels are case-sensitive ("User" ≠ "user") - -## Working with Labels - -### Best Practices - -1. **Consistent naming conventions**: Use a consistent pattern for [label](../concepts/labels.md) names (e.g., singular nouns, PascalCase) -2. **Meaningful labels**: Choose labels that describe what the record represents, not just its attributes -3. **Hierarchical labeling**: Consider using more specific labels for specialized record types (e.g., "Employee" and "Manager" instead of just "Person") - -### Common Use Cases - -- **Data organization**: Group related records for easier querying and visualization -- **Access control**: Set permissions based on record labels -- **Conditional processing**: Apply different business logic depending on record types -- **Schema validation**: Enforce data structure based on record labels - -## Internal Representation - -Internally, labels are stored as the `__RUSHDB__KEY__LABEL__` property and exposed to clients as `__label`. This property is essential for organizing records and enabling efficient queries across similar types of data. 
- -## Additional Resources - -- [Labels Concept Documentation](../concepts/labels.md) - Learn more about labels and their role in the RushDB data model -- [Search API Documentation](../concepts/search/introduction.md) - Explore the powerful search capabilities available in RushDB +`find()` accepts the same `where`, `skip`, `limit` parameters as `db.records.find()`. diff --git a/docs/docs/typescript-sdk/models.md b/docs/docs/typescript-sdk/models.md index fed8e125..ee2474b2 100644 --- a/docs/docs/typescript-sdk/models.md +++ b/docs/docs/typescript-sdk/models.md @@ -1,366 +1,161 @@ --- -sidebar_position: 3 +sidebar_position: 7 --- # Models -In this section, we focus on how to define models using the RushDB SDK. Defining models accurately is crucial as it not only aids in validating the fields according to the schema but also enhances the developer experience with features like autocomplete and field name suggestions. +A `Model` binds a label to a schema, giving you typed access to all record operations without ever repeating the label name. -## Understanding Schema - -The `Schema` is at the core of model definitions in RushDB. It specifies the structure and constraints of the data fields within your model. Here's a breakdown of the properties you can define within a `Schema`: +## Define a model ```typescript -type Schema = Record; -``` - -**Schema Properties Explained:** - -- `default`: This is the initial value of the field if no value is provided during record creation. It can be a static value or a function that returns a value asynchronously, allowing for dynamic default values. -- `multiple`: Indicates whether the field can hold multiple values (array) or just a single value. -- `required`: Specifies whether a field is mandatory. If set to true, you cannot create a record without providing a value for this field. -- `type`: Defines the data type of the field. The type determines the available search operators and how data is validated and stored. 
Possible types include: - - `boolean` - - `datetime` (can be either a detailed object or an ISO string) - - `null` - - `number` - - `string` - - `vector` (for embedding vectors used in similarity search) -- `unique`: If set to true, the field must have a unique value across all records in the database, useful for fields like email addresses or custom identifiers. - -### Working with Default Values - -Default values are especially useful for automatically setting fields like timestamps, status flags, or counters without requiring explicit values for each record creation. RushDB supports both static default values and dynamic values generated by functions: - -```typescript -// Helper function to get current ISO timestamp -const getCurrentISO = () => new Date().toISOString(); - -// Using static and dynamic default values -const UserModel = new Model('USER', { - name: { type: 'string' }, - avatar: { type: 'string' }, - login: { type: 'string', unique: true }, - password: { type: 'string' }, - active: { type: 'boolean', default: true }, // Static default - createdAt: { type: 'datetime', default: getCurrentISO }, // Dynamic default - tags: { type: 'string', multiple: true, required: false }, -}); -``` - -When you create a record without specifying values for fields with defaults, the system automatically applies these defaults: - -```typescript -// The createdAt field will be automatically set to the current date/time -// The active field will be set to true -const newUser = await UserModel.create({ - name: 'John Doe', - login: 'johndoe', - password: 'securePassword123', - avatar: 'avatar.jpg' -}); -``` - -Default value functions can also be asynchronous, allowing for operations like fetching configuration values: - -```typescript -const ConfigModel = new Model('CONFIG', { - key: { type: 'string', unique: true }, - value: { type: 'string' }, - expiresAt: { - type: 'datetime', - default: async () => { - // Default expiration is 7 days from now - const date = new Date(); - 
date.setDate(date.getDate() + 7); - return date.toISOString(); - } - } -}); -``` +import RushDB, { Model } from '@rushdb/javascript-sdk' -## Creating a Model with Model +const db = new RushDB('RUSHDB_API_KEY') -With an understanding of `Schema`, you can define a model in the RushDB system. Here's how to define a simple `Author` model: - -```typescript -const Author = new Model('author', { - name: { type: 'string' }, - email: { type: 'string', unique: true } -}); +const MovieModel = new Model('MOVIE', { + title: { type: 'string' }, + rating: { type: 'number' }, + genre: { type: 'string' }, + releasedAt:{ type: 'datetime', default: () => new Date().toISOString() } +}) ``` -**Model Constructor Parameters:** - -- `label`: A unique string identifier for the model, which represents a [Label](../concepts/labels) in RushDB. It's used to categorize records and define their type in the database system. Labels are crucial for organizing and querying your data. -- `schema`: The schema definition based on `Schema`, which dictates the structure and rules of the data stored. 
+### Schema field types -### Type Helpers in Models +| Type | Notes | +|---|---| +| `boolean` | | +| `datetime` | ISO string or detailed object | +| `null` | | +| `number` | | +| `string` | | -The `Model` class offers several built-in type helpers that enhance TypeScript integration: +### Schema field options -```typescript -// These are defined in the Model class and available as readonly properties -readonly draft!: InferType> -readonly record!: DBRecord -readonly recordInstance!: DBRecordInstance -readonly recordsArrayInstance!: DBRecordsArrayInstance -``` +| Option | Description | +|---|---| +| `type` | **Required.** Field data type | +| `default` | Static value or `() => value` function (sync or async) | +| `multiple` | `true` → field holds an array | +| `required` | `true` → create throws if value missing | +| `unique` | `true` → value must be unique across all records of this label | -**Type Helpers Explained:** - -- `draft`: Represents a draft version of the schema - a flat object containing only the record's own properties defined by the schema, excluding system fields such as `__id`, `__label`, and `__proptypes`. This is useful when creating new records. -- `record`: Represents a fully-defined record with database representation, including all fields that come with the record's database-side representation. -- `recordInstance`: Extends the record by providing additional methods to operate on a specific record, such as saving, updating, or deleting it. -- `recordsArrayInstance`: Similar to a single record instance but supports batch or bulk operations for efficient management of multiple records. 
- -### Practical Type Helpers Example - -Here's a practical example of how to use the type helpers to create strongly-typed variables and functions in your application: +## Types from the model ```typescript -// Define the Label as a constant -export const USER = 'USER' as const; +export const USER = 'USER' as const -// Create a model with the USER label export const UserModel = new Model(USER, { - name: { type: 'string' }, - avatar: { type: 'string' }, - login: { type: 'string', unique: true }, - password: { type: 'string' }, - createdAt: { type: 'datetime', default: getCurrentISO }, - tags: { type: 'string', multiple: true, required: false }, -}); - -// Export type definitions derived from model -export type UserRecord = typeof UserModel.record; -export type UserRecordResult = never> = - typeof UserModel.recordInstance & { data: T }; -export type UserRecordsArrayResult = typeof UserModel.recordsArrayInstance; -export type UserRecordDraft = typeof UserModel.draft; -export type UserSearchQuery = SearchQuery; -``` - -### Model Implementation Architecture - -The `Model` class uses the same architectural pattern as other SDK components like `Transaction` and `DBRecordInstance`. 
It uses the static `RushDB.getInstance()` method to access the API: - -```typescript -// Internal implementation pattern (from model.ts) -async someMethod(params) { - const instance = RushDB.getInstance() - return await instance.someApi.someMethod(params) -} + name: { type: 'string' }, + login: { type: 'string', unique: true }, + password: { type: 'string' }, + active: { type: 'boolean', default: true }, + createdAt: { type: 'datetime', default: () => new Date().toISOString() }, + tags: { type: 'string', multiple: true, required: false }, +}) +// Export strongly-typed aliases +export type UserRecord = typeof UserModel.record +export type UserRecordDraft = typeof UserModel.draft +export type UserSearchQuery = SearchQuery ``` -This architecture ensures consistent API access across all SDK components. - -These exported types can then be used throughout your application to ensure type safety: +| Helper | What it represents | +|---|---| +| `.record` | Full DB record including system fields (`__id`, `__label`, `__proptypes`) | +| `.draft` | Your schema fields only — no system fields; use when creating records | +| `.recordInstance` | Record + instance methods (`update`, `delete`, `attach`, …) | +| `.recordsArrayInstance` | Array result with `data` + `total` | -```typescript -// Function that accepts a user draft (without system fields) -function prepareUserForRegistration(user: UserRecordDraft): UserRecordDraft { - return { - ...user, - // Add additional processing if needed - }; -} +## CRUD operations -// Function that works with a complete user record (with system fields) -function getUserDisplayName(user: UserRecord): string { - return user.name || user.__id; -} +### Create -// Function that receives a user recordInstance with additional methods -async function updateUserAvatar(user: UserRecordResult): Promise { - const newAvatar = generateAvatarUrl(user.data.name); - return await UserModel.update(user.data.__id, { avatar: newAvatar }); -} +```typescript +const movie 
= await MovieModel.create({ + title: 'Inception', rating: 8.8, genre: 'sci-fi' +}) -// Function that creates a type-safe search query -function buildUserSearchQuery(nameFilter: string): UserSearchQuery { - return { - where: { - name: { $contains: nameFilter }, - // TypeScript will ensure only valid fields and operators are used - }, - sort: { createdAt: 'desc' } - }; -} +const movies = await MovieModel.createMany([ + { title: 'The Dark Knight', rating: 9.0, genre: 'action' }, + { title: 'Interstellar', rating: 8.6, genre: 'sci-fi' } +]) ``` -This approach gives you several advantages: -- **Consistent Type Definitions**: All user-related types are derived from a single source of truth. -- **Autocomplete Support**: Your IDE will suggest valid field names and types. -- **Type Safety**: TypeScript will catch errors if you try to access non-existent fields. -- **Maintainability**: Changes to the model automatically propagate to all derived types. - -## Registering and Managing Models - -Models in RushDB don't need to be registered explicitly. When you create a model, it's ready to use right away: +### Read ```typescript -// Create the model -const AuthorModel = new Model('author', { - name: { type: 'string' }, - email: { type: 'string', unique: true } -}); - -// Start using it directly -const author = await AuthorModel.create({ - name: "Jane Doe", - email: "jane@example.com" -}); +const all = await MovieModel.find() +const sciFi = await MovieModel.find({ where: { genre: 'sci-fi' } }) +const one = await MovieModel.findOne({ where: { title: 'Inception' } }) +const byId = await MovieModel.findById('movie-id-123') +const unique = await MovieModel.findUniq({ where: { title: 'Inception' } }) ``` -### Important: RushDB Initialization Architecture - -Due to the async initialization architecture of RushDB, it's important to initialize the RushDB instance early in your application's lifecycle. This is because JavaScript modules are lazy-loaded and only executed when imported. 
- -To ensure that the RushDB instance is available when needed by your models, it's recommended to: - -1. Create your RushDB instance in a dedicated file at the root of your application -2. Export this instance so it can be imported by other modules -3. Import this file early in your application's bootstrap process - -Example of proper initialization: - -```typescript -// db.ts (at the root of your project) -import RushDB from '@rushdb/javascript-sdk'; - -// Initialize RushDB with your API token -export const db = new RushDB('RUSHDB_API_KEY'); - -// You can also export a helper function to access the instance -export const getRushDBInstance = () => { - return RushDB.getInstance(); -}; -``` +### Update ```typescript -// app.ts or index.ts (your application entry point) -import { db } from './db'; -// Import your models after importing the db -import { UserModel, PostModel } from './models'; +// Partial update — only listed fields change +await MovieModel.update('movie-id-123', { rating: 9.1 }) -// The rest of your application code... +// Full replace — all other fields are removed +await MovieModel.set('movie-id-123', { title: 'Inception', rating: 9.1, genre: 'sci-fi' }) ``` -This approach ensures that the RushDB instance is initialized before any model tries to use it, preventing "No RushDB instance found" errors. - -## Model CRUD Operations - -After creating a model, you can perform CRUD (Create, Read, Update, Delete) operations through the model's methods. 
- -### Creating Records +### Delete ```typescript -// Create a single record -const newAuthor = await AuthorModel.create({ - name: 'Alice Smith', - email: 'alice.smith@example.com' -}); - -// Create multiple records -const authors = await AuthorModel.createMany([ - { name: 'Bob Johnson', email: 'bob.johnson@example.com' }, - { name: 'Carol Davis', email: 'carol.davis@example.com' } -]); +await MovieModel.delete({ where: { genre: 'temp' } }) +await MovieModel.deleteById(['movie-id-123', 'movie-id-456']) ``` -### Reading Records +### Relationships ```typescript -// Find all records of this model -const allAuthors = await AuthorModel.find(); - -// Find specific records with search criteria -const specificAuthors = await AuthorModel.find({ - where: { name: { $contains: 'Smith' } } -}); - -// Find a single record -const oneAuthor = await AuthorModel.findOne({ - where: { email: 'alice.smith@example.com' } -}); +await MovieModel.attach({ + source: 'movie-id-123', + target: 'actor-id-456', + options: { type: 'STARS', direction: 'out' } +}) -// Find by unique identifier -const authorById = await AuthorModel.findById('author_id_123'); +await MovieModel.detach({ + source: 'movie-id-123', + target: 'actor-id-456', + options: { type: 'STARS' } +}) ``` -### Updating Records +## Initialization order -```typescript -// Update a specific record by ID -await AuthorModel.update('author_id_123', { - name: 'Alice Johnson-Smith' -}); - -// Set all values of a record (replace existing data) -await AuthorModel.set('author_id_123', { - name: 'Alice Johnson', - email: 'alice.johnson@example.com' -}); -``` - -### Deleting Records +Create the `RushDB` instance before importing models — models call `RushDB.getInstance()` on first use. 
```typescript -// Delete records matching criteria -await AuthorModel.delete({ - where: { name: { $contains: 'temp' } } -}); +// db.ts +import RushDB from '@rushdb/javascript-sdk' +export const db = new RushDB('RUSHDB_API_KEY') -// Delete records by ID -await AuthorModel.deleteById(['author_id_123', 'author_id_456']); +// models.ts — import db.ts first in your app entry +import './db' +import { Model } from '@rushdb/javascript-sdk' +export const MovieModel = new Model('MOVIE', { /* … */ }) ``` -### Working with Relationships +## Transactions ```typescript -// Attach a relationship -await AuthorModel.attach({ - source: 'author_id_123', - target: 'book_id_456', - options: { type: 'WROTE' } -}); - -// Detach a relationship -await AuthorModel.detach({ - source: 'author_id_123', - target: 'book_id_456', - options: { type: 'WROTE' } -}); +const tx = await db.tx.begin() +try { + const movie = await MovieModel.create({ title: 'Dune', rating: 8.0, genre: 'sci-fi' }, tx) + const actor = await ActorModel.create({ name: 'Timothée Chalamet' }, tx) + await MovieModel.attach({ source: movie, target: actor, options: { type: 'STARS' } }, tx) + await tx.commit() +} catch (e) { + await tx.rollback() + throw e +} ``` -## Advanced TypeScript Support - -For a complete, up-to-date guide on configuring declaration merging, path aliases, and schema-aware intellisense (including typed related queries), see the Model reference: [TypeScript: extend SDK types for schema-aware suggestions](./typescript-reference/Model#typescript-extend-sdk-types-for-schema-aware-suggestions). - -Note on result typing with aggregations/grouping: when you use `aggregate` or `groupBy`, the result shape can differ from your schema. You can augment the instance type as `typeof Model.recordInstance & { data: T }` to describe the returned payload. 
See the dedicated explanation and examples in the Model reference, and the concepts for [Aggregations](../concepts/search/aggregations) and [Grouping](../concepts/search/group-by). - -## Working with Transactions - -Model operations can be performed within transactions to ensure data integrity. For more information on using transactions with models, see the [Transactions](../typescript-sdk/transactions) documentation. - -## Conclusion - -Defining models with `Model` and `Schema` sets a robust foundation for your application's data architecture. It enables strong type-checking, validation, and inter-model relationships, enhancing the robustness and scalability of your applications. In subsequent sections, we will explore how to interact with these models to create, retrieve, update, and delete records. - ---- - -## Related Documentation - -For a more in-depth understanding of the RushDB TypeScript SDK and its capabilities, refer to these related sections: +## Advanced TypeScript -- [Introduction to TypeScript SDK](../typescript-sdk/introduction) - Learn about the basics of using the SDK -- [Transactions](../typescript-sdk/transactions) - Learn how to use transactions with models for atomic operations -- [Labels](../concepts/labels) - Understand how Labels work in RushDB and how they're used to categorize records +For declaration merging, path aliases, and schema-aware intellisense (typed relation queries, aggregate result shapes), see the [Model reference](./typescript-reference/Model#typescript-extend-sdk-types-for-schema-aware-suggestions). 
diff --git a/docs/docs/typescript-sdk/properties.md b/docs/docs/typescript-sdk/properties.md index 38a46154..3649d2a6 100644 --- a/docs/docs/typescript-sdk/properties.md +++ b/docs/docs/typescript-sdk/properties.md @@ -1,228 +1,49 @@ --- -sidebar_position: 3 +sidebar_position: 4 --- # Properties -[Properties](../concepts/properties.md) are the individual key-value pairs that make up the data within a [record](../concepts/records.md) in RushDB. This guide covers how to work with properties using the TypeScript SDK, including finding, retrieving, and managing property values. +Inspect and manage field definitions across your project. -## Overview +## `db.properties.find()` -The properties API in the SDK enables you to: -- Find properties based on search criteria -- Retrieve specific properties by ID -- Get possible values for a property -- Delete properties from the database - -## Finding Properties - -### Using RushDB's `find()` Method - -To search for properties that match specific criteria, use the `properties.find` method: +Returns all property definitions matching the filter. ```typescript -const properties = await db.properties.find({ - where: { - name: 'email', - type: 'string' - } -}); - -console.log(properties); -/* -{ - data: [ - { - id: 'property_id_1', - name: 'email', - type: 'string', - ... - }, - { - id: 'property_id_2', - name: 'email', - type: 'string', - ... - } - ], - total: 2 -} -*/ +const { data } = await db.properties.find({ + where: { type: 'number' } +}) +// [{ id, name: 'rating', type: 'number', ... }, ...] 
``` -#### Parameters - -- `searchQuery`: A search query object to find matching properties - - `where`: Conditions to filter properties - - `sort`: Sort criteria for results - - `limit`: Maximum number of results to return - - `skip`: Number of results to skip -- `transaction` (optional): A [transaction](../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to an array of property objects - -### Finding Properties in Transactions +## `db.properties.findById()` ```typescript -const transaction = await db.tx.begin(); -try { - const properties = await db.properties.find({ - where: { - name: { $in: ['email', 'phone'] } - } - }, transaction); - - // Perform other operations... - - await transaction.commit(); - console.log(properties); -} catch (error) { - await transaction.rollback(); - throw error; -} +const prop = await db.properties.findById('property-id') ``` -## Retrieving a Property by ID +## `db.properties.values()` -### Using RushDB's `findById()` Method - -To retrieve a specific property by its ID, use the `properties.findById` method: +Enumerate distinct values for a property — useful for building filter UIs. ```typescript -const property = await db.properties.findById('property_id_1'); +const { data: genres } = await db.properties.values('prop-id-genre') +// ['sci-fi', 'action', 'drama', ...] -console.log(property); -/* -{ - id: 'property_id_1', - name: 'email', - type: 'string', - ... 
-} -*/ -``` - -#### Parameters - -- `id`: The ID of the property to retrieve -- `transaction` (optional): A [transaction](../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to the property object if found, or null if not found - -## Getting Property Values - -### Using RushDB's `values()` Method - -To retrieve possible values for a specific property, use the `properties.values` method: - -```typescript -const values = await db.properties.values('property_id_1', { - where: { - status: 'active' - }, - query: 'john', +// With filter +const { data } = await db.properties.values('prop-id', { + query: 'sci', // text prefix filter orderBy: 'asc', limit: 10 -}); - -console.log(values); -/* -{ - data: ['john@example.com', 'johnny@example.com'], - total: 2 -} -*/ +}) ``` -#### Parameters - -- `id`: The ID of the property to get values for -- `searchQuery` (optional): SearchQuery object with filtering options: - - `where` (object): Filter criteria for records containing this property - - `query` (string): Filter values by this text string - - `orderBy` (string): Sort direction ('asc' or 'desc') - - `limit` (number): Maximum number of values to return - - `skip` (number): Number of values to skip for pagination -- `transaction` (optional): A [transaction](../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to an object containing the values and a total count - -## Deleting Properties - -### Using RushDB's `delete()` Method - -To delete a property from the database, use the `properties.delete` method: - -```typescript -const result = await db.properties.delete('property_id_1'); - -console.log(result); -/* -{ - success: true, - message: "Property deleted successfully" -} -*/ -``` - -#### Parameters - -- `id`: The ID of the property to delete -- `transaction` (optional): A 
[transaction](../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to a success object - -#### Deleting Properties in Transactions +## `db.properties.delete()` ```typescript -const transaction = await db.tx.begin(); -try { - await db.properties.delete('property_id_1', transaction); - - // Perform other operations... - - await transaction.commit(); -} catch (error) { - await transaction.rollback(); - throw error; -} +await db.properties.delete('property-id') ``` -## Best Practices for Working with Properties - -1. **Use Transactions for Related Operations** - - When performing multiple operations that need to be atomic, use [transactions](../concepts/transactions.mdx) - - This ensures data consistency and prevents partial changes - -2. **Optimize Search Queries** - - Use specific search criteria to minimize the amount of data returned - - Filter by name, type, or other attributes to narrow down results - -3. **Cache Property IDs When Appropriate** - - If you frequently access the same properties, cache their IDs - - This reduces the need for repeated lookups - -4. **Consider the Impact of Property Deletion** - - Deleting a property affects all records that use it - - Instead of deleting common properties, consider marking them as deprecated - -5. **Use Distinct Values for Enumeration** - - When fetching property values for UI dropdown elements, use the `distinct: true` option - - This provides a cleaner list of possible values without duplicates - -## Conclusion - -The Properties API in the RushDB TypeScript SDK provides a comprehensive set of methods for working with properties. By understanding these methods and their parameters, you can effectively manage properties in your application. 
-
-For more information on related topics, see:
-- [Records](./records/create-records.md) - Work with records that contain properties
-- [Relationships](./relationships.md) - Connect records with relationships
-- [Models](./models.md) - Define structured schemas for your data
+Deletes the property definition and removes it from all records that use it.
diff --git a/docs/docs/typescript-sdk/raw-queries.md b/docs/docs/typescript-sdk/raw-queries.md
index 68ebfb06..73f0a1d6 100644
--- a/docs/docs/typescript-sdk/raw-queries.md
+++ b/docs/docs/typescript-sdk/raw-queries.md
@@ -1,10 +1,12 @@
 ---
-sidebar_position: 7
+sidebar_position: 8
 ---
 
 # Raw Queries
 
-> **Important (cloud-only):** This endpoint is available only on the RushDB managed cloud service or when your project is connected to a custom database through RushDB Cloud. It is not available for self-hosted or local-only deployments — attempting to use it against a non-cloud instance will fail.
+:::warning Requires a connected Neo4j instance
+This endpoint is only available when your project is connected to your own Neo4j database. Connecting a custom Neo4j instance is available on the free tier — see the RushDB dashboard to set it up.
+:::
 
 Use this endpoint to run arbitrary Cypher queries against your connected Neo4j database. This is intended for advanced use-cases and requires the managed service or a custom DB connection.
diff --git a/docs/docs/typescript-sdk/records/_category_.json b/docs/docs/typescript-sdk/records/_category_.json index cb98d192..ebf0e246 100644 --- a/docs/docs/typescript-sdk/records/_category_.json +++ b/docs/docs/typescript-sdk/records/_category_.json @@ -1,6 +1,6 @@ { "label": "Records", - "position": 1, + "position": 2, "collapsed": false, "collapsible": false } diff --git a/docs/docs/typescript-sdk/records/create-records.md b/docs/docs/typescript-sdk/records/create-records.md index 29938c8c..5de038c9 100644 --- a/docs/docs/typescript-sdk/records/create-records.md +++ b/docs/docs/typescript-sdk/records/create-records.md @@ -4,794 +4,116 @@ sidebar_position: 1 # Create Records -Creating [records](../../concepts/records.md) is a fundamental operation when working with any data-driven application. RushDB provides multiple ways to create records, from direct API calls to Model-based abstractions. +Three methods for writing flat records. For nested/graph data see [Import Data](./import-data.md). -This guide covers different approaches to creating records, from the most basic to more advanced patterns. - -## Overview - -The create record methods in the SDK enable you to: -- Create a single [record](../../concepts/records.md) with [properties](../../concepts/properties.md) and a [label](../../concepts/labels.md) -- Create multiple records in one operation -- Upsert records (create or update based on matching criteria) -- Control data type inference and other formatting options -- Create records with precise type control -- Create records within [transactions](../../concepts/transactions.mdx) for data consistency -- Create records using Model abstractions for type safety - -## Creating Single Records - -There are multiple ways to create records in RushDB. Let's start with the most basic approach using the direct API methods. 
- -### Using RushDB's `create()` Method - -The most direct way to create a record is using the API client's `records.create` method: +## `db.records.create()` ```typescript -const newAuthor = await db.records.create({ - label: 'AUTHOR', - data: { - name: 'John Doe', - email: 'john.doe@example.com' - }, - options: { - suggestTypes: true - } -}); - -console.log(newAuthor); -/* -{ - __id: 'generated_id', - __label: 'AUTHOR', - name: 'John Doe', - email: 'john.doe@example.com' -} -*/ +const movie = await db.records.create({ + label: 'MOVIE', + data: { title: 'Inception', rating: 8.8, genre: 'sci-fi' } +}) +// → DBRecordInstance { __id, __label, title, rating, genre } ``` -#### Parameters - -- `label`: The [label](../../concepts/labels.md)/type for the record -- `data`: The data for the record as a flat object -- `options` (optional): Configuration options for record creation: - - `suggestTypes` (boolean, **default: `true`**): Automatically infers data types for [properties](../../concepts/properties.md). Set to `false` to disable type inference and store all values as strings - - `castNumberArraysToVectors` (boolean, default: `false`): When true, converts numeric arrays to vector type - - `convertNumericValuesToNumbers` (boolean, default: `false`): When true, converts string numbers to number type -- `transaction` (optional): A [transaction](../../concepts/transactions.mdx) object or string to include the operation within a transaction - -:::info Default Type Inference -By default, `suggestTypes` is `true` for all write operations. RushDB automatically infers data types from your values. To disable this and store all properties as strings, explicitly set `suggestTypes: false`. 
-::: - -#### Returns - -- A promise that resolves to a `DBRecordInstance` containing the created [record](../../concepts/records.md) - -#### Creating Records in Transactions +### Precise type control (PropertyDraft) ```typescript -const transaction = await db.tx.begin(); -try { - const newAuthor = await db.records.create({ - label: 'AUTHOR', - data: { - name: 'Jane Smith', - email: 'jane.smith@example.com' - } - }, transaction); - - // Perform other operations... - - await transaction.commit(); - console.log(newAuthor); -} catch (error) { - await transaction.rollback(); - throw error; -} -``` - -### Property-Based Approach for Precise Type Control - -When you need precise control over property types, you can use the property-based approach by passing an array of `PropertyDraft` objects instead of a flat data object: - -```typescript -const newAuthor = await db.records.create({ - label: 'AUTHOR', +await db.records.create({ + label: 'MOVIE', data: [ - { - name: 'name', - type: 'string', - value: 'John Doe' - }, - { - name: 'age', - type: 'number', - value: 42 - }, - { - name: 'isActive', - type: 'boolean', - value: true - }, - { - name: 'tags', - type: 'string', - value: 'fiction,sci-fi,bestseller', - valueSeparator: ',' - }, - { - name: 'scores', - type: 'number', - value: '85,90,95', - valueSeparator: ',' - }, - { - name: 'joinDate', - type: 'datetime', - value: '2025-04-23T10:30:00Z' - } + { name: 'title', type: 'string', value: 'Inception' }, + { name: 'rating', type: 'number', value: 8.8 }, + { name: 'genres', type: 'string', value: 'sci-fi,thriller', valueSeparator: ',' }, + { name: 'releasedAt', type: 'datetime', value: '2010-07-16T00:00:00Z' } ] -}); - -console.log(newAuthor); -/* -{ - __id: 'generated_id', - __label: 'AUTHOR', - __proptypes: { - name: 'string', - age: 'number', - isActive: 'boolean', - tags: 'string', - scores: 'number', - joinDate: 'datetime' - }, - name: 'John Doe', - age: 42, - isActive: true, - tags: ['fiction', 'sci-fi', 'bestseller'], 
- scores: [85, 90, 95], - joinDate: '2025-04-23T10:30:00Z' -} -*/ -``` - -#### Property Draft Object Properties - -Each property draft object supports the following properties: - -| Property | Type | Description | -|----------|------|-------------| -| `name` | `string` | The property name | -| `type` | `string` | The data type ('string', 'number', 'boolean', 'datetime', etc.) | -| `value` | `any` | The property value | -| `valueSeparator` | `string` (optional) | Separator to split string values into arrays | - -## Creating Multiple Records - -When you need to create multiple flat records (CSV-like rows) in a single operation, use the `records.createMany` method. For nested or complex JSON, use `records.importJson`. - -### Using RushDB's `createMany()` Method (flat rows only) - -```typescript -const authors = await db.records.createMany({ - label: 'AUTHOR', - data: [ - { name: 'Alice Johnson', email: 'alice.johnson@example.com' }, - { name: 'Bob Brown', email: 'bob.brown@example.com' } - ], - options: { - suggestTypes: true - } -}); - -console.log(authors); -/* -{ - data: [ - { - __id: 'generated_id_1', - __label: 'AUTHOR', - name: 'Alice Johnson', - email: 'alice.johnson@example.com' - }, - { - __id: 'generated_id_2', - __label: 'AUTHOR', - name: 'Bob Brown', - email: 'bob.brown@example.com' - } - ], - total: 2 -} -*/ -``` - -#### Parameters - -- `label`: The [label](../../concepts/labels.md)/type for all records -- `data`: An object or array of objects, each a flat record (no nested objects/arrays) -- `options` (optional): Configuration options for record creation: - - `suggestTypes` (boolean, **default: `true`**): Automatically infers data types for [properties](../../concepts/properties.md). 
Set to `false` to disable type inference - - `castNumberArraysToVectors` (boolean, default: `false`): When true, converts numeric arrays to vector type - - `convertNumericValuesToNumbers` (boolean, default: `false`): When true, converts string numbers to number type - - `capitalizeLabels` (bool): When true, converts all labels to uppercase - - `relationshipType` (str): Default relationship type between nodes - - `returnResult` (bool, default: `false`): When true, returns imported records in response - - Throws if any record contains nested objects/arrays. Use `records.importJson` for that. - - ### Using RushDB's `importJson()` Method (nested JSON) - - Use `importJson` for nested objects, arrays of nested objects, or hash-map like payloads. - - Signature: - - ```ts - db.records.importJson({ data, label?: string, options?: ImportOptions }, tx?) - ``` - - Behavior: - - If `label` is provided, it's used for the import. - - If `label` is omitted, the input must be an object with a single top-level key whose name becomes the label, e.g. `{ ITEM: [ {...}, {...} ] }`. - - If `label` is omitted and the object has multiple top-level keys (e.g. `{ some: 'key', data: 1, nested: { level: 2 } }`), an error is thrown. - - Multiple top-level keys: - - Without `label`: not allowed — importJson requires a single top-level key to infer the label and will throw. - - With `label`: allowed — the provided `label` becomes the root label; the multiple keys are treated as nested structure under that root. - - If you want each top-level key to become its own label root, call `importJson` separately per key or pass single-key objects per call. 
- - Examples: - - OK (label inferred): - ```json - { "ITEM": [ { /*...*/ }, { /*...*/ } ] } - ``` - - OK (label inferred with object): - ```json - { "ITEM": { /*...*/ } } - ``` - - OK with explicit label (multiple top-level keys): - ```json - { "ITEM": { /*...*/ }, "PRODUCT": { /*...*/ } } - ``` - Call as: `db.records.importJson({ label: 'INVENTORY', data: { ITEM: {...}, PRODUCT: {...} } })` - - Will throw without label (multiple top-level keys): - ```json - { "ITEM": { /*...*/ }, "PRODUCT": { /*...*/ } } - ``` - - Will throw without label (mixed keys): - ```json - { "ITEM": { /*...*/ }, "notNestedProp": "12" } - ``` -- `transaction` (optional): A [transaction](../../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to a `DBRecordsArrayInstance` containing the created [records](../../concepts/records.md) - -#### Creating Multiple Records in Transactions - -```typescript -const transaction = await db.tx.begin(); -try { - const authors = await db.records.createMany({ - label: 'AUTHOR', - data: [ - { name: 'Charlie Green', email: 'charlie.green@example.com' }, - { name: 'David Blue', email: 'david.blue@example.com' } - ] - }, transaction); - - // Perform other operations... - - await transaction.commit(); - console.log(authors); -} catch (error) { - await transaction.rollback(); - throw error; -} -``` - -## Upserting Records - -The `upsert` method provides a powerful way to create or update records in a single operation. It attempts to find an existing record based on specified properties and either creates a new one or updates the existing record according to your chosen strategy. 
- -### Using RushDB's `upsert()` Method - -```typescript -const product = await db.records.upsert({ - label: 'Product', - data: { - sku: 'SKU-001', - name: 'Laptop Pro', - price: 1299.99, - category: 'Electronics' - }, - options: { - mergeBy: ['sku'], - mergeStrategy: 'append', - suggestTypes: true - } -}); - -console.log(product); -/* -{ - __id: 'generated_id', - __label: 'Product', - sku: 'SKU-001', - name: 'Laptop Pro', - price: 1299.99, - category: 'Electronics' -} -*/ -``` - -#### Parameters - -- `label` (optional): The [label](../../concepts/labels.md)/type for the record -- `data`: Flat object or array of property drafts containing the record data -- `options` (optional): Configuration options for the upsert operation: - - `mergeBy` (string[], optional): Property names to match on. If empty/undefined, matches on all incoming properties - - `mergeStrategy` ('rewrite' | 'append', default: 'append'): Strategy for handling updates - - `suggestTypes` (boolean, **default: `true`**): Automatically infers data types for [properties](../../concepts/properties.md). Set to `false` to disable type inference - - `castNumberArraysToVectors` (boolean, default: `false`): Converts numeric arrays to vector type - - `convertNumericValuesToNumbers` (boolean, default: `false`): Converts string numbers to number type -- `transaction` (optional): A [transaction](../../concepts/transactions.mdx) object or string to include the operation within a transaction - -:::info Default Type Inference -By default, `suggestTypes` is `true` for all write operations including upsert. RushDB automatically infers data types from your values. To disable this and store all properties as strings, explicitly set `suggestTypes: false`. 
-::: - -#### Returns - -- A promise that resolves to a `DBRecordInstance` containing the created or updated [record](../../concepts/records.md) - -### Merge Strategies - -#### Append Strategy - -The `append` strategy (default) updates or adds properties while preserving existing ones: - -```typescript -// Initial create -const product = await db.records.upsert({ - label: 'Product', - data: { sku: 'SKU-001', name: 'Widget', price: 10, category: 'Tools' }, - options: { mergeBy: ['sku'], mergeStrategy: 'append', suggestTypes: true } -}); - -// Update price and add stock - name and category are preserved -const updated = await db.records.upsert({ - label: 'Product', - data: { sku: 'SKU-001', price: 15, stock: 100 }, - options: { mergeBy: ['sku'], mergeStrategy: 'append', suggestTypes: true } -}); - -console.log(updated.data); -/* -{ - sku: 'SKU-001', - name: 'Widget', // Preserved - category: 'Tools', // Preserved - price: 15, // Updated - stock: 100 // Added -} -*/ -``` - -#### Rewrite Strategy - -The `rewrite` strategy replaces all properties with the incoming data: - -```typescript -// Rewrite - removes unspecified fields -const rewritten = await db.records.upsert({ - label: 'Product', - data: { sku: 'SKU-001', name: 'New Widget', price: 20 }, - options: { mergeBy: ['sku'], mergeStrategy: 'rewrite', suggestTypes: true } -}); - -console.log(rewritten.data); -/* -{ - sku: 'SKU-001', - name: 'New Widget', - price: 20 - // category and stock are removed -} -*/ -``` - -### Common Use Cases - -#### Idempotent Data Imports - -```typescript -// Can be safely run multiple times without creating duplicates -const user = await db.records.upsert({ - label: 'User', - data: { - email: 'john@example.com', - name: 'John Doe', - lastLogin: new Date().toISOString() - }, - options: { - mergeBy: ['email'], - mergeStrategy: 'append', - suggestTypes: true - } -}); -``` - -#### Multi-Tenant Applications - -```typescript -// Match on both tenant and entity identifiers -const setting = 
await db.records.upsert({ - label: 'Setting', - data: { - tenantId: 'tenant-123', - userId: 'user-456', - theme: 'dark', - notifications: true - }, - options: { - mergeBy: ['tenantId', 'userId'], - mergeStrategy: 'append', - suggestTypes: true - } -}); -``` - -#### Configuration Management - -```typescript -// Update configuration by key -const config = await db.records.upsert({ - label: 'Config', - data: { - key: 'api_timeout', - value: 30000, - updatedAt: new Date().toISOString() - }, - options: { - mergeBy: ['key'], - mergeStrategy: 'append', - suggestTypes: true - } -}); +}) ``` -#### Inventory Updates - -```typescript -// Update stock while preserving product details -const inventory = await db.records.upsert({ - label: 'Product', - data: { - productCode: 'PROD-789', - stock: 50, - lastRestocked: new Date().toISOString() - }, - options: { - mergeBy: ['productCode'], - mergeStrategy: 'append', - suggestTypes: true - } -}); -``` +| PropertyDraft field | Type | Description | +|---|---|---| +| `name` | `string` | Property name | +| `type` | `string` | `string` · `number` · `boolean` · `datetime` · `null` · `vector` | +| `value` | any | The value | +| `valueSeparator` | `string` | Split `value` string into an array on this separator | -### Matching Behavior - -#### With Specific MergeBy Fields - -When `mergeBy` contains specific field names, only those fields are used for matching: - -```typescript -// Matches only on 'email' -const user = await db.records.upsert({ - label: 'User', - data: { email: 'user@example.com', name: 'John', age: 30 }, - options: { mergeBy: ['email'], mergeStrategy: 'append' } -}); -``` - -#### Without MergeBy (All Properties Match) - -When `mergeBy` is empty or undefined, matching is performed on all incoming properties: - -```typescript -// Matches only if ALL properties (email, name, age) match exactly -const user = await db.records.upsert({ - label: 'User', - data: { email: 'user@example.com', name: 'John', age: 30 }, - options: { 
mergeStrategy: 'append' } -}); - -// This would create a new record (age doesn't match) -const different = await db.records.upsert({ - label: 'User', - data: { email: 'user@example.com', name: 'John', age: 31 }, - options: { mergeStrategy: 'append' } -}); -``` +## `db.records.createMany()` -### Using with Transactions +Flat rows only — no nested objects. For nested data use [`importJson`](./import-data.md). ```typescript -const transaction = await db.tx.begin(); -try { - const product = await db.records.upsert({ - label: 'Product', - data: { sku: 'SKU-001', name: 'Widget', price: 10 }, - options: { mergeBy: ['sku'], mergeStrategy: 'append' } - }, transaction); - - const inventory = await db.records.upsert({ - label: 'Inventory', - data: { productSku: 'SKU-001', quantity: 100, warehouse: 'A' }, - options: { mergeBy: ['productSku', 'warehouse'], mergeStrategy: 'append' } - }, transaction); - - await transaction.commit(); -} catch (error) { - await transaction.rollback(); - throw error; -} -``` - -### Property-Based Upsert - -For precise type control, you can use property drafts: - -```typescript -const record = await db.records.upsert({ - label: 'Product', +const result = await db.records.createMany({ + label: 'ACTOR', data: [ - { name: 'sku', type: 'string', value: 'SKU-001' }, - { name: 'price', type: 'number', value: 99.99 }, - { name: 'tags', type: 'string', value: 'electronics,sale', valueSeparator: ',' }, - { name: 'inStock', type: 'boolean', value: true } - ], - options: { - mergeBy: ['sku'], - mergeStrategy: 'append' - } -}); + { name: 'Leonardo DiCaprio', country: 'USA' }, + { name: 'Ken Watanabe', country: 'Japan' } + ] +}) +// → DBRecordsArrayInstance { data: [...], total: 2 } ``` -### Best Practices for Upsert - -1. **Choose Appropriate MergeBy Fields** - - Use fields that uniquely identify your records (like `email`, `sku`, `userId`) - - Consider multi-field matching for multi-tenant scenarios +## `db.records.upsert()` -2. 
**Select the Right Strategy** - - Use `append` when you want to preserve existing data and only update specific fields - - Use `rewrite` when you need a complete replacement of the record - -3. **Use with Transactions for Related Updates** - - Combine multiple upserts in a [transaction](../../concepts/transactions.mdx) to ensure atomicity - - Roll back if any operation fails - -4. **Handle Edge Cases** - - Be aware that empty `mergeBy` means matching on all properties - - Consider performance implications when matching on many fields - -5. **Idempotent Operations** - - Upsert is ideal for data synchronization and import operations - - Safely re-run operations without creating duplicates - -## Creating Records with Models - -The recommended approach for structured applications is to use RushDB's [Models](../models.md). Models provide type safety, validation, and a more intuitive API for working with records. - -We'll use the following model definitions for these examples: +Create-or-update based on matching criteria. ```typescript -const AuthorRepo = new Model('author', { - name: { type: 'string' }, - email: { type: 'string', unique: true } -}); +// Match on 'title'; update rating if found, create if not +const movie = await db.records.upsert({ + label: 'MOVIE', + data: { title: 'Inception', rating: 9.0, genre: 'sci-fi' }, + options: { mergeBy: ['title'], mergeStrategy: 'append' } +}) ``` -### Using Model's `create` Method +### Merge strategies -The `create` method on a model creates a single record. 
+| Strategy | Behaviour | +|---|---| +| `append` (default) | Add / update incoming fields; preserve all other existing fields | +| `rewrite` | Replace all fields with incoming data; unmentioned fields are removed | -#### Signature -```typescript -create( - record: InferSchemaTypesWrite, - transaction?: Transaction | string -): Promise>; -``` +### `mergeBy` behaviour -#### Parameters +| `mergeBy` value | Match behaviour | +|---|---| +| `['field']` | Match only on listed fields | +| `[]` or omitted | Match on ALL incoming property keys | -- `record`: An object that adheres to the schema defined for the model -- `transaction` (optional): A [transaction](../../concepts/transactions.mdx) object or string to include the operation within a transaction +## Options -#### Returns +All three methods accept the same `options` object: -- A promise that resolves to a `DBRecordInstance` containing the created [record](../../concepts/records.md) +| Option | Default | Description | +|---|---|---| +| `suggestTypes` | `true` | Infer types automatically | +| `convertNumericValuesToNumbers` | `false` | Convert string numbers to number type | +| `capitalizeLabels` | `false` | Uppercase all inferred label names | +| `relationshipType` | `__RUSHDB__RELATION__DEFAULT__` | Relationship type used for nested links | +| `returnResult` | `false` | Return created records in the response | +| `mergeBy` | — | Fields to match on for upsert | +| `mergeStrategy` | `append` | `append` or `rewrite` | -#### Example +## In a transaction ```typescript -const newAuthor = await AuthorRepo.create({ - name: 'John Doe', - email: 'john.doe@example.com' -}); - -console.log(newAuthor); -/* -{ - data: { - __id: 'generated_id', - __label: 'author', - name: 'John Doe', - email: 'john.doe@example.com' - } -} -*/ -``` - -#### Using with Transactions - -```typescript -const transaction = await db.tx.begin(); +const tx = await db.tx.begin() try { - const newAuthor = await AuthorRepo.create({ - name: 'Jane Smith', - 
email: 'jane.smith@example.com' - }, transaction); - - // Perform other operations... - - await transaction.commit(); - console.log(newAuthor); -} catch (error) { - await transaction.rollback(); - throw error; -} -``` - -### Using Model's `createMany` Method - -The `createMany` method on a model creates multiple records in a single operation. - -#### Signature -```typescript -createMany( - records: Array>, - transaction?: Transaction | string -): Promise>; -``` - -#### Parameters - -- `records`: An array of objects, each adhering to the schema defined for the model -- `transaction` (optional): A transaction object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to a `DBRecordsArrayInstance` containing the created records - -#### Example - -```typescript -const authors = await AuthorRepo.createMany([ - { name: 'Alice Johnson', email: 'alice.johnson@example.com' }, - { name: 'Bob Brown', email: 'bob.brown@example.com' } -]); - -console.log(authors); -/* -{ - data: [ - { - __id: 'generated_id_1', - __label: 'author', - name: 'Alice Johnson', - email: 'alice.johnson@example.com' - }, - { - __id: 'generated_id_2', - __label: 'author', - name: 'Bob Brown', - email: 'bob.brown@example.com' - } - ], - total: 2 + const movie = await db.records.create({ label: 'MOVIE', data: { title: 'Dune' } }, tx) + const actor = await db.records.create({ label: 'ACTOR', data: { name: 'Timothée Chalamet' } }, tx) + await db.records.attach({ source: movie, target: actor, options: { type: 'STARS' } }, tx) + await tx.commit() +} catch (e) { + await tx.rollback() + throw e } -*/ ``` -#### Using with Transactions +## Via Model ```typescript -const transaction = await db.tx.begin(); -try { - const authors = await AuthorRepo.createMany([ - { name: 'Charlie Green', email: 'charlie.green@example.com' }, - { name: 'David Blue', email: 'david.blue@example.com' } - ], transaction); +const MovieModel = new Model('MOVIE', { title: { type: 'string' }, 
rating: { type: 'number' } }) - // Perform other operations... - - await transaction.commit(); - console.log(authors); -} catch (error) { - await transaction.rollback(); - throw error; -} +const movie = await MovieModel.create({ title: 'Inception', rating: 8.8 }) +const movies = await MovieModel.createMany([{ title: 'Dune' }, { title: 'Arrival' }]) ``` -## Best Practices for Creating Records - -1. **Use Models for Structured Applications** - - Models provide type safety, validation, and better organization - - They enforce schema consistency across your application - -2. **Use Transactions for Related Operations** - - When creating multiple records that are related, use [transactions](../../concepts/transactions.mdx) - - Transactions ensure data consistency and allow rollback if operations fail - -3. **Handle Uniqueness Constraints** - - Models automatically check uniqueness before creating records - - Handle `UniquenessError` exceptions appropriately - -4. **Leverage Batch Operations** - - Use `createMany` for better performance when creating multiple records - - It minimizes network requests and database overhead - -5. **Consider Default Values** - - Define default values in your schema to reduce repetitive code - - Default values can be static or derived from functions (like timestamps) - -6. 
**Choose the Right Data Type Control Approach** - - Use the flat object approach for most cases where automatic type inference is sufficient - - Use the property-based approach with `PropertyDraft` objects when you need precise control over types - -## Data Type Handling - -RushDB supports the following property types: - -- `string`: Text values -- `number`: Numeric values -- `boolean`: True/false values -- `null`: Null values -- `datetime`: ISO8601 format strings (e.g., "2025-04-23T10:30:00Z") -- `vector`: Arrays of numbers (when `castNumberArraysToVectors` is true) - -### Automatic Type Inference - -**By default, `suggestTypes` is set to `true` for all write operations** (create, createMany, upsert, importJson). This means RushDB automatically infers data types from your values: -- Numeric values become `number` type -- `true`/`false` become `boolean` type -- ISO8601 strings become `datetime` type -- `null` becomes `null` type -- All other values become `string` type - -To disable automatic type inference and store all values as strings, you must **explicitly set `suggestTypes: false`** in your options. - -### Additional Type Conversions - -When `convertNumericValuesToNumbers` is enabled, string values that represent numbers (e.g., '30') will be converted to their numeric equivalents (e.g., 30). - -When `castNumberArraysToVectors` is enabled, numeric arrays will be stored as `vector` type instead of `number` arrays. - -For more complex data import operations, refer to the [Import Data](./import-data.md) documentation. - -## Conclusion - -Creating records in RushDB can be done through direct API calls or through the Model abstraction. While direct API calls offer flexibility for dynamic or ad-hoc operations, using Models is recommended for most applications due to their type safety, validation capabilities, and more intuitive API. 
- -For more advanced record operations, see the other guides in this section: -- [Get Records](./get-records.md) - Retrieve records from the database -- [Update Records](./update-records.md) - Modify existing records -- [Delete Records](./delete-records.md) - Remove records from the database -- [Import Data](./import-data.md) - Import data in bulk - diff --git a/docs/docs/typescript-sdk/records/delete-records.md b/docs/docs/typescript-sdk/records/delete-records.md index a13de652..18f699b5 100644 --- a/docs/docs/typescript-sdk/records/delete-records.md +++ b/docs/docs/typescript-sdk/records/delete-records.md @@ -4,218 +4,52 @@ sidebar_position: 7 # Delete Records -RushDB provides flexible APIs for deleting records from your database. This capability lets you remove individual records by ID or delete multiple records at once using search query filters. - -## Overview - -The delete endpoints allow you to: -- Delete a single record or multiple records by ID using `deleteById` -- Delete records using search queries with the `delete` method -- Delete records directly from record instances -- Perform atomic deletions using transactions -- Safely remove records with proper authentication - -All delete operations require authentication using a bearer token and handle relationships appropriately. Deletion operations can also be performed within transactions for atomic operations. - -## Delete a Single Record by ID +## `db.records.deleteById()` ```typescript -// Delete a single record by ID -await db.records.deleteById('record-id-here'); -``` - -This method deletes a single record identified by its unique ID. 
+// Single record +await db.records.deleteById('movie-id-123') -### Parameters - -| Parameter | Type | Description | -|-----------|--------|-------------| -| `idOrIds` | `String` or `Array` | The unique identifier of the record to delete, or an array of IDs | -| `transaction` | `Transaction` or `String` | Optional transaction for atomic operations | - -### Example - -```typescript -// Delete a specific record -try { - const response = await db.records.deleteById('018e4c71-5f20-7db2-b0b1-e7e681542af9'); - if (response.success) { - console.log('Record deleted successfully'); - } -} catch (error) { - console.error('Failed to delete record:', error); -} - -// Delete multiple records by their IDs -try { - const response = await db.records.deleteById([ - '018e4c71-5f20-7db2-b0b1-e7e681542af9', - '018e4c71-5f20-7db2-b0b1-e7e681542af8' - ]); - if (response.success) { - console.log('Records deleted successfully'); - } -} catch (error) { - console.error('Failed to delete records:', error); -} - -// Delete within a transaction -const tx = await db.tx.begin(); -try { - await db.records.deleteById('018e4c71-5f20-7db2-b0b1-e7e681542af9', tx); - await db.tx.commit(tx); - console.log('Record deleted successfully in transaction'); -} catch (error) { - await db.tx.rollback(tx); - console.error('Transaction failed:', error); -} +// Multiple records +await db.records.deleteById(['id-1', 'id-2', 'id-3']) ``` -## Delete Records Using a Search Query - -```typescript -// Delete records using search query -await db.records.delete( - { - where: { /* search conditions */ } - }, - transaction // optional -); -``` - -This method deletes records that match the specified search criteria. - -### Parameters +All relationships attached to deleted records are removed automatically. 
-| Parameter | Type | Description | -|-----------|------|-------------| -| `searchQuery` | `SearchQuery` | Query to identify records to delete | -| `transaction` | `Transaction` or `String` | Optional transaction for atomic operations | +## `db.records.delete()` -Note: Using an empty `where` clause without allowing force delete will throw an `EmptyTargetError`. - -You can use search parameters to define which records to delete: - -| SearchQuery Field | Type | Description | -|-------------------|----------|--------------------------------------------------------------------------------------------| -| `where` | `Object` | Filter conditions for records to delete ([learn more](../../concepts/search/where)) | -| `labels` | `Array` | Optional array of labels to filter records by ([learn more](../../concepts/search/labels)) | -| `limit` | `Number` | Maximum number of records to delete (optional) | - -### Example +Delete all records matching a search query. ```typescript -// Delete all users with age under 18 -try { - const response = await db.records.delete({ - where: { - label: 'USER', - age: { $lt: 18 } - } - }); - if (response.success) { - console.log(response.data.message); // Displays success message with deletion count - } -} catch (error) { - console.error('Failed to delete records:', error); -} - -// Delete inactive products in a specific category -try { - const response = await db.records.delete({ - where: { - label: 'PRODUCT', - category: 'electronics', - isActive: false - } - }); - if (response.success) { - console.log(response.data.message); - } -} catch (error) { - console.error('Failed to delete records:', error); -} +// Delete all sci-fi movies with low ratings +await db.records.delete({ + labels: ['MOVIE'], + where: { genre: 'sci-fi', rating: { $lt: 5 } } +}) ``` -## Bulk Deletion with Complex Queries +:::warning +An empty `where` without `allowForceDelete: true` in the SDK config throws `EmptyTargetError`. 
+::: -For more advanced deletion scenarios, you can use the full power of RushDB's search query system: +## In a transaction ```typescript -// Delete records with complex criteria +const tx = await db.tx.begin() try { - const response = await db.records.delete({ - where: { - $or: [ - { status: 'archived', lastModified: { $lt: '2024-01-01' } }, - { status: 'deleted', isTemporary: true } - ] - }, - labels: ['DOCUMENT', 'ATTACHMENT'], - limit: 1000 // Optional: limit the number of records deleted - }); - console.log(`${response.data.message}`); -} catch (error) { - console.error('Bulk deletion failed:', error); + await db.records.deleteById('movie-id-123', tx) + await db.records.delete({ labels: ['ACTOR'], where: { country: 'temp' } }, tx) + await tx.commit() +} catch (e) { + await tx.rollback(); throw e } ``` -## Deleting Records from a Record Instance - -If you already have a record instance, you can delete it directly: +## Via Model ```typescript -// Find a record first -const record = await db.records.findById('018e4c71-5f20-7db2-b0b1-e7e681542af9'); - -// Then delete it -try { - const response = await record.delete(); - if (response.success) { - console.log('Record deleted successfully'); - } -} catch (error) { - console.error('Failed to delete record:', error); -} +const MovieModel = new Model('MOVIE', { title: { type: 'string' } }) -// With a transaction -const tx = await db.tx.begin(); -try { - await record.delete(tx); - await db.tx.commit(tx); - console.log('Record deleted successfully within transaction'); -} catch (error) { - await db.tx.rollback(tx); - console.error('Transaction failed:', error); -} +await MovieModel.deleteById(['id-1', 'id-2']) +await MovieModel.delete({ where: { genre: 'temp' } }) ``` - -## Handling Relationships - -When deleting records, all relationships associated with those records are automatically deleted. This ensures database integrity and prevents orphaned relationships. 
- -## Safety Features and Transactions - -RushDB implements several safeguards for delete operations: - -1. **Authentication**: All delete operations require a valid authentication token -2. **Authorization**: Users can only delete records in projects they have access to -3. **Validation**: Input data is validated before processing -4. **Transactions**: Delete operations can be wrapped in transactions for data consistency -5. **Partial Failure Handling**: If a deletion affects multiple records and some operations fail, all changes are rolled back when using transactions -6. **Empty Query Protection**: The API prevents accidental deletion of all records by requiring explicit configuration to allow force deletion with empty `where` clauses - -## Performance Considerations - -- For large-scale deletions, RushDB processes operations in batches -- Complex query conditions may increase processing time -- Consider using [label filtering](../../concepts/search/labels) to narrow down records before deletion -- For very large datasets, use pagination in combination with delete operations - -## Related Documentation - -- [Search Introduction](../../concepts/search/introduction) -- [Where Clause](../../concepts/search/where) -- [Labels](../../concepts/search/labels) -- [Record Relationships](../../concepts/relationships) -- [Transactions](../../concepts/transactions.mdx) diff --git a/docs/docs/typescript-sdk/records/get-records.md b/docs/docs/typescript-sdk/records/get-records.md index e2843c46..6e9932eb 100644 --- a/docs/docs/typescript-sdk/records/get-records.md +++ b/docs/docs/typescript-sdk/records/get-records.md @@ -2,803 +2,215 @@ sidebar_position: 5 --- -# Get Records +# Get Records -RushDB provides flexible TypeScript SDK methods for retrieving records from your database. The Search API is one of the most powerful features of RushDB, allowing you to find records, navigate relationships, and transform results to exactly match your application's needs. 
+RushDB provides four read methods: look up by ID, find one, find unique, or run a full search query. -## Overview - -The record retrieval and search methods in the SDK enable you to: -- Get a single record by its ID -- Find a single record that matches specific criteria -- Find records that match complex queries with filtering, sorting, and pagination -- Traverse relationships between records -- Perform vector similarity searches -- Retrieve records with related data -- Transform and aggregate search results - -## Get Single Records - -RushDB provides several methods for retrieving individual records, whether you know their ID or need to find them using search criteria. - -### Get a Record by ID with `findById()` - -When you already know the unique identifier of the record you need: +## `db.records.findById()` ```typescript -// Get a single record by ID -const user = await db.records.findById('user-123'); - -// Get multiple records by their IDs -const users = await db.records.findById(['user-123', 'user-456', 'user-789']); +const movie = await db.records.findById('movie-id-123') +const movies = await db.records.findById(['id-1', 'id-2', 'id-3']) ``` -This method retrieves one or more records identified by their unique IDs. +Returns `DBRecordInstance` (single) or `DBRecordsArrayInstance` (array). -#### Parameters +## `db.records.findOne()` -| Parameter | Type | Description | -|-----------|--------|-------------| -| `idOrIds` | `String` or `Array` | The unique identifier(s) of the record(s) to retrieve | -| `transaction` | `Transaction` or `String` | Optional transaction for atomic operations | - -#### Examples +Returns the first matching record, or `null` if none found. 
```typescript -// Retrieve a single record -try { - const person = await db.records.findById('018e4c71-5f20-7db2-b0b1-e7e681542af9'); - console.log(`Found ${person.label()} with name: ${person.data.name}`); -} catch (error) { - console.error('Failed to retrieve record:', error); -} - -// Retrieve multiple records by ID -try { - const employees = await db.records.findById([ - '018e4c71-5f20-7db2-b0b1-e7e681542af9', - '018e4c71-5f20-7db2-b0b1-e7e681542af8' - ]); - console.log(`Found ${employees.data.length} records`); -} catch (error) { - console.error('Failed to retrieve records:', error); -} - -// Using with a transaction -const tx = await db.tx.begin(); -try { - const record = await db.records.findById('018e4c71-5f20-7db2-b0b1-e7e681542af9', tx); - // Use the record - await tx.commit(); -} catch (error) { - await tx.rollback(); - console.error('Transaction failed:', error); -} +const movie = await db.records.findOne({ + labels: ['MOVIE'], + where: { title: 'Inception' } +}) ``` -### Find a Single Record with `findOne()` +## `db.records.findUniq()` -When you need to find a record that matches specific criteria: +Like `findOne` but throws `NonUniqueResultError` if more than one record matches. ```typescript -const user = await db.records.findOne({ - labels: ["USER"], - where: { - email: "jane@example.com" - } -}); -``` +import { NonUniqueResultError } from '@rushdb/javascript-sdk' -This method returns a single record that matches your query parameters, or null if no match is found. 
- -#### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `searchQuery` | `SearchQuery` | Query object with filters to match records | -| `transaction` | `Transaction` or `String` | Optional transaction for atomic operations | - -#### Example - -```typescript try { - const user = await db.records.findOne({ - labels: ["USER"], - where: { - email: "jane@example.com" - } - }); - - if (user.data) { - console.log(`Found user: ${user.data.name}`); - } else { - console.log("User not found"); - } -} catch (error) { - console.error('Error searching for user:', error); -} -``` - -### Find a Unique Record with `findUniq()` - -When you expect exactly one matching record and want to ensure uniqueness: - -```typescript -try { - const user = await db.records.findUniq({ - labels: ["USER"], - where: { - email: "jane@example.com" // Assuming email is a unique field - } - }); -} catch (error) { - if (error instanceof NonUniqueResultError) { - console.error(`Expected one result but found multiple matches`); - } else { - console.error('Error searching for user:', error); - } -} -``` - -This method throws a `NonUniqueResultError` if more than one record matches your criteria. This is useful when querying fields that should be unique. 
- -#### Parameters - -| Parameter | Type | Description | -|-----------|------|-------------| -| `searchQuery` | `SearchQuery` | Query object with filters to match records | -| `transaction` | `Transaction` or `String` | Optional transaction for atomic operations | - -#### Example with Error Handling - -```typescript -try { - const user = await db.records.findUniq({ - labels: ["USER"], - where: { - email: "jane@example.com" - } - }); - - if (user.data) { - console.log(`Found unique user: ${user.data.name}`); - } -} catch (error) { - if (error instanceof NonUniqueResultError) { - console.error(`Expected one result but found ${error.count} matches`); - } else { - console.error('Error searching for user:', error); - } + const movie = await db.records.findUniq({ + labels: ['MOVIE'], + where: { title: 'Inception' } + }) +} catch (e) { + if (e instanceof NonUniqueResultError) console.error(`found ${e.count} matches`) } ``` -## Search for Multiple Records - -### Basic Searching with `find()` - -The most versatile search method is `find()`, which accepts a SearchQuery object to filter, sort, and paginate results. - -```typescript -// Basic search for records with the "USER" label -const result = await db.records.find({ - labels: ["USER"], - where: { - isActive: true - }, - limit: 10, - orderBy: { createdAt: "desc" } -}); - -// Access the returned records -const users = result.data; -console.log(`Found ${result.total} total users`); -``` - -This method searches for records that match the specified criteria, with support for filtering, pagination, and sorting. 
- -### Parameters - -| Field | Type | Description | -|-----------|------------------|-------------------------------------------------------------------------------------------------| -| `where` | `Object` | Filter conditions for records ([learn more](../../concepts/search/where)) | -| `orderBy` | `String` or `Object` | Sorting criteria ([learn more](../../concepts/search/pagination-order)) | -| `skip` | `Number` | Number of records to skip for pagination ([learn more](../../concepts/search/pagination-order)) | -| `limit` | `Number` | Maximum number of records to return (default: 1000) | -| `labels` | `Array` | Optional array of labels to filter records by ([learn more](../../concepts/search/labels)) | -| `aggregate` | `Object` | Optional aggregations map ([learn more](../../concepts/search/aggregations)) | -| `groupBy` | `Array` | Optional grouping keys (e.g. `["$record.status"]`) used with aggregations | +## `db.records.find()` -### Return Value - -The find method returns an object containing: -- `data`: An array of record instances matching the query -- `total`: The total number of records that match the query (before pagination) - -### Examples - -**Basic Search** - -```typescript -// Find all active users sorted by name -const result = await db.records.find({ - where: { - isActive: true - }, - labels: ["USER"], - orderBy: { name: 'asc' }, - limit: 50 -}); - -console.log(`Found ${result.total} active users, showing first ${result.data.length}`); -``` - -**Advanced Filtering** +Full search with filtering, sorting, and pagination. 
```typescript -// Find products with specific criteria -const results = await db.records.find({ - labels: ["PRODUCT"], - where: { - $or: [ - { status: 'in_stock', price: { $lt: 100 } }, - { status: 'pre_order', releaseDate: { $lt: '2025-06-01' } } - ] - }, - orderBy: [ - { popularity: 'desc' }, - { price: 'asc' } - ], - limit: 20 -}); +const { data: movies, total } = await db.records.find({ + labels: ['MOVIE'], + where: { rating: { $gte: 8 }, genre: 'sci-fi' }, + orderBy: { rating: 'desc' }, + limit: 20, + skip: 0 +}) ``` -Search queries support a powerful and flexible syntax for filtering records. For a detailed explanation of all the available operators and capabilities, see the [Where clause documentation](../../concepts/search/where). +### SearchQuery parameters -## Advanced Search Features +| Field | Type | Description | +|---|---|---| +| `labels` | `string[]` | Filter by one or more labels | +| `where` | `object` | Filter conditions ([docs](../../concepts/search/where)) | +| `orderBy` | `string \| object` | Sort criteria ([docs](../../concepts/search/pagination-order)) | +| `limit` | `number` | Max records to return (default: 1000) | +| `skip` | `number` | Records to skip for pagination | +| `aggregate` | `object` | Aggregation map ([docs](../../concepts/search/aggregations)) | +| `groupBy` | `string[]` | Grouping keys, e.g. 
`['$record.genre']` | -### Relationship Traversal +## Relationship traversal -One of RushDB's most powerful features is the ability to search across relationships between records: +Filter across graph edges inline with `where`: ```typescript -// Find all blog posts by users who work at tech companies -const techBloggers = await db.records.find({ - labels: ["POST"], +// Movies where at least one actor is from the USA +const { data } = await db.records.find({ + labels: ['MOVIE'], where: { - USER: { // Traverse to related USER records - COMPANY: { // Traverse to related COMPANY records - industry: "Technology" - } - }, - publishedAt: { $lte: new Date() } // Only published posts - }, - orderBy: { publishedAt: "desc" }, - limit: 20 -}); -``` - -For more complex relationship queries, you can specify relationship types and directions: - -```typescript -// Find users who follow specific topics -const users = await db.records.find({ - labels: ["USER"], - where: { - TOPIC: { - $relation: { - type: "FOLLOWS", - direction: "out" // User -> FOLLOWS -> Topic - }, - name: { $in: ["TypeScript", "GraphDB", "RushDB"] } - } + ACTOR: { country: 'USA' } } -}); -``` - -See the [Where clause documentation](../../concepts/search/where#relationship-queries) for more details on relationship queries. 
+}) -### Vector Search - -RushDB supports vector similarity searches for AI and machine learning applications: - -```typescript -// Find documents similar to a query embedding -const similarDocuments = await db.records.find({ - labels: ["DOCUMENT"], +// With explicit relation type and direction +const { data: films } = await db.records.find({ + labels: ['MOVIE'], where: { - embedding: { - $vector: { - fn: "gds.similarity.cosine", // Similarity function - query: queryEmbedding, // Your vector embedding - threshold: { $gte: 0.75 } // Minimum similarity threshold - } + DIRECTOR: { + $relation: { type: 'DIRECTED_BY', direction: 'out' }, + name: { $contains: 'Nolan' } } - }, - limit: 10 -}); -``` - -See the [Vector operators documentation](../../concepts/search/where#vector-operators) for more details on vector search capabilities. - -### Field Existence and Type Checking - -RushDB provides operators to check for field existence and data types, which is particularly useful when working with heterogeneous data: - -```typescript -// Find users who have provided an email but not a phone number -const emailOnlyUsers = await db.records.find({ - labels: ["USER"], - where: { - $and: [ - { email: { $exists: true } }, // Must have email - { phoneNumber: { $exists: false } } // Must not have phone number - ] - } -}); - -// Find records where age is actually stored as a number (not string) -const properAgeRecords = await db.records.find({ - labels: ["USER"], - where: { - age: { $type: "number" } } -}); - -// Complex query combining type and existence checks -const validProfiles = await db.records.find({ - labels: ["PROFILE"], - where: { - $and: [ - { bio: { $type: "string" } }, // Bio must be text - { bio: { $contains: "developer" } }, // Bio mentions developer - { skills: { $exists: true } }, // Skills must exist - { avatar: { $exists: false } } // No avatar uploaded yet - ] - } -}); +}) ``` -The `$exists` operator is useful for: -- Data validation and cleanup -- Finding 
incomplete profiles -- Filtering by optional fields +See [Where clause docs](../../concepts/search/where#relationship-queries) for full syntax. -The `$type` operator is useful for: -- Working with imported data that might have inconsistent types -- Validating data integrity -- Ensuring type consistency before operations +## Field operators -See the [Field existence operators documentation](../../concepts/search/where#field-existence-operator) for more details. +```typescript +// Numeric range +where: { rating: { $gte: 8, $lte: 9.5 } } -### Pagination and Sorting +// Set membership +where: { genre: { $in: ['sci-fi', 'thriller'] } } -Control the order and volume of results: +// Text +where: { title: { $contains: 'dark' } } -```typescript -// Get the second page of results (20 items per page) -const page2 = await db.records.find({ - labels: ["PRODUCT"], - where: { - category: "Electronics" - }, - skip: 20, // Skip the first 20 results - limit: 20, // Return 20 results - orderBy: { - price: "asc" // Sort by price ascending - } -}); +// Existence / type checks +where: { $and: [{ poster: { $exists: true } }, { rating: { $type: 'number' } }] } -// Get total number of results for pagination UI -const totalProducts = page2.total; +// Logical +where: { $or: [{ genre: 'sci-fi' }, { rating: { $gte: 9 } }] } ``` -For more details on pagination and sorting options, see the [Pagination and ordering documentation](../../concepts/search/pagination-order). - -### Aggregations +Full operator reference: [Where clause docs](../../concepts/search/where). 
-Transform and aggregate your search results:
+## Aggregations

 ```typescript
-// Calculate comapany statis by employees and salaries
-const companySalaryStats = await db.records.find({
-  labels: ['COMPANY'],
-  where: {
-    EMPLOYEE: {
-      $alias: '$employee', // Define alias for employee records
-      salary: {
-        $gte: 50000 // Filter employees by salary
-      }
-    }
-  },
+const stats = await db.records.find({
+  labels: ['MOVIE'],
+  where: { ACTOR: { $alias: '$actor', country: 'USA' } },
   aggregate: {
-    // Use field directly from record
-    companyName: '$record.name',
-
-    // Count unique employees using the defined alias
-    employeesCount: {
-      fn: 'count',
-      unique: true,
-      alias: '$employee'
-    },
-
-    // Calculate total salary using the defined alias
-    totalWage: {
-      fn: 'sum',
-      field: 'salary',
-      alias: '$employee'
-    },
-
-    // Collect unique employees names
-    employeeNames: {
-      fn: 'collect',
-      field: 'name',
-      alias: '$employee'
-    },
-
-    // Get average salary with precision
-    avgSalary: {
-      fn: 'avg',
-      field: 'salary',
-      alias: '$employee',
-      precision: 0
-    },
-
-    // Get min and max salary
-    minSalary: {
-      fn: 'min',
-      field: 'salary',
-      alias: '$employee'
-    },
-    maxSalary: {
-      fn: 'max',
-      field: 'salary',
-      alias: '$employee'
-    }
+    title: '$record.title',
+    actorCount: { fn: 'count', unique: true, alias: '$actor' },
+    avgRating: { fn: 'avg', field: 'rating', alias: '$record', precision: 1 },
+    actorNames: { fn: 'collect', field: 'name', alias: '$actor' }
   }
-});
+})
 ```

-For comprehensive details on available aggregation functions and usage, see the [Aggregations documentation](../../concepts/search/aggregations).
-
-### Grouping Results (groupBy)
-
-`groupBy` lets you pivot / summarize records. Each key references an alias plus a property. The root alias is `$record`.
+:::danger Do not set `limit` when using `aggregate` — it cuts the scan and returns mathematically incorrect totals. Use `orderBy` on an aggregated key instead. 
+::: -Full reference & advanced patterns: [Grouping guide](../../concepts/search/group-by) - -Basic grouping: -```typescript -const byStage = await db.records.find({ - labels: ['HS_DEAL'], - aggregate: { - count: { fn: 'count', alias: '$record' }, - avgAmount: { fn: 'avg', field: 'amount', alias: '$record' } - }, - groupBy: ['$record.dealstage'], - orderBy: { count: 'desc' } -}); -// byStage.data example: [{ dealstage: 'prospecting', count: 120, avgAmount: 3400 }, ...] -``` +### GroupBy -Grouping by related alias: ```typescript -const byDepartment = await db.records.find({ - labels: ['DEPARTMENT'], - where: { PROJECT: { $alias: '$project' } }, +const byGenre = await db.records.find({ + labels: ['MOVIE'], aggregate: { - projectCount: { fn: 'count', alias: '$project' }, - projects: { fn: 'collect', field: 'name', alias: '$project', unique: true } + count: { fn: 'count', alias: '$record' }, + avgRating: { fn: 'avg', field: 'rating', alias: '$record', precision: 1 } }, - groupBy: ['$record.name'], - orderBy: { projectCount: 'desc' } -}); -``` - -Multiple group keys (pivot style): -```typescript -const pivot = await db.records.find({ - labels: ['PROJECT'], - aggregate: { count: { fn: 'count', alias: '$record' } }, - groupBy: ['$record.category', '$record.active'], + groupBy: ['$record.genre'], orderBy: { count: 'desc' } -}); +}) +// [{ genre: 'sci-fi', count: 42, avgRating: 7.9 }, ...] ``` -Notes: -* Requires at least one aggregation. -* Output rows contain group fields + aggregated fields (no raw record body unless grouped / aggregated). -* `collect` is unique by default; set `unique: false` to allow duplicates. -* For hierarchical drill-down: group at parent, use nested `collect` for children instead of adding children to `groupBy`. 
+Full reference: [Aggregations](../../concepts/search/aggregations) · [Grouping](../../concepts/search/group-by) -## Model-Based Search - -If you're using RushDB's Model system (recommended), you get the same powerful search capabilities with additional type safety and convenience. - -### Searching with Models - -Models provide type-safe search methods that understand your data structure: +## TimeBucket (time-series) ```typescript -// Define your model -const UserModel = new Model('USER', { - email: { type: 'string', unique: true }, - name: { type: 'string' }, - age: { type: 'number' }, - isActive: { type: 'boolean', default: true } -}); - -// Search using the model -const activeUsers = await UserModel.find({ - where: { - age: { $gte: 21 }, - isActive: true +const daily = await db.records.find({ + labels: ['ORDER'], + aggregate: { + day: { fn: 'timeBucket', field: 'createdAt', granularity: 'day', alias: '$record' }, + count: { fn: 'count', alias: '$record' } }, - orderBy: { name: "asc" } -}); - -// TypeScript provides full type safety for your results -const firstUser = activeUsers.data[0]; -const userName: string = firstUser.name; // Correctly typed as string + groupBy: ['day'], + orderBy: { day: 'asc' } +}) ``` -### Model Search Methods +`granularity` values: `day` · `week` · `month` · `quarter` · `year` · `hours` · `minutes` · `seconds` (use plural + `size` for custom window widths). 
-Models provide the same search methods as direct record search, but with label pre-filled: +## Nested collect (hierarchical results) ```typescript -// Find all matching records -const users = await UserModel.find({ - where: { isActive: true } -}); - -// Find a single record -const jane = await UserModel.findOne({ - where: { email: "jane@example.com" } -}); - -// Find by ID -const user = await UserModel.findById("user-123"); - -// Find a unique record -const uniqueUser = await UserModel.findUniq({ - where: { email: "unique@example.com" } -}); +const tree = await db.records.find({ + labels: ['MOVIE'], + where: { ACTOR: { $alias: '$actor' } }, + aggregate: { + title: '$record.title', + actors: { fn: 'collect', alias: '$actor', aggregate: { + name: '$actor.name', + country: '$actor.country' + }} + } +}) ``` -Note that when using model search methods, you don't need to specify the `labels` field in the search query since it's automatically set to the model's label. - -For more details on models and type safety, see: -- [Models documentation](../../typescript-sdk/models) +:::note Only `fn: 'collect'` is valid inside a nested `aggregate` block. 
+::: -## Search Within Transactions - -All search operations can be performed within transactions for consistency: +## In a transaction ```typescript -// Begin a transaction -const tx = await db.tx.begin(); - +const tx = await db.tx.begin() try { - // Perform search within the transaction - const users = await db.records.find({ - labels: ["USER"], - where: { isActive: true } - }, tx); - - // Use the results to make changes - for (const user of users.data) { - if (user.data.lastLogin < olderThan3Months) { - await user.update({ isActive: false }, tx); - } - } - - // Commit the transaction when done - await tx.commit(); -} catch (error) { - // Roll back the transaction on error - await tx.rollback(); - throw error; + const { data } = await db.records.find({ labels: ['MOVIE'] }, tx) + // … do more work … + await tx.commit() +} catch (e) { + await tx.rollback(); throw e } ``` -For more details on transactions, see the [Transactions documentation](../../typescript-sdk/transactions). - -## Performance Best Practices - -When working with the Search API, follow these best practices for optimal performance: - -1. **Be Specific with Labels**: Always specify labels to narrow the search scope. -2. **Use Indexed Properties**: Prioritize filtering on properties that have indexes. -3. **Limit Results**: Use pagination to retrieve only the records you need. -4. **Optimize Relationship Traversal**: Avoid deep relationship traversals when possible. -5. **Use Aliases Efficiently**: Define aliases only for records you need to reference in aggregations. -6. **Filter Early**: Apply filters as early as possible in relationship traversals to reduce the amount of data processed. - -## Search Related Records - -You can efficiently search for records that are related to a specific record using the entry point feature in search queries or direct relationship traversal. 
- -```typescript -// Search for records related to a specific record -const relatedRecords = await db.records.find({ - id: 'source-record-id', // Starting from this record - where: { /* search conditions */ } -}); -``` - -This method searches for records that are directly related to a specific record, identified by its ID. - -### Parameters - -| Parameter | Type | Description | -|-----------|------------------|---------------------------------------------------------------------------| -| `id` | String | The unique identifier of the source record | -| `where` | Object | Filter conditions for records ([learn more](../../concepts/search/where)) | -| `orderBy` | String or Object | Sorting criteria (same as regular search) | -| `skip` | Number | Number of records to skip for pagination | -| `limit` | Number | Maximum number of records to return | - -### Example - -```typescript -// Find all documents associated with a specific person -const personId = '018e4c71-5f20-7db2-b0b1-e7e681542af9'; -const result = await db.records.find({ - id: personId, - labels: ['DOCUMENT'], - where: { - status: 'active' - }, - orderBy: { createdAt: 'desc' } -}); - -console.log(`Found ${result.total} documents for this person`); -``` - -## Get Record Properties - -```typescript -// Get properties of a specific record -const properties = await db.records.getProperties('record-id-here'); -``` - -This method retrieves all properties of a specific record. 
- -### Example +## Via Model ```typescript -const properties = await db.records.getProperties('018e4c71-5f20-7db2-b0b1-e7e681542af9'); -console.log(properties); -// Output: -// [ -// { name: 'firstName', type: 'string', value: 'John' }, -// { name: 'lastName', type: 'string', value: 'Doe' }, -// { name: 'age', type: 'number', value: 30 } -// ] -``` - -## Get Record Relations +const MovieModel = new Model('MOVIE', { title: { type: 'string' }, rating: { type: 'number' } }) -```typescript -// Get relationships of a specific record -const relationships = await db.records.getRelations('record-id-here', { - skip: 0, - limit: 20 -}); +const all = await MovieModel.find() +const sciFi = await MovieModel.find({ where: { genre: 'sci-fi' } }) +const one = await MovieModel.findOne({ where: { title: 'Inception' } }) +const byId = await MovieModel.findById('movie-id-123') +const unique = await MovieModel.findUniq({ where: { title: 'Inception' } }) ``` -This method retrieves the relationships of a specific record. - -### Parameters - -| Parameter | Type | Description | -|------------|--------|-------------| -| `id` | String | The unique identifier of the record | -| `options` | Object | Optional pagination parameters | - -### Example - -```typescript -const { data, total } = await db.records.getRelations('018e4c71-5f20-7db2-b0b1-e7e681542af9'); - -console.log(`This record has ${total} relationships`); -data.forEach(relation => { - console.log(`Relation type: ${relation.type}`); - console.log(`Target: ${relation.target.id} (${relation.target.label})`); -}); -``` - -## Search Relations - -```typescript -// Search for relationships -const relationships = await db.records.searchRelations({ - where: { /* search conditions */ } -}); -``` - -This method searches for relationships between records based on specified criteria. 
- -### Example - -```typescript -// Find all employment relationships created in the last month -const lastMonth = new Date(); -lastMonth.setMonth(lastMonth.getMonth() - 1); - -const { data, total } = await db.records.searchRelations({ - where: { - type: 'WORKS_AT', - startDate: { $gte: lastMonth.toISOString() } - } -}); - -console.log(`Found ${total} new employment relationships`); -``` - -## TypeScript Type Support - -The RushDB SDK provides TypeScript types to enhance developer experience and type safety: - -```typescript -import { - SearchQuery, - DBRecord, - DBRecordInstance, - DBRecordsArrayInstance, - Schema, - PropertyDefinition, - Relation -} from '@rushdb/javascript-sdk'; - -// Define a schema type for better type checking -type UserSchema = { - name: { type: 'string' }; - age: { type: 'number' }; - email: { type: 'string', unique: true }; - isActive: { type: 'boolean', default: true }; -}; - -// Strongly-typed search -const query: SearchQuery = { - where: { - age: { $gt: 21 }, - isActive: true - } -}; - -// Type-safe result handling -const result = await db.records.find(query); - -// Working with typed data -result.data.forEach((record) => { - // TypeScript knows that name is a string, age is a number, etc. - console.log(`${record.data.name} (${record.data.age}): ${record.data.email}`); -}); -``` +Model search methods auto-fill `labels` from the model definition. 
-## Performance Considerations - -To optimize your record retrieval and search operations: - -- **Use Appropriate Methods**: Choose the right method for your needs (`findById` for known IDs, `find` for searches) -- **Specify Labels**: Always include label filters to limit the search scope -- **Use Appropriate Limits**: Set reasonable `limit` values to control response size and query performance -- **Implement Pagination**: Use pagination (`skip` and `limit`) for large result sets -- **Optimize Complex Queries**: Break down complex queries when possible -- **Leverage Indexes**: Prioritize filtering on indexed properties -- **Filter Early in Traversals**: Apply filters as early as possible in relationship traversals -- **Consider Caching**: For frequently accessed records, implement caching strategies -- **Use Transactions**: Wrap related operations in transactions for consistency and improved performance -- **Monitor Query Performance**: Test and optimize slow queries - -## Related Documentation - -- [Search Introduction](../../concepts/search/introduction) -- [Where Clause](../../concepts/search/where) -- [Labels](../../concepts/search/labels) -- [Pagination and Order](../../concepts/search/pagination-order) -- [Record Relationships](../../concepts/relationships) -- [Aggregations](../../concepts/search/aggregations) -- [Transactions](../../concepts/transactions.mdx) -- [Models](../../typescript-sdk/models) diff --git a/docs/docs/typescript-sdk/records/import-data.md b/docs/docs/typescript-sdk/records/import-data.md index e81ecd40..d5e2605a 100644 --- a/docs/docs/typescript-sdk/records/import-data.md +++ b/docs/docs/typescript-sdk/records/import-data.md @@ -163,8 +163,7 @@ const importOptions = { convertNumericValuesToNumbers: true, capitalizeLabels: false, relationshipType: 'OWNS', - returnResult: true, - castNumberArraysToVectors: false + returnResult: true }; const importedUsers = await db.records.importJson({ label: 'user', data: data.users, options: 
importOptions }) @@ -175,7 +174,6 @@ const importedUsers = await db.records.importJson({ label: 'user', data: data.us | Option | Type | Default | Description | |---------------------------------|---------|---------------------------------|---------------------------------------------------| | `suggestTypes` | Boolean | `true` | **Default is `true`** - Automatically infers data types for properties. Set to `false` to disable type inference and store all values as strings | -| `castNumberArraysToVectors` | Boolean | `false` | Converts numeric arrays to vector type | | `convertNumericValuesToNumbers` | Boolean | `false` | Converts string numbers to number type | | `capitalizeLabels` | Boolean | `false` | Converts all labels to uppercase | | `relationshipType` | String | `__RUSHDB__RELATION__DEFAULT__` | Default relationship type between Records (nodes) | @@ -200,53 +198,3 @@ By default, `suggestTypes` is `true` for all import operations (importJson, impo - importJson: nested/mixed JSON. Provide label explicitly, or pass a single-key object like `{ LABEL: [...] }` to infer the label. - importCsv: CSV string input with parseConfig; dynamicTyping inherits from options.suggestTypes when omitted. -## How RushDB JSON Import Works - -When you import data through the TypeScript SDK, RushDB applies a breadth-first search (BFS) algorithm to parse and transform your data: - -1. **Data Preparation**: Each record is assigned a unique UUIDv7 `__id` (unless provided) -2. **Type Inference**: If `suggestTypes` is enabled, RushDB analyzes values to determine appropriate data types -3. **Graph Construction**: Records become nodes in the graph database with properties and relationships -4. **Metadata Generation**: Type information is stored in `__proptypes` for each record -5. 
**Storage**: Data is efficiently inserted into the underlying Neo4j database - -### Data Structure Example - -For example, importing this JSON: - -```json -{ - "car": { - "make": "Tesla", - "model": "Model 3", - "engine": { - "power": 283, - "type": "electric" - } - } -} -``` - -Creates this graph structure in RushDB: - -- A `car` node with properties `make: "Tesla"` and `model: "Model 3"` -- An `engine` node with properties `power: 283` and `type: "electric"` -- A relationship connecting the car to its engine -- Property metadata nodes tracking property names and types - -The TypeScript SDK abstracts this complexity, allowing you to focus on your data models. - -## Performance Considerations - -- For large data imports (>1,000 records), consider batching your requests in chunks -- Setting `returnResult: false` is recommended for large imports to improve performance -- For time-critical imports, pre-process your data to ensure type consistency -- CSV imports currently read the full string; for very large files consider splitting client-side -- Upsert on large batches may benefit from using stable unique keys in `mergeBy` to minimize match cost. 
- -## Related Documentation - -- [REST API - Import Data](../../rest-api/records/import-data) - Complete API details for data import -- [Storage Internals](../../concepts/storage) - Technical details about how RushDB stores your data -- [Properties](../../concepts/properties) - Learn about property handling and type inference -- [Transactions](../../concepts/transactions.mdx) - Understand how RushDB ensures data integrity during imports diff --git a/docs/docs/typescript-sdk/records/update-records.md b/docs/docs/typescript-sdk/records/update-records.md index 145d5e36..b001a608 100644 --- a/docs/docs/typescript-sdk/records/update-records.md +++ b/docs/docs/typescript-sdk/records/update-records.md @@ -4,544 +4,79 @@ sidebar_position: 6 # Update Records -Updating [records](../../concepts/records.md) is a crucial operation for maintaining and modifying data within your application. RushDB provides multiple ways to update records, from direct API calls to Model-based abstractions. +Two methods for updating records: `update()` patches fields, `set()` replaces them all. -This guide covers different approaches to updating records, from the most basic to more advanced patterns. +## `db.records.update()` -## Overview - -The update record methods in the SDK enable you to: -- Update a single [record](../../concepts/records.md) with new [properties](../../concepts/properties.md) -- Update multiple records in one operation -- Control data type inference and other formatting options -- Update records with precise type control -- Update records within [transactions](../../concepts/transactions.mdx) for data consistency -- Update records using Model abstractions for type safety - -## Updating Single Records - -There are multiple ways to update records in RushDB. Let's start with the most basic approach using the direct API methods. 
- -### Using RushDB's `update()` Method - -The most direct way to update a record is using the API client's `records.update` method: - -```typescript -const updatedAuthor = await db.records.update({ - target: 'author_id', - label: 'AUTHOR', - data: { - name: 'John Doe Updated', - email: 'john.doe.updated@example.com' - }, - options: { - suggestTypes: true - } -}); - -console.log(updatedAuthor); -/* -{ - __id: 'author_id', - __label: 'AUTHOR', - name: 'John Doe Updated', - email: 'john.doe.updated@example.com' -} -*/ -``` - -#### Parameters - -- `target`: The target record to modify (record ID, record instance, or record object) -- `label`: The [label](../../concepts/labels.md)/type for the record -- `data`: The updated data for the record as a flat object -- `options` (optional): Configuration options for record update: - - `suggestTypes` (boolean, default: `true`): When true, automatically infers data types for [properties](../../concepts/properties.md) - - `castNumberArraysToVectors` (boolean, default: `false`): When true, converts numeric arrays to vector type - - `convertNumericValuesToNumbers` (boolean, default: `false`): When true, converts string numbers to number type -- `transaction` (optional): A [transaction](../../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to a `DBRecordInstance` containing the updated [record](../../concepts/records.md) - -#### Updating Records in Transactions - -#### Updating Records in Transactions +Partial update — only the specified fields change; all other fields are preserved. ```typescript -const transaction = await db.tx.begin(); -try { - const updatedAuthor = await db.records.update({ - target: 'author_id', - label: 'AUTHOR', - data: { - name: 'Jane Smith Updated', - email: 'jane.smith.updated@example.com' - } - }, transaction); - - // Perform other operations... 
- - await transaction.commit(); - console.log(updatedAuthor); -} catch (error) { - await transaction.rollback(); - throw error; -} +await db.records.update({ + target: 'movie-id-123', + label: 'MOVIE', + data: { rating: 9.1 } +}) +// → DBRecordInstance (title, genre, etc. unchanged) ``` -### Using RushDB's `set()` Method +## `db.records.set()` -While the `update()` method only modifies the specified fields while preserving other existing fields, the `set()` method replaces all fields of a record with the provided values. This is useful when you want to completely reset a record's state. +Full replace — all fields not in `data` are removed. ```typescript -const updatedAuthor = await db.records.set({ - target: 'author_id', - label: 'AUTHOR', - data: { - name: 'John Doe Reset', - email: 'john.reset@example.com' - // All other fields will be removed - }, - options: { - suggestTypes: true - } -}); - -console.log(updatedAuthor); -/* -{ - __id: 'author_id', - __label: 'AUTHOR', - name: 'John Doe Reset', - email: 'john.reset@example.com' - // Previous fields that were not specified are now gone -} -*/ +await db.records.set({ + target: 'movie-id-123', + label: 'MOVIE', + data: { title: 'Inception', rating: 9.1, genre: 'sci-fi' } +}) +// → DBRecordInstance (only these three fields remain) ``` -#### Parameters - -The parameters for `set()` are identical to those for `update()`: - -- `target`: The target record to modify (record ID, record instance, or record object) -- `label`: The [label](../../concepts/labels.md)/type for the record -- `data`: The complete new data for the record as a flat object -- `options` (optional): Configuration options identical to those for `update()` -- `transaction` (optional): A [transaction](../../concepts/transactions.mdx) object or string - -#### Returns - -- A promise that resolves to a `DBRecordInstance` containing the updated [record](../../concepts/records.md) - -#### Difference between `update()` and `set()` - -The key difference between 
these methods: - -- `update()`: Performs a partial update, only modifying the specified fields -- `set()`: Performs a complete replacement, removing any fields not specified in the data - -#### Setting Records in Transactions - -```typescript -const transaction = await db.tx.begin(); -try { - const resetAuthor = await db.records.set({ - target: 'author_id', - label: 'AUTHOR', - data: { - name: 'Reset Author', - email: 'reset@example.com' - } - }, transaction); - - await transaction.commit(); - console.log(resetAuthor); -} catch (error) { - await transaction.rollback(); - throw error; -} -``` +### Parameters (both methods) -### Property-Based Approach for Precise Type Control +| Parameter | Description | +|---|---| +| `target` | Record ID string, record instance, or record object | +| `label` | Label for the record | +| `data` | Flat object or `PropertyDraft[]` for precise type control | +| `options` | `suggestTypes`, `convertNumericValuesToNumbers` | +| `transaction` | Optional `Transaction` or ID string | -When you need precise control over property types, you can use the property-based approach by passing an array of `PropertyDraft` objects instead of a flat data object: +### PropertyDraft approach ```typescript -const updatedAuthor = await db.records.update({ - target: 'author_id', - label: 'AUTHOR', +await db.records.update({ + target: 'movie-id-123', + label: 'MOVIE', data: [ - { - name: 'name', - type: 'string', - value: 'John Doe Updated' - }, - { - name: 'joinDate', - type: 'datetime', - value: '2025-05-15T14:30:00Z' - } + { name: 'rating', type: 'number', value: 9.1 }, + { name: 'releasedAt', type: 'datetime', value: '2010-07-16T00:00:00Z' } ] -}); - -console.log(updatedAuthor); -/* -{ - __id: 'author_id', - __label: 'AUTHOR', - name: 'John Doe Updated', - joinDate: '2025-05-15T14:30:00Z' -} -*/ -``` - -#### Property Draft Object Properties - -Each property draft object supports the following properties: - -| Property | Type | Description | 
-|----------|------|-------------| -| `name` | `string` | The property name | -| `type` | `string` | The data type ('string', 'number', 'boolean', 'datetime', etc.) | -| `value` | `any` | The property value | -| `valueSeparator` | `string` (optional) | Separator to split string values into arrays | - -## Updating Multiple Records - -## Updating Multiple Records - -When you need to update multiple records in a single operation, you can use a combination of `find` and `update` methods. - -### Using Find and Update Pattern - -```typescript -// Find all authors with a specific name -const authorsToUpdate = await db.records.find({ - labels: ['AUTHOR'], - where: { name: 'John Doe' } -}); - -// Update each record individually -for (const author of authorsToUpdate.data) { - await db.records.update({ - target: author.__id, - label: 'AUTHOR', - data: { name: 'John Doe Updated' } - }); -} - -console.log(authorsToUpdate); -/* -{ - data: [ - { - __id: 'author_id_1', - __label: 'AUTHOR', - name: 'John Doe Updated', - email: 'john.doe@example.com' - }, - { - __id: 'author_id_2', - __label: 'AUTHOR', - name: 'John Doe Updated', - email: 'john.doe@example.com' - } - ], - total: 2 -} -*/ -``` - -#### Updating Multiple Records in Transactions - -```typescript -// Find records matching criteria -const postsToUpdate = await db.records.find({ - labels: ['POST'], - where: { rating: { $lt: 5 } } -}); - -const transaction = await db.tx.begin(); -try { - // Update each record within the transaction - for (const post of postsToUpdate.data) { - await db.records.update({ - target: post.__id, - label: 'POST', - data: { rating: 5 } - }, transaction); - } - await transaction.commit(); - console.log(postsToUpdate); - /* - { - data: [ - { - __id: 'post_id_1', - __label: 'POST', - created: '2023-01-02T00:00:00Z', - title: 'Blog Post Title 1', - content: 'This is a blog post content.', - rating: 5 - }, - { - __id: 'post_id_2', - __label: 'POST', - created: '2023-01-03T00:00:00Z', - title: 'Blog Post 
Title 2', - content: 'This is another blog post content.', - rating: 5 - } - ], - total: 2 - } - */ -} catch (error) { - await transaction.rollback(); - throw error; -} -``` - -## Updating Records with Models - -## Updating Records with Models - -The recommended approach for structured applications is to use RushDB's [Models](../models.md). Models provide type safety, validation, and a more intuitive API for working with records. - -We'll use the following model definitions for these examples: - -```typescript -const AuthorRepo = new Model('author', { - name: { type: 'string' }, - email: { type: 'string', unique: true } -}); -``` - -### Using Model's `update` Method - -The `update` method on a model updates a single record. - -#### Signature -```typescript -update( - target: DBRecordTarget, - record: Partial>, - transaction?: Transaction | string -): Promise>; -``` - -#### Parameters - -- `target`: The target record to update (ID string, record instance, or record object) -- `record`: An object containing the fields to update and their new values -- `transaction` (optional): A [transaction](../../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to a `DBRecordInstance` containing the updated [record](../../concepts/records.md) - -#### Example - -```typescript -const updatedAuthor = await AuthorRepo.update('author_id', { - name: 'John Doe Updated' -}); - -console.log(updatedAuthor); -/* -{ - __id: 'author_id', - __label: 'author', - name: 'John Doe Updated', - email: 'john.doe@example.com' -} -*/ -``` - -#### Using with Transactions - -```typescript -const transaction = await db.tx.begin(); -try { - const updatedAuthor = await AuthorRepo.update('author_id', { - name: 'Jane Smith Updated' - }, transaction); - - // Perform other operations... 
- - await transaction.commit(); - console.log(updatedAuthor); -} catch (error) { - await transaction.rollback(); - throw error; -} -``` - -### Using Model's `set` Method - -The `set` method on a model completely replaces a record's data with the new values. - -#### Signature -```typescript -set( - target: DBRecordTarget, - record: InferSchemaTypesWrite, - transaction?: Transaction | string -): Promise>; -``` - -#### Parameters - -- `target`: The target record to modify (ID string, record instance, or record object) -- `record`: An object containing all the fields to set for the record (fields not included will be removed) -- `transaction` (optional): A [transaction](../../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to a `DBRecordInstance` containing the modified [record](../../concepts/records.md) - -#### Example - -```typescript -const resetAuthor = await AuthorRepo.set('author_id', { - name: 'John Doe Reset', - email: 'john.reset@example.com' - // All fields not specified will be removed -}); - -console.log(resetAuthor); -/* -{ - __id: 'author_id', - __label: 'author', - name: 'John Doe Reset', - email: 'john.reset@example.com' -} -*/ +}) ``` -#### Difference between `update` and `set` - -- `update`: Performs a partial update, preserving fields not specified in the update data -- `set`: Completely replaces all record data with the new values, removing any fields not specified - -#### Using with Transactions +## In a transaction ```typescript -const transaction = await db.tx.begin(); +const tx = await db.tx.begin() try { - const resetAuthor = await AuthorRepo.set('author_id', { - name: 'Complete Reset', - email: 'reset@example.com' - }, transaction); - - // Perform other operations... 
- - await transaction.commit(); - console.log(resetAuthor); -} catch (error) { - await transaction.rollback(); - throw error; + await db.records.update({ target: 'movie-id-123', label: 'MOVIE', data: { rating: 9.1 } }, tx) + await db.records.update({ target: 'actor-id-456', label: 'ACTOR', data: { country: 'USA' } }, tx) + await tx.commit() +} catch (e) { + await tx.rollback(); throw e } ``` -## Complex Example with Multiple Updates in a Transaction - -## Complex Example with Multiple Updates in a Transaction - -In this example, we'll update an `Author` and a `Post` within the same transaction. This ensures that either both updates succeed, or both are rolled back in case of an error. +## Via Model ```typescript -const transaction = await db.tx.begin(); -try { - // Update the author - const updatedAuthor = await AuthorRepo.update('author_id', { - name: 'Updated Author Name' - }, transaction); +const MovieModel = new Model('MOVIE', { title: { type: 'string' }, rating: { type: 'number' }, genre: { type: 'string' } }) - // Update the post - const updatedPost = await PostRepo.update('post_id', { - title: 'Updated Post Title', - content: 'Updated content for the post.', - rating: 4.8 - }, transaction); +// Partial update +await MovieModel.update('movie-id-123', { rating: 9.1 }) - await transaction.commit(); - console.log(updatedAuthor); - console.log(updatedPost); - /* - { - __id: 'author_id', - __label: 'author', - name: 'Updated Author Name', - email: 'john.doe@example.com' - } - - { - __id: 'post_id', - __label: 'post', - created: '2023-01-02T00:00:00Z', - title: 'Updated Post Title', - content: 'Updated content for the post.', - rating: 4.8 - } - */ -} catch (error) { - await transaction.rollback(); - throw error; -} +// Full replace +await MovieModel.set('movie-id-123', { title: 'Inception', rating: 9.1, genre: 'sci-fi' }) ``` -## Best Practices for Updating Records - -1. 
**Use Models for Structured Applications** - - Models provide type safety, validation, and better organization - - They enforce schema consistency across your application - -2. **Use Transactions for Related Operations** - - When updating multiple records that are related, use [transactions](../../concepts/transactions.mdx) - - Transactions ensure data consistency and allow rollback if operations fail - -3. **Handle Uniqueness Constraints** - - Models automatically check uniqueness before updating records - - Handle `UniquenessError` exceptions appropriately - -4. **Partial Updates vs. Complete Replacement** - - Use the `update` method for partial updates when you only need to change specific fields - - Use the `set` method when you want to completely replace a record's data - - This minimizes network traffic and avoids unintended side effects - -5. **Consider Validation** - - Validate your data on the client side before sending updates - - This improves performance and provides a better user experience - -6. **Choose the Right Data Type Control Approach** - - Use the flat object approach for most cases where automatic type inference is sufficient - - Use the property-based approach with `PropertyDraft` objects when you need precise control over types - -## Data Type Handling - -RushDB supports the same property types for updates as it does for creating records: - -- `string`: Text values -- `number`: Numeric values -- `boolean`: True/false values -- `null`: Null values -- `datetime`: ISO8601 format strings (e.g., "2025-04-23T10:30:00Z") -- `vector`: Arrays of numbers (when `castNumberArraysToVectors` is true) - -When `suggestTypes` is enabled (default), RushDB automatically infers these types from your data. - -When `convertNumericValuesToNumbers` is enabled, string values that represent numbers (e.g., '30') will be converted to their numeric equivalents (e.g., 30). 
- -## Conclusion - -Updating records in RushDB can be done through direct API calls or through the Model abstraction. While direct API calls offer flexibility for dynamic or ad-hoc operations, using Models is recommended for most applications due to their type safety, validation capabilities, and more intuitive API. - -For more advanced record operations, see the other guides in this section: -- [Get Records](./get-records.md) - Retrieve records from the database -- [Create Records](./create-records.md) - Create new records -- [Delete Records](./delete-records.md) - Remove records from the database -- [Import Data](./import-data.md) - Import data in bulk diff --git a/docs/docs/typescript-sdk/relationships.md b/docs/docs/typescript-sdk/relationships.md index ecf4f239..1610447a 100644 --- a/docs/docs/typescript-sdk/relationships.md +++ b/docs/docs/typescript-sdk/relationships.md @@ -1,478 +1,118 @@ --- -sidebar_position: 4 +sidebar_position: 3 --- # Relationships -[Relationships](../concepts/relationships.md) in RushDB connect records to form a rich, interconnected network of data. The TypeScript SDK provides powerful methods for creating, managing, and traversing relationships between records. +Connect records into a graph. Relationships have a type and a direction. 
-## Overview - -The relationships API in the SDK enables you to: -- Create connections between records -- Remove relationships between records -- Search for relationships based on specific criteria -- Build complex graph-like data structures -- Navigate between connected entities - -## Creating Relationships - -### Using RushDB's `attach()` Method - -To create a relationship between records, use the `records.attach` method: +## `db.records.attach()` ```typescript -// Attaching one record to another -const result = await db.records.attach({ - source: 'user_123', - target: 'company_456', - options: { - type: 'WORKS_AT', - direction: 'out' // User -> WORKS_AT -> Company - } -}); - -console.log(result); -/* -{ - success: true, - message: "Relationship created successfully" -} -*/ +await db.records.attach({ + source: 'movie-id-123', + target: 'actor-id-456', + options: { type: 'STARS', direction: 'out' } + // (MOVIE) -[:STARS]-> (ACTOR) +}) ``` -#### Parameters - -- `params`: An object containing: - - `source`: The source record (ID, record object, or record instance) - - `target`: The target record(s) (ID, array of IDs, record object, record instance, or array of record instances) - - `options` (optional): Configuration for the relationship: - - `type`: The type/name of the relationship - - `direction`: Direction of the relationship ('in' or 'out') -- `transaction` (optional): A [transaction](../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to a success object - -### Using Model's `attach()` Method +`target` can be a single ID, an array of IDs, or a record instance. 
-If you're using models, you can use the model's `attach` method: +## `db.records.detach()` ```typescript -// Define models -const UserModel = new Model('USER', { - name: { type: 'string' }, - email: { type: 'string', unique: true } -}); - -const CompanyModel = new Model('COMPANY', { - name: { type: 'string' }, - industry: { type: 'string' } -}); - -// Create records -const user = await UserModel.create({ - name: 'John Doe', - email: 'john@example.com' -}); - -const company = await CompanyModel.create({ - name: 'Acme Inc.', - industry: 'Technology' -}); - -// Create relationship between user and company -await UserModel.attach({ - source: user, - target: company, - options: { - type: 'WORKS_AT', - direction: 'out' - } -}); -``` - -### Creating Relationships in Transactions - -```typescript -const transaction = await db.tx.begin(); -try { - const user = await db.records.create({ - label: 'USER', - data: { - name: 'Jane Smith', - email: 'jane@example.com' - } - }, transaction); - - const company = await db.records.create({ - label: 'COMPANY', - data: { - name: 'Tech Corp', - industry: 'Software' - } - }, transaction); - - await db.records.attach({ - source: user.data.__id, - target: company.data.__id, - options: { - type: 'WORKS_AT', - direction: 'out' - } - }, transaction); - - await transaction.commit(); -} catch (error) { - await transaction.rollback(); - throw error; -} -``` - -### Bulk Relationship Creation by Key Match - -Use `relationships.createMany` to create relationships in bulk by matching a key from a source label to a key from a target label. This is useful when you ingest data in batches (e.g., from CSV/JSON) and want to connect records created at different times. 
- -```ts -// Create USER -[:ORDERED]-> ORDER for all pairs where -// USER.id = ORDER.userId and both match the given tenant -await db.relationships.createMany({ - source: { label: 'USER', key: 'id', where: { tenantId } }, - target: { label: 'ORDER', key: 'userId', where: { tenantId } }, - type: 'ORDERED', - direction: 'out' // (source) -[:ORDERED]-> (target) +await db.records.detach({ + source: 'movie-id-123', + target: 'actor-id-456', + options: { typeOrTypes: 'STARS' } // omit to detach all types }) ``` -Parameters -- `source`: Object describing the source side - - `label`: Source record label (string) - - `key`: Property on the source used for equality match (string) - - `where` (optional): Additional filters for source records; same shape as SearchQuery `where` -- `target`: Object describing the target side - - `label`: Target record label (string) - - `key`: Property on the target used for equality match (string) - - `where` (optional): Additional filters for target records; same shape as SearchQuery `where` -- `type` (optional): Relationship type. Defaults to the RushDB default type when omitted -- `direction` (optional): 'in' or 'out'. Defaults to 'out'. - -Notes -- Matching condition is always `source[key] = target[key]` plus any additional `where` constraints. -- `where` uses the same operators as record search (e.g., plain equality `{ tenantId: 'ACME' }`, or explicit `{ tenantId: 'ACME' }`). -- Operation can run within a transaction if provided. 
+## Bulk create by key match -## Removing Relationships - -### Using RushDB's `detach()` Method - -To remove a relationship between records, use the `records.detach` method: +Connect records by matching a property on the source to a property on the target: ```typescript -const result = await db.records.detach({ - source: 'user_123', - target: 'company_456', - options: { - type: 'WORKS_AT' // Optional: Only detach relationships of this type - } -}); - -console.log(result); -/* -{ - success: true, - message: "Relationship removed successfully" -} -*/ +await db.relationships.createMany({ + source: { label: 'MOVIE', key: 'directorId' }, + target: { label: 'DIRECTOR', key: 'id' }, + type: 'DIRECTED_BY', + direction: 'out' +}) +// Creates MOVIE -[:DIRECTED_BY]-> DIRECTOR where MOVIE.directorId = DIRECTOR.id ``` -#### Parameters - -- `params`: An object containing: - - `source`: The source record (ID, record object, or record instance) - - `target`: The target record(s) (ID, array of IDs, record object, record instance, or array of record instances) - - `options` (optional): Configuration for the detach operation: - - `typeOrTypes`: The type(s) of relationships to remove (string or array of strings) - - `direction`: Direction of the relationship to remove ('in' or 'out') -- `transaction` (optional): A [transaction](../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to a success object - -### Using Model's `detach()` Method - -If you're using models, you can use the model's `detach` method: +## Bulk delete by key match ```typescript -// Detach a relationship between user and company -await UserModel.detach({ - source: user.data.__id, - target: company.data.__id, - options: { - typeOrTypes: 'WORKS_AT' - } -}); -``` - -### Bulk Relationship Deletion by Key Match - -You can remove relationships in bulk with the SDK using `relationships.deleteMany`. 
It accepts the same shape as `createMany` and supports two modes: - -- key-match mode: match source and target records by equality of a pair of properties (e.g. `USER.id = ORDER.userId`) and delete the relationship between matched pairs. -- many-to-many (cartesian) mode: opt-in operation that deletes relationships between every matching source and target pair that satisfy provided filters — use with extreme caution. - -TypeScript example — key match deletion: - -```ts await db.relationships.deleteMany({ - source: { label: 'USER', key: 'id', where: { tenantId } }, - target: { label: 'ORDER', key: 'userId', where: { tenantId } }, - type: 'ORDERED', + source: { label: 'MOVIE', key: 'directorId' }, + target: { label: 'DIRECTOR', key: 'id' }, + type: 'DIRECTED_BY', direction: 'out' }) ``` -TypeScript example — many-to-many deletion (explicit opt-in): +### Many-to-many deletion (cartesian) -```ts -// WARNING: manyToMany will perform a cartesian-style deletion across the -// filtered sets. Only use with explicit filters on both sides. 
+```typescript +// Deletes HAS_TAG relationships between ALL matching pairs await db.relationships.deleteMany({ - source: { label: 'USER', where: { tenantId } }, - target: { label: 'TAG', where: { tenantId } }, + source: { label: 'MOVIE', where: { genre: 'sci-fi' } }, + target: { label: 'TAG', where: { category: 'genre' } }, type: 'HAS_TAG', - direction: 'out', - manyToMany: true + manyToMany: true // Must be explicit — requires non-empty where on both sides }) ``` -Parameters -- `source`: Object describing the source side - - `label`: Source record label (string) - - `key` (optional): Property on the source used for equality match (string) - - `where` (optional): Additional filters for source records; same shape as SearchQuery `where` -- `target`: Object describing the target side - - `label`: Target record label (string) - - `key` (optional): Property on the target used for equality match (string) - - `where` (optional): Additional filters for target records; same shape as SearchQuery `where` -- `type` (optional): Relationship type to restrict deletions -- `direction` (optional): 'in' or 'out'. Defaults to 'out'. -- `manyToMany` (optional): boolean. When `true` the operation will perform deletions across all source/target pairs matching provided filters (cartesian). This must be explicitly set. - -Important notes and safeguards -- If `manyToMany` is not provided or is `false`, both `source.key` and `target.key` must be supplied — deletion matches records where `source[key] = target[key]`. -- If `manyToMany` is `true`, the server requires non-empty `where` filters for both `source` and `target` to avoid accidental full-cartesian deletions. -- Use `manyToMany` only when you intentionally want to delete relationships across filtered sets. Consider testing on a staging dataset first. - +:::warning Both `source.where` and `target.where` must be non-empty when `manyToMany: true`. 
+::: -## Finding Relationships - -### Using RushDB's `relationships.find()` Method - -To search for relationships based on specific criteria, use the `relationships.find` method: +## Find relationships ```typescript -const relationships = await db.relationships.find({ - labels: ['USER'], +const { data, total } = await db.relationships.find({ + labels: ['MOVIE'], where: { - name: { $contains: 'John' }, - COMPANY: { - industry: 'Technology', - $relation: 'WORKS_AT' - } - }, - limit: 10 -}); - -console.log(relationships); -/* -{ - data: [ - { - sourceId: 'user_123', - sourceLabel: 'USER', - targetId: 'company_456', - targetLabel: 'COMPANY', - type: 'WORKS_AT' - }, - // More relationships... - ], - total: 5 -} -*/ + ACTOR: { $relation: 'STARS', country: 'USA' } + } +}) ``` -#### Parameters - -- `searchQuery`: A search query object to find matching relationships - - `where`: Conditions to filter relationships - - `limit`: Maximum number of results to return - - `skip`: Number of results to skip -- `transaction` (optional): A [transaction](../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to an API response object: `{ success: boolean; data: Array; total?: number }` - -### Finding Relationships in Transactions +## In a transaction ```typescript -const transaction = await db.tx.begin(); +const tx = await db.tx.begin() try { - const relationships = await db.relationships.find({ - labels: ['USER'], - where: { - name: { $contains: 'John' }, - COMPANY: { - industry: 'Technology', - $relation: 'WORKS_AT' - } - }, - limit: 10 - }, transaction); - - // Perform other operations... 
- - await transaction.commit(); - console.log(relationships); -} catch (error) { - await transaction.rollback(); - throw error; + const movie = await db.records.create({ label: 'MOVIE', data: { title: 'Dune' } }, tx) + const actor = await db.records.create({ label: 'ACTOR', data: { name: 'Timothée Chalamet' } }, tx) + await db.records.attach({ source: movie, target: actor, options: { type: 'STARS' } }, tx) + await tx.commit() +} catch (e) { + await tx.rollback(); throw e } ``` -## Retrieving Relationships for a Record - -### Using RushDB's `records.relations()` Method - -To get all relationships for a specific record, use the `records.relations` method: - -```typescript -const relations = await db.records.relations('user_123'); - -console.log(relations); -/* -{ - data: [ - { - id: 'relation_id_1', - type: 'WORKS_AT', - source: 'user_123', - target: 'company_456', - direction: 'out' - }, - { - id: 'relation_id_2', - type: 'FOLLOWS', - source: 'user_789', - target: 'user_123', - direction: 'in' - }, - // More relationships... - ], - total: 5 -} -*/ -``` - -#### Parameters - -- `target`: The record to get relationships for (ID, record object, or record instance) -- `transaction` (optional): A [transaction](../concepts/transactions.mdx) object or string to include the operation within a transaction - -#### Returns - -- A promise that resolves to an array of relationship objects - -## Relationship Direction - -Relationships in RushDB have a direction, which defines how records are connected. 
When creating or querying relationships, you can specify the direction: - -- `out`: The relationship goes from source to target: `(source) -[RELATIONSHIP]-> (target)` -- `in`: The relationship goes from target to source: `(source) <-[RELATIONSHIP]- (target)` - -For example: -```typescript -// Outgoing relationship (User -[WORKS_AT]-> Company) -await db.records.attach({ - source: userId, - target: companyId, - options: { - type: 'WORKS_AT', - direction: 'out' - } -}); - -// Incoming relationship (Department <-[BELONGS_TO]- Employee) -await db.records.attach({ - source: departmentId, - target: employeeId, - options: { - type: 'BELONGS_TO', - direction: 'in' - } -}); -``` - -## Custom Relationship Types - -By default, RushDB uses a standard relationship type, but you can specify custom types to model your domain more accurately: +## Via Model ```typescript -// Creating a relationship with a custom type -await db.records.attach({ - source: mentorId, - target: menteeId, - options: { - type: 'MENTORS', - direction: 'out' - } -}); +await MovieModel.attach({ + source: 'movie-id-123', + target: 'actor-id-456', + options: { type: 'STARS', direction: 'out' } +}) -// Creating a relationship when importing nested data -const company = await db.records.create({ - label: 'COMPANY', - data: { - name: 'Tech Corp', - employees: [ - { name: 'Jane Smith', position: 'CTO' }, - { name: 'John Doe', position: 'Developer' } - ] - }, - options: { - relationshipType: 'EMPLOYS' // Custom relationship type - } -}); +await MovieModel.detach({ + source: 'movie-id-123', + target: 'actor-id-456', + options: { typeOrTypes: 'STARS' } +}) ``` -## Best Practices for Working with Relationships - -1. **Use Meaningful Relationship Types** - - Choose descriptive names for relationship types that clearly convey their meaning - - Establish a consistent naming convention for relationships (e.g., using verbs like 'FOLLOWS', 'WORKS_AT') - -2. 
**Consider Relationship Direction** - - Use the direction parameter to model the natural flow of relationships - - For bidirectional relationships, create two relationships with opposite directions - -3. **Use Transactions for Multiple Operations** - - When creating or updating multiple records and their relationships, use transactions - - This ensures all operations succeed or fail together, maintaining data consistency - -4. **Optimize Relationship Queries** - - Specify relationship types when searching to improve performance - - Use direction filters to narrow down search results - -5. **Model Domain Relationships Carefully** - - Use relationship types that map to real-world concepts in your domain - - Consider the cardinality of relationships (one-to-one, one-to-many, many-to-many) - -## Conclusion - -The Relationships API in the RushDB TypeScript SDK provides a comprehensive set of methods for creating, managing, and querying relationships between records. By understanding these methods and their parameters, you can effectively build interconnected data structures in your application. +## Direction reference -For more information on related topics, see: -- [Records](./records/create-records.md) - Work with records that participate in relationships -- [Search](./records/get-records.md) - Advanced querying across relationships -- [Models](./models.md) - Define structured schemas for your data +| `direction` | Graph pattern | +|---|---| +| `out` | `(source) -[:TYPE]-> (target)` | +| `in` | `(source) <-[:TYPE]- (target)` | diff --git a/docs/docs/typescript-sdk/transactions.md b/docs/docs/typescript-sdk/transactions.md index 866c4dd1..a68a84c8 100644 --- a/docs/docs/typescript-sdk/transactions.md +++ b/docs/docs/typescript-sdk/transactions.md @@ -1,260 +1,52 @@ --- -sidebar_position: 5 +sidebar_position: 6 --- # Transactions -The RushDB TypeScript SDK provides a simple but powerful interface for working with database transactions. 
Transactions allow you to perform multiple database operations atomically, ensuring that either all operations succeed or none do, which helps maintain data consistency. +Group multiple operations into an atomic unit — all succeed or all roll back. -## Transaction Overview - -Transactions in RushDB TypeScript SDK: -- Enable multiple database operations to be executed as a single atomic unit -- Provide ACID (Atomicity, Consistency, Isolation, Durability) guarantees -- Automatically roll back after a timeout to prevent hanging transactions -- Can be explicitly committed or rolled back - -## Transaction API - -The SDK provides transaction-related methods through the `tx` object: - -```typescript -// Access the transaction API -const tx = db.tx; -``` - -### Begin a Transaction - -Creates a new transaction and returns a transaction object: - -```typescript -const transaction = await db.tx.begin({ - ttl: 10000 // Optional: Time to live in milliseconds (default: 5000ms, max: 30000ms) -}); - -// transaction object contains the transaction ID -console.log(transaction.id); // e.g., "018e5c31-f35a-7000-89cd-850db63a1e77" -``` - -### Get a Transaction - -Checks if a transaction exists and retrieves its information: - -```typescript -// You can pass either a transaction object or a transaction ID string -const txInfo = await db.tx.get(transaction); -// or -const txInfo = await db.tx.get("018e5c31-f35a-7000-89cd-850db63a1e77"); -``` - -### Commit a Transaction - -Commits all changes made within the transaction, making them permanent in the database: +## Basic pattern ```typescript -// You can pass either a transaction object or a transaction ID string -await transaction.commit() -// or -await db.tx.commit(transaction); -// or -await db.tx.commit("018e5c31-f35a-7000-89cd-850db63a1e77"); -``` - -### Rollback a Transaction - -Discards all changes made within the transaction: - -```typescript -// You can pass either a transaction object or a transaction ID string -await 
transaction.rollback() -// or -await db.tx.rollback(transaction); -// or -await db.tx.rollback("018e5c31-f35a-7000-89cd-850db63a1e77"); -``` - -## Using Transactions with API Methods - -Most API methods in the RushDB TypeScript SDK accept an optional transaction parameter that allows you to include the operation in a transaction: - -```typescript -// Create a transaction -const transaction = await db.tx.begin({ ttl: 10000 }); +const tx = await db.tx.begin() +// optional: { ttl: 10000 } — default 5000ms, max 30000ms try { - // Perform operations as part of the transaction - const person = await db.records.create({ - label: "Person", - data: { name: "John Doe", age: 30 } - }, transaction); // Pass the transaction as the second parameter - - const address = await db.records.create({ - label: "Address", - data: { street: "123 Main St", city: "New York" } - }, transaction); + const movie = await db.records.create( + { label: 'MOVIE', data: { title: 'Dune', rating: 8.0 } }, tx) - // Create a relationship between the person and address - await db.records.attach({ - source: person, - target: address, - options: { - type: "LIVES_AT", - direction: "out" - } - }, transaction); + const actor = await db.records.create( + { label: 'ACTOR', data: { name: 'Timothée Chalamet' } }, tx) - // Commit the transaction if all operations succeeded - await transaction.commit() - // or - // await db.tx.commit(transaction); + await db.records.attach( + { source: movie, target: actor, options: { type: 'STARS' } }, tx) - console.log("All operations completed successfully!"); -} catch (error) { - // Rollback the transaction if any operation failed - await transaction.rollback() - // or - // await db.tx.rollback(transaction); - console.error("Transaction failed:", error); + await tx.commit() // or: await db.tx.commit(tx) +} catch (e) { + await tx.rollback() // or: await db.tx.rollback(tx) + throw e } ``` -## Transaction Timeout - -Transactions in RushDB have a timeout mechanism to prevent hanging 
transactions: - -- Default timeout: 5 seconds (5000ms) -- Maximum timeout: 30 seconds (30000ms) -- If a transaction is not committed or rolled back within its TTL, it will be automatically rolled back - -## Best Practices - -1. **Keep transactions short and focused** - - Long-running transactions can lead to resource contention and reduce overall system performance. - -2. **Set appropriate TTL** - - Choose a TTL that gives your operations enough time to complete, but not so long that resources are unnecessarily tied up. - -3. **Always commit or rollback explicitly** - - Explicitly commit or rollback transactions rather than relying on automatic timeout. - -4. **Implement proper error handling** - - Always use try/catch blocks when working with transactions to ensure proper rollback in case of errors. - -5. **Use transactions only when necessary** - - For single operations, you don't need to use transactions. Only use transactions when multiple operations need to be atomic. - -6. **Be aware of transaction scope** +Pass `tx` as the last argument to any record/relationship method. - Transactions in RushDB are tied to your API token and will affect only the operations performed with that token. +## API -## Example: Complete Transaction Workflow +| Method | Description | +|---|---| +| `db.tx.begin({ ttl? })` | Start a transaction. 
Returns a `Transaction` object with `.id` | +| `db.tx.get(tx)` | Check if a transaction still exists | +| `db.tx.commit(tx)` | Commit — makes all changes permanent | +| `db.tx.rollback(tx)` | Rollback — discards all changes | +| `tx.commit()` / `tx.rollback()` | Shorthand on the transaction object itself | -Here's a complete example showing a transaction workflow for creating a user profile with multiple related records: +## Timeouts -```typescript -import RushDB from '@rushdb/javascript-sdk'; - -// Initialize SDK -const db = new RushDB('RUSHDB_API_KEY'); - -async function createUserProfile(userData) { - // Begin a transaction with 15-second TTL - const transaction = await db.tx.begin({ ttl: 15000 }); - - try { - // Create user record - const user = await db.records.create({ - label: "User", - data: { - username: userData.username, - email: userData.email - } - }, transaction); - - // Create profile record - const profile = await db.records.create({ - label: "Profile", - data: { - firstName: userData.firstName, - lastName: userData.lastName, - birthDate: userData.birthDate - } - }, transaction); - - // Create address record - const address = await db.records.create({ - label: "Address", - data: { - street: userData.street, - city: userData.city, - postalCode: userData.postalCode, - country: userData.country - } - }, transaction); - - // Create relationships - await db.records.attach({ - source: user, - target: profile, - options: { - type: "HAS_PROFILE", - direction: "out" - } - }, transaction); - - await db.records.attach({ - source: profile, - target: address, - options: { - type: "HAS_ADDRESS", - direction: "out" - } - }, transaction); +Uncommitted transactions auto-rollback after the TTL expires. 
- // Commit the transaction - await transaction.commit() - // or - // await db.tx.commit(transaction); - - return { - success: true, - user - }; - - } catch (error) { - // Rollback the transaction on any error - await transaction.rollback() - // or - // await db.tx.rollback(transaction); - - return { - success: false, - error: error.message - }; - } -} - -// Usage -createUserProfile({ - username: "johndoe", - email: "john@example.com", - firstName: "John", - lastName: "Doe", - birthDate: "1990-01-01", - street: "123 Main St", - city: "New York", - postalCode: "10001", - country: "USA" -}).then(result => { - if (result.success) { - console.log("User profile created successfully:", result.user); - } else { - console.error("Failed to create user profile:", result.error); - } -}); -``` +| Parameter | Value | +|---|---| +| Default TTL | 5 000 ms | +| Maximum TTL | 30 000 ms | + type: "LIVES_AT", diff --git a/docs/docs/typescript-sdk/typescript-reference/DBRecordInstance.md b/docs/docs/typescript-sdk/typescript-reference/DBRecordInstance.md index 181d56e7..c4558f37 100644 --- a/docs/docs/typescript-sdk/typescript-reference/DBRecordInstance.md +++ b/docs/docs/typescript-sdk/typescript-reference/DBRecordInstance.md @@ -224,8 +224,8 @@ Removes a relationship from this record to the target record(s). 
const userRecord = await UserModel.findById('user_123'); // Access record data -console.log(userRecord.id()); // 'user_123' -console.log(userRecord.label()); // 'User' +console.log(userRecord.id); // 'user_123' +console.log(userRecord.label); // 'User' console.log(userRecord.data?.name); // 'John Doe' // Update the record diff --git a/docs/docs/typescript-sdk/typescript-reference/DBRecordsArrayInstance.md b/docs/docs/typescript-sdk/typescript-reference/DBRecordsArrayInstance.md index 8f4b3e1b..f2475687 100644 --- a/docs/docs/typescript-sdk/typescript-reference/DBRecordsArrayInstance.md +++ b/docs/docs/typescript-sdk/typescript-reference/DBRecordsArrayInstance.md @@ -98,7 +98,7 @@ console.log(userRecords.data?.length); // Number of records in this page (max 10 // Access individual record instances userRecords.data?.forEach(user => { - console.log(user.id(), user.data?.name); + console.log(user.id, user.data?.name); }); // Access the original search query diff --git a/docs/docs/typescript-sdk/typescript-reference/SearchQuery.md b/docs/docs/typescript-sdk/typescript-reference/SearchQuery.md index 58548491..b3d85816 100644 --- a/docs/docs/typescript-sdk/typescript-reference/SearchQuery.md +++ b/docs/docs/typescript-sdk/typescript-reference/SearchQuery.md @@ -13,14 +13,15 @@ export type SearchQuery = SearchQueryLabelsClause & PaginationClause & OrderClause & WhereClause & - AggregateClause + AggregateClause & + GroupByClause ``` ## Type Parameters -| Parameter | Description | -|--------------------------|-------------------------------------------------------------------------| -| `S extends Schema = any` | The schema type that defines the structure of the records being queried | +| Parameter | Description | +|---------------------------------|---------------------------------------------------------------------------------------------------------------| +| `S extends Schema = Schema` | The schema type that defines the structure of the records being queried. 
`Schema` is `Record` where `SchemaField = { type: 'boolean' \| 'datetime' \| 'null' \| 'number' \| 'string'; required?: boolean; multiple?: boolean; unique?: boolean; default?: ... }`. The default `Schema` (rather than `any`) preserves type safety while remaining permissive when no explicit schema is provided. | ## Query Components @@ -83,6 +84,16 @@ export type AggregateClause = { Defines aggregation operations to perform on the query results. +### GroupBy Clause + +```typescript +export type GroupByClause = { + groupBy?: Array +} +``` + +Shapes how aggregation output is grouped. See [GroupBy](#groupby) for the two supported modes. + ## Where Expressions The `where` property of a search query can include various expressions to filter records: @@ -96,8 +107,7 @@ export type PropertyExpression = NullExpression | NumberExpression | StringExpression | - TypeExpression | - VectorExpression + TypeExpression ``` #### Number Expressions @@ -129,6 +139,67 @@ export type StringExpression = string | { } ``` +#### Datetime Expressions + +```typescript +export type DatetimeObject = { + $year: number + $month?: number + $day?: number + $hour?: number + $minute?: number + $second?: number + $millisecond?: number + $microsecond?: number + $nanosecond?: number +} + +export type DatetimeExpression = string | DatetimeObject | { + $gt?: DatetimeObject | string + $gte?: DatetimeObject | string + $lt?: DatetimeObject | string + $lte?: DatetimeObject | string + $ne?: DatetimeObject | string + $in?: Array + $nin?: Array + $exists?: boolean +} +``` + +Datetime fields support two matching styles: + +**ISO 8601 exact match or set membership:** +```typescript +// Exact ISO match +{ where: { created: '2023-01-01T00:00:00Z' } } + +// Match a set of dates +{ where: { created: { $in: ['2023-01-01T00:00:00Z', '2023-06-01T00:00:00Z'] } } } +``` + +**Component object — match a specific point in time or use for range comparisons:** +```typescript +// Exact point: January 1 2023 +{ where: { created: 
{ $year: 2023, $month: 1, $day: 1 } } } +``` + +:::warning Never use plain ISO strings with `$gt`/`$lt` comparisons +Always use component objects for range comparisons: +```typescript +// Records created in 1994 +{ where: { created: { $gte: { $year: 1994 }, $lt: { $year: 1995 } } } } + +// Records created in January 1994 +{ where: { created: { $gte: { $year: 1994, $month: 1 }, $lt: { $year: 1994, $month: 2 } } } } + +// Records created in the 1990s +{ where: { created: { $gte: { $year: 1990 }, $lt: { $year: 2000 } } } } + +// Records created on 1994-03-15 +{ where: { created: { $gte: { $year: 1994, $month: 3, $day: 15 }, $lt: { $year: 1994, $month: 3, $day: 16 } } } } +``` +::: + #### Boolean Expressions ```typescript @@ -153,7 +224,7 @@ export type NullExpression = null | { ```typescript export type TypeExpression = { - $type: 'string' | 'number' | 'boolean' | 'datetime' | 'null' | 'vector' + $type: 'string' | 'number' | 'boolean' | 'datetime' | 'null' } ``` @@ -163,14 +234,27 @@ The `$type` operator checks whether a field has a specific data type: // Find records where age is actually stored as a number { where: { - age: { $type: "number" } + age: { $type: 'number' } } } +``` + +#### $id Operator -// Find records with vector embeddings +Filter records by their own ID directly inside the `where` clause: + +```typescript +// Find records whose ID is in a known set { where: { - embedding: { $type: "vector" } + $id: { $in: ['id1', 'id2', 'id3'] } + } +} + +// Filter by specific ID on a related node +{ + where: { + EMPLOYEE: { $id: 'specific-employee-id' } } } ``` @@ -216,22 +300,179 @@ export type Related = Models> = } ``` -Defines conditions on related records. Learn more about [relationships in RushDB](../../concepts/relationships). +Defines conditions on related records. The key of the nested object **is** the label name (case-sensitive). 
Use `$alias` to name the traversal for later use in `aggregate`/`groupBy`, and `$relation` to constrain the relationship type or direction: + +```typescript +// Constrain by relationship type and direction +{ + where: { + POST: { + $relation: { type: 'AUTHORED', direction: 'in' }, // full form + title: { $contains: 'Graph' } + } + } +} + +// Shorthand — type only +{ + where: { + POST: { + $relation: 'AUTHORED', + publishedAt: { $gte: { $year: 2024 } } + } + } +} +``` + +Learn more about [relationships in RushDB](../../concepts/relationships). + +#### $xor and $nor operators + +```typescript +// $xor — exactly one of the conditions must match +{ + where: { + $xor: [ + { isPremium: true }, + { hasFreeTrialAccess: true } + ] + } +} + +// $nor — none of the conditions may match +{ + where: { + $nor: [ + { status: 'deleted' }, + { status: 'archived' } + ] + } +} +``` ### Aggregation ```typescript +export type AggregateCollectFn = { + fn: 'collect' + alias: string + field?: string // omit to collect entire records + unique?: boolean // deduplicate; default true + limit?: number // max items in the collected array + skip?: number // skip N items in the collected array + orderBy?: Order // sort collected items + aggregate?: { // nested collect only — see Nested Collect below + [field: string]: AggregateCollectFn + } +} + +export type AggregateTimeBucketFn = { + fn: 'timeBucket' + field: string // datetime field to bucket + alias: string + granularity: 'day' | 'week' | 'month' | 'quarter' | 'year' + | 'months' | 'hours' | 'minutes' | 'seconds' | 'years' + size?: number // bucket size for plural granularities (e.g. 
months:2 = bi-monthly) +} + export type AggregateFn = - | { alias: string; field: string; fn: 'avg'; precision?: number } - | { alias: string; field: string; fn: 'max' } - | { alias: string; field: string; fn: 'min' } - | { alias: string; field: string; fn: 'sum' } - | { alias: string; field?: string; fn: 'count'; unique?: boolean } - | { field: string; fn: `gds.similarity.${VectorSearchFn}`; alias: string; vector: number } + | { fn: 'count'; alias?: string; field?: string; unique?: boolean } + | { fn: 'sum'; alias?: string; field: string } + | { fn: 'avg'; alias?: string; field: string; precision?: number } + | { fn: 'min'; alias?: string; field: string } + | { fn: 'max'; alias?: string; field: string } + | { fn: 'vector.similarity.cosine' | 'vector.similarity.euclidean'; alias: string; field: string; query: number[] } + | AggregateTimeBucketFn | AggregateCollectFn + +// Inline reference — copy a field value into the output row without a function: +// 'outputKey': '$alias.fieldName' +// e.g. companyName: '$record.name', projectBudget: '$record.budget' +export type AggregateInlineRef = string // '$alias.fieldName' + +export type Aggregate = { + [outputKey: string]: AggregateFn | AggregateInlineRef +} ``` -Defines aggregation functions to apply to the query results. +Defines aggregation operations to apply to the query results. `alias` defaults to `'$record'` for root-label fields; set it to the `$alias` declared in `where` for related nodes. + +## GroupBy + +`groupBy` operates in two modes: + +### Mode A — Dimensional (one row per distinct value) + +Entries are `'$alias.propertyName'` strings. Each distinct value becomes its own output row. 
+ +```typescript +// Count and avg amount per deal stage +const result = await db.records.find({ + labels: ['DEAL'], + aggregate: { + count: { fn: 'count', alias: '$record' }, + avgAmt: { fn: 'avg', field: 'amount', alias: '$record', precision: 2 } + }, + groupBy: ['$record.stage'], + orderBy: { count: 'desc' } +}); +// Output: [{ stage: 'won', count: 42, avgAmt: 15200.00 }, ...] + +// Pivot on two keys (category × active) +const pivot = await db.records.find({ + labels: ['PROJECT'], + aggregate: { count: { fn: 'count', alias: '$record' } }, + groupBy: ['$record.category', '$record.active'], + orderBy: { count: 'desc' } +}); +``` + +### Mode B — Self-group (one row with global KPIs) + +Put the **aggregation key names** themselves into `groupBy` (not `$alias.field` paths). + +```typescript +// Total salary across all employees (single result row) +const kpis = await db.records.find({ + labels: ['EMPLOYEE'], + aggregate: { + totalSalary: { fn: 'sum', field: 'salary', alias: '$record' }, + headcount: { fn: 'count', alias: '$record' }, + avgSalary: { fn: 'avg', field: 'salary', alias: '$record', precision: 0 } + }, + groupBy: ['totalSalary', 'headcount', 'avgSalary'], + orderBy: { totalSalary: 'asc' } // ← required for correct full-scan total +}); +// Output: [{ totalSalary: 4875000, headcount: 95, avgSalary: 51315 }] +``` + +:::caution Late-ordering rule +For self-group queries, always add `orderBy` on an aggregation key. Without it the engine applies `LIMIT` before aggregation and produces mathematically wrong totals. +::: + +## Critical Rules + +:::danger Do not combine `limit` with `aggregate` +Never set `limit` when `aggregate` is present (except to cap the root records in a per-record flat aggregation). `limit` restricts the record scan, so aggregates like `sum` or `avg` operate only on the first N rows and return wrong results. 
+ +```typescript +// ❌ WRONG — limit cuts the scan, totalBudget covers only 10 projects +const wrong = await db.records.find({ + labels: ['PROJECT'], + aggregate: { totalBudget: { fn: 'sum', field: 'budget', alias: '$record' } }, + groupBy: ['totalBudget'], + limit: 10 // DO NOT include +}); + +// ✅ CORRECT — no limit; full dataset is summed +const correct = await db.records.find({ + labels: ['PROJECT'], + aggregate: { totalBudget: { fn: 'sum', field: 'budget', alias: '$record' } }, + groupBy: ['totalBudget'], + orderBy: { totalBudget: 'asc' } // triggers late ordering +}); +``` +::: ## Usage Examples @@ -276,49 +517,135 @@ const users = await UserModel.find({ }); ``` +### Datetime Range Query + +```typescript +// Records created in 2024 +const recent = await UserModel.find({ + where: { + createdAt: { $gte: { $year: 2024 }, $lt: { $year: 2025 } } + } +}); + +// Records created in Q1 2023 +const q1 = await db.records.find({ + labels: ['ORDER'], + where: { + issuedAt: { $gte: { $year: 2023, $month: 1 }, $lt: { $year: 2023, $month: 4 } } + } +}); +``` + +### Filter by Record ID + +```typescript +// Find records from a known set of IDs +const records = await db.records.find({ + where: { $id: { $in: ['id1', 'id2', 'id3'] } } +}); +``` + ### Filtering by Related Records ```typescript -// Find users who authored a post with a specific title +// Find users who authored a post (constrain relationship type + direction) const users = await UserModel.find({ where: { - Post: { - $relation: { type: 'AUTHORED' }, - title: 'My First Post' + POST: { + $relation: { type: 'AUTHORED', direction: 'in' }, + title: { $contains: 'Graph' } } } }); ``` -### Aggregation +### Aggregation with Inline Refs ```typescript -// Calculate average age of users per country -const results = await UserModel.find({ +// One row per company with employee stats — inline ref copies field directly +const stats = await db.records.find({ + labels: ['COMPANY'], + where: { EMPLOYEE: { $alias: '$employee', salary: 
{ $gte: 50000 } } }, aggregate: { - averageAge: { fn: 'avg', field: 'age', alias: 'averageAge' }, - countries: { + companyName: '$record.name', // inline ref — no fn needed + headcount: { fn: 'count', unique: true, alias: '$employee' }, + totalWage: { fn: 'sum', field: 'salary', alias: '$employee' }, + avgSalary: { fn: 'avg', field: 'salary', alias: '$employee', precision: 0 }, + employeeNames: { fn: 'collect', - field: 'country', - alias: 'countries', - unique: true + field: 'name', + alias: '$employee', + unique: true, + orderBy: { name: 'asc' }, + limit: 10 } } }); ``` -### Vector Search +### TimeBucket — Time-Series Aggregation + +```typescript +// Daily order count for 2024 +const daily = await db.records.find({ + labels: ['ORDER'], + where: { issuedAt: { $gte: { $year: 2024 }, $lt: { $year: 2025 } } }, + aggregate: { + day: { fn: 'timeBucket', field: 'issuedAt', granularity: 'day', alias: '$record' }, + count: { fn: 'count', alias: '$record' } + }, + groupBy: ['day'], + orderBy: { day: 'asc' } +}); + +// Monthly revenue +const monthly = await db.records.find({ + labels: ['ORDER'], + aggregate: { + month: { fn: 'timeBucket', field: 'issuedAt', granularity: 'month', alias: '$record' }, + revenue: { fn: 'sum', field: 'amount', alias: '$record' } + }, + groupBy: ['month'], + orderBy: { month: 'asc' } +}); + +// Bi-monthly buckets (granularity: 'months', size: 2) +const biMonthly = await db.records.find({ + labels: ['ORDER'], + aggregate: { + period: { fn: 'timeBucket', field: 'issuedAt', granularity: 'months', size: 2, alias: '$record' }, + count: { fn: 'count', alias: '$record' } + }, + groupBy: ['period'], + orderBy: { period: 'asc' } +}); +``` + +### Nested Collect (Hierarchical Output) ```typescript -// Find records with similar vector embeddings -const similar = await EmbeddingModel.find({ +// COMPANY → DEPARTMENT → PROJECT tree +const tree = await db.records.find({ + labels: ['COMPANY'], where: { - embedding: { - $similarity: { - vector: [0.1, 0.2, 0.3, 
...], - limit: 10 + DEPARTMENT: { $alias: '$dept', + PROJECT: { $alias: '$proj' } + } + }, + aggregate: { + company: '$record.name', + departments: { + fn: 'collect', + alias: '$dept', + aggregate: { + projects: { + fn: 'collect', + alias: '$proj', + orderBy: { name: 'asc' } + } } } } }); +// Output: [{ company: 'Acme', departments: [{ name: 'Eng', projects: [...] }, ...] }] ``` diff --git a/docs/docs/typescript-sdk/typescript-reference/_category_.json b/docs/docs/typescript-sdk/typescript-reference/_category_.json index 07abbec1..317450d4 100644 --- a/docs/docs/typescript-sdk/typescript-reference/_category_.json +++ b/docs/docs/typescript-sdk/typescript-reference/_category_.json @@ -1,6 +1,6 @@ { "label": "Typescript Reference", - "position": 6, + "position": 9, "collapsed": true, "collapsible": true } diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index 266ccb89..142e3712 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -151,6 +151,7 @@ const config: Config = { plugins: [ tailwindPlugin, + require('./plugins/tutorials-data.cjs'), async function pluginLlmsTxt(context) { return { name: 'llms-txt-plugin', @@ -331,45 +332,47 @@ const config: Config = { }, image: 'img/og.png', navbar: { - title: '', + title: 'RushDB Docs', logo: { alt: 'RushDB Logo', src: 'img/logo.svg' }, items: [ - { - href: '/', - label: 'RushDB Docs', - position: 'left' - }, { label: 'Python SDK', - href: '/python-sdk/introduction', + to: '/python-sdk/introduction', className: 'python-sdk', position: 'left', activeBaseRegex: '/python-sdk/' }, { label: 'TypeScript SDK', - href: '/typescript-sdk/introduction', + to: '/typescript-sdk/introduction', className: 'typescript-sdk', position: 'left', activeBaseRegex: '/typescript-sdk/' }, { label: 'REST API', - href: '/rest-api/introduction', + to: '/rest-api/introduction', className: 'rest-api', position: 'left', activeBaseRegex: '/rest-api/' }, { label: 'MCP Server', - href: '/mcp-server/introduction', + to: 
'/mcp-server/introduction', className: 'mcp-server', position: 'left', activeBaseRegex: '/mcp-server/' }, + { + type: 'docSidebar', + sidebarId: 'tutorials', + label: 'Tutorials', + position: 'left', + className: 'tutorials-link' + }, { href: 'https://github.com/rush-db/rushdb', // label: 'GitHub', @@ -384,7 +387,7 @@ const config: Config = { }, { href: 'https://app.rushdb.com', - label: 'Dashboard', + label: 'Log In', position: 'right', className: 'cta-button' } @@ -448,8 +451,8 @@ const config: Config = { copyright: `© ${new Date().getFullYear()}, Collect Software Inc.` }, prism: { - theme: themes.oneLight, - darkTheme: atomTheme, + theme: themes.vsLight, + darkTheme: themes.vsDark, fontFamily: 'JetBrains Mono', additionalLanguages: ['python', 'javascript', 'typescript', 'bash', 'json'] } diff --git a/docs/package.json b/docs/package.json index dbf88294..a80816e4 100644 --- a/docs/package.json +++ b/docs/package.json @@ -23,6 +23,7 @@ "@radix-ui/react-tabs": "^1.1.2", "autoprefixer": "^10.4.20", "clsx": "^2.1.1", + "lucide-react": "^0.473.0", "postcss": "^8.4.49", "prism-react-renderer": "2.3.1", "prism-themes": "^1.9.0", diff --git a/docs/plugins/tutorials-data.cjs b/docs/plugins/tutorials-data.cjs new file mode 100644 index 00000000..db87ad8f --- /dev/null +++ b/docs/plugins/tutorials-data.cjs @@ -0,0 +1,91 @@ +// @ts-check +'use strict' + +const fs = require('fs') +const path = require('path') + +// gray-matter lives in the workspace root node_modules +let matter +try { + matter = require('gray-matter') +} catch { + matter = require(path.resolve(__dirname, '../../../node_modules/gray-matter')) +} + +/** Average words-per-minute for technical reading */ +const WPM = 200 + +/** + * Strip MDX/JSX tags, import lines, frontmatter artefacts, and HTML comments + * from content so word count is representative of prose. 
+ */ +function extractProse(content) { + return content + .replace(/^---[\s\S]*?---/, '') // frontmatter (just in case) + .replace(/^import\s.+$/gm, '') // import lines + .replace(/export\s+(const|function|default)[^}]*\{[\s\S]*?\n\}/gm, '') // export blocks + .replace(/<[^>]+>/g, ' ') // JSX/HTML tags + .replace(/\{[^}]+\}/g, ' ') // JSX expressions + .replace(/```[\s\S]*?```/g, ' CODE ') // code blocks → single word + .replace(/`[^`]+`/g, ' ') // inline code + .replace(/\s+/g, ' ') + .trim() +} + +function estimateReadTime(content) { + const prose = extractProse(content) + const words = prose.split(/\s+/).filter(Boolean).length + const minutes = Math.ceil(words / WPM) + return `${Math.max(1, minutes)} min` +} + +/** + * @param {string} siteDir + * @returns {Array<{id: string, title: string, description: string, href: string, tags: string[], time: string}>} + */ +function loadTutorials(siteDir) { + const tutorialsDir = path.join(siteDir, 'docs', 'tutorials') + const entries = fs.readdirSync(tutorialsDir) + + const tutorials = [] + + for (const entry of entries) { + if (!/\.(md|mdx)$/.test(entry)) continue + if (entry === 'index.mdx') continue + + const filePath = path.join(tutorialsDir, entry) + const raw = fs.readFileSync(filePath, 'utf8') + const { data: fm, content } = matter(raw) + + const slug = entry.replace(/\.(md|mdx)$/, '') + + tutorials.push({ + id: slug, + title: fm.title || slug, + description: fm.description || '', + href: `/tutorials/${slug}`, + tags: Array.isArray(fm.tags) ? fm.tags : [], + time: estimateReadTime(content), + sidebar_position: fm.sidebar_position ?? 
999 + }) + } + + // Sort by sidebar_position so order matches the sidebar + tutorials.sort((a, b) => a.sidebar_position - b.sidebar_position) + + return tutorials +} + +/** @type {import('@docusaurus/types').PluginModule} */ +module.exports = function tutorialsDataPlugin(context) { + return { + name: 'tutorials-data', + async loadContent() { + return loadTutorials(context.siteDir) + }, + async contentLoaded({ content, actions }) { + const { setGlobalData } = actions + setGlobalData(content) + } + } +} diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 0c4855e7..427de7cd 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -5,6 +5,7 @@ const sidebars: SidebarsConfig = { pythonSdk: [{ type: 'autogenerated', dirName: 'python-sdk' }], typescriptSdk: [{ type: 'autogenerated', dirName: 'typescript-sdk' }], mcpServer: [{ type: 'autogenerated', dirName: 'mcp-server' }], + tutorials: [{ type: 'autogenerated', dirName: 'tutorials' }], docs: [ { type: 'link', @@ -30,6 +31,12 @@ const sidebars: SidebarsConfig = { href: '/mcp-server/introduction', className: 'mcp-server sdk-link' }, + { + type: 'link', + label: 'Tutorials', + href: '/tutorials', + className: 'tutorials-link sdk-link' + }, { type: 'category', label: 'Getting Started', @@ -41,11 +48,13 @@ const sidebars: SidebarsConfig = { label: 'Core Concepts', collapsed: false, items: [ - 'concepts/storage', 'concepts/records', + 'concepts/agent-memory-model', 'concepts/labels', 'concepts/properties', 'concepts/relationships', + 'concepts/ontology-schema-discovery', + 'concepts/storage', 'concepts/transactions', { label: 'Search', @@ -60,18 +69,6 @@ const sidebars: SidebarsConfig = { ] } ] - }, - { - type: 'category', - label: 'Tutorials', - collapsed: false, - items: [ - 'tutorials/configuring-dashboard', - 'tutorials/deployment', - 'tutorials/local-setup', - 'tutorials/reusable-search-query', - 'tutorials/importing-data' - ] } ] } diff --git a/docs/src/components/CopyPageButton.tsx 
b/docs/src/components/CopyPageButton.tsx new file mode 100644 index 00000000..dbd87944 --- /dev/null +++ b/docs/src/components/CopyPageButton.tsx @@ -0,0 +1,296 @@ +import React, { useCallback, useEffect, useRef, useState } from 'react' +import BrowserOnly from '@docusaurus/BrowserOnly' +import { useLocation } from '@docusaurus/router' +import { Clipboard, Check, ChevronDown, FileText, Maximize2, Minimize2 } from 'lucide-react' + +const GITHUB_RAW = 'https://raw.githubusercontent.com/rush-db/rushdb/main/docs/docs' +const DOCS_BASE = 'https://docs.rushdb.com' + +function ClaudeIcon() { + return ( + + + + ) +} + +function ChatGPTIcon() { + return ( + + + + ) +} + +function T3Icon() { + return ( + + + + + + + ) +} + +function MarkdownIcon() { + return ( + + + + + + ) +} + +interface MenuItem { + key: string + Icon: React.ComponentType + label: string + description: string + getHref: (pageUrl: string, markdownUrl: string) => string +} + +const LLM_PROMPT = (url: string) => `Read from this URL: ${url} and explain it to me` + +const MENU_ITEMS: MenuItem[] = [ + { + key: 'markdown', + Icon: MarkdownIcon, + label: 'View as Markdown', + description: 'Open this page in Markdown', + getHref: (_pageUrl, markdownUrl) => markdownUrl + }, + { + key: 'claude', + Icon: ClaudeIcon, + label: 'Open in Claude', + description: 'Ask questions about this page', + getHref: (pageUrl) => `https://claude.ai/new?q=${encodeURIComponent(LLM_PROMPT(pageUrl))}` + }, + { + key: 'chatgpt', + Icon: ChatGPTIcon, + label: 'Open in ChatGPT', + description: 'Ask questions about this page', + getHref: (pageUrl) => `https://chatgpt.com/?q=${encodeURIComponent(LLM_PROMPT(pageUrl))}` + }, + { + key: 't3', + Icon: T3Icon, + label: 'Open in T3 Chat', + description: 'Ask questions about this page', + getHref: (pageUrl) => `https://t3.chat/new?q=${encodeURIComponent(LLM_PROMPT(pageUrl))}` + } +] + +function CopyPageButtonInner() { + const [open, setOpen] = useState(false) + const [copied, setCopied] = 
useState(false) + const [resolvedMarkdownUrl, setResolvedMarkdownUrl] = useState(null) + const containerRef = useRef(null) + const location = useLocation() + + const slug = location.pathname.replace(/\/+$/, '').replace(/^\//, '') || 'index' + const pageUrl = slug === 'index' ? DOCS_BASE : `${DOCS_BASE}/${slug}` + const markdownUrl = `${GITHUB_RAW}/${slug}.md` + const markdownUrlMdx = `${GITHUB_RAW}/${slug}.mdx` + + // Resolve which extension actually exists; cache per slug + useEffect(() => { + setResolvedMarkdownUrl(null) + fetch(markdownUrl, { method: 'HEAD' }) + .then((r) => { + if (r.ok) { + setResolvedMarkdownUrl(markdownUrl) + return + } + fetch(markdownUrlMdx, { method: 'HEAD' }) + .then((r2) => { + setResolvedMarkdownUrl(r2.ok ? markdownUrlMdx : markdownUrl) + }) + .catch(() => setResolvedMarkdownUrl(markdownUrl)) + }) + .catch(() => setResolvedMarkdownUrl(markdownUrl)) + }, [slug]) + + const effectiveMarkdownUrl = resolvedMarkdownUrl ?? markdownUrl + + const handleCopy = useCallback(async () => { + try { + const res = await fetch(effectiveMarkdownUrl) + const text = res.ok ? await res.text() : null + const content = text ?? 
`# ${document.title}\n\n${window.location.href}` + await navigator.clipboard.writeText(content) + } catch { + // fallback: copy the URL when fetch/clipboard fails + try { + await navigator.clipboard.writeText(window.location.href) + } catch { + const el = document.createElement('textarea') + el.value = window.location.href + document.body.appendChild(el) + el.select() + document.execCommand('copy') + document.body.removeChild(el) + } + } + setCopied(true) + setTimeout(() => setCopied(false), 2000) + }, [effectiveMarkdownUrl]) + + useEffect(() => { + if (!open) return + const onMouseDown = (e: MouseEvent) => { + if (containerRef.current && !containerRef.current.contains(e.target as Node)) { + setOpen(false) + } + } + const onKeyDown = (e: KeyboardEvent) => { + if (e.key === 'Escape') setOpen(false) + } + document.addEventListener('mousedown', onMouseDown) + document.addEventListener('keydown', onKeyDown) + return () => { + document.removeEventListener('mousedown', onMouseDown) + document.removeEventListener('keydown', onKeyDown) + } + }, [open]) + + return ( +
+ {/* Main copy button */} + + + {/* Dropdown chevron */} + + + {/* Dropdown menu */} + {open && ( + + )} +
+ ) +} + +// ── Public export ─────────────────────────────────────────────────────────── + +export default function CopyPageButton() { + return }>{() => } +} + +// ── Widescreen toggle ──────────────────────────────────────────────────────── + +const WIDESCREEN_KEY = 'rushdb-docs-widescreen' + +function WidescreenButtonInner() { + const [wide, setWide] = useState(false) + + useEffect(() => { + const stored = localStorage.getItem(WIDESCREEN_KEY) === 'true' + setWide(stored) + if (stored) document.documentElement.setAttribute('data-widescreen', 'true') + }, []) + + const toggle = useCallback(() => { + setWide((v) => { + const next = !v + localStorage.setItem(WIDESCREEN_KEY, String(next)) + if (next) { + document.documentElement.setAttribute('data-widescreen', 'true') + } else { + document.documentElement.removeAttribute('data-widescreen') + } + return next + }) + }, []) + + return ( + + ) +} + +export function WidescreenButton() { + return ( + }> + {() => } + + ) +} diff --git a/docs/src/components/DocsHomePage.tsx b/docs/src/components/DocsHomePage.tsx new file mode 100644 index 00000000..24cd952a --- /dev/null +++ b/docs/src/components/DocsHomePage.tsx @@ -0,0 +1,597 @@ +import React from 'react' + +const BORDER_CLASS = 'border-[var(--ifm-color-emphasis-200)]' + +// ── Icons ───────────────────────────────────────────────────────────────────── + +const ArrowIcon = () => ( + + + + +) + +const TypeScriptIcon = () => ( + +) + +const PythonIcon = () => ( + +) + +const RestIcon = () => ( + +) + +const McpIcon = () => ( + +) + +const BookIcon = () => ( + +) + +const RocketIcon = () => ( + +) + +const ConceptsIcon = () => ( + +) + +const IngestIcon = () => ( + +) + +const FilterIcon = () => ( + +) + +const SparkleIcon = () => ( + +) + +const ShieldIcon = () => ( + +) + +const GraphIcon = () => ( + +) + +// ── Data ────────────────────────────────────────────────────────────────────── + +type InterfaceCard = { + icon: () => React.ReactElement + label: string + badge: 
string + badgeColor: string + description: string + features: string[] + href: string +} + +const INTERFACE_CARDS: InterfaceCard[] = [ + { + icon: TypeScriptIcon, + label: 'TypeScript SDK', + badge: 'npm', + badgeColor: '#3178C6', + description: 'Full type safety for Node.js and browsers. Async/await API with zero config.', + features: ['Type-safe query builder', 'Browser + Node.js', 'ESM & CommonJS'], + href: '/typescript-sdk/introduction' + }, + { + icon: PythonIcon, + label: 'Python SDK', + badge: 'pip', + badgeColor: '#3776AB', + description: 'Ergonomic client for backend scripts and data workflows. Sync and async.', + features: ['Sync & async client', 'Pandas-friendly output', 'Pipeline-ready'], + href: '/python-sdk/introduction' + }, + { + icon: RestIcon, + label: 'REST API', + badge: 'HTTP', + badgeColor: '#16a34a', + description: 'Language-agnostic HTTP access. Works from any stack with curl or fetch.', + features: ['OpenAPI spec', 'Full CRUD + search', 'Transaction support'], + href: '/rest-api/introduction' + }, + { + icon: McpIcon, + label: 'MCP Server', + badge: 'MCP', + badgeColor: '#7c3aed', + description: 'Model Context Protocol server. 
Give any LLM agent direct database access.', + features: ['Claude, GPT & Cursor', 'Schema auto-exposed', 'One-command deploy'], + href: '/mcp-server/introduction' + } +] + +type ResourceCard = { + icon: () => React.ReactElement + label: string + description: string + cta: string + href: string +} + +const RESOURCE_CARDS: ResourceCard[] = [ + { + icon: RocketIcon, + label: 'Quick Tutorial', + description: 'First write → graph → semantic search in under 10 minutes.', + cta: 'Start here', + href: '/get-started/quick-tutorial' + }, + { + icon: BookIcon, + label: 'Tutorials', + description: 'Hands-on guides: RAG pipelines, deployment, vector search, agent memory.', + cta: 'Browse tutorials', + href: '/tutorials' + }, + { + icon: ConceptsIcon, + label: 'Concepts', + description: 'How RushDB stores, links, and queries data — records, properties, graphs.', + cta: 'Learn concepts', + href: '/concepts' + } +] + +type FeatureItem = { + icon: () => React.ReactElement + title: string + description: string + accent: string +} + +const FEATURE_ITEMS: FeatureItem[] = [ + { + icon: IngestIcon, + title: 'Ingest anything', + description: + 'Push flat objects, nested trees, or batches. Types inferred, graph built — no schema needed.', + accent: '#3f81ff' + }, + { + icon: GraphIcon, + title: 'Auto-linked graph', + description: 'Nested objects become linked records automatically. Traverse relationships in queries.', + accent: '#8b5cf6' + }, + { + icon: SparkleIcon, + title: 'Semantic search', + description: 'Index any text property and query by meaning. Combine with field filters.', + accent: '#f59e0b' + }, + { + icon: ShieldIcon, + title: 'ACID transactions', + description: 'Wrap any combination of writes and reads. 
Nothing persists if any step fails.', + accent: '#10b981' + } +] + +// ── Sub-components ──────────────────────────────────────────────────────────── + +function CheckIcon() { + return ( + + + + ) +} + +function InterfaceCard({ card: c }: { card: InterfaceCard }) { + return ( + + {/* Header */} +
+
+ +
+ + {c.badge} + +
+ + {/* Title & description */} +

{c.label}

+

+ {c.description} +

+ + {/* Features */} +
    + {c.features.map((f) => ( +
  • + + + + {f} +
  • + ))} +
+ + {/* CTA */} + + Get started + +
+ ) +} + +function ResourceCard({ card: c }: { card: ResourceCard }) { + return ( + +
+ +
+

{c.label}

+

+ {c.description} +

+ + {c.cta} + +
+ ) +} + +function FeaturePill({ item }: { item: FeatureItem }) { + return ( +
+ + + +
+

{item.title}

+

+ {item.description} +

+
+
+ ) +} + +// ── Main component ──────────────────────────────────────────────────────────── + +export default function DocsHomePage() { + return ( +
+ {/* ── Hero ─────────────────────────────────────────────────────────── */} +
+
+ + Make AI Agents and Apps Conciuous +
+ +

+ Welcome to RushDB +

+ +

+ Push any JSON — RushDB infers types, links nested objects into a graph, and makes everything + queryable by field value or by meaning. No schema design. No migrations. No separate vector store. +

+ + +
+ + {/* ── Features grid ────────────────────────────────────────────────── */} +
+ {FEATURE_ITEMS.map((item) => ( + + ))} +
+ + {/* ── Interfaces ───────────────────────────────────────────────────── */} +
+
+

+ Choose your interface +

+

+ Every interface gives you the same capabilities — pick the one that fits your stack. +

+
+
+ {INTERFACE_CARDS.map((card) => ( + + ))} +
+
+ + {/* ── Divider ──────────────────────────────────────────────────────── */} +
+ + {/* ── Learning resources ───────────────────────────────────────────── */} +
+
+

Start learning

+

+ Guides, tutorials, and concept explanations to get you productive fast. +

+
+
+ {RESOURCE_CARDS.map((card) => ( + + ))} +
+
+ + {/* ── Deployment CTA ───────────────────────────────────────────────── */} +
+
+

+ Cloud +

+

RushDB Cloud

+

+ Free tier — 100,000 KU/month and 2 projects. No credit card required. +

+ + Sign up free + +
+ +
+

+ Self-Hosted +

+

+ Run on your infrastructure +

+

+ No KU limits, no billing. Deploy with Docker in minutes on your own stack. +

+ + Deployment guide + +
+
+
+ ) +} diff --git a/docs/src/components/LanguageTabs.tsx b/docs/src/components/LanguageTabs.tsx new file mode 100644 index 00000000..9bcf5cc4 --- /dev/null +++ b/docs/src/components/LanguageTabs.tsx @@ -0,0 +1,151 @@ +import React from 'react' +import Tabs from '@theme/Tabs' + +// ── Icons ────────────────────────────────────────────────────────────────── + +function PythonIcon(): React.ReactElement { + return ( + + ) +} + +function TypeScriptIcon(): React.ReactElement { + return ( + + ) +} + +function ShellIcon(): React.ReactElement { + return ( + + ) +} + +// ── Language config ──────────────────────────────────────────────────────── + +/** Render order: lower number = shown first. Unknown values get 99. */ +const LANG_ORDER: Record = { + python: 0, + typescript: 1, + shell: 2 +} + +const LANG_CONFIG: Record React.ReactElement }> = { + python: { label: 'Python', Icon: PythonIcon }, + typescript: { label: 'TypeScript', Icon: TypeScriptIcon }, + shell: { label: 'shell', Icon: ShellIcon } +} + +// ── Component ────────────────────────────────────────────────────────────── + +interface LanguageTabsProps { + children: React.ReactNode + /** Accepted for drop-in compatibility with usage; always + * overridden internally to "programming-language" for cross-page tab syncing. 
*/ + groupId?: string + [key: string]: unknown +} + +/** + * Drop-in replacement for `` that: + * - Enforces `groupId="programming-language"` (cross-page tab syncing) + * - Always renders Python first, TypeScript second, anything else after + * - Injects the Python 🐍 and TypeScript TS badge icons into tab labels + * - Sets Python as the default active tab + * + * Usage in MDX (import as `Tabs` to avoid changing any body tags): + * ```mdx + * import Tabs from '@site/src/components/LanguageTabs'; + * import TabItem from '@theme/TabItem'; + * ``` + */ +export default function LanguageTabs({ children }: LanguageTabsProps): React.ReactElement { + const items = React.Children.toArray(children).filter((child): child is React.ReactElement => + React.isValidElement(child) + ) + + const sorted = [...items].sort( + // @ts-ignore + (a, b) => (LANG_ORDER[a.props.value] ?? 99) - (LANG_ORDER[b.props.value] ?? 99) + ) + + const enhanced = sorted.map((child, index) => { + // @ts-ignore + const value: string = child.props.value + const config = LANG_CONFIG[value] + + const label = + config ? + + + {config.label} + + // @ts-ignore + : child.props.label + + return React.cloneElement(child as React.ReactElement>, { + key: value, + label, + default: index === 0 + }) + }) + + // @ts-ignore + return ( +
+ {enhanced} +
+ ) +} diff --git a/docs/src/components/ResponsiveTabs.tsx b/docs/src/components/ResponsiveTabs.tsx new file mode 100644 index 00000000..b20de700 --- /dev/null +++ b/docs/src/components/ResponsiveTabs.tsx @@ -0,0 +1,78 @@ +import React, { useState, Children, isValidElement } from 'react' + +interface Tab { + value: string + label: string + isDefault: boolean + content: React.ReactNode +} + +interface ResponsiveTabsProps { + children: React.ReactNode +} + +/** + * Outer demo tabs that render as tab buttons on desktop and a native setSelected(e.target.value)} + aria-label="Select example" + > + {tabs.map((t) => ( + + ))} + + + + {/* Desktop: tab buttons */} +
+ {tabs.map((t) => ( + + ))} +
+ + {/* Content */} +
+ {activeTab?.content} +
+ + ) +} diff --git a/docs/src/components/TutorialsIndex.tsx b/docs/src/components/TutorialsIndex.tsx new file mode 100644 index 00000000..997f5f82 --- /dev/null +++ b/docs/src/components/TutorialsIndex.tsx @@ -0,0 +1,181 @@ +import React, { useMemo, useState } from 'react' +import { usePluginData } from '@docusaurus/useGlobalData' + +export type TutorialEntry = { + id: string + title: string + description: string + href: string + tags: string[] + time: string +} + +const BORDER_CLASS = 'border-[var(--ifm-color-emphasis-200)]' + +const SearchIcon = () => ( + + + + +) + +const ClockIcon = () => ( + + + + +) + +const ArrowIcon = () => ( + + + + +) + +function TutorialCard({ tutorial: t }: { tutorial: TutorialEntry }) { + return ( + +
+ {t.tags.map((tag) => ( + + {tag} + + ))} +
+ +

+ {t.title} +

+ +

+ {t.description} +

+ +
+ + + {t.time} + + + Read + +
+
+ ) +} + +export default function TutorialsIndex() { + const tutorials = usePluginData('tutorials-data') as TutorialEntry[] + const [search, setSearch] = useState('') + const [activeTag, setActiveTag] = useState(null) + + const allTags = useMemo(() => { + const seen = new Set() + for (const tutorial of tutorials) { + for (const tag of tutorial.tags) { + seen.add(tag) + } + } + return [...seen] + }, [tutorials]) + + const filtered = useMemo(() => { + return tutorials.filter((tutorial) => { + const query = search.toLowerCase() + const matchesSearch = + !search || + tutorial.title.toLowerCase().includes(query) || + tutorial.description.toLowerCase().includes(query) + const matchesTag = !activeTag || tutorial.tags.includes(activeTag) + return matchesSearch && matchesTag + }) + }, [tutorials, search, activeTag]) + + return ( +
+
+

Tutorials

+

+ Hands-on guides to build real things with RushDB +

+
+ +
+
+ +
+ setSearch(e.target.value)} + className={`box-border w-full rounded-md border border-solid ${BORDER_CLASS} focus:border-accent bg-transparent py-3 pl-11 pr-4 text-[15px] text-[var(--ifm-font-color-base)] outline-none transition-colors duration-150 ease-out placeholder:text-[var(--ifm-color-emphasis-500)]`} + /> +
+ +
+ {['All', ...allTags].map((tag) => { + const isActive = tag === 'All' ? !activeTag : tag === activeTag + + return ( + + ) + })} +
+ + {filtered.length > 0 ? +
+ {filtered.map((tutorial) => ( + + ))} +
+ :
+ No tutorials match your search. +
+ } +
+ ) +} diff --git a/docs/src/css/custom.css b/docs/src/css/custom.css index b7badd02..b43ffb0e 100644 --- a/docs/src/css/custom.css +++ b/docs/src/css/custom.css @@ -25,21 +25,26 @@ --ifm-code-font-size: 14px; --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.05); - --ifm-navbar-background-color: #ffffff; --ifm-navbar-height: 4rem; - --ifm-footer-background-color: #ffffff; + --ifm-container-width-xl: 1280px; --ifm-container-width: 1280px; - --ifm-background-color: #ffffff; + --ifm-navbar-background-color: #fafafa; + --ifm-footer-background-color: #fafafa; + --ifm-background-color: #fafafa; --ifm-toc-border-color: #e5e7eb; + --ifm-hr-background-color: var(--ifm-toc-border-color); + --doc-sidebar-width: 260px !important; --ifm-list-left-padding: 1rem; --ifm-font-color-base: #101010; --ifm-footer-link-color: #101010; + + --ifm-font-weight-semibold: 600; } body { @@ -56,7 +61,7 @@ html[data-theme='dark'] { --ifm-font-color-base: #d9d9d9; - --ifm-toc-link-color: var(--ifm-toc-link-color); + /* --ifm-toc-link-color: var(--ifm-toc-link-color); */ --ifm-menu-color: rgb(158, 158, 158); --ifm-menu-color-active: var(--ifm-color-primary); @@ -68,16 +73,18 @@ html[data-theme='dark'] { --ifm-color-primary-lighter: #fff; --ifm-color-primary-lightest: #fff; - --ifm-background-color: #000000; - --ifm-navbar-background-color: #000000; - --ifm-footer-background-color: #000000; - --ifm-code-background: #2a2a2a; + --ifm-background-color: #222222; + --ifm-navbar-background-color: #222222; + --ifm-footer-background-color: #222222; + --ifm-code-background: #323232; --docusaurus-highlighted-code-line-bg: rgba(255, 255, 255, 0.1); - --ifm-toc-border-color: #191919; + --ifm-toc-border-color: #3d3d3d; + + --ifm-hr-background-color: var(--ifm-toc-border-color); --ifm-color-primary: #ffffff; - --ifm-footer-link-color: #ffffff; + --ifm-footer-link-color: var(--ifm-menu-color); } @@ -108,10 +115,60 @@ html[data-theme='dark'] { border-bottom: 1px solid var(--ifm-toc-border-color); 
backdrop-filter: blur(12px); background-color: var(--ifm-navbar-background-color); + font-weight: 600; +} + +/* Inactive left-side SDK/API nav links — same color as inactive sidebar items */ +.navbar__link { + --ifm-navbar-link-color: var(--ifm-menu-color); +} + +.navbar__link::before { + background-color: var(--ifm-menu-color) !important; +} + +/* Dark mode: bump inactive up slightly so it's readable on the dark navbar */ +html[data-theme='dark'] .navbar__link { + /* --ifm-navbar-link-color: rgba(255, 255, 255, 0.5); */ + --ifm-navbar-link-color: var(--ifm-menu-color) !important; +} + +html[data-theme='dark'] .navbar__link::before { + background-color: rgba(255, 255, 255, 0.5) !important; +} + +/* Active link */ +.navbar__link--active { + --ifm-navbar-link-color: var(--ifm-color-primary); + text-shadow: rgb(0 114 255 / 22%) 0px 0px 16px + +} +html[data-theme='dark'] .navbar__link--active { + text-shadow: rgb(255 175 0 / 55%) 0px 0px 16px; +} + +.navbar__link--active::before { + background-color: var(--ifm-color-primary) !important; +} + +html[data-theme='dark'] .navbar__link--active { + --ifm-navbar-link-color: var(--ifm-color-primary); +} + +html[data-theme='dark'] .navbar__link--active::before { + background-color: var(--ifm-color-primary) !important; } .navbar__brand { margin-right: 0; + align-items: center; +} + +.navbar__title { + display: flex; + flex-direction: column; + align-items: flex-start; + line-height: 1.25; } .theme-doc-sidebar-container { @@ -150,13 +207,13 @@ html[data-theme='dark'] { .menu__list-item > .menu__link--active { font-weight: 600; background-color: transparent !important; - border-right: 2px solid var(--ifm-color-primary); border-radius: 0 !important; + text-shadow: rgb(0 114 255 / 22%) 0px 0px 16px; } /* Category content spacing */ .theme-doc-sidebar-item-category-level-1 { - @apply border-none pt-4; + @apply border-none; } /* Collapse button */ @@ -172,20 +229,63 @@ html[data-theme='dark'] { } .theme-doc-toc-desktop { - padding: 
1.5rem !important; + padding: 0rem !important; } +/* ── TOC: match main sidebar style ──────────────────────────────────────── */ + .table-of-contents { - padding-left: 0.5rem; + padding-left: 0; + font-weight: 600; +} + +/* Remove li margins — padding goes on the instead, like .menu__link */ +.table-of-contents li { + margin: 0; +} + +.table-of-contents li:not(:first-child) { + margin-top: 0.25rem; } .table-of-contents__link { - font-size: 0.875rem; + font-size: 14px; + font-weight: 600; + color: var(--ifm-menu-color); + display: block; + border-radius: 6px; + padding: var(--ifm-menu-link-padding-vertical) var(--ifm-menu-link-padding-horizontal); + line-height: 1.25; + transition: color var(--ifm-transition-fast) var(--ifm-transition-timing-default); +} + +.table-of-contents__link:hover, +.table-of-contents__link:hover code { + color: var(--ifm-color-primary); + text-decoration: none; + background: transparent; +} + +.table-of-contents__link--active, +.table-of-contents__link--active code { + color: var(--ifm-color-primary) !important; + background: transparent; + text-shadow: rgb(0 114 255 / 22%) 0px 0px 16px; +} + +/* Keep nesting indent but drop the left border line (sidebar uses padding only) */ +.table-of-contents__left-border { + border-left: none; +} + +.table-of-contents ul { + padding-left: var(--ifm-menu-link-padding-horizontal); } code { - background-color: var(--ifm-code-background); - /* border: 1px solid var(--ifm-toc-border-color); */ + background-color: var(--ifm-code-background); + border: 1px solid var(--ifm-toc-border-color); + font-size: inherit; border-radius: 6px; font-family: var(--ifm-font-family-monospace), monospace; font-feature-settings: "liga" 0; @@ -204,7 +304,7 @@ pre code { /* Specific styling for inline code */ :not(pre) > code { - padding: 0.2rem 0.4rem; + padding: 0.1rem 0.2rem; vertical-align: middle; } @@ -226,6 +326,23 @@ article, --ifm-h4-font-size: 1.25rem; } +.markdown h1 { + text-shadow: rgb(0 114 255 / 22%) 0px 0px 20px 
+} + +html[data-theme='dark'] .markdown h1{ + text-shadow: rgb(255 175 0 / 55%) 0px 0px 28px; +} + +html[data-theme='dark'] .menu__list-item > .menu__link--active { + text-shadow: rgb(255 175 0 / 55%) 0px 0px 16px; +} + +html[data-theme='dark'] .table-of-contents__link--active, +html[data-theme='dark'] .table-of-contents__link--active code { + text-shadow: rgb(255 175 0 / 55%) 0px 0px 16px; +} + .markdown > h2 { margin-top: 3rem; margin-bottom: 1rem; @@ -247,27 +364,27 @@ article, padding: 1.25rem 1.5rem 1.25rem 1.25rem; margin: 1.5rem 0; border-left: 2px solid var(--ifm-color-primary); - background-color: #f5faff; - color: #1a2a3a; + background-color: rgba(0, 114, 255, 0.06); + color: var(--ifm-font-color-base); } .theme-admonition-info { border-color: var(--ifm-color-primary); - background-color: #dde9ff; - color: var(--ifm-color-primary-darkest); + background-color: rgba(0, 114, 255, 0.08); + color: var(--ifm-font-color-base); } .theme-admonition .admonition-icon svg { color: var(--ifm-color-primary); } html[data-theme='dark'] .theme-admonition { - background-color: #a3a3a3; - border-color: var(--ifm-color-primary-dark); - color: #e6eaf3; + background-color: rgba(255, 255, 255, 0.05); + border-color: var(--ifm-color-primary); + color: var(--ifm-font-color-base); } html[data-theme='dark'] .theme-admonition-info { - background-color: #494949; + background-color: rgba(255, 255, 255, 0.05); border-color: var(--ifm-color-primary); - color: var(--ifm-color-primary-lightest); + color: var(--ifm-font-color-base); } html[data-theme='dark'] .theme-admonition .admonition-icon svg { color: var(--ifm-color-primary-light); @@ -312,6 +429,7 @@ max-width: var(--ifm-container-width-xl) !important; height: 24px; display: flex; background-color: var(--ifm-navbar-link-color); + -webkit-mask-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 
0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E"); mask-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E"); transition: background-color var(--ifm-transition-fast) } @@ -323,85 +441,546 @@ max-width: var(--ifm-container-width-xl) !important; .python-sdk, .typescript-sdk, .rest-api, .mcp-server { @apply flex items-center justify-center gap-2; } -.python-sdk > a::before { - content: ''; - width: 18px; - height: 18px; - display: flex; - @apply mr-2; - background-color: var(--ifm-menu-color); - mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1024 1024'%3E%3Cpath 
fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M511.847,757.279c0,10.586,0,20.188,0,30.832c80.904,0,161.375,0,241.785,0 c0,37.582,0,74.129-0.01,110.672c0,0.328-0.164,0.652-0.221,0.984c-5.341,30.986-22.589,54.121-47.641,72.006 c-30.986,22.119-66.561,32.812-103.044,41.27c-85.164,19.752-168.318,12.871-248.585-21.24 c-19.08-8.107-36.901-20.795-52.854-34.273c-22.846-19.307-33.87-45.564-33.9-75.832c-0.073-78.047-0.805-156.102,0.225-234.133 c0.925-70.191,55.389-122.805,125.531-123.01c77.975-0.227,155.967-1.086,233.918,0.23 C705.86,526.119,772.818,466,783.688,391.142c1.251-8.611,1.377-17.432,1.425-26.162c0.163-30.611,0.064-61.224,0.064-91.837 c0-3.605,0-7.213,0-11.838c12.517,0,23.854,0,35.193,0c18.148,0.002,36.3,0.142,54.448-0.023 c43.185-0.395,75.38,18.446,97.422,55.311c16.096,26.917,24.123,56.681,30.942,86.92c10.478,46.456,16.828,93.305,13.369,141.044 c-4.067,56.15-20.297,108.848-47.128,158.115c-20.56,37.752-53.253,54.877-96.167,54.734 c-115.953-0.381-231.907-0.129-347.859-0.127C521.083,757.279,516.769,757.279,511.847,757.279z M648.96,850.41 c-25.734-0.252-46.009,19.758-46.221,45.611c-0.214,25.793,19.813,46.887,44.912,47.307 c25.027,0.418,46.009-20.428,46.279-45.986C694.203,871.342,674.386,850.658,648.96,850.41z'/%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M510.621,261.305c0-10.549,0-20.083,0-30.812c-81.056,0-161.535,0-242.22,0 c-0.474-3.074-1.038-5.012-1.03-6.947c0.141-34.312-0.533-68.657,0.749-102.928c2.042-54.623,23.93-84.223,76.56-98.907 c31.827-8.878,64.892-14.673,97.789-18.063c64.773-6.675,129.604-4.182,193.803,7.426c37.246,6.734,68.971,24.009,92.947,53.935 c16.724,20.873,25.52,44.756,25.516,71.703c-0.017,77.714,0.374,155.43-0.115,233.142 c-0.449,71.218-53.786,123.692-125.062,123.814c-78.651,0.136-157.304-0.036-235.956,0.057 c-76.784,0.088-139.957,52.139-154.399,127.492c-1.883,9.83-2.224,20.059-2.314,30.111c-0.285,31.285-0.105,62.574-0.105,93.861 
c0,3.617,0,7.23,0,12.09c-7.474,0-13.77,0.051-20.063-0.008c-24.864-0.227-49.776,0.426-74.576-0.945 c-39.027-2.16-68.304-21.814-89.938-53.754c-18.086-26.703-25.991-57.164-33.359-88.004 C-0.209,534.822,3.376,455.854,25.584,377.35c20.668-73.063,76.843-115.872,152.937-115.998 c106.549-0.177,213.097-0.047,319.646-0.047C502.121,261.305,506.076,261.305,510.621,261.305z M373.439,75.404 c-25.309,0.176-45.207,20.863-45.057,46.848c0.149,25.682,20.729,46.29,45.985,46.043c25.146-0.245,45.418-21.308,45.201-46.962 C419.35,95.679,398.811,75.23,373.439,75.404z'/%3E%3C/svg%3E%0A"); -} -.typescript-sdk > a::before { +.python-sdk > a::before, +.typescript-sdk > a::before, +.rest-api > a::before, +.mcp-server > a::before, +.navbar__link.python-sdk::before, +.navbar__link.typescript-sdk::before, +.navbar__link.rest-api::before, +.navbar__link.mcp-server::before { content: ''; - width: 18px; - height: 18px; display: flex; - @apply mr-2; + flex-shrink: 0; background-color: var(--ifm-menu-color); - mask-image: url("data:image/svg+xml,%3Csvg%20xmlns%3D%22http://www.w3.org/2000/svg%22%20viewBox%3D%220%200%20128%20128%22%3E%3Cpath%20fill%3D%22white%22%20d%3D%22M2%2C63.91v62.5H127V1.41H2Zm100.73-5a15.56%2C15.56%2C0%2C0%2C1%2C7.82%2C4.5%2C20.58%2C20.58%2C0%2C0%2C1%2C3%2C4c0%2C.16-5.4%2C3.81-8.69%2C5.85-.12.08-.6-.44-1.13-1.23a7.09%2C7.09%2C0%2C0%2C0-5.87-3.53c-3.79-.26-6.23%2C1.73-6.21%2C5a4.58%2C4.58%2C0%2C0%2C0%2C.54%2C2.34c.83%2C1.73%2C2.38%2C2.76%2C7.24%2C4.86%2C8.95%2C3.85%2C12.78%2C6.39%2C15.16%2C10%2C2.66%2C4%2C3.25%2C10.46%2C1.45%2C15.24-2%2C5.2-6.9%2C8.73-13.83%2C9.9a38.32%2C38.32%2C0%2C0%2C1-9.52-.1A23%2C23%2C0%2C0%2C1%2C80%2C109.19c-1.15-1.27-3.39-4.58-3.25-4.82a9.34%2C9.34%2C0%2C0%2C1%2C1.15-.73L82.5%2C101l3.59-2.08.75%2C1.11a16.78%2C16.78%2C0%2C0%2C0%2C4.74%2C4.54c4%2C2.1%2C9.46%2C1.81%2C12.16-.62a5.43%2C5.43%2C0%2C0%2C0%2C.69-6.92c-1-1.39-3-2.56-8.59-5-6.45-2.78-9.23-4.5-11.77-7.24a16.48%2C16.48%2C0%2C0%2C1-3.43-6.25%2C25%2C25%2C0%2C0%2C1-.22-8c1.33-6.23%2C6-10.58%2C12.82-11.87A31.6
6%2C31.66%2C0%2C0%2C1%2C102.73%2C58.93ZM73.39%2C64.15l0%2C5.12H57.16V115.5H45.65V69.26H29.38v-5a49.19%2C49.19%2C0%2C0%2C1%2C.14-5.16c.06-.08%2C10-.12%2C22-.1L73.33%2C59Z%22/%3E%3C/svg%3E"); + -webkit-mask-size: contain; + mask-size: contain; + -webkit-mask-repeat: no-repeat; + mask-repeat: no-repeat; + -webkit-mask-position: center; + mask-position: center; } + +/* Sidebar: 18px + right margin */ +.python-sdk > a::before, +.typescript-sdk > a::before, .rest-api > a::before { - content: ''; width: 18px; height: 18px; - display: flex; @apply mr-2; - background-color: var(--ifm-menu-color); - mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' version='1.0' viewBox='0 0 512.000000 533.000000' preserveAspectRatio='xMidYMid meet'%3E%3Cg transform='translate(0.000000,533.000000) scale(0.100000,-0.100000)' fill='%23000000' stroke='none'%3E%3Cpath d='M1820 4890 c-568 -24 -859 -109 -1082 -319 -174 -164 -276 -384 -322 -703 -34 -231 -40 -424 -40 -1163 0 -875 12 -1082 80 -1350 106 -423 378 -673 829 -764 269 -54 505 -64 1410 -58 762 5 863 9 1080 47 321 56 539 171 691 364 194 245 253 496 275 1171 12 372 5 1283 -11 1500 -36 472 -120 719 -314 923 -199 209 -456 304 -931 343 -166 13 -1395 20 -1665 9z m1442 -320 c534 -22 747 -79 913 -245 144 -143 206 -336 235 -725 16 -219 23 -1113 11 -1475 -21 -656 -74 -864 -262 -1040 -136 -126 -305 -184 -639 -217 -192 -19 -1728 -19 -1920 0 -334 33 -503 91 -639 217 -187 174 -241 384 -262 1025 -12 357 -5 1280 11 1495 20 269 61 451 128 580 49 93 154 198 247 247 164 86 430 131 825 141 296 8 1136 6 1352 -3z'/%3E%3Cpath d='M1525 3491 c-16 -10 -38 -27 -48 -38 -10 -11 -119 -324 -243 -694 -250 -750 -243 -717 -177 -790 65 -71 160 -74 229 -5 23 23 41 61 70 149 l38 117 251 0 250 0 50 -121 c56 -138 94 -182 164 -194 53 -9 126 25 158 74 46 68 46 67 -253 784 -151 364 -285 673 -297 685 -50 54 -135 69 -192 33z m166 -774 l68 -162 -121 -3 c-66 -1 -123 0 -126 3 -4 3 18 80 48 170 30 90 56 162 59 159 3 -2 35 -77 72 -167z'/%3E%3Cpath d='M2700 3501 
c-74 -23 -136 -78 -169 -151 -20 -44 -21 -63 -21 -686 l0 -641 23 -34 c32 -49 105 -83 158 -74 53 9 115 64 129 113 5 21 10 146 10 280 l0 242 148 0 c238 0 348 36 455 149 67 70 102 129 121 206 57 216 -39 443 -232 548 -90 49 -146 57 -382 56 -118 0 -226 -4 -240 -8z m466 -327 c49 -23 77 -68 82 -129 5 -63 -13 -102 -65 -142 -36 -28 -39 -28 -195 -31 l-158 -4 0 161 0 161 151 0 c119 0 158 -3 185 -16z'/%3E%3Cpath d='M3890 3502 c-40 -19 -70 -46 -84 -77 -14 -29 -16 -113 -16 -718 l0 -684 23 -34 c32 -49 105 -83 158 -74 54 9 115 64 129 116 7 25 10 266 8 701 -3 642 -4 664 -23 697 -30 52 -67 74 -127 77 -29 2 -60 0 -68 -4z'/%3E%3C/g%3E%3C/svg%3E"); } -.mcp-server > a::before { - content: ''; + +/* mcp-server: 16px in both contexts */ +.mcp-server > a::before, +.navbar__link.mcp-server::before { width: 16px; height: 16px; - display: flex; +} +.mcp-server > a::before { @apply mr-2; - background-color: var(--ifm-menu-color); - mask-image: url("data:image/svg+xml,%3Csvg fill='currentColor' fill-rule='evenodd' height='1em' style='flex:none;line-height:1' viewBox='0 0 24 24' width='1em' xmlns='http://www.w3.org/2000/svg'%3E%3Ctitle%3EModelContextProtocol%3C/title%3E%3Cpath d='M15.688 2.343a2.588 2.588 0 00-3.61 0l-9.626 9.44a.863.863 0 01-1.203 0 .823.823 0 010-1.18l9.626-9.44a4.313 4.313 0 016.016 0 4.116 4.116 0 011.204 3.54 4.3 4.3 0 013.609 1.18l.05.05a4.115 4.115 0 010 5.9l-8.706 8.537a.274.274 0 000 .393l1.788 1.754a.823.823 0 010 1.18.863.863 0 01-1.203 0l-1.788-1.753a1.92 1.92 0 010-2.754l8.706-8.538a2.47 2.47 0 000-3.54l-.05-.049a2.588 2.588 0 00-3.607-.003l-7.172 7.034-.002.002-.098.097a.863.863 0 01-1.204 0 .823.823 0 010-1.18l7.273-7.133a2.47 2.47 0 00-.003-3.537z'%3E%3C/path%3E%3Cpath d='M14.485 4.703a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a4.115 4.115 0 000 5.9 4.314 4.314 0 006.016 0l7.12-6.982a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a2.588 2.588 0 01-3.61 0 2.47 2.47 0 010-3.54l7.12-6.982z'%3E%3C/path%3E%3C/svg%3E"); } 
-.navbar__link.python-sdk::before { - content: ''; +/* Navbar: python + typescript = 20px */ +.navbar__link.python-sdk::before, +.navbar__link.typescript-sdk::before { width: 20px; height: 20px; - display: flex; - background-color: var(--ifm-menu-color); +} + +/* Navbar: rest-api = 22px */ +.navbar__link.rest-api::before { + width: 22px; + height: 22px; +} + +/* Per-technology mask images (sidebar + navbar share the same SVG) */ +.python-sdk > a::before, +.navbar__link.python-sdk::before { + -webkit-mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1024 1024'%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M511.847,757.279c0,10.586,0,20.188,0,30.832c80.904,0,161.375,0,241.785,0 c0,37.582,0,74.129-0.01,110.672c0,0.328-0.164,0.652-0.221,0.984c-5.341,30.986-22.589,54.121-47.641,72.006 c-30.986,22.119-66.561,32.812-103.044,41.27c-85.164,19.752-168.318,12.871-248.585-21.24 c-19.08-8.107-36.901-20.795-52.854-34.273c-22.846-19.307-33.87-45.564-33.9-75.832c-0.073-78.047-0.805-156.102,0.225-234.133 c0.925-70.191,55.389-122.805,125.531-123.01c77.975-0.227,155.967-1.086,233.918,0.23 C705.86,526.119,772.818,466,783.688,391.142c1.251-8.611,1.377-17.432,1.425-26.162c0.163-30.611,0.064-61.224,0.064-91.837 c0-3.605,0-7.213,0-11.838c12.517,0,23.854,0,35.193,0c18.148,0.002,36.3,0.142,54.448-0.023 c43.185-0.395,75.38,18.446,97.422,55.311c16.096,26.917,24.123,56.681,30.942,86.92c10.478,46.456,16.828,93.305,13.369,141.044 c-4.067,56.15-20.297,108.848-47.128,158.115c-20.56,37.752-53.253,54.877-96.167,54.734 c-115.953-0.381-231.907-0.129-347.859-0.127C521.083,757.279,516.769,757.279,511.847,757.279z M648.96,850.41 c-25.734-0.252-46.009,19.758-46.221,45.611c-0.214,25.793,19.813,46.887,44.912,47.307 c25.027,0.418,46.009-20.428,46.279-45.986C694.203,871.342,674.386,850.658,648.96,850.41z'/%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M510.621,261.305c0-10.549,0-20.083,0-30.812c-81.056,0-161.535,0-242.22,0 
c-0.474-3.074-1.038-5.012-1.03-6.947c0.141-34.312-0.533-68.657,0.749-102.928c2.042-54.623,23.93-84.223,76.56-98.907 c31.827-8.878,64.892-14.673,97.789-18.063c64.773-6.675,129.604-4.182,193.803,7.426c37.246,6.734,68.971,24.009,92.947,53.935 c16.724,20.873,25.52,44.756,25.516,71.703c-0.017,77.714,0.374,155.43-0.115,233.142 c-0.449,71.218-53.786,123.692-125.062,123.814c-78.651,0.136-157.304-0.036-235.956,0.057 c-76.784,0.088-139.957,52.139-154.399,127.492c-1.883,9.83-2.224,20.059-2.314,30.111c-0.285,31.285-0.105,62.574-0.105,93.861 c0,3.617,0,7.23,0,12.09c-7.474,0-13.77,0.051-20.063-0.008c-24.864-0.227-49.776,0.426-74.576-0.945 c-39.027-2.16-68.304-21.814-89.938-53.754c-18.086-26.703-25.991-57.164-33.359-88.004 C-0.209,534.822,3.376,455.854,25.584,377.35c20.668-73.063,76.843-115.872,152.937-115.998 c106.549-0.177,213.097-0.047,319.646-0.047C502.121,261.305,506.076,261.305,510.621,261.305z M373.439,75.404 c-25.309,0.176-45.207,20.863-45.057,46.848c0.149,25.682,20.729,46.29,45.985,46.043c25.146-0.245,45.418-21.308,45.201-46.962 C419.35,95.679,398.811,75.23,373.439,75.404z'/%3E%3C/svg%3E%0A"); mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1024 1024'%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M511.847,757.279c0,10.586,0,20.188,0,30.832c80.904,0,161.375,0,241.785,0 c0,37.582,0,74.129-0.01,110.672c0,0.328-0.164,0.652-0.221,0.984c-5.341,30.986-22.589,54.121-47.641,72.006 c-30.986,22.119-66.561,32.812-103.044,41.27c-85.164,19.752-168.318,12.871-248.585-21.24 c-19.08-8.107-36.901-20.795-52.854-34.273c-22.846-19.307-33.87-45.564-33.9-75.832c-0.073-78.047-0.805-156.102,0.225-234.133 c0.925-70.191,55.389-122.805,125.531-123.01c77.975-0.227,155.967-1.086,233.918,0.23 C705.86,526.119,772.818,466,783.688,391.142c1.251-8.611,1.377-17.432,1.425-26.162c0.163-30.611,0.064-61.224,0.064-91.837 c0-3.605,0-7.213,0-11.838c12.517,0,23.854,0,35.193,0c18.148,0.002,36.3,0.142,54.448-0.023 
c43.185-0.395,75.38,18.446,97.422,55.311c16.096,26.917,24.123,56.681,30.942,86.92c10.478,46.456,16.828,93.305,13.369,141.044 c-4.067,56.15-20.297,108.848-47.128,158.115c-20.56,37.752-53.253,54.877-96.167,54.734 c-115.953-0.381-231.907-0.129-347.859-0.127C521.083,757.279,516.769,757.279,511.847,757.279z M648.96,850.41 c-25.734-0.252-46.009,19.758-46.221,45.611c-0.214,25.793,19.813,46.887,44.912,47.307 c25.027,0.418,46.009-20.428,46.279-45.986C694.203,871.342,674.386,850.658,648.96,850.41z'/%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M510.621,261.305c0-10.549,0-20.083,0-30.812c-81.056,0-161.535,0-242.22,0 c-0.474-3.074-1.038-5.012-1.03-6.947c0.141-34.312-0.533-68.657,0.749-102.928c2.042-54.623,23.93-84.223,76.56-98.907 c31.827-8.878,64.892-14.673,97.789-18.063c64.773-6.675,129.604-4.182,193.803,7.426c37.246,6.734,68.971,24.009,92.947,53.935 c16.724,20.873,25.52,44.756,25.516,71.703c-0.017,77.714,0.374,155.43-0.115,233.142 c-0.449,71.218-53.786,123.692-125.062,123.814c-78.651,0.136-157.304-0.036-235.956,0.057 c-76.784,0.088-139.957,52.139-154.399,127.492c-1.883,9.83-2.224,20.059-2.314,30.111c-0.285,31.285-0.105,62.574-0.105,93.861 c0,3.617,0,7.23,0,12.09c-7.474,0-13.77,0.051-20.063-0.008c-24.864-0.227-49.776,0.426-74.576-0.945 c-39.027-2.16-68.304-21.814-89.938-53.754c-18.086-26.703-25.991-57.164-33.359-88.004 C-0.209,534.822,3.376,455.854,25.584,377.35c20.668-73.063,76.843-115.872,152.937-115.998 c106.549-0.177,213.097-0.047,319.646-0.047C502.121,261.305,506.076,261.305,510.621,261.305z M373.439,75.404 c-25.309,0.176-45.207,20.863-45.057,46.848c0.149,25.682,20.729,46.29,45.985,46.043c25.146-0.245,45.418-21.308,45.201-46.962 C419.35,95.679,398.811,75.23,373.439,75.404z'/%3E%3C/svg%3E%0A"); } + +.typescript-sdk > a::before, .navbar__link.typescript-sdk::before { - content: ''; - width: 20px; - height: 20px; - display: flex; - background-color: var(--ifm-menu-color); + -webkit-mask-image: 
url("data:image/svg+xml,%3Csvg%20xmlns%3D%22http://www.w3.org/2000/svg%22%20viewBox%3D%220%200%20128%20128%22%3E%3Cpath%20fill%3D%22white%22%20d%3D%22M2%2C63.91v62.5H127V1.41H2Zm100.73-5a15.56%2C15.56%2C0%2C0%2C1%2C7.82%2C4.5%2C20.58%2C20.58%2C0%2C0%2C1%2C3%2C4c0%2C.16-5.4%2C3.81-8.69%2C5.85-.12.08-.6-.44-1.13-1.23a7.09%2C7.09%2C0%2C0%2C0-5.87-3.53c-3.79-.26-6.23%2C1.73-6.21%2C5a4.58%2C4.58%2C0%2C0%2C0%2C.54%2C2.34c.83%2C1.73%2C2.38%2C2.76%2C7.24%2C4.86%2C8.95%2C3.85%2C12.78%2C6.39%2C15.16%2C10%2C2.66%2C4%2C3.25%2C10.46%2C1.45%2C15.24-2%2C5.2-6.9%2C8.73-13.83%2C9.9a38.32%2C38.32%2C0%2C0%2C1-9.52-.1A23%2C23%2C0%2C0%2C1%2C80%2C109.19c-1.15-1.27-3.39-4.58-3.25-4.82a9.34%2C9.34%2C0%2C0%2C1%2C1.15-.73L82.5%2C101l3.59-2.08.75%2C1.11a16.78%2C16.78%2C0%2C0%2C0%2C4.74%2C4.54c4%2C2.1%2C9.46%2C1.81%2C12.16-.62a5.43%2C5.43%2C0%2C0%2C0%2C.69-6.92c-1-1.39-3-2.56-8.59-5-6.45-2.78-9.23-4.5-11.77-7.24a16.48%2C16.48%2C0%2C0%2C1-3.43-6.25%2C25%2C25%2C0%2C0%2C1-.22-8c1.33-6.23%2C6-10.58%2C12.82-11.87A31.66%2C31.66%2C0%2C0%2C1%2C102.73%2C58.93ZM73.39%2C64.15l0%2C5.12H57.16V115.5H45.65V69.26H29.38v-5a49.19%2C49.19%2C0%2C0%2C1%2C.14-5.16c.06-.08%2C10-.12%2C22-.1L73.33%2C59Z%22/%3E%3C/svg%3E"); mask-image: 
url("data:image/svg+xml,%3Csvg%20xmlns%3D%22http://www.w3.org/2000/svg%22%20viewBox%3D%220%200%20128%20128%22%3E%3Cpath%20fill%3D%22white%22%20d%3D%22M2%2C63.91v62.5H127V1.41H2Zm100.73-5a15.56%2C15.56%2C0%2C0%2C1%2C7.82%2C4.5%2C20.58%2C20.58%2C0%2C0%2C1%2C3%2C4c0%2C.16-5.4%2C3.81-8.69%2C5.85-.12.08-.6-.44-1.13-1.23a7.09%2C7.09%2C0%2C0%2C0-5.87-3.53c-3.79-.26-6.23%2C1.73-6.21%2C5a4.58%2C4.58%2C0%2C0%2C0%2C.54%2C2.34c.83%2C1.73%2C2.38%2C2.76%2C7.24%2C4.86%2C8.95%2C3.85%2C12.78%2C6.39%2C15.16%2C10%2C2.66%2C4%2C3.25%2C10.46%2C1.45%2C15.24-2%2C5.2-6.9%2C8.73-13.83%2C9.9a38.32%2C38.32%2C0%2C0%2C1-9.52-.1A23%2C23%2C0%2C0%2C1%2C80%2C109.19c-1.15-1.27-3.39-4.58-3.25-4.82a9.34%2C9.34%2C0%2C0%2C1%2C1.15-.73L82.5%2C101l3.59-2.08.75%2C1.11a16.78%2C16.78%2C0%2C0%2C0%2C4.74%2C4.54c4%2C2.1%2C9.46%2C1.81%2C12.16-.62a5.43%2C5.43%2C0%2C0%2C0%2C.69-6.92c-1-1.39-3-2.56-8.59-5-6.45-2.78-9.23-4.5-11.77-7.24a16.48%2C16.48%2C0%2C0%2C1-3.43-6.25%2C25%2C25%2C0%2C0%2C1-.22-8c1.33-6.23%2C6-10.58%2C12.82-11.87A31.66%2C31.66%2C0%2C0%2C1%2C102.73%2C58.93ZM73.39%2C64.15l0%2C5.12H57.16V115.5H45.65V69.26H29.38v-5a49.19%2C49.19%2C0%2C0%2C1%2C.14-5.16c.06-.08%2C10-.12%2C22-.1L73.33%2C59Z%22/%3E%3C/svg%3E"); } + +.rest-api > a::before, .navbar__link.rest-api::before { - content: ''; - width: 22px; - height: 22px; - display: flex; - background-color: var(--ifm-menu-color); + -webkit-mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' version='1.0' viewBox='0 0 512.000000 533.000000' preserveAspectRatio='xMidYMid meet'%3E%3Cg transform='translate(0.000000,533.000000) scale(0.100000,-0.100000)' fill='%23000000' stroke='none'%3E%3Cpath d='M1820 4890 c-568 -24 -859 -109 -1082 -319 -174 -164 -276 -384 -322 -703 -34 -231 -40 -424 -40 -1163 0 -875 12 -1082 80 -1350 106 -423 378 -673 829 -764 269 -54 505 -64 1410 -58 762 5 863 9 1080 47 321 56 539 171 691 364 194 245 253 496 275 1171 12 372 5 1283 -11 1500 -36 472 -120 719 -314 923 -199 209 -456 304 -931 343 -166 13 -1395 20 -1665 9z 
m1442 -320 c534 -22 747 -79 913 -245 144 -143 206 -336 235 -725 16 -219 23 -1113 11 -1475 -21 -656 -74 -864 -262 -1040 -136 -126 -305 -184 -639 -217 -192 -19 -1728 -19 -1920 0 -334 33 -503 91 -639 217 -187 174 -241 384 -262 1025 -12 357 -5 1280 11 1495 20 269 61 451 128 580 49 93 154 198 247 247 164 86 430 131 825 141 296 8 1136 6 1352 -3z'/%3E%3Cpath d='M1525 3491 c-16 -10 -38 -27 -48 -38 -10 -11 -119 -324 -243 -694 -250 -750 -243 -717 -177 -790 65 -71 160 -74 229 -5 23 23 41 61 70 149 l38 117 251 0 250 0 50 -121 c56 -138 94 -182 164 -194 53 -9 126 25 158 74 46 68 46 67 -253 784 -151 364 -285 673 -297 685 -50 54 -135 69 -192 33z m166 -774 l68 -162 -121 -3 c-66 -1 -123 0 -126 3 -4 3 18 80 48 170 30 90 56 162 59 159 3 -2 35 -77 72 -167z'/%3E%3Cpath d='M2700 3501 c-74 -23 -136 -78 -169 -151 -20 -44 -21 -63 -21 -686 l0 -641 23 -34 c32 -49 105 -83 158 -74 53 9 115 64 129 113 5 21 10 146 10 280 l0 242 148 0 c238 0 348 36 455 149 67 70 102 129 121 206 57 216 -39 443 -232 548 -90 49 -146 57 -382 56 -118 0 -226 -4 -240 -8z m466 -327 c49 -23 77 -68 82 -129 5 -63 -13 -102 -65 -142 -36 -28 -39 -28 -195 -31 l-158 -4 0 161 0 161 151 0 c119 0 158 -3 185 -16z'/%3E%3Cpath d='M3890 3502 c-40 -19 -70 -46 -84 -77 -14 -29 -16 -113 -16 -718 l0 -684 23 -34 c32 -49 105 -83 158 -74 54 9 115 64 129 116 7 25 10 266 8 701 -3 642 -4 664 -23 697 -30 52 -67 74 -127 77 -29 2 -60 0 -68 -4z'/%3E%3C/g%3E%3C/svg%3E"); mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' version='1.0' viewBox='0 0 512.000000 533.000000' preserveAspectRatio='xMidYMid meet'%3E%3Cg transform='translate(0.000000,533.000000) scale(0.100000,-0.100000)' fill='%23000000' stroke='none'%3E%3Cpath d='M1820 4890 c-568 -24 -859 -109 -1082 -319 -174 -164 -276 -384 -322 -703 -34 -231 -40 -424 -40 -1163 0 -875 12 -1082 80 -1350 106 -423 378 -673 829 -764 269 -54 505 -64 1410 -58 762 5 863 9 1080 47 321 56 539 171 691 364 194 245 253 496 275 1171 12 372 5 1283 -11 1500 -36 472 -120 719 -314 923 -199 209 -456 
304 -931 343 -166 13 -1395 20 -1665 9z m1442 -320 c534 -22 747 -79 913 -245 144 -143 206 -336 235 -725 16 -219 23 -1113 11 -1475 -21 -656 -74 -864 -262 -1040 -136 -126 -305 -184 -639 -217 -192 -19 -1728 -19 -1920 0 -334 33 -503 91 -639 217 -187 174 -241 384 -262 1025 -12 357 -5 1280 11 1495 20 269 61 451 128 580 49 93 154 198 247 247 164 86 430 131 825 141 296 8 1136 6 1352 -3z'/%3E%3Cpath d='M1525 3491 c-16 -10 -38 -27 -48 -38 -10 -11 -119 -324 -243 -694 -250 -750 -243 -717 -177 -790 65 -71 160 -74 229 -5 23 23 41 61 70 149 l38 117 251 0 250 0 50 -121 c56 -138 94 -182 164 -194 53 -9 126 25 158 74 46 68 46 67 -253 784 -151 364 -285 673 -297 685 -50 54 -135 69 -192 33z m166 -774 l68 -162 -121 -3 c-66 -1 -123 0 -126 3 -4 3 18 80 48 170 30 90 56 162 59 159 3 -2 35 -77 72 -167z'/%3E%3Cpath d='M2700 3501 c-74 -23 -136 -78 -169 -151 -20 -44 -21 -63 -21 -686 l0 -641 23 -34 c32 -49 105 -83 158 -74 53 9 115 64 129 113 5 21 10 146 10 280 l0 242 148 0 c238 0 348 36 455 149 67 70 102 129 121 206 57 216 -39 443 -232 548 -90 49 -146 57 -382 56 -118 0 -226 -4 -240 -8z m466 -327 c49 -23 77 -68 82 -129 5 -63 -13 -102 -65 -142 -36 -28 -39 -28 -195 -31 l-158 -4 0 161 0 161 151 0 c119 0 158 -3 185 -16z'/%3E%3Cpath d='M3890 3502 c-40 -19 -70 -46 -84 -77 -14 -29 -16 -113 -16 -718 l0 -684 23 -34 c32 -49 105 -83 158 -74 54 9 115 64 129 116 7 25 10 266 8 701 -3 642 -4 664 -23 697 -30 52 -67 74 -127 77 -29 2 -60 0 -68 -4z'/%3E%3C/g%3E%3C/svg%3E"); } +.mcp-server > a::before, .navbar__link.mcp-server::before { + -webkit-mask-image: url("data:image/svg+xml,%3Csvg fill='currentColor' fill-rule='evenodd' height='1em' style='flex:none;line-height:1' viewBox='0 0 24 24' width='1em' xmlns='http://www.w3.org/2000/svg'%3E%3Ctitle%3EModelContextProtocol%3C/title%3E%3Cpath d='M15.688 2.343a2.588 2.588 0 00-3.61 0l-9.626 9.44a.863.863 0 01-1.203 0 .823.823 0 010-1.18l9.626-9.44a4.313 4.313 0 016.016 0 4.116 4.116 0 011.204 3.54 4.3 4.3 0 013.609 1.18l.05.05a4.115 4.115 0 010 5.9l-8.706 8.537a.274.274 0 
000 .393l1.788 1.754a.823.823 0 010 1.18.863.863 0 01-1.203 0l-1.788-1.753a1.92 1.92 0 010-2.754l8.706-8.538a2.47 2.47 0 000-3.54l-.05-.049a2.588 2.588 0 00-3.607-.003l-7.172 7.034-.002.002-.098.097a.863.863 0 01-1.204 0 .823.823 0 010-1.18l7.273-7.133a2.47 2.47 0 00-.003-3.537z'%3E%3C/path%3E%3Cpath d='M14.485 4.703a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a4.115 4.115 0 000 5.9 4.314 4.314 0 006.016 0l7.12-6.982a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a2.588 2.588 0 01-3.61 0 2.47 2.47 0 010-3.54l7.12-6.982z'%3E%3C/path%3E%3C/svg%3E"); + mask-image: url("data:image/svg+xml,%3Csvg fill='currentColor' fill-rule='evenodd' height='1em' style='flex:none;line-height:1' viewBox='0 0 24 24' width='1em' xmlns='http://www.w3.org/2000/svg'%3E%3Ctitle%3EModelContextProtocol%3C/title%3E%3Cpath d='M15.688 2.343a2.588 2.588 0 00-3.61 0l-9.626 9.44a.863.863 0 01-1.203 0 .823.823 0 010-1.18l9.626-9.44a4.313 4.313 0 016.016 0 4.116 4.116 0 011.204 3.54 4.3 4.3 0 013.609 1.18l.05.05a4.115 4.115 0 010 5.9l-8.706 8.537a.274.274 0 000 .393l1.788 1.754a.823.823 0 010 1.18.863.863 0 01-1.203 0l-1.788-1.753a1.92 1.92 0 010-2.754l8.706-8.538a2.47 2.47 0 000-3.54l-.05-.049a2.588 2.588 0 00-3.607-.003l-7.172 7.034-.002.002-.098.097a.863.863 0 01-1.204 0 .823.823 0 010-1.18l7.273-7.133a2.47 2.47 0 00-.003-3.537z'%3E%3C/path%3E%3Cpath d='M14.485 4.703a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a4.115 4.115 0 000 5.9 4.314 4.314 0 006.016 0l7.12-6.982a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a2.588 2.588 0 01-3.61 0 2.47 2.47 0 010-3.54l7.12-6.982z'%3E%3C/path%3E%3C/svg%3E"); +} + +.tutorials-link { + @apply flex items-center justify-center gap-2; +} + +.tutorials-link > a::before, +.navbar__link.tutorials-link::before { content: ''; - width: 16px; - height: 16px; display: flex; + flex-shrink: 0; background-color: var(--ifm-menu-color); - mask-image: url("data:image/svg+xml,%3Csvg fill='currentColor' fill-rule='evenodd' height='1em' 
style='flex:none;line-height:1' viewBox='0 0 24 24' width='1em' xmlns='http://www.w3.org/2000/svg'%3E%3Ctitle%3EModelContextProtocol%3C/title%3E%3Cpath d='M15.688 2.343a2.588 2.588 0 00-3.61 0l-9.626 9.44a.863.863 0 01-1.203 0 .823.823 0 010-1.18l9.626-9.44a4.313 4.313 0 016.016 0 4.116 4.116 0 011.204 3.54 4.3 4.3 0 013.609 1.18l.05.05a4.115 4.115 0 010 5.9l-8.706 8.537a.274.274 0 000 .393l1.788 1.754a.823.823 0 010 1.18.863.863 0 01-1.203 0l-1.788-1.753a1.92 1.92 0 010-2.754l8.706-8.538a2.47 2.47 0 000-3.54l-.05-.049a2.588 2.588 0 00-3.607-.003l-7.172 7.034-.002.002-.098.097a.863.863 0 01-1.204 0 .823.823 0 010-1.18l7.273-7.133a2.47 2.47 0 00-.003-3.537z'%3E%3C/path%3E%3Cpath d='M14.485 4.703a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a4.115 4.115 0 000 5.9 4.314 4.314 0 006.016 0l7.12-6.982a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a2.588 2.588 0 01-3.61 0 2.47 2.47 0 010-3.54l7.12-6.982z'%3E%3C/path%3E%3C/svg%3E"); + -webkit-mask-size: contain; + mask-size: contain; + -webkit-mask-repeat: no-repeat; + mask-repeat: no-repeat; + -webkit-mask-position: center; + mask-position: center; + width: 18px; + height: 18px; + @apply mr-2; + -webkit-mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='black' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpath d='m15 12-8.373 8.373a1 1 0 1 1-3-3L12 9'/%3E%3Cpath d='m18 15 4-4'/%3E%3Cpath d='m21.5 11.5-1.914-1.914A2 2 0 0 1 19 8.172V7l-2.26-2.26a6 6 0 0 0-4.113-1.454L9 2.96l.92.82A6 6 0 0 1 12 8.586V10l2 2 1.172-1.172a2 2 0 0 1 2.828 0L21.5 13.5'/%3E%3C/svg%3E"); + mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='black' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpath d='m15 12-8.373 8.373a1 1 0 1 1-3-3L12 9'/%3E%3Cpath d='m18 15 4-4'/%3E%3Cpath d='m21.5 11.5-1.914-1.914A2 2 0 0 1 19 8.172V7l-2.26-2.26a6 6 0 0 0-4.113-1.454L9 
2.96l.92.82A6 6 0 0 1 12 8.586V10l2 2 1.172-1.172a2 2 0 0 1 2.828 0L21.5 13.5'/%3E%3C/svg%3E"); +} + +.navbar__link.tutorials-link::before { + @apply mr-0; + width: 18px; + height: 18px; } .navbar__items--right { @apply gap-4; } +/* ── Programming-language tabs: smaller than outer demo tabs ─────────────── */ + +.language-tabs .tabs { + margin-bottom: 0; + gap: 16px; +} + +.language-tabs .tabs__item { + font-size: 14px; + font-weight: 400; + font-family: var(--ifm-font-family-monospace); + color: var(--ifm-menu-color); + padding: 0.25rem 0.7rem; + display: inline-flex !important; + align-items: center; + gap: 8px; + border-radius: 4px; + border-bottom: 0px solid transparent; +} + +.language-tabs .tabs__item svg { + width: 13px !important; + height: 13px !important; +} + +.language-tabs .tabs__item--active { + border-bottom-color: transparent !important; + background: var(--ifm-hover-overlay) !important; + color: var(--ifm-menu-color-active); +} + +.tabs__item { + color: var(--ifm-menu-color); +} + +.tabs__item--active { + color: var(--ifm-menu-color-active); +} + +.tabs__item[data-lang="python"]::before { + -webkit-mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1024 1024'%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M511.847,757.279c0,10.586,0,20.188,0,30.832c80.904,0,161.375,0,241.785,0 c0,37.582,0,74.129-0.01,110.672c0,0.328-0.164,0.652-0.221,0.984c-5.341,30.986-22.589,54.121-47.641,72.006 c-30.986,22.119-66.561,32.812-103.044,41.27c-85.164,19.752-168.318,12.871-248.585-21.24 c-19.08-8.107-36.901-20.795-52.854-34.273c-22.846-19.307-33.87-45.564-33.9-75.832c-0.073-78.047-0.805-156.102,0.225-234.133 c0.925-70.191,55.389-122.805,125.531-123.01c77.975-0.227,155.967-1.086,233.918,0.23 C705.86,526.119,772.818,466,783.688,391.142c1.251-8.611,1.377-17.432,1.425-26.162c0.163-30.611,0.064-61.224,0.064-91.837 c0-3.605,0-7.213,0-11.838c12.517,0,23.854,0,35.193,0c18.148,0.002,36.3,0.142,54.448-0.023 
c43.185-0.395,75.38,18.446,97.422,55.311c16.096,26.917,24.123,56.681,30.942,86.92c10.478,46.456,16.828,93.305,13.369,141.044 c-4.067,56.15-20.297,108.848-47.128,158.115c-20.56,37.752-53.253,54.877-96.167,54.734 c-115.953-0.381-231.907-0.129-347.859-0.127C521.083,757.279,516.769,757.279,511.847,757.279z M648.96,850.41 c-25.734-0.252-46.009,19.758-46.221,45.611c-0.214,25.793,19.813,46.887,44.912,47.307 c25.027,0.418,46.009-20.428,46.279-45.986C694.203,871.342,674.386,850.658,648.96,850.41z'/%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M510.621,261.305c0-10.549,0-20.083,0-30.812c-81.056,0-161.535,0-242.22,0 c-0.474-3.074-1.038-5.012-1.03-6.947c0.141-34.312-0.533-68.657,0.749-102.928c2.042-54.623,23.93-84.223,76.56-98.907 c31.827-8.878,64.892-14.673,97.789-18.063c64.773-6.675,129.604-4.182,193.803,7.426c37.246,6.734,68.971,24.009,92.947,53.935 c16.724,20.873,25.52,44.756,25.516,71.703c-0.017,77.714,0.374,155.43-0.115,233.142 c-0.449,71.218-53.786,123.692-125.062,123.814c-78.651,0.136-157.304-0.036-235.956,0.057 c-76.784,0.088-139.957,52.139-154.399,127.492c-1.883,9.83-2.224,20.059-2.314,30.111c-0.285,31.285-0.105,62.574-0.105,93.861 c0,3.617,0,7.23,0,12.09c-7.474,0-13.77,0.051-20.063-0.008c-24.864-0.227-49.776,0.426-74.576-0.945 c-39.027-2.16-68.304-21.814-89.938-53.754c-18.086-26.703-25.991-57.164-33.359-88.004 C-0.209,534.822,3.376,455.854,25.584,377.35c20.668-73.063,76.843-115.872,152.937-115.998 c106.549-0.177,213.097-0.047,319.646-0.047C502.121,261.305,506.076,261.305,510.621,261.305z M373.439,75.404 c-25.309,0.176-45.207,20.863-45.057,46.848c0.149,25.682,20.729,46.29,45.985,46.043c25.146-0.245,45.418-21.308,45.201-46.962 C419.35,95.679,398.811,75.23,373.439,75.404z'/%3E%3C/svg%3E"); + mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1024 1024'%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M511.847,757.279c0,10.586,0,20.188,0,30.832c80.904,0,161.375,0,241.785,0 
c0,37.582,0,74.129-0.01,110.672c0,0.328-0.164,0.652-0.221,0.984c-5.341,30.986-22.589,54.121-47.641,72.006 c-30.986,22.119-66.561,32.812-103.044,41.27c-85.164,19.752-168.318,12.871-248.585-21.24 c-19.08-8.107-36.901-20.795-52.854-34.273c-22.846-19.307-33.87-45.564-33.9-75.832c-0.073-78.047-0.805-156.102,0.225-234.133 c0.925-70.191,55.389-122.805,125.531-123.01c77.975-0.227,155.967-1.086,233.918,0.23 C705.86,526.119,772.818,466,783.688,391.142c1.251-8.611,1.377-17.432,1.425-26.162c0.163-30.611,0.064-61.224,0.064-91.837 c0-3.605,0-7.213,0-11.838c12.517,0,23.854,0,35.193,0c18.148,0.002,36.3,0.142,54.448-0.023 c43.185-0.395,75.38,18.446,97.422,55.311c16.096,26.917,24.123,56.681,30.942,86.92c10.478,46.456,16.828,93.305,13.369,141.044 c-4.067,56.15-20.297,108.848-47.128,158.115c-20.56,37.752-53.253,54.877-96.167,54.734 c-115.953-0.381-231.907-0.129-347.859-0.127C521.083,757.279,516.769,757.279,511.847,757.279z M648.96,850.41 c-25.734-0.252-46.009,19.758-46.221,45.611c-0.214,25.793,19.813,46.887,44.912,47.307 c25.027,0.418,46.009-20.428,46.279-45.986C694.203,871.342,674.386,850.658,648.96,850.41z'/%3E%3Cpath fill='white' clip-rule='evenodd' fill-rule='evenodd' d='M510.621,261.305c0-10.549,0-20.083,0-30.812c-81.056,0-161.535,0-242.22,0 c-0.474-3.074-1.038-5.012-1.03-6.947c0.141-34.312-0.533-68.657,0.749-102.928c2.042-54.623,23.93-84.223,76.56-98.907 c31.827-8.878,64.892-14.673,97.789-18.063c64.773-6.675,129.604-4.182,193.803,7.426c37.246,6.734,68.971,24.009,92.947,53.935 c16.724,20.873,25.52,44.756,25.516,71.703c-0.017,77.714,0.374,155.43-0.115,233.142 c-0.449,71.218-53.786,123.692-125.062,123.814c-78.651,0.136-157.304-0.036-235.956,0.057 c-76.784,0.088-139.957,52.139-154.399,127.492c-1.883,9.83-2.224,20.059-2.314,30.111c-0.285,31.285-0.105,62.574-0.105,93.861 c0,3.617,0,7.23,0,12.09c-7.474,0-13.77,0.051-20.063-0.008c-24.864-0.227-49.776,0.426-74.576-0.945 c-39.027-2.16-68.304-21.814-89.938-53.754c-18.086-26.703-25.991-57.164-33.359-88.004 
C-0.209,534.822,3.376,455.854,25.584,377.35c20.668-73.063,76.843-115.872,152.937-115.998 c106.549-0.177,213.097-0.047,319.646-0.047C502.121,261.305,506.076,261.305,510.621,261.305z M373.439,75.404 c-25.309,0.176-45.207,20.863-45.057,46.848c0.149,25.682,20.729,46.29,45.985,46.043c25.146-0.245,45.418-21.308,45.201-46.962 C419.35,95.679,398.811,75.23,373.439,75.404z'/%3E%3C/svg%3E"); +} + +.tabs__item[data-lang="typescript"]::before { + -webkit-mask-image: url("data:image/svg+xml,%3Csvg%20xmlns%3D%22http://www.w3.org/2000/svg%22%20viewBox%3D%220%200%20128%20128%22%3E%3Cpath%20fill%3D%22white%22%20d%3D%22M2%2C63.91v62.5H127V1.41H2Zm100.73-5a15.56%2C15.56%2C0%2C0%2C1%2C7.82%2C4.5%2C20.58%2C20.58%2C0%2C0%2C1%2C3%2C4c0%2C.16-5.4%2C3.81-8.69%2C5.85-.12.08-.6-.44-1.13-1.23a7.09%2C7.09%2C0%2C0%2C0-5.87-3.53c-3.79-.26-6.23%2C1.73-6.21%2C5a4.58%2C4.58%2C0%2C0%2C0%2C.54%2C2.34c.83%2C1.73%2C2.38%2C2.76%2C7.24%2C4.86%2C8.95%2C3.85%2C12.78%2C6.39%2C15.16%2C10%2C2.66%2C4%2C3.25%2C10.46%2C1.45%2C15.24-2%2C5.2-6.9%2C8.73-13.83%2C9.9a38.32%2C38.32%2C0%2C0%2C1-9.52-.1A23%2C23%2C0%2C0%2C1%2C80%2C109.19c-1.15-1.27-3.39-4.58-3.25-4.82a9.34%2C9.34%2C0%2C0%2C1%2C1.15-.73L82.5%2C101l3.59-2.08.75%2C1.11a16.78%2C16.78%2C0%2C0%2C0%2C4.74%2C4.54c4%2C2.1%2C9.46%2C1.81%2C12.16-.62a5.43%2C5.43%2C0%2C0%2C0%2C.69-6.92c-1-1.39-3-2.56-8.59-5-6.45-2.78-9.23-4.5-11.77-7.24a16.48%2C16.48%2C0%2C0%2C1-3.43-6.25%2C25%2C25%2C0%2C0%2C1-.22-8c1.33-6.23%2C6-10.58%2C12.82-11.87A31.66%2C31.66%2C0%2C0%2C1%2C102.73%2C58.93ZM73.39%2C64.15l0%2C5.12H57.16V115.5H45.65V69.26H29.38v-5a49.19%2C49.19%2C0%2C0%2C1%2C.14-5.16c.06-.08%2C10-.12%2C22-.1L73.33%2C59Z%22/%3E%3C/svg%3E"); + mask-image: 
url("data:image/svg+xml,%3Csvg%20xmlns%3D%22http://www.w3.org/2000/svg%22%20viewBox%3D%220%200%20128%20128%22%3E%3Cpath%20fill%3D%22white%22%20d%3D%22M2%2C63.91v62.5H127V1.41H2Zm100.73-5a15.56%2C15.56%2C0%2C0%2C1%2C7.82%2C4.5%2C20.58%2C20.58%2C0%2C0%2C1%2C3%2C4c0%2C.16-5.4%2C3.81-8.69%2C5.85-.12.08-.6-.44-1.13-1.23a7.09%2C7.09%2C0%2C0%2C0-5.87-3.53c-3.79-.26-6.23%2C1.73-6.21%2C5a4.58%2C4.58%2C0%2C0%2C0%2C.54%2C2.34c.83%2C1.73%2C2.38%2C2.76%2C7.24%2C4.86%2C8.95%2C3.85%2C12.78%2C6.39%2C15.16%2C10%2C2.66%2C4%2C3.25%2C10.46%2C1.45%2C15.24-2%2C5.2-6.9%2C8.73-13.83%2C9.9a38.32%2C38.32%2C0%2C0%2C1-9.52-.1A23%2C23%2C0%2C0%2C1%2C80%2C109.19c-1.15-1.27-3.39-4.58-3.25-4.82a9.34%2C9.34%2C0%2C0%2C1%2C1.15-.73L82.5%2C101l3.59-2.08.75%2C1.11a16.78%2C16.78%2C0%2C0%2C0%2C4.74%2C4.54c4%2C2.1%2C9.46%2C1.81%2C12.16-.62a5.43%2C5.43%2C0%2C0%2C0%2C.69-6.92c-1-1.39-3-2.56-8.59-5-6.45-2.78-9.23-4.5-11.77-7.24a16.48%2C16.48%2C0%2C0%2C1-3.43-6.25%2C25%2C25%2C0%2C0%2C1-.22-8c1.33-6.23%2C6-10.58%2C12.82-11.87A31.66%2C31.66%2C0%2C0%2C1%2C102.73%2C58.93ZM73.39%2C64.15l0%2C5.12H57.16V115.5H45.65V69.26H29.38v-5a49.19%2C49.19%2C0%2C0%2C1%2C.14-5.16c.06-.08%2C10-.12%2C22-.1L73.33%2C59Z%22/%3E%3C/svg%3E"); +} + +.tabs__item[data-lang="rest"]::before { + -webkit-mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' version='1.0' viewBox='0 0 512.000000 533.000000' preserveAspectRatio='xMidYMid meet'%3E%3Cg transform='translate(0.000000,533.000000) scale(0.100000,-0.100000)' fill='%23000000' stroke='none'%3E%3Cpath d='M1820 4890 c-568 -24 -859 -109 -1082 -319 -174 -164 -276 -384 -322 -703 -34 -231 -40 -424 -40 -1163 0 -875 12 -1082 80 -1350 106 -423 378 -673 829 -764 269 -54 505 -64 1410 -58 762 5 863 9 1080 47 321 56 539 171 691 364 194 245 253 496 275 1171 12 372 5 1283 -11 1500 -36 472 -120 719 -314 923 -199 209 -456 304 -931 343 -166 13 -1395 20 -1665 9z m1442 -320 c534 -22 747 -79 913 -245 144 -143 206 -336 235 -725 16 -219 23 -1113 11 -1475 -21 -656 -74 -864 -262 -1040 
-136 -126 -305 -184 -639 -217 -192 -19 -1728 -19 -1920 0 -334 33 -503 91 -639 217 -187 174 -241 384 -262 1025 -12 357 -5 1280 11 1495 20 269 61 451 128 580 49 93 154 198 247 247 164 86 430 131 825 141 296 8 1136 6 1352 -3z'/%3E%3Cpath d='M1525 3491 c-16 -10 -38 -27 -48 -38 -10 -11 -119 -324 -243 -694 -250 -750 -243 -717 -177 -790 65 -71 160 -74 229 -5 23 23 41 61 70 149 l38 117 251 0 250 0 50 -121 c56 -138 94 -182 164 -194 53 -9 126 25 158 74 46 68 46 67 -253 784 -151 364 -285 673 -297 685 -50 54 -135 69 -192 33z m166 -774 l68 -162 -121 -3 c-66 -1 -123 0 -126 3 -4 3 18 80 48 170 30 90 56 162 59 159 3 -2 35 -77 72 -167z'/%3E%3Cpath d='M2700 3501 c-74 -23 -136 -78 -169 -151 -20 -44 -21 -63 -21 -686 l0 -641 23 -34 c32 -49 105 -83 158 -74 53 9 115 64 129 113 5 21 10 146 10 280 l0 242 148 0 c238 0 348 36 455 149 67 70 102 129 121 206 57 216 -39 443 -232 548 -90 49 -146 57 -382 56 -118 0 -226 -4 -240 -8z m466 -327 c49 -23 77 -68 82 -129 5 -63 -13 -102 -65 -142 -36 -28 -39 -28 -195 -31 l-158 -4 0 161 0 161 151 0 c119 0 158 -3 185 -16z'/%3E%3Cpath d='M3890 3502 c-40 -19 -70 -46 -84 -77 -14 -29 -16 -113 -16 -718 l0 -684 23 -34 c32 -49 105 -83 158 -74 54 9 115 64 129 116 7 25 10 266 8 701 -3 642 -4 664 -23 697 -30 52 -67 74 -127 77 -29 2 -60 0 -68 -4z'/%3E%3C/g%3E%3C/svg%3E"); + mask-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' version='1.0' viewBox='0 0 512.000000 533.000000' preserveAspectRatio='xMidYMid meet'%3E%3Cg transform='translate(0.000000,533.000000) scale(0.100000,-0.100000)' fill='%23000000' stroke='none'%3E%3Cpath d='M1820 4890 c-568 -24 -859 -109 -1082 -319 -174 -164 -276 -384 -322 -703 -34 -231 -40 -424 -40 -1163 0 -875 12 -1082 80 -1350 106 -423 378 -673 829 -764 269 -54 505 -64 1410 -58 762 5 863 9 1080 47 321 56 539 171 691 364 194 245 253 496 275 1171 12 372 5 1283 -11 1500 -36 472 -120 719 -314 923 -199 209 -456 304 -931 343 -166 13 -1395 20 -1665 9z m1442 -320 c534 -22 747 -79 913 -245 144 -143 206 -336 235 -725 16 -219 23 
-1113 11 -1475 -21 -656 -74 -864 -262 -1040 -136 -126 -305 -184 -639 -217 -192 -19 -1728 -19 -1920 0 -334 33 -503 91 -639 217 -187 174 -241 384 -262 1025 -12 357 -5 1280 11 1495 20 269 61 451 128 580 49 93 154 198 247 247 164 86 430 131 825 141 296 8 1136 6 1352 -3z'/%3E%3Cpath d='M1525 3491 c-16 -10 -38 -27 -48 -38 -10 -11 -119 -324 -243 -694 -250 -750 -243 -717 -177 -790 65 -71 160 -74 229 -5 23 23 41 61 70 149 l38 117 251 0 250 0 50 -121 c56 -138 94 -182 164 -194 53 -9 126 25 158 74 46 68 46 67 -253 784 -151 364 -285 673 -297 685 -50 54 -135 69 -192 33z m166 -774 l68 -162 -121 -3 c-66 -1 -123 0 -126 3 -4 3 18 80 48 170 30 90 56 162 59 159 3 -2 35 -77 72 -167z'/%3E%3Cpath d='M2700 3501 c-74 -23 -136 -78 -169 -151 -20 -44 -21 -63 -21 -686 l0 -641 23 -34 c32 -49 105 -83 158 -74 53 9 115 64 129 113 5 21 10 146 10 280 l0 242 148 0 c238 0 348 36 455 149 67 70 102 129 121 206 57 216 -39 443 -232 548 -90 49 -146 57 -382 56 -118 0 -226 -4 -240 -8z m466 -327 c49 -23 77 -68 82 -129 5 -63 -13 -102 -65 -142 -36 -28 -39 -28 -195 -31 l-158 -4 0 161 0 161 151 0 c119 0 158 -3 185 -16z'/%3E%3Cpath d='M3890 3502 c-40 -19 -70 -46 -84 -77 -14 -29 -16 -113 -16 -718 l0 -684 23 -34 c32 -49 105 -83 158 -74 54 9 115 64 129 116 7 25 10 266 8 701 -3 642 -4 664 -23 697 -30 52 -67 74 -127 77 -29 2 -60 0 -68 -4z'/%3E%3C/g%3E%3C/svg%3E"); +} + +.tabs__item[data-lang="mcp-server"]::before, +.tabs__item[data-lang="mcp"]::before { + width: 13px; + height: 13px; + -webkit-mask-image: url("data:image/svg+xml,%3Csvg fill='currentColor' fill-rule='evenodd' viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M15.688 2.343a2.588 2.588 0 00-3.61 0l-9.626 9.44a.863.863 0 01-1.203 0 .823.823 0 010-1.18l9.626-9.44a4.313 4.313 0 016.016 0 4.116 4.116 0 011.204 3.54 4.3 4.3 0 013.609 1.18l.05.05a4.115 4.115 0 010 5.9l-8.706 8.537a.274.274 0 000 .393l1.788 1.754a.823.823 0 010 1.18.863.863 0 01-1.203 0l-1.788-1.753a1.92 1.92 0 010-2.754l8.706-8.538a2.47 2.47 0 000-3.54l-.05-.049a2.588 2.588 0 
00-3.607-.003l-7.172 7.034-.002.002-.098.097a.863.863 0 01-1.204 0 .823.823 0 010-1.18l7.273-7.133a2.47 2.47 0 00-.003-3.537z'%3E%3C/path%3E%3Cpath d='M14.485 4.703a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a4.115 4.115 0 000 5.9 4.314 4.314 0 006.016 0l7.12-6.982a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a2.588 2.588 0 01-3.61 0 2.47 2.47 0 010-3.54l7.12-6.982z'%3E%3C/path%3E%3C/svg%3E"); + mask-image: url("data:image/svg+xml,%3Csvg fill='currentColor' fill-rule='evenodd' viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M15.688 2.343a2.588 2.588 0 00-3.61 0l-9.626 9.44a.863.863 0 01-1.203 0 .823.823 0 010-1.18l9.626-9.44a4.313 4.313 0 016.016 0 4.116 4.116 0 011.204 3.54 4.3 4.3 0 013.609 1.18l.05.05a4.115 4.115 0 010 5.9l-8.706 8.537a.274.274 0 000 .393l1.788 1.754a.823.823 0 010 1.18.863.863 0 01-1.203 0l-1.788-1.753a1.92 1.92 0 010-2.754l8.706-8.538a2.47 2.47 0 000-3.54l-.05-.049a2.588 2.588 0 00-3.607-.003l-7.172 7.034-.002.002-.098.097a.863.863 0 01-1.204 0 .823.823 0 010-1.18l7.273-7.133a2.47 2.47 0 00-.003-3.537z'%3E%3C/path%3E%3Cpath d='M14.485 4.703a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a4.115 4.115 0 000 5.9 4.314 4.314 0 006.016 0l7.12-6.982a.823.823 0 000-1.18.863.863 0 00-1.204 0l-7.119 6.982a2.588 2.588 0 01-3.61 0 2.47 2.47 0 010-3.54l7.12-6.982z'%3E%3C/path%3E%3C/svg%3E"); +} + +/* ── Responsive outer tabs: select on mobile, buttons on desktop ─────────── */ + +.responsive-tabs__select-wrapper { + display: none; +} + +.responsive-tabs__nav { + display: flex; + flex-wrap: wrap; + gap: 4px; + border-bottom: 2px solid var(--ifm-toc-border-color); + padding-bottom: 0; + margin-bottom: 1rem; +} + +.responsive-tabs__tab { + background: none; + border: none; + border-bottom: 2px solid transparent; + margin-bottom: -2px; + padding: 0.5rem 1rem; + font-size: 0.9rem; + font-weight: 600; + font-family: var(--ifm-font-family-base); + color: var(--ifm-menu-color); + cursor: pointer; + border-radius: 6px 
6px 0 0; + transition: color 0.15s, background 0.15s; +} + +.responsive-tabs__tab:hover { + background: var(--ifm-hover-overlay); + color: var(--ifm-color-primary); +} + +.responsive-tabs__tab--active { + color: var(--ifm-color-primary) !important; + border-bottom-color: var(--ifm-color-primary) !important; +} + +@media (max-width: 996px) { + .responsive-tabs__nav { + display: none; + } + + .responsive-tabs__select-wrapper { + display: block; + margin-bottom: 1rem; + } + + .responsive-tabs__select { + width: 100%; + padding: 0.5rem 2.25rem 0.5rem 0.75rem; + font-size: 0.9rem; + font-weight: 600; + font-family: var(--ifm-font-family-base); + color: var(--ifm-font-color-base); + background-color: var(--ifm-background-color); + border: 1px solid var(--ifm-toc-border-color); + border-radius: 6px; + appearance: none; + -webkit-appearance: none; + background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 24 24' fill='none' stroke='%23888' stroke-width='2.5' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'%3E%3C/polyline%3E%3C/svg%3E"); + background-repeat: no-repeat; + background-position: right 0.65rem center; + cursor: pointer; + } + + .responsive-tabs__select:focus { + outline: 2px solid var(--ifm-color-primary); + outline-offset: 2px; + } +} + .cta-button { @apply flex items-center justify-center transition bg-accent text-accent-contrast hover:bg-accent-hover focus-visible:bg-accent-focus ring-accent-ring gap-2 [&>svg]:hidden rounded-md px-3 font-medium; } +/* Move theme toggle left of Login button */ +.navbar__items--right [class*='colorModeToggle'] { + order: 1; +} + +.navbar__items--right .cta-button { + order: 2; +} + .navbar-sidebar { height: 100dvh; } + +/* ── Copy page button ──────────────────────────────────────────────────── */ + +.breadcrumbs-with-copy { + display: flex; + align-items: center; + justify-content: space-between; + align-items: baseline; + 
margin-bottom: 24px; + padding-top: 16px; +} + +.breadcrumbs-actions { + display: inline-flex; + align-items: center; + gap: 6px; + flex-shrink: 0; +} + +.copy-page-container { + position: relative; + display: inline-flex; + user-select: none; +} + +.copy-page-main-btn, +.copy-page-chevron-btn { + display: inline-flex; + align-items: center; + gap: 5px; + padding: 7px 14px; + background: var(--ifm-background-surface-color); + border: 1px solid var(--ifm-color-emphasis-300); + font-size: 13.5px; + font-weight: 500; + cursor: pointer; + color: var(--ifm-font-color-base); + line-height: 1; + transition: background 0.12s; + font-family: var(--ifm-font-family-base); +} + +.copy-page-main-btn { + border-right: none; + border-radius: 6px 0 0 6px; +} + +.copy-page-chevron-btn { + padding: 7px 9px; + border-left: 1px solid var(--ifm-color-emphasis-200); + border-radius: 0 6px 6px 0; +} + +.copy-page-main-btn:hover, +.copy-page-chevron-btn:hover { + background: var(--ifm-color-emphasis-100); +} + +.copy-page-dropdown { + position: absolute; + top: calc(100% + 6px); + right: 0; + min-width: 252px; + background: var(--ifm-background-surface-color); + border: 1px solid var(--ifm-color-emphasis-300); + border-radius: 10px; + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.13); + overflow: hidden; + z-index: 200; +} + +.copy-page-item { + display: flex; + align-items: center; + gap: 12px; + padding: 10px 14px; + text-decoration: none !important; + color: var(--ifm-font-color-base) !important; + transition: background 0.1s; + cursor: pointer; +} + +.copy-page-item:hover { + background: var(--ifm-color-emphasis-100); + text-decoration: none !important; +} + +.copy-page-item:hover .copy-page-item-label, +.copy-page-item:hover .copy-page-item-desc { + text-decoration: none !important; +} + +.copy-page-item--bordered { + border-top: 1px solid var(--ifm-color-emphasis-200); +} + +.copy-page-item-icon { + display: flex; + align-items: center; + justify-content: center; + width: 36px; + height: 
36px; + min-width: 36px; + border-radius: 8px; + background: var(--ifm-color-emphasis-200); + flex-shrink: 0; + color: var(--ifm-font-color-base); + overflow: hidden; +} + +.copy-page-item-text { + display: flex; + flex-direction: column; + gap: 2px; +} + +.copy-page-item-label { + font-size: 13px; + font-weight: 500; + line-height: 1.2; + color: var(--ifm-font-color-base); + text-decoration: none !important; +} + +.copy-page-item-desc { + font-size: 11.5px; + line-height: 1.3; + color: var(--ifm-color-emphasis-700); + text-decoration: none !important; +} + +/* ── TOC column width: fixed 380px ──────────────────────────────────────── */ + +@media (min-width: 997px) { + .col.col--3:has(.table-of-contents) { + max-width: 320px !important; + flex: 0 0 320px !important; + width: 320px !important; + } + + /* Content column takes the remaining space */ + [class*='docItemCol'][class*='docItemCol'] { + max-width: calc(100% - 320px) !important; + flex: 1 1 0 !important; + } +} + +@media (min-width: 1279px) { + .col.col--3:has(.table-of-contents) { + max-width: 380px !important; + flex: 0 0 380px !important; + width: 380px !important; + } + + /* Content column takes the remaining space */ + [class*='docItemCol'][class*='docItemCol'] { + max-width: calc(100% - 380px) !important; + flex: 1 1 0 !important; + } +} + +/* ── Mobile TOC: remove background ──────────────────────────────────────── */ +@media (max-width: 996px) { + [class*='tocCollapsible']:not([class*='tocCollapsibleExpanded']), + [class*='tocCollapsible'] { + background-color: transparent !important; + border-radius: 0 !important; + } +} + +/* ── Widescreen toggle button ────────────────────────────────────────────── */ + +.widescreen-btn { + display: inline-flex; + align-items: center; + justify-content: center; + padding: 7px 9px; + background: var(--ifm-background-surface-color); + border: 1px solid var(--ifm-color-emphasis-300); + border-radius: 6px; + cursor: pointer; + color: var(--ifm-font-color-base); + 
line-height: 1; + transition: background 0.12s; + font-family: var(--ifm-font-family-base); +} + +.widescreen-btn:hover { + background: var(--ifm-color-emphasis-100); +} + +.widescreen-btn--active { + background: var(--ifm-color-emphasis-200); + border-color: var(--ifm-color-emphasis-400); +} + +/* ── Widescreen mode overrides ───────────────────────────────────────────── */ + +@media (min-width: 997px) { + /* Lift the 1140px container cap only inside the doc content area */ + html[data-widescreen='true'] [class*='docMainContainer'] .container { + max-width: none !important; + } + + /* Let docItemWrapperEnhanced (hidden-sidebar path) also expand */ + html[data-widescreen='true'] [class*='docItemWrapper'] { + max-width: none !important; + } + + /* Keep specific landing pages containered even if widescreen is enabled */ + html[data-widescreen='true'] [class*='docMainContainer']:has(.doc-layout--force-container) .container { + max-width: 1140px !important; + } + + html[data-widescreen='true'] [class*='docItemWrapper']:has(.doc-layout--force-container) { + max-width: 1140px !important; + margin: 0 auto; + width: 100%; + } +} + +/* ── Markdown tables ─────────────────────────────────────────────────────── */ + +.markdown table { + display: table; + border-collapse: separate; + border-spacing: 0; + width: 100%; + border-radius: 8px; + overflow: hidden; + border: 1px solid var(--ifm-toc-border-color); + font-size: 0.875rem; + margin: 1.5rem 0; +} + +.markdown table thead tr { + background-color: rgba(0, 0, 0, 0.04); +} + +html[data-theme='dark'] .markdown table thead tr { + background-color: rgba(255, 255, 255, 0.06); +} + +.markdown table th { + font-weight: 600; + font-size: 0.85rem; + text-transform: uppercase; + letter-spacing: 0.06em; + color: var(--ifm-font-color-base); + opacity: 0.65; + padding: 0.7rem 1rem; + border: none; + border-bottom: 1px solid var(--ifm-toc-border-color); + white-space: nowrap; + text-align: left; +} + +.markdown table tr { + font-size: 
1rem; + +} + +.markdown table td { + padding: 0.65rem 1rem; + border: none; + border-bottom: 1px solid var(--ifm-toc-border-color); + vertical-align: top; + line-height: 1.55; +} + +.markdown table tbody tr:last-child td { + border-bottom: none; +} + +.markdown table tbody tr:nth-child(even) { + background-color: rgba(0, 0, 0, 0.015); +} + +html[data-theme='dark'] .markdown table tbody tr:nth-child(even) { + background-color: rgba(255, 255, 255, 0.015); +} diff --git a/docs/src/theme/DocBreadcrumbs/index.tsx b/docs/src/theme/DocBreadcrumbs/index.tsx new file mode 100644 index 00000000..2c717672 --- /dev/null +++ b/docs/src/theme/DocBreadcrumbs/index.tsx @@ -0,0 +1,30 @@ +import React from 'react' +import { useDoc } from '@docusaurus/plugin-content-docs/client' +import DocBreadcrumbs from '@theme-original/DocBreadcrumbs' +import type DocBreadcrumbsType from '@theme/DocBreadcrumbs' +import type { WrapperProps } from '@docusaurus/types' +import CopyPageButton, { WidescreenButton } from '@site/src/components/CopyPageButton' + +type Props = WrapperProps +type PageFrontMatter = { + hide_breadcrumbs?: boolean +} + +export default function DocBreadcrumbsWrapper(props: Props) { + const { frontMatter } = useDoc() + const pageFrontMatter = frontMatter as typeof frontMatter & PageFrontMatter + + if (pageFrontMatter.hide_breadcrumbs) { + return null + } + + return ( +
+ +
+ + +
+
+ ) +} diff --git a/docs/src/theme/DocItem/Layout/index.tsx b/docs/src/theme/DocItem/Layout/index.tsx new file mode 100644 index 00000000..09610332 --- /dev/null +++ b/docs/src/theme/DocItem/Layout/index.tsx @@ -0,0 +1,25 @@ +import React from 'react' +import { useDoc } from '@docusaurus/plugin-content-docs/client' +import DocItemLayout from '@theme-original/DocItem/Layout' +import type DocItemLayoutType from '@theme/DocItem/Layout' +import type { WrapperProps } from '@docusaurus/types' + +type Props = WrapperProps +type PageFrontMatter = { + force_container?: boolean +} + +export default function DocItemLayoutWrapper(props: Props) { + const { frontMatter } = useDoc() + const pageFrontMatter = frontMatter as typeof frontMatter & PageFrontMatter + + if (pageFrontMatter.force_container) { + return ( +
+ +
+ ) + } + + return +} diff --git a/docs/src/theme/Tabs/index.tsx b/docs/src/theme/Tabs/index.tsx new file mode 100644 index 00000000..fc63a4e8 --- /dev/null +++ b/docs/src/theme/Tabs/index.tsx @@ -0,0 +1,25 @@ +import React, { Children, cloneElement, isValidElement } from 'react' +import OriginalTabs from '@theme-original/Tabs' +import type TabsType from '@theme/Tabs' +import type { WrapperProps } from '@docusaurus/types' + +type Props = WrapperProps + +/** + * Wraps every TabItem child with a `data-lang` attribute equal to its `value`. + * The original Tabs component reads `attributes` from TabItem props and spreads + * them onto the rendered
  • button — so this injects the attribute + * with zero changes to MDX files. + */ +export default function TabsWrapper({ children, ...props }: Props): React.ReactElement { + const enhanced = Children.map(children, (child) => { + if (!isValidElement(child)) return child + const { value, attributes } = child.props as { value?: string; attributes?: Record } + if (!value) return child + return cloneElement(child as unknown as React.ReactElement>, { + attributes: { ...attributes, 'data-lang': value } + }) + }) + + return {enhanced} +} diff --git a/package.json b/package.json index 45fe327f..165bfbf9 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ }, "devDependencies": { "@changesets/cli": "^2.27.10", + "concurrently": "^9.2.1", "@commitlint/cli": "^17.6.6", "@commitlint/config-conventional": "^17.6.6", "@swc/jest": "^0.2.26", diff --git a/packages/javascript-sdk/README.md b/packages/javascript-sdk/README.md index 526dabdb..615e3338 100644 --- a/packages/javascript-sdk/README.md +++ b/packages/javascript-sdk/README.md @@ -4,7 +4,9 @@ # 🚀 RushDB SDK for JavaScript and TypeScript -### The Modern Graph Database Client for Web & Node.js +### Turn Any Data into Intelligence — for Web & Node.js + +Push any JSON — records, events, AI outputs, configs. RushDB structures it, connects it, and makes it queryable instantly. 
[![NPM Version](https://img.shields.io/npm/v/%40rushdb%2Fjavascript-sdk)](https://www.npmjs.com/package/@rushdb/javascript-sdk) [![NPM License](https://img.shields.io/npm/l/%40rushdb%2Fjavascript-sdk)](#license "Go to license section") @@ -102,6 +104,18 @@ const matchedEmployees = await db.records.find({ } }) +// Constrain traversal by relationship type/direction with $relation +const authoredPosts = await db.records.find({ + labels: ['USER'], + where: { + POST: { + $relation: { type: 'AUTHORED', direction: 'out' }, + title: { $contains: 'graph' } + } + }, + limit: 10 +}) + const company = await db.records.findUniq({ labels: ['COMPANY'], where: { diff --git a/packages/javascript-sdk/src/api/api.ts b/packages/javascript-sdk/src/api/api.ts index 6a42be41..37bd8bfa 100644 --- a/packages/javascript-sdk/src/api/api.ts +++ b/packages/javascript-sdk/src/api/api.ts @@ -26,6 +26,16 @@ import type { Where } from '../types/index.js' import type { ApiResponse } from './types.js' +import type { + CreateEmbeddingIndexParams, + EmbeddingIndex, + EmbeddingIndexStats, + SemanticSearchParams, + SemanticSearchResult, + UpsertEmbeddingVectorsParams, + UpsertEmbeddingVectorsResult, + VectorEntry +} from './types.js' import { getOwnProperties, @@ -220,11 +230,13 @@ export class RestAPI { { label, data: rawData, - options + options, + vectors }: { label: string data: InferSchemaTypesWrite | Array options?: Omit + vectors?: VectorEntry[] }, transaction?: Transaction | string ): Promise> => { @@ -240,9 +252,9 @@ export class RestAPI { const data = getOwnProperties(removeUndefinedDeep(rawData)) if (isArray(data) && data.every(isPropertyDraft)) { - payload.requestData = { label, properties: data, options } + payload.requestData = { label, properties: data, options, ...(vectors?.length && { vectors }) } } else if (isFlatObject(data)) { - payload.requestData = { label, data, options } + payload.requestData = { label, data, options, ...(vectors?.length && { vectors }) } } else if 
(isObject(data)) { throw new Error( 'Provided data is not a flat object. Consider using the `importJson` method for nested objects or arrays of nested objects, or use `createMany` for arrays of flat objects.' @@ -280,6 +292,11 @@ export class RestAPI { label: string data: Array> options?: DBRecordCreationOptions + /** + * Per-row inline vectors for external embedding indexes. + * `vectors[i]` is applied to `data[i]`. Its length must not exceed `data.length`. + */ + vectors?: VectorEntry[][] }, transaction?: Transaction | string ): Promise> => { @@ -292,12 +309,24 @@ export class RestAPI { ) } + if (data.vectors && data.vectors.length > items.length) { + throw new Error( + `records.createMany: vectors length (${data.vectors.length}) exceeds the number of data rows (${items.length}).` + ) + } + + // Inject per-row vectors as $vectors on each item so the backend BFS handles them + const itemsWithVectors = + data.vectors?.length ? + items.map((item, i) => (data.vectors![i]?.length ? { ...item, $vectors: data.vectors![i] } : item)) + : items + const txId = pickTransactionId(transaction) const path = `/records/import/json` const payload = { headers: Object.assign({}, buildTransactionHeader(txId)), method: 'POST', - requestData: { ...data, data: items } + requestData: { label: data.label, data: itemsWithVectors, options: data.options } } const requestId = typeof this.logger === 'function' ? generateRandomId() : '' this.logger?.({ requestId, path, ...payload }) @@ -421,6 +450,12 @@ export class RestAPI { newline?: string } parentId?: string + /** + * Per-row inline vectors for external embedding indexes. + * `vectors[i]` is applied to CSV row `i` (0-based, after header). + * Its length must not exceed the number of data rows — validated server-side. 
+ */ + vectors?: VectorEntry[][] }, transaction?: Transaction | string ): Promise> => { @@ -678,12 +713,14 @@ export class RestAPI { target, label, data: rawData, - options + options, + vectors }: { target: DBRecordTarget label: string data: InferSchemaTypesWrite | Array options?: Omit + vectors?: VectorEntry[] }, transaction?: Transaction | string ) => { @@ -700,9 +737,9 @@ export class RestAPI { const data = getOwnProperties(removeUndefinedDeep(rawData)) if (isArray(data) && data.every(isPropertyDraft)) { - payload.requestData = { label, properties: data } + payload.requestData = { label, properties: data, ...(vectors?.length && { vectors }) } } else if (isFlatObject(data)) { - payload.requestData = { label, data, options } + payload.requestData = { label, data, options, ...(vectors?.length && { vectors }) } } else if (isObject(data)) { throw new Error('Provided data is not a flat object. Consider to use `importJson` method.') } else { @@ -792,7 +829,8 @@ export class RestAPI { { label, data: rawData, - options + options, + vectors }: { label?: string data: InferSchemaTypesWrite | Array @@ -800,6 +838,7 @@ export class RestAPI { mergeBy?: string[] mergeStrategy?: 'rewrite' | 'append' } + vectors?: VectorEntry[] }, transaction?: Transaction | string ): Promise> => { @@ -820,9 +859,14 @@ export class RestAPI { } if (isArray(data) && data.every(isPropertyDraft)) { - payload.requestData = { label, properties: data, options: defaultOptions } + payload.requestData = { + label, + properties: data, + options: defaultOptions, + ...(vectors?.length && { vectors }) + } } else if (isFlatObject(data)) { - payload.requestData = { label, data, options: defaultOptions } + payload.requestData = { label, data, options: defaultOptions, ...(vectors?.length && { vectors }) } } else if (isObject(data)) { throw new Error( 'Provided data is not a flat object. Upsert supports flat objects or property drafts array.' 
@@ -1182,17 +1226,17 @@ export class RestAPI { // Only for managed/custom db instances connected to cloud public query = { /** - * Runs a raw Cypher query against the connected Neo4j database. + * Executes a raw query against the underlying database engine. * * NOTE: This endpoint is cloud-only — available only on the RushDB managed * service or when your project is connected to a custom database through * RushDB Cloud. It will not work for self-hosted or local-only deployments. * - * @param param0 - Object containing the Cypher query and optional params - * @param param0.query - Cypher query string to execute + * @param param0 - Object containing the query and optional params + * @param param0.query - Query string to execute * @param param0.params - Optional parameters to pass to the query * @param transaction - Optional transaction id or Transaction instance to run the query in - * @returns ApiResponse - Raw result returned by the server (Neo4j driver result wrapped in ApiResponse) + * @returns ApiResponse - Raw result returned by the server (wrapped in ApiResponse) */ raw: async ( { query, params }: { query: string; params?: FlatObject }, @@ -1215,4 +1259,176 @@ export class RestAPI { return response } } + + /** + * API methods for AI-assisted graph exploration. + */ + public ai = { + /** + * Returns the full graph ontology as structured JSON. + * Each item contains the label name, record count, properties with value ranges/samples, + * and cross-label relationships with direction. + * Use property `id` fields to pass to db.properties.values() for deeper drill-down. 
+ * @param params - Optional filter: provide `labels` array to scope to specific labels only + * @param transaction - Optional transaction for atomic operations + */ + getOntology: async (params?: { labels?: string[] }, transaction?: Transaction | string) => { + const txId = pickTransactionId(transaction) + const path = `/ai/ontology` + const payload = { + headers: Object.assign({}, buildTransactionHeader(txId)), + method: 'POST', + requestData: params ?? {} + } + const requestId = typeof this.logger === 'function' ? generateRandomId() : '' + this.logger?.({ requestId, path, ...payload }) + + const response = await this.fetcher>(path, payload) + this.logger?.({ requestId, path, ...payload, responseData: response.data }) + + return response + }, + + /** + * Returns the full graph ontology as compact Markdown tables. + * Token-efficient — intended for direct LLM consumption. + * Includes: labels with counts, properties with types and value ranges/samples, + * and cross-label relationship map. + * @param params - Optional filter: provide `labels` array to scope to specific labels only + * @param transaction - Optional transaction for atomic operations + */ + getOntologyMarkdown: async (params?: { labels?: string[] }, transaction?: Transaction | string) => { + const txId = pickTransactionId(transaction) + const path = `/ai/ontology/md` + const payload = { + headers: Object.assign({}, buildTransactionHeader(txId)), + method: 'POST', + requestData: params ?? {} + } + const requestId = typeof this.logger === 'function' ? generateRandomId() : '' + this.logger?.({ requestId, path, ...payload }) + + const response = await this.fetcher>(path, payload) + this.logger?.({ requestId, path, ...payload, responseData: response.data }) + + return response + }, + + /** + * Embedding Index management methods. + */ + indexes: { + /** + * Lists all embedding index policies configured for the current project. 
+ */ + find: async () => { + const path = `/ai/indexes` + const payload = { method: 'GET', headers: {} } + const requestId = typeof this.logger === 'function' ? generateRandomId() : '' + this.logger?.({ requestId, path, ...payload }) + + const response = await this.fetcher>(path, payload) + this.logger?.({ requestId, path, ...payload, responseData: response.data }) + + return response + }, + + /** + * Creates a new embedding index policy for a string property. + * @param params.propertyName - Name of the property to index + * @param params.modelKey - Embedding model identifier (e.g. 'text-embedding-3-small') + * @param params.dimensions - Vector dimensionality produced by the model + */ + create: async (params: CreateEmbeddingIndexParams) => { + const path = `/ai/indexes` + const { external, ...rest } = params + const resolvedParams = external === true ? { ...rest, sourceType: 'external' as const } : rest + const payload = { + method: 'POST', + headers: {}, + requestData: resolvedParams + } + const requestId = typeof this.logger === 'function' ? generateRandomId() : '' + this.logger?.({ requestId, path, ...payload }) + + const response = await this.fetcher>(path, payload) + this.logger?.({ requestId, path, ...payload, responseData: response.data }) + + return response + }, + + /** + * Upserts external vectors for a specific embedding index. + * @param id - The target embedding index ID + * @param params.items - Array of { recordId, vector } + */ + upsertVectors: async (id: string, params: UpsertEmbeddingVectorsParams) => { + const path = `/ai/indexes/${id}/vectors/upsert` + const payload = { + method: 'POST', + headers: {}, + requestData: params + } + const requestId = typeof this.logger === 'function' ? 
generateRandomId() : '' + this.logger?.({ requestId, path, ...payload }) + + const response = await this.fetcher>(path, payload) + this.logger?.({ requestId, path, ...payload, responseData: response.data }) + + return response + }, + + /** + * Deletes an embedding index policy by ID. + * @param id - The ID of the embedding index to delete + */ + delete: async (id: string) => { + const path = `/ai/indexes/${id}` + const payload = { method: 'DELETE', headers: {} } + const requestId = typeof this.logger === 'function' ? generateRandomId() : '' + this.logger?.({ requestId, path, ...payload }) + + const response = await this.fetcher>(path, payload) + this.logger?.({ requestId, path, ...payload, responseData: response.data }) + + return response + }, + + /** + * Returns Neo4j-level statistics for an embedding index. + * @param id - The ID of the embedding index + */ + stats: async (id: string) => { + const path = `/ai/indexes/${id}/stats` + const payload = { method: 'GET', headers: {} } + const requestId = typeof this.logger === 'function' ? generateRandomId() : '' + this.logger?.({ requestId, path, ...payload }) + + const response = await this.fetcher>(path, payload) + this.logger?.({ requestId, path, ...payload, responseData: response.data }) + + return response + } + }, + + /** + * Performs semantic (vector) search over records whose `propertyName` has been indexed. + * + * RushDB performs exact search: candidates are narrowed via MATCH/WHERE first, + * then ranked by similarity. You can pass either query text or queryVector. + * + * @param params - Search parameters including the query text, property name, and optional filters + */ + search: async (params: SemanticSearchParams) => { + const path = `/ai/search` + const payload = { method: 'POST', headers: {}, requestData: params } + const requestId = typeof this.logger === 'function' ? 
generateRandomId() : '' + this.logger?.({ requestId, path, ...payload }) + + const response = await this.fetcher>(path, payload) + this.logger?.({ requestId, path, ...payload, responseData: response.data }) + + return response + } + } } diff --git a/packages/javascript-sdk/src/api/index.ts b/packages/javascript-sdk/src/api/index.ts index e2b945a9..5f32d4a6 100644 --- a/packages/javascript-sdk/src/api/index.ts +++ b/packages/javascript-sdk/src/api/index.ts @@ -1,2 +1,12 @@ export { RestAPI } from './api.js' -export { ApiResponse } from './types.js' +export { + ApiResponse, + CreateEmbeddingIndexParams, + EmbeddingIndex, + EmbeddingIndexStats, + SemanticSearchParams, + SemanticSearchResult, + UpsertEmbeddingVectorItem, + UpsertEmbeddingVectorsParams, + UpsertEmbeddingVectorsResult +} from './types.js' diff --git a/packages/javascript-sdk/src/api/types.ts b/packages/javascript-sdk/src/api/types.ts index b6764c4e..0749217b 100644 --- a/packages/javascript-sdk/src/api/types.ts +++ b/packages/javascript-sdk/src/api/types.ts @@ -1,5 +1,117 @@ +import type { DBRecord } from '../sdk/record.js' +import type { Schema } from '../types/index.js' + export type ApiResponse> = { data: T success: boolean total?: number } & E + +/** An embedding index policy stored in RushDB. */ +export type EmbeddingIndex = { + id: string + projectId: string + /** Neo4j label this index is scoped to (e.g. "Book"). */ + label: string + propertyName: string + modelKey: string + sourceType: 'managed' | 'external' + similarityFunction: 'cosine' | 'euclidean' + dimensions: number + vectorPropertyName: string + enabled: boolean + /** 'pending' | 'indexing' | 'awaiting_vectors' | 'ready' | 'error' */ + status: string + createdAt: string + updatedAt: string +} + +/** Parameters for creating a new embedding index. */ +export type CreateEmbeddingIndexParams = { + /** Neo4j label to scope this index to (e.g. "Book", "Task"). 
*/ + label: string + propertyName: string + sourceType?: 'managed' | 'external' + /** + * Shorthand for `sourceType: 'external'`. + * When `true`, the index will be created with `sourceType: 'external'` regardless of the `sourceType` field. + */ + external?: boolean + similarityFunction?: 'cosine' | 'euclidean' + dimensions?: number +} + +/** + * A single vector entry for inline vector upsert. + * Provided alongside record data in create/upsert/set calls. + */ +export type VectorEntry = { + /** Name of the property whose embedding index should be written to. */ + propertyName: string + /** The embedding vector to store. Its length must match the index dimensions. */ + vector: number[] + /** + * Required when two indexes share the same `propertyName` and `dimensions` but differ in + * `similarityFunction`. Omit when there is only one matching index. + */ + similarityFunction?: 'cosine' | 'euclidean' +} + +export type UpsertEmbeddingVectorItem = { + recordId: string + vector: number[] +} + +export type UpsertEmbeddingVectorsParams = { + items: UpsertEmbeddingVectorItem[] +} + +export type UpsertEmbeddingVectorsResult = { + updated: number + requested: number +} + +/** Neo4j-level stats for an embedding index. */ +export type EmbeddingIndexStats = { + totalRecords: number + indexedRecords: number +} + +/** Parameters for semantic (vector) search over an embedding index. */ +export type SemanticSearchParams = { + /** Name of the indexed property to search against. */ + propertyName: string + /** Free-text query that will be embedded and compared against indexed vectors. */ + query?: string + /** External vector query. Use instead of query text for external indexes. */ + queryVector?: number[] + /** + * One or more Neo4j labels to scope the search. + * The first label is used to resolve which embedding index to use. + * Required — always provide at least one label. 
+ */ + labels: string[] + sourceType?: 'managed' | 'external' + similarityFunction?: 'cosine' | 'euclidean' + dimensions?: number + /** + * Optional filter applied before cosine scoring. + * Candidates are narrowed via MATCH/WHERE and then ranked by similarity. + */ + where?: Record + /** Number of results to skip for pagination (default 0). */ + skip?: number + /** Maximum number of results to return (default 20). */ + limit?: number +} + +/** + * A record returned by db.ai.search(). + * Identical to DBRecord but with __score guaranteed present — never optional. + * __score is the cosine similarity between the query vector and this record's embedding (0–1, + * higher = more similar). It is only injected by the semantic search path; regular + * db.records.find() / db.records.search() results are plain DBRecord and never carry __score. + */ +export type SemanticSearchResult = DBRecord & { + readonly __score: number +} diff --git a/packages/javascript-sdk/src/common/constants.ts b/packages/javascript-sdk/src/common/constants.ts index b5c4e641..f70fb471 100644 --- a/packages/javascript-sdk/src/common/constants.ts +++ b/packages/javascript-sdk/src/common/constants.ts @@ -8,13 +8,11 @@ export const PROPERTY_TYPE_DATETIME = 'datetime' as const export const PROPERTY_TYPE_BOOLEAN = 'boolean' as const export const PROPERTY_TYPE_NUMBER = 'number' as const export const PROPERTY_TYPE_NULL = 'null' as const -export const PROPERTY_TYPE_VECTOR = 'vector' as const export const PROPERTY_TYPES = [ PROPERTY_TYPE_STRING, PROPERTY_TYPE_DATETIME, PROPERTY_TYPE_BOOLEAN, PROPERTY_TYPE_NUMBER, - PROPERTY_TYPE_NULL, - PROPERTY_TYPE_VECTOR + PROPERTY_TYPE_NULL ] diff --git a/packages/javascript-sdk/src/index.node.ts b/packages/javascript-sdk/src/index.node.ts index 54d01f51..5e9876f3 100644 --- a/packages/javascript-sdk/src/index.node.ts +++ b/packages/javascript-sdk/src/index.node.ts @@ -1,11 +1,36 @@ import { HttpClient, HttpClientResponse } from './network/HttpClient.js' import { 
NodeHttpClient } from './network/NodeHttpClient.js' import { initSDK, RushDB } from './sdk/index.js' -import { type ApiResponse, RestAPI } from './api/index.js' +import { + type ApiResponse, + type CreateEmbeddingIndexParams, + type EmbeddingIndex, + type EmbeddingIndexStats, + type SemanticSearchParams, + type SemanticSearchResult, + type UpsertEmbeddingVectorItem, + type UpsertEmbeddingVectorsParams, + type UpsertEmbeddingVectorsResult, + RestAPI +} from './api/index.js' initSDK(new NodeHttpClient()) -export { RushDB, HttpClient, HttpClientResponse, RestAPI, type ApiResponse } +export { + RushDB, + HttpClient, + HttpClientResponse, + RestAPI, + type ApiResponse, + type EmbeddingIndex, + type CreateEmbeddingIndexParams, + type EmbeddingIndexStats, + type SemanticSearchParams, + type SemanticSearchResult, + type UpsertEmbeddingVectorItem, + type UpsertEmbeddingVectorsParams, + type UpsertEmbeddingVectorsResult +} export * from './types/index.js' export * from './sdk/index.js' diff --git a/packages/javascript-sdk/src/index.worker.ts b/packages/javascript-sdk/src/index.worker.ts index f0c6c0d7..0dd3bb74 100644 --- a/packages/javascript-sdk/src/index.worker.ts +++ b/packages/javascript-sdk/src/index.worker.ts @@ -1,11 +1,36 @@ import { FetchHttpClient } from './network/FetchHttpClient.js' import { HttpClient, HttpClientResponse } from './network/HttpClient.js' import { initSDK, RushDB } from './sdk/index.js' -import { type ApiResponse, RestAPI } from './api/index.js' +import { + type ApiResponse, + type CreateEmbeddingIndexParams, + type EmbeddingIndex, + type EmbeddingIndexStats, + type SemanticSearchParams, + type SemanticSearchResult, + type UpsertEmbeddingVectorItem, + type UpsertEmbeddingVectorsParams, + type UpsertEmbeddingVectorsResult, + RestAPI +} from './api/index.js' initSDK(new FetchHttpClient()) -export { RushDB, HttpClient, HttpClientResponse, RestAPI, type ApiResponse } +export { + RushDB, + HttpClient, + HttpClientResponse, + RestAPI, + type 
ApiResponse, + type EmbeddingIndex, + type CreateEmbeddingIndexParams, + type EmbeddingIndexStats, + type SemanticSearchParams, + type SemanticSearchResult, + type UpsertEmbeddingVectorItem, + type UpsertEmbeddingVectorsParams, + type UpsertEmbeddingVectorsResult +} export * from './types/index.js' export * from './sdk/index.js' diff --git a/packages/javascript-sdk/src/network/index.ts b/packages/javascript-sdk/src/network/index.ts index 87776d8d..8a02821d 100644 --- a/packages/javascript-sdk/src/network/index.ts +++ b/packages/javascript-sdk/src/network/index.ts @@ -18,7 +18,7 @@ export const createFetcher = ...defaultHeaders, ...initHeaders }, - typeof token !== 'undefined' ? { token } : {} + typeof token !== 'undefined' ? { Authorization: `Bearer ${token}` } : {} ) as RequestHeaders, ...init }) diff --git a/packages/javascript-sdk/src/sdk/constants.ts b/packages/javascript-sdk/src/sdk/constants.ts index c5bc5443..f2a116e3 100644 --- a/packages/javascript-sdk/src/sdk/constants.ts +++ b/packages/javascript-sdk/src/sdk/constants.ts @@ -8,9 +8,3 @@ export const ALLOWED_CONFIG_PROPERTIES = [ 'logger', 'options' ] - -export const PlanPrefix = { - initial: 'in', - extended: 'ex', - fullFeatured: 'ff' -} as const diff --git a/packages/javascript-sdk/src/sdk/model.ts b/packages/javascript-sdk/src/sdk/model.ts index eb987735..04885e41 100644 --- a/packages/javascript-sdk/src/sdk/model.ts +++ b/packages/javascript-sdk/src/sdk/model.ts @@ -322,7 +322,7 @@ export class Model { const canUpdate = !matchingRecords?.data?.length || - (matchingRecords.data.length === 1 && matchingRecords.data[0]?.id() === pickRecordId(target)!) + (matchingRecords.data.length === 1 && matchingRecords.data[0]?.id === pickRecordId(target)!) 
if (canUpdate) { const result = await instance.records[method]({ target, label: this.label, data }, tx) diff --git a/packages/javascript-sdk/src/sdk/record.ts b/packages/javascript-sdk/src/sdk/record.ts index fd79d39d..5136b172 100644 --- a/packages/javascript-sdk/src/sdk/record.ts +++ b/packages/javascript-sdk/src/sdk/record.ts @@ -108,7 +108,6 @@ export type RelationDetachOptions = { export type DBRecordCreationOptions = { returnResult?: boolean suggestTypes?: boolean - castNumberArraysToVectors?: boolean convertNumericValuesToNumbers?: boolean capitalizeLabels?: boolean relationshipType?: string @@ -130,23 +129,18 @@ export class DBRecordInstance @@ -40,11 +38,3 @@ export type SDKConfig = { allowForceDelete?: boolean } } & ApiConnectionConfig - -export type PlanType = keyof typeof PlanPrefix - -type RawServerSettings = NonNullable -export type TokenPublicVariables = Pick< - RawServerSettings, - 'selfHosted' | 'customDB' | 'managedDB' | 'planType' -> diff --git a/packages/javascript-sdk/src/sdk/utils.ts b/packages/javascript-sdk/src/sdk/utils.ts index 3f7649e7..1b121191 100644 --- a/packages/javascript-sdk/src/sdk/utils.ts +++ b/packages/javascript-sdk/src/sdk/utils.ts @@ -1,8 +1,8 @@ import type { PropertyValue, Schema, InferSchemaTypesWrite } from '../types/index.js' -import type { PlanType, SDKConfig, TokenPublicVariables } from './types.js' +import type { SDKConfig } from './types.js' import { isObject } from '../common/utils.js' -import { ALLOWED_CONFIG_PROPERTIES, PlanPrefix } from './constants.js' +import { ALLOWED_CONFIG_PROPERTIES } from './constants.js' import { UniquenessError } from './errors.js' export const mergeDefaultsWithPayload = async ( @@ -132,38 +132,3 @@ export function idToTimestamp(id: string): number { export function idToDate(id: string): Date { return new Date(idToTimestamp(id)) } - -export function extractMixedPropertiesFromToken( - prefixedToken: string -): [TokenPublicVariables | null, string] { - // Try such token format 
XX_YYYYYY_ZZZ - const reg = /^([a-z]{2})_([01]{3}\d{0,})_(.+)$/ - - const matchedToken = prefixedToken.match(reg) - - if (!matchedToken) { - // Working with old token - return [null, prefixedToken] - } - - const [, prefix, bits, rawToken] = matchedToken - - // Get user prefixed plan - const planEntry = (Object.entries(PlanPrefix) as [PlanType, string][]).find( - ([_, currentPrefix]) => currentPrefix === prefix - )! - - const plan = planEntry[0] - - // Build feature flags - const [bCustomDb, bManagedDb, bSelfHosted] = bits.split('') - - const settings: TokenPublicVariables = { - customDB: bCustomDb === '1', - managedDB: bManagedDb === '1', - selfHosted: bSelfHosted === '1', - planType: plan - } - - return [{ ...settings }, rawToken] -} diff --git a/packages/javascript-sdk/src/types/expressions.ts b/packages/javascript-sdk/src/types/expressions.ts index 5a87c04a..8e59637c 100644 --- a/packages/javascript-sdk/src/types/expressions.ts +++ b/packages/javascript-sdk/src/types/expressions.ts @@ -3,8 +3,7 @@ import type { PROPERTY_TYPE_DATETIME, PROPERTY_TYPE_NULL, PROPERTY_TYPE_NUMBER, - PROPERTY_TYPE_STRING, - PROPERTY_TYPE_VECTOR + PROPERTY_TYPE_STRING } from '../common/constants.js' import type { MaybeArray, RequireAtLeastOne } from './utils.js' import type { @@ -45,17 +44,6 @@ export type StringExpression = Record<'$exists', BooleanValue> > -export type VectorSearchFn = 'jaccard' | 'overlap' | 'cosine' | 'pearson' | 'euclideanDistance' | 'euclidean' -// Value range [0,1] | [0,1] | [-1,1] | [-1,1] | [0, Infinity) | (0, 1] - -export type VectorExpression = { - $vector: { - fn: `gds.similarity.${VectorSearchFn}` - query: Array - threshold: number | RequireAtLeastOne> - } -} - export type TypeExpression = { $type: PropertyType } @@ -67,7 +55,6 @@ export type PropertyExpression = | NumberExpression | StringExpression | TypeExpression - | VectorExpression export type PropertyExpressionByType = { [PROPERTY_TYPE_BOOLEAN]: BooleanExpression | TypeExpression @@ -75,7 +62,6 @@ 
export type PropertyExpressionByType = { [PROPERTY_TYPE_NULL]: NullExpression | TypeExpression [PROPERTY_TYPE_NUMBER]: NumberExpression | TypeExpression [PROPERTY_TYPE_STRING]: StringExpression | TypeExpression - [PROPERTY_TYPE_VECTOR]: VectorExpression | TypeExpression } // Logical Expressions diff --git a/packages/javascript-sdk/src/types/query.ts b/packages/javascript-sdk/src/types/query.ts index 643acdf6..dfe6c2bc 100644 --- a/packages/javascript-sdk/src/types/query.ts +++ b/packages/javascript-sdk/src/types/query.ts @@ -7,8 +7,7 @@ import type { NumberExpression, PropertyExpression, PropertyExpressionByType, - StringExpression, - VectorSearchFn + StringExpression } from './expressions.js' import type { Schema } from './schema.js' import type { AnyObject, MaybeArray, RequireAtLeastOne } from './utils.js' @@ -116,7 +115,12 @@ export type AggregateFn = | { field: string; fn: 'max'; alias?: string } | { field: string; fn: 'min'; alias?: string } | { field: string; fn: 'sum'; alias?: string } - | { field: string; fn: `gds.similarity.${VectorSearchFn}`; alias?: string; query: number[] } + | { + field: string + fn: 'vector.similarity.cosine' | 'vector.similarity.euclidean' + alias?: string + query: number[] + } | AggregateTimeBucketFn | AggregateCollectFn diff --git a/packages/javascript-sdk/src/types/schema.ts b/packages/javascript-sdk/src/types/schema.ts index a3d67fd6..4d8a149d 100644 --- a/packages/javascript-sdk/src/types/schema.ts +++ b/packages/javascript-sdk/src/types/schema.ts @@ -23,7 +23,6 @@ type TypeMappingWrite = { null: null number: number string: string - vector: Array } export type OptionalKeysWrite = { @@ -55,7 +54,6 @@ type TypeMappingRead = { null: null number: number string: string - vector: Array } export type OptionalKeysRead = { diff --git a/packages/javascript-sdk/src/types/value.ts b/packages/javascript-sdk/src/types/value.ts index 5a4516b7..cd8f39a4 100644 --- a/packages/javascript-sdk/src/types/value.ts +++ 
b/packages/javascript-sdk/src/types/value.ts @@ -3,8 +3,7 @@ import type { PROPERTY_TYPE_DATETIME, PROPERTY_TYPE_NULL, PROPERTY_TYPE_NUMBER, - PROPERTY_TYPE_STRING, - PROPERTY_TYPE_VECTOR + PROPERTY_TYPE_STRING } from '../common/constants.js' import type { MaybeArray } from './utils.js' @@ -40,7 +39,6 @@ export type PropertyType = | typeof PROPERTY_TYPE_NULL | typeof PROPERTY_TYPE_NUMBER | typeof PROPERTY_TYPE_STRING - | typeof PROPERTY_TYPE_VECTOR type WithId = T & { id: string } type WithValue = T & { value: PropertyValue } @@ -69,8 +67,6 @@ export type PropertySingleValue = : TType extends typeof PROPERTY_TYPE_STRING ? StringValue : TType extends typeof PROPERTY_TYPE_NULL ? NullValue : TType extends typeof PROPERTY_TYPE_BOOLEAN ? BooleanValue - : TType extends typeof PROPERTY_TYPE_VECTOR ? Array : StringValue -export type PropertyValue = - TType extends typeof PROPERTY_TYPE_VECTOR ? Array : MaybeArray> +export type PropertyValue = MaybeArray> diff --git a/packages/javascript-sdk/tests/ai.external.e2e.test.ts b/packages/javascript-sdk/tests/ai.external.e2e.test.ts new file mode 100644 index 00000000..e51020ee --- /dev/null +++ b/packages/javascript-sdk/tests/ai.external.e2e.test.ts @@ -0,0 +1,138 @@ +import path from 'path' +import dotenv from 'dotenv' + +dotenv.config({ path: path.resolve(__dirname, '../.env') }) + +import RushDB from '../src/index.node' + +jest.setTimeout(120_000) + +describe('db.ai External Vector flow (e2e)', () => { + const apiKey = process.env.RUSHDB_API_KEY + const apiUrl = process.env.RUSHDB_API_URL || 'http://localhost:3000' + + if (!apiKey) { + it('skips because RUSHDB_API_KEY is not set', () => { + expect(true).toBe(true) + }) + return + } + + const db = new RushDB(apiKey, { url: apiUrl }) + + const tenantId = `ai-external-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}` + const LABEL = 'ExternalArticle' + const PROPERTY = 'description' + + const vectors = { + alpha: [1, 0, 0], + beta: [0, 1, 0], + gamma: [0, 0, 1] 
+ } + + let indexId: string | undefined + let recordIds: string[] = [] + + beforeAll(async () => { + const alpha = await db.records.create({ + label: LABEL, + data: { + title: 'Alpha', + description: 'alpha text', + tenantId + }, + options: { suggestTypes: true } + }) + + const beta = await db.records.create({ + label: LABEL, + data: { + title: 'Beta', + description: 'beta text', + tenantId + }, + options: { suggestTypes: true } + }) + + const gamma = await db.records.create({ + label: LABEL, + data: { + title: 'Gamma', + description: 'gamma text', + tenantId + }, + options: { suggestTypes: true } + }) + + recordIds = [alpha.id, beta.id, gamma.id].filter(Boolean) + + const index = await db.ai.indexes.create({ + label: LABEL, + propertyName: PROPERTY, + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: 3 + }) + + indexId = index.data.id + + await db.ai.indexes.upsertVectors(indexId, { + items: [ + { recordId: alpha.id, vector: vectors.alpha }, + { recordId: beta.id, vector: vectors.beta }, + { recordId: gamma.id, vector: vectors.gamma } + ] + }) + }) + + afterAll(async () => { + await db.records.delete({ labels: [LABEL], where: { tenantId } }) + + if (indexId) { + try { + await db.ai.indexes.delete(indexId) + } catch { + // ignore + } + } + }) + + it('supports create(external) + upsertVectors + queryVector search', async () => { + expect(indexId).toBeDefined() + expect(recordIds.length).toBe(3) + + const res = await db.ai.search({ + propertyName: PROPERTY, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: 3, + queryVector: [1, 0, 0], + where: { tenantId }, + limit: 3 + }) + + expect(res.success).toBe(true) + expect(res.data.length).toBeGreaterThan(0) + expect(String(res.data[0].title ?? 
'')).toBe('Alpha') + + for (let i = 0; i < res.data.length - 1; i++) { + expect(res.data[i].__score).toBeGreaterThanOrEqual(res.data[i + 1].__score) + } + }) + + it('rejects query text for external index', async () => { + await expect( + db.ai.search({ + propertyName: PROPERTY, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: 3, + query: 'alpha text', + where: { tenantId }, + limit: 3 + }) + ).rejects.toBeTruthy() + }) +}) diff --git a/packages/javascript-sdk/tests/ai.inline-vectors.e2e.test.ts b/packages/javascript-sdk/tests/ai.inline-vectors.e2e.test.ts new file mode 100644 index 00000000..a0930c39 --- /dev/null +++ b/packages/javascript-sdk/tests/ai.inline-vectors.e2e.test.ts @@ -0,0 +1,750 @@ +/** + * E2E tests for the inline-vector BYOV (Bring Your Own Vectors) DX. + * + * This file exercises every surface area of the inline-vector feature: + * - records.create() with vectors: [...] + * - records.upsert() with vectors: [...] + * - records.set() with vectors: [...] + * - records.importJson with $vectors per item + * - ai.indexes.create() with external: true shorthand + * - ai.search() with queryVector (no query text) + * - disambiguation: two indexes on same property, different similarityFunction + * - error paths: wrong dimensions, ambiguous match, no matching index + * + * Prerequisites + * ───────────── + * RUSHDB_API_KEY and RUSHDB_API_URL must be set in packages/javascript-sdk/.env + * + * If RUSHDB_API_KEY is absent the whole suite is skipped gracefully. 
+ */ + +import path from 'path' +import dotenv from 'dotenv' + +dotenv.config({ path: path.resolve(__dirname, '../.env') }) + +import RushDB from '../src/index.node' +import type { EmbeddingIndex } from '../src/api/types' + +jest.setTimeout(120_000) + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +/** Wait for an embedding index to reach 'ready', polling every 2 s. */ +async function waitForIndexReady( + db: RushDB, + indexId: string, + timeoutMs = 60_000, + interval = 2_000 +): Promise { + const deadline = Date.now() + timeoutMs + while (Date.now() < deadline) { + const list = await db.ai.indexes.find() + const idx = (list.data as EmbeddingIndex[]).find((i) => i.id === indexId) + if (idx?.status === 'ready') return + if (idx?.status === 'error') throw new Error(`Index ${indexId} entered error state`) + await new Promise((r) => setTimeout(r, interval)) + } + throw new Error(`Index ${indexId} did not become ready within ${timeoutMs} ms`) +} + +/** Build a 3-D unit cosine vector pointing at slot `i` (0-indexed). */ +const unitVec = (i: number, dims = 3): number[] => Array.from({ length: dims }, (_, k) => (k === i ? 
1 : 0)) + +// ───────────────────────────────────────────────────────────────────────────── +// Suite +// ───────────────────────────────────────────────────────────────────────────── + +describe('ai – inline vectors BYOV (e2e)', () => { + const apiKey = process.env.RUSHDB_API_KEY + const apiUrl = process.env.RUSHDB_API_URL || 'http://localhost:3000' + + if (!apiKey) { + it('skips because RUSHDB_API_KEY is not set', () => expect(true).toBe(true)) + return + } + + const db = new RushDB(apiKey, { url: apiUrl }) + + // Unique tenant tag so parallel test runs never collide + const tenantId = `inline-vec-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}` + const LABEL = 'InlineVecArticle' + const PROP = 'body' + const DIMS = 3 + + // Track created index IDs for cleanup + const createdIndexIds: string[] = [] + + // Helper: create external index scoped to LABEL:PROP with DIMS dimensions + async function makeIndex(similarityFunction: 'cosine' | 'euclidean' = 'cosine') { + const { data: idx } = await db.ai.indexes.create({ + label: LABEL, + propertyName: PROP, + external: true, // shorthand instead of sourceType: 'external' + similarityFunction, + dimensions: DIMS + }) + createdIndexIds.push(idx.id) + return idx + } + + // ── Teardown ──────────────────────────────────────────────────────────────── + + afterAll(async () => { + await db.records.delete({ labels: [LABEL], where: { tenantId } }).catch(() => {}) + for (const id of createdIndexIds) { + await db.ai.indexes.delete(id).catch(() => {}) + } + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 1. 
Index creation via `external: true` shorthand + // ═══════════════════════════════════════════════════════════════════════════ + + describe('ai.indexes.create() with external: true', () => { + it('creates an external index and resolves sourceType correctly', async () => { + const { data: idx } = await db.ai.indexes.create({ + label: `${LABEL}Meta`, + propertyName: 'summary', + external: true, + dimensions: DIMS, + similarityFunction: 'cosine' + }) + createdIndexIds.push(idx.id) + + expect(idx.sourceType).toBe('external') + expect(idx.dimensions).toBe(DIMS) + expect(idx.similarityFunction).toBe('cosine') + }) + + it('creates an external index via explicit sourceType (no regression)', async () => { + const { data: idx } = await db.ai.indexes.create({ + label: `${LABEL}Compat`, + propertyName: 'description', + sourceType: 'external', + dimensions: DIMS, + similarityFunction: 'cosine' + }) + createdIndexIds.push(idx.id) + + expect(idx.sourceType).toBe('external') + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 2. 
records.create() with inline vectors + // ═══════════════════════════════════════════════════════════════════════════ + + describe('records.create() with vectors', () => { + let indexId: string + + beforeAll(async () => { + const idx = await makeIndex('cosine') + indexId = idx.id + }) + + it('creates a record and writes the inline vector in a single call', async () => { + const record = await db.records.create({ + label: LABEL, + data: { body: 'Alpha article', tenantId }, + vectors: [{ propertyName: PROP, vector: unitVec(0) }] + }) + + expect(record.id).toBeDefined() + expect(record.data.body).toBe('Alpha article') + + // Verify the vector was written: search with the same vector → should return this record + await waitForIndexReady(db, indexId) + + const res = await db.ai.search({ + propertyName: PROP, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: DIMS, + queryVector: unitVec(0), + where: { tenantId, body: 'Alpha article' }, + limit: 5 + }) + + expect(res.success).toBe(true) + expect(res.data.length).toBeGreaterThan(0) + const top = res.data[0] + expect(top.__id).toBe(record.id) + expect(top.__score).toBeCloseTo(1, 2) + }) + + it('returns results in descending __score order', async () => { + // Create two more records with distinct unit vectors + await db.records.create({ + label: LABEL, + data: { body: 'Beta article', tenantId }, + vectors: [{ propertyName: PROP, vector: unitVec(1) }] + }) + await db.records.create({ + label: LABEL, + data: { body: 'Gamma article', tenantId }, + vectors: [{ propertyName: PROP, vector: unitVec(2) }] + }) + + await waitForIndexReady(db, indexId) + + const res = await db.ai.search({ + propertyName: PROP, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: DIMS, + queryVector: unitVec(0), + where: { tenantId }, + limit: 10 + }) + + expect(res.data.length).toBeGreaterThan(1) + for (let i = 0; i < res.data.length - 1; i++) { + 
expect(res.data[i].__score).toBeGreaterThanOrEqual(res.data[i + 1].__score) + } + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 3. records.upsert() with inline vectors + // ═══════════════════════════════════════════════════════════════════════════ + + describe('records.upsert() with vectors', () => { + let indexId: string + + beforeAll(async () => { + const idx = await makeIndex('cosine') + indexId = idx.id + }) + + it('creates a record and writes a vector on first upsert', async () => { + const upsertTenant = `${tenantId}-upsert` + + const record = await db.records.upsert({ + label: LABEL, + data: { body: 'Upsert Alpha', slug: 'u-alpha', tenantId: upsertTenant }, + options: { mergeBy: ['slug', 'tenantId'], suggestTypes: true }, + vectors: [{ propertyName: PROP, vector: unitVec(0) }] + }) + + expect(record.id).toBeDefined() + await waitForIndexReady(db, indexId) + + const res = await db.ai.search({ + propertyName: PROP, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: DIMS, + queryVector: unitVec(0), + where: { tenantId: upsertTenant }, + limit: 5 + }) + + expect(res.data[0]?.__id).toBe(record.id) + }) + + it('updates the vector when the same record is upserted again', async () => { + const upsertTenant = `${tenantId}-upsert2` + + // First upsert → vector points at slot 1 + const first = await db.records.upsert({ + label: LABEL, + data: { body: 'Update vector article', slug: 'u-update', tenantId: upsertTenant }, + options: { mergeBy: ['slug', 'tenantId'], suggestTypes: true }, + vectors: [{ propertyName: PROP, vector: unitVec(1) }] + }) + + // Second upsert same slug → vector should now point at slot 0 + const second = await db.records.upsert({ + label: LABEL, + data: { body: 'Update vector article', slug: 'u-update', tenantId: upsertTenant }, + options: { mergeBy: ['slug', 'tenantId'], suggestTypes: true }, + vectors: [{ propertyName: PROP, vector: unitVec(0) }] + }) + + 
// Both upserts should resolve to the same record + expect(second.id).toBe(first.id) + + await waitForIndexReady(db, indexId) + + // Search with slot-0 → should be the top result (score ≈ 1) + const res = await db.ai.search({ + propertyName: PROP, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: DIMS, + queryVector: unitVec(0), + where: { tenantId: upsertTenant }, + limit: 5 + }) + + expect(res.data[0]?.__id).toBe(first.id) + expect(res.data[0]?.__score).toBeCloseTo(1, 2) + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 4. records.set() with inline vectors + // ═══════════════════════════════════════════════════════════════════════════ + + describe('records.set() with vectors', () => { + let indexId: string + + beforeAll(async () => { + const idx = await makeIndex('cosine') + indexId = idx.id + }) + + it('overwrites record data and writes the inline vector atomically', async () => { + const setTenant = `${tenantId}-set` + + // Create without vector first + const original = await db.records.create({ + label: LABEL, + data: { body: 'Old body', extra: 'keep', tenantId: setTenant } + }) + + // Set replaces data and writes vector + const updated = await db.records.set({ + target: original, + label: LABEL, + data: { body: 'New body', tenantId: setTenant }, + vectors: [{ propertyName: PROP, vector: unitVec(0) }] + }) + + expect(updated.id).toBe(original.id) + expect(updated.data.body).toBe('New body') + + await waitForIndexReady(db, indexId) + + const res = await db.ai.search({ + propertyName: PROP, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: DIMS, + queryVector: unitVec(0), + where: { tenantId: setTenant }, + limit: 5 + }) + + expect(res.data[0]?.__id).toBe(original.id) + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 5. 
records.importJson() with $vectors + // ═══════════════════════════════════════════════════════════════════════════ + + describe('records.importJson() with $vectors', () => { + let indexId: string + + beforeAll(async () => { + const idx = await makeIndex('cosine') + indexId = idx.id + }) + + it('writes vectors for each item in a batch import', async () => { + const importTenant = `${tenantId}-import` + + await db.records.importJson({ + label: LABEL, + data: [ + { + body: 'Import Alpha', + tenantId: importTenant, + $vectors: [{ propertyName: PROP, vector: unitVec(0) }] + }, + { + body: 'Import Beta', + tenantId: importTenant, + $vectors: [{ propertyName: PROP, vector: unitVec(1) }] + }, + { + body: 'Import Gamma', + tenantId: importTenant, + $vectors: [{ propertyName: PROP, vector: unitVec(2) }] + } + ] + }) + + await waitForIndexReady(db, indexId) + + // Query close to slot 0 → Import Alpha should rank highest + const res = await db.ai.search({ + propertyName: PROP, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: DIMS, + queryVector: unitVec(0), + where: { tenantId: importTenant }, + limit: 5 + }) + + expect(res.data.length).toBe(3) + expect(String(res.data[0].body ?? '')).toBe('Import Alpha') + expect(res.data[0].__score).toBeCloseTo(1, 2) + }) + + it('does not create $vectors as child records', async () => { + // If $vectors were processed by BFS as child entities, we'd find InlineVecArticle-like + // child records. Verify they don't exist. 
+ const importTenant = `${tenantId}-no-children` + + await db.records.importJson({ + label: LABEL, + data: [ + { + body: 'No children check', + tenantId: importTenant, + $vectors: [{ propertyName: PROP, vector: unitVec(0) }] + } + ] + }) + + // Only one record should exist (the parent), none named after $vectors keys + const all = await db.records.find({ + labels: [LABEL], + where: { tenantId: importTenant } + }) + + expect(all.total).toBe(1) + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 6. Semantic search: dimension auto-inference from queryVector.length + // ═══════════════════════════════════════════════════════════════════════════ + + describe('ai.search() dimension inference', () => { + let indexId: string + + beforeAll(async () => { + const idx = await makeIndex('cosine') + indexId = idx.id + }) + + it('finds the index when dimensions is omitted (inferred from queryVector length)', async () => { + const inferTenant = `${tenantId}-infer` + + await db.records.create({ + label: LABEL, + data: { body: 'Infer dims', tenantId: inferTenant }, + vectors: [{ propertyName: PROP, vector: unitVec(0) }] + }) + + await waitForIndexReady(db, indexId) + + // No `dimensions` field — server must infer 3 from queryVector.length + const res = await db.ai.search({ + propertyName: PROP, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + queryVector: unitVec(0), // length = 3 + where: { tenantId: inferTenant }, + limit: 5 + }) + + expect(res.success).toBe(true) + expect(res.data.length).toBeGreaterThan(0) + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 7. 
Disambiguation: two indexes share same property, different similarityFunction + // ═══════════════════════════════════════════════════════════════════════════ + + describe('disambiguation via similarityFunction', () => { + const DISAMLABEL = `${LABEL}Disambig` + const DISAMTENANTID = `${tenantId}-disambig` + let cosineIndexId: string + let euclideanIndexId: string + + beforeAll(async () => { + // Create two indexes on the same property but different similarity functions + const [{ data: ci }, { data: ei }] = await Promise.all([ + db.ai.indexes.create({ + label: DISAMLABEL, + propertyName: PROP, + external: true, + dimensions: DIMS, + similarityFunction: 'cosine' + }), + db.ai.indexes.create({ + label: DISAMLABEL, + propertyName: PROP, + external: true, + dimensions: DIMS, + similarityFunction: 'euclidean' + }) + ]) + cosineIndexId = ci.id + euclideanIndexId = ei.id + createdIndexIds.push(cosineIndexId, euclideanIndexId) + }) + + afterAll(async () => { + await db.records.delete({ labels: [DISAMLABEL], where: { tenantId: DISAMTENANTID } }).catch(() => {}) + }) + + it('writes to the correct index when similarityFunction is specified', async () => { + // This should succeed — exactly one cosine index matches + const record = await db.records.create({ + label: DISAMLABEL, + data: { body: 'Disambig article', tenantId: DISAMTENANTID }, + vectors: [{ propertyName: PROP, vector: unitVec(0), similarityFunction: 'cosine' }] + }) + + expect(record.id).toBeDefined() + }) + + it('writes to the euclidean index when similarityFunction is euclidean', async () => { + const record = await db.records.create({ + label: DISAMLABEL, + data: { body: 'Euclidean article', tenantId: DISAMTENANTID }, + vectors: [{ propertyName: PROP, vector: unitVec(1), similarityFunction: 'euclidean' }] + }) + + expect(record.id).toBeDefined() + }) + + it('rejects inline vectors when similarityFunction is omitted (ambiguous match)', async () => { + // Two indexes exist → server cannot pick one → must return 
error + await expect( + db.records.create({ + label: DISAMLABEL, + data: { body: 'Ambiguous article', tenantId: DISAMTENANTID }, + vectors: [ + { + propertyName: PROP, + vector: unitVec(0) + // no similarityFunction → ambiguous + } + ] + }) + ).rejects.toBeTruthy() + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 9. records.createMany() with vectors + // ═══════════════════════════════════════════════════════════════════════════ + + describe('records.createMany() with vectors', () => { + let indexId: string + + beforeAll(async () => { + const idx = await makeIndex('cosine') + indexId = idx.id + }) + + it('creates multiple records and writes per-row vectors', async () => { + const rows = [ + { body: 'createMany alpha', tenantId }, + { body: 'createMany beta', tenantId }, + { body: 'createMany gamma', tenantId } + ] + + const result = await db.records.createMany({ + label: LABEL, + data: rows, + options: { returnResult: true }, + vectors: [ + [{ propertyName: PROP, vector: unitVec(0) }], // row 0 → [1,0,0] + [{ propertyName: PROP, vector: unitVec(1) }], // row 1 → [0,1,0] + [{ propertyName: PROP, vector: unitVec(2) }] // row 2 → [0,0,1] + ] + }) + + expect(result.data.length).toBe(3) + + // Index [1,0,0] — should find row 0 at score ≈ 1 + await waitForIndexReady(db, indexId) + const { data: res } = await db.ai.search({ + labels: [LABEL], + propertyName: PROP, + queryVector: unitVec(0), + where: { tenantId }, + limit: 3 + }) + expect(res.length).toBeGreaterThanOrEqual(1) + expect(res[0].__score).toBeGreaterThan(0.99) + }) + + it('rejects when vectors length exceeds data length (client-side)', async () => { + expect(() => + db.records.createMany({ + label: LABEL, + data: [{ body: 'one row', tenantId }], + vectors: [ + [{ propertyName: PROP, vector: unitVec(0) }], + [{ propertyName: PROP, vector: unitVec(1) }] // extra — no corresponding row + ] + }) + ).toThrow(/vectors length.*exceeds/) + }) + + it('allows sparse 
vectors — only some rows need vectors', async () => { + const rows = [ + { body: 'createMany sparse 0', tenantId }, + { body: 'createMany sparse 1', tenantId }, + { body: 'createMany sparse 2', tenantId } + ] + + // Only provide a vector for row 0; rows 1 and 2 are left without vectors + const result = await db.records.createMany({ + label: LABEL, + data: rows, + options: { returnResult: true }, + vectors: [[{ propertyName: PROP, vector: unitVec(0) }]] + }) + + expect(result.data.length).toBe(3) + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 10. records.importCsv() with vectors + // ═══════════════════════════════════════════════════════════════════════════ + + describe('records.importCsv() with vectors', () => { + let indexId: string + + beforeAll(async () => { + const idx = await makeIndex('cosine') + indexId = idx.id + }) + + // Build CSV with tenantId baked in + function makeCsv() { + return ( + `body,tenantId\n` + + `importCsv alpha,${tenantId}\n` + + `importCsv beta,${tenantId}\n` + + `importCsv gamma,${tenantId}` + ) + } + + it('imports CSV rows and writes per-row vectors', async () => { + const result = await db.records.importCsv({ + label: LABEL, + data: makeCsv(), + options: { returnResult: true }, + vectors: [ + [{ propertyName: PROP, vector: unitVec(0) }], // row 0 → [1,0,0] + [{ propertyName: PROP, vector: unitVec(1) }], // row 1 → [0,1,0] + [{ propertyName: PROP, vector: unitVec(2) }] // row 2 → [0,0,1] + ] + }) + + expect(result.data.length).toBe(3) + + // Vector [1,0,0] — row 0 should be closest + await waitForIndexReady(db, indexId) + const { data: res } = await db.ai.search({ + labels: [LABEL], + propertyName: PROP, + queryVector: unitVec(0), + where: { tenantId }, + limit: 3 + }) + expect(res.length).toBeGreaterThanOrEqual(1) + expect(res[0].__score).toBeGreaterThan(0.99) + }) + + it('rejects when vectors length exceeds number of CSV rows (server-side)', async () => { + // Only 3 data rows but we 
provide 5 vector entries + await expect( + db.records.importCsv({ + label: LABEL, + data: makeCsv(), + vectors: [ + [{ propertyName: PROP, vector: unitVec(0) }], + [{ propertyName: PROP, vector: unitVec(1) }], + [{ propertyName: PROP, vector: unitVec(2) }], + [{ propertyName: PROP, vector: unitVec(0) }], // no row 3 + [{ propertyName: PROP, vector: unitVec(1) }] // no row 4 + ] + }) + ).rejects.toBeTruthy() + }) + + it('allows sparse vectors — only some CSV rows need vectors', async () => { + const result = await db.records.importCsv({ + label: LABEL, + data: makeCsv(), + options: { returnResult: true }, + vectors: [ + [{ propertyName: PROP, vector: unitVec(2) }] // only row 0 has a vector + ] + }) + expect(result.data.length).toBe(3) + }) + }) + + // ═══════════════════════════════════════════════════════════════════════════ + // 8. Error paths + // ═══════════════════════════════════════════════════════════════════════════ + + describe('error paths', () => { + let indexId: string + + beforeAll(async () => { + const idx = await makeIndex('cosine') + indexId = idx.id + }) + + it('rejects a vector with wrong dimensions', async () => { + await expect( + db.records.create({ + label: LABEL, + data: { body: 'Wrong dims', tenantId }, + vectors: [ + { + propertyName: PROP, + vector: [0.1, 0.2, 0.3, 0.4] // 4 dims, index expects 3 + } + ] + }) + ).rejects.toBeTruthy() + }) + + it('rejects a vector when no matching external index exists', async () => { + await expect( + db.records.create({ + label: LABEL, + data: { body: 'No index', tenantId }, + vectors: [ + { + propertyName: 'nonExistentProp', // no index on this property + vector: unitVec(0) + } + ] + }) + ).rejects.toBeTruthy() + }) + + it('rejects query text against an external index during search', async () => { + await expect( + db.ai.search({ + propertyName: PROP, + labels: [LABEL], + sourceType: 'external', + similarityFunction: 'cosine', + dimensions: DIMS, + query: 'query text is not allowed for external 
indexes', + where: { tenantId }, + limit: 5 + }) + ).rejects.toBeTruthy() + }) + }) +}) diff --git a/packages/javascript-sdk/tests/ai.search.e2e.test.ts b/packages/javascript-sdk/tests/ai.search.e2e.test.ts new file mode 100644 index 00000000..66f8daeb --- /dev/null +++ b/packages/javascript-sdk/tests/ai.search.e2e.test.ts @@ -0,0 +1,439 @@ +/** + * E2E tests for the semantic (vector) search flow via db.ai.search(). + * + * Prerequisites + * ───────────── + * 1. RUSHDB_API_KEY and RUSHDB_API_URL must be set in packages/javascript-sdk/.env + * 2. The server must be configured with: + * RUSHDB_EMBEDDING_MODEL, RUSHDB_EMBEDDING_DIMENSIONS, RUSHDB_EMBEDDING_API_KEY + * + * If any of these are missing, all tests in this file are skipped gracefully. + * + * HOW THE FLOW WORKS + * ────────────────── + * Indexes are scoped to (label, propertyName). "Book:title" and "Task:title" are completely + * independent — they backfill only matching records and store rel.__propKey = "Book:title" / + * "Task:title" on the VALUE relationships. The shared Neo4j DDL index + * (`rushdb_emb_value_rels`) is created idempotently and dropped only when zero embeddings + * remain anywhere in the graph. + * + * SEARCH QUERY SHAPE + * ────────────────── + * await db.ai.search({ + * propertyName: 'description', // which indexed property + * query: 'deep learning', // free-text — server embeds it + * labels: ['Article'], // required; first entry selects the index + * limit: 10, + * skip: 0, + * // `where` adds application-level prefiltering before similarity scoring: + * where: { published: true } + * }) + * + * RETURNED SHAPE + * ────────────── + * { data: Array<{ record: Record, score: number }>, success: boolean } + * score ∈ [0, 1] — cosine similarity, higher = more similar. 
+ */ + +import path from 'path' +import dotenv from 'dotenv' + +dotenv.config({ path: path.resolve(__dirname, '../.env') }) + +import RushDB from '../src/index.node' +import type { EmbeddingIndex } from '../src/api/types' + +jest.setTimeout(120_000) // embedding backfill can take a while + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +/** Poll index status until 'ready' or timeout (ms). */ +async function waitForIndexReady( + db: RushDB, + indexId: string, + timeoutMs = 90_000, + interval = 3_000 +): Promise { + const deadline = Date.now() + timeoutMs + while (Date.now() < deadline) { + const list = await db.ai.indexes.find() + const idx = (list.data as EmbeddingIndex[]).find((i) => i.id === indexId) + if (idx?.status === 'ready') return + if (idx?.status === 'error') throw new Error(`Embedding index ${indexId} entered error state`) + await new Promise((r) => setTimeout(r, interval)) + } + throw new Error(`Embedding index ${indexId} did not become ready within ${timeoutMs}ms`) +} + +// ───────────────────────────────────────────────────────────────────────────── +// Test suite +// ───────────────────────────────────────────────────────────────────────────── + +describe('db.ai.search – semantic (vector) search (e2e)', () => { + const apiKey = process.env.RUSHDB_API_KEY + const apiUrl = process.env.RUSHDB_API_URL || 'http://localhost:3000' + + /** Skip the entire suite when credentials or embedding env vars are absent */ + if (!apiKey) { + it('skips because RUSHDB_API_KEY is not set', () => { + expect(true).toBe(true) + }) + return + } + + const db = new RushDB(apiKey, { url: apiUrl }) + + // Unique tenant tag so test data never collides with other runs + const tenantId = `ai-search-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}` + const LABEL = 'Article' + const PROPERTY = 'description' + + /** Index ID captured 
after creation so it can be deleted in afterAll */ + let indexId: string | undefined + + // Sample corpus – each record has a distinct semantic theme + const articles = [ + { + title: 'Intro to Machine Learning', + description: + 'Machine learning is a branch of artificial intelligence focused on building systems that learn from data.', + category: 'ml', + published: true, + tenantId + }, + { + title: 'Deep Neural Networks', + description: + 'Deep neural networks consist of multiple hidden layers that progressively extract higher-level features.', + category: 'ml', + published: true, + tenantId + }, + { + title: 'Quantum Computing Basics', + description: + 'Quantum computers use qubits and superposition to perform calculations impossible for classical computers.', + category: 'quantum', + published: true, + tenantId + }, + { + title: 'French Cuisine', + description: + 'French cuisine is celebrated worldwide for its refined techniques, rich sauces, and fresh ingredients.', + category: 'food', + published: false, + tenantId + }, + { + title: 'Graph Database Internals', + description: + 'Graph databases store data as nodes and edges, enabling efficient traversal of connected data structures.', + category: 'databases', + published: true, + tenantId + } + ] + + // ── Setup / teardown ─────────────────────────────────────────────────────── + + beforeAll(async () => { + // Insert test records + await db.records.createMany({ + label: LABEL, + data: articles, + options: { suggestTypes: true, returnResult: false } + }) + + // Create the embedding index policy scoped to (label, propertyName) + let createdIndex: EmbeddingIndex + try { + const res = await db.ai.indexes.create({ label: LABEL, propertyName: PROPERTY }) + createdIndex = res.data as EmbeddingIndex + } catch (err: any) { + // 409 = index already exists (idempotent re-run) — fetch it + const list = await db.ai.indexes.find() + const existing = (list.data as EmbeddingIndex[]).find( + (i) => i.propertyName === PROPERTY 
&& i.label === LABEL + ) + if (!existing) throw err + createdIndex = existing + } + + indexId = createdIndex.id + + // Wait until the backfill scheduler has embedded all values + await waitForIndexReady(db, indexId) + }) + + afterAll(async () => { + // Remove test data + await db.records.delete({ labels: [LABEL], where: { tenantId } }) + // Remove the embedding index policy (best-effort) + if (indexId) { + try { + await db.ai.indexes.delete(indexId) + } catch { + // ignore + } + } + }) + + // ── semantic ranking over project-scoped candidates ──────────────────────── + + it('returns semantically similar results for an ML query', async () => { + const res = await db.ai.search({ + propertyName: PROPERTY, + query: 'neural networks and artificial intelligence', + labels: [LABEL], + limit: 3 + }) + + expect(res.success).toBe(true) + expect(Array.isArray(res.data)).toBe(true) + expect(res.data.length).toBeGreaterThan(0) + expect(res.data.length).toBeLessThanOrEqual(3) + + // Every result must have a numeric score injected as __score + res.data.forEach((item) => { + expect(typeof item.__score).toBe('number') + expect(item.__score).toBeGreaterThan(0) + expect(item.__score).toBeLessThanOrEqual(1) + expect(item.__id).toBeDefined() + }) + + // ML articles should rank higher than "French Cuisine" or "Quantum Computing" + const topTitle = String(res.data[0].title ?? 
'') + expect(['Intro to Machine Learning', 'Deep Neural Networks']).toContain(topTitle) + }) + + it('returns results ordered by score descending', async () => { + const res = await db.ai.search({ + propertyName: PROPERTY, + query: 'graph database nodes edges', + labels: [LABEL], + limit: 5 + }) + + expect(res.success).toBe(true) + const scores = res.data.map((r) => r.__score) + for (let i = 0; i < scores.length - 1; i++) { + expect(scores[i]).toBeGreaterThanOrEqual(scores[i + 1]) + } + }) + + it('respects limit and skip for pagination', async () => { + const page1 = await db.ai.search({ + propertyName: PROPERTY, + query: 'data science algorithms', + labels: [LABEL], + limit: 2, + skip: 0 + }) + + const page2 = await db.ai.search({ + propertyName: PROPERTY, + query: 'data science algorithms', + labels: [LABEL], + limit: 2, + skip: 2 + }) + + expect(page1.success).toBe(true) + expect(page2.success).toBe(true) + + // Pages must not overlap (different records) + const ids1 = page1.data.map((r) => r.__id ?? r.title) + const ids2 = page2.data.map((r) => r.__id ?? r.title) + const overlap = ids1.filter((id) => ids2.includes(id)) + expect(overlap).toHaveLength(0) + }) + + // ── additional prefiltering via where ────────────────────────────────────── + + it('where filter restricts candidates before cosine scoring', async () => { + // 'French Cuisine' has published: false — must not appear when filtering published: true + const res = await db.ai.search({ + propertyName: PROPERTY, + query: 'cooking and gastronomy', + labels: [LABEL], + where: { published: true, tenantId }, + limit: 5 + }) + + expect(res.success).toBe(true) + + const titles = res.data.map((r) => String(r.title ?? 
'')) + expect(titles).not.toContain('French Cuisine') + }) + + it('where filter narrows the result set', async () => { + const res = await db.ai.search({ + propertyName: PROPERTY, + query: 'quantum superposition qubits', + labels: [LABEL], + where: { category: 'quantum', tenantId }, + limit: 5 + }) + + expect(res.success).toBe(true) + expect(res.data.length).toBeGreaterThan(0) + + // Every returned record must match the category filter + res.data.forEach((item) => { + expect(item.category).toBe('quantum') + }) + }) + + it('multi-label search still uses exact prefiltering before ranking', async () => { + // Two labels passed -> candidates are narrowed before similarity scoring. + const res = await db.ai.search({ + propertyName: PROPERTY, + query: 'data and storage', + labels: [LABEL, 'NonExistentLabel'], + limit: 5 + }) + + // Should still succeed; NonExistentLabel simply contributes zero candidates. + expect(res.success).toBe(true) + expect(Array.isArray(res.data)).toBe(true) + }) + + // ── Error handling ───────────────────────────────────────────────────────── + + it('returns error when propertyName has no index', async () => { + await expect( + db.ai.search({ + propertyName: '__nonexistent_prop__', + query: 'anything', + labels: ['NonExistentLabel'], + limit: 5 + }) + ).rejects.toThrow() + }) + + // ── Conflict guard ───────────────────────────────────────────────────────── + + it('returns 409 when creating a duplicate (label, propertyName) index', async () => { + // Attempt to register the same index again; server must reject with 409 + await expect(db.ai.indexes.create({ label: LABEL, propertyName: PROPERTY })).rejects.toMatchObject({ + status: 409 + }) + }) +}) + +// ───────────────────────────────────────────────────────────────────────────── +// Multi-label isolation +// ───────────────────────────────────────────────────────────────────────────── + +describe('db.ai – multi-label index isolation (e2e)', () => { + const apiKey = process.env.RUSHDB_API_KEY + 
const apiUrl = process.env.RUSHDB_API_URL || 'http://localhost:3000' + + if (!apiKey) { + it('skips because RUSHDB_API_KEY is not set', () => expect(true).toBe(true)) + return + } + + const db = new RushDB(apiKey, { url: apiUrl }) + + const tenantId = `ai-isolation-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}` + const PROPERTY = 'bio' + const LABEL_A = 'Scientist' + const LABEL_B = 'Chef' + + let indexAId: string | undefined + let indexBId: string | undefined + + const scientists = [ + { bio: 'Pioneered the theory of general relativity and quantum mechanics.', tenantId }, + { bio: 'Developed the first programmable electronic computer and advanced computing theory.', tenantId }, + { bio: 'Discovered the structure of DNA using X-ray crystallography techniques.', tenantId } + ] + + const chefs = [ + { bio: 'World-renowned for perfecting classic French cuisine and buttery sauces.', tenantId }, + { bio: 'A celebrated pastry chef known for innovative chocolate desserts.', tenantId }, + { bio: 'Specialises in fermentation, umami flavours, and Japanese kaiseki cuisine.', tenantId } + ] + + beforeAll(async () => { + // Insert records for both labels + await Promise.all([ + db.records.createMany({ + label: LABEL_A, + data: scientists, + options: { suggestTypes: true, returnResult: false } + }), + db.records.createMany({ + label: LABEL_B, + data: chefs, + options: { suggestTypes: true, returnResult: false } + }) + ]) + + // Create separate indexes scoped to each label + const [resA, resB] = await Promise.all([ + db.ai.indexes.create({ label: LABEL_A, propertyName: PROPERTY }), + db.ai.indexes.create({ label: LABEL_B, propertyName: PROPERTY }) + ]) + indexAId = (resA.data as EmbeddingIndex).id + indexBId = (resB.data as EmbeddingIndex).id + + // Wait for both to be ready + await Promise.all([waitForIndexReady(db, indexAId!), waitForIndexReady(db, indexBId!)]) + }) + + afterAll(async () => { + await db.records.delete({ labels: [LABEL_A], where: { 
tenantId } }).catch(() => {}) + await db.records.delete({ labels: [LABEL_B], where: { tenantId } }).catch(() => {}) + if (indexAId) await db.ai.indexes.delete(indexAId).catch(() => {}) + if (indexBId) await db.ai.indexes.delete(indexBId).catch(() => {}) + }) + + it('Scientist search does not return Chef records', async () => { + const res = await db.ai.search({ + propertyName: PROPERTY, + query: 'physics and relativity', + labels: [LABEL_A], + limit: 5 + }) + + expect(res.success).toBe(true) + expect(res.data.length).toBeGreaterThan(0) + + // All results must be Scientist records (i.e. bio topics are scientific) + const bios = res.data.map((r) => String(r.bio ?? '')) + bios.forEach((bio) => { + // Chef bios contain food/cuisine keywords; none should appear + expect(bio.toLowerCase()).not.toMatch(/cuisine|chocolate|dessert|fermentation|kaiseki|pastry/) + }) + }) + + it('Chef search does not return Scientist records', async () => { + const res = await db.ai.search({ + propertyName: PROPERTY, + query: 'cooking techniques and gastronomy', + labels: [LABEL_B], + limit: 5 + }) + + expect(res.success).toBe(true) + expect(res.data.length).toBeGreaterThan(0) + + // All results must be Chef records (bios are food-oriented) + const bios = res.data.map((r) => String(r.bio ?? 
'')) + bios.forEach((bio) => { + // Scientist bios contain science keywords; none should appear + expect(bio.toLowerCase()).not.toMatch(/relativity|dna|computer|quantum|electron|crystallography/) + }) + }) + + it('duplicate (label, propertyName) index creation returns 409', async () => { + await expect(db.ai.indexes.create({ label: LABEL_A, propertyName: PROPERTY })).rejects.toMatchObject({ + status: 409 + }) + }) +}) diff --git a/packages/javascript-sdk/tests/records.importJson.upsert-nested-linking.e2e.test.ts b/packages/javascript-sdk/tests/records.importJson.upsert-nested-linking.e2e.test.ts index 0b0b1863..47b3a9a5 100644 --- a/packages/javascript-sdk/tests/records.importJson.upsert-nested-linking.e2e.test.ts +++ b/packages/javascript-sdk/tests/records.importJson.upsert-nested-linking.e2e.test.ts @@ -70,8 +70,8 @@ describe('records.importJson upsert nested linking (e2e)', () => { (r.type.includes('RUSHDB_DEFAULT_RELATION') || r.type.includes('__RUSHDB__RELATION__DEFAULT__')) ) - const departmentId = departments.data[0].id() - const companyIds = companies.data.map((c) => c.id()) + const departmentId = departments.data[0].id + const companyIds = companies.data.map((c) => c.id) // For each company, there must be at least one relation to the department (either direction) const relatedPairs = new Set(rels.map((r) => `${r.sourceId}->${r.targetId}`)) diff --git a/packages/javascript-sdk/tests/records.upsert.e2e.test.ts b/packages/javascript-sdk/tests/records.upsert.e2e.test.ts index 7aad4331..0ed5fc19 100644 --- a/packages/javascript-sdk/tests/records.upsert.e2e.test.ts +++ b/packages/javascript-sdk/tests/records.upsert.e2e.test.ts @@ -98,7 +98,7 @@ describe('records.upsert (e2e)', () => { }) // Same ID confirms it matched on all properties - expect(first.id()).toBe(second.id()) + expect(first.id).toBe(second.id) // Third upsert with different property value (should create new record since not all props match) const third = await db.records.upsert({ @@ -109,7 +109,7 
@@ describe('records.upsert (e2e)', () => { expect(third.data.orderId).toBe('ORD-001') expect(third.data.status).toBe('completed') // Different ID confirms it's a new record (status didn't match) - expect(first.id()).not.toBe(third.id()) + expect(first.id).not.toBe(third.id) // Cleanup await db.records.delete({ labels: ['Order'], where: { tenantId } }) @@ -138,7 +138,7 @@ describe('records.upsert (e2e)', () => { expect(second.data.value).toBe('light') expect(second.data.tenantId).toBe(tenantId) // Same ID confirms it's an update - expect(first.id()).toBe(second.id()) + expect(first.id).toBe(second.id) // Cleanup await db.records.delete({ labels: ['Setting'], where: { tenantId } }) @@ -241,8 +241,8 @@ describe('records.upsert (e2e)', () => { expect(third.data.supplier).toBe('ABC Corp') // All should be the same record - expect(first.id()).toBe(second.id()) - expect(second.id()).toBe(third.id()) + expect(first.id).toBe(second.id) + expect(second.id).toBe(third.id) // Cleanup await db.records.delete({ labels: ['Inventory'], where: { tenantId } }) @@ -309,7 +309,7 @@ describe('records.upsert (e2e)', () => { }) expect(updated.data.value).toBe('updated') - expect(result.id()).toBe(updated.id()) + expect(result.id).toBe(updated.id) // Cleanup await db.records.delete({ where: { tenantId } }) diff --git a/packages/mcp-server/Dockerfile b/packages/mcp-server/Dockerfile index b9325fc4..b920e059 100644 --- a/packages/mcp-server/Dockerfile +++ b/packages/mcp-server/Dockerfile @@ -12,5 +12,10 @@ FROM node_base COPY package.json package-lock.json ./ COPY --from=builder /app/build ./build ENV NODE_ENV=production -RUN --mount=type=cache,target=/root/.npm npm ci --ignore-scripts --omit-dev +# MCP_TRANSPORT: "stdio" (default) | "http" +# Override with -e MCP_TRANSPORT=http to enable the Streamable HTTP server +ENV MCP_TRANSPORT=stdio +# HTTP mode: server listens on this port (only relevant when MCP_TRANSPORT=http) +EXPOSE 3001 +RUN --mount=type=cache,target=/root/.npm npm ci 
--ignore-scripts --omit=dev ENTRYPOINT ["node", "/app/build/index.js"] diff --git a/packages/mcp-server/LOCAL_DEVELOPMENT.md b/packages/mcp-server/LOCAL_DEVELOPMENT.md new file mode 100644 index 00000000..f2ed1ecd --- /dev/null +++ b/packages/mcp-server/LOCAL_DEVELOPMENT.md @@ -0,0 +1,121 @@ +# Local Development & Testing with ngrok + +This guide explains how to run and test the RushDB MCP server locally, including the OAuth authorization flow end-to-end using ngrok to expose your local servers publicly. + +## Prerequisites + +- [ngrok](https://ngrok.com/) installed and authenticated (`ngrok config add-authtoken <your-token>`) +- pnpm installed +- Dependencies installed (`pnpm install` from the repo root) + +--- + +## 1. Start the Platform + +From the **repo root**, start the platform (API on port 3000 + dashboard on port 3005): + +```bash +pnpm dev +``` + +This runs `platform/core` (NestJS REST API) and `platform/dashboard` (React UI) concurrently. + +--- + +## 2. Expose the MCP Server with ngrok + +**Start ngrok before the MCP server** — you need the public URL first to set `MCP_RESOURCE_URL`. + +Only the MCP server (port 3001) needs to be publicly reachable. ChatGPT's servers call it directly to fetch OAuth metadata and exchange tokens. Everything else (consent page, login) happens in your browser, which can reach `localhost:3005` without a tunnel. + +```bash +ngrok http 3001 +``` + +Copy the `https://` URL from the output — you'll need it as `MCP_RESOURCE_URL` in the next step. + +--- + +## 3.
Build and Run the MCP Server + +First build the MCP server: + +```bash +# from repo root +pnpm --filter @rushdb/mcp-server build + +# or from packages/mcp-server +pnpm build +``` + +Then start it in HTTP mode, substituting the ngrok URL you got in the previous step: + +```bash +MCP_TRANSPORT=http \ +RUSHDB_API_URL=http://localhost:3000/api/v1 \ +RUSHDB_OAUTH_ISSUER=http://localhost:3000 \ +MCP_RESOURCE_URL=https://<your-subdomain>.ngrok-free.app \ +RUSHDB_AES_256_ENCRYPTION_KEY=32SymbolStringForTokenEncryption \ +node packages/mcp-server/build/index.js +``` + +> **`MCP_RESOURCE_URL`** must be the ngrok URL from step 2. Every time ngrok restarts it issues a new URL (unless you have a paid static domain), so you'll need to restart the MCP server with the updated value. + +> **`RUSHDB_AES_256_ENCRYPTION_KEY`** must be exactly 32 characters. It is used to sign and verify JWT access tokens — use the same value the platform is configured with. + +--- + +## 4. Connect an AI Client + +### ChatGPT + +1. Open **ChatGPT** → Settings → Connected Apps → Add MCP connector +2. Enter the MCP server URL: `https://<your-subdomain>.ngrok-free.app/mcp` +3. ChatGPT will trigger the OAuth flow: + - Your browser is redirected to `http://localhost:3005/oauth/consent` — no tunnel needed for the dashboard + - Log in to RushDB (if prompted) + - Select the project to grant access to → click **Allow** +4. ChatGPT receives an access token and can now use the 31 RushDB tools. + +### MCP Inspector (quick local test, no OAuth) + +To test tools directly without an AI client: + +```bash +pnpm --filter @rushdb/mcp-server inspector +# or: npx @modelcontextprotocol/inspector packages/mcp-server/build/index.js +``` + +Set the transport to **HTTP** and point it at `http://localhost:3001/mcp`. The inspector lets you call any tool interactively and inspect request/response payloads. + +--- + +## 5.
Verify the OAuth Discovery Endpoints + +Before connecting an AI client, confirm the discovery endpoints are reachable: + +```bash +# OpenID configuration (needed for RFC 8414 / OAuth 2.0 Authorization Server Metadata) +curl https://<your-subdomain>.ngrok-free.app/.well-known/openid-configuration | jq + +# MCP endpoint health +curl https://<your-subdomain>.ngrok-free.app/mcp +``` + +--- + +## 6. Revoke a Connection + +Connected applications can be revoked from the **Workspace Settings** page (`/workspace-settings`) in the dashboard under the "Connected Applications" section. + +--- + +## Environment Variable Reference + +| Variable | Description | Example | +|----------|-------------|---------| +| `MCP_TRANSPORT` | Transport mode. Use `http` for OAuth/remote clients | `http` | +| `RUSHDB_API_URL` | Base URL of the RushDB REST API | `http://localhost:3000/api/v1` | +| `RUSHDB_OAUTH_ISSUER` | OAuth issuer — must match the platform origin | `http://localhost:3000` | +| `MCP_RESOURCE_URL` | Public URL of this MCP server (used in OAuth metadata) | `https://xxxx.ngrok-free.app` | +| `RUSHDB_AES_256_ENCRYPTION_KEY` | 32-character key for JWT signing/verification | `32SymbolStringForTokenEncryption` | diff --git a/packages/mcp-server/README.md b/packages/mcp-server/README.md index a6617492..93bb9ea4 100644 --- a/packages/mcp-server/README.md +++ b/packages/mcp-server/README.md @@ -1,16 +1,16 @@ # RushDB MCP Server -A Model Context Protocol server providing access to RushDB's Labeled Meta Property Graph (LMPG) database. +A Model Context Protocol server providing AI agents and LLMs direct access to RushDB — the platform that turns any data into intelligence. Push records, events, AI outputs, and configs; RushDB structures, connects, and makes them queryable instantly.
## Features -- **Record Management**: Create, read, update, and delete records -- **Graph Operations**: Attach and detach relationships between records -- **Advanced Querying**: Search across records using RushDB's flexible query language -- **Label & Property Discovery**: Browse labels and properties in your database +- **Record Management**: Create, read, update, and delete structured knowledge records +- **Relationship Operations**: Attach and detach relationships between records +- **Advanced Querying**: Search across records using RushDB's flexible JSON query language +- **Label & Property Discovery**: Browse labels and properties in your knowledge base - **Bulk Operations**: Efficient bulk create and delete operations - **Data Export**: Export records to CSV format -- **LMPG Architecture**: Work with RushDB's revolutionary property-first graph model +- **AI Memory Layer**: Use RushDB as persistent, queryable memory for AI agents and workflows ## Quick Start diff --git a/packages/mcp-server/index.ts b/packages/mcp-server/index.ts index e919d8f8..5d04393a 100644 --- a/packages/mcp-server/index.ts +++ b/packages/mcp-server/index.ts @@ -14,6 +14,7 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js' import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js' +import { WebStandardStreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js' import { CallToolRequestSchema, ErrorCode, @@ -23,560 +24,908 @@ import { GetPromptRequestSchema } from '@modelcontextprotocol/sdk/types.js' import { ToolName, tools } from './tools.js' -import { FindLabels } from './tools/FindLabels.js' -import { CreateRecord } from './tools/CreateRecord.js' -import { UpdateRecord } from './tools/UpdateRecord.js' -import { DeleteRecord } from './tools/DeleteRecord.js' -import { FindRecords } from './tools/FindRecords.js' -import { GetRecord } from './tools/GetRecord.js' -import { AttachRelation } from 
'./tools/AttachRelation.js' -import { DetachRelation } from './tools/DetachRelation.js' -import { FindRelationships } from './tools/FindRelationships.js' -import { BulkCreateRecords } from './tools/BulkCreateRecords.js' -import { BulkDeleteRecords } from './tools/BulkDeleteRecords.js' -import { ExportRecords } from './tools/ExportRecords.js' -import { OpenBrowser } from './tools/OpenBrowser.js' -import { HelpAddToClient } from './tools/HelpAddToClient.js' -import { SetRecord } from './tools/SetRecord.js' -import { FindOneRecord } from './tools/FindOneRecord.js' -import { FindUniqRecord } from './tools/FindUniqRecord.js' -import { DeleteRecordById } from './tools/DeleteRecordById.js' -import { PropertyValues } from './tools/PropertyValues.js' -import { FindProperties } from './tools/FindProperties.js' -import { FindPropertyById } from './tools/FindPropertyById.js' -import { DeleteProperty } from './tools/DeleteProperty.js' -import { TransactionBegin } from './tools/TransactionBegin.js' -import { TransactionCommit } from './tools/TransactionCommit.js' -import { TransactionRollback } from './tools/TransactionRollback.js' -import { TransactionGet } from './tools/TransactionGet.js' -import { GetSettings } from './tools/GetSettings.js' -import { GetRecordsByIds } from './tools/GetRecordsByIds.js' +import { findLabels } from './tools/findLabels.js' +import { createRecord } from './tools/createRecord.js' +import { updateRecord } from './tools/updateRecord.js' +import { deleteRecord } from './tools/deleteRecord.js' +import { findRecords } from './tools/findRecords.js' +import { getRecord } from './tools/getRecord.js' +import { attachRelation } from './tools/attachRelation.js' +import { detachRelation } from './tools/detachRelation.js' +import { findRelationships } from './tools/findRelationships.js' +import { bulkCreateRecords } from './tools/bulkCreateRecords.js' +import { bulkDeleteRecords } from './tools/bulkDeleteRecords.js' +import { exportRecords } from 
'./tools/exportRecords.js' +import { helpAddToClient } from './tools/helpAddToClient.js' +import { setRecord } from './tools/setRecord.js' +import { findOneRecord } from './tools/findOneRecord.js' +import { findUniqRecord } from './tools/findUniqRecord.js' +import { deleteRecordById } from './tools/deleteRecordById.js' +import { propertyValues } from './tools/propertyValues.js' +import { findProperties } from './tools/findProperties.js' +import { findPropertyById } from './tools/findPropertyById.js' +import { deleteProperty } from './tools/deleteProperty.js' +import { getRecordsByIds } from './tools/getRecordsByIds.js' +import { findEmbeddingIndexes } from './tools/findEmbeddingIndexes.js' +import { createEmbeddingIndex } from './tools/createEmbeddingIndex.js' +import { deleteEmbeddingIndex } from './tools/deleteEmbeddingIndex.js' +import { getEmbeddingIndexStats } from './tools/getEmbeddingIndexStats.js' +import { semanticSearch } from './tools/semanticSearch.js' +import { getOntology } from './tools/getOntology.js' +import { getOntologyMarkdown } from './tools/getOntologyMarkdown.js' import SYSTEM_PROMPT from './systemPrompt.js' +import { getSearchQuerySpec } from './tools/getSearchQuerySpec.js' +import { requestContext, RequestContext } from './util/db.js' +import { resolveRequestContext, makeMcpAuthError } from './util/auth.js' + +// ─── MCP Server factory ─────────────────────────────────────────────────────── +// Each call creates an independent MCP Server instance with all handlers wired. +// In STDIO mode a single server is created once. +// In HTTP mode a new server is created per request (required because Server +// supports only one transport at a time). + +function createMcpServer(): Server { + const server = new Server( + { + name: 'rushdb-mcp-server', + version: '1.0.0' + }, + { + // `instructions` is sent in the MCP initialize response. + // Conforming clients (ChatGPT, Claude Desktop, etc.) 
inject this as + // system-level context before any tool call — it is the canonical way + // to deliver server-wide behavioural rules to the model automatically. + instructions: SYSTEM_PROMPT, + capabilities: { + tools: {}, + prompts: {} + } + } + ) -const server = new Server( - { - name: 'rushdb-mcp-server', - version: '1.0.0' - }, - { - capabilities: { - tools: { - list: true, - call: true - }, - prompts: {} + server.setRequestHandler(ListToolsRequestSchema, async () => { + return { + tools } - } -) + }) -server.setRequestHandler(ListToolsRequestSchema, async () => { - return { - tools - } -}) - -// Expose RushDB system prompt via MCP Prompts so clients can fetch and inject it -server.setRequestHandler(ListPromptsRequestSchema, async () => { - return { - prompts: [ - { - name: 'rushdb.queryBuilder', - description: - 'RushDB Query Builder system prompt: guides the model to discover labels/properties first and construct validated SearchQuery objects before calling find-related tools.', - arguments: [] - } - ] - } -}) + // Expose RushDB system prompt via MCP Prompts so clients can fetch and inject it + server.setRequestHandler(ListPromptsRequestSchema, async () => { + return { + prompts: [ + { + name: 'rushdb.queryBuilder', + description: + 'RushDB Query Builder system prompt: guides the model to discover labels/properties first and construct validated SearchQuery objects before calling find-related tools.', + arguments: [] + } + ] + } + }) -server.setRequestHandler(GetPromptRequestSchema, async (request: any) => { - const name = request.params.name as string - if (name !== 'rushdb.queryBuilder') { - throw new McpError(ErrorCode.InvalidRequest, `Unknown prompt: ${name}`) - } - return { - description: - 'RushDB Query Builder system prompt to enable discovery-first, schema-safe SearchQuery construction before find-related tool calls.', - messages: [ - { - role: 'user', - content: { - type: 'text', - text: SYSTEM_PROMPT + 
server.setRequestHandler(GetPromptRequestSchema, async (request: any) => { + const name = request.params.name as string + if (name !== 'rushdb.queryBuilder') { + throw new McpError(ErrorCode.InvalidRequest, `Unknown prompt: ${name}`) + } + return { + description: + 'RushDB Query Builder system prompt to enable discovery-first, schema-safe SearchQuery construction before find-related tool calls.', + messages: [ + { + role: 'user', + content: { + type: 'text', + text: SYSTEM_PROMPT + } } - } - ] - } -}) - -server.setRequestHandler(CallToolRequestSchema, async (request: any) => { - const toolName = request.params.name as ToolName - const args = request.params.arguments || {} - - try { - switch (toolName) { - case 'FindLabels': - const foundLabels = await FindLabels({ - where: args.where as Record | undefined, - limit: args.limit as number | undefined, - skip: args.skip as number | undefined, - orderBy: args.orderBy as Record | undefined - }) - return { - content: [ - { - type: 'text', - text: - foundLabels.length > 0 ? - foundLabels.map((l: any) => `${l.name}: ${l.count} records`).join('\n') - : 'No labels found' - } - ] + ] + } + }) + + server.setRequestHandler(CallToolRequestSchema, async (request: any) => { + const toolName = request.params.name as ToolName + const args = request.params.arguments || {} + + try { + switch (toolName) { + case 'getOntologyMarkdown': { + const md = await getOntologyMarkdown({ + labels: args.labels as string[] | undefined + }) + return { + content: [ + { + type: 'text', + text: md ?? 'No ontology data found.' 
+ } + ] + } } - case 'CreateRecord': - const createResult = await CreateRecord({ - label: args.label as string, - data: args.data as Record, - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: `${createResult.message}\nID: ${createResult.id}` - } - ] + case 'getOntology': { + const ontology = await getOntology({ + labels: args.labels as string[] | undefined + }) + return { + content: [ + { + type: 'text', + text: ontology ? JSON.stringify(ontology, null, 2) : 'No ontology data found.' + } + ] + } } - case 'UpdateRecord': - const updateResult = await UpdateRecord({ - recordId: args.recordId as string, - label: args.label as string, - data: args.data as Record, - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: updateResult.message - } - ] - } + case 'findLabels': + const foundLabels = await findLabels({ + where: args.where as Record | undefined, + limit: args.limit as number | undefined, + skip: args.skip as number | undefined, + orderBy: args.orderBy as Record | undefined + }) + return { + content: [ + { + type: 'text', + text: + foundLabels.length > 0 ? 
+ foundLabels.map((l: any) => `${l.name}: ${l.count} records`).join('\n') + : 'No labels found' + } + ] + } - case 'DeleteRecord': - const deleteResult = await DeleteRecord({ - recordId: args.recordId as string, - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: deleteResult.message - } - ] - } + case 'createRecord': + const createResult = await createRecord({ + label: args.label as string, + data: args.data as Record, + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: `${createResult.message}\nID: ${createResult.id}` + } + ] + } - case 'FindRecords': - const foundRecords = await FindRecords({ - labels: args.labels as string[] | undefined, - where: args.where as Record | undefined, - limit: args.limit as number | undefined, - skip: args.skip as number | undefined, - orderBy: args.orderBy as Record | undefined, - aggregate: args.aggregate as - | Record - | undefined, - groupBy: args.groupBy as string[] | undefined - }) - - const isAggregate = Boolean(args.aggregate) || Boolean(args.groupBy) - return { - content: [ - { - type: 'text', - text: - Array.isArray(foundRecords) && foundRecords.length === 0 ? - 'No matching records found.' 
- : JSON.stringify(foundRecords, null, 2) - } - ] - } + case 'updateRecord': + const updateResult = await updateRecord({ + recordId: args.recordId as string, + label: args.label as string, + data: args.data as Record, + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: updateResult.message + } + ] + } - case 'GetRecord': - const record = await GetRecord({ - recordId: args.recordId as string - }) - return { - content: [ - { - type: 'text', - text: JSON.stringify(record, null, 2) - } - ] - } + case 'deleteRecord': + const deleteResult = await deleteRecord({ + recordId: args.recordId as string, + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: deleteResult.message + } + ] + } - case 'AttachRelation': - const attachResult = await AttachRelation({ - sourceId: args.sourceId as string, - targetId: args.targetId as string | undefined, - targetIds: args.targetIds as string[] | undefined, - relationType: args.relationType as string | undefined, - direction: args.direction as 'outgoing' | 'incoming' | 'bidirectional' | undefined, - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: attachResult.message - } - ] - } + case 'findRecords': + const foundRecords = await findRecords({ + labels: args.labels as string[] | undefined, + where: args.where as Record | undefined, + limit: args.limit as number | undefined, + skip: args.skip as number | undefined, + orderBy: args.orderBy as Record | undefined, + aggregate: args.aggregate as + | Record + | undefined, + groupBy: args.groupBy as string[] | undefined + }) + + const isAggregate = Boolean(args.aggregate) || Boolean(args.groupBy) + const isEmpty = + isAggregate ? false : ( + Array.isArray((foundRecords as any)?.data) && (foundRecords as any).data.length === 0 + ) + return { + content: [ + { + type: 'text', + text: isEmpty ? 
'No matching records found.' : JSON.stringify(foundRecords, null, 2) + } + ] + } - case 'DetachRelation': - const detachResult = await DetachRelation({ - sourceId: args.sourceId as string, - targetId: args.targetId as string | undefined, - targetIds: args.targetIds as string[] | undefined, - relationType: args.relationType as string | undefined, - direction: args.direction as 'outgoing' | 'incoming' | 'bidirectional' | undefined, - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: detachResult.message - } - ] - } + case 'getRecord': + const record = await getRecord({ + recordId: args.recordId as string + }) + return { + content: [ + { + type: 'text', + text: JSON.stringify(record, null, 2) + } + ] + } - case 'FindRelationships': - const relations = await FindRelationships({ - where: args.where as Record | undefined, - limit: args.limit as number | undefined, - skip: args.skip as number | undefined, - orderBy: args.orderBy as Record | undefined - }) - return { - content: [ - { - type: 'text', - text: relations.length > 0 ? 
JSON.stringify(relations, null, 2) : 'No relations found' - } - ] - } + case 'attachRelation': + const attachResult = await attachRelation({ + sourceId: args.sourceId as string, + targetId: args.targetId as string | undefined, + targetIds: args.targetIds as string[] | undefined, + relationType: args.relationType as string | undefined, + direction: args.direction as 'outgoing' | 'incoming' | 'bidirectional' | undefined, + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: attachResult.message + } + ] + } - case 'BulkCreateRecords': - const bulkCreateResult = await BulkCreateRecords({ - label: args.label as string, - data: args.data as Record[], - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: `${bulkCreateResult.message}\nIDs: ${bulkCreateResult.ids.join(', ')}` - } - ] - } + case 'detachRelation': + const detachResult = await detachRelation({ + sourceId: args.sourceId as string, + targetId: args.targetId as string | undefined, + targetIds: args.targetIds as string[] | undefined, + relationType: args.relationType as string | undefined, + direction: args.direction as 'outgoing' | 'incoming' | 'bidirectional' | undefined, + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: detachResult.message + } + ] + } - case 'BulkDeleteRecords': - const bulkDeleteResult = await BulkDeleteRecords({ - labels: args.labels as string[] | undefined, - where: args.where as Record, - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: bulkDeleteResult.message - } - ] - } + case 'findRelationships': + const relations = await findRelationships({ + where: args.where as Record | undefined, + limit: args.limit as number | undefined, + skip: args.skip as number | undefined, + orderBy: args.orderBy as Record | undefined + }) + return { + content: [ 
+ { + type: 'text', + text: relations.length > 0 ? JSON.stringify(relations, null, 2) : 'No relations found' + } + ] + } - case 'ExportRecords': - const exportResult = await ExportRecords({ - labels: args.labels as string[] | undefined, - where: args.where as Record | undefined, - limit: args.limit as number | undefined - }) - return { - content: [ - { - type: 'text', - text: `Export completed at ${exportResult.dateTime}\n\n${exportResult.csv}` - } - ] - } + case 'bulkCreateRecords': + const bulkCreateResult = await bulkCreateRecords({ + label: args.label as string, + data: args.data as Record[], + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: `${bulkCreateResult.message}\nIDs: ${bulkCreateResult.ids.join(', ')}` + } + ] + } - case 'OpenBrowser': - const openBrowserResult = await OpenBrowser({ - url: args.url as string - }) - return { - content: [ - { - type: 'text', - text: openBrowserResult.message - } - ] - } + case 'bulkDeleteRecords': + const bulkDeleteResult = await bulkDeleteRecords({ + labels: args.labels as string[] | undefined, + where: args.where as Record, + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: bulkDeleteResult.message + } + ] + } - case 'HelpAddToClient': - const helpAddToClientResult = await HelpAddToClient() - return { - content: [ - { - type: 'text', - text: helpAddToClientResult.instructions - } - ] - } + case 'exportRecords': + const exportResult = await exportRecords({ + labels: args.labels as string[] | undefined, + where: args.where as Record | undefined, + limit: args.limit as number | undefined + }) + return { + content: [ + { + type: 'text', + text: `Export completed at ${exportResult.dateTime}\n\n${exportResult.csv}` + } + ] + } - case 'GetQueryBuilderPrompt': - return { - content: [ - { - type: 'text', - text: SYSTEM_PROMPT - } - ] - } + case 'helpAddToClient': + const helpAddToClientResult = await 
helpAddToClient() + return { + content: [ + { + type: 'text', + text: helpAddToClientResult.instructions + } + ] + } - case 'SetRecord': - const setResult = await SetRecord({ - recordId: args.recordId as string, - label: args.label as string, - data: args.data as Record, - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: setResult.message - } - ] - } + case 'getQueryBuilderPrompt': + return { + content: [ + { + type: 'text', + text: SYSTEM_PROMPT + } + ] + } - case 'FindOneRecord': - const foundOneRecord = await FindOneRecord({ - labels: args.labels as string[] | undefined, - where: args.where as Record | undefined - }) - return { - content: [ - { - type: 'text', - text: foundOneRecord ? JSON.stringify(foundOneRecord, null, 2) : 'No matching record found.' - } - ] + case 'getSearchQuerySpec': { + const spec = await getSearchQuerySpec() + return { + content: [ + { + type: 'text', + text: spec.spec + } + ] + } } - case 'FindUniqRecord': - const foundUniqRecord = await FindUniqRecord({ - labels: args.labels as string[] | undefined, - where: args.where as Record | undefined - }) - return { - content: [ - { - type: 'text', - text: foundUniqRecord ? JSON.stringify(foundUniqRecord, null, 2) : 'No unique record found.' 
- } - ] - } + case 'setRecord': + const setResult = await setRecord({ + recordId: args.recordId as string, + label: args.label as string, + data: args.data as Record, + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: setResult.message + } + ] + } - case 'DeleteRecordById': - const deleteByIdResult = await DeleteRecordById({ - recordId: args.recordId as string, - transactionId: args.transactionId as string | undefined - }) - return { - content: [ - { - type: 'text', - text: deleteByIdResult.message - } - ] - } + case 'findOneRecord': + const foundOneRecord = await findOneRecord({ + labels: args.labels as string[] | undefined, + where: args.where as Record | undefined + }) + return { + content: [ + { + type: 'text', + text: foundOneRecord ? JSON.stringify(foundOneRecord, null, 2) : 'No matching record found.' + } + ] + } - case 'PropertyValues': - const propertyValues = await PropertyValues({ - propertyId: args.propertyId as string, - query: args.query as string | undefined, - orderBy: args.orderBy as 'asc' | 'desc' | undefined, - limit: args.limit as number | undefined, - skip: args.skip as number | undefined - }) - return { - content: [ - { - type: 'text', - text: propertyValues ? JSON.stringify(propertyValues, null, 2) : 'No property values found' - } - ] - } + case 'findUniqRecord': + const foundUniqRecord = await findUniqRecord({ + labels: args.labels as string[] | undefined, + where: args.where as Record | undefined + }) + return { + content: [ + { + type: 'text', + text: foundUniqRecord ? JSON.stringify(foundUniqRecord, null, 2) : 'No unique record found.' + } + ] + } - case 'FindProperties': - const foundProperties = await FindProperties({ - where: args.where as Record | undefined, - limit: args.limit as number | undefined, - skip: args.skip as number | undefined, - orderBy: args.orderBy as Record | undefined - }) - return { - content: [ - { - type: 'text', - text: - foundProperties.length > 0 ? 
JSON.stringify(foundProperties, null, 2) : 'No properties found' - } - ] - } - case 'GetRecordsByIds': - const recordsByIds = await GetRecordsByIds({ recordIds: args.recordIds as string[] }) - return { - content: [ - { - type: 'text', - text: recordsByIds.count > 0 ? JSON.stringify(recordsByIds.data, null, 2) : 'No records found' - } - ] + case 'deleteRecordById': + const deleteByIdResult = await deleteRecordById({ + recordId: args.recordId as string, + transactionId: args.transactionId as string | undefined + }) + return { + content: [ + { + type: 'text', + text: deleteByIdResult.message + } + ] + } + + case 'propertyValues': { + const pvResult = await propertyValues({ + propertyId: args.propertyId as string, + query: args.query as string | undefined, + orderBy: args.orderBy as 'asc' | 'desc' | undefined, + limit: args.limit as number | undefined, + skip: args.skip as number | undefined + }) + return { + content: [ + { + type: 'text', + text: pvResult ? JSON.stringify(pvResult, null, 2) : 'No property values found' + } + ] + } } - case 'FindPropertyById': - const foundProperty = await FindPropertyById({ - propertyId: args.propertyId as string - }) - return { - content: [ - { - type: 'text', - text: foundProperty ? JSON.stringify(foundProperty, null, 2) : 'Property not found' - } - ] + case 'findProperties': + const foundProperties = await findProperties({ + where: args.where as Record | undefined, + limit: args.limit as number | undefined, + skip: args.skip as number | undefined, + orderBy: args.orderBy as Record | undefined + }) + return { + content: [ + { + type: 'text', + text: + foundProperties.length > 0 ? + JSON.stringify(foundProperties, null, 2) + : 'No properties found' + } + ] + } + case 'getRecordsByIds': + const recordsByIds = await getRecordsByIds({ recordIds: args.recordIds as string[] }) + return { + content: [ + { + type: 'text', + text: recordsByIds.count > 0 ? 
JSON.stringify(recordsByIds.data, null, 2) : 'No records found' + } + ] + } + + case 'findPropertyById': + const foundProperty = await findPropertyById({ + propertyId: args.propertyId as string + }) + return { + content: [ + { + type: 'text', + text: foundProperty ? JSON.stringify(foundProperty, null, 2) : 'Property not found' + } + ] + } + + case 'deleteProperty': + const deletePropertyResult = await deleteProperty({ + propertyId: args.propertyId as string + }) + return { + content: [ + { + type: 'text', + text: deletePropertyResult.message + } + ] + } + + case 'findEmbeddingIndexes': { + const indexes = await findEmbeddingIndexes() + return { + content: [ + { + type: 'text', + text: + indexes && indexes.length > 0 ? + JSON.stringify(indexes, null, 2) + : 'No embedding indexes found' + } + ] + } } - case 'DeleteProperty': - const deletePropertyResult = await DeleteProperty({ - propertyId: args.propertyId as string - }) - return { - content: [ - { - type: 'text', - text: deletePropertyResult.message - } - ] + case 'createEmbeddingIndex': { + const newIndex = await createEmbeddingIndex({ + label: args.label as string, + propertyName: args.propertyName as string + }) + return { + content: [ + { + type: 'text', + text: newIndex ? JSON.stringify(newIndex, null, 2) : 'Embedding index created' + } + ] + } } - case 'TransactionBegin': - const beginResult = await TransactionBegin({ - ttl: args.ttl as number | undefined - }) - return { - content: [ - { - type: 'text', - text: `${beginResult.message}\nTransaction ID: ${beginResult.transactionId}` - } - ] + case 'deleteEmbeddingIndex': { + const deleteIndexResult = await deleteEmbeddingIndex({ + indexId: args.indexId as string + }) + return { + content: [ + { + type: 'text', + text: + deleteIndexResult ? 
JSON.stringify(deleteIndexResult, null, 2) : 'Embedding index deleted' + } + ] + } } - case 'TransactionCommit': - const commitResult = await TransactionCommit({ - transactionId: args.transactionId as string - }) - return { - content: [ - { - type: 'text', - text: commitResult.message - } - ] + case 'getEmbeddingIndexStats': { + const stats = await getEmbeddingIndexStats({ + indexId: args.indexId as string + }) + return { + content: [ + { + type: 'text', + text: stats ? JSON.stringify(stats, null, 2) : 'No stats available' + } + ] + } } - case 'TransactionRollback': - const rollbackResult = await TransactionRollback({ - transactionId: args.transactionId as string - }) - return { - content: [ - { - type: 'text', - text: rollbackResult.message - } - ] + case 'semanticSearch': { + const searchResults = await semanticSearch({ + propertyName: args.propertyName as string, + query: args.query as string, + labels: args.labels as string[], + where: args.where as Record | undefined, + topK: args.topK as number | undefined, + limit: args.limit as number | undefined, + skip: args.skip as number | undefined + }) + return { + content: [ + { + type: 'text', + text: + searchResults && searchResults.length > 0 ? + JSON.stringify(searchResults, null, 2) + : 'No matching records found.' + } + ] + } } - case 'TransactionGet': - const transactionInfo = await TransactionGet({ - transactionId: args.transactionId as string - }) + default: + throw new McpError(ErrorCode.MethodNotFound, 'Tool not found') + } + } catch (error) { + console.error('Error executing tool:', error) + + if (error instanceof McpError) { + throw error + } + + const message = error instanceof Error ? 
error.message : String(error) + + // Check if error is related to API endpoint or missing env vars + if ( + message.includes('RUSHDB_API_KEY') || + message.includes('Invalid URL') || + message.includes('Failed to fetch') || + message.includes('Network error') || + !process.env.RUSHDB_API_KEY + ) { return { content: [ { type: 'text', - text: JSON.stringify(transactionInfo, null, 2) + text: "It seems like you haven't configured your RushDB credentials. Would you like me to open the RushDB dashboard for you so you can sign up and get your credentials?" } ] } + } - case 'GetSettings': - const settings = await GetSettings() + // Map raw HTTP status codes from the SDK fetcher into actionable messages + const httpStatus = /^(\d{3})$/.exec(message.trim())?.[1] + if (httpStatus) { + const status = Number(httpStatus) + let hint: string + if (status === 400) { + hint = + 'Bad request (400): the query or payload is invalid. Check field names, operators, and required arguments. Call getSearchQuerySpec for the correct SearchQuery syntax.' + } else if (status === 401) { + hint = + 'Unauthorized (401): the API key is missing or invalid. Ask the user to verify RUSHDB_API_KEY.' + } else if (status === 403) { + hint = 'Forbidden (403): access denied. The API key does not have permission for this operation.' + } else if (status === 404) { + hint = + 'Not found (404): the requested resource does not exist. Verify that record IDs, project IDs, and label names are correct (labels are case-sensitive). Call getOntologyMarkdown to rediscover the available schema.' + } else if (status === 409) { + hint = + 'Conflict (409): the operation conflicts with existing data. Check for duplicate keys or conflicting constraints.' + } else if (status === 422) { + hint = + 'Unprocessable entity (422): the server could not process the request. Check field types and required fields match the schema.' 
+ } else if (status >= 500) { + hint = `Server error (${status}): an unexpected error occurred on the RushDB server. Retry the operation or contact support if it persists.` + } else { + hint = `HTTP error ${status} from RushDB API.` + } return { - content: [ - { - type: 'text', - text: JSON.stringify(settings, null, 2) - } - ] + content: [{ type: 'text', text: hint }] } + } - default: - throw new McpError(ErrorCode.MethodNotFound, 'Tool not found') - } - } catch (error) { - console.error('Error executing tool:', error) - - // Check if error is related to API endpoint or missing env vars - if ( - error instanceof Error && - (error.message.includes('RUSHDB_API_KEY') || - error.message.includes('Invalid URL') || - error.message.includes('Failed to fetch') || - error.message.includes('Network error') || - !process.env.RUSHDB_API_KEY) - ) { - // Open browser for configuration + // Generic error fallback return { content: [ { type: 'text', - text: "It seems like you haven't configured your RushDB credentials. Would you like me to open the RushDB dashboard for you so you can sign up and get your credentials?" + text: `Error: ${message}` } ] } } + }) + + return server +} // end createMcpServer() + +// ─── Launch ─────────────────────────────────────────────────────────────────── + +const mcpTransport = process.env.MCP_TRANSPORT || 'stdio' + +if (mcpTransport === 'http') { + // ── HTTP / OAuth mode ── + // Uses Hono + StreamableHTTPServerTransport. + // Each request gets its own MCP Server instance + per-request RushDB client. + const { Hono } = await import('hono') + const { serve } = await import('@hono/node-server') + + const httpApp = new Hono() + const port = Number(process.env.PORT ?? 3001) + const resourceUrl = process.env.MCP_RESOURCE_URL || `http://localhost:${port}` + const oauthIssuer = process.env.RUSHDB_OAUTH_ISSUER || 'https://api.rushdb.com' + + // Proxy all OAuth endpoints (authorize, token, etc.) 
through to platform/core, + // rewriting Location headers and JSON bodies so redirect URLs stay on the tunnel. + // RUSHDB_OAUTH_PRODUCTION_ISSUER lets you override the "real" public issuer URL + // that the platform might embed in its own discovery docs (defaults to api.rushdb.com). + const productionIssuer = process.env.RUSHDB_OAUTH_PRODUCTION_ISSUER || 'https://api.rushdb.com' + const rewriteUpstream = (s: string) => + s.replaceAll(oauthIssuer, resourceUrl).replaceAll(productionIssuer, resourceUrl) + + // Fetch and cache the proxied oauth-authorization-server doc (rewritten to resourceUrl). + // Used both for the oauth-authorization-server endpoint AND to synthesize + // /.well-known/openid-configuration (which platform/core doesn't implement). + let cachedAuthServerMeta: Record<string, unknown> | null = null + const getAuthServerMeta = async (): Promise<Record<string, unknown>> => { + if (cachedAuthServerMeta) return cachedAuthServerMeta + const res = await fetch(`${oauthIssuer}/.well-known/oauth-authorization-server`) + if (!res.ok) throw new Error(`upstream ${res.status}`) + const data = await res.json() + const upstreamIssuer: string = (data as any).issuer || oauthIssuer + cachedAuthServerMeta = JSON.parse( + rewriteUpstream(JSON.stringify(data).replaceAll(upstreamIssuer, resourceUrl)) + ) + return cachedAuthServerMeta! + }
error.message : String(error)}` - } - ] + // /.well-known/oauth-authorization-server — proxy + rewrite + const serveAuthServerMeta = async (c: any) => { + try { + return c.json(await getAuthServerMeta()) + } catch { + return c.json({ error: 'upstream_unavailable' }, 502 as any) } } -}) -const transport = new StdioServerTransport() -await server.connect(transport) + // /.well-known/openid-configuration — synthesized from oauth-authorization-server + // (platform/core doesn't expose this endpoint but ChatGPT requires it to discover + // the registration_endpoint for RFC 7591 Dynamic Client Registration). + const serveOpenIdConfig = async (c: any) => { + try { + const as = await getAuthServerMeta() + const oidc: Record<string, unknown> = { + issuer: as.issuer ?? resourceUrl, + authorization_endpoint: as.authorization_endpoint, + token_endpoint: as.token_endpoint, + jwks_uri: as.jwks_uri, + registration_endpoint: as.registration_endpoint, + scopes_supported: as.scopes_supported, + response_types_supported: as.response_types_supported ?? ['code'], + grant_types_supported: as.grant_types_supported, + code_challenge_methods_supported: as.code_challenge_methods_supported ?? ['S256'], + token_endpoint_auth_methods_supported: as.token_endpoint_auth_methods_supported ?? 
['none'], + subject_types_supported: ['public'], + id_token_signing_alg_values_supported: ['RS256'], + service_documentation: as.service_documentation + } + // remove undefined values + Object.keys(oidc).forEach((k) => oidc[k] === undefined && delete oidc[k]) + return c.json(oidc) + } catch { + return c.json({ error: 'upstream_unavailable' }, 502 as any) + } + } + + const proxyOAuthEndpoint = async (c: any) => { + const incomingUrl = new URL(c.req.url) + const upstreamUrl = new URL(`${oauthIssuer}${c.req.path}`) + incomingUrl.searchParams.forEach((v, k) => upstreamUrl.searchParams.set(k, v)) + + const method = c.req.method as string + const incomingHeaders = Object.fromEntries( + [...c.req.raw.headers.entries()].filter(([k]) => { + const lower = k.toLowerCase() + // Drop hop-by-hop and host headers that would confuse the upstream + return ![ + 'host', + 'connection', + 'keep-alive', + 'proxy-authorization', + 'te', + 'trailers', + 'transfer-encoding', + 'upgrade' + ].includes(lower) + }) + ) + const upstreamRes = await fetch(upstreamUrl.toString(), { + method, + headers: incomingHeaders, + body: method !== 'GET' && method !== 'HEAD' ? await c.req.text() : undefined, + redirect: 'manual' + }) + + const responseText = rewriteUpstream(await upstreamRes.text()) + const headers: Record<string, string> = {} + upstreamRes.headers.forEach((v, k) => { + const lower = k.toLowerCase() + // Strip content-length: rewriteUpstream changes body size; let the runtime recompute it. + // Strip transfer-encoding: chunked encoding from upstream must not propagate. + if (lower === 'content-length' || lower === 'transfer-encoding') return + headers[k] = lower === 'location' ? rewriteUpstream(v) : v + }) + return new Response(responseText || null, { status: upstreamRes.status, headers }) + } + + // OAuth / OpenID discovery — intercept ANY path ending in a well-known suffix + // before the /oauth/* catch-all claims it. 
This handles variants like: + // /.well-known/openid-configuration + // /mcp/.well-known/openid-configuration + // /oauth/token/.well-known/openid-configuration ← ChatGPT appends to token_endpoint + // /.well-known/oauth-authorization-server + // /oauth/token/.well-known/oauth-authorization-server + // etc. + httpApp.use('*', async (c, next) => { + const p = c.req.path + if (c.req.method === 'GET') { + if (p.endsWith('/.well-known/openid-configuration') || p === '/.well-known/openid-configuration') { + return serveOpenIdConfig(c) + } + if ( + p.endsWith('/.well-known/oauth-authorization-server') || + p === '/.well-known/oauth-authorization-server' + ) { + return serveAuthServerMeta(c) + } + } + return next() + }) + + // Proxy OAuth endpoints (authorize / token / JWKS / register / etc.) + httpApp.all('/oauth/*', proxyOAuthEndpoint) + httpApp.get('/api/v1/dashboard/mcp-oauth/*', proxyOAuthEndpoint) + // Proxy /.well-known/* paths not handled above (e.g. jwks.json) directly to platform/core + httpApp.get('/.well-known/*', proxyOAuthEndpoint) + + // Protected resource metadata — all path variants ChatGPT probes. + // Points authorization_servers at *this* server so ChatGPT follows the proxy routes. 
+ const protectedResourceDoc = { + resource: resourceUrl, + authorization_servers: [resourceUrl], + scopes_supported: ['projects:read', 'records:read', 'records:write'], + bearer_methods_supported: ['header'], + resource_documentation: 'https://docs.rushdb.com/mcp-server' + } + for (const path of [ + '/.well-known/oauth-protected-resource', + '/.well-known/oauth-protected-resource/mcp', + '/mcp/.well-known/oauth-protected-resource' + ]) { + httpApp.get(path, (c) => c.json(protectedResourceDoc)) + } + + // MCP Streamable HTTP endpoint + httpApp.post('/mcp', async (c) => { + const authHeader = c.req.header('Authorization') || '' + const bearer = authHeader.replace(/^Bearer\s+/i, '').trim() + const projectId = c.req.query('project_id') + const resourceMetadataUrl = `${resourceUrl}/.well-known/oauth-protected-resource` + + if (!bearer) { + c.header( + 'WWW-Authenticate', + `Bearer resource_metadata="${resourceMetadataUrl}", error="unauthorized", error_description="No token provided"` + ) + return c.json( + makeMcpAuthError( + resourceMetadataUrl, + 'unauthorized', + 'No token provided. Link your RushDB account to continue.' + ), + 401 + ) + } + + let ctx: RequestContext + try { + ctx = await resolveRequestContext(bearer, projectId) + } catch (e) { + c.header( + 'WWW-Authenticate', + `Bearer resource_metadata="${resourceMetadataUrl}", error="invalid_token", error_description="Token verification failed"` + ) + return c.json( + makeMcpAuthError( + resourceMetadataUrl, + 'invalid_token', + 'Token verification failed. Please re-link your RushDB account.' + ), + 401 + ) + } + + const mcpServer = createMcpServer() + // WebStandardStreamableHTTPServerTransport works directly with Fetch API + // Request/Response, which is exactly what Hono provides via c.req.raw. + // Create a fresh transport per request (stateless — sessionIdGenerator: undefined). + // enableJsonResponse: true returns a single JSON response instead of SSE stream. 
+ const transport = new WebStandardStreamableHTTPServerTransport({ enableJsonResponse: true }) + + // Run inside per-request AsyncLocalStorage context so all tool handlers + // can access db, scopes, etc. via requestContext.getStore(). + return new Promise((resolve, reject) => { + requestContext.run(ctx, async () => { + try { + await mcpServer.connect(transport) + const response = await transport.handleRequest(c.req.raw) + resolve(response) + } catch (e) { + reject(e) + } + }) + }).catch((err) => { + console.error('[MCP HTTP] transport error:', err) + return new Response(JSON.stringify({ error: 'internal_error', message: String(err) }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }) + }) + }) + + // GET /mcp — required for SSE-based MCP clients (optional but good practice) + httpApp.get('/mcp', (c) => c.json({ error: 'method_not_allowed', error_description: 'Use POST /mcp' }, 405)) + + serve({ fetch: httpApp.fetch, port }, (info) => { + process.stderr.write(`RushDB MCP HTTP server running on port ${info.port}\n`) + process.stderr.write(`Resource URL: ${resourceUrl}\n`) + process.stderr.write(`Set MCP_TRANSPORT=http to enable this mode\n`) + }) +} else { + // ── STDIO mode (default, unchanged) ── + const server = createMcpServer() + const transport = new StdioServerTransport() + await server.connect(transport) +} diff --git a/packages/mcp-server/package.json b/packages/mcp-server/package.json index 8d7f1584..ae2771ee 100644 --- a/packages/mcp-server/package.json +++ b/packages/mcp-server/package.json @@ -6,11 +6,12 @@ "model-context-protocol", "mcp-server", "rushdb", - "database", - "graph-database", - "neo4j", + "ai-platform", + "knowledge-engine", + "application-platform", "llm", "ai-agents", + "ai-memory", "tools" ], "license": "Apache-2.0", @@ -35,8 +36,11 @@ }, "dependencies": { "@rushdb/javascript-sdk": "workspace:*", - "@modelcontextprotocol/sdk": "1.20.0", + "@modelcontextprotocol/sdk": "1.27.1", + "@hono/node-server": "^1.13.0", 
"dotenv": "^16.4.7", + "hono": "^4.7.0", + "jose": "^5.9.0", "jsonschema": "^1.5.0" }, "devDependencies": { diff --git a/packages/mcp-server/searchQuerySpec.ts b/packages/mcp-server/searchQuerySpec.ts new file mode 100644 index 00000000..a067398b --- /dev/null +++ b/packages/mcp-server/searchQuerySpec.ts @@ -0,0 +1,536 @@ +// Complete RushDB SearchQuery specification. +// Returned by the getSearchQuerySpec tool so the spec lives outside the system +// prompt and is loaded into context only when needed — keeping the system prompt +// short and letting each LLM call attend to the full reference as a focused tool +// result rather than a distant memory from session start. + +export const SEARCH_QUERY_SPEC = `━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +RUSHDB SEARCHQUERY — COMPLETE REFERENCE +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +SearchQuery shape: + labels? string[] — filter by record type(s); multi-label = OR + where? object — filter conditions; see §1 + aggregate? object — aggregation map; see §2 + groupBy? string[] — shapes aggregate output; see §3 + orderBy? string|object — 'asc'|'desc' or { field:'asc'|'desc' } + limit? number — max root records (default 100; max 1000) + skip? number — pagination offset + +CRITICAL LIMITS +• NEVER include limit when aggregate is present (sum/avg/min/max/count/collect/timeBucket). + limit restricts the record scan → results are mathematically wrong. + e.g. "total budget of all 33 projects" with limit:10 returns only the sum of the first 10. +• Self-group and dimensional groupBy queries: omit limit entirely (or scope to root records only). +• limit is valid only for listing/browsing queries or per-record flat aggregation (one row per root record). 
+ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§1) WHERE — COMPLETE FILTER & TRAVERSAL REFERENCE +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +The where clause mechanism: when a nested object key is NOT a criteria operator (like $gt, +$contains, etc.) and NOT a flat value, RushDB interprets that key as the LABEL of a related +record to traverse. + +── PRIMITIVE VALUE MATCHING ────────────────────────────────────────── +Direct equality, all types: + name: "John Doe" // exact string (case-sensitive equality) + isActive: true // boolean + age: 30 // number + created: "2023-01-01T00:00:00Z" // ISO 8601 datetime + +── STRING OPERATORS ───────────────────────────────────────────────── + name: { $contains: "John" } // substring match (case-insensitive) + name: { $startsWith: "J" } // prefix match (case-insensitive) + name: { $endsWith: "son" } // suffix match (case-insensitive) + name: { $ne: "deleted" } // not equal + status: { $in: ["active","pending"] } // matches any value in array + status: { $nin: ["deleted","archived"] } // matches none of these values + +── NUMBER OPERATORS ────────────────────────────────────────────────── + age: { $gt: 18 } // greater than + age: { $gte: 21 } // greater than or equal + age: { $lt: 65 } // less than + age: { $lte: 64 } // less than or equal + age: { $ne: 18 } // not equal + age: { $in: [20,30,40] } // matches any of these numbers + age: { $nin: [20,30,40] } // matches none of these numbers + +── BOOLEAN OPERATORS ──────────────────────────────────────────────── + isActive: true // direct match + isActive: { $ne: false } // not equal (matches true or unset) + +── DATETIME OPERATORS ─────────────────────────────────────────────── + NEVER use plain date strings with comparison operators like $gt/$gte/$lt/$lte. 
+ use component objects instead: + + // ISO 8601 exact match or equality only: + created: "2023-01-01T00:00:00Z" + created: { $in: ["2023-01-01T00:00:00Z", "2023-02-01T00:00:00Z"] } + + // Component matching (exact point in time): + created: { $year: 2023, $month: 1, $day: 1 } + // Available components: $year $month $day $hour $minute $second $millisecond $microsecond $nanosecond + + // Range comparisons — ALWAYS use component objects: + Year "in 1994": { field: { $gte: { $year:1994 }, $lt: { $year:1995 } } } + Month "Jan 1994": { field: { $gte: { $year:1994,$month:1 }, $lt: { $year:1994,$month:2 } } } + Day "1994-03-15": { field: { $gte: { $year:1994,$month:3,$day:15 }, $lt: { $year:1994,$month:3,$day:16 } } } + Decade "1990s": { field: { $gte: { $year:1990 }, $lt: { $year:2000 } } } + Relative ("last 7 days", "this month"): compute ISO UTC boundary → use ISO string with $gte. + + // Month+day WITHOUT year: unsupported — ask the user for a year. Do not mention internal reasons. + +── VECTOR SIMILARITY (aggregate only) ─────────────────────────────── + // $vector is NOT a where operator. 
Use it only inside aggregate: + aggregate: { + similarity: { + fn: "vector.similarity.cosine", // cosine | euclidean + field: "embedding", + query: [1, 2, 3, 4, 5], + alias: "$record" + } + } + +── FIELD EXISTENCE & TYPE ─────────────────────────────────────────── + phoneNumber: { $exists: true } // only records that have this field (not null/empty) + phoneNumber: { $exists: false } // only records that do NOT have this field + age: { $type: "number" } // "string"|"number"|"boolean"|"datetime"|"null"|"vector" + +── LOGICAL GROUPING OPERATORS ─────────────────────────────────────── + // Implicit $and (multiple keys at same level = AND): + where: { name: { $startsWith: "J" }, age: { $gte: 21 } } + + // Explicit versions: + $and: [ { name: { $startsWith: "J" } }, { age: { $gte: 21 } } ] + $or: [ { name: { $startsWith: "J" } }, { age: { $gte: 21 } } ] + $not: { status: "deleted" } + $nor: [ { status: "deleted" }, { status: "archived" } ] + $xor: [ { isPremium: true }, { hasFreeTrialAccess: true } ] // exactly one must match + + // Nested logical grouping: + $or: [ + { status: "active" }, + { $and: [ { status: "pending" }, { createdAt: { $gte: "2023-01-01T00:00:00Z" } } ] } + ] + +── RELATIONSHIP TRAVERSAL ──────────────────────────────────────────── +Traversal rule: ANY top-level key that reads as a label name (ALL_CAPS style) is interpreted +as a related-record traversal, not a field filter. Uses OPTIONAL MATCH in Cypher — records +are included even if the related record doesn't exist UNLESS you explicitly filter for it. 
+ +Basic (filter by related record properties): + where: { + name: "Tech Corp", + DEPARTMENT: { // traverse to related DEPARTMENT records + name: "Engineering", + headcount: { $gte: 10 } + } + } + +Multi-level nesting (path): + where: { + DEPARTMENT: { + name: "Engineering", + PROJECT: { // DEPARTMENT → PROJECT + name: "Database", + EMPLOYEE: { role: "Developer" } // PROJECT → EMPLOYEE + } + } + } + +⚠ TRAVERSAL SYNTAX — COMMON HALLUCINATION (ALWAYS WRONG): + The operators $label / $direction / $as / $of / $through DO NOT EXIST. + Never write: { "employee": { "$label": "EMPLOYEE", "$as": "$emp", "$direction": "out" } } + The key IS the label. There is no $label operand. Alias is always $alias, not $as. + WRONG: where: { employee: { $label:'EMPLOYEE', $direction:'out', $as:'$emp' } } + CORRECT: where: { EMPLOYEE: { $alias:'$emp' } } // key = label; alias via $alias only + +$alias — name a traversed node for use in aggregate/groupBy: + where: { + DEPARTMENT: { + $alias: '$department', + PROJECT: { + $alias: '$project', + EMPLOYEE: { $alias: '$employee' } + } + } + } + +$relation — constrain relationship type and/or direction: + where: { + POST: { + $relation: { type: 'AUTHORED', direction: 'in' }, // full form + title: { $contains: 'Graph' } + } + } + Shorthand (type only): $relation: 'AUTHORED' + direction options: 'in' | 'out' (omit = any direction) + +$id — filter by record ID (supports $in, $nin, string operators): + where: { $id: { $in: ['id1','id2'] } } + where: { EMPLOYEE: { $id: 'specific-id' } } + +── LOGICAL GROUPING WITH RELATIONSHIPS ────────────────────────────── +Logical operators can wrap entire relationship blocks: + where: { + $or: [ + { DEPARTMENT: { name: "Engineering" } }, + { DEPARTMENT: { name: "Product" } } + ] + } + where: { + $and: [ + { DEPARTMENT: { name: "Engineering" } }, + { PROJECT: { budget: { $gte: 10000 } } } + ] + } + where: { + name: "Tech Corp", + $or: [ + { DEPARTMENT: { name: "Engineering" } }, + { DEPARTMENT: { name: "Product", 
$not: { PROJECT: { status: "Canceled" } } } } + ] + } + +Logical operators INSIDE relationship blocks: + where: { + DEPARTMENT: { + $or: [ { name: "Engineering" }, { name: "Product" } ], + PROJECT: { + $and: [ { budget: { $gte: 10000 } }, { status: { $ne: "Canceled" } } ] + } + } + } + +── KEY BEHAVIORAL NOTES ───────────────────────────────────────────── + • Field names are case-sensitive. + • Missing fields are NOT matched — { active: true } skips records without an 'active' field. + • String operators ($contains, $startsWith, $endsWith) are case-insensitive. + • Array fields: condition satisfied if ANY element matches (tags:"typescript" matches ["js","typescript"]). + • Relationship traversal uses OPTIONAL MATCH — records are returned even if no related record + exists, unless you add a property filter on that related block. + • Logical operators work at ANY nesting level, including inside relationship blocks. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§2) AGGREGATE — FUNCTIONS & INLINE REFS +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Every aggregate key maps to either an INLINE REF or an AGGREGATION FUNCTION. + +INLINE REF (copy a field value into the output row — no fn): + "outputKey": "$alias.fieldName" + Examples: + "companyName": "$record.name" + "projectBudget": "$record.budget" + +AGGREGATION FUNCTIONS (alias defaults to '$record' if omitted): + fn:'count' — count matching records; optional field + unique:bool + fn:'sum' — { fn:'sum', field:'salary', alias:'$employee' } + fn:'avg' — { fn:'avg', field:'salary', alias:'$employee', precision:2 } + fn:'min' — { fn:'min', field:'salary', alias:'$employee' } + fn:'max' — { fn:'max', field:'salary', alias:'$employee' } + fn:'collect' — gather into array; see §4 + fn:'timeBucket' — temporal bucketing; see §5 + + alias: '$record' for root-label fields; the $alias string declared in where for related nodes. 
+ EVERY fn-based aggregate entry MUST include alias. + +FLAT CROSS-LABEL EXAMPLE (PROJECT root + EMPLOYEE metrics): + labels: ['PROJECT'], + where: { budget:{ $lte:10000000 }, EMPLOYEE:{ $alias:'$employee' } }, + aggregate: { + projectName: '$record.name', + projectBudget: '$record.budget', + headcount: { fn:'count', unique:true, alias:'$employee' }, + totalWage: { fn:'sum', field:'salary', alias:'$employee' }, + avgSalary: { fn:'avg', field:'salary', precision:0, alias:'$employee' }, + minSalary: { fn:'min', field:'salary', alias:'$employee' }, + maxSalary: { fn:'max', field:'salary', alias:'$employee' } + } + → one row per PROJECT record, each with employee stats embedded. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§3) GROUPBY — TWO MODES +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +MODE A — DIMENSIONAL (one row per distinct value — "$alias.propertyName" format): + aggregate: { count:{ fn:'count', alias:'$record' }, avg:{ fn:'avg', field:'total', alias:'$record' } }, + groupBy: ['$record.status'], + orderBy: { count:'desc' } + Output rows: [{ "status":"pending","count":120,"avg":310.42 }, ...] + Note: group key appears WITHOUT alias prefix in output ($record.status → "status"). + Multiple keys = pivot: groupBy: ['$record.category','$record.active'] → one row per (category,active) pair. + +MODE B — SELF-GROUP (collapse everything to ONE row with global metric(s)): + Put the AGGREGATION KEY NAMES themselves in groupBy (not property paths): + aggregate: { totalBudget:{ fn:'sum', field:'budget', alias:'$record' } }, + groupBy: ['totalBudget'] + Output: [{ "totalBudget": 1875251446 }] + Multiple KPIs: groupBy: ['totalRevenue','orderCount'] → [{ "totalRevenue":987654, "orderCount":420 }] + +⚠ LATE-ORDERING RULE — CRITICAL FOR CORRECT TOTALS: + When orderBy references an aggregated key, the engine applies ORDER BY + LIMIT + AFTER the aggregation (full-scan first, then paginate). 
+ When orderBy is absent or references a raw field, LIMIT is applied BEFORE aggregation + → only the first N raw records are aggregated → WRONG totals. + FIX: for self-group and any pure metric query, always add orderBy on the aggregation key: + aggregate:{ total:{ fn:'sum', field:'amount', alias:'$record' } }, + groupBy:['total'], + orderBy:{ total:'asc' } ← triggers late ordering; ensures full dataset is summed + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§4) COLLECT — ARRAY GATHERING & NESTED STRUCTURES +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Basic collect (gather a field into an array): + { fn:'collect', field:'name', alias:'$employee', unique:true } + +Collect options: + field? — specific field; omit to collect entire records + unique? — deduplicate (default true) + limit? — max items in the array + skip? — skip N items in the collected array + orderBy? — sort collected items: { salary:'desc' } + +NESTED COLLECT — only fn:'collect' is valid at nested levels (Cypher limitation): + labels: ['COMPANY'], + where: { + DEPARTMENT: { $alias:'$dept', + PROJECT: { $alias:'$proj', + EMPLOYEE: { $alias:'$emp', dob:{ $lte:{ $year:1994 } } } + } + } + }, + aggregate: { + company: '$record.name', + departments: { + fn:'collect', alias:'$dept', + aggregate: { + projects: { + fn:'collect', alias:'$proj', orderBy:{ projectName:'asc' }, + aggregate: { + employees: { fn:'collect', alias:'$emp', orderBy:{ salary:'desc' }, limit:3 } + } + } + } + } + } + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§5) TIMEBUCKET — TIME-SERIES BUCKETING +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + fn:'timeBucket', field:'createdAt', granularity:'day'|'week'|'month'|'quarter'|'year' + Custom N-sized windows: granularity:'months'|'hours'|'minutes'|'seconds'|'years', size:N + +Examples: + Daily counts: + aggregate:{ day:{ fn:'timeBucket', field:'createdAt', granularity:'day', 
alias:'$record' }, count:{ fn:'count', alias:'$record' } }, + groupBy:['day'], orderBy:{ day:'asc' } + Monthly revenue: + aggregate:{ month:{ fn:'timeBucket', field:'issuedAt', granularity:'month', alias:'$record' }, revenue:{ fn:'sum', field:'amount', alias:'$record' } }, + groupBy:['month'], orderBy:{ month:'asc' } + Bi-monthly (every 2 months): + aggregate:{ period:{ fn:'timeBucket', field:'startedAt', granularity:'months', size:2, alias:'$record' }, n:{ fn:'count', alias:'$record' } }, + groupBy:['period'], orderBy:{ period:'asc' } + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§6) LIMIT RULES BY QUERY MODE +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + • Self-group (single KPI row): NO limit — but MUST add orderBy on aggregation key for late ordering. + • Dimensional groupBy: NO limit to get all groups; add limit + orderBy on aggregation key for "top N". + • Per-record flat aggregation (one row per root record): limit IS valid (caps root records). + • Pure listing (no aggregate): limit is always valid. + • "how many" simple count: read 'total' from the findRecords response — do NOT use fn:'count' for this. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§7) METRIC FIELD DISCOVERY ACROSS RELATED LABELS +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +If the metric field is NOT on the target label, search related labels before giving up: + 1) Confirm target label. findProperties(labels:[]) — look for the metric field. + 2) If absent, walk adjacent labels via getOntologyMarkdown or findRelationships probe. + 3) For each candidate related label R: findProperties(labels:[R]) and attempt the same match. + 4) When found on CHILD: where:{ CHILD:{ ...filters..., $alias:'$child' } }, aggregate alias:'$child'. + Root-level filters (status, dates) stay at the top-level where. + 5) Never abandon after one miss — always attempt at least one related-label discovery pass. 
+ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§8) RANGE / DISTRIBUTION QUERIES +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + • type = number or datetime → findRecords aggregate with fn:'min' + fn:'max'. Add groupBy key names + for self-group mode. Or: getOntology (JSON) → propertyValues(propertyId) → returns { min, max } directly. + • type = string or boolean → propertyValues(propertyId) to list all distinct values. + • NEVER call findRecords with a where filter to "search for" values of a field — that returns records, not ranges. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§9) MULTI-LABEL FILTER DISTRIBUTION +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Place each filter with the label that actually holds the field. On zero results, silently retry +by moving the filter to the related child block before asking. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§10) ENUM / VALUE NORMALIZATION +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Never hardcode guessed values for enumerated fields: + 1) findProperties to locate the property and get its id. + 2) propertyValues(propertyId, { query: }) to probe existing values. + 3) If no match: try case variants, abbreviations, partial prefixes. + 4) Use ONLY canonical values returned by propertyValues. + 5) Re-run propertyValues with empty query to list top candidates; retry once silently. + Ask only if two+ equally plausible values remain. + 6) Always mention assumption briefly if mapping is non-obvious, then proceed. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§11) RELATIONSHIP & PATH QUERIES +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Entity resolution by name: probe with findRecords(limit:1, where:{ :{ $contains:'...' } }). 
+ +MULTI-HOP RELATIONSHIP DISCOVERY +Pre-check before multi-hop: + 1) findProperties(labels:[parent]) for direct scalar fields. + 2) Fetch 1 sample parent record. findRelationships filtered by its id → discover adjacent labels. + 3) If direct path to child exists: where:{ CHILD:{ $alias:'$child' } } — STOP. No intermediate wrappers. + 4) Only if no direct path: BFS (depth ≤ 4). + +BFS algorithm: + 1) Resolve parent + child labels via findLabels. + 2) Fetch 1 sample record of current hop; findRelationships filtered by its id → adjacent labels. + 3) On finding path PARENT→A→B→CHILD: + where:{ A:{ B:{ CHILD:{ $alias:'$child' } } } } + aggregate:{ metric:{ fn:'count'|'avg'|..., alias:'$child' } } + 4) No top-level limit for pure grouped aggregations. + 5) Only the root parent label appears in labels:[]. Intermediates appear only inside where. + 6) NEVER reuse '$record' alias for related-node aggregation. + 7) After path found, collapse redundant intermediate layers. + 8) If BFS exhausts without match: synonym remap using findProperties output; if still unavailable, ask. + +Avoid over-nesting: + WRONG: where:{ A:{ B:{ $alias:'$b' } } } ← when B is directly linked to root + CORRECT: where:{ B:{ $alias:'$b' } } + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§12) NL → WHERE TRANSLATION QUICK REFERENCE +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +• Numerics: 1k=1000, 1m=1000000, 1b=1000000000. Strip currency symbols ($100k→100000). +• Equality / sets: + field: value + field: { $ne: value } + field: { $in: [v1,v2] } + field: { $nin: [v1,v2] } +• Numbers: > $gt >= $gte < $lt <= $lte between X and Y → { $gte:X, $lte:Y } +• Strings (case-insensitive): $contains / $startsWith / $endsWith / $in / $nin / $ne. +• Booleans: field: true|false or { field: { $ne: value } }. +• Datetime — always component objects for ranges; see §1 datetime operators above. +• Logical: $and / $or / $not / $nor / $xor. Prefer implicit AND when simple. 
+• Field names are case-sensitive. String comparisons are case-insensitive by default. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§13) VALIDATION CHECKLIST +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Before submitting a findRecords call, verify: +□ No groupBy without aggregate. +□ alias present on every fn-based aggregate entry ('$record' for root; declared $alias for related). +□ Inline refs ("$alias.field" string values) do NOT need fn or alias key. +□ limit absent for self-group and dimensional groupBy (unless scoping root records in flat aggregation). +□ orderBy on aggregation key present for self-group queries (triggers late ordering → correct totals). +□ groupBy mode correct: + Dimensional: entries are "$alias.propertyName" strings. + Self-group: entries are aggregation key names (no dot, no alias prefix). +□ Nested collect: only fn:'collect' is valid inside a collect's aggregate block. +□ Traversal: key = label name (ALL_CAPS). NEVER $label/$direction/$as/$of/$through. + WRONG: { employee: { $label:'EMPLOYEE' } } CORRECT: { EMPLOYEE: { $alias:'$emp' } } +□ No '$record' alias reused for related-node aggregation. +□ Vector threshold semantics: euclidean → $lte; others → $gte. +□ Month+day without year → ask for year. +□ Aggregation intent? → query MUST include aggregate + groupBy. Raw records ≠ aggregation. + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +§14) EXAMPLE PATTERNS +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +(Actual label/field names always come from getOntologyMarkdown — never from these examples.) + +List with numeric filter: + findRecords({ labels:['