This guide explains how to set up Google Cloud Spanner for use with PathRAG's SpannerGraphStorage backend.
- An active Google Cloud project with billing enabled
- Google Cloud SDK (`gcloud`) installed and configured
gcloud auth application-default login

# Set your project ID
export PROJECT_ID=$(gcloud config get-value project)
# Enable the required APIs
gcloud services enable \
spanner.googleapis.com \
aiplatform.googleapis.com \
cloudresourcemanager.googleapis.com

If you are running PathRAG on a local machine or a VM without a default service account:
export SERVICE_ACCOUNT="pathrag-spanner-sa"
# Create the service account
gcloud iam service-accounts create $SERVICE_ACCOUNT \
--description="Service account for PathRAG with Spanner Graph" \
--display-name="PathRAG Spanner SA"
# Grant Spanner database user role
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com" \
--role="roles/spanner.databaseUser"
# Grant Vertex AI user role (for Gemini LLM / embedding)
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${SERVICE_ACCOUNT}@${PROJECT_ID}.iam.gserviceaccount.com" \
--role="roles/aiplatform.user"

# Set environment variables
export SPANNER_INSTANCE="pathrag-instance"
export SPANNER_DATABASE="pathrag-database"
export SPANNER_REGION="us-central1" # Change to your preferred region
# Create the Spanner instance
gcloud spanner instances create $SPANNER_INSTANCE \
--config=regional-$SPANNER_REGION \
--description="Spanner instance for PathRAG" \
--nodes=1 \
--edition=ENTERPRISE
# Create the database
gcloud spanner databases create $SPANNER_DATABASE \
--instance=$SPANNER_INSTANCE

Note: `SpannerGraphStorage` automatically creates the required tables (`{namespace}_nodes`, `{namespace}_edges`) and the property graph DDL on first initialization. No manual schema setup is needed.
pip install google-cloud-spanner

Or, if you are using the PathRAG project:
pip install -e .
pip install google-cloud-spanner

Create a `.env` file in your project root (or the `examples/` directory):
# .env
GOOGLE_CLOUD_PROJECT=your-gcp-project-id
SPANNER_INSTANCE=pathrag-instance
SPANNER_DATABASE=pathrag-database
# LLM configuration (choose one)
GEMINI_API_KEY=your-gemini-api-key
# OPENAI_API_KEY=your-openai-api-key
# Optional overrides
# LLM_MODEL_NAME=gemini/gemini-2.5-flash
# EMBEDDING_MODEL_NAME=gemini/gemini-embedding-001
# EMBEDDING_DIM=3072

from PathRAG import PathRAG, QueryParam
rag = PathRAG(
working_dir="./data",
llm_model_name="gemini/gemini-2.5-flash",
embedding_model_name="gemini/gemini-embedding-001",
embedding_dim=3072,
graph_storage="SpannerGraphStorage",
addon_params={
"spanner_project_id": "your-gcp-project-id",
"spanner_instance_id": "pathrag-instance",
"spanner_database_id": "pathrag-database",
},
)
# Index documents
await rag.ainsert("Your document text here...")
# Query
response = await rag.aquery(
"Your question here?",
param=QueryParam(mode="hybrid"),
)
print(response)

from PathRAG.spanner_graph_storage import SpannerGraphStorage
graph = SpannerGraphStorage(
namespace="my_graph",
global_config={
"spanner_project_id": "your-gcp-project-id",
"spanner_instance_id": "pathrag-instance",
"spanner_database_id": "pathrag-database",
},
)
# Insert nodes
await graph.upsert_node("APPLE", {
"entity_type": "ORGANIZATION",
"description": "American multinational technology company",
"source_id": "chunk-001",
})
# Insert edges
await graph.upsert_edge("STEVE JOBS", "APPLE", {
"weight": "3.0",
"description": "Co-founded Apple in 1976",
"keywords": "founding, co-founder",
"source_id": "chunk-001",
})
# Query (uses GQL internally)
edges = await graph.get_node_edges("STEVE JOBS")
degree = await graph.node_degree("APPLE")

export GOOGLE_CLOUD_PROJECT=your-gcp-project-id
export SPANNER_INSTANCE=pathrag-instance
export SPANNER_DATABASE=pathrag-database
# Run tests only (no cleanup)
python examples/spanner/test_spanner_graph_storage.py
# Run tests then cleanup test tables
python examples/spanner/test_spanner_graph_storage.py --cleanup
# Cleanup only (skip tests)
python examples/spanner/test_spanner_graph_storage.py --cleanup-only

`SpannerGraphStorage` automatically creates the following schema:
| Column | Type | Description |
|---|---|---|
| `id` | `STRING(MAX) NOT NULL` | Node identifier (Primary Key) |
| `entity_type` | `STRING(MAX)` | Entity type (e.g., PERSON, ORGANIZATION) |
| `description` | `STRING(MAX)` | Entity description |
| `source_id` | `STRING(MAX)` | Source chunk ID |

| Column | Type | Description |
|---|---|---|
| `id` | `STRING(MAX) NOT NULL` | Source node ID (Primary Key, FK → nodes) |
| `target_id` | `STRING(MAX) NOT NULL` | Target node ID (Primary Key, FK → nodes) |
| `weight` | `FLOAT64` | Relationship weight |
| `description` | `STRING(MAX)` | Relationship description |
| `keywords` | `STRING(MAX)` | Relationship keywords |
| `source_id` | `STRING(MAX)` | Source chunk ID |

CREATE OR REPLACE PROPERTY GRAPH pathrag_{namespace}
NODE TABLES (
{namespace}_nodes
KEY(id)
LABEL Entity
PROPERTIES(id, entity_type, description, source_id)
)
EDGE TABLES (
{namespace}_edges
KEY(id, target_id)
SOURCE KEY(id) REFERENCES {namespace}_nodes(id)
DESTINATION KEY(target_id) REFERENCES {namespace}_nodes(id)
LABEL Relationship
PROPERTIES(weight, description, keywords, source_id)
)

To remove test data and resources:
# Drop the database
gcloud spanner databases delete $SPANNER_DATABASE \
--instance=$SPANNER_INSTANCE
# (Optional) Delete the instance
gcloud spanner instances delete $SPANNER_INSTANCE