diff --git a/README.md b/README.md index fa92132..1081a29 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,9 @@ Deploy Memgraph using methods that suit your environment, whether it's container ## List of best practices +### Database Migrations +- [Schema migrations with Flyway and Memgraph](./flyway/) + ### Debugging - [Generating a core dump with Memgraph in Docker Compose](./debugging/docker_compose_with_core_dump_generation/) diff --git a/flyway/Dockerfile.flyway b/flyway/Dockerfile.flyway new file mode 100644 index 0000000..ef6c1b1 --- /dev/null +++ b/flyway/Dockerfile.flyway @@ -0,0 +1,18 @@ +FROM flyway/flyway:11.1.0 + +# Install the Neo4j JDBC driver (full bundle: driver + Bolt + SQL translator) +# and the Flyway plugin for Neo4j (implements Flyway's database SPI for graph DBs). +# Memgraph speaks the Bolt protocol, so the Neo4j JDBC driver connects directly. + +USER root + +ADD https://repo1.maven.org/maven2/org/neo4j/neo4j-jdbc-full-bundle/6.12.1/neo4j-jdbc-full-bundle-6.12.1.jar \ + /flyway/jars/neo4j-jdbc-full-bundle.jar + +ADD https://repo1.maven.org/maven2/eu/michael-simons/neo4j/neo4j-flyway-database/0.0.4/neo4j-flyway-database-0.0.4.jar \ + /flyway/jars/neo4j-flyway-database.jar + +RUN chmod 644 /flyway/jars/neo4j-jdbc-full-bundle.jar \ + /flyway/jars/neo4j-flyway-database.jar + +USER flyway diff --git a/flyway/README.md b/flyway/README.md new file mode 100644 index 0000000..9566d37 --- /dev/null +++ b/flyway/README.md @@ -0,0 +1,144 @@ +# Schema Migrations with Flyway and Memgraph + +Manage versioned graph schema migrations against Memgraph using [Flyway](https://flywaydb.org/) with the [Neo4j JDBC driver](https://github.com/neo4j/neo4j-jdbc) and [neo4j-flyway-database](https://github.com/michael-simons/neo4j-flyway-database) plugin. Memgraph's Bolt protocol compatibility allows the Neo4j JDBC driver to connect directly. 
+ +## Architecture + +``` +sql/ + ├── V1__Create_indexes.cypher Versioned .cypher + ├── V2__Seed_graph.cypher migrations executed + └── V3__Schema_evolution.cypher via Bolt protocol + │ + ▼ + ┌──────────────┐ Bolt (7687) ┌─────────────┐ + │ Flyway │ ────────────────> │ Memgraph │ + │ (Neo4j JDBC) │ │ (MAGE) │ + └──────────────┘ └─────────────┘ +``` + +## What This Example Does + +1. **Creates indexes** on `Person(email)`, `Person(name)`, and `Company(name)` for fast lookups. +2. **Seeds the graph** with Person and Company nodes connected by `WORKS_AT` relationships. +3. **Evolves the schema** by adding `KNOWS` relationships, enriching `WORKS_AT` with a `department` property, and adding an `active` flag to all Person nodes. +4. **Tracks every migration** using Flyway's history graph (`__Neo4jMigration` nodes linked by `MIGRATED_TO` relationships). +5. **Prevents re-execution** — each versioned migration runs exactly once, verified by checksum. + +## How It Works + +Flyway connects to Memgraph via the [Neo4j JDBC driver](https://github.com/neo4j/neo4j-jdbc) (which speaks the Bolt protocol natively). The [neo4j-flyway-database](https://github.com/michael-simons/neo4j-flyway-database) plugin implements Flyway's database SPI, enabling Flyway to: + +- Recognize `jdbc:neo4j://` connection URLs +- Parse `.cypher` migration files +- Store migration history as a graph (not a SQL table) + +Migration history is stored as `__Neo4jMigration` nodes linked by `MIGRATED_TO` relationships. This is the same format used by the standalone [neo4j-migrations](https://github.com/michael-simons/neo4j-migrations) tool. + +The `--bolt-server-name-for-init=Neo4j/5.2.0` flag on Memgraph ensures the Neo4j JDBC driver recognizes it as a compatible Bolt endpoint. + +## Quick Start + +```bash +# 1. Start Memgraph and run all migrations +docker compose up --build + +# 2. 
(optional) Verify the migration results +pip install -r requirements.txt +python verify_migration.py +``` + +The `flyway` container applies all migrations and exits. Memgraph stays running on `bolt://localhost:7687` and the Lab UI is available at `http://localhost:3000`. + +## Running Additional Flyway Commands + +After the initial `docker compose up`, you can run more Flyway commands against the running Memgraph instance: + +```bash +# Check migration status +docker compose run --rm flyway info + +# Validate applied migrations (checksum verification) +docker compose run --rm flyway validate + +# Repair the migration history (fix checksums or remove failed entries) +docker compose run --rm flyway repair +``` + +## Project Structure + +``` +flyway/ +├── README.md # This file +├── docker-compose.yml # Memgraph + Flyway services +├── Dockerfile.flyway # Flyway image with Neo4j JDBC + plugin +├── flyway.conf # Flyway configuration +├── sql/ +│ ├── V1__Create_indexes.cypher # Migration 1: index creation +│ ├── V2__Seed_graph.cypher # Migration 2: initial nodes and relationships +│ └── V3__Schema_evolution.cypher # Migration 3: new rels, properties +├── requirements.txt # Python deps for verification script +└── verify_migration.py # Verify migration results +``` + +## Migration File Naming + +Flyway uses a strict naming convention: + +| Pattern | Description | +|---------|-------------| +| `V1__Description.cypher` | Versioned migration (runs once, in order) | +| `V1.1__Description.cypher` | Sub-versioned migration | +| `R__Description.cypher` | Repeatable migration (runs on every change) | +| `U1__Description.cypher` | Undo migration (Flyway Teams only) | + +The double underscore `__` separates the version from the description. Descriptions use underscores instead of spaces. 
+ +## Writing Memgraph-Compatible Migrations + +When writing Cypher for Memgraph migrations, keep these syntax differences in mind compared to Neo4j: + +| Operation | Memgraph | Neo4j | +|-----------|----------|-------| +| Create index | `CREATE INDEX ON :Label(prop)` | `CREATE INDEX FOR (n:Label) ON (n.prop)` | +| Drop index | `DROP INDEX ON :Label(prop)` | `DROP INDEX name` | +| Unique constraint | `CREATE CONSTRAINT ON (n:Label) ASSERT n.prop IS UNIQUE` | `CREATE CONSTRAINT FOR (n:Label) REQUIRE n.prop IS UNIQUE` | +| Existence constraint | `CREATE CONSTRAINT ON (n:Label) ASSERT EXISTS (n.prop)` | `CREATE CONSTRAINT FOR (n:Label) REQUIRE n.prop IS NOT NULL` | + +Stick to **standard Cypher** (`CREATE`, `MATCH`, `MERGE`, `SET`, `DELETE`) for maximum compatibility. Avoid Neo4j-specific features like `CALL {} IN TRANSACTIONS` or APOC procedures. + +Each statement in a `.cypher` file must end with a semicolon (`;`). Flyway splits on semicolons to execute statements individually. + +## Flyway vs Liquibase + +Both tools can manage graph migrations against Memgraph. Key differences: + +| Aspect | Flyway | Liquibase | +|--------|--------|-----------| +| Migration format | Plain `.cypher` files | XML, YAML, JSON, or Cypher changelogs | +| History storage | `__Neo4jMigration` nodes + `MIGRATED_TO` rels | `__LiquibaseChangeLog` nodes | +| Naming convention | `V{version}__description.cypher` | `id` + `author` per changeset | +| Rollback | Undo migrations (Teams only) | Rollback blocks (Community) | +| Approach | SQL-first, file-per-migration | Changelog-first, changeset-based | + +See the [Liquibase example](../liquibase/) in this repository for comparison. + +## Compatibility Notes + +- **Memgraph v2.11+** defaults `--bolt-server-name-for-init` to a Neo4j-compatible value. The flag in `docker-compose.yml` is kept explicit for clarity. +- The `neo4j-flyway-database` plugin stores migration history as graph nodes with `__Neo4jMigration` labels and `MIGRATED_TO` relationships. 
These can be queried: `MATCH (m:__Neo4jMigration) RETURN m ORDER BY m.version`. +- The plugin uses some Neo4j-specific Cypher internally (e.g., `CREATE CONSTRAINT ... IF NOT EXISTS`). If Memgraph does not support certain syntax, check the [neo4j-flyway-database GitHub](https://github.com/michael-simons/neo4j-flyway-database) for updates. +- The Neo4j JDBC driver (v6.x) speaks Bolt natively — it is **not** built on top of the Neo4j Java Driver. + +## Version Compatibility + +This example was tested with: + +* **Memgraph MAGE 3.9.0** +* **Flyway 11.1.0** +* **Neo4j JDBC Full Bundle 6.12.1** +* **neo4j-flyway-database 0.0.4** + +## Need Help? + +If you encounter issues, visit the [Memgraph Discord server](https://discord.gg/memgraph) to get help from the community or the Memgraph team! diff --git a/flyway/docker-compose.yml b/flyway/docker-compose.yml new file mode 100644 index 0000000..7b3fc42 --- /dev/null +++ b/flyway/docker-compose.yml @@ -0,0 +1,37 @@ +services: + memgraph: + image: memgraph/memgraph-mage:3.9.0 + container_name: memgraph-flyway + ports: + - "7687:7687" + - "3000:3000" + command: + [ + "--bolt-server-name-for-init=Neo4j/5.2.0", + "--log-level=TRACE", + "--also-log-to-stderr=true", + "--telemetry-enabled=false", + ] + healthcheck: + test: + [ + "CMD-SHELL", + "echo 'RETURN 1;' | mgconsole --host 127.0.0.1 --port 7687 || exit 1", + ] + interval: 5s + timeout: 5s + retries: 10 + start_period: 10s + + flyway: + build: + context: . + dockerfile: Dockerfile.flyway + container_name: flyway-runner + depends_on: + memgraph: + condition: service_healthy + volumes: + - ./flyway.conf:/flyway/conf/flyway.conf + - ./sql:/flyway/sql + command: migrate diff --git a/flyway/flyway.conf b/flyway/flyway.conf new file mode 100644 index 0000000..968f0d4 --- /dev/null +++ b/flyway/flyway.conf @@ -0,0 +1,14 @@ +# Flyway configuration for Memgraph +# The jdbc:neo4j: prefix is handled by the Neo4j JDBC driver. 
+# Memgraph accepts connections via the Bolt protocol on port 7687. + +flyway.url=jdbc:neo4j://memgraph:7687 +flyway.locations=filesystem:/flyway/sql + +# Use .cypher as the migration file suffix (this replaces the default .sql; list both as .sql,.cypher to accept either) +flyway.sqlMigrationSuffixes=.cypher + +# Memgraph Community Edition has no authentication by default. +# Set user/password if you have auth enabled. +# flyway.user= +# flyway.password= diff --git a/flyway/requirements.txt b/flyway/requirements.txt new file mode 100644 index 0000000..d42839c --- /dev/null +++ b/flyway/requirements.txt @@ -0,0 +1 @@ +neo4j==5.28.1 diff --git a/flyway/sql/V1__Create_indexes.cypher b/flyway/sql/V1__Create_indexes.cypher new file mode 100644 index 0000000..c65f3ae --- /dev/null +++ b/flyway/sql/V1__Create_indexes.cypher @@ -0,0 +1,6 @@ +// Create indexes on frequently queried properties. +// Indexes speed up MATCH lookups and are essential for performant graph queries. + +CREATE INDEX ON :Person(email); +CREATE INDEX ON :Person(name); +CREATE INDEX ON :Company(name); diff --git a/flyway/sql/V2__Seed_graph.cypher b/flyway/sql/V2__Seed_graph.cypher new file mode 100644 index 0000000..e5f3c2f --- /dev/null +++ b/flyway/sql/V2__Seed_graph.cypher @@ -0,0 +1,21 @@ +// Seed the graph with Person and Company nodes plus WORKS_AT relationships. 
+ +CREATE (:Person {name: 'Alice', email: 'alice@memgraph.io', role: 'Engineer'}); +CREATE (:Person {name: 'Bob', email: 'bob@memgraph.io', role: 'Product Manager'}); +CREATE (:Person {name: 'Charlie', email: 'charlie@techcorp.com', role: 'Data Scientist'}); +CREATE (:Person {name: 'Diana', email: 'diana@techcorp.com', role: 'CTO'}); + +CREATE (:Company {name: 'Memgraph', founded: 2016, domain: 'graph-databases'}); +CREATE (:Company {name: 'TechCorp', founded: 2010, domain: 'data-analytics'}); + +MATCH (p:Person {email: 'alice@memgraph.io'}), (c:Company {name: 'Memgraph'}) +CREATE (p)-[:WORKS_AT {since: 2020}]->(c); + +MATCH (p:Person {email: 'bob@memgraph.io'}), (c:Company {name: 'Memgraph'}) +CREATE (p)-[:WORKS_AT {since: 2021}]->(c); + +MATCH (p:Person {email: 'charlie@techcorp.com'}), (c:Company {name: 'TechCorp'}) +CREATE (p)-[:WORKS_AT {since: 2018}]->(c); + +MATCH (p:Person {email: 'diana@techcorp.com'}), (c:Company {name: 'TechCorp'}) +CREATE (p)-[:WORKS_AT {since: 2015}]->(c); diff --git a/flyway/sql/V3__Schema_evolution.cypher b/flyway/sql/V3__Schema_evolution.cypher new file mode 100644 index 0000000..901da83 --- /dev/null +++ b/flyway/sql/V3__Schema_evolution.cypher @@ -0,0 +1,28 @@ +// Schema evolution: add KNOWS relationships and enrich existing data. +// Demonstrates how Flyway tracks incremental graph changes. 
+
+// Add social connections between people
+MATCH (a:Person {email: 'alice@memgraph.io'}), (b:Person {email: 'bob@memgraph.io'})
+CREATE (a)-[:KNOWS {since: 2020, context: 'coworkers'}]->(b);
+
+MATCH (a:Person {email: 'alice@memgraph.io'}), (c:Person {email: 'charlie@techcorp.com'})
+CREATE (a)-[:KNOWS {since: 2022, context: 'conference'}]->(c);
+
+MATCH (c:Person {email: 'charlie@techcorp.com'}), (d:Person {email: 'diana@techcorp.com'})
+CREATE (c)-[:KNOWS {since: 2018, context: 'coworkers'}]->(d);
+
+// Enrich WORKS_AT relationships with department information
+MATCH (p:Person {email: 'alice@memgraph.io'})-[r:WORKS_AT]->(:Company {name: 'Memgraph'})
+SET r.department = 'Engineering';
+
+MATCH (p:Person {email: 'bob@memgraph.io'})-[r:WORKS_AT]->(:Company {name: 'Memgraph'})
+SET r.department = 'Product';
+
+MATCH (p:Person {email: 'charlie@techcorp.com'})-[r:WORKS_AT]->(:Company {name: 'TechCorp'})
+SET r.department = 'Data Science';
+
+MATCH (p:Person {email: 'diana@techcorp.com'})-[r:WORKS_AT]->(:Company {name: 'TechCorp'})
+SET r.department = 'Executive';
+
+// Add active status to all Person nodes
+MATCH (p:Person) SET p.active = true;
diff --git a/flyway/verify_migration.py b/flyway/verify_migration.py
new file mode 100644
index 0000000..73a96d3
--- /dev/null
+++ b/flyway/verify_migration.py
@@ -0,0 +1,96 @@
+"""Verify that Flyway migrations were applied to Memgraph.
+
+Connects to Memgraph and checks that the expected graph structure exists,
+including nodes, relationships, and Flyway's internal migration history.
+"""
+
+from neo4j import GraphDatabase
+
+URI = "bolt://localhost:7687"
+
+
+def main():
+    """Print a report of the migrated graph and Flyway's migration history.
+
+    Runs read-only queries against the Memgraph instance at ``URI`` and prints
+    the Person and Company nodes, the WORKS_AT and KNOWS relationships, the
+    ``__Neo4jMigration`` history nodes with their ``MIGRATED_TO`` chain, and
+    total node/relationship counts. Returns nothing.
+    """
+    # The driver and the session are both context managers; using them that way
+    # closes the connection even when one of the queries below raises.
+    with GraphDatabase.driver(URI) as driver, driver.session() as session:
+        # 1. Check Person nodes
+        result = session.run("MATCH (p:Person) RETURN p.name AS name, p.email AS email, p.role AS role, p.active AS active ORDER BY p.name")
+        persons = list(result)
+        print(f"Person nodes: {len(persons)}")
+        for record in persons:
+            print(f" - {record['name']} ({record['email']}) role={record['role']} active={record['active']}")
+
+        # 2. Check Company nodes
+        result = session.run("MATCH (c:Company) RETURN c.name AS name, c.founded AS founded, c.domain AS domain ORDER BY c.name")
+        companies = list(result)
+        print(f"\nCompany nodes: {len(companies)}")
+        for record in companies:
+            print(f" - {record['name']} (founded {record['founded']}, domain: {record['domain']})")
+
+        # 3. Check WORKS_AT relationships
+        result = session.run(
+            "MATCH (p:Person)-[r:WORKS_AT]->(c:Company) "
+            "RETURN p.name AS person, c.name AS company, r.since AS since, r.department AS department "
+            "ORDER BY p.name"
+        )
+        works_at = list(result)
+        print(f"\nWORKS_AT relationships: {len(works_at)}")
+        for record in works_at:
+            print(f" - {record['person']} -> {record['company']} (since {record['since']}, dept: {record['department']})")
+
+        # 4. Check KNOWS relationships
+        result = session.run(
+            "MATCH (a:Person)-[r:KNOWS]->(b:Person) "
+            "RETURN a.name AS from_person, b.name AS to_person, r.context AS context "
+            "ORDER BY a.name"
+        )
+        knows = list(result)
+        print(f"\nKNOWS relationships: {len(knows)}")
+        for record in knows:
+            print(f" - {record['from_person']} -> {record['to_person']} ({record['context']})")
+
+        # 5. Check Flyway migration history (stored as graph nodes by neo4j-flyway-database)
+        result = session.run(
+            "MATCH (m:__Neo4jMigration) "
+            "RETURN m.version AS version, m.description AS description "
+            "ORDER BY m.version"
+        )
+        migrations = list(result)
+        if migrations:
+            print("\nFlyway migration history:")
+            for record in migrations:
+                print(f" - V{record['version']}: {record['description']}")
+        else:
+            print("\nFlyway migration history: not found (check if neo4j-flyway-database plugin was loaded)")
+
+        # 6. Check migration chain (MIGRATED_TO relationships)
+        result = session.run(
+            "MATCH (a:__Neo4jMigration)-[r:MIGRATED_TO]->(b:__Neo4jMigration) "
+            "RETURN a.version AS from_v, b.version AS to_v "
+            "ORDER BY a.version"
+        )
+        chain = list(result)
+        if chain:
+            print("\nMigration chain:")
+            for record in chain:
+                print(f" - V{record['from_v']} -> V{record['to_v']}")
+
+        # 7. Summary
+        result = session.run("MATCH (n) RETURN count(n) AS nodes")
+        node_count = result.single()["nodes"]
+        result = session.run("MATCH ()-[r]->() RETURN count(r) AS rels")
+        rel_count = result.single()["rels"]
+        print(f"\nGraph summary: {node_count} nodes, {rel_count} relationships")
+
+    print("\nVerification complete.")
+
+
+if __name__ == "__main__":
+    main()