diff --git a/import/migrate/mongodb/README.md b/import/migrate/mongodb/README.md
new file mode 100644
index 0000000..74b7661
--- /dev/null
+++ b/import/migrate/mongodb/README.md
@@ -0,0 +1,44 @@
+# MongoDB to Memgraph Migration
+
+This directory contains scripts and configuration for migrating data from MongoDB to Memgraph using the built-in `migrate.mongodb()` procedure from MAGE. The migration demonstrates how to transfer a social network dataset from MongoDB's document-based structure to Memgraph's graph structure using Memgraph's native migration capabilities.
+
+## Overview
+
+### Installation
+
+1. **Clone and navigate to the directory:**
+   ```bash
+   cd import/migrate/mongodb/
+   ```
+
+2. **Install Python dependencies:**
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. **Start the services:**
+   ```bash
+   docker compose up -d
+   ```
+
+   This will start:
+   - MongoDB on port 27017
+   - Memgraph with MAGE on port 7687 (Bolt)
+
+## Usage
+
+### Step 1: Populate MongoDB
+
+Run the script to populate MongoDB with sample data:
+
+```bash
+python populate_mongodb.py
+```
+
+### Step 2: Run Migration
+
+Migrate data from MongoDB to Memgraph using the `migrate.mongodb()` procedure:
+
+```bash
+python migrate_to_memgraph.py
+```
diff --git a/import/migrate/mongodb/docker-compose.yml b/import/migrate/mongodb/docker-compose.yml
new file mode 100644
index 0000000..c534a45
--- /dev/null
+++ b/import/migrate/mongodb/docker-compose.yml
@@ -0,0 +1,29 @@
+services:
+  mongo:
+    image: mongo:6
+    restart: unless-stopped
+    ports:
+      - "27017:27017"
+    environment:
+      MONGO_INITDB_ROOT_USERNAME: root
+      MONGO_INITDB_ROOT_PASSWORD: example
+    command: ["--auth"]
+    volumes:
+      - mongo_data:/data/db
+
+  memgraph:
+    # NOTE(review): "custom" looks like a locally built tag - confirm it exists or pin a published memgraph-mage release.
+    image: memgraph/memgraph-mage:custom
+    container_name: memgraph
+    ports:
+      - "7687:7687"
+    volumes:
+      - memgraph_data:/var/lib/memgraph
+      - memgraph_logs:/var/log/memgraph
+    command: [--log-level=TRACE, --also-log-to-stderr=true, --query-execution-timeout-sec=0]
+
+
+volumes:
+  mongo_data:
+  memgraph_data:
+  memgraph_logs:
diff --git a/import/migrate/mongodb/migrate_to_memgraph.py b/import/migrate/mongodb/migrate_to_memgraph.py
new file mode 100644
index 0000000..3649c8b
--- /dev/null
+++ b/import/migrate/mongodb/migrate_to_memgraph.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+"""
+Migration script to transfer data from MongoDB to Memgraph using the migrate.mongodb() procedure.
+This script demonstrates how to use Memgraph's built-in migration capabilities to query MongoDB
+and create corresponding graph structures in Memgraph.
+"""
+
+from gqlalchemy import Memgraph
+import sys
+
+# Configuration
+MEMGRAPH_HOST = "localhost"
+MEMGRAPH_PORT = 7687
+
+def connect_memgraph():
+    """Connect to Memgraph; return the client, or None if the connection fails."""
+    try:
+        memgraph = Memgraph(host=MEMGRAPH_HOST, port=MEMGRAPH_PORT)
+        # execute_and_fetch() returns an iterator; consume it so the test query really runs
+        list(memgraph.execute_and_fetch("MATCH (n) RETURN count(n) as count"))
+        print("Successfully connected to Memgraph!")
+        return memgraph
+    except Exception as e:
+        print(f"Failed to connect to Memgraph: {e}")
+        return None
+
+def clear_memgraph(memgraph):
+    """Delete every node and relationship in Memgraph (errors are only printed)."""
+    try:
+        memgraph.execute("MATCH (n) DETACH DELETE n")
+        print("Cleared all data from Memgraph.")
+    except Exception as e:
+        print(f"Error clearing Memgraph: {e}")
+
+def migrate_users(memgraph):
+    """Migrate users via migrate.mongodb(); return the node count (0 on error)."""
+    print("Migrating users...")
+
+    try:
+        # Use migrate.mongodb() to get user data
+        results = list(memgraph.execute_and_fetch(
+            """
+            CALL migrate.mongodb("users",
+                {},
+                {
+                    projection: {_id: 1, name: 1, email: 1, age: 1, city: 1, profession: 1, created_at: 1, is_active: 1}
+                },
+                {
+                    host: "mongo",
+                    port: 27017,
+                    username: "root",
+                    password: "example",
+                    database: "social_network",
+                    authSource: "admin"
+                }
+            )
+            YIELD row
+            MERGE (u:User {user_id: row._id})
+            SET u += row
+            RETURN count(u) as users_migrated
+            """
+        ))
+
+        users_migrated = results[0]["users_migrated"] if results else 0
+        print(f"Migrated {users_migrated} users.")
+        return users_migrated
+
+    except Exception as e:
+        print(f"Error migrating users: {e}")
+        return 0
+
+def migrate_posts(memgraph):
+    """Migrate posts via migrate.mongodb(); return the node count (0 on error)."""
+    print("Migrating posts...")
+
+    try:
+        # Use migrate.mongodb() to get post data
+        results = list(memgraph.execute_and_fetch(
+            """
+            CALL migrate.mongodb("posts",
+                {},
+                {
+                    projection: {_id: 1, user_id: 1, title: 1, content: 1, tags: 1, likes: 1, created_at: 1}
+                },
+                {
+                    host: "mongo",
+                    port: 27017,
+                    username: "root",
+                    password: "example",
+                    database: "social_network",
+                    authSource: "admin"
+                }
+            )
+            YIELD row
+            MERGE (p:Post {post_id: row._id})
+            SET p += row
+            RETURN count(p) as posts_migrated
+            """
+        ))
+
+        posts_migrated = results[0]["posts_migrated"] if results else 0
+        print(f"Migrated {posts_migrated} posts.")
+        return posts_migrated
+
+    except Exception as e:
+        print(f"Error migrating posts: {e}")
+        return 0
+
+def migrate_comments(memgraph):
+    """Migrate comments via migrate.mongodb(); return the node count (0 on error)."""
+    print("Migrating comments...")
+
+    try:
+        # Use migrate.mongodb() to get comment data
+        results = list(memgraph.execute_and_fetch(
+            """
+            CALL migrate.mongodb("comments",
+                {},
+                {
+                    projection: {_id: 1, post_id: 1, user_id: 1, content: 1, created_at: 1}
+                },
+                {
+                    host: "mongo",
+                    port: 27017,
+                    username: "root",
+                    password: "example",
+                    database: "social_network",
+                    authSource: "admin"
+                }
+            )
+            YIELD row
+            MERGE (c:Comment {comment_id: row._id})
+            SET c += row
+            RETURN count(c) as comments_migrated
+            """
+        ))
+
+        comments_migrated = results[0]["comments_migrated"] if results else 0
+        print(f"Migrated {comments_migrated} comments.")
+        return comments_migrated
+
+    except Exception as e:
+        print(f"Error migrating comments: {e}")
+        return 0
+
+def create_relationships(memgraph):
+    """Create all relationship types and return the total number created."""
+    print("Creating relationships...")
+
+    # Create FOLLOWS relationships
+    follows_created = create_follows_relationships(memgraph)
+
+    # Create CREATED relationships (users -> posts)
+    created_relationships = create_created_relationships(memgraph)
+
+    # Create COMMENTED relationships (users -> comments -> posts)
+    commented_relationships = create_commented_relationships(memgraph)
+
+    print(f"Created {follows_created} FOLLOWS relationships.")
+    print(f"Created {created_relationships} CREATED relationships.")
+    print(f"Created {commented_relationships} COMMENTED relationships.")
+
+    return follows_created + created_relationships + commented_relationships
+
+def create_follows_relationships(memgraph):
+    """Create FOLLOWS relationships between users; return the count (0 on error)."""
+    try:
+        results = list(memgraph.execute_and_fetch(
+            """
+            CALL migrate.mongodb("relationships",
+                {type: "follows"},
+                {
+                    projection: {_id: 1, from_user: 1, to_user: 1, created_at: 1}
+                },
+                {
+                    host: "mongo",
+                    port: 27017,
+                    username: "root",
+                    password: "example",
+                    database: "social_network",
+                    authSource: "admin"
+                }
+            )
+            YIELD row
+            MATCH (from_user:User {user_id: row.from_user})
+            MATCH (to_user:User {user_id: row.to_user})
+            CREATE (from_user)-[r:FOLLOWS {created_at: row.created_at}]->(to_user)
+            RETURN count(r) as follows_created
+            """
+        ))
+
+        follows_created = results[0]["follows_created"] if results else 0
+        return follows_created
+
+    except Exception as e:
+        print(f"Error creating FOLLOWS relationships: {e}")
+        return 0
+
+def create_created_relationships(memgraph):
+    """Create CREATED relationships (user -> post); return the count (0 on error)."""
+    try:
+        results = list(memgraph.execute_and_fetch(
+            """
+            CALL migrate.mongodb("posts",
+                {},
+                {
+                    projection: {_id: 1, user_id: 1}
+                },
+                {
+                    host: "mongo",
+                    port: 27017,
+                    username: "root",
+                    password: "example",
+                    database: "social_network",
+                    authSource: "admin"
+                }
+            )
+            YIELD row
+            MATCH (user:User {user_id: row.user_id})
+            MATCH (post:Post {post_id: row._id})
+            CREATE (user)-[r:CREATED]->(post)
+            RETURN count(r) as created_relationships
+            """
+        ))
+
+        created_relationships = results[0]["created_relationships"] if results else 0
+        return created_relationships
+
+    except Exception as e:
+        print(f"Error creating CREATED relationships: {e}")
+        return 0
+
+def create_commented_relationships(memgraph):
+    """Create COMMENTED relationships (user -> comment -> post); return the count (0 on error)."""
+    try:
+        results = list(memgraph.execute_and_fetch(
+            """
+            CALL migrate.mongodb("comments",
+                {},
+                {
+                    projection: {_id: 1, user_id: 1, post_id: 1}
+                },
+                {
+                    host: "mongo",
+                    port: 27017,
+                    username: "root",
+                    password: "example",
+                    database: "social_network",
+                    authSource: "admin"
+                }
+            )
+            YIELD row
+            MATCH (user:User {user_id: row.user_id})
+            MATCH (comment:Comment {comment_id: row._id})
+            MATCH (post:Post {post_id: row.post_id})
+            CREATE (user)-[r1:COMMENTED]->(comment)
+            CREATE (comment)-[r2:COMMENTED]->(post)
+            RETURN count(r1) + count(r2) as commented_relationships
+            """
+        ))
+
+        commented_relationships = results[0]["commented_relationships"] if results else 0
+        return commented_relationships
+
+    except Exception as e:
+        print(f"Error creating COMMENTED relationships: {e}")
+        return 0
+
+def main():
+    """Run the full migration: connect, clear Memgraph, migrate nodes, create relationships."""
+    print("Starting MongoDB to Memgraph migration using migrate.mongodb()...")
+
+    # Connect to Memgraph
+    memgraph = connect_memgraph()
+    if not memgraph:
+        print("Failed to connect to Memgraph. Exiting.")
+        sys.exit(1)
+
+    try:
+        # Clear existing data
+        clear_memgraph(memgraph)
+
+        # Migrate data using migrate.mongodb() procedure
+        users_count = migrate_users(memgraph)
+        posts_count = migrate_posts(memgraph)
+        comments_count = migrate_comments(memgraph)
+        relationships_count = create_relationships(memgraph)
+
+        print("\nMigration completed successfully!")
+        print("Summary:")
+        print(f"- Users migrated: {users_count}")
+        print(f"- Posts migrated: {posts_count}")
+        print(f"- Comments migrated: {comments_count}")
+        print(f"- Relationships created: {relationships_count}")
+
+        print("\nYou can now query your graph data in Memgraph.")
+        print("\nExample queries:")
+        print("MATCH (u:User) RETURN u LIMIT 5;")
+        print("MATCH (u:User)-[:FOLLOWS]->(f:User) RETURN u.name, f.name;")
+        print("MATCH (u:User)-[:CREATED]->(p:Post) RETURN u.name, p.title;")
+
+    except Exception as e:
+        print(f"Migration failed: {e}")
+        sys.exit(1)
+    finally:
+        print("Migration process completed.")
+
+if __name__ == "__main__":
+    main()
diff --git a/import/migrate/mongodb/populate_mongodb.py b/import/migrate/mongodb/populate_mongodb.py
new file mode 100644
index 0000000..31c6ca1
--- /dev/null
+++ b/import/migrate/mongodb/populate_mongodb.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+"""
+Script to populate MongoDB with sample data for migration to Memgraph.
+This creates a social network dataset with users, posts, and relationships.
+"""
+
+import pymongo
+from datetime import datetime, timedelta, timezone
+import random
+import sys
+
+def connect_to_mongodb():
+    """Connect to MongoDB with authentication; exit the process on failure."""
+    try:
+        client = pymongo.MongoClient(
+            "mongodb://root:example@localhost:27017/",
+            authSource="admin"
+        )
+        # Test the connection
+        client.admin.command('ping')
+        print("Successfully connected to MongoDB!")
+        return client
+    except Exception as e:
+        print(f"Failed to connect to MongoDB: {e}")
+        sys.exit(1)
+
+def create_sample_data():
+    """Return the sample (users, posts, relationships, comments) document lists."""
+    # PyMongo treats naive datetimes as UTC, so use one timezone-aware
+    # timestamp as the reference for every created_at value.
+    now = datetime.now(timezone.utc)
+
+    # Sample users data
+    users = [
+        {
+            "_id": "user_1",
+            "name": "Alice Johnson",
+            "email": "alice@example.com",
+            "age": 28,
+            "city": "New York",
+            "profession": "Software Engineer",
+            "created_at": now - timedelta(days=365),
+            "is_active": True
+        },
+        {
+            "_id": "user_2",
+            "name": "Bob Smith",
+            "email": "bob@example.com",
+            "age": 32,
+            "city": "San Francisco",
+            "profession": "Data Scientist",
+            "created_at": now - timedelta(days=300),
+            "is_active": True
+        },
+        {
+            "_id": "user_3",
+            "name": "Carol Davis",
+            "email": "carol@example.com",
+            "age": 25,
+            "city": "Seattle",
+            "profession": "Product Manager",
+            "created_at": now - timedelta(days=200),
+            "is_active": True
+        },
+        {
+            "_id": "user_4",
+            "name": "David Wilson",
+            "email": "david@example.com",
+            "age": 35,
+            "city": "Boston",
+            "profession": "DevOps Engineer",
+            "created_at": now - timedelta(days=150),
+            "is_active": False
+        },
+        {
+            "_id": "user_5",
+            "name": "Eva Brown",
+            "email": "eva@example.com",
+            "age": 29,
+            "city": "Austin",
+            "profession": "UX Designer",
+            "created_at": now - timedelta(days=100),
+            "is_active": True
+        }
+    ]
+
+    # Sample posts data
+    posts = [
+        {
+            "_id": "post_1",
+            "user_id": "user_1",
+            "title": "Learning Graph Databases",
+            "content": "Just started exploring Memgraph and graph databases. The performance is amazing!",
+            "tags": ["graph", "database", "memgraph"],
+            "likes": 15,
+            "created_at": now - timedelta(days=5)
+        },
+        {
+            "_id": "post_2",
+            "user_id": "user_2",
+            "title": "Data Science Insights",
+            "content": "Working on a new machine learning project using graph neural networks.",
+            "tags": ["ml", "data-science", "gnn"],
+            "likes": 23,
+            "created_at": now - timedelta(days=3)
+        },
+        {
+            "_id": "post_3",
+            "user_id": "user_3",
+            "title": "Product Strategy",
+            "content": "Planning the roadmap for our new graph analytics platform.",
+            "tags": ["product", "strategy", "analytics"],
+            "likes": 8,
+            "created_at": now - timedelta(days=1)
+        },
+        {
+            "_id": "post_4",
+            "user_id": "user_1",
+            "title": "Cypher Query Tips",
+            "content": "Here are some advanced Cypher patterns I've been using lately.",
+            "tags": ["cypher", "queries", "tips"],
+            "likes": 31,
+            "created_at": now - timedelta(hours=12)
+        },
+        {
+            "_id": "post_5",
+            "user_id": "user_5",
+            "title": "Design Systems",
+            "content": "Building a comprehensive design system for our graph visualization tools.",
+            "tags": ["design", "ui", "visualization"],
+            "likes": 12,
+            "created_at": now - timedelta(hours=6)
+        }
+    ]
+
+    # Sample relationships data
+    relationships = [
+        {
+            "_id": "rel_1",
+            "from_user": "user_1",
+            "to_user": "user_2",
+            "type": "follows",
+            "created_at": now - timedelta(days=200)
+        },
+        {
+            "_id": "rel_2",
+            "from_user": "user_1",
+            "to_user": "user_3",
+            "type": "follows",
+            "created_at": now - timedelta(days=150)
+        },
+        {
+            "_id": "rel_3",
+            "from_user": "user_2",
+            "to_user": "user_1",
+            "type": "follows",
+            "created_at": now - timedelta(days=180)
+        },
+        {
+            "_id": "rel_4",
+            "from_user": "user_3",
+            "to_user": "user_5",
+            "type": "follows",
+            "created_at": now - timedelta(days=50)
+        },
+        {
+            "_id": "rel_5",
+            "from_user": "user_4",
+            "to_user": "user_1",
+            "type": "follows",
+            "created_at": now - timedelta(days=100)
+        },
+        {
+            "_id": "rel_6",
+            "from_user": "user_5",
+            "to_user": "user_2",
+            "type": "follows",
+            "created_at": now - timedelta(days=30)
+        }
+    ]
+
+    # Sample comments data
+    comments = [
+        {
+            "_id": "comment_1",
+            "post_id": "post_1",
+            "user_id": "user_2",
+            "content": "Great post! I've been using Memgraph for similar projects.",
+            "created_at": now - timedelta(days=4)
+        },
+        {
+            "_id": "comment_2",
+            "post_id": "post_1",
+            "user_id": "user_3",
+            "content": "Thanks for sharing these insights!",
+            "created_at": now - timedelta(days=3)
+        },
+        {
+            "_id": "comment_3",
+            "post_id": "post_2",
+            "user_id": "user_1",
+            "content": "Very interesting approach to GNNs!",
+            "created_at": now - timedelta(days=2)
+        },
+        {
+            "_id": "comment_4",
+            "post_id": "post_4",
+            "user_id": "user_5",
+            "content": "These Cypher patterns are really helpful!",
+            "created_at": now - timedelta(hours=8)
+        }
+    ]
+
+    return users, posts, relationships, comments
+
+def populate_database(client):
+    """Drop and repopulate the social_network collections, then create indexes and print counts."""
+    db = client.social_network
+
+    # Clear existing data
+    print("Clearing existing data...")
+    db.users.drop()
+    db.posts.drop()
+    db.relationships.drop()
+    db.comments.drop()
+
+    # Get sample data
+    users, posts, relationships, comments = create_sample_data()
+
+    # Insert data
+    print("Inserting users...")
+    db.users.insert_many(users)
+
+    print("Inserting posts...")
+    db.posts.insert_many(posts)
+
+    print("Inserting relationships...")
+    db.relationships.insert_many(relationships)
+
+    print("Inserting comments...")
+    db.comments.insert_many(comments)
+
+    # Create indexes for better performance
+    print("Creating indexes...")
+    db.users.create_index("email", unique=True)
+    db.users.create_index("city")
+    db.posts.create_index("user_id")
+    db.posts.create_index("created_at")
+    db.relationships.create_index([("from_user", 1), ("to_user", 1)])
+    db.comments.create_index("post_id")
+
+    print("Database populated successfully!")
+
+    # Print summary
+    print("\nSummary:")
+    print(f"- Users: {db.users.count_documents({})}")
+    print(f"- Posts: {db.posts.count_documents({})}")
+    print(f"- Relationships: {db.relationships.count_documents({})}")
+    print(f"- Comments: {db.comments.count_documents({})}")
+
+def main():
+    """Connect to MongoDB, populate it with the sample data, and always close the connection."""
+    print("Starting MongoDB population...")
+
+    # Connect to MongoDB
+    client = connect_to_mongodb()
+
+    try:
+        # Populate the database
+        populate_database(client)
+    except Exception as e:
+        print(f"Error populating database: {e}")
+        sys.exit(1)
+    finally:
+        # Close the connection
+        client.close()
+        print("MongoDB connection closed.")
+
+if __name__ == "__main__":
+    main()
diff --git a/import/migrate/mongodb/requirements.txt b/import/migrate/mongodb/requirements.txt
new file mode 100644
index 0000000..7a181ea
--- /dev/null
+++ b/import/migrate/mongodb/requirements.txt
@@ -0,0 +1,3 @@
+gqlalchemy==1.4.1
+pymongo==4.6.1
+python-dotenv==1.0.0