diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..00cf7efde --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +# PostgreSQL +# ------------------------------------------------------------------------------ +POSTGRES_HOST=postgres +POSTGRES_PORT=5432 +POSTGRES_DB=bookstore +POSTGRES_USER=debug +POSTGRES_PASSWORD=debug +DATABASE_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}" +MODEL_NAME="all-MiniLM-L6-v2" +ORDER_EXECUTOR_REPLICAS=3 +REDIS_HOST=redis +REDIS_PORT=6379 + + diff --git a/.gitignore b/.gitignore index ed8ebf583..b2704623e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,7 @@ -__pycache__ \ No newline at end of file +__pycache__ +.env +books.csv +postgres_data/ +ai/ +.vscode/ +logs/ diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..332015392 --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +test_bookstore_post: + curl 'http://localhost:8081/checkout' \ + -H 'Accept: */*' \ + -H 'Accept-Language: en-US,en;q=0.9' \ + -H 'Cache-Control: no-cache' \ + -H 'Connection: keep-alive' \ + -H 'Content-Type: application/json' \ + -H 'DNT: 1' \ + -H 'Origin: http://localhost:8080' \ + -H 'Pragma: no-cache' \ + -H 'Referer: http://localhost:8080/' \ + -H 'Sec-Fetch-Dest: empty' \ + -H 'Sec-Fetch-Mode: cors' \ + -H 'Sec-Fetch-Site: same-site' \ + -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36' \ + -H 'sec-ch-ua: "Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"' \ + -H 'sec-ch-ua-mobile: ?0' \ + -H 'sec-ch-ua-platform: "macOS"' \ + --data-raw '{"user":{"name":"John Doe","contact":"john.doe@example.ru"},"creditCard":{"number":"6011111111111117","expirationDate":"12/95","cvv":"123"},"userComment":"Please handle with care.","items":[{"name":"Book A","quantity":1},{"name":"Book B","quantity":2}],"billingAddress":{"street":"123 Main 
St","city":"Springfield","state":"IL","zip":"62701","country":"USA"},"shippingMethod":"Standard","giftWrapping":true,"termsAndConditionsAccepted":true}' +proto: + python -m grpc_tools.protoc -I=utils/pb --python_out=utils/pb --grpc_python_out=utils/pb utils/pb/order_executor/order_executor.proto --proto_path=utils/pb --python_out=utils/pb --grpc_python_out=utils/pb +start: + docker-compose up --build --scale order_executor=$(ORDER_EXECUTOR_REPLICAS) diff --git a/data/books.sample.csv b/data/books.sample.csv new file mode 100644 index 000000000..5970177cc --- /dev/null +++ b/data/books.sample.csv @@ -0,0 +1,2 @@ +Title,Author,Genre,SubGenre,Height,Publisher +Fundamentals of Wavelets,"Goswami, Jaideva",tech,signal_processing,228,Wiley diff --git a/db/Dockerfile b/db/Dockerfile new file mode 100644 index 000000000..4eb83b46a --- /dev/null +++ b/db/Dockerfile @@ -0,0 +1,14 @@ +FROM docker.io/postgres:16 + +RUN apt-get update && apt-get install -y \ + git \ + build-essential \ + postgresql-server-dev-$PG_MAJOR \ + && rm -rf /var/lib/apt/lists/* + +RUN git clone --branch v0.7.0 https://github.com/pgvector/pgvector.git \ + && cd pgvector \ + && make \ + && make install + +COPY ./db/init-extensions.sql /docker-entrypoint-initdb.d/ diff --git a/db/dummy_data/bookstore.sql b/db/dummy_data/bookstore.sql new file mode 100644 index 000000000..2e390646c Binary files /dev/null and b/db/dummy_data/bookstore.sql differ diff --git a/db/init-extensions.sql b/db/init-extensions.sql new file mode 100644 index 000000000..0aa0fc225 --- /dev/null +++ b/db/init-extensions.sql @@ -0,0 +1 @@ +CREATE EXTENSION IF NOT EXISTS vector; diff --git a/docker-compose.yaml b/docker-compose.yaml index b4a60a537..92cb0f935 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -14,6 +14,8 @@ services: # Mount the frontend directory - ./frontend/src:/usr/share/nginx/html orchestrator: + env_file: + - .env build: # Use the current directory as the build context # This allows us to access the files 
in the current directory inside the Dockerfile @@ -35,7 +37,10 @@ services: - ./utils:/app/utils # Mount the orchestrator/src directory in the current directory to the /app/orchestrator/src directory in the container - ./orchestrator/src:/app/orchestrator/src + - ./orchestrator/logs:/app/orchestrator/logs fraud_detection: + env_file: + - .env build: # Use the current directory as the build context # This allows us to access the files in the current directory inside the Dockerfile @@ -56,4 +61,108 @@ services: # Mount the utils directory in the current directory to the /app/utils directory in the container - ./utils:/app/utils # Mount the fraud_detection/src directory in the current directory to the /app/fraud_detection/src directory in the container - - ./fraud_detection/src:/app/fraud_detection/src \ No newline at end of file + - ./fraud_detection/src:/app/fraud_detection/src + suggestions: + build: + context: ./ + dockerfile: ./suggestions/Dockerfile + env_file: + - .env + ports: + - 50053:50053 + environment: + # Pass the environment variables to the container + # The PYTHONUNBUFFERED environment variable ensures that the output from the application is logged to the console + - PYTHONUNBUFFERED=TRUE + # The PYTHONFILE environment variable specifies the absolute entry point of the application + - PYTHONFILE=/app/suggestions/src/app.py + volumes: + # Mount the utils directory in the current directory to the /app/utils directory in the container + - ./utils:/app/utils + - ./suggestions/src:/app/suggestions/src + - ./ai/models:/app/ai/models/ + + postgres: + build: + context: . 
+ dockerfile: ./db/Dockerfile + image: bookstore_postgres + container_name: bookstore_postgres + volumes: + - ./postgres_data:/var/lib/postgresql/data + - ./db/init-extensions.sql:/docker-entrypoint-initdb.d/init-extensions.sql + - ./db/dummy_data/bookstore.sql:/docker-entrypoint-initdb.d/bookstore.sql + env_file: + - .env + ports: + - "5432:5432" + + transaction_verification: + env_file: + - .env + build: + # Use the current directory as the build context + # This allows us to access the files in the current directory inside the Dockerfile + context: ./ + # Use the Dockerfile in the transaction_verification directory + dockerfile: ./transaction_verification/Dockerfile + ports: + # Expose port 50052 on the host, mapped to port 50052 of the container + - 50052:50052 + environment: + # Pass the environment variables to the container + # The PYTHONUNBUFFERED environment variable ensures that the output from the application is logged to the console + - PYTHONUNBUFFERED=TRUE + # The PYTHONFILE environment variable specifies the absolute entry point of the application + # Check app.py in the fraud_detection directory to see how this is used + - PYTHONFILE=/app/transaction_verification/src/app.py + volumes: + # Mount the utils directory in the current directory to the /app/utils directory in the container + - ./utils:/app/utils + # Mount the transaction_verification/src directory in the current directory to the /app/transaction_verification/src directory in the container + - ./transaction_verification/src:/app/transaction_verification/src + + redis: + image: redis:7 + ports: + - "6379:6379" + volumes: + - redis_data:/data + command: ["redis-server", "--appendonly", "yes"] + + order_executor: + env_file: + - .env + build: + context: . 
+ dockerfile: ./order_executor/Dockerfile + depends_on: + - suggestions + - transaction_verification + - fraud_detection + - redis + environment: + - PYTHONUNBUFFERED=TRUE + - NODE_ID=${NODE_ID} + - PYTHONFILE=/app/order_executor/src/app.py + volumes: + - ./utils:/app/utils + - ./order_executor/src:/app/order_executor/src + command: ["python", "order_executor/src/app.py"] + + order_debug: + build: + context: . + dockerfile: ./order_executor/Dockerfile + volumes: + - ./order_executor/src:/app/order_executor/src + - ./utils:/app/utils + command: ["python", "order_executor/src/debug.py"] + + +volumes: + redis_data: + + + + diff --git a/docs/README.md b/docs/README.md index 75ae1828a..433e3a37b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,3 +1,51 @@ -# Documentation +## Overview +The Bookstore Application follows a modern microservices-based architecture. The architecture consists of three primary layers: +1. **Client-Facing Layer** +2. **Microservices Layer (gRPC Services)** +3. **Data Layer** -This folder should contain your documentation, explaining the structure and content of your project. It should also contain your diagrams, explaining the architecture. The recommended writing format is Markdown. +## Client-Facing Layer +### REST Checkout API +The **REST Checkout API** serves as the single entry point for client interactions. It handles customer checkout requests and orchestrates calls to the underlying microservices. Key features include: +- Managing checkout requests efficiently. +- Doing validation using schemas and data types. +- Orchestrating multiple microservices to complete the checkout process. + +#### Threading Implementation +The checkout process implements threading to enhance performance by: +- Processing multiple microservice calls concurrently. + +## Microservices Layer (gRPC Services) +This layer consists of independent services that handle specific tasks and communicate via **gRPC** + +### 1. 
Suggestions Microservice +- Connects to a **PostgreSQL database** with the **pgvector** extension. +- Uses **sentence-transformers** to generate vector embeddings of book descriptions, its title and the author. +- Provides **personalized book recommendations** based on vector similarity. +- Enables **efficient semantic search** capabilities for books. + +### 2. Fraud Detection Service +- Analyzes checkout transactions for potential fraud. +- Returns a **risk assessment score** to the checkout service to prevent fraudulent activities. + +### 3. Validation Service +- Performs **credit card validation** using the **Luhn algorithm** to verify card numbers. +- Checks **expiration dates** to ensure the card is still valid. +- Provides a **fast validation layer** before payment processing to prevent invalid transactions. + +## Data Layer +### PostgreSQL Database +- Serves as the **primary data store** for the application. +- Stores book metadata, user transactions, and other essential records. + +### pgvector Extension +- A **specialized PostgreSQL extension** that enables efficient **vector operations and similarity searches**. + +## Architecture Diagram + +--- + +## System Diagram + + +--- diff --git a/docs/checkpoint-2.md b/docs/checkpoint-2.md new file mode 100644 index 000000000..065c1d9a6 --- /dev/null +++ b/docs/checkpoint-2.md @@ -0,0 +1,147 @@ +# π Order Executor Working + +``` +ββββββββββββββββ gRPC ββββββββββββββββββββββ +β Order Queue ββββββββββββββΆβ Order Executor β +ββββββββββββββββ β (replicated) β + ββββββββ¬βββββββββββββββ + β + β gRPC + ββββββββββββββββββββββββββββββΌβββββββββββββββββββββββββββββ + β β β +βββββββββββββββββ ββββββββββββββββββββββ ββββββββββββββββββββββ +β fraud_detectionβββββββββΆβ transaction_verif. βββββββΆβ suggestions β +βββββββββββββββββ ββββββββββββββββββββββ ββββββββββββββββββββββ +``` + +### Keeping Orchestrator and Order Executor separate: +1. `orchestrator` is for HTTP requests from frontend. +2. 
`order_executor` is for background tasks, handled internally in the system. + +``` +[Frontend Browser] ---> [Orchestrator (Flask)] ---> [Microservices via gRPC] + +[Background System / Queue] ---> [Order Executor (Worker)] ---> [Microservices via gRPC] +``` + +### Dequeues +``` + [ Order Queue ] β shared (e.g., Redis) + β + βββββββββββΌββββββββββ + β β + [order_executor_1] [order_executor_2] + (Leader) (Follower) + β + Dequeues + Executes +``` + +# π Project Documentation + +This document explains the architecture and inner workings of the order-processing system involving multiple microservices, Redis-based leader election, and vector clocks. + +--- + +## π System Model + +### π¦ Architecture Type +- **Architecture**: Microservice-based architecture +- **Communication**: Services interact via gRPC +- **Coordination**: Leader election and job coordination via Redis + +### π Components +- **Order Executor**: Multiple replicas, one elected as leader to handle orders. +- **Redis**: Centralized coordination for: + - Leader election (via `SETNX` pattern) + - Order queue (`LPUSH`/`LPOP`) +- **Fraud Detection Service**: Validates the legitimacy of an order. +- **Transaction Verification Service**: Verifies payment details. +- **Suggestion Service**: Recommends related books for the order. +- **PostgreSQL**: Stores book and order information. +- **Orchestrator**: Acts as frontend API layer. + +### β» Flow of Events +1. Client places order via frontend. +2. Order is pushed to Redis queue. +3. OrderExecutor leader pops from queue and dispatches it to the relevant services. +4. Each service responds back with data. +5. Logs are maintained per service for audit/debug. + +### π₯ Failure Modes +- **OrderExecutor crash**: Redis automatically allows other replicas to contend for leadership. +- **Redis failure**: Coordination and queue operations halt until Redis is restored. +- **Microservice crash**: gRPC call fails; logged by OrderExecutor and retried/reported. 
+ +--- + +## β» Leader Election Diagram + +### Algorithm Used: Redis-based lock (`SETNX`) + +``` +[Initial State] +No leader β All executors try to acquire lock + +[Frame 1] +Executor-2 acquires lock: + Redis key: leader_lock = Executor-2 + Logs: [Executor-2] Became leader + +[Frame 2] +Executor-1, Executor-3: + Check Redis β Not leader + Logs: [Executor-X] Not leader. Waiting... + +[Frame 3] +Executor-2 renews the lock every 10 seconds +If it crashes, key expires β New election happens +``` + +--- + +## β± Vector Clocks Diagram + +### Scenario: Order is placed and processed through services + +``` +Processes: E (Executor), F (Fraud), T (Transaction), S (Suggestion) + +[Event 1] Order placed by user +E: [1,0,0,0] + +[Event 2] Executor sends to Fraud +E: [2,0,0,0] β F: [2,1,0,0] + +[Event 3] Executor sends to Transaction +E: [3,1,0,0] β T: [3,1,1,0] + +[Event 4] Executor sends to Suggestion +E: [4,1,1,0] β S: [4,1,1,1] + +[Event 5] Executor processes responses +E: [5,1,1,1] +``` + +Each number represents the logical clock of that process. Vector clock updates ensure causality and ordering. + +--- + +## π Notes +- Vector clocks and leader election are simplified representations based on lecture models. +- gRPC ensures reliable structured communication. +- Redis lock + TTL gives fault tolerance during leadership handoff. +- Microservices are loosely coupled and easily scalable. 
+ +--- + +## π Directory Reference +``` +/utils/pb/* β Generated gRPC protobufs +/order_executor/src/app.py β Leader election and dispatcher +/order_executor/src/debug.pyβ Flask-based debug server +/fraud_detection/src/* β Fraud analysis logic +/suggestions/src/* β Book recommendation engine +/transaction_verification/src/* β Payment check logic +/orchestrator/src/* β Main API handler +``` + diff --git a/docs/images/architecture-diagram.png b/docs/images/architecture-diagram.png new file mode 100644 index 000000000..3b77728b8 Binary files /dev/null and b/docs/images/architecture-diagram.png differ diff --git a/docs/images/system-diagram.png b/docs/images/system-diagram.png new file mode 100644 index 000000000..1bae360ff Binary files /dev/null and b/docs/images/system-diagram.png differ diff --git a/docs/system-diagram.mmd b/docs/system-diagram.mmd new file mode 100644 index 000000000..54955f7d6 --- /dev/null +++ b/docs/system-diagram.mmd @@ -0,0 +1,19 @@ +sequenceDiagram + participant User + participant REST API + participant Transaction Verification Service + participant Fraud Detection Service + participant Suggestions Service + participant PostgreSQL + + User->>REST API: Initiates checkout request + REST API-->>+Transaction Verification Service: Validate credit card (Luhn Algorithm) + REST API-->>+Fraud Detection Service: Analyze transaction risk + REST API-->>+Suggestions Service: Fetch book recommendations + + Fraud Detection Service-->>REST API: Risk assessment + Suggestions Service-->>PostgreSQL: Query book metadata (pgvector) + PostgreSQL-->>Suggestions Service: Return book similarity results + Suggestions Service-->>REST API: Recommended books + + REST API->>User: Returns checkout response (success/fail) + recommendations diff --git a/fraud_detection/Dockerfile b/fraud_detection/Dockerfile index 341df7f6f..0d6396f56 100644 --- a/fraud_detection/Dockerfile +++ b/fraud_detection/Dockerfile @@ -1,15 +1,11 @@ -# Use an official Python runtime as the base image 
-FROM python:3.11 -# Set the working directory in the container -# Both the utils and src folders will be mounted as volumes, please see docker-compose.yaml +FROM python:3.11 WORKDIR /app -# Copy the requirements file to the working directory COPY ./fraud_detection/requirements.txt . -# Install the Python dependencies RUN pip install --no-cache-dir -r requirements.txt -# Set the command to run the application -CMD python utils/other/hotreload.py "fraud_detection/src/app.py" \ No newline at end of file +COPY fraud_detection /app/fraud_detection + +CMD python utils/other/hotreload.py "fraud_detection/src/app.py" diff --git a/fraud_detection/requirements.txt b/fraud_detection/requirements.txt index a80eedef7..a98819dbf 100644 --- a/fraud_detection/requirements.txt +++ b/fraud_detection/requirements.txt @@ -1,4 +1,8 @@ -grpcio==1.60.0 -grpcio-tools==1.60.0 -protobuf==4.25.2 +grpcio==1.70.0 +grpcio-tools==1.70.0 +protobuf==5.29.3 watchdog==6.0.0 +pandas==2.1.3 +joblib==1.3.2 +scikit-learn==1.5.2 +geoip2==5.0.1 diff --git a/fraud_detection/src/GeoLite2-Country.mmdb b/fraud_detection/src/GeoLite2-Country.mmdb new file mode 100644 index 000000000..527ca2eed Binary files /dev/null and b/fraud_detection/src/GeoLite2-Country.mmdb differ diff --git a/fraud_detection/src/app.py b/fraud_detection/src/app.py index b2f1d2fce..00de437e2 100644 --- a/fraud_detection/src/app.py +++ b/fraud_detection/src/app.py @@ -1,45 +1,236 @@ +import logging import sys import os - -# This set of lines are needed to import the gRPC stubs. -# The path of the stubs is relative to the current file, or absolute inside the container. -# Change these lines only if strictly needed. 
-FILE = __file__ if '__file__' in globals() else os.getenv("PYTHONFILE", "") -fraud_detection_grpc_path = os.path.abspath(os.path.join(FILE, '../../../utils/pb/fraud_detection')) -sys.path.insert(0, fraud_detection_grpc_path) -import fraud_detection_pb2 as fraud_detection -import fraud_detection_pb2_grpc as fraud_detection_grpc - import grpc from concurrent import futures +import pandas as pd +import geoip2.database +import joblib + +logger = logging.getLogger(__name__) + +FRAUD_THRESHOLD = float(os.environ.get("FRAUD_THRESHOLD", "0.7")) + +FILE = __file__ if "__file__" in globals() else os.getenv("PYTHONFILE", "") +grpc_path = os.path.abspath(os.path.join(FILE, "../../../utils/pb/fraud_detection")) +vector_clock_path = os.path.abspath(os.path.join(FILE, "../../../utils/vector_clock")) +sys.path.insert(0, grpc_path) +sys.path.insert(0, vector_clock_path) + +import fraud_detection_pb2 as fd_pb2 +import fraud_detection_pb2_grpc as fd_pb2_grpc + +from vector_clock import OrderEventTracker + +GEOIP_PATH = os.path.join(os.path.dirname(FILE), "GeoLite2-Country.mmdb") # FILE, not __file__, so the PYTHONFILE fallback also covers the asset paths +MODEL_PATH = os.path.join(os.path.dirname(FILE), "fraud_model.pkl") +ENCODERS_PATH = os.path.join(os.path.dirname(FILE), "label_encoders.pkl") + +logging.basicConfig(level=logging.INFO) + +model = None +label_encoders = None +geoip_reader = None + + +def load_models(): + global model, label_encoders, geoip_reader + if not os.path.exists(MODEL_PATH): + print(f"ERROR: fraud_model.pkl is missing at {MODEL_PATH}") + sys.exit(1) # sys.exit rather than the site-provided exit() helper, which is not guaranteed to exist + if not os.path.exists(ENCODERS_PATH): + print(f"ERROR: label_encoders.pkl is missing at {ENCODERS_PATH}") + sys.exit(1) + + print(f"Loading model from: {MODEL_PATH}") + model = joblib.load(MODEL_PATH) + print(f"Loading encoders from: {ENCODERS_PATH}") + label_encoders = joblib.load(ENCODERS_PATH) + print("Model and encoders loaded successfully") + + try: + geoip_reader = geoip2.database.Reader(GEOIP_PATH) + print("GeoIP database loaded successfully") + except Exception as e: +
print(f"Could not load GeoIP database: {e}. IP verification will be disabled.") + geoip_reader = None + + +load_models() + + +def get_country_from_ip(ip_address, reader): + try: + response = reader.country(ip_address) + return response.country.name + except Exception: # lookup miss, malformed IP, or reader is None because the GeoIP DB failed to load; no longer swallows KeyboardInterrupt/SystemExit + return "Unknown" + + +def predict_fraud(order_data): + """ + Use the pre-loaded model and label_encoders to predict fraud for the stored transaction. + order_data: dict with keys: billing_city, billing_country, amount, payment_method, ip_address + """ + ip_address = order_data["ip_address"] + ip_country = get_country_from_ip(ip_address, geoip_reader) + + # Prepare data for the model + import pandas as pd -# Create a class to define the server functions, derived from -# fraud_detection_pb2_grpc.HelloServiceServicer -class HelloService(fraud_detection_grpc.HelloServiceServicer): - # Create an RPC function to say hello - def SayHello(self, request, context): - # Create a HelloResponse object - response = fraud_detection.HelloResponse() - # Set the greeting field of the response object - response.greeting = "Hello, " + request.name - # Print the greeting message - print(response.greeting) - # Return the response object + input_data = pd.DataFrame( + { + "billing_city": [order_data["billing_city"]], + "billing_country": [order_data["billing_country"]], + "amount": [order_data["amount"]], + "payment_method": [order_data["payment_method"]], + } + ) + + # Transform categorical variables + for col, le in label_encoders.items(): + if col in input_data.columns: + try: + input_data[col] = le.transform(input_data[col]) + except ValueError: + input_data[col] = -1 # fallback for unseen categories + + # Predict + fraud_probability = model.predict_proba(input_data)[0][1] + + # ip_country mismatch + ip_country_mismatch = ( + ip_country != order_data["billing_country"] and ip_country != "Unknown" + ) + if ip_country_mismatch: + fraud_probability = max(fraud_probability, 0.5) + + # also check for high-risk countries or payment +
is_high_risk_country = order_data["billing_country"] in [ + "Russia", + "North Korea", + "Syria", + ] + is_high_risk_payment = order_data["payment_method"] == "Crypto" + + details = { + "ip_country": ip_country, + "ip_country_mismatch": ip_country_mismatch, + "high_risk_country": is_high_risk_country, + "high_risk_payment": is_high_risk_payment, + "model_score": float(fraud_probability), # NOTE(review): same value as final_score; the IP-mismatch floor of 0.5 has already been applied, so this is not the raw model output + "final_score": float(fraud_probability), + } + return fraud_probability, details + + +class FraudDetectionService(fd_pb2_grpc.FraudDetectionServiceServicer): + # Keep a dictionary for orders, storing data from InitializeOrder + def __init__(self): + self.orders = {} + self.service_name = "fraud_detection" + + self.order_event_tracker = OrderEventTracker() + + def InitializeOrder(self, request, context): + """ + Cache the order data, do not run final logic yet. + """ + order_id = request.order_id + self.orders[order_id] = { + "amount": request.amount, + "ip_address": request.ip_address, + "email": request.email, + "billing_country": request.billing_country, + "billing_city": request.billing_city, + "payment_method": request.payment_method, + } + print( + f"[FraudDetection] Initialized order {order_id} with data: {self.orders[order_id]}" + ) + + received_clock = dict(request.vectorClock) + if not self.order_event_tracker.order_exists(order_id): + self.order_event_tracker.initialize_order(order_id) # first event this service has seen for the order + logger.info( + f"[FraudDetection] Initialized order {order_id} with vector clock" + ) + + updated_clock = self.order_event_tracker.record_event( + order_id=order_id, + service=self.service_name, + event_name=self.service_name + ".InitializeOrder", + received_clock=received_clock, + ) + + return fd_pb2.FraudInitResponse(success=True, vectorClock=updated_clock) + + def CheckFraud(self, request, context): + """ + The final check: retrieve stored data if it exists, run the logic, return a decision. + If no stored data found, fallback to request directly (or reject).
+ """ + order_id = request.order_id + stored = self.orders.get(order_id) + + # If missing, either fallback or reject + if not stored: + # fallback or partial logic using request + print( + f"[FraudDetection] order_id {order_id} not found in self.orders. Fallback to request data." + ) + # If we want to just do immediate logic: + stored = { + "amount": request.amount, + "ip_address": request.ip_address, + "email": request.email, + "billing_country": request.billing_country, + "billing_city": request.billing_city, + "payment_method": request.payment_method, + } + + fraud_probability, details = predict_fraud(stored) + + action = "APPROVE" + reasons = [] + if fraud_probability > FRAUD_THRESHOLD: + action = "REJECT" + if details["model_score"] > 0.5: + reasons.append("Transaction pattern matches known fraudulent behavior") + if details["high_risk_country"]: + reasons.append("Transaction originates from a high-risk country") + if details["high_risk_payment"]: + reasons.append("High-risk payment method") + if details["ip_country_mismatch"]: + reasons.append( + f"IP location ({details['ip_country']}) doesn't match billing country ({stored['billing_country']})" + ) + + print( + f"[FraudDetection] Response: action={action}, reasons={reasons}, details={details}" + ) + + response = fd_pb2.FraudResponse( + fraud_probability=float(fraud_probability), + action=action, + # Convert 'details' dict to google.protobuf.Struct automatically + details=details, + reasons=reasons, + ) return response + def serve(): - # Create a gRPC server server = grpc.server(futures.ThreadPoolExecutor()) - # Add HelloService - fraud_detection_grpc.add_HelloServiceServicer_to_server(HelloService(), server) - # Listen on port 50051 + fd_pb2_grpc.add_FraudDetectionServiceServicer_to_server( + FraudDetectionService(), server + ) port = "50051" - server.add_insecure_port("[::]:" + port) - # Start the server + server.add_insecure_port(f"[::]:" + port) server.start() - print("Server started. 
Listening on port 50051.") - # Keep thread alive + logging.info(f"Fraud Detection service listening on port {port}") server.wait_for_termination() -if __name__ == '__main__': - serve() \ No newline at end of file + +if __name__ == "__main__": + serve() diff --git a/fraud_detection/src/fraud_model.pkl b/fraud_detection/src/fraud_model.pkl new file mode 100644 index 000000000..f57fc6c14 Binary files /dev/null and b/fraud_detection/src/fraud_model.pkl differ diff --git a/fraud_detection/src/grpc_test_client.py b/fraud_detection/src/grpc_test_client.py new file mode 100644 index 000000000..0ab88d99e --- /dev/null +++ b/fraud_detection/src/grpc_test_client.py @@ -0,0 +1,29 @@ +import grpc +import os +import sys + +FILE = __file__ if "__file__" in globals() else os.getenv("PYTHONFILE", "") +grpc_path = os.path.abspath(os.path.join(FILE, "../../../utils/pb/fraud_detection")) +sys.path.insert(0, grpc_path) + + +import fraud_detection_pb2 as fd_pb2 +import fraud_detection_pb2_grpc as fd_pb2_grpc + +def run(): + channel = grpc.insecure_channel("localhost:50051") # Connect to gRPC server + stub = fd_pb2_grpc.FraudDetectionServiceStub(channel) # stub name follows the service registered by the server (FraudDetectionService) + + request = fd_pb2.FraudRequest( + order_id="12345", + user_id="user_789", # NOTE(review): the server's CheckFraud reads ip_address/email/billing_country/billing_city, never user_id or location; confirm these fields exist in the .proto + amount=10000, # Large amount to trigger fraud + payment_method="Crypto", + location="North Korea" + ) + + response = stub.CheckFraud(request) + print(f"Action: {response.action}, Probability: {response.fraud_probability}, Reasons: {list(response.reasons)}") + +if __name__ == "__main__": + run() diff --git a/fraud_detection/src/label_encoders.pkl b/fraud_detection/src/label_encoders.pkl new file mode 100644 index 000000000..9e2b9fe0d Binary files /dev/null and b/fraud_detection/src/label_encoders.pkl differ diff --git a/frontend/src/index.html b/frontend/src/index.html index 15c47351f..3309222d8 100644 --- a/frontend/src/index.html +++ b/frontend/src/index.html @@ -13,6 +13,10 @@