From 227a86fb7e903b0d2dbbc8f5fda796148f288a79 Mon Sep 17 00:00:00 2001 From: Luis Tomas Bolivar Date: Fri, 27 Mar 2026 10:17:52 +0100 Subject: [PATCH 1/3] feat: split MCP server from sidecar to separate Cloud Run service Extract the MCP server from a sidecar container inside the agent service into its own Cloud Run service with ingress: internal. This enables independent scaling and lifecycle management while keeping the MCP server accessible only to the agent's service account via HTTPS. Security: - ingress: internal blocks all external traffic at the network level - Only the agent's service account has roles/run.invoker on the MCP service - Agent sends Google ID token via X-Serverless-Authorization header so Cloud Run IAM validates the caller without consuming the Authorization header, which carries the Red Hat SSO JWT for console.redhat.com Changes: - Create mcp-service.yaml with ingress: internal, maxScale: 4 - Remove MCP sidecar container block from service.yaml - Add deploy_mcp() function and --service mcp option to deploy.sh - Auto-discover MCP URL post-deployment and set MCP_SERVER_URL on agent - Add MCP service deletion to cleanup.sh - Update all documentation (architecture, mcp-integration, configuration, authentication, README, CLAUDE.md) to reflect separate service architecture and account for GMA SSO API, scope allowlists, SESSION_BACKEND, and other changes that landed in main Co-Authored-By: Claude Opus 4.6 --- .env.example | 8 +- CLAUDE.md | 12 ++- README.md | 50 +++++---- deploy/cloudrun/README.md | 126 +++++++++++----------- deploy/cloudrun/cleanup.sh | 16 ++- deploy/cloudrun/deploy.sh | 132 +++++++++++++++++++----- deploy/cloudrun/mcp-service.yaml | 70 +++++++++++++ deploy/cloudrun/service.yaml | 44 ++------ deploy/cloudrun/setup.sh | 2 +- docs/architecture.md | 59 ++++++----- docs/authentication.md | 14 +-- docs/configuration.md | 10 +- docs/mcp-integration.md | 89 +++++++--------- src/lightspeed_agent/config/settings.py | 2 +- 14 files changed, 
395 insertions(+), 239 deletions(-) create mode 100644 deploy/cloudrun/mcp-service.yaml diff --git a/.env.example b/.env.example index a3c19992..55cbcc62 100644 --- a/.env.example +++ b/.env.example @@ -45,9 +45,9 @@ GMA_CLIENT_SECRET=your_gma_client_secret # Red Hat Lightspeed MCP Server Configuration # ----------------------------------------------------------------------------- # The MCP server provides tools to access Red Hat Insights APIs. -# It runs as a sidecar container. The agent forwards the caller's JWT token -# to the MCP server, which uses it to authenticate with console.redhat.com -# on behalf of the calling user. +# It runs as a separate service (ingress: internal) in Cloud Run. +# The agent forwards the caller's JWT token to the MCP server, which uses +# it to authenticate with console.redhat.com on behalf of the calling user. # # The MCP server can access: # - Advisor (recommendations) @@ -65,7 +65,7 @@ GMA_CLIENT_SECRET=your_gma_client_secret MCP_TRANSPORT_MODE=http # MCP server URL (for http/sse modes) -# In Podman deployments, the MCP server runs as a sidecar on localhost:8081 +# In Podman deployments, the MCP server runs on localhost:8081 # (port 8081 avoids conflict with A2A Inspector which uses 8080) MCP_SERVER_URL=http://localhost:8081 diff --git a/CLAUDE.md b/CLAUDE.md index 1c1a0afe..6bb1456f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -Red Hat Lightspeed Agent for Google Cloud — an A2A-ready (Agent-to-Agent protocol) AI agent providing access to Red Hat Insights, built on Google Agent Development Kit (ADK), Gemini 2.5 Flash, and a Red Hat Lightspeed MCP server sidecar. Integrates with Google Cloud Marketplace for provisioning, billing, and metering. 
+Red Hat Lightspeed Agent for Google Cloud — an A2A-ready (Agent-to-Agent protocol) AI agent providing access to Red Hat Insights, built on Google Agent Development Kit (ADK), Gemini 2.5 Flash, and a Red Hat Lightspeed MCP server. Integrates with Google Cloud Marketplace for provisioning, billing, and metering. ## Common Commands @@ -78,14 +78,16 @@ CI blocks merge on lint/test failures — catching issues locally saves round-tr ## Architecture -### Two-Service Design +### Three-Service Design -The system runs as two separate FastAPI services with separate concerns: +The system runs as three separate services: -1. **Lightspeed Agent** (port 8000, `src/lightspeed_agent/main.py`) — The AI agent service. Scales to zero on Cloud Run. Handles A2A protocol requests (JSON-RPC 2.0 at `/`), serves the AgentCard at `/.well-known/agent.json`. Uses ADK `LlmAgent` with MCP tools loaded from the sidecar. +1. **Lightspeed Agent** (port 8000, `src/lightspeed_agent/main.py`) — The AI agent service. Scales to zero on Cloud Run. Handles A2A protocol requests (JSON-RPC 2.0 at `/`), serves the AgentCard at `/.well-known/agent.json`. Uses ADK `LlmAgent` with MCP tools loaded from the MCP server. 2. **Marketplace Handler** (port 8001, `src/lightspeed_agent/marketplace/app.py`) — Always-on service for Google Cloud Marketplace Pub/Sub provisioning events and Dynamic Client Registration (DCR). Has a single hybrid `/dcr` endpoint that routes Pub/Sub messages vs DCR requests based on request content. +3. **MCP Server** — Red Hat Lightspeed MCP server providing Insights API tools. Runs as a separate Cloud Run service with `ingress: internal` (only reachable by the agent's service account). 
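The dual-token scheme this patch sets up — the agent's Google ID token in `X-Serverless-Authorization` for Cloud Run IAM, and the caller's Red Hat SSO JWT in `Authorization` for console.redhat.com — can be sketched as follows. This is a minimal illustration of the header layout only; `build_mcp_headers` is a hypothetical helper, not a function from this repository:

```python
def build_mcp_headers(google_id_token: str, rh_sso_jwt: str) -> dict[str, str]:
    """Build headers for a call to the internal MCP Cloud Run service.

    X-Serverless-Authorization carries the agent's Google ID token so
    Cloud Run IAM can validate the caller without consuming the
    Authorization header, which stays free to carry the Red Hat SSO JWT
    that the MCP server forwards to console.redhat.com for the user.
    """
    return {
        "X-Serverless-Authorization": f"Bearer {google_id_token}",
        "Authorization": f"Bearer {rh_sso_jwt}",
    }


headers = build_mcp_headers("google-id-token", "redhat-sso-jwt")
print(headers["X-Serverless-Authorization"])  # Bearer google-id-token
```

Cloud Run strips `X-Serverless-Authorization` after validating it, so the MCP container only ever sees the Red Hat SSO JWT in `Authorization`.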
+ ### Database Isolation Two separate PostgreSQL databases (security boundary): @@ -105,7 +107,7 @@ Setting `SKIP_JWT_VALIDATION=true` bypasses auth (dev only, blocked when running ### MCP Integration -The agent loads tools from a Red Hat Lightspeed MCP server running as a sidecar: +The agent loads tools from a Red Hat Lightspeed MCP server (separate Cloud Run service): - Transport modes: `stdio` (dev), `http` (prod), `sse` (streaming) — configured via `MCP_TRANSPORT_MODE` - Read-only mode (`MCP_READ_ONLY=true`) filters to a safe subset of tools - Tool categories: Advisor, Inventory, Vulnerability, Remediations, Planning, Image Builder, Subscription Management, Content Sources diff --git a/README.md b/README.md index b5325ba4..409d1547 100644 --- a/README.md +++ b/README.md @@ -46,24 +46,34 @@ The system consists of **two separate services**: │ │ Shared PostgreSQL ▼ -┌────────────────────────────────────────────────────────────────────────┐ -│ Lightspeed Agent (Port 8000) │ -│ ────────────────────────── │ -│ ┌─────────────────────┐ ┌─────────────────────────────┐ │ -│ │ Lightspeed Agent │ HTTP │ Red Hat Lightspeed MCP │ │ -│ │ (Gemini + ADK) │◄────►│ Server (Sidecar) │ │ -│ │ │ │ │ │ -│ │ - A2A protocol │ │ - Advisor, Inventory │ │ -│ │ - OAuth 2.0 │ │ - Vulnerability, Patch │ │ -│ │ - Session mgmt │ │ - Remediations │ │ -│ └─────────────────────┘ └──────────────┬──────────────┘ │ -└──────────────────────────────────────────────┼─────────────────────────┘ - │ - ▼ - ┌───────────────────┐ - │ console.redhat.com│ - │ (Insights APIs) │ - └───────────────────┘ +┌─────────────────────────────────────┐ +│ Lightspeed Agent (Port 8000) │ +│ ────────────────────────── │ +│ ┌─────────────────────┐ │ +│ │ Lightspeed Agent │ │ +│ │ (Gemini + ADK) │ │ +│ │ │ │ +│ │ - A2A protocol │ │ +│ │ - OAuth 2.0 │ │ +│ │ - Session mgmt │ │ +│ └──────────┬──────────┘ │ +└─────────────┼──────────────────────┘ + │ HTTPS + ▼ +┌─────────────────────────────────────┐ +│ Red Hat Lightspeed MCP 
Server │ +│ (Cloud Run - ingress: internal) │ +│ │ +│ - Advisor, Inventory │ +│ - Vulnerability, Patch │ +│ - Remediations │ +└──────────────┬──────────────────────┘ + │ + ▼ + ┌───────────────────┐ + │ console.redhat.com│ + │ (Insights APIs) │ + └───────────────────┘ ``` ### Service Responsibilities @@ -696,10 +706,10 @@ This separation ensures: ### How the MCP Server Works -The MCP server runs as a sidecar container and provides tools for the agent to interact with Red Hat Insights APIs: +The MCP server runs as a separate Cloud Run service and provides tools for the agent to interact with Red Hat Insights APIs: 1. **Agent receives a request** (e.g., "Show me my system vulnerabilities") -2. **Agent calls MCP tools** via HTTP to the MCP server (localhost:8081), forwarding the caller's JWT token in the Authorization header +2. **Agent calls MCP tools** via HTTPS to the MCP server, forwarding the caller's JWT token in the Authorization header 3. **MCP server authenticates** with console.redhat.com using the forwarded JWT token 4. **MCP server calls Insights APIs** and returns results to the agent 5. 
**Agent formats the response** and returns it to the user diff --git a/deploy/cloudrun/README.md b/deploy/cloudrun/README.md index 4d1ce206..4efd05cb 100644 --- a/deploy/cloudrun/README.md +++ b/deploy/cloudrun/README.md @@ -4,7 +4,7 @@ Deploy the Red Hat Lightspeed Agent for Google Cloud to Google Cloud Run for pro ## Architecture -The deployment consists of **two separate Cloud Run services** plus **Cloud Memorystore for Redis** (for rate limiting): +The deployment consists of **three separate Cloud Run services** plus **Cloud Memorystore for Redis** (for rate limiting): ``` Google Cloud Marketplace @@ -28,25 +28,32 @@ The deployment consists of **two separate Cloud Run services** plus **Cloud Memo │ │ │ Shared PostgreSQL Database │ DCR (create OAuth clients) ▼ ▼ -┌──────────────────────────────────────────────┐ ┌──────────────────────┐ -│ Lightspeed Agent Service (Port 8000) │ │ Red Hat SSO │ -│ ───────────────────────────────────── │ │ (GMA SSO API) │ -│ ┌──────────────────┐ ┌──────────────────┐ │ │ │ -│ │ Lightspeed Agent │ │ Lightspeed MCP │ │ │ Production: │ -│ │ │ │ Server (8081) │ │ │ sso.redhat.com │ -│ │ - Gemini 2.5 │ │ │ │ │ │ -│ │ - A2A protocol │◄-►│ - Advisor tools │ │ │ │ -│ │ - OAuth 2.0 │ │ - Inventory tools│ │ │ │ -│ │ │ │ - Vuln. 
tools │ │ │ │ -│ └──────────────────┘ └────────┬─────────┘ │ └──────────────────────┘ -│ │ │ -└──────────────────────────────────┼───────────┘ - │ - ▼ - ┌──────────────────┐ - │console.redhat.com│ - │ (Insights APIs) │ - └──────────────────┘ +┌──────────────────────────────┐ ┌──────────────────────┐ +│ Lightspeed Agent Service │ │ Red Hat SSO │ +│ (Port 8000) │ │ (GMA SSO API) │ +│ ┌──────────────────┐ │ │ │ +│ │ Lightspeed Agent │ │ HTTPS │ Production: │ +│ │ - Gemini 2.5 │────────┼──────────────┐ │ sso.redhat.com │ +│ │ - A2A protocol │ │ │ │ │ +│ │ - OAuth 2.0 │ │ │ │ │ +│ └──────────────────┘ │ │ │ │ +└──────────────────────────────┘ │ │ │ + │ └──────────────────────┘ + ▼ + ┌──────────────────────────┐ + │ MCP Server Service │ + │ (ingress: internal) │ + │ │ + │ - Advisor tools │ + │ - Inventory tools │ + │ - Vulnerability tools │ + └────────────┬─────────────┘ + │ + ▼ + ┌──────────────────┐ + │console.redhat.com│ + │ (Insights APIs) │ + └──────────────────┘ ``` ### Service Responsibilities @@ -54,15 +61,19 @@ The deployment consists of **two separate Cloud Run services** plus **Cloud Memo | Service | Port | Purpose | Scaling | |---------|------|---------|---------| | **Marketplace Handler** | 8001 | Pub/Sub events, DCR | Always on (minScale=1) | +| **MCP Server** | 8080 | Red Hat Insights API tools | Scale to zero (stateless) | | **Lightspeed Agent** | 8000 | A2A queries, user interactions | Scale to zero | ### Deployment Order 1. **Set up Cloud Memorystore Redis and VPC connector** - Required for agent rate limiting (see [Redis Setup](#redis-setup-for-rate-limiting)) 2. **Deploy Marketplace Handler first** - Must be running to receive provisioning events -3. **Deploy Agent after provisioning** - Can be deployed when customers are ready to use the agent +3. **Deploy MCP Server** - Must be running before the agent can call Insights APIs +4. 
**Deploy Agent after provisioning** - Can be deployed when customers are ready to use the agent -The MCP server runs as a sidecar in the Agent service. The agent forwards the caller's JWT token to the MCP server, which uses it to authenticate with console.redhat.com on behalf of the user (see [MCP Authentication](#mcp-authentication)). +The MCP server runs as a separate Cloud Run service with `ingress: internal`, reachable only by the agent's service account. The agent connects to it over HTTPS. The agent forwards the caller's JWT token to the MCP server, which uses it to authenticate with console.redhat.com on behalf of the user (see [MCP Authentication](#mcp-authentication)). + +> **Note:** The `MCP_SERVER_URL` in `service.yaml` is a placeholder. `deploy.sh` auto-discovers the MCP service URL after deployment and sets it on the agent. ## Service Accounts @@ -346,25 +357,21 @@ deployed first** so its URL is known when the agent is configured. ./deploy/cloudrun/deploy.sh --service handler --allow-unauthenticated ``` -**Step 2: Get the handler URL and set `MARKETPLACE_HANDLER_URL`** +**Step 2: Deploy the MCP server** ```bash -# Get the marketplace handler URL -HANDLER_URL=$(gcloud run services describe ${HANDLER_SERVICE_NAME:-marketplace-handler} \ - --region=$GOOGLE_CLOUD_LOCATION \ - --project=$GOOGLE_CLOUD_PROJECT \ - --format='value(status.url)') -echo "Handler URL: $HANDLER_URL" - -# Export it so deploy.sh can set it on the agent service -export MARKETPLACE_HANDLER_URL="$HANDLER_URL" +./deploy/cloudrun/deploy.sh --service mcp ``` +The MCP service is deployed with `ingress: internal` — it is only reachable by +the agent's service account within the same GCP project. `deploy.sh` grants +`roles/run.invoker` on the MCP service to the agent's service account only. 
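After Step 2, the binding the deploy script created can be checked by hand. A sketch under the default names used elsewhere in this guide (`lightspeed-agent`, `rh-lightspeed-mcp`) — the `gcloud` verification command is shown commented out since it requires a deployed service:

```bash
# Construct the IAM member string that deploy.sh grants run.invoker to.
# Defaults match the rest of this guide; override via env vars as usual.
AGENT_SA="${SERVICE_ACCOUNT_NAME:-lightspeed-agent}@${GOOGLE_CLOUD_PROJECT:-my-project}.iam.gserviceaccount.com"
echo "serviceAccount:${AGENT_SA}"

# Inspect the MCP service's IAM policy to confirm the agent SA is the
# only member with roles/run.invoker (requires gcloud and a deployment):
# gcloud run services get-iam-policy "${MCP_SERVICE_NAME:-rh-lightspeed-mcp}" \
#   --region="$GOOGLE_CLOUD_LOCATION" \
#   --project="$GOOGLE_CLOUD_PROJECT"
```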
+ **Step 3: Deploy the agent** -The deploy script automatically sets `AGENT_PROVIDER_URL` (agent base URL) -and `MARKETPLACE_HANDLER_URL` on the agent service using the actual -Cloud Run URLs after deployment. `AGENT_PROVIDER_ORGANIZATION_URL` +The deploy script automatically sets `AGENT_PROVIDER_URL` (agent base URL), +`MARKETPLACE_HANDLER_URL`, and `MCP_SERVER_URL` on the agent service using the +actual Cloud Run URLs after deployment. `AGENT_PROVIDER_ORGANIZATION_URL` (the provider's website, used as the JWT audience for DCR) is set in the YAML configs and does not change per deployment. @@ -396,7 +403,7 @@ curl -s $AGENT_URL/.well-known/agent.json | jq '.capabilities.extensions' | Flag | Description | |------|-------------| -| `--service ` | Which service to deploy: `all` (default), `handler`, `agent` | +| `--service ` | Which service to deploy: `all` (default), `handler`, `mcp`, `agent` | | `--image ` | Container image for the agent (default: `gcr.io/$PROJECT_ID/lightspeed-agent:latest`) | | `--handler-image ` | Container image for the marketplace handler (default: `gcr.io/$PROJECT_ID/marketplace-handler:latest`) | | `--mcp-image ` | Container image for the MCP server (default: `gcr.io/$PROJECT_ID/red-hat-lightspeed-mcp:latest`) | @@ -408,8 +415,9 @@ curl -s $AGENT_URL/.well-known/agent.json | jq '.capabilities.extensions' | Service | YAML Config | Description | |---------|-------------|-------------| | `handler` | `marketplace-handler.yaml` | Pub/Sub events, DCR requests | -| `agent` | `service.yaml` | A2A queries with MCP sidecar | -| `all` | Both | Deploy both services | +| `mcp` | `mcp-service.yaml` | Red Hat Insights MCP tools (internal) | +| `agent` | `service.yaml` | A2A queries, user interactions | +| `all` | All three | Deploy all services | The deploy script performs variable substitution on the YAML configs (`${PROJECT_ID}`, `${REGION}`, image references, etc.) 
and deploys using @@ -424,7 +432,7 @@ sed -e "s|\${PROJECT_ID}|$GOOGLE_CLOUD_PROJECT|g" \ -e "s|\${VPC_CONNECTOR_NAME}|${VPC_CONNECTOR_NAME:-lightspeed-redis-conn}|g" \ -e "s|\${SERVICE_NAME}|${SERVICE_NAME:-lightspeed-agent}|g" \ -e "s|\${SERVICE_ACCOUNT_NAME}|${SERVICE_ACCOUNT_NAME:-lightspeed-agent}|g" \ - -e "s|\${MCP_IMAGE}|${MCP_IMAGE:-gcr.io/$GOOGLE_CLOUD_PROJECT/insights-mcp:latest}|g" \ + -e "s|\${MCP_SERVICE_NAME}|${MCP_SERVICE_NAME:-rh-lightspeed-mcp}|g" \ deploy/cloudrun/service.yaml | \ gcloud run services replace - --region=$GOOGLE_CLOUD_LOCATION --project=$GOOGLE_CLOUD_PROJECT ``` @@ -453,14 +461,18 @@ The agent uses Cloud Memorystore for Redis for distributed rate limiting. Requir The service uses a VPC connector to reach the Redis instance. Set `VPC_CONNECTOR_NAME` (default: `lightspeed-redis-conn`) when deploying. See [Rate Limiting — Testing](../../docs/rate-limiting.md#testing-rate-limiting) for how to validate rate limiting. -### MCP Server Sidecar +### MCP Server Service | Setting | Value | Description | |---------|-------|-------------| | CPU | 1 | vCPUs allocated | | Memory | 512Mi | Memory limit | -| Port | 8080 | Internal MCP port | +| Port | 8080 | MCP server port | | Image | `gcr.io/$PROJECT_ID/red-hat-lightspeed-mcp:latest` | MCP server image (copied from Quay.io) | +| Ingress | `internal` | Only reachable from same GCP project | +| IAM | Agent SA only | Only the agent's service account has `roles/run.invoker` | +| Min Instances | 0 | Scale to zero when idle | +| Max Instances | 4 | Should be >= agent maxScale (each agent makes concurrent MCP calls) | ### Copying the MCP Image to GCR @@ -491,7 +503,7 @@ This step is required before running `deploy.sh`. The deploy script defaults to ### Customizing MCP Server Configuration -The MCP server configuration is hardcoded in `deploy/cloudrun/service.yaml` because Cloud Run does not support environment variable expansion in the `args` field (unlike Kubernetes/Podman). 
+The MCP server configuration is hardcoded in `deploy/cloudrun/mcp-service.yaml` because Cloud Run does not support environment variable expansion in the `args` field (unlike Kubernetes/Podman). **Current MCP server settings:** ```yaml @@ -506,10 +518,10 @@ args: **To change MCP server settings:** -1. Edit `deploy/cloudrun/service.yaml` directly: +1. Edit `deploy/cloudrun/mcp-service.yaml` directly: ```bash - vim deploy/cloudrun/service.yaml - # Find the "insights-mcp" container section + vim deploy/cloudrun/mcp-service.yaml + # Find the "rh-lightspeed-mcp" container section # Modify the args array as needed ``` @@ -520,10 +532,10 @@ args: 3. Redeploy after making changes: ```bash - ./deploy/cloudrun/deploy.sh --service agent + ./deploy/cloudrun/deploy.sh --service mcp ``` -**Note**: If you change the MCP server port, you must also update the `MCP_SERVER_URL` environment variable in the agent container to match. +**Note**: If you change the MCP server port, you must also update the `MCP_SERVER_URL` environment variable on the agent service to match. ### Alternative: Use Docker Hub @@ -573,13 +585,13 @@ docker push docker.io/YOUR_USERNAME/red-hat-lightspeed-mcp:latest ## How the MCP Server Works -The MCP server runs as a sidecar container alongside the agent: +The MCP server runs as a separate Cloud Run service (`rh-lightspeed-mcp`) with `ingress: internal`: -1. **Agent Container** (port 8000): Handles A2A requests, uses Gemini for AI -2. **MCP Server Container** (port 8080): Provides tools for Red Hat Insights APIs +1. **Agent Service** (port 8000): Handles A2A requests, uses Gemini for AI +2. **MCP Server Service** (port 8080): Provides tools for Red Hat Insights APIs When the agent needs to access Insights data (e.g., system vulnerabilities, recommendations): -1. Agent calls MCP tools via HTTP to `localhost:8080` +1. Agent calls MCP tools via HTTPS to the MCP service URL 2. Agent forwards credentials to the MCP server via HTTP headers (see below) 3. 
MCP server authenticates with console.redhat.com 4. MCP server calls the appropriate Insights API @@ -623,7 +635,7 @@ Bearer token that is active and carries the `api.console` and `api.ocm` scopes. ``` ┌──────────┐ ┌───────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ Client │ │Lightspeed Agt │ │ Red Hat SSO │ │ MCP Server │ │console.redhat.com│ -│(Gemini) │ │ (port 8000) │ │ (Red Hat SSO)│ │ (port 8080) │ │ (Insights APIs) │ +│(Gemini) │ │ (port 8000) │ │ (Red Hat SSO)│ │ (internal) │ │ (Insights APIs) │ └────┬─────┘ └──────┬────────┘ └──────┬───────┘ └──────┬───────┘ └────────┬─────────┘ │ │ │ │ │ │ ── Obtain Token (directly from SSO) ── │ │ @@ -814,12 +826,10 @@ curl -H "Authorization: Bearer $(gcloud auth print-identity-token)" \ The local proxy handles Google Cloud Run authentication, allowing you to test with just your Red Hat SSO token. -**Important:** The MCP sidecar inside Cloud Run uses port 8080. To avoid port conflicts, run the proxy on a different port (e.g., 8099). - **1. Start the local proxy:** ```bash -# Start proxy on localhost:8099 (NOT 8080 - that's used by MCP sidecar) +# Start proxy on localhost:8099 gcloud run services proxy lightspeed-agent \ --region=$GOOGLE_CLOUD_LOCATION \ --project=$GOOGLE_CLOUD_PROJECT \ @@ -1013,10 +1023,6 @@ fuser -k 8099/tcp **Note:** The proxy doesn't create any cloud resources - it only runs locally on your machine. Stopping the proxy (Ctrl+C) is sufficient to clean up. -**Why port 8099 instead of 8080?** - -The MCP sidecar inside Cloud Run uses port 8080 internally. If you run the proxy on port 8080, the agent will try to connect to the proxy instead of the MCP sidecar, causing "Failed to create MCP session" errors. Using port 8099 (or any other port except 8080) avoids this conflict. 
- ### Testing Without Proxy (Direct Cloud Run Access) If you prefer to test without the proxy, you'll need to: @@ -1442,7 +1448,7 @@ To remove all resources created by the setup and deploy scripts: ``` This will delete: -- Cloud Run services (lightspeed-agent, marketplace-handler) +- Cloud Run services (lightspeed-agent, marketplace-handler, rh-lightspeed-mcp) - Pub/Sub topic and subscription - Secret Manager secrets - Service accounts (runtime + Pub/Sub invoker) and IAM bindings diff --git a/deploy/cloudrun/cleanup.sh b/deploy/cloudrun/cleanup.sh index 58e08628..64e67590 100755 --- a/deploy/cloudrun/cleanup.sh +++ b/deploy/cloudrun/cleanup.sh @@ -42,6 +42,7 @@ REGION="${GOOGLE_CLOUD_LOCATION:-us-central1}" SERVICE_NAME="${SERVICE_NAME:-lightspeed-agent}" SERVICE_ACCOUNT_NAME="${SERVICE_ACCOUNT_NAME:-${SERVICE_NAME}}" HANDLER_SERVICE_NAME="${HANDLER_SERVICE_NAME:-marketplace-handler}" +MCP_SERVICE_NAME="${MCP_SERVICE_NAME:-rh-lightspeed-mcp}" DB_INSTANCE_NAME="${DB_INSTANCE_NAME:-lightspeed-agent-db}" SERVICE_ACCOUNT="${SERVICE_ACCOUNT_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" @@ -79,7 +80,7 @@ fi log_warn "This will delete the following resources from project: $PROJECT_ID" echo "" -echo " - Cloud Run services: $SERVICE_NAME, $HANDLER_SERVICE_NAME" +echo " - Cloud Run services: $SERVICE_NAME, $HANDLER_SERVICE_NAME, $MCP_SERVICE_NAME" echo " - Pub/Sub topic: $PUBSUB_TOPIC" echo " - Pub/Sub subscription: $PUBSUB_SUBSCRIPTION" echo " - Secrets: redhat-sso-client-id, redhat-sso-client-secret, database-url," @@ -129,6 +130,17 @@ else log_info "Cloud Run service '$HANDLER_SERVICE_NAME' does not exist, skipping" fi +# Delete MCP server service +if gcloud run services describe "$MCP_SERVICE_NAME" --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then + gcloud run services delete "$MCP_SERVICE_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --quiet + log_info "Cloud Run service '$MCP_SERVICE_NAME' deleted" +else + log_info "Cloud Run service 
'$MCP_SERVICE_NAME' does not exist, skipping" +fi + # ============================================================================= # Step 2: Delete Pub/Sub Resources # ============================================================================= @@ -264,7 +276,7 @@ log_info "Cleanup complete!" log_info "==========================================" echo "" echo "The following resources have been removed:" -echo " - Cloud Run services ($SERVICE_NAME, $HANDLER_SERVICE_NAME)" +echo " - Cloud Run services ($SERVICE_NAME, $HANDLER_SERVICE_NAME, $MCP_SERVICE_NAME)" echo " - Pub/Sub topic and subscription" echo " - Secret Manager secrets" echo " - Service accounts (runtime + Pub/Sub invoker) and IAM bindings" diff --git a/deploy/cloudrun/deploy.sh b/deploy/cloudrun/deploy.sh index 0dc606dd..72e84d8d 100755 --- a/deploy/cloudrun/deploy.sh +++ b/deploy/cloudrun/deploy.sh @@ -3,18 +3,19 @@ # Google Cloud Run Deployment Script # ============================================================================= # -# Deploys BOTH services to Google Cloud Run: +# Deploys three services to Google Cloud Run: # 1. marketplace-handler - Handles DCR and Pub/Sub events (always running) -# 2. lightspeed-agent - A2A agent with MCP sidecar (runs after provisioning) +# 2. rh-lightspeed-mcp - MCP server for Red Hat Insights APIs (internal) +# 3. lightspeed-agent - A2A agent (runs after provisioning) # -# Uses the YAML service configs (service.yaml and marketplace-handler.yaml) -# with variable substitution to deploy each service. +# Uses the YAML service configs (service.yaml, mcp-service.yaml, and +# marketplace-handler.yaml) with variable substitution to deploy each service. 
# # Usage: # ./deploy/cloudrun/deploy.sh [OPTIONS] # # Options: -# --service Which service to deploy: all, handler, agent +# --service Which service to deploy: all, handler, mcp, agent # (default: all) # --image Container image for the agent # (default: gcr.io/$PROJECT_ID/lightspeed-agent:latest) @@ -26,15 +27,15 @@ # --build Build images before deploying # # Architecture: -# ┌─────────────────────────┐ ┌─────────────────────────┐ -# │ Marketplace Handler │ │ Lightspeed Agent │ -# │ (Cloud Run #1) │ │ (Cloud Run #2) │ -# │ │ │ │ -# │ - POST /dcr │ │ - POST / (A2A) │ -# │ - Pub/Sub push │ │ - /.well-known/agent │ -# │ - Account approval │ │ - OAuth flow │ -# │ - GMA SSO API │ │ - MCP sidecar │ -# └─────────────────────────┘ └─────────────────────────┘ +# ┌─────────────────────────┐ ┌──────────────────────┐ ┌─────────────────────────┐ +# │ Marketplace Handler │ │ MCP Server │ │ Lightspeed Agent │ +# │ (Cloud Run #1) │ │ (Cloud Run #2) │ │ (Cloud Run #3) │ +# │ │ │ ingress: internal │ │ │ +# │ - POST /dcr │ │ - Insights APIs │ │ - POST / (A2A) │ +# │ - Pub/Sub push │ │ - Advisor, Inventory│ │ - /.well-known/agent │ +# │ - Account approval │ │ - Vulnerability │ │ - OAuth flow │ +# │ - GMA SSO API │ │ - Remediations │ │ - Calls MCP via HTTPS │ +# └─────────────────────────┘ └──────────────────────┘ └─────────────────────────┘ # # Prerequisites: # - Run setup.sh first to configure GCP services @@ -63,6 +64,7 @@ REGION="${GOOGLE_CLOUD_LOCATION:-us-central1}" SERVICE_NAME="${SERVICE_NAME:-lightspeed-agent}" SERVICE_ACCOUNT_NAME="${SERVICE_ACCOUNT_NAME:-${SERVICE_NAME}}" HANDLER_SERVICE_NAME="${HANDLER_SERVICE_NAME:-marketplace-handler}" +MCP_SERVICE_NAME="${MCP_SERVICE_NAME:-rh-lightspeed-mcp}" DB_INSTANCE_NAME="${DB_INSTANCE_NAME:-lightspeed-agent-db}" VPC_CONNECTOR_NAME="${VPC_CONNECTOR_NAME:-lightspeed-redis-conn}" IMAGE_TAG="${IMAGE_TAG:-latest}" @@ -95,7 +97,7 @@ HANDLER_IMAGE="${HANDLER_IMAGE:-}" 
MCP_IMAGE="${MCP_IMAGE:-gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest}" # Parse arguments -DEPLOY_SERVICE="all" # all, handler, agent +DEPLOY_SERVICE="all" # all, handler, mcp, agent ALLOW_UNAUTH=false BUILD_IMAGE=false @@ -127,7 +129,7 @@ while [[ $# -gt 0 ]]; do ;; *) log_error "Unknown option: $1" - echo "Usage: $0 [--service all|handler|agent] [--image IMAGE] [--handler-image IMAGE] [--mcp-image IMAGE] [--allow-unauthenticated] [--build]" + echo "Usage: $0 [--service all|handler|mcp|agent] [--image IMAGE] [--handler-image IMAGE] [--mcp-image IMAGE] [--allow-unauthenticated] [--build]" exit 1 ;; esac @@ -188,6 +190,14 @@ build_handler_image() { deploy_agent() { log_info "Deploying agent with service.yaml..." + # Warn if MCP service isn't deployed yet (agent needs its URL) + if ! gcloud run services describe "$MCP_SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then + log_warn "MCP service '$MCP_SERVICE_NAME' is not deployed yet." + log_warn "Deploy it first with: $0 --service mcp" + log_warn "The agent's MCP_SERVER_URL will be a placeholder until the MCP service is deployed." + fi + # Create temporary file with substituted values local tmp_yaml tmp_yaml=$(mktemp) @@ -195,7 +205,7 @@ deploy_agent() { # Substitute variables in service.yaml # Note: Image substitution must happen BEFORE PROJECT_ID substitution sed -e "s|gcr.io/\${PROJECT_ID}/lightspeed-agent:latest|${AGENT_IMAGE}|g" \ - -e "s|\${MCP_IMAGE}|${MCP_IMAGE}|g" \ + -e "s|\${MCP_SERVICE_NAME}|${MCP_SERVICE_NAME}|g" \ -e "s|\${PROJECT_ID}|${PROJECT_ID}|g" \ -e "s|\${REGION}|${REGION}|g" \ -e "s|\${SERVICE_NAME}|${SERVICE_NAME}|g" \ @@ -261,6 +271,45 @@ deploy_handler() { rm -f "$tmp_yaml" } +deploy_mcp() { + log_info "Deploying MCP server with mcp-service.yaml..." 
+ + # Create temporary file with substituted values + local tmp_yaml + tmp_yaml=$(mktemp) + + # Substitute variables in mcp-service.yaml + sed -e "s|\${MCP_IMAGE}|${MCP_IMAGE}|g" \ + -e "s|\${MCP_SERVICE_NAME}|${MCP_SERVICE_NAME}|g" \ + -e "s|\${PROJECT_ID}|${PROJECT_ID}|g" \ + -e "s|\${SERVICE_ACCOUNT_NAME}|${SERVICE_ACCOUNT_NAME}|g" \ + deploy/cloudrun/mcp-service.yaml > "$tmp_yaml" + + # Deploy using the YAML + gcloud run services replace "$tmp_yaml" \ + --region "$REGION" \ + --project "$PROJECT_ID" + + # Only the agent's service account may invoke the MCP service. + # ingress: internal blocks external traffic at the network level; + # this IAM binding restricts internal callers to the agent SA only. + # + # The agent sends its Google ID token via X-Serverless-Authorization + # so Cloud Run IAM validates the caller without consuming the + # Authorization header, which carries the Red Hat SSO JWT that the + # MCP binary needs to call console.redhat.com on behalf of the user. + local agent_sa="${SERVICE_ACCOUNT_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" + log_info "Granting run.invoker to agent SA ($agent_sa) on MCP service..." 
+ gcloud run services add-iam-policy-binding "$MCP_SERVICE_NAME" \ + --region "$REGION" \ + --project "$PROJECT_ID" \ + --member="serviceAccount:$agent_sa" \ + --role="roles/run.invoker" + + # Cleanup + rm -f "$tmp_yaml" +} + # ============================================================================= # Configure Pub/Sub push subscription # ============================================================================= @@ -359,18 +408,22 @@ case "$DEPLOY_SERVICE" in all) deploy_handler configure_pubsub_push + deploy_mcp deploy_agent ;; handler) deploy_handler configure_pubsub_push ;; + mcp) + deploy_mcp + ;; agent) deploy_agent ;; *) log_error "Unknown service: $DEPLOY_SERVICE" - echo "Valid services: all, handler, agent" + echo "Valid services: all, handler, mcp, agent" exit 1 ;; esac @@ -406,10 +459,13 @@ case "$DEPLOY_SERVICE" in echo "" show_service_info "$HANDLER_SERVICE_NAME" echo "" + show_service_info "$MCP_SERVICE_NAME" + echo "" show_service_info "$SERVICE_NAME" - # Update AGENT_PROVIDER_URL (agent base URL) and MARKETPLACE_HANDLER_URL - # on the agent service so the AgentCard advertises the correct URLs. + # Update AGENT_PROVIDER_URL (agent base URL), MARKETPLACE_HANDLER_URL, + # and MCP_SERVER_URL on the agent service so it can reach the other + # services. # Note: AGENT_PROVIDER_ORGANIZATION_URL (JWT audience for DCR) is set # in service.yaml and does NOT change per deployment — it's the # provider's website (e.g., https://www.redhat.com). @@ -421,6 +477,10 @@ case "$DEPLOY_SERVICE" in --region="$REGION" \ --project="$PROJECT_ID" \ --format='value(status.url)' 2>/dev/null || echo "") + mcp_url=$(gcloud run services describe "$MCP_SERVICE_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --format='value(status.url)' 2>/dev/null || echo "") if [[ -n "$service_url" ]]; then env_vars="AGENT_PROVIDER_URL=$service_url" @@ -430,6 +490,11 @@ case "$DEPLOY_SERVICE" in log_warn "Could not retrieve $HANDLER_SERVICE_NAME URL. 
MARKETPLACE_HANDLER_URL not set." log_warn "DCR endpoints in the AgentCard will fall back to AGENT_PROVIDER_URL." fi + if [[ -n "$mcp_url" ]]; then + env_vars="$env_vars,MCP_SERVER_URL=$mcp_url" + else + log_warn "Could not retrieve $MCP_SERVICE_NAME URL. MCP_SERVER_URL not set." + fi log_info "Updating agent env vars with service URLs" gcloud run services update "$SERVICE_NAME" \ --region="$REGION" \ @@ -442,7 +507,8 @@ case "$DEPLOY_SERVICE" in echo "" echo "Architecture:" echo " 1. Marketplace Handler receives Pub/Sub events and DCR requests" - echo " 2. Agent handles A2A protocol and user interactions" + echo " 2. MCP Server provides Red Hat Insights API tools (internal only)" + echo " 3. Agent handles A2A protocol and user interactions" echo "" echo "Test endpoints:" echo " Handler health: curl \$(gcloud run services describe $HANDLER_SERVICE_NAME --region=$REGION --format='value(status.url)')/health" @@ -456,13 +522,21 @@ case "$DEPLOY_SERVICE" in echo " - Pub/Sub events from Google Cloud Marketplace" echo " - DCR requests from Gemini Enterprise" ;; + mcp) + echo "" + show_service_info "$MCP_SERVICE_NAME" + echo "" + echo "The MCP server is deployed with ingress: internal." + echo "It is only reachable from the agent service account in the same GCP project." + ;; agent) echo "" show_service_info "$SERVICE_NAME" - # Update AGENT_PROVIDER_URL (agent base URL) and MARKETPLACE_HANDLER_URL - # on the agent service. AGENT_PROVIDER_ORGANIZATION_URL is set in - # service.yaml and does NOT change per deployment. + # Update AGENT_PROVIDER_URL (agent base URL), MARKETPLACE_HANDLER_URL, + # and MCP_SERVER_URL on the agent service. + # AGENT_PROVIDER_ORGANIZATION_URL is set in service.yaml and does NOT + # change per deployment. 
service_url=$(gcloud run services describe "$SERVICE_NAME" \ --region="$REGION" \ --project="$PROJECT_ID" \ @@ -471,6 +545,10 @@ case "$DEPLOY_SERVICE" in --region="$REGION" \ --project="$PROJECT_ID" \ --format='value(status.url)' 2>/dev/null || echo "") + mcp_url=$(gcloud run services describe "$MCP_SERVICE_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --format='value(status.url)' 2>/dev/null || echo "") if [[ -n "$service_url" ]]; then env_vars="AGENT_PROVIDER_URL=$service_url" @@ -480,6 +558,11 @@ case "$DEPLOY_SERVICE" in log_warn "Could not retrieve $HANDLER_SERVICE_NAME URL. MARKETPLACE_HANDLER_URL not set." log_warn "DCR endpoints in the AgentCard will fall back to AGENT_PROVIDER_URL." fi + if [[ -n "$mcp_url" ]]; then + env_vars="$env_vars,MCP_SERVER_URL=$mcp_url" + else + log_warn "Could not retrieve $MCP_SERVICE_NAME URL. MCP_SERVER_URL not set." + fi log_info "Updating agent env vars with service URLs" gcloud run services update "$SERVICE_NAME" \ --region="$REGION" \ @@ -498,4 +581,5 @@ esac echo "" echo "View logs:" echo " gcloud run services logs read $HANDLER_SERVICE_NAME --region=$REGION --project=$PROJECT_ID" +echo " gcloud run services logs read $MCP_SERVICE_NAME --region=$REGION --project=$PROJECT_ID" echo " gcloud run services logs read $SERVICE_NAME --region=$REGION --project=$PROJECT_ID" diff --git a/deploy/cloudrun/mcp-service.yaml b/deploy/cloudrun/mcp-service.yaml new file mode 100644 index 00000000..ec64b963 --- /dev/null +++ b/deploy/cloudrun/mcp-service.yaml @@ -0,0 +1,70 @@ +# Cloud Run Service Configuration for MCP Server +# +# Standalone Cloud Run service for the Red Hat Lightspeed MCP Server. +# Provides MCP tools for interacting with Red Hat Insights APIs +# (Advisor, Inventory, Vulnerability, Remediations, etc.) 
+# +# Security: +# - ingress: internal -- only reachable from services in the same GCP project +# - IAM: only the agent's service account has roles/run.invoker +# - The agent sends its Google ID token via X-Serverless-Authorization +# so Cloud Run validates the caller without consuming the Authorization +# header, which carries the Red Hat SSO JWT that the MCP binary needs +# to call console.redhat.com on behalf of the user. +# - HTTPS is automatic -- all Cloud Run service URLs use https:// +# +# Note: Cloud Run doesn't support Quay.io directly. Copy the image to GCR: +# docker pull quay.io/redhat-services-prod/insights-management-tenant/insights-mcp/red-hat-lightspeed-mcp:latest +# docker tag quay.io/.../red-hat-lightspeed-mcp:latest gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest +# docker push gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest +# +# Usage: +# gcloud run services replace deploy/cloudrun/mcp-service.yaml \ +# --region=us-central1 --project=YOUR_PROJECT_ID + +apiVersion: serving.knative.dev/v1 +kind: Service +metadata: + name: ${MCP_SERVICE_NAME} + labels: + app: ${MCP_SERVICE_NAME} + component: mcp + annotations: + run.googleapis.com/description: "Red Hat Lightspeed MCP Server - Insights API tools for the Lightspeed Agent" + run.googleapis.com/ingress: internal + run.googleapis.com/launch-stage: GA +spec: + template: + metadata: + annotations: + # Scaling: stateless, can scale to zero. + # maxScale should be >= the agent's maxScale because each agent + # instance can issue multiple concurrent MCP tool calls. 
+ autoscaling.knative.dev/minScale: "0" + autoscaling.knative.dev/maxScale: "4" + run.googleapis.com/cpu-throttling: "false" + run.googleapis.com/startup-cpu-boost: "true" + spec: + containerConcurrency: 80 + timeoutSeconds: 300 + serviceAccountName: ${SERVICE_ACCOUNT_NAME}@${PROJECT_ID}.iam.gserviceaccount.com + containers: + - name: rh-lightspeed-mcp + image: ${MCP_IMAGE} + ports: + - name: http1 + containerPort: 8080 + args: + - "--readonly" + - "http" + - "--port" + - "8080" + - "--host" + - "0.0.0.0" + resources: + limits: + cpu: "1" + memory: 512Mi + traffic: + - percent: 100 + latestRevision: true diff --git a/deploy/cloudrun/service.yaml b/deploy/cloudrun/service.yaml index 62e2332c..a7e30dc5 100644 --- a/deploy/cloudrun/service.yaml +++ b/deploy/cloudrun/service.yaml @@ -1,8 +1,12 @@ # Cloud Run Service Configuration # Declarative configuration for the Lightspeed Agent service # -# This deploys the agent with the MCP server as a sidecar container. -# The MCP server provides tools for interacting with Red Hat Insights APIs. +# The MCP server runs as a separate Cloud Run service (see mcp-service.yaml). +# The agent connects to it over HTTPS using the MCP_SERVER_URL env var, +# which is set automatically by deploy.sh after deployment. +# +# IMPORTANT: Deploy the MCP service BEFORE the agent so that deploy.sh can +# discover the MCP URL and set MCP_SERVER_URL on the agent. # # Usage: # gcloud run services replace deploy/cloudrun/service.yaml \ @@ -102,11 +106,13 @@ spec: - name: SKIP_JWT_VALIDATION value: "false" # MCP Configuration - # The agent connects to the MCP sidecar container over HTTP + # The agent connects to the MCP server (separate Cloud Run service). + # MCP_SERVER_URL is auto-discovered and set by deploy.sh after + # the MCP service is deployed. The placeholder below is overwritten. 
- name: MCP_TRANSPORT_MODE value: "http" - name: MCP_SERVER_URL - value: "http://localhost:8080" + value: "https://MCP-SERVICE-URL-SET-BY-DEPLOY-SH" - name: MCP_READ_ONLY value: "true" # Secrets from Secret Manager @@ -164,36 +170,6 @@ spec: port: 8000 periodSeconds: 30 failureThreshold: 3 - - # ===================================================================== - # Red Hat Lightspeed MCP Server (Sidecar) - # ===================================================================== - # Provides MCP tools for interacting with Red Hat Insights APIs - # (Advisor, Inventory, Vulnerability, Remediations, etc.) - # - # Authentication: The agent forwards the caller's JWT token to the - # MCP server via the Authorization header. The MCP server uses this - # token to authenticate with console.redhat.com on behalf of the user. - # - # Sidecar container - no ports section (only main container can expose port) - # - # Note: Cloud Run doesn't support Quay.io directly. Copy the image to GCR: - # docker pull quay.io/redhat-services-prod/insights-management-tenant/insights-mcp/red-hat-lightspeed-mcp:latest - # docker tag quay.io/.../red-hat-lightspeed-mcp:latest gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest - # docker push gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest - - name: insights-mcp - image: ${MCP_IMAGE} - args: - - "--readonly" - - "http" - - "--port" - - "8080" - - "--host" - - "0.0.0.0" - resources: - limits: - cpu: "1" - memory: 512Mi traffic: - percent: 100 latestRevision: true diff --git a/deploy/cloudrun/setup.sh b/deploy/cloudrun/setup.sh index 1fa20000..816096d3 100755 --- a/deploy/cloudrun/setup.sh +++ b/deploy/cloudrun/setup.sh @@ -359,7 +359,7 @@ echo " docker pull quay.io/redhat-services-prod/insights-management-tenant/ins echo " docker tag quay.io/redhat-services-prod/insights-management-tenant/insights-mcp/red-hat-lightspeed-mcp:latest gcr.io/$PROJECT_ID/red-hat-lightspeed-mcp:latest" echo " docker push 
gcr.io/$PROJECT_ID/red-hat-lightspeed-mcp:latest" echo "" -echo "4. Build and deploy the agent (includes MCP sidecar):" +echo "4. Build and deploy the services:" echo " ./deploy/cloudrun/deploy.sh --build --service all --allow-unauthenticated" echo "" echo "5. Get the service URL:" diff --git a/docs/architecture.md b/docs/architecture.md index 640985cd..731f2259 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -6,10 +6,11 @@ This document describes the architecture of the Red Hat Lightspeed Agent for Goo The Red Hat Lightspeed Agent for Google Cloud is an A2A-ready (Agent-to-Agent) service that provides AI-powered access to Red Hat Insights. It is built using Google's Agent Development Kit (ADK) and integrates with Red Hat's MCP (Model Context Protocol) server for Insights data access. -The system consists of **two separate services**: +The system consists of **three separate services**: 1. **Marketplace Handler** - Always running service that handles provisioning and client registration -2. **Lightspeed Agent** - The AI agent that handles user interactions (deployed after provisioning) +2. **MCP Server** - Red Hat Lightspeed MCP server providing Insights API tools (internal only) +3. 
**Lightspeed Agent** - The AI agent that handles user interactions (deployed after provisioning) ## Architecture Diagram @@ -69,41 +70,44 @@ The system consists of **two separate services**: │ │ │ Agent Core │ │ │ │ │ │ (Google ADK + Gemini) │ │ │ │ │ └─────────────────────────────────────────────────────────────────┘ │ │ -│ │ │ │ │ -│ │ ▼ │ │ -│ │ ┌─────────────────────────────────────────────────────────────────┐ │ │ -│ │ │ MCP Sidecar │ │ │ -│ │ │ (Red Hat Lightspeed MCP Server) │ │ │ -│ │ └─────────────────────────────────────────────────────────────────┘ │ │ │ └───────────────────────────────────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────────────────────────────┘ │ │ ▼ ▼ -┌─────────────┐ ┌─────────────────────────┐ -│ Gemini │ │ Red Hat Insights APIs │ -│ API │ │ (via MCP Server) │ -│ (Vertex) │ │ - Advisor │ -└─────────────┘ │ - Vulnerability │ - │ - Patch │ - │ - Content │ - └─────────────────────────┘ +┌─────────────┐ ┌─────────────────────────────────────────┐ +│ Gemini │ │ MCP Server Service │ +│ API │ │ (Cloud Run - ingress: internal) │ +│ (Vertex) │ │ HTTPS from Agent │ +└─────────────┘ └────────────────┬────────────────────────┘ + │ + ▼ + ┌─────────────────────────┐ + │ Red Hat Insights APIs │ + │ (via MCP Server) │ + │ - Advisor │ + │ - Vulnerability │ + │ - Patch │ + │ - Content │ + └─────────────────────────┘ ``` -## Two-Service Architecture +## Three-Service Architecture -### Why Two Services? +### Why Three Services? -The system is split into two services for important operational reasons: +The system is split into three services for important operational reasons: | Service | Purpose | Lifecycle | |---------|---------|-----------| | **Marketplace Handler** | Handles provisioning and DCR | Always running (minScale=1) | +| **MCP Server** | Red Hat Insights API tools | Scale to zero (stateless) | | **Lightspeed Agent** | AI agent for user queries | Deployed after provisioning | 1. 
**Marketplace Handler must be always running** to receive Pub/Sub events from Google Cloud Marketplace for account and entitlement approvals -2. **Agent can be deployed on-demand** after a customer has been provisioned -3. **Separation of concerns**: Provisioning logic is isolated from agent logic -4. **Independent scaling**: Handler scales for provisioning traffic, Agent scales for user traffic +2. **MCP Server scales independently** from the agent — stateless, can scale to zero +3. **Agent can be deployed on-demand** after a customer has been provisioned +4. **Separation of concerns**: Provisioning logic, MCP tools, and agent logic are isolated +5. **Independent scaling**: Each service scales for its own traffic pattern ## Components @@ -141,9 +145,9 @@ The AI agent built with Google ADK: - **Tool Orchestration**: Manages tool calls to MCP server - **Session Management**: Maintains conversation context -### MCP Sidecar +### MCP Server Service -Runs as a sidecar container connecting to Red Hat Insights: +Runs as a separate Cloud Run service connecting to Red Hat Insights: - **Tool Discovery**: Discovers available Insights tools - **Tool Execution**: Executes tools and returns results @@ -236,7 +240,7 @@ This flow handles actual user interactions with the agent: 3. Query passed to Agent Core 4. Agent processes query with Gemini 5. Agent calls MCP tools as needed -6. MCP sidecar queries Red Hat Insights APIs +6. MCP service queries Red Hat Insights APIs 7. 
Results aggregated and returned to user ``` @@ -287,7 +291,7 @@ src/lightspeed_agent/ |-------|---------|------|---------| | `lightspeed-agent` | Agent | 8000 | A2A protocol, user queries | | `marketplace-handler` | Handler | 8001 | Pub/Sub events, DCR | -| `insights-mcp` | MCP Sidecar | 8081 | Red Hat Lightspeed tools | +| `insights-mcp` | MCP Service | 8080 | Red Hat Lightspeed tools | ## External Dependencies @@ -314,6 +318,7 @@ src/lightspeed_agent/ | Service | Min Instances | Max Instances | Notes | |---------|---------------|---------------|-------| | Marketplace Handler | 1 | 5 | Always running for Pub/Sub | +| MCP Server | 0 | 4 | Stateless; should be >= agent maxScale | | Lightspeed Agent | 0 | 10 | Scale to zero when idle | ### Resource Requirements @@ -322,7 +327,7 @@ src/lightspeed_agent/ |---------|-----|--------|-------| | Marketplace Handler | 1 | 512Mi | Lightweight, event-driven | | Lightspeed Agent | 2 | 2Gi | AI processing, MCP calls | -| MCP Sidecar | 0.5 | 256Mi | Red Hat Insights queries | +| MCP Server | 1 | 512Mi | Red Hat Insights queries | ### Connection Pooling diff --git a/docs/authentication.md b/docs/authentication.md index 312efa4b..514e63cd 100644 --- a/docs/authentication.md +++ b/docs/authentication.md @@ -8,7 +8,7 @@ The system uses three distinct authentication flows: 1. **Dynamic Client Registration (DCR)** -- Handler creates per-order OAuth clients in Red Hat SSO 2. **Token Introspection** -- Agent validates access tokens via Red Hat SSO introspection endpoint (RFC 7662) and checks for `api.console` and `api.ocm` scopes -3. **MCP JWT Pass-Through** -- Agent forwards the caller's JWT token to the MCP sidecar, which uses it to call console.redhat.com APIs on behalf of the user +3. **MCP JWT Pass-Through** -- Agent forwards the caller's JWT token to the MCP server, which uses it to call console.redhat.com APIs on behalf of the user Clients obtain access tokens directly from Red Hat SSO using their DCR-issued credentials. 
The agent acts purely as a **Resource Server** — it validates incoming tokens but does not proxy or participate in the OAuth authorization flow. @@ -73,7 +73,7 @@ Clients obtain access tokens directly from Red Hat SSO using their DCR-issued cr | | | | | | v | | | +----------------------------+| - | +--| MCP Sidecar (8081) || + | +--| MCP Server (internal) || | | 8. Calls APIs using the || | | forwarded JWT token || | +-------------+--------------+| @@ -100,8 +100,8 @@ Clients obtain access tokens directly from Red Hat SSO using their DCR-issued cr | 4 | Handler -> Red Hat SSO | Create OAuth client via GMA SSO API | | 5 | Client -> Red Hat SSO | Client obtains access token directly from Red Hat SSO (e.g., `client_credentials` grant) | | 6 | Agent -> Red Hat SSO | Introspect token on every A2A request; check `api.console` and `api.ocm` scopes | -| 7 | Agent -> MCP Sidecar | Tool call with caller's JWT token in Authorization header | -| 8 | MCP Sidecar -> console.redhat.com | Call Insights APIs using the forwarded JWT token | +| 7 | Agent -> MCP Server | Tool call with caller's JWT token in Authorization header | +| 8 | MCP Server -> console.redhat.com | Call Insights APIs using the forwarded JWT token | ## Dynamic Client Registration (DCR) @@ -173,9 +173,9 @@ A test script is available at `scripts/test_dcr.py` that signs a software_statem - **Secret encryption**: Client secrets are encrypted with Fernet before storage in PostgreSQL. - **Client secrets**: Encrypted with Fernet before storage in PostgreSQL. -## MCP Sidecar Authentication +## MCP Server Authentication -The agent forwards the caller's JWT token to the MCP sidecar via the `Authorization: Bearer ` header on every tool call. The MCP sidecar uses this token to authenticate with console.redhat.com APIs (Advisor, Inventory, Vulnerability, etc.) on behalf of the calling user. See [MCP Integration](mcp-integration.md) for full details. 
+The agent forwards the caller's JWT token to the MCP server via the `Authorization: Bearer ` header on every tool call. The MCP server uses this token to authenticate with console.redhat.com APIs (Advisor, Inventory, Vulnerability, etc.) on behalf of the calling user. See [MCP Integration](mcp-integration.md) for full details. ## Token Introspection @@ -235,7 +235,7 @@ default: `openid,profile,email,api.console,api.ocm`). Tokens carrying scopes outside this list are rejected with **403 Forbidden**. This is a defense-in-depth measure: since the agent forwards the caller's JWT -to the MCP sidecar and downstream APIs, restricting scopes prevents tokens with +to the MCP server and downstream APIs, restricting scopes prevents tokens with elevated privileges from being exercised against those services. All permitted scopes must be explicitly listed in `AGENT_ALLOWED_SCOPES`. diff --git a/docs/configuration.md b/docs/configuration.md index 507e9b7b..d562e82b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -51,12 +51,12 @@ RED_HAT_SSO_CLIENT_SECRET=my-client-secret ### Red Hat Lightspeed MCP -The MCP server runs as a sidecar container and provides tools for accessing Red Hat Insights APIs. The agent forwards the caller's JWT token to the MCP server, which uses it to authenticate with console.redhat.com on behalf of the user. See [MCP Integration](mcp-integration.md) for details. +The MCP server runs as a separate Cloud Run service and provides tools for accessing Red Hat Insights APIs. The agent forwards the caller's JWT token to the MCP server, which uses it to authenticate with console.redhat.com on behalf of the user. See [MCP Integration](mcp-integration.md) for details. 
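The split between the two authentication headers used for MCP calls can be sketched as follows. This is an illustrative Python helper, not code from the repository; `build_mcp_headers` is a hypothetical name, and in the real deployment the Google ID token would be minted via the Cloud Run metadata server or `google.auth` rather than passed in directly:

```python
def build_mcp_headers(red_hat_jwt: str, google_id_token: str) -> dict[str, str]:
    """Headers attached to each MCP tool call (sketch, not repo code).

    Authorization carries the caller's Red Hat SSO JWT, which the MCP
    server forwards to console.redhat.com on behalf of the user.
    X-Serverless-Authorization carries the agent's Google ID token so
    Cloud Run IAM can authenticate the caller without consuming the
    Authorization header.
    """
    return {
        "Authorization": f"Bearer {red_hat_jwt}",
        "X-Serverless-Authorization": f"Bearer {google_id_token}",
    }
```

Keeping the two tokens in separate headers is what lets Cloud Run IAM and console.redhat.com each receive the credential intended for them.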
| Variable | Default | Description | |----------|---------|-------------| | `MCP_TRANSPORT_MODE` | `http` | MCP transport: `stdio`, `http`, or `sse` | -| `MCP_SERVER_URL` | `http://localhost:8080` | MCP server URL (use 8081 for Podman to avoid A2A Inspector conflict) | +| `MCP_SERVER_URL` | `http://localhost:8080` | MCP server URL (use HTTPS URL for Cloud Run production; 8081 for Podman) | | `MCP_READ_ONLY` | `true` | Enable read-only mode for MCP tools | **Development (stdio mode):** @@ -67,12 +67,12 @@ MCP_TRANSPORT_MODE=stdio MCP_READ_ONLY=true ``` -**Production (http mode with sidecar):** +**Production (http mode with MCP service):** ```bash -# Agent connects to MCP server sidecar via HTTP +# Agent connects to MCP server via HTTPS MCP_TRANSPORT_MODE=http -MCP_SERVER_URL=http://localhost:8081 # Use 8081 for Podman (8080 for Cloud Run) +MCP_SERVER_URL=http://localhost:8081 # Podman: use 8081. Cloud Run: set by deploy.sh (https:// URL) MCP_READ_ONLY=true ``` diff --git a/docs/mcp-integration.md b/docs/mcp-integration.md index 297d67b6..1cae2aab 100644 --- a/docs/mcp-integration.md +++ b/docs/mcp-integration.md @@ -4,7 +4,7 @@ This document explains how the Lightspeed Agent integrates with the Red Hat Ligh ## Overview -The agent uses the [Red Hat Lightspeed MCP Server](https://github.com/RedHatInsights/insights-mcp) as a sidecar to access Red Hat Insights APIs. The MCP (Model Context Protocol) server provides tools that the agent can call to retrieve data from: +The agent uses the [Red Hat Lightspeed MCP Server](https://github.com/RedHatInsights/insights-mcp) to access Red Hat Insights APIs. 
The MCP (Model Context Protocol) server provides tools that the agent can call to retrieve data from: - **Advisor**: System configuration recommendations - **Inventory**: Registered systems and host information @@ -16,42 +16,37 @@ The agent uses the [Red Hat Lightspeed MCP Server](https://github.com/RedHatInsi ## Architecture ``` -┌─────────────────────────────────────────────────────────────────┐ -│ Deployment Pod │ -│ │ -│ ┌─────────────────────┐ ┌─────────────────────────────┐ │ -│ │ Lightspeed Agent │ │ Red Hat Lightspeed MCP │ │ -│ │ │ │ Server │ │ -│ │ ┌─────────────┐ │ HTTP │ ┌─────────────────────┐ │ │ -│ │ │ Gemini │ │◄────►│ │ MCP Tools │ │ │ -│ │ │ Model │ │:8080 │ │ - advisor │ │ │ -│ │ └─────────────┘ │ │ │ - inventory │ │ │ -│ │ │ │ │ │ - vulnerability │ │ │ -│ │ ▼ │ │ │ - remediations │ │ │ -│ │ ┌─────────────┐ │ │ └─────────────────────┘ │ │ -│ │ │ ADK Agent │ │ │ │ │ │ -│ │ └─────────────┘ │ │ │ │ │ -│ │ │ │ ▼ │ │ -│ │ Port 8000 │ │ ┌─────────────────────┐ │ │ -│ └─────────────────────┘ │ │ OAuth2 Client │ │ │ -│ │ │ (Lightspeed) │ │ │ -│ │ └──────────┬──────────┘ │ │ -│ │ │ │ │ -│ └──────────────┼──────────────┘ │ -│ │ │ -└──────────────────────────────────────────────┼──────────────────┘ - │ - ▼ - ┌─────────────────────┐ - │ console.redhat.com │ - │ │ - │ - Advisor API │ - │ - Inventory API │ - │ - Vulnerability API│ - │ - Remediations API │ - │ - Patch API │ - │ - Image Builder API│ - └─────────────────────┘ +┌─────────────────────┐ HTTPS ┌─────────────────────────────┐ +│ Lightspeed Agent │ ◄────────────────► │ Red Hat Lightspeed MCP │ +│ (Cloud Run) │ │ Server (Cloud Run) │ +│ │ │ ingress: internal │ +│ ┌─────────────┐ │ │ ┌─────────────────────┐ │ +│ │ Gemini │ │ │ │ MCP Tools │ │ +│ │ Model │ │ │ │ - advisor │ │ +│ └─────────────┘ │ │ │ - inventory │ │ +│ │ │ │ │ - vulnerability │ │ +│ ▼ │ │ │ - remediations │ │ +│ ┌─────────────┐ │ │ └─────────────────────┘ │ +│ │ ADK Agent │ │ │ │ │ +│ └─────────────┘ │ │ ▼ │ +│ │ │ ┌─────────────────────┐ 
│ +│ Port 8000 │ │ │ OAuth2 Client │ │ +└─────────────────────┘ │ │ (Lightspeed) │ │ + │ └──────────┬──────────┘ │ + │ │ │ + └──────────────┼──────────────┘ + │ + ▼ + ┌─────────────────────┐ + │ console.redhat.com │ + │ │ + │ - Advisor API │ + │ - Inventory API │ + │ - Vulnerability API│ + │ - Remediations API │ + │ - Patch API │ + │ - Image Builder API│ + └─────────────────────┘ ``` ## Credential Flow @@ -116,15 +111,11 @@ MCP_SERVER_MODE: http MCP_SERVER_PORT: 8081 ``` -**Cloud Run deployment** (port 8080 for sidecar): +**Cloud Run deployment** (separate service, HTTPS): ```yaml -# Agent configuration +# Agent configuration (MCP_SERVER_URL set automatically by deploy.sh) MCP_TRANSPORT_MODE: http -MCP_SERVER_URL: http://localhost:8080 - -# MCP server configuration -MCP_SERVER_MODE: http -MCP_SERVER_PORT: 8080 +MCP_SERVER_URL: https://rh-lightspeed-mcp-xxxxx.run.app ``` ### stdio Transport (Development) @@ -162,9 +153,10 @@ containers: ### Cloud Run -The MCP server runs as a sidecar container in the Cloud Run service: +The MCP server runs as a separate Cloud Run service (see `deploy/cloudrun/mcp-service.yaml`). The agent connects to it over HTTPS: ```yaml +# Agent service (service.yaml) containers: - name: lightspeed-agent # ... agent configuration ... @@ -172,12 +164,11 @@ containers: - name: MCP_TRANSPORT_MODE value: "http" - name: MCP_SERVER_URL - value: "http://localhost:8080" - - - name: insights-mcp - image: ghcr.io/redhatinsights/red-hat-lightspeed-mcp:latest + value: "https://rh-lightspeed-mcp-xxxxx.run.app" # Set by deploy.sh ``` +The MCP service uses `ingress: internal` so it is only reachable from the agent's service account in the same GCP project. `deploy.sh` auto-discovers the MCP URL and sets `MCP_SERVER_URL` on the agent. 
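Because the agent forwards the caller's Red Hat SSO JWT on every tool call, the MCP URL it talks to must be encrypted in transit. The acceptance rule the settings enforce (HTTPS everywhere, with plain HTTP tolerated only for localhost development) can be sketched as a standalone check; `is_allowed_mcp_url` is an illustrative name, not a function from the codebase:

```python
def is_allowed_mcp_url(url: str) -> bool:
    """True if the MCP server URL may carry a forwarded JWT (sketch).

    HTTPS is required so the Red Hat SSO token is never sent in
    cleartext; http://localhost is tolerated for local development.
    """
    return url.startswith(("https://", "http://localhost"))
```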
+ ## Available Tools The MCP server provides these tools to the agent: diff --git a/src/lightspeed_agent/config/settings.py b/src/lightspeed_agent/config/settings.py index 841f58cd..52bc161a 100644 --- a/src/lightspeed_agent/config/settings.py +++ b/src/lightspeed_agent/config/settings.py @@ -60,7 +60,7 @@ class Settings(BaseSettings): ) mcp_server_url: str = Field( default="http://localhost:8080", - description="MCP server URL for http/sse modes", + description="MCP server URL for http/sse modes (use HTTPS URL for Cloud Run production)", ) mcp_read_only: bool = Field( default=True, From 1041e49914cdf479cc075fb8dbf61955e665284e Mon Sep 17 00:00:00 2001 From: Luis Tomas Bolivar Date: Fri, 27 Mar 2026 11:44:01 +0100 Subject: [PATCH 2/3] feat: enforce HTTPS for MCP_SERVER_URL Add a Pydantic model validator that requires MCP_SERVER_URL to use HTTPS when transport mode is http or sse. Only http://localhost is allowed for local development. This ensures the Red Hat SSO JWT token forwarded to the MCP server is always transmitted over an encrypted connection. 
Co-Authored-By: Claude Opus 4.6 --- src/lightspeed_agent/config/settings.py | 20 ++++++++- tests/test_settings.py | 60 +++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/src/lightspeed_agent/config/settings.py b/src/lightspeed_agent/config/settings.py index 52bc161a..93664bf1 100644 --- a/src/lightspeed_agent/config/settings.py +++ b/src/lightspeed_agent/config/settings.py @@ -60,7 +60,7 @@ class Settings(BaseSettings): ) mcp_server_url: str = Field( default="http://localhost:8080", - description="MCP server URL for http/sse modes (use HTTPS URL for Cloud Run production)", + description="MCP server URL for http/sse modes (must be HTTPS except for localhost)", ) mcp_read_only: bool = Field( default=True, @@ -326,6 +326,24 @@ def _validate_session_backend(self) -> "Settings": ) return self + @model_validator(mode="after") + def _validate_mcp_server_url(self) -> "Settings": + """Enforce HTTPS for MCP_SERVER_URL (except localhost for development). + + When the transport mode is http or sse, the MCP server URL must start + with https:// or http://localhost. This ensures that the Red Hat SSO + JWT token forwarded to the MCP server is always transmitted over an + encrypted connection. + """ + if self.mcp_transport_mode in ("http", "sse"): + url = self.mcp_server_url + if not url.startswith(("https://", "http://localhost")): + raise ValueError( + f"MCP_SERVER_URL must use HTTPS (got {url!r}). " + "Only http://localhost is allowed for local development." 
+ ) + return self + # OpenTelemetry Configuration otel_enabled: bool = Field( default=False, diff --git a/tests/test_settings.py b/tests/test_settings.py index 30631e4a..a7ad73ba 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -48,3 +48,63 @@ def test_skip_jwt_defaults_to_false(self): ): settings = Settings(skip_jwt_validation=False) assert settings.skip_jwt_validation is False + + +class TestMcpServerUrlHttpsGuard: + """Verify MCP_SERVER_URL requires HTTPS (except localhost).""" + + def test_https_url_allowed(self): + """HTTPS URLs are accepted for http/sse transport modes.""" + settings = Settings( + mcp_transport_mode="http", + mcp_server_url="https://rh-lightspeed-mcp-abc123.run.app", + ) + assert settings.mcp_server_url == "https://rh-lightspeed-mcp-abc123.run.app" + + def test_http_localhost_allowed(self): + """http://localhost is allowed for local development.""" + settings = Settings( + mcp_transport_mode="http", + mcp_server_url="http://localhost:8080", + ) + assert settings.mcp_server_url == "http://localhost:8080" + + def test_http_localhost_no_port_allowed(self): + """http://localhost without port is allowed.""" + settings = Settings( + mcp_transport_mode="http", + mcp_server_url="http://localhost", + ) + assert settings.mcp_server_url == "http://localhost" + + def test_plain_http_rejected(self): + """Plain HTTP to a non-localhost host must be rejected.""" + with pytest.raises(ValidationError, match="must use HTTPS"): + Settings( + mcp_transport_mode="http", + mcp_server_url="http://mcp-server:8080", + ) + + def test_plain_http_remote_rejected(self): + """HTTP to a remote host must be rejected.""" + with pytest.raises(ValidationError, match="must use HTTPS"): + Settings( + mcp_transport_mode="http", + mcp_server_url="http://10.0.0.5:8080", + ) + + def test_sse_mode_also_validates(self): + """SSE transport mode also enforces HTTPS.""" + with pytest.raises(ValidationError, match="must use HTTPS"): + Settings( + 
mcp_transport_mode="sse", + mcp_server_url="http://mcp-server:8080", + ) + + def test_stdio_mode_skips_validation(self): + """stdio mode does not use MCP_SERVER_URL, so no URL validation.""" + settings = Settings( + mcp_transport_mode="stdio", + mcp_server_url="http://anything:9999", + ) + assert settings.mcp_server_url == "http://anything:9999" From 8499e838f5212daef3226a915ac740edb1424faa Mon Sep 17 00:00:00 2001 From: Luis Tomas Bolivar Date: Fri, 27 Mar 2026 12:05:13 +0100 Subject: [PATCH 3/3] feat: make MCP deployment mode configurable (sidecar vs service) Add MCP_DEPLOY_MODE env var (service|sidecar, default: service) to control how the MCP server is deployed to Cloud Run: - service mode (default): MCP runs as a separate Cloud Run service with ingress: internal, HTTPS, and IAM restricted to the agent SA. Uses service.yaml + mcp-service.yaml. - sidecar mode: MCP runs as a second container inside the agent pod. Agent connects via http://localhost:8080 (no network hop). Uses service-sidecar.yaml (no separate MCP service needed). 
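The mode selection described above can be summarized in a short sketch. deploy.sh implements this in bash; the Python below is only an illustration, and `plan_deploy` is a hypothetical name:

```python
def plan_deploy(mcp_deploy_mode: str = "service") -> dict:
    """Sketch of how MCP_DEPLOY_MODE drives template and step selection."""
    if mcp_deploy_mode not in ("service", "sidecar"):
        raise ValueError(f"invalid MCP_DEPLOY_MODE: {mcp_deploy_mode!r}")
    sidecar = mcp_deploy_mode == "sidecar"
    return {
        # Which YAML template describes the agent service.
        "agent_yaml": "service-sidecar.yaml" if sidecar else "service.yaml",
        # Whether deploy_mcp() runs (skipped entirely in sidecar mode).
        "deploy_mcp_service": not sidecar,
        # In sidecar mode the URL is fixed; in service mode it is
        # discovered from the deployed MCP service after deployment.
        "mcp_server_url": "http://localhost:8080" if sidecar else "<discovered>",
    }
```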
Changes: - Create service-sidecar.yaml with MCP container block (no ports) - deploy.sh: select YAML template based on MCP_DEPLOY_MODE, skip deploy_mcp() and MCP URL discovery in sidecar mode - cleanup.sh: skip MCP service deletion in sidecar mode - Update Cloud Run README with mode comparison table and examples - Add MCP_DEPLOY_MODE to .env.example - Add cross-reference comments in service.yaml header Co-Authored-By: Claude Opus 4.6 --- .env.example | 8 +- deploy/cloudrun/README.md | 59 ++++++-- deploy/cloudrun/cleanup.sh | 34 +++-- deploy/cloudrun/deploy.sh | 157 +++++++++++++------- deploy/cloudrun/service-sidecar.yaml | 205 +++++++++++++++++++++++++++ deploy/cloudrun/service.yaml | 7 +- 6 files changed, 398 insertions(+), 72 deletions(-) create mode 100644 deploy/cloudrun/service-sidecar.yaml diff --git a/.env.example b/.env.example index 55cbcc62..75ebac57 100644 --- a/.env.example +++ b/.env.example @@ -45,10 +45,16 @@ GMA_CLIENT_SECRET=your_gma_client_secret # Red Hat Lightspeed MCP Server Configuration # ----------------------------------------------------------------------------- # The MCP server provides tools to access Red Hat Insights APIs. -# It runs as a separate service (ingress: internal) in Cloud Run. +# It runs as a separate service (ingress: internal) in Cloud Run by default, +# or as a sidecar container inside the agent pod (MCP_DEPLOY_MODE=sidecar). # The agent forwards the caller's JWT token to the MCP server, which uses # it to authenticate with console.redhat.com on behalf of the calling user. 
# +# MCP deployment mode for Cloud Run (deploy-time only, not used by Python): +# service - MCP runs as a separate Cloud Run service (default) +# sidecar - MCP runs as a second container in the agent pod +# MCP_DEPLOY_MODE=service +# # The MCP server can access: # - Advisor (recommendations) # - Inventory (registered systems) diff --git a/deploy/cloudrun/README.md b/deploy/cloudrun/README.md index 4efd05cb..83472867 100644 --- a/deploy/cloudrun/README.md +++ b/deploy/cloudrun/README.md @@ -4,7 +4,25 @@ Deploy the Red Hat Lightspeed Agent for Google Cloud to Google Cloud Run for pro ## Architecture -The deployment consists of **three separate Cloud Run services** plus **Cloud Memorystore for Redis** (for rate limiting): +The deployment consists of **two or three Cloud Run services** (depending on `MCP_DEPLOY_MODE`) plus **Cloud Memorystore for Redis** (for rate limiting): + +### MCP Deployment Modes + +The MCP server can be deployed in two modes, controlled by the `MCP_DEPLOY_MODE` environment variable: + +| Mode | Value | Description | YAML Configs | +|------|-------|-------------|--------------| +| **Separate Service** (default) | `service` | MCP runs as its own Cloud Run service with `ingress: internal`. Agent connects via HTTPS. | `service.yaml` + `mcp-service.yaml` | +| **Sidecar** | `sidecar` | MCP runs as a second container inside the agent pod. Agent connects via `http://localhost:8080`. 
| `service-sidecar.yaml` | + +| Consideration | Service mode | Sidecar mode | +|---------------|-------------|--------------| +| Independent scaling | MCP scales independently (maxScale: 4) | MCP scales with agent (1:1) | +| Cold start latency | MCP may cold start separately | Both start together | +| Network security | HTTPS between services + IAM | localhost (no network hop) | +| Operational complexity | 3 Cloud Run services | 2 Cloud Run services | + +### Architecture (MCP_DEPLOY_MODE=service) ``` Google Cloud Marketplace @@ -66,14 +84,27 @@ The deployment consists of **three separate Cloud Run services** plus **Cloud Me ### Deployment Order +**Service mode** (default): + 1. **Set up Cloud Memorystore Redis and VPC connector** - Required for agent rate limiting (see [Redis Setup](#redis-setup-for-rate-limiting)) 2. **Deploy Marketplace Handler first** - Must be running to receive provisioning events 3. **Deploy MCP Server** - Must be running before the agent can call Insights APIs 4. **Deploy Agent after provisioning** - Can be deployed when customers are ready to use the agent -The MCP server runs as a separate Cloud Run service with `ingress: internal`, reachable only by the agent's service account. The agent connects to it over HTTPS. The agent forwards the caller's JWT token to the MCP server, which uses it to authenticate with console.redhat.com on behalf of the user (see [MCP Authentication](#mcp-authentication)). +The MCP server runs as a separate Cloud Run service with `ingress: internal`, reachable only by the agent's service account. The agent connects to it over HTTPS. `deploy.sh` auto-discovers the MCP service URL after deployment and sets `MCP_SERVER_URL` on the agent. + +**Sidecar mode** (`MCP_DEPLOY_MODE=sidecar`): + +1. **Set up Cloud Memorystore Redis and VPC connector** +2. **Deploy Marketplace Handler first** +3. 
**Deploy Agent** (MCP sidecar is included in the agent pod, no separate step needed)
+
+```bash
+export MCP_DEPLOY_MODE=sidecar
+./deploy/cloudrun/deploy.sh --service all --allow-unauthenticated
+```
 
-> **Note:** The `MCP_SERVER_URL` in `service.yaml` is a placeholder. `deploy.sh` auto-discovers the MCP service URL after deployment and sets it on the agent.
+> **Note:** If you switch from `service` to `sidecar` mode, the standalone MCP Cloud Run service will still exist. Delete it manually or run `cleanup.sh` first.
 
 ## Service Accounts
 
@@ -401,6 +432,8 @@ curl -s $AGENT_URL/.well-known/agent.json | jq '.capabilities.extensions'
 
 **Deploy script options:**
 
 | Flag | Description |
 |------|-------------|
 | `--service <service>` | Which service to deploy: `all` (default), `handler`, `mcp`, `agent` |
@@ -410,14 +443,20 @@ curl -s $AGENT_URL/.well-known/agent.json | jq '.capabilities.extensions'
 | `--build` | Build the image(s) before deploying |
 | `--allow-unauthenticated` | Allow public access (required for A2A and Pub/Sub) |
 
+**Environment variables:**
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `MCP_DEPLOY_MODE` | `service` | `service` (separate Cloud Run service) or `sidecar` (MCP inside agent pod) |
+
 **Service deployment:**
 
-| Service | YAML Config | Description |
-|---------|-------------|-------------|
-| `handler` | `marketplace-handler.yaml` | Pub/Sub events, DCR requests |
-| `mcp` | `mcp-service.yaml` | Red Hat Insights MCP tools (internal) |
-| `agent` | `service.yaml` | A2A queries, user interactions |
-| `all` | All three | Deploy all services |
+| Service | YAML Config (service mode) | YAML Config (sidecar mode) | Description |
+|---------|---------------------------|----------------------------|-------------|
+| `handler` | `marketplace-handler.yaml` | `marketplace-handler.yaml` | Pub/Sub events, DCR requests |
+| `mcp` | `mcp-service.yaml` | _(skipped)_ | Red Hat Insights MCP
tools (internal) |
+| `agent` | `service.yaml` | `service-sidecar.yaml` | A2A queries, user interactions |
+| `all` | All three | Handler + Agent (with sidecar) | Deploy all services |
 
 The deploy script performs variable substitution on the YAML configs (`${PROJECT_ID}`, `${REGION}`, image references, etc.) and deploys using
@@ -1448,7 +1487,7 @@ To remove all resources created by the setup and deploy scripts:
 ```
 
 This will delete:
-- Cloud Run services (lightspeed-agent, marketplace-handler, rh-lightspeed-mcp)
+- Cloud Run services (lightspeed-agent, marketplace-handler, and rh-lightspeed-mcp if `MCP_DEPLOY_MODE=service`)
 - Pub/Sub topic and subscription
 - Secret Manager secrets
 - Service accounts (runtime + Pub/Sub invoker) and IAM bindings
diff --git a/deploy/cloudrun/cleanup.sh b/deploy/cloudrun/cleanup.sh
index 64e67590..f9476d3f 100755
--- a/deploy/cloudrun/cleanup.sh
+++ b/deploy/cloudrun/cleanup.sh
@@ -43,6 +43,7 @@ SERVICE_NAME="${SERVICE_NAME:-lightspeed-agent}"
 SERVICE_ACCOUNT_NAME="${SERVICE_ACCOUNT_NAME:-${SERVICE_NAME}}"
 HANDLER_SERVICE_NAME="${HANDLER_SERVICE_NAME:-marketplace-handler}"
 MCP_SERVICE_NAME="${MCP_SERVICE_NAME:-rh-lightspeed-mcp}"
+MCP_DEPLOY_MODE="${MCP_DEPLOY_MODE:-service}"
 DB_INSTANCE_NAME="${DB_INSTANCE_NAME:-lightspeed-agent-db}"
 SERVICE_ACCOUNT="${SERVICE_ACCOUNT_NAME}@${PROJECT_ID}.iam.gserviceaccount.com"
 
@@ -80,7 +81,12 @@ fi
 
 log_warn "This will delete the following resources from project: $PROJECT_ID"
 echo ""
-echo " - Cloud Run services: $SERVICE_NAME, $HANDLER_SERVICE_NAME, $MCP_SERVICE_NAME"
+if [[ "$MCP_DEPLOY_MODE" == "service" ]]; then
+    echo " - Cloud Run services: $SERVICE_NAME, $HANDLER_SERVICE_NAME, $MCP_SERVICE_NAME"
+else
+    echo " - Cloud Run services: $SERVICE_NAME, $HANDLER_SERVICE_NAME"
+    echo "   (MCP_DEPLOY_MODE=sidecar: no standalone MCP service)"
+fi
 echo " - Pub/Sub topic: $PUBSUB_TOPIC"
 echo " - Pub/Sub subscription: $PUBSUB_SUBSCRIPTION"
 echo " - Secrets: redhat-sso-client-id, redhat-sso-client-secret,
database-url," @@ -130,15 +136,19 @@ else log_info "Cloud Run service '$HANDLER_SERVICE_NAME' does not exist, skipping" fi -# Delete MCP server service -if gcloud run services describe "$MCP_SERVICE_NAME" --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then - gcloud run services delete "$MCP_SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --quiet - log_info "Cloud Run service '$MCP_SERVICE_NAME' deleted" +# Delete MCP server service (only exists in service mode) +if [[ "$MCP_DEPLOY_MODE" == "service" ]]; then + if gcloud run services describe "$MCP_SERVICE_NAME" --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then + gcloud run services delete "$MCP_SERVICE_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --quiet + log_info "Cloud Run service '$MCP_SERVICE_NAME' deleted" + else + log_info "Cloud Run service '$MCP_SERVICE_NAME' does not exist, skipping" + fi else - log_info "Cloud Run service '$MCP_SERVICE_NAME' does not exist, skipping" + log_info "MCP_DEPLOY_MODE=sidecar: no standalone MCP service to delete" fi # ============================================================================= @@ -276,7 +286,11 @@ log_info "Cleanup complete!" 
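The dual-header scheme this deployment relies on in service mode — a Google ID token in `X-Serverless-Authorization` so Cloud Run IAM can validate the caller, with the end user's Red Hat SSO JWT left untouched in `Authorization` — can be sketched in isolation. Everything below is illustrative: the URL and token values are hypothetical placeholders, and the commented-out `curl` calls only work inside GCP (the metadata endpoint is the standard Cloud Run identity-token endpoint).

```shell
# Hypothetical MCP service URL (the real one is discovered by deploy.sh).
MCP_URL="https://rh-lightspeed-mcp-xxxx-uc.a.run.app"

# Inside a Cloud Run container, an ID token for the MCP audience would
# come from the metadata server:
#   ID_TOKEN=$(curl -s -H "Metadata-Flavor: Google" \
#     "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/identity?audience=${MCP_URL}")
ID_TOKEN="example-google-id-token"   # placeholder for the sketch
RH_SSO_JWT="example-redhat-sso-jwt"  # placeholder caller JWT

# Two distinct headers: Cloud Run IAM checks X-Serverless-Authorization,
# while Authorization carries the Red Hat SSO JWT end to end:
#   curl -s "${MCP_URL}" \
#     -H "X-Serverless-Authorization: Bearer ${ID_TOKEN}" \
#     -H "Authorization: Bearer ${RH_SSO_JWT}"
printf 'X-Serverless-Authorization: Bearer %s\n' "$ID_TOKEN"
printf 'Authorization: Bearer %s\n' "$RH_SSO_JWT"
```

Keeping the two credentials in separate headers is what lets Cloud Run IAM authenticate the agent without consuming the user token the MCP server needs for console.redhat.com.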
log_info "==========================================" echo "" echo "The following resources have been removed:" -echo " - Cloud Run services ($SERVICE_NAME, $HANDLER_SERVICE_NAME, $MCP_SERVICE_NAME)" +if [[ "$MCP_DEPLOY_MODE" == "service" ]]; then + echo " - Cloud Run services ($SERVICE_NAME, $HANDLER_SERVICE_NAME, $MCP_SERVICE_NAME)" +else + echo " - Cloud Run services ($SERVICE_NAME, $HANDLER_SERVICE_NAME)" +fi echo " - Pub/Sub topic and subscription" echo " - Secret Manager secrets" echo " - Service accounts (runtime + Pub/Sub invoker) and IAM bindings" diff --git a/deploy/cloudrun/deploy.sh b/deploy/cloudrun/deploy.sh index 72e84d8d..d6faf0f5 100755 --- a/deploy/cloudrun/deploy.sh +++ b/deploy/cloudrun/deploy.sh @@ -3,13 +3,19 @@ # Google Cloud Run Deployment Script # ============================================================================= # -# Deploys three services to Google Cloud Run: -# 1. marketplace-handler - Handles DCR and Pub/Sub events (always running) -# 2. rh-lightspeed-mcp - MCP server for Red Hat Insights APIs (internal) -# 3. lightspeed-agent - A2A agent (runs after provisioning) +# Deploys services to Google Cloud Run. The number of services depends on +# MCP_DEPLOY_MODE (default: "service"): # -# Uses the YAML service configs (service.yaml, mcp-service.yaml, and -# marketplace-handler.yaml) with variable substitution to deploy each service. +# service — 3 Cloud Run services: +# 1. marketplace-handler – DCR + Pub/Sub events (always running) +# 2. rh-lightspeed-mcp – MCP server for Insights APIs (internal) +# 3. lightspeed-agent – A2A agent (runs after provisioning) +# +# sidecar — 2 Cloud Run services: +# 1. marketplace-handler – DCR + Pub/Sub events (always running) +# 2. lightspeed-agent – A2A agent with MCP sidecar container +# +# Uses the YAML service configs with variable substitution to deploy each service. 
# # Usage: # ./deploy/cloudrun/deploy.sh [OPTIONS] @@ -26,7 +32,7 @@ # --allow-unauthenticated Allow public access # --build Build images before deploying # -# Architecture: +# Architecture (MCP_DEPLOY_MODE=service): # ┌─────────────────────────┐ ┌──────────────────────┐ ┌─────────────────────────┐ # │ Marketplace Handler │ │ MCP Server │ │ Lightspeed Agent │ # │ (Cloud Run #1) │ │ (Cloud Run #2) │ │ (Cloud Run #3) │ @@ -37,6 +43,17 @@ # │ - GMA SSO API │ │ - Remediations │ │ - Calls MCP via HTTPS │ # └─────────────────────────┘ └──────────────────────┘ └─────────────────────────┘ # +# Architecture (MCP_DEPLOY_MODE=sidecar): +# ┌─────────────────────────┐ ┌──────────────────────────────────────────┐ +# │ Marketplace Handler │ │ Lightspeed Agent (Cloud Run #2) │ +# │ (Cloud Run #1) │ │ ┌─────────────┐ ┌──────────────────┐ │ +# │ │ │ │ Agent │ │ MCP Server │ │ +# │ - POST /dcr │ │ │ (8000) │ │ (sidecar, 8080) │ │ +# │ - Pub/Sub push │ │ │ A2A, OAuth │ │ Insights APIs │ │ +# │ - Account approval │ │ │ │──│ localhost:8080 │ │ +# │ - GMA SSO API │ │ └─────────────┘ └──────────────────┘ │ +# └─────────────────────────┘ └──────────────────────────────────────────┘ +# # Prerequisites: # - Run setup.sh first to configure GCP services # - Update secrets in Secret Manager with actual values @@ -65,6 +82,9 @@ SERVICE_NAME="${SERVICE_NAME:-lightspeed-agent}" SERVICE_ACCOUNT_NAME="${SERVICE_ACCOUNT_NAME:-${SERVICE_NAME}}" HANDLER_SERVICE_NAME="${HANDLER_SERVICE_NAME:-marketplace-handler}" MCP_SERVICE_NAME="${MCP_SERVICE_NAME:-rh-lightspeed-mcp}" +# MCP deployment mode: "service" (separate Cloud Run service) or "sidecar" +# (MCP container inside the agent pod). Default: "service". 
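The deploy-time mode switch boils down to a default-and-validate step. A minimal runnable variant (same variable name as the script, but using `case` instead of the script's `if` chain):

```shell
# Default to "service" when unset, then reject anything other than the
# two supported modes — mirrors deploy.sh's validation.
MCP_DEPLOY_MODE="${MCP_DEPLOY_MODE:-service}"
case "$MCP_DEPLOY_MODE" in
    service|sidecar)
        echo "MCP deploy mode: $MCP_DEPLOY_MODE"
        ;;
    *)
        echo "Invalid MCP_DEPLOY_MODE: $MCP_DEPLOY_MODE (must be 'service' or 'sidecar')" >&2
        exit 1
        ;;
esac
```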
+MCP_DEPLOY_MODE="${MCP_DEPLOY_MODE:-service}" DB_INSTANCE_NAME="${DB_INSTANCE_NAME:-lightspeed-agent-db}" VPC_CONNECTOR_NAME="${VPC_CONNECTOR_NAME:-lightspeed-redis-conn}" IMAGE_TAG="${IMAGE_TAG:-latest}" @@ -141,6 +161,12 @@ if [[ -z "$PROJECT_ID" ]]; then exit 1 fi +# Validate MCP_DEPLOY_MODE +if [[ "$MCP_DEPLOY_MODE" != "service" && "$MCP_DEPLOY_MODE" != "sidecar" ]]; then + log_error "Invalid MCP_DEPLOY_MODE: $MCP_DEPLOY_MODE (must be 'service' or 'sidecar')" + exit 1 +fi + # Set default images if not specified if [[ -z "$AGENT_IMAGE" ]]; then AGENT_IMAGE="gcr.io/${PROJECT_ID}/lightspeed-agent:${IMAGE_TAG}" @@ -153,6 +179,7 @@ log_info "Deploying to Cloud Run" log_info " Project: $PROJECT_ID" log_info " Region: $REGION" log_info " Service(s): $DEPLOY_SERVICE" +log_info " MCP Deploy Mode: $MCP_DEPLOY_MODE" log_info " Agent Image: $AGENT_IMAGE" log_info " Handler Image: $HANDLER_IMAGE" log_info " MCP Image: $MCP_IMAGE" @@ -188,23 +215,32 @@ build_handler_image() { # Deploy using service YAML configs # ============================================================================= deploy_agent() { - log_info "Deploying agent with service.yaml..." - - # Warn if MCP service isn't deployed yet (agent needs its URL) - if ! gcloud run services describe "$MCP_SERVICE_NAME" \ - --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then - log_warn "MCP service '$MCP_SERVICE_NAME' is not deployed yet." - log_warn "Deploy it first with: $0 --service mcp" - log_warn "The agent's MCP_SERVER_URL will be a placeholder until the MCP service is deployed." + local yaml_file + + if [[ "$MCP_DEPLOY_MODE" == "sidecar" ]]; then + log_info "Deploying agent with MCP sidecar (service-sidecar.yaml)..." + yaml_file="deploy/cloudrun/service-sidecar.yaml" + else + log_info "Deploying agent with service.yaml (MCP as separate service)..." + yaml_file="deploy/cloudrun/service.yaml" + + # Warn if MCP service isn't deployed yet (agent needs its URL) + if ! 
gcloud run services describe "$MCP_SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then + log_warn "MCP service '$MCP_SERVICE_NAME' is not deployed yet." + log_warn "Deploy it first with: $0 --service mcp" + log_warn "The agent's MCP_SERVER_URL will be a placeholder until the MCP service is deployed." + fi fi # Create temporary file with substituted values local tmp_yaml tmp_yaml=$(mktemp) - # Substitute variables in service.yaml + # Substitute variables in the selected YAML # Note: Image substitution must happen BEFORE PROJECT_ID substitution sed -e "s|gcr.io/\${PROJECT_ID}/lightspeed-agent:latest|${AGENT_IMAGE}|g" \ + -e "s|\${MCP_IMAGE}|${MCP_IMAGE}|g" \ -e "s|\${MCP_SERVICE_NAME}|${MCP_SERVICE_NAME}|g" \ -e "s|\${PROJECT_ID}|${PROJECT_ID}|g" \ -e "s|\${REGION}|${REGION}|g" \ @@ -212,7 +248,7 @@ deploy_agent() { -e "s|\${SERVICE_ACCOUNT_NAME}|${SERVICE_ACCOUNT_NAME}|g" \ -e "s|\${DB_INSTANCE_NAME}|${DB_INSTANCE_NAME}|g" \ -e "s|\${VPC_CONNECTOR_NAME}|${VPC_CONNECTOR_NAME}|g" \ - deploy/cloudrun/service.yaml > "$tmp_yaml" + "$yaml_file" > "$tmp_yaml" # Deploy using the YAML gcloud run services replace "$tmp_yaml" \ @@ -272,6 +308,13 @@ deploy_handler() { } deploy_mcp() { + if [[ "$MCP_DEPLOY_MODE" == "sidecar" ]]; then + log_warn "MCP_DEPLOY_MODE=sidecar: MCP runs inside the agent pod." + log_warn "Skipping standalone MCP service deployment." + log_warn "Deploy the agent instead: $0 --service agent" + return 0 + fi + log_info "Deploying MCP server with mcp-service.yaml..." 
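The ordering constraint noted in the substitution comment can be demonstrated standalone. This sketch uses a hypothetical `demo-project` value and a single template line; the image reference embeds `${PROJECT_ID}`, so the generic `${PROJECT_ID}` pass must run second or the image pattern no longer matches:

```shell
PROJECT_ID="demo-project"
AGENT_IMAGE="gcr.io/demo-project/lightspeed-agent:v2"
line='image: gcr.io/${PROJECT_ID}/lightspeed-agent:latest'

# Correct order: rewrite the full default image reference first, then
# expand the remaining ${PROJECT_ID} placeholders.
good=$(printf '%s\n' "$line" \
    | sed -e "s|gcr.io/\${PROJECT_ID}/lightspeed-agent:latest|${AGENT_IMAGE}|g" \
          -e "s|\${PROJECT_ID}|${PROJECT_ID}|g")
echo "$good"

# Wrong order: ${PROJECT_ID} is expanded first, so the image pattern no
# longer matches and the :latest tag survives.
bad=$(printf '%s\n' "$line" \
    | sed -e "s|\${PROJECT_ID}|${PROJECT_ID}|g" \
          -e "s|gcr.io/\${PROJECT_ID}/lightspeed-agent:latest|${AGENT_IMAGE}|g")
echo "$bad"
```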
# Create temporary file with substituted values @@ -408,7 +451,11 @@ case "$DEPLOY_SERVICE" in all) deploy_handler configure_pubsub_push - deploy_mcp + if [[ "$MCP_DEPLOY_MODE" == "service" ]]; then + deploy_mcp + else + log_info "MCP_DEPLOY_MODE=sidecar: skipping standalone MCP deployment" + fi deploy_agent ;; handler) @@ -458,17 +505,17 @@ case "$DEPLOY_SERVICE" in all) echo "" show_service_info "$HANDLER_SERVICE_NAME" - echo "" - show_service_info "$MCP_SERVICE_NAME" + if [[ "$MCP_DEPLOY_MODE" == "service" ]]; then + echo "" + show_service_info "$MCP_SERVICE_NAME" + fi echo "" show_service_info "$SERVICE_NAME" - # Update AGENT_PROVIDER_URL (agent base URL), MARKETPLACE_HANDLER_URL, - # and MCP_SERVER_URL on the agent service so it can reach the other - # services. + # Update AGENT_PROVIDER_URL (agent base URL) and MARKETPLACE_HANDLER_URL + # on the agent service. In service mode, also set MCP_SERVER_URL. # Note: AGENT_PROVIDER_ORGANIZATION_URL (JWT audience for DCR) is set - # in service.yaml and does NOT change per deployment — it's the - # provider's website (e.g., https://www.redhat.com). + # in the YAML and does NOT change per deployment. service_url=$(gcloud run services describe "$SERVICE_NAME" \ --region="$REGION" \ --project="$PROJECT_ID" \ @@ -477,10 +524,6 @@ case "$DEPLOY_SERVICE" in --region="$REGION" \ --project="$PROJECT_ID" \ --format='value(status.url)' 2>/dev/null || echo "") - mcp_url=$(gcloud run services describe "$MCP_SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --format='value(status.url)' 2>/dev/null || echo "") if [[ -n "$service_url" ]]; then env_vars="AGENT_PROVIDER_URL=$service_url" @@ -490,10 +533,16 @@ case "$DEPLOY_SERVICE" in log_warn "Could not retrieve $HANDLER_SERVICE_NAME URL. MARKETPLACE_HANDLER_URL not set." log_warn "DCR endpoints in the AgentCard will fall back to AGENT_PROVIDER_URL." 
fi - if [[ -n "$mcp_url" ]]; then - env_vars="$env_vars,MCP_SERVER_URL=$mcp_url" - else - log_warn "Could not retrieve $MCP_SERVICE_NAME URL. MCP_SERVER_URL not set." + if [[ "$MCP_DEPLOY_MODE" == "service" ]]; then + mcp_url=$(gcloud run services describe "$MCP_SERVICE_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --format='value(status.url)' 2>/dev/null || echo "") + if [[ -n "$mcp_url" ]]; then + env_vars="$env_vars,MCP_SERVER_URL=$mcp_url" + else + log_warn "Could not retrieve $MCP_SERVICE_NAME URL. MCP_SERVER_URL not set." + fi fi log_info "Updating agent env vars with service URLs" gcloud run services update "$SERVICE_NAME" \ @@ -505,10 +554,16 @@ case "$DEPLOY_SERVICE" in fi echo "" - echo "Architecture:" - echo " 1. Marketplace Handler receives Pub/Sub events and DCR requests" - echo " 2. MCP Server provides Red Hat Insights API tools (internal only)" - echo " 3. Agent handles A2A protocol and user interactions" + if [[ "$MCP_DEPLOY_MODE" == "sidecar" ]]; then + echo "Architecture (MCP_DEPLOY_MODE=sidecar):" + echo " 1. Marketplace Handler receives Pub/Sub events and DCR requests" + echo " 2. Agent handles A2A protocol (MCP server runs as sidecar)" + else + echo "Architecture (MCP_DEPLOY_MODE=service):" + echo " 1. Marketplace Handler receives Pub/Sub events and DCR requests" + echo " 2. MCP Server provides Red Hat Insights API tools (internal only)" + echo " 3. Agent handles A2A protocol and user interactions" + fi echo "" echo "Test endpoints:" echo " Handler health: curl \$(gcloud run services describe $HANDLER_SERVICE_NAME --region=$REGION --format='value(status.url)')/health" @@ -533,9 +588,9 @@ case "$DEPLOY_SERVICE" in echo "" show_service_info "$SERVICE_NAME" - # Update AGENT_PROVIDER_URL (agent base URL), MARKETPLACE_HANDLER_URL, - # and MCP_SERVER_URL on the agent service. 
- # AGENT_PROVIDER_ORGANIZATION_URL is set in service.yaml and does NOT + # Update AGENT_PROVIDER_URL (agent base URL) and MARKETPLACE_HANDLER_URL + # on the agent service. In service mode, also set MCP_SERVER_URL. + # AGENT_PROVIDER_ORGANIZATION_URL is set in the YAML and does NOT # change per deployment. service_url=$(gcloud run services describe "$SERVICE_NAME" \ --region="$REGION" \ @@ -545,10 +600,6 @@ case "$DEPLOY_SERVICE" in --region="$REGION" \ --project="$PROJECT_ID" \ --format='value(status.url)' 2>/dev/null || echo "") - mcp_url=$(gcloud run services describe "$MCP_SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --format='value(status.url)' 2>/dev/null || echo "") if [[ -n "$service_url" ]]; then env_vars="AGENT_PROVIDER_URL=$service_url" @@ -558,10 +609,16 @@ case "$DEPLOY_SERVICE" in log_warn "Could not retrieve $HANDLER_SERVICE_NAME URL. MARKETPLACE_HANDLER_URL not set." log_warn "DCR endpoints in the AgentCard will fall back to AGENT_PROVIDER_URL." fi - if [[ -n "$mcp_url" ]]; then - env_vars="$env_vars,MCP_SERVER_URL=$mcp_url" - else - log_warn "Could not retrieve $MCP_SERVICE_NAME URL. MCP_SERVER_URL not set." + if [[ "$MCP_DEPLOY_MODE" == "service" ]]; then + mcp_url=$(gcloud run services describe "$MCP_SERVICE_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --format='value(status.url)' 2>/dev/null || echo "") + if [[ -n "$mcp_url" ]]; then + env_vars="$env_vars,MCP_SERVER_URL=$mcp_url" + else + log_warn "Could not retrieve $MCP_SERVICE_NAME URL. MCP_SERVER_URL not set." 
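Lifting the env-var accumulation out of the diff: a runnable sketch of how the comma-separated string handed to `gcloud run services update --update-env-vars` is built up. The URLs are hypothetical placeholders; in `deploy.sh` they come from `gcloud run services describe`.

```shell
MCP_DEPLOY_MODE="${MCP_DEPLOY_MODE:-service}"
service_url="https://lightspeed-agent-xxxx-uc.a.run.app"
handler_url="https://marketplace-handler-xxxx-uc.a.run.app"
mcp_url="https://rh-lightspeed-mcp-xxxx-uc.a.run.app"

# Start with the agent's own base URL, then append each discovered URL
# only when it is non-empty; MCP_SERVER_URL is added only in service mode.
env_vars="AGENT_PROVIDER_URL=$service_url"
if [ -n "$handler_url" ]; then
    env_vars="$env_vars,MARKETPLACE_HANDLER_URL=$handler_url"
fi
if [ "$MCP_DEPLOY_MODE" = "service" ] && [ -n "$mcp_url" ]; then
    env_vars="$env_vars,MCP_SERVER_URL=$mcp_url"
fi
echo "$env_vars"
# This string is what would be passed as:
#   gcloud run services update "$SERVICE_NAME" --update-env-vars "$env_vars" ...
```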
+ fi fi log_info "Updating agent env vars with service URLs" gcloud run services update "$SERVICE_NAME" \ @@ -581,5 +638,7 @@ esac echo "" echo "View logs:" echo " gcloud run services logs read $HANDLER_SERVICE_NAME --region=$REGION --project=$PROJECT_ID" -echo " gcloud run services logs read $MCP_SERVICE_NAME --region=$REGION --project=$PROJECT_ID" +if [[ "$MCP_DEPLOY_MODE" == "service" ]]; then + echo " gcloud run services logs read $MCP_SERVICE_NAME --region=$REGION --project=$PROJECT_ID" +fi echo " gcloud run services logs read $SERVICE_NAME --region=$REGION --project=$PROJECT_ID" diff --git a/deploy/cloudrun/service-sidecar.yaml b/deploy/cloudrun/service-sidecar.yaml new file mode 100644 index 00000000..e94c33a6 --- /dev/null +++ b/deploy/cloudrun/service-sidecar.yaml @@ -0,0 +1,205 @@ +# Cloud Run Service Configuration — MCP_DEPLOY_MODE=sidecar +# Declarative configuration for the Lightspeed Agent with MCP sidecar. +# +# This YAML is used when MCP_DEPLOY_MODE=sidecar. +# For MCP_DEPLOY_MODE=service (the default), see service.yaml. +# +# The MCP server runs as a sidecar container inside the agent pod. +# The agent connects to it via http://localhost:8080 (container-to-container). +# No separate MCP Cloud Run service is needed. +# +# Note: Cloud Run doesn't support Quay.io directly. 
Copy the image to GCR: +# docker pull quay.io/redhat-services-prod/insights-management-tenant/insights-mcp/red-hat-lightspeed-mcp:latest +# docker tag quay.io/.../red-hat-lightspeed-mcp:latest gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest +# docker push gcr.io/${PROJECT_ID}/red-hat-lightspeed-mcp:latest +# +# Usage: +# gcloud run services replace deploy/cloudrun/service-sidecar.yaml \ +# --region=us-central1 --project=YOUR_PROJECT_ID +# +# Note: Replace ${PROJECT_ID} and ${REGION} with actual values before applying + +apiVersion: serving.knative.dev/v1 +kind: Service +metadata: + name: ${SERVICE_NAME} + labels: + app: ${SERVICE_NAME} + managed-by: cloud-build + annotations: + run.googleapis.com/description: "Red Hat Lightspeed Agent for Google Cloud - A2A-ready agent using Google ADK" + run.googleapis.com/ingress: all + run.googleapis.com/launch-stage: GA +spec: + template: + metadata: + annotations: + # Scaling configuration + autoscaling.knative.dev/minScale: "0" + autoscaling.knative.dev/maxScale: "2" + # CPU allocation + run.googleapis.com/cpu-throttling: "false" + # Startup probe + run.googleapis.com/startup-cpu-boost: "true" + # Cloud SQL connection + run.googleapis.com/cloudsql-instances: "${PROJECT_ID}:${REGION}:${DB_INSTANCE_NAME}" + # VPC connector for Cloud Memorystore Redis (rate limiting) + run.googleapis.com/vpc-access-connector: projects/${PROJECT_ID}/locations/${REGION}/connectors/${VPC_CONNECTOR_NAME} + run.googleapis.com/vpc-access-egress: private-ranges-only + spec: + containerConcurrency: 80 + timeoutSeconds: 300 + serviceAccountName: ${SERVICE_ACCOUNT_NAME}@${PROJECT_ID}.iam.gserviceaccount.com + containers: + - name: lightspeed-agent + image: gcr.io/${PROJECT_ID}/lightspeed-agent:latest + ports: + - name: http1 + containerPort: 8000 + resources: + limits: + cpu: "2" + memory: 2Gi + env: + # Google AI Configuration + - name: GOOGLE_GENAI_USE_VERTEXAI + value: "TRUE" + - name: GOOGLE_CLOUD_PROJECT + value: "${PROJECT_ID}" + - name: 
GOOGLE_CLOUD_LOCATION
+          value: "${REGION}"
+        - name: GEMINI_MODEL
+          value: "gemini-2.5-flash"
+        # Agent Configuration
+        - name: AGENT_HOST
+          value: "0.0.0.0"
+        - name: AGENT_PORT
+          value: "8000"
+        - name: AGENT_NAME
+          value: "lightspeed_agent"
+        - name: AGENT_PROVIDER_URL
+          value: "https://lightspeed-agent.example.com"
+        # Agent provider's organization website URL.
+        # Used in AgentCard provider.url and as the expected JWT audience
+        # for Google DCR software_statement validation.
+        # Must match the aud claim Google sends in the software_statement.
+        - name: AGENT_PROVIDER_ORGANIZATION_URL
+          value: "https://www.redhat.com"
+        # Marketplace handler URL for DCR endpoints in the AgentCard.
+        # Must point to the marketplace-handler Cloud Run service.
+        # Updated automatically by deploy.sh after deployment.
+        - name: MARKETPLACE_HANDLER_URL
+          value: "https://marketplace-handler.example.com"
+        - name: LOG_LEVEL
+          value: "INFO"
+        - name: LOG_FORMAT
+          value: "json"
+        - name: AGENT_LOGGING_DETAIL
+          value: "basic"
+        # Red Hat SSO Configuration
+        - name: RED_HAT_SSO_ISSUER
+          value: "https://sso.redhat.com/auth/realms/redhat-external"
+        # Comma-separated OAuth scopes required in access tokens (checked via introspection).
+        # Set to empty string to disable scope checking.
+        - name: AGENT_REQUIRED_SCOPE
+          value: "api.console,api.ocm"
+        # Comma-separated allowlist of OAuth scopes permitted in access tokens.
+        # Tokens carrying scopes outside this list are rejected (HTTP 403).
+        - name: AGENT_ALLOWED_SCOPES
+          value: "openid,profile,email,api.console,api.ocm"
+        # Ensure production environments do not skip JWT validation
+        - name: SKIP_JWT_VALIDATION
+          value: "false"
+        # MCP Configuration
+        # MCP server runs as a sidecar container in this pod.
+        # It is reachable at http://localhost:8080 (container-to-container).
+ - name: MCP_TRANSPORT_MODE + value: "http" + - name: MCP_SERVER_URL + value: "http://localhost:8080" + - name: MCP_READ_ONLY + value: "true" + # Secrets from Secret Manager + - name: RED_HAT_SSO_CLIENT_ID + valueFrom: + secretKeyRef: + name: redhat-sso-client-id + key: latest + - name: RED_HAT_SSO_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: redhat-sso-client-secret + key: latest + # Marketplace Database (shared with handler for order validation) + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: database-url + key: latest + # Session backend: "database" for production persistence, + # "memory" for in-memory (sessions lost on restart) + - name: SESSION_BACKEND + value: "database" + # Session Database (required when SESSION_BACKEND=database) + - name: SESSION_DATABASE_URL + valueFrom: + secretKeyRef: + name: session-database-url + key: latest + # Rate Limiting (Redis-backed, required for API Server) + - name: RATE_LIMIT_REDIS_URL + valueFrom: + secretKeyRef: + name: rate-limit-redis-url + key: latest + - name: RATE_LIMIT_REDIS_TIMEOUT_MS + value: "200" + - name: RATE_LIMIT_KEY_PREFIX + value: "lightspeed:ratelimit" + - name: RATE_LIMIT_REQUESTS_PER_MINUTE + value: "60" + - name: RATE_LIMIT_REQUESTS_PER_HOUR + value: "1000" + # Health checks + startupProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 10 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /health + port: 8000 + periodSeconds: 30 + failureThreshold: 3 + + # ===================================================================== + # Red Hat Lightspeed MCP Server (Sidecar) + # ===================================================================== + # Provides MCP tools for interacting with Red Hat Insights APIs + # (Advisor, Inventory, Vulnerability, Remediations, etc.) + # + # The agent forwards the caller's JWT token to the MCP server via + # the Authorization header. 
The MCP server uses this token to + # authenticate with console.redhat.com on behalf of the user. + # + # Sidecar container — no ports section (only the ingress container + # can declare ports in Cloud Run multi-container deployments). + - name: rh-lightspeed-mcp + image: ${MCP_IMAGE} + args: + - "--readonly" + - "http" + - "--port" + - "8080" + - "--host" + - "0.0.0.0" + resources: + limits: + cpu: "1" + memory: 512Mi + traffic: + - percent: 100 + latestRevision: true diff --git a/deploy/cloudrun/service.yaml b/deploy/cloudrun/service.yaml index a7e30dc5..bad6aace 100644 --- a/deploy/cloudrun/service.yaml +++ b/deploy/cloudrun/service.yaml @@ -1,5 +1,8 @@ -# Cloud Run Service Configuration -# Declarative configuration for the Lightspeed Agent service +# Cloud Run Service Configuration — MCP_DEPLOY_MODE=service +# Declarative configuration for the Lightspeed Agent service. +# +# This YAML is used when MCP_DEPLOY_MODE=service (the default). +# For MCP_DEPLOY_MODE=sidecar, see service-sidecar.yaml. # # The MCP server runs as a separate Cloud Run service (see mcp-service.yaml). # The agent connects to it over HTTPS using the MCP_SERVER_URL env var,