diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml new file mode 100644 index 00000000..19ebfde2 --- /dev/null +++ b/.github/workflows/pr-checks.yml @@ -0,0 +1,27 @@ +name: PR Checks + +on: + pull_request: + branches: [main, beta] + +jobs: + go: + uses: nullplatform/actions-nullplatform/.github/workflows/pr-checks-go.yml@main + with: + working-directory: k8s/log/kube-logger-go + go-version: '1.25' + + shellcheck: + uses: nullplatform/actions-nullplatform/.github/workflows/shellcheck.yml@main + + unit-tests: + name: Unit Tests + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: sudo apt-get update && sudo apt-get install -y bats jq + + - name: Run unit tests + run: make test-unit diff --git a/.gitignore b/.gitignore index dc24eb3e..10fe9d5c 100644 --- a/.gitignore +++ b/.gitignore @@ -134,4 +134,20 @@ dist .idea k8s/output np-agent-manifest.yaml -.minikube_mount_pid \ No newline at end of file +.minikube_mount_pid + +# Git worktrees +.worktrees/ +.DS_Store +# Integration test runtime data +frontend/deployment/tests/integration/volume/ + +# Terraform/OpenTofu +.terraform/ +.terraform.lock.hcl + +# Generated test certificates +testing/docker/certs/ + +# Claude Code +.claude/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 081db745..5e0f4f23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.11.0] - 2026-04-16 +- Add unit testing support +- Add scope configuration +- Improve **k8s/backup** logging format with detailed error messages and fix suggestions +- Add unit tests for **k8s/backup** module (backup_templates and s3 operations) +- Add ALB capacity validation on scope creation. 
Requires additional AWS permissions: `elasticloadbalancing:DescribeLoadBalancers`, `elasticloadbalancing:DescribeListeners`, `elasticloadbalancing:DescribeRules` +- Add ALB target group capacity validation on deployment. Requires additional AWS permission: `elasticloadbalancing:DescribeTargetGroups` +- Add support for multiple ALBs +- Add configurable memory and cpu limit for traffic manager +- Add ALB metrics publishing to CloudWatch or Datadog (rule count and target group count per ALB) +- Fix blue-green switch-traffic failure when `additional_ports` (e.g., gRPC) are added to a scope after the initial deployment + ## [1.10.1] - 2026-02-13 - Hotfix on wait_deployment_iteration diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..e091370b --- /dev/null +++ b/Makefile @@ -0,0 +1,54 @@ +.PHONY: test test-all test-unit test-tofu test-integration help + +# Default test target - shows available options +test: + @echo "Usage: make test-" + @echo "" + @echo "Available test levels:" + @echo " make test-all Run all tests" + @echo " make test-unit Run BATS unit tests" + @echo " make test-tofu Run OpenTofu tests" + @echo " make test-integration Run integration tests" + @echo "" + @echo "You can also run tests for a specific module:" + @echo " make test-unit MODULE=frontend" + +# Run all tests +test-all: test-unit test-tofu test-integration + +# Run BATS unit tests +test-unit: +ifdef MODULE + @./testing/run_bats_tests.sh $(MODULE) +else + @./testing/run_bats_tests.sh +endif + +# Run OpenTofu tests +test-tofu: +ifdef MODULE + @./testing/run_tofu_tests.sh $(MODULE) +else + @./testing/run_tofu_tests.sh +endif + +# Run integration tests +test-integration: +ifdef MODULE + @./testing/run_integration_tests.sh $(MODULE) $(if $(VERBOSE),-v) +else + @./testing/run_integration_tests.sh $(if $(VERBOSE),-v) +endif + +# Help +help: + @echo "Test targets:" + @echo " test Show available test options" + @echo " test-all Run all tests" + @echo " test-unit Run BATS unit tests" + 
@echo " test-tofu Run OpenTofu tests" + @echo " test-integration Run integration tests" + @echo "" + @echo "Options:" + @echo " MODULE= Run tests for specific module (e.g., MODULE=frontend)" + @echo " VERBOSE=1 Show output of passing tests (integration tests only)" diff --git a/TESTING.md b/TESTING.md new file mode 100644 index 00000000..35b2e28c --- /dev/null +++ b/TESTING.md @@ -0,0 +1,677 @@ +# Testing Guide + +This repository uses a comprehensive three-layer testing strategy to ensure reliability and correctness at every level of the infrastructure deployment pipeline. + +## Table of Contents + +- [Quick Start](#quick-start) +- [Test Layers Overview](#test-layers-overview) +- [Running Tests](#running-tests) +- [Unit Tests (BATS)](#unit-tests-bats) +- [Infrastructure Tests (OpenTofu)](#infrastructure-tests-opentofu) +- [Integration Tests](#integration-tests) +- [Test Helpers Reference](#test-helpers-reference) +- [Writing New Tests](#writing-new-tests) +- [Extending Test Helpers](#extending-test-helpers) + +--- + +## Quick Start + +```bash +# Run all tests +make test-all + +# Run specific test types +make test-unit # BATS unit tests +make test-tofu # OpenTofu infrastructure tests +make test-integration # End-to-end integration tests + +# Run tests for a specific module +make test-unit MODULE=frontend +make test-tofu MODULE=frontend +make test-integration MODULE=frontend +``` + +--- + +## Test Layers Overview + +Our testing strategy follows a pyramid approach with three distinct layers, each serving a specific purpose: + +``` + ┌─────────────────────┐ + │ Integration Tests │ Slow, Few + │ End-to-end flows │ + └──────────┬──────────┘ + │ + ┌───────────────┴───────────────┐ + │ OpenTofu Tests │ Medium + │ Infrastructure contracts │ + └───────────────┬───────────────┘ + │ + ┌───────────────────────────┴───────────────────────────┐ + │ Unit Tests │ Fast, Many + │ Script logic & behavior │ + └───────────────────────────────────────────────────────┘ +``` + +| Layer | 
Framework | Purpose | Speed | Coverage | +|-------|-----------|---------|-------|----------| +| **Unit** | BATS | Test bash scripts, setup logic, error handling | Fast (~seconds) | High | +| **Infrastructure** | OpenTofu | Validate Terraform/OpenTofu module contracts | Medium (~seconds) | Medium | +| **Integration** | BATS + Docker | End-to-end workflow validation with mocked services | Slow (~minutes) | Low | + +--- + +## Running Tests + +### Prerequisites + +| Tool | Required For | Installation | +|------|--------------|--------------| +| `bats` | Unit & Integration tests | `brew install bats-core` | +| `jq` | JSON processing | `brew install jq` | +| `tofu` | Infrastructure tests | `brew install opentofu` | +| `docker` | Integration tests | [Docker Desktop](https://docker.com) | + +### Makefile Commands + +```bash +# Show available test commands +make test + +# Run all test suites +make test-all + +# Run individual test suites +make test-unit +make test-tofu +make test-integration + +# Run tests for a specific module +make test-unit MODULE=frontend +make test-tofu MODULE=frontend +make test-integration MODULE=frontend + +# Run a single test file directly +bats frontend/deployment/tests/build_context_test.bats +tofu test # from within a modules directory +``` + +--- + +## Unit Tests (BATS) + +Unit tests validate the bash scripts that orchestrate the deployment pipeline. They test individual setup scripts, context building, error handling, and environment configuration. 
+ +### What to Test + +- **Setup scripts**: Validate environment variable handling, error cases, output format +- **Context builders**: Verify JSON structure, required fields, transformations +- **Error handling**: Ensure proper exit codes and error messages +- **Mock integrations**: Test script behavior with mocked CLI tools (aws, np) + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ test_file.bats │ +├─────────────────────────────────────────────────────────────────┤ +│ setup() │ +│ ├── source assertions.sh (shared test utilities) │ +│ ├── configure mock CLI tools (aws, np mocks) │ +│ └── set environment variables │ +│ │ +│ @test "description" { ... } │ +│ ├── run script_under_test │ +│ └── assert results │ +│ │ +│ teardown() │ +│ └── cleanup │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Directory Structure + +``` +/ +├── / +│ └── setup # Script under test +└── tests/ + ├── resources/ + │ ├── context.json # Test fixtures + │ ├── aws_mocks/ # Mock AWS CLI responses + │ │ └── aws # Mock aws executable + │ └── np_mocks/ # Mock np CLI responses + │ └── np # Mock np executable + └── / + └── setup_test.bats # Test file +``` + +### File Naming Convention + +| Pattern | Description | +|---------|-------------| +| `*_test.bats` | BATS test files | +| `resources/` | Test fixtures and mock data | +| `*_mocks/` | Mock CLI tool directories | + +### Example Unit Test + +```bash +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for provider/aws/setup script +# ============================================================================= + +# Setup - runs before each test +setup() { + TEST_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")" && pwd)" + PROJECT_ROOT="$(cd "$TEST_DIR/../../.." 
&& pwd)" + SCRIPT_PATH="$PROJECT_ROOT/provider/aws/setup" + + # Load shared test utilities + source "$PROJECT_ROOT/testing/assertions.sh" + + # Initialize required environment variables + export AWS_REGION="us-east-1" + export TOFU_PROVIDER_BUCKET="my-terraform-state" + export TOFU_LOCK_TABLE="terraform-locks" +} + +# Teardown - runs after each test +teardown() { + unset AWS_REGION TOFU_PROVIDER_BUCKET TOFU_LOCK_TABLE +} + +# ============================================================================= +# Tests +# ============================================================================= + +@test "fails when AWS_REGION is not set" { + unset AWS_REGION + + run source "$SCRIPT_PATH" + + assert_equal "$status" "1" + assert_contains "$output" "AWS_REGION is not set" +} + +@test "exports correct TOFU_VARIABLES structure" { + source "$SCRIPT_PATH" + + local region=$(echo "$TOFU_VARIABLES" | jq -r '.aws_provider.region') + assert_equal "$region" "us-east-1" +} + +@test "appends to existing MODULES_TO_USE" { + export MODULES_TO_USE="existing/module" + + source "$SCRIPT_PATH" + + assert_contains "$MODULES_TO_USE" "existing/module" + assert_contains "$MODULES_TO_USE" "provider/aws/modules" +} +``` + +--- + +## Infrastructure Tests (OpenTofu) + +Infrastructure tests validate the OpenTofu/Terraform modules in isolation. They verify variable contracts, resource configurations, and module outputs without deploying real infrastructure. 
+ +### What to Test + +- **Variable validation**: Required variables, type constraints, default values +- **Resource configuration**: Correct resource attributes based on inputs +- **Module outputs**: Expected outputs are produced with correct values +- **Edge cases**: Empty values, special characters, boundary conditions + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ module.tftest.hcl │ +├─────────────────────────────────────────────────────────────────┤ +│ mock_provider "aws" {} (prevents real API calls) │ +│ │ +│ variables { ... } (test inputs) │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────┐ │ +│ │ Terraform Module │ (main.tf, variables.tf, etc.) │ +│ │ under test │ │ +│ └─────────┬───────────┘ │ +│ │ │ +│ ▼ │ +│ run "test_name" { │ +│ command = plan │ +│ assert { condition = ... } (validate outputs/resources) │ +│ } │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Directory Structure + +``` +/ +└── modules/ + ├── main.tf + ├── variables.tf + ├── outputs.tf + └── .tftest.hcl # Test file lives alongside module +``` + +### File Naming Convention + +| Pattern | Description | +|---------|-------------| +| `*.tftest.hcl` | OpenTofu test files | +| `mock_provider` | Provider mock declarations | + +### Example Infrastructure Test + +```hcl +# ============================================================================= +# Unit tests for cloudfront module +# ============================================================================= + +mock_provider "aws" {} + +variables { + distribution_bucket_name = "my-assets-bucket" + distribution_app_name = "my-app-123" + distribution_s3_prefix = "/static" + + network_hosted_zone_id = "Z1234567890" + network_domain = "example.com" + network_subdomain = "app" + + distribution_resource_tags_json = { + Environment = "test" + } +} + +# ============================================================================= +# Test: CloudFront distribution is 
created with correct origin +# ============================================================================= +run "cloudfront_has_correct_s3_origin" { + command = plan + + assert { + condition = aws_cloudfront_distribution.static.origin[0].domain_name != "" + error_message = "CloudFront distribution must have an S3 origin" + } +} + +# ============================================================================= +# Test: Origin Access Control is configured +# ============================================================================= +run "oac_is_configured" { + command = plan + + assert { + condition = aws_cloudfront_origin_access_control.static.signing_behavior == "always" + error_message = "OAC should always sign requests" + } +} + +# ============================================================================= +# Test: Custom error responses for SPA routing +# ============================================================================= +run "spa_error_responses_configured" { + command = plan + + assert { + condition = length(aws_cloudfront_distribution.static.custom_error_response) > 0 + error_message = "SPA should have custom error responses for client-side routing" + } +} +``` + +--- + +## Integration Tests + +Integration tests validate the complete deployment workflow end-to-end. They run in a containerized environment with mocked cloud services, testing the entire pipeline from context building through infrastructure provisioning. 
+ +### What to Test + +- **Complete workflows**: Full deployment and destruction cycles +- **Service interactions**: AWS services, nullplatform API calls +- **Resource creation**: Verify infrastructure is created correctly +- **Cleanup**: Ensure resources are properly destroyed + +### Architecture + +``` +┌─ Host Machine ──────────────────────────────────────────────────────────────┐ +│ │ +│ make test-integration │ +│ │ │ +│ ▼ │ +│ run_integration_tests.sh ──► docker compose up │ +│ │ +└─────────────────────────────────┬───────────────────────────────────────────┘ + │ +┌─ Docker Network ────────────────┴───────────────────────────────────────────┐ +│ │ +│ ┌─ Test Container ───────────────────────────────────────────────────────┐ │ +│ │ │ │ +│ │ BATS Tests ──► np CLI ──────────────────┐ │ │ +│ │ │ │ │ │ +│ │ ▼ ▼ │ │ +│ │ OpenTofu Nginx (HTTPS) │ │ +│ │ │ │ │ │ +│ └───────┼───────────────────────────────────┼────────────────────────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌─ Mock Services ────────────────────────────────────────────────────────┐ │ +│ │ │ │ +│ │ LocalStack (4566) Moto (5555) Smocker (8081) │ │ +│ │ ├── S3 └── CloudFront └── nullplatform API │ │ +│ │ ├── Route53 │ │ +│ │ ├── DynamoDB │ │ +│ │ ├── IAM │ │ +│ │ └── STS │ │ +│ │ │ │ +│ └────────────────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Service Components + +| Service | Purpose | Port | +|---------|---------|------| +| **LocalStack** | AWS service emulation (S3, Route53, DynamoDB, IAM, STS, ACM) | 4566 | +| **Moto** | CloudFront emulation (not supported in LocalStack free tier) | 5555 | +| **Smocker** | nullplatform API mocking | 8080/8081 | +| **Nginx** | HTTPS reverse proxy for np CLI | 8443 | + +### Directory Structure + +``` +/ +└── tests/ + └── integration/ + ├── cloudfront_lifecycle_test.bats # Integration test + ├── localstack/ + │ └── provider_override.tf # LocalStack-compatible provider 
config + └── mocks/ + └── / + └── response.json # Mock API responses +``` + +### File Naming Convention + +| Pattern | Description | +|---------|-------------| +| `*_test.bats` | Integration test files | +| `localstack/` | LocalStack-compatible Terraform overrides | +| `mocks/` | API mock response files | + +### Example Integration Test + +```bash +#!/usr/bin/env bats +# ============================================================================= +# Integration test: CloudFront Distribution Lifecycle +# ============================================================================= + +setup_file() { + source "${PROJECT_ROOT}/testing/integration_helpers.sh" + + # Clear any existing mocks + clear_mocks + + # Create AWS prerequisites in LocalStack + aws_local s3api create-bucket --bucket assets-bucket + aws_local s3api create-bucket --bucket tofu-state-bucket + aws_local dynamodb create-table \ + --table-name tofu-locks \ + --attribute-definitions AttributeName=LockID,AttributeType=S \ + --key-schema AttributeName=LockID,KeyType=HASH \ + --billing-mode PAY_PER_REQUEST + aws_local route53 create-hosted-zone \ + --name example.com \ + --caller-reference "test-$(date +%s)" +} + +teardown_file() { + source "${PROJECT_ROOT}/testing/integration_helpers.sh" + clear_mocks +} + +setup() { + source "${PROJECT_ROOT}/testing/integration_helpers.sh" + + clear_mocks + load_context "tests/resources/context.json" + + export TOFU_PROVIDER="aws" + export TOFU_PROVIDER_BUCKET="tofu-state-bucket" + export AWS_REGION="us-east-1" +} + +# ============================================================================= +# Test: Create Infrastructure +# ============================================================================= +@test "create infrastructure deploys S3, CloudFront, and Route53 resources" { + # Setup API mocks + mock_request "GET" "/provider" "mocks/provider_success.json" + + # Run the deployment workflow + run_workflow "deployment/workflows/initial.yaml" + + # Verify resources 
were created + assert_s3_bucket_exists "assets-bucket" + assert_cloudfront_exists "Distribution for my-app" + assert_route53_record_exists "app.example.com" "A" +} + +# ============================================================================= +# Test: Destroy Infrastructure +# ============================================================================= +@test "destroy infrastructure removes CloudFront and Route53 resources" { + mock_request "GET" "/provider" "mocks/provider_success.json" + + run_workflow "deployment/workflows/delete.yaml" + + assert_cloudfront_not_exists "Distribution for my-app" + assert_route53_record_not_exists "app.example.com" "A" +} +``` + +--- + +## Test Helpers Reference + +### Viewing Available Helpers + +Both helper libraries include a `test_help` function that displays all available utilities: + +```bash +# View unit test helpers +source testing/assertions.sh && test_help + +# View integration test helpers +source testing/integration_helpers.sh && test_help +``` + +### Unit Test Assertions (`testing/assertions.sh`) + +| Function | Description | +|----------|-------------| +| `assert_equal "$actual" "$expected"` | Assert two values are equal | +| `assert_contains "$haystack" "$needle"` | Assert string contains substring | +| `assert_not_empty "$value" ["$name"]` | Assert value is not empty | +| `assert_empty "$value" ["$name"]` | Assert value is empty | +| `assert_file_exists "$path"` | Assert file exists | +| `assert_directory_exists "$path"` | Assert directory exists | +| `assert_json_equal "$actual" "$expected"` | Assert JSON structures are equal | + +### Integration Test Helpers (`testing/integration_helpers.sh`) + +#### AWS Commands + +| Function | Description | +|----------|-------------| +| `aws_local ` | Execute AWS CLI against LocalStack | +| `aws_moto ` | Execute AWS CLI against Moto (CloudFront) | + +#### Workflow Execution + +| Function | Description | +|----------|-------------| +| `run_workflow "$path"` | Run a 
nullplatform workflow file | + +#### Context Management + +| Function | Description | +|----------|-------------| +| `load_context "$path"` | Load context JSON into `$CONTEXT` | +| `override_context "$key" "$value"` | Override a value in current context | + +#### API Mocking + +| Function | Description | +|----------|-------------| +| `clear_mocks` | Clear all mocks, set up defaults | +| `mock_request "$method" "$path" "$file"` | Mock API request with file response | +| `mock_request "$method" "$path" $status '$body'` | Mock API request inline | +| `assert_mock_called "$method" "$path"` | Assert mock was called | + +#### AWS Assertions + +| Function | Description | +|----------|-------------| +| `assert_s3_bucket_exists "$bucket"` | Assert S3 bucket exists | +| `assert_s3_bucket_not_exists "$bucket"` | Assert S3 bucket doesn't exist | +| `assert_cloudfront_exists "$comment"` | Assert CloudFront distribution exists | +| `assert_cloudfront_not_exists "$comment"` | Assert CloudFront distribution doesn't exist | +| `assert_route53_record_exists "$name" "$type"` | Assert Route53 record exists | +| `assert_route53_record_not_exists "$name" "$type"` | Assert Route53 record doesn't exist | +| `assert_dynamodb_table_exists "$table"` | Assert DynamoDB table exists | + +--- + +## Writing New Tests + +### Unit Test Checklist + +1. Create test file: `/tests//_test.bats` +2. Add `setup()` function that sources `testing/assertions.sh` +3. Set up required environment variables and mocks +4. Write tests using `@test "description" { ... }` syntax +5. Use `run` to capture command output and exit status +6. Assert with helper functions or standard bash conditionals + +### Infrastructure Test Checklist + +1. Create test file: `/modules/.tftest.hcl` +2. Add `mock_provider "aws" {}` to avoid real API calls +3. Define `variables {}` block with test inputs +4. Write `run "test_name" { ... }` blocks with assertions +5. 
Use `command = plan` to validate without applying + +### Integration Test Checklist + +1. Create test file: `/tests/integration/_test.bats` +2. Add `setup_file()` to create prerequisites in LocalStack +3. Add `setup()` to configure mocks and context per test +4. Add `teardown_file()` to clean up +5. Create `localstack/provider_override.tf` for LocalStack-compatible provider +6. Create mock response files in `mocks/` directory +7. Use `run_workflow` to execute deployment workflows +8. Assert with AWS assertion helpers + +--- + +## Extending Test Helpers + +### Adding New Assertions + +1. **Add the function** to the appropriate helper file: + - `testing/assertions.sh` for unit test helpers + - `testing/integration_helpers.sh` for integration test helpers + +2. **Follow the naming convention**: `assert_` for assertions + +3. **Update the `test_help` function** to document your new helper: + +```bash +# Example: Adding a new assertion to assertions.sh + +# Add the function +assert_file_contains() { + local file="$1" + local content="$2" + if ! grep -q "$content" "$file" 2>/dev/null; then + echo "Expected file '$file' to contain: $content" + return 1 + fi +} + +# Update test_help() - add to the appropriate section +test_help() { + cat <<'EOF' +... +FILE SYSTEM ASSERTIONS +---------------------- + assert_file_exists "" + Assert a file exists. + + assert_file_contains "" "" # <-- Add documentation + Assert a file contains specific content. +... +EOF +} +``` + +4. 
**Test your new helper** before committing + +### Helper Design Guidelines + +- Return `0` on success, non-zero on failure +- Print descriptive error messages on failure +- Keep functions focused and single-purpose +- Use consistent naming conventions +- Document parameters and usage in `test_help()` + +--- + +## Troubleshooting + +### Common Issues + +| Issue | Solution | +|-------|----------| +| `bats: command not found` | Install bats-core: `brew install bats-core` | +| `tofu: command not found` | Install OpenTofu: `brew install opentofu` | +| Integration tests hang | Check Docker is running, increase timeout | +| LocalStack services not ready | Wait for health checks, check Docker logs | +| Mock not being called | Verify mock path matches exactly, check Smocker logs | + +### Debugging Integration Tests + +```bash +# View LocalStack logs +docker logs integration-localstack + +# View Smocker mock history +curl http://localhost:8081/history | jq + +# Run tests with verbose output +bats --show-output-of-passing-tests frontend/deployment/tests/integration/*.bats +``` + +--- + +## Additional Resources + +- [BATS Documentation](https://bats-core.readthedocs.io/) +- [OpenTofu Testing](https://opentofu.org/docs/cli/commands/test/) +- [LocalStack Documentation](https://docs.localstack.cloud/) +- [Smocker Documentation](https://smocker.dev/) diff --git a/azure-aro/specs/notification-channel.json.tpl b/azure-aro/specs/notification-channel.json.tpl index f1db58e5..6f5ba36c 100644 --- a/azure-aro/specs/notification-channel.json.tpl +++ b/azure-aro/specs/notification-channel.json.tpl @@ -1,6 +1,7 @@ { "nrn": "{{ env.Getenv "NRN" }}", "status": "active", + "description": "Channel to handle ARO Containers scopes", "type": "agent", "source": [ "telemetry", diff --git a/azure-aro/specs/service-spec.json.tpl b/azure-aro/specs/service-spec.json.tpl index d18a2d7c..90b1e701 100644 --- a/azure-aro/specs/service-spec.json.tpl +++ b/azure-aro/specs/service-spec.json.tpl @@ -433,7 
+433,10 @@ "default":10, "maximum":300, "minimum":1, - "description":"Seconds between health checks" + "description":"Seconds between health checks", + "exclusiveMinimum": { + "$data": "1/timeout_seconds" + } }, "timeout_seconds":{ "type":"integer", @@ -476,7 +479,7 @@ "cpu_millicores":{ "type":"integer", "title":"CPU Millicores", - "default":500, + "default":100, "maximum":4000, "minimum":100, "description":"Amount of CPU to allocate (in millicores, 1000m = 1 CPU core)" @@ -630,10 +633,10 @@ }, "name": "Containers", "selectors": { - "category": "any", + "category": "Scope", "imported": false, - "provider": "any", - "sub_category": "any" + "provider": "Agent", + "sub_category": "Containers" }, "type": "scope", "use_default_actions": false, diff --git a/azure/specs/notification-channel.json.tpl b/azure/specs/notification-channel.json.tpl index f1db58e5..74be3439 100644 --- a/azure/specs/notification-channel.json.tpl +++ b/azure/specs/notification-channel.json.tpl @@ -1,6 +1,7 @@ { "nrn": "{{ env.Getenv "NRN" }}", "status": "active", + "description": "Channel to handle Azure Containers scopes", "type": "agent", "source": [ "telemetry", diff --git a/azure/specs/service-spec.json.tpl b/azure/specs/service-spec.json.tpl index ca47ae5d..f331df10 100644 --- a/azure/specs/service-spec.json.tpl +++ b/azure/specs/service-spec.json.tpl @@ -433,7 +433,10 @@ "default":10, "maximum":300, "minimum":1, - "description":"Seconds between health checks" + "description":"Seconds between health checks", + "exclusiveMinimum": { + "$data": "1/timeout_seconds" + } }, "timeout_seconds":{ "type":"integer", @@ -476,7 +479,7 @@ "cpu_millicores":{ "type":"integer", "title":"CPU Millicores", - "default":500, + "default":100, "maximum":4000, "minimum":100, "description":"Amount of CPU to allocate (in millicores, 1000m = 1 CPU core)" @@ -630,10 +633,10 @@ }, "name": "Containers", "selectors": { - "category": "any", + "category": "Scope", "imported": false, - "provider": "any", - "sub_category": 
"any" + "provider": "Agent", + "sub_category": "Containers" }, "type": "scope", "use_default_actions": false, diff --git a/datadog/metric/list b/datadog/metric/list index 5a3b1a21..b591b182 100755 --- a/datadog/metric/list +++ b/datadog/metric/list @@ -25,7 +25,7 @@ echo '{ }, { "name": "system.cpu_usage_percentage", - "title": "Cpu usage", + "title": "CPU usage", "unit": "%", "available_filters": ["scope_id", "instance_id"], "available_group_by": ["instance_id"] diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 00000000..59d19980 --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,152 @@ +# Kubernetes Scope Configuration + +This document describes all available configuration variables for Kubernetes scopes and their priority hierarchy. + +## Configuration Hierarchy + +Configuration variables follow a priority hierarchy: + +``` +1. Existing Providers - Highest priority + - scope-configurations: Scope-specific configuration + - container-orchestration: Orchestrator configuration + - cloud-providers: Cloud provider configuration + (If there are multiple providers, the order in which they are specified determines priority) + ↓ +2. Environment Variable (ENV VAR) - Allows override when no provider exists + ↓ +3. Default value - Fallback when no provider or env var exists +``` + +**Important Note**: The order of arguments in `get_config_value` does NOT affect priority. The function always respects the order: providers > env var > default, regardless of the order in which arguments are passed. + +## Configuration Variables + +### Cluster + +Configuration for Kubernetes cluster settings. 
+ +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **K8S_NAMESPACE** | Kubernetes namespace where resources are deployed | `cluster.namespace` | +| **CREATE_K8S_NAMESPACE_IF_NOT_EXIST** | Whether to create the namespace if it doesn't exist | `cluster.create_namespace_if_not_exist` | + +### Networking + +#### General + +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **DOMAIN** | Public domain name for the application | `networking.domain_name` | +| **PRIVATE_DOMAIN** | Private domain name for internal services | `networking.private_domain_name` | +| **USE_ACCOUNT_SLUG** | Whether to use account slug as application domain | `networking.application_domain` | +| **DNS_TYPE** | DNS provider type (route53, azure, external_dns) | `networking.dns_type` | + +#### AWS Route53 + +Configuration specific to AWS Route53 DNS provider. Visible only when `dns_type` is `route53`. + +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **ALB_NAME** (public) | Public Application Load Balancer name | `networking.balancer_public_name` | +| **ALB_NAME** (private) | Private Application Load Balancer name | `networking.balancer_private_name` | +| **ALB_RECONCILIATION_ENABLED** | Whether ALB reconciliation is enabled | `networking.alb_reconciliation_enabled` | + +#### Azure DNS + +Configuration specific to Azure DNS provider. Visible only when `dns_type` is `azure`. 
+ +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **HOSTED_ZONE_NAME** | Azure DNS hosted zone name | `networking.hosted_zone_name` | +| **HOSTED_ZONE_RG** | Azure resource group containing the DNS hosted zone | `networking.hosted_zone_rg` | +| **AZURE_SUBSCRIPTION_ID** | Azure subscription ID for DNS management | `networking.azure_subscription_id` | +| **RESOURCE_GROUP** | Azure resource group for cluster resources | `networking.resource_group` | + +**Note:** These variables are obtained from the `scope-configurations` provider and exported for use in Azure DNS workflows. + +#### Gateways + +Gateway configuration for ingress traffic routing. + +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **PUBLIC_GATEWAY_NAME** | Public gateway name for ingress | `networking.gateway_public_name` | +| **PRIVATE_GATEWAY_NAME** | Private/internal gateway name for ingress | `networking.gateway_private_name` | + +### Deployment + +#### General + +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **DEPLOY_STRATEGY** | Deployment strategy (rolling or blue-green) | `deployment.deployment_strategy` | +| **DEPLOYMENT_MAX_WAIT_IN_SECONDS** | Maximum wait time for deployments (seconds) | `deployment.deployment_max_wait_seconds` | + +#### Traffic Manager + +Configuration for the traffic manager sidecar container. 
+
+| Variable | Description | Scope Configuration Property |
+|----------|-------------|------------------------------|
+| **TRAFFIC_CONTAINER_IMAGE** | Traffic manager sidecar container image | `deployment.traffic_container_image` |
+| **TRAFFIC_MANAGER_CONFIG_MAP** | ConfigMap name with custom traffic manager configuration | `deployment.traffic_manager_config_map` |
+
+#### Pod Disruption Budget
+
+Configuration for Pod Disruption Budget to control pod availability during disruptions.
+
+| Variable | Description | Scope Configuration Property |
+|----------|-------------|------------------------------|
+| **POD_DISRUPTION_BUDGET_ENABLED** | Whether Pod Disruption Budget is enabled | `deployment.pod_disruption_budget_enabled` |
+| **POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE** | Maximum number or percentage of pods that can be unavailable | `deployment.pod_disruption_budget_max_unavailable` |
+
+#### Manifest Backup
+
+Configuration for backing up Kubernetes manifests.
+
+| Variable | Description | Scope Configuration Property |
+|----------|-------------|------------------------------|
+| **MANIFEST_BACKUP_ENABLED** | Whether manifest backup is enabled | `deployment.manifest_backup_enabled` |
+| **MANIFEST_BACKUP_TYPE** | Backup storage type (only `s3` is currently supported) | `deployment.manifest_backup_type` |
+| **MANIFEST_BACKUP_BUCKET** | S3 bucket name for storing backups | `deployment.manifest_backup_bucket` |
+| **MANIFEST_BACKUP_PREFIX** | Prefix path within the bucket | `deployment.manifest_backup_prefix` |
+
+### Security
+
+#### Image Pull Secrets
+
+Configuration for pulling images from private container registries. 
+ +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **IMAGE_PULL_SECRETS_ENABLED** | Whether image pull secrets are enabled | `security.image_pull_secrets_enabled` | +| **IMAGE_PULL_SECRETS** | List of secret names to use for pulling images | `security.image_pull_secrets` | + +#### IAM + +AWS IAM configuration for Kubernetes service accounts. + +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **IAM_ENABLED** | Whether IAM integration is enabled | `security.iam_enabled` | +| **IAM_PREFIX** | Prefix for IAM role names | `security.iam_prefix` | +| **IAM_POLICIES** | List of IAM policies to attach to the role | `security.iam_policies` | +| **IAM_BOUNDARY_ARN** | ARN of the permissions boundary policy | `security.iam_boundary_arn` | + +#### Vault + +HashiCorp Vault configuration for secrets management. + +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **VAULT_ADDR** | Vault server address | `security.vault_address` | +| **VAULT_TOKEN** | Vault authentication token | `security.vault_token` | + +### Advanced + +Advanced configuration options. + +| Variable | Description | Scope Configuration Property | +|----------|-------------|------------------------------| +| **K8S_MODIFIERS** | JSON string with dynamic modifications to Kubernetes objects | `object_modifiers` | diff --git a/k8s/apply_templates b/k8s/apply_templates index 08310939..3a5dfaa4 100644 --- a/k8s/apply_templates +++ b/k8s/apply_templates @@ -1,12 +1,28 @@ #!/bin/bash -echo "TEMPLATE DIR: $OUTPUT_DIR, ACTION: $ACTION, DRY_RUN: $DRY_RUN" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +if ! type -t log >/dev/null 2>&1; then source "$SCRIPT_DIR/logging"; fi + +log debug "📝 Applying templates..." 
+log debug "📋 Directory: $OUTPUT_DIR" +log debug "📋 Action: $ACTION" +log debug "📋 Dry run: $DRY_RUN" +log debug "" APPLIED_FILES=() # Find all .yaml files that were not yet applied / deleted while IFS= read -r TEMPLATE_FILE; do - echo "kubectl $ACTION $TEMPLATE_FILE" + FILENAME="$(basename "$TEMPLATE_FILE")" + BASE_DIR="$(dirname "$TEMPLATE_FILE")" + + # Check if file is empty or contains only whitespace + if [[ ! -s "$TEMPLATE_FILE" ]] || [[ -z "$(tr -d '[:space:]' < "$TEMPLATE_FILE")" ]]; then + log debug "📋 Skipping empty template: $FILENAME" + continue + fi + + log debug "📝 kubectl $ACTION $FILENAME" if [[ "$DRY_RUN" == "false" ]]; then IGNORE_NOT_FOUND="" @@ -15,11 +31,11 @@ while IFS= read -r TEMPLATE_FILE; do IGNORE_NOT_FOUND="--ignore-not-found=true" fi - kubectl "$ACTION" -f "$TEMPLATE_FILE" $IGNORE_NOT_FOUND + if ! kubectl "$ACTION" -f "$TEMPLATE_FILE" $IGNORE_NOT_FOUND; then + log error " ❌ Failed to apply" + fi fi - BASE_DIR="$(dirname "$TEMPLATE_FILE")" - FILENAME="$(basename "$TEMPLATE_FILE")" DEST_DIR="${BASE_DIR}/$ACTION" mkdir -p "$DEST_DIR" @@ -31,6 +47,8 @@ while IFS= read -r TEMPLATE_FILE; do done < <(find "$OUTPUT_DIR" \( -path "*/apply" -o -path "*/delete" \) -prune -o -type f -name "*.yaml" -print) if [[ "$DRY_RUN" == "true" ]]; then + log debug "" + log debug "📋 Dry run mode - no changes were made" exit 1 fi diff --git a/k8s/backup/backup_templates b/k8s/backup/backup_templates index 26642f0c..3cad4248 100644 --- a/k8s/backup/backup_templates +++ b/k8s/backup/backup_templates @@ -1,12 +1,13 @@ #!/bin/bash + MANIFEST_BACKUP=${MANIFEST_BACKUP-"{}"} BACKUP_ENABLED=$(echo "$MANIFEST_BACKUP" | jq -r .ENABLED) TYPE=$(echo "$MANIFEST_BACKUP" | jq -r .TYPE) if [[ "$BACKUP_ENABLED" == "false" || "$BACKUP_ENABLED" == "null" ]]; then - echo "No manifest backup enabled. 
Skipping manifest backup" + log debug "📋 Manifest backup is disabled, skipping" return fi @@ -40,7 +41,14 @@ case "$TYPE" in source "$SERVICE_PATH/backup/s3" --action="$ACTION" --files "${FILES[@]}" ;; *) - echo "Error: Unsupported manifest backup type type '$TYPE'" + log error "❌ Unsupported manifest backup type: '$TYPE'" + log error "" + log error "💡 Possible causes:" + log error " The MANIFEST_BACKUP.TYPE configuration is invalid" + log error "" + log error "🔧 How to fix:" + log error " • Set MANIFEST_BACKUP.TYPE to 's3' in values.yaml" + log error "" exit 1 ;; -esac \ No newline at end of file +esac diff --git a/k8s/backup/s3 b/k8s/backup/s3 index 8435804e..0148129d 100644 --- a/k8s/backup/s3 +++ b/k8s/backup/s3 @@ -1,5 +1,6 @@ #!/bin/bash + ACTION="" FILES=() @@ -26,11 +27,16 @@ done BUCKET=$(echo "$MANIFEST_BACKUP" | jq -r .BUCKET) PREFIX=$(echo "$MANIFEST_BACKUP" | jq -r .PREFIX) -echo "[INFO] Initializing S3 manifest backup operation - Action: $ACTION | Bucket: $BUCKET | Prefix: $PREFIX | Files: ${#FILES[@]}" +log debug "📝 Starting S3 manifest backup..." 
+log debug "📋 Action: $ACTION" +log debug "📋 Bucket: $BUCKET" +log debug "📋 Prefix: $PREFIX" +log debug "📋 Files: ${#FILES[@]}" +log debug "" # Now you can iterate over the files for file in "${FILES[@]}"; do - echo "[DEBUG] Processing manifest file: $file" + log debug "📝 Processing: $(basename "$file")" # Extract the path after 'output/' and remove the action folder (apply/delete) # Example: /root/.np/services/k8s/output/1862688057-34121609/apply/secret-1862688057-34121609.yaml @@ -54,34 +60,60 @@ for file in "${FILES[@]}"; do if [[ "$ACTION" == "apply" ]]; then - echo "[INFO] Uploading manifest to S3: s3://$BUCKET/$s3_key" + log debug " 📡 Uploading to s3://$BUCKET/$s3_key" # Upload to S3 - if aws s3 cp --region "$REGION" "$file" "s3://$BUCKET/$s3_key"; then - echo "[SUCCESS] Manifest upload completed successfully: $file" + if aws s3 cp --region "$REGION" "$file" "s3://$BUCKET/$s3_key" >/dev/null; then + log info " ✅ Upload successful" else - echo "[ERROR] Manifest upload failed: $file" >&2 + log error " ❌ Upload failed" + log error "" + log error "💡 Possible causes:" + log error " • S3 bucket does not exist or is not accessible" + log error " • IAM permissions are missing for s3:PutObject" + log error "" + log error "🔧 How to fix:" + log error " • Verify bucket '$BUCKET' exists and is accessible" + log error " • Check IAM permissions for the agent" + log error "" exit 1 fi elif [[ "$ACTION" == "delete" ]]; then - echo "[INFO] Removing manifest from S3: s3://$BUCKET/$s3_key" + log debug " 📡 Deleting s3://$BUCKET/$s3_key" # Delete from S3 with error handling aws_output=$(aws s3 rm --region "$REGION" "s3://$BUCKET/$s3_key" 2>&1) aws_exit_code=$? 
if [[ $aws_exit_code -eq 0 ]]; then - echo "[SUCCESS] Manifest deletion completed successfully: s3://$BUCKET/$s3_key" + log info " ✅ Deletion successful" elif [[ "$aws_output" == *"NoSuchKey"* ]] || [[ "$aws_output" == *"Not Found"* ]]; then - echo "[WARN] Manifest not found in S3, skipping deletion: s3://$BUCKET/$s3_key" + log debug " 📋 File not found in S3, skipping" else - echo "[ERROR] Manifest deletion failed: s3://$BUCKET/$s3_key - $aws_output" >&2 + log error " ❌ Deletion failed" + log error "📋 AWS Error: $aws_output" + log error "" + log error "💡 Possible causes:" + log error " • S3 bucket does not exist or is not accessible" + log error " • IAM permissions are missing for s3:DeleteObject" + log error "" + log error "🔧 How to fix:" + log error " • Verify bucket '$BUCKET' exists and is accessible" + log error " • Check IAM permissions for the agent" + log error "" exit 1 fi else - echo "[ERROR] Invalid action specified: $ACTION" >&2 + log error "❌ Invalid action: '$ACTION'" + log error "" + log error "💡 Possible causes:" + log error " The action parameter must be 'apply' or 'delete'" + log error "" exit 1 fi -done \ No newline at end of file +done + +log info "" +log info "✨ S3 backup operation completed successfully" diff --git a/k8s/backup/tests/backup_templates.bats b/k8s/backup/tests/backup_templates.bats new file mode 100644 index 00000000..3282a903 --- /dev/null +++ b/k8s/backup/tests/backup_templates.bats @@ -0,0 +1,176 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for backup/backup_templates - manifest backup orchestration +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + # Set required environment variables + export SERVICE_PATH="$PROJECT_ROOT/k8s" +} + +teardown() { + unset MANIFEST_BACKUP + unset SERVICE_PATH +} + +# ============================================================================= +# Test: Skips when backup is disabled (false) +# ============================================================================= +@test "backup_templates: skips when BACKUP_ENABLED is false" { + export MANIFEST_BACKUP='{"ENABLED":"false","TYPE":"s3"}' + + # Use a subshell to capture the return statement behavior + run bash -c ' + source "$SERVICE_PATH/backup/backup_templates" --action=apply --files /tmp/test.yaml + ' + + assert_equal "$status" "0" + assert_equal "$output" "📋 Manifest backup is disabled, skipping" +} + +# ============================================================================= +# Test: Skips when backup is disabled (null) +# ============================================================================= +@test "backup_templates: skips when BACKUP_ENABLED is null" { + export MANIFEST_BACKUP='{"TYPE":"s3"}' + + run bash -c ' + source "$SERVICE_PATH/backup/backup_templates" --action=apply --files /tmp/test.yaml + ' + + assert_equal "$status" "0" + assert_equal "$output" "📋 Manifest backup is disabled, skipping" +} + +# ============================================================================= +# Test: Skips when MANIFEST_BACKUP is empty +# ============================================================================= +@test "backup_templates: skips when MANIFEST_BACKUP is empty" { + export MANIFEST_BACKUP='{}' + + run bash -c ' + source "$SERVICE_PATH/backup/backup_templates" --action=apply --files /tmp/test.yaml + ' + + assert_equal "$status" "0" + assert_equal "$output" "📋 Manifest backup is disabled, skipping" +} + +# 
============================================================================= +# Test: Fails with unsupported backup type - Error message +# ============================================================================= +@test "backup_templates: fails with unsupported backup type error" { + export MANIFEST_BACKUP='{"ENABLED":"true","TYPE":"gcs"}' + + run bash "$SERVICE_PATH/backup/backup_templates" --action=apply --files /tmp/test.yaml + + assert_equal "$status" "1" + assert_contains "$output" "❌ Unsupported manifest backup type: 'gcs'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "MANIFEST_BACKUP.TYPE configuration is invalid" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Set MANIFEST_BACKUP.TYPE to 's3' in values.yaml" +} + +# ============================================================================= +# Test: Parses action argument correctly +# ============================================================================= +@test "backup_templates: parses action argument" { + export MANIFEST_BACKUP='{"ENABLED":"true","TYPE":"s3","BUCKET":"test","PREFIX":"manifests"}' + + # Mock aws to avoid actual calls + aws() { + return 0 + } + export -f aws + export REGION="us-east-1" + + run bash -c ' + source "$SERVICE_PATH/backup/backup_templates" --action=apply --files /tmp/output/123/apply/test.yaml + ' + + assert_contains "$output" "📋 Action: apply" +} + +# ============================================================================= +# Test: Parses files argument correctly +# ============================================================================= +@test "backup_templates: parses files argument" { + export MANIFEST_BACKUP='{"ENABLED":"true","TYPE":"s3","BUCKET":"test","PREFIX":"manifests"}' + + # Mock aws to avoid actual calls + aws() { + return 0 + } + export -f aws + export REGION="us-east-1" + + run bash -c ' + source "$SERVICE_PATH/backup/backup_templates" --action=apply --files 
/tmp/output/123/apply/file1.yaml /tmp/output/123/apply/file2.yaml + ' + + assert_contains "$output" "📋 Files: 2" +} + +# ============================================================================= +# Test: Calls s3 backup for s3 type +# ============================================================================= +@test "backup_templates: calls s3 backup for s3 type" { + export MANIFEST_BACKUP='{"ENABLED":"true","TYPE":"s3","BUCKET":"my-bucket","PREFIX":"backups"}' + + # Mock aws to avoid actual calls + aws() { + return 0 + } + export -f aws + export REGION="us-east-1" + + run bash -c ' + source "$SERVICE_PATH/backup/backup_templates" --action=apply --files /tmp/output/123/apply/test.yaml + ' + + assert_equal "$status" "0" + assert_contains "$output" "📝 Starting S3 manifest backup..." +} + +@test "backup_templates: shows bucket name when calling s3" { + export MANIFEST_BACKUP='{"ENABLED":"true","TYPE":"s3","BUCKET":"my-bucket","PREFIX":"backups"}' + + aws() { + return 0 + } + export -f aws + export REGION="us-east-1" + + run bash -c ' + source "$SERVICE_PATH/backup/backup_templates" --action=apply --files /tmp/output/123/apply/test.yaml + ' + + assert_equal "$status" "0" + assert_contains "$output" "📋 Bucket: my-bucket" +} + +@test "backup_templates: shows prefix when calling s3" { + export MANIFEST_BACKUP='{"ENABLED":"true","TYPE":"s3","BUCKET":"my-bucket","PREFIX":"backups"}' + + aws() { + return 0 + } + export -f aws + export REGION="us-east-1" + + run bash -c ' + source "$SERVICE_PATH/backup/backup_templates" --action=apply --files /tmp/output/123/apply/test.yaml + ' + + assert_equal "$status" "0" + assert_contains "$output" "📋 Prefix: backups" +} diff --git a/k8s/backup/tests/s3.bats b/k8s/backup/tests/s3.bats new file mode 100644 index 00000000..b85294a8 --- /dev/null +++ b/k8s/backup/tests/s3.bats @@ -0,0 +1,301 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for backup/s3 - S3 manifest 
backup operations +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + # Set required environment variables + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export REGION="us-east-1" + export MANIFEST_BACKUP='{"ENABLED":"true","TYPE":"s3","BUCKET":"test-bucket","PREFIX":"manifests"}' + + # Create temp files for testing + export TEST_DIR="$(mktemp -d)" + mkdir -p "$TEST_DIR/output/scope-123/apply" + echo "test content" > "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + # Mock aws CLI by default (success) + aws() { + return 0 + } + export -f aws +} + +teardown() { + rm -rf "$TEST_DIR" + unset MANIFEST_BACKUP + unset SERVICE_PATH + unset REGION + unset -f aws +} + +# ============================================================================= +# Test: Displays starting message +# ============================================================================= +@test "s3: displays starting message with emoji" { + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + assert_equal "$status" "0" + assert_contains "$output" "📝 Starting S3 manifest backup..." 
+} + +# ============================================================================= +# Test: Extracts bucket from MANIFEST_BACKUP +# ============================================================================= +@test "s3: extracts bucket from MANIFEST_BACKUP" { + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + assert_contains "$output" "📋 Bucket: test-bucket" +} + +# ============================================================================= +# Test: Extracts prefix from MANIFEST_BACKUP +# ============================================================================= +@test "s3: extracts prefix from MANIFEST_BACKUP" { + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + assert_contains "$output" "📋 Prefix: manifests" +} + +# ============================================================================= +# Test: Shows file count +# ============================================================================= +@test "s3: shows file count" { + echo "test" > "$TEST_DIR/output/scope-123/apply/service.yaml" + + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" "$TEST_DIR/output/scope-123/apply/service.yaml" + + assert_contains "$output" "📋 Files: 2" +} + +# ============================================================================= +# Test: Shows action +# ============================================================================= +@test "s3: shows action with emoji" { + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + assert_contains "$output" "📋 Action: apply" +} + +# ============================================================================= +# Test: Uploads file on apply action +# ============================================================================= +@test "s3: uploads file on apply action" { + local 
aws_called=false + aws() { + if [[ "$1" == "s3" && "$2" == "cp" ]]; then + aws_called=true + fi + return 0 + } + export -f aws + + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Processing:" + assert_contains "$output" "📡 Uploading to" + assert_contains "$output" "✅ Upload successful" +} + +# ============================================================================= +# Test: Deletes file on delete action +# ============================================================================= +@test "s3: deletes file on delete action" { + mkdir -p "$TEST_DIR/output/scope-123/delete" + echo "test" > "$TEST_DIR/output/scope-123/delete/deployment.yaml" + + aws() { + if [[ "$1" == "s3" && "$2" == "rm" ]]; then + return 0 + fi + return 0 + } + export -f aws + + run bash "$SERVICE_PATH/backup/s3" --action=delete --files "$TEST_DIR/output/scope-123/delete/deployment.yaml" + + [ "$status" -eq 0 ] + assert_contains "$output" "📡 Deleting" + assert_contains "$output" "✅ Deletion successful" +} + +# ============================================================================= +# Test: Handles NoSuchKey error gracefully on delete +# ============================================================================= +@test "s3: handles NoSuchKey error gracefully on delete" { + mkdir -p "$TEST_DIR/output/scope-123/delete" + echo "test" > "$TEST_DIR/output/scope-123/delete/deployment.yaml" + + aws() { + if [[ "$1" == "s3" && "$2" == "rm" ]]; then + echo "An error occurred (NoSuchKey) when calling the DeleteObject operation" + return 1 + fi + return 0 + } + export -f aws + + run bash "$SERVICE_PATH/backup/s3" --action=delete --files "$TEST_DIR/output/scope-123/delete/deployment.yaml" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 File not found in S3, skipping" +} + +# ============================================================================= +# Test: Handles Not 
Found error gracefully on delete +# ============================================================================= +@test "s3: handles Not Found error gracefully on delete" { + mkdir -p "$TEST_DIR/output/scope-123/delete" + echo "test" > "$TEST_DIR/output/scope-123/delete/deployment.yaml" + + aws() { + if [[ "$1" == "s3" && "$2" == "rm" ]]; then + echo "Not Found" + return 1 + fi + return 0 + } + export -f aws + + run bash "$SERVICE_PATH/backup/s3" --action=delete --files "$TEST_DIR/output/scope-123/delete/deployment.yaml" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 File not found in S3, skipping" +} + +# ============================================================================= +# Test: Fails on upload error - Error message +# ============================================================================= +@test "s3: fails on upload error with error message" { + aws() { + if [[ "$1" == "s3" && "$2" == "cp" ]]; then + return 1 + fi + return 0 + } + export -f aws + + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + [ "$status" -eq 1 ] + + assert_contains "$output" "❌ Upload failed" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "• S3 bucket does not exist or is not accessible" + assert_contains "$output" "• IAM permissions are missing for s3:PutObject" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify bucket 'test-bucket' exists and is accessible" + assert_contains "$output" "• Check IAM permissions for the agent" +} + +# ============================================================================= +# Test: Fails on delete error (non-NoSuchKey) - Error message +# ============================================================================= +@test "s3: fails on delete error with error message" { + mkdir -p "$TEST_DIR/output/scope-123/delete" + echo "test" > "$TEST_DIR/output/scope-123/delete/deployment.yaml" + + aws() { + if [[ "$1" 
== "s3" && "$2" == "rm" ]]; then + echo "Access Denied" + return 1 + fi + return 0 + } + export -f aws + + run bash "$SERVICE_PATH/backup/s3" --action=delete --files "$TEST_DIR/output/scope-123/delete/deployment.yaml" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Deletion failed" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "• S3 bucket does not exist or is not accessible" + assert_contains "$output" "• IAM permissions are missing for s3:DeleteObject" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify bucket 'test-bucket' exists and is accessible" + assert_contains "$output" "• Check IAM permissions for the agent" +} + +# ============================================================================= +# Test: Fails on invalid action - Error message +# ============================================================================= +@test "s3: fails on invalid action with error message" { + run bash "$SERVICE_PATH/backup/s3" --action=invalid --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Invalid action: 'invalid'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The action parameter must be 'apply' or 'delete'" +} + +# ============================================================================= +# Test: Constructs correct S3 path +# ============================================================================= +@test "s3: constructs correct S3 path from file path" { + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + # S3 path should be: manifests/scope-123/deployment.yaml + assert_contains "$output" "manifests/scope-123/deployment.yaml" +} + +# ============================================================================= +# Test: Shows success summary +# ============================================================================= +@test "s3: 
shows success summary" { + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + [ "$status" -eq 0 ] + assert_contains "$output" "✨ S3 backup operation completed successfully" +} + +# ============================================================================= +# Test: Processes multiple files +# ============================================================================= +@test "s3: processes multiple files" { + echo "test" > "$TEST_DIR/output/scope-123/apply/service.yaml" + echo "test" > "$TEST_DIR/output/scope-123/apply/secret.yaml" + + local upload_count=0 + aws() { + if [[ "$1" == "s3" && "$2" == "cp" ]]; then + upload_count=$((upload_count + 1)) + fi + return 0 + } + export -f aws + + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" "$TEST_DIR/output/scope-123/apply/service.yaml" "$TEST_DIR/output/scope-123/apply/secret.yaml" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Files: 3" +} + + +# ============================================================================= +# Test: Uses REGION environment variable +# ============================================================================= +@test "s3: uses REGION environment variable" { + local region_used="" + aws() { + for arg in "$@"; do + if [[ "$arg" == "us-east-1" ]]; then + region_used="us-east-1" + fi + done + return 0 + } + export -f aws + + run bash "$SERVICE_PATH/backup/s3" --action=apply --files "$TEST_DIR/output/scope-123/apply/deployment.yaml" + + [ "$status" -eq 0 ] +} diff --git a/k8s/deployment/audit_deployment b/k8s/deployment/audit_deployment index 67e6d7aa..1d2a9f59 100755 --- a/k8s/deployment/audit_deployment +++ b/k8s/deployment/audit_deployment @@ -1,82 +1,83 @@ #!/bin/bash + # audit-scope.sh NAMESPACE="$K8S_NAMESPACE" if [ -z "$SCOPE_ID" ]; then - echo "Usage: $0 [namespace]" - echo "Example: $0 1183007763 nullplatform" + log error "Usage: $0 [namespace]" + 
log error "Example: $0 1183007763 nullplatform" exit 1 fi -echo "Auditing resources for scope $SCOPE_ID in namespace $NAMESPACE..." -echo "----------------------------------------" +log debug "Auditing resources for scope $SCOPE_ID in namespace $NAMESPACE..." +log debug "----------------------------------------" # Check Deployments -echo "Checking Deployments:" +log debug "Checking Deployments:" DEPLOYMENTS=$(kubectl get deployments -n $NAMESPACE | grep $SCOPE_ID) DEPLOYMENT_COUNT=$(echo "$DEPLOYMENTS" | grep -v "^$" | wc -l) -echo "$DEPLOYMENTS" -echo "Found $DEPLOYMENT_COUNT deployment(s)" -echo "----------------------------------------" +log debug "$DEPLOYMENTS" +log debug "Found $DEPLOYMENT_COUNT deployment(s)" +log debug "----------------------------------------" # Check Services -echo "Checking Services:" +log debug "Checking Services:" SERVICES=$(kubectl get services -n $NAMESPACE | grep $SCOPE_ID) SERVICE_COUNT=$(echo "$SERVICES" | grep -v "^$" | wc -l) -echo "$SERVICES" -echo "Found $SERVICE_COUNT service(s)" -echo "----------------------------------------" +log debug "$SERVICES" +log debug "Found $SERVICE_COUNT service(s)" +log debug "----------------------------------------" # Check ReplicaSets -echo "Checking ReplicaSets:" +log debug "Checking ReplicaSets:" REPLICASETS=$(kubectl get rs -n $NAMESPACE | grep $SCOPE_ID) REPLICASET_COUNT=$(echo "$REPLICASETS" | grep -v "^$" | wc -l) -echo "$REPLICASETS" -echo "Found $REPLICASET_COUNT replicaset(s)" -echo "----------------------------------------" +log debug "$REPLICASETS" +log debug "Found $REPLICASET_COUNT replicaset(s)" +log debug "----------------------------------------" # Check Pods -echo "Checking Pods:" +log debug "Checking Pods:" PODS=$(kubectl get pods -n $NAMESPACE | grep $SCOPE_ID) POD_COUNT=$(echo "$PODS" | grep -v "^$" | wc -l) -echo "$PODS" -echo "Found $POD_COUNT pod(s)" -echo "----------------------------------------" +log debug "$PODS" +log debug "Found $POD_COUNT pod(s)" +log debug 
"----------------------------------------" # Check Ingress -echo "Checking Ingress:" +log debug "Checking Ingress:" INGRESS=$(kubectl get ingress -n $NAMESPACE | grep $SCOPE_ID) INGRESS_COUNT=$(echo "$INGRESS" | grep -v "^$" | wc -l) -echo "$INGRESS" -echo "Found $INGRESS_COUNT ingress(es)" -echo "----------------------------------------" +log debug "$INGRESS" +log debug "Found $INGRESS_COUNT ingress(es)" +log debug "----------------------------------------" # Check Secrets -echo "Checking Secrets:" +log debug "Checking Secrets:" SECRETS=$(kubectl get secrets -n $NAMESPACE | grep $SCOPE_ID) SECRET_COUNT=$(echo "$SECRETS" | grep -v "^$" | wc -l) -echo "$SECRETS" -echo "Found $SECRET_COUNT secret(s)" -echo "----------------------------------------" +log debug "$SECRETS" +log debug "Found $SECRET_COUNT secret(s)" +log debug "----------------------------------------" # Summary and Warnings -echo "SUMMARY:" +log debug "SUMMARY:" if [ $DEPLOYMENT_COUNT -gt 1 ]; then - echo "⚠️ WARNING: Multiple deployments found!" + log warn "⚠️ WARNING: Multiple deployments found!" fi if [ $SERVICE_COUNT -gt 1 ]; then - echo "⚠️ WARNING: Multiple services found!" + log warn "⚠️ WARNING: Multiple services found!" fi if [ $INGRESS_COUNT -gt 1 ]; then - echo "⚠️ WARNING: Multiple ingresses found!" + log warn "⚠️ WARNING: Multiple ingresses found!" fi if [ $POD_COUNT -gt 1 ]; then - echo "⚠️ WARNING: Multiple pods found!" + log warn "⚠️ WARNING: Multiple pods found!" fi if [ $DEPLOYMENT_COUNT -eq 1 ] && [ $SERVICE_COUNT -eq 1 ] && [ $INGRESS_COUNT -le 1 ] && [ $POD_COUNT -eq 1 ]; then - echo "✅ All resources look good! Single instance of each type found." + log info "✅ All resources look good! Single instance of each type found." else - echo "❌ Some resources need attention. Please check the warnings above." -fi \ No newline at end of file + log error "❌ Some resources need attention. Please check the warnings above." 
+fi diff --git a/k8s/deployment/build_blue_deployment b/k8s/deployment/build_blue_deployment index fda77f14..92a7b8c4 100755 --- a/k8s/deployment/build_blue_deployment +++ b/k8s/deployment/build_blue_deployment @@ -1,5 +1,6 @@ #!/bin/bash + REPLICAS=$(echo "$CONTEXT" | jq -r .blue_replicas) export NEW_DEPLOYMENT_ID=$DEPLOYMENT_ID diff --git a/k8s/deployment/build_context b/k8s/deployment/build_context index b05c657a..0808681b 100755 --- a/k8s/deployment/build_context +++ b/k8s/deployment/build_context @@ -1,5 +1,6 @@ #!/bin/bash + # Build scope and tags env variables source "$SERVICE_PATH/scope/build_context" @@ -20,7 +21,7 @@ SWITCH_TRAFFIC=$(echo "$CONTEXT" | jq -r ".deployment.strategy_data.desired_swit MIN_REPLICAS=$(echo "scale=10; $REPLICAS / 10" | bc) MIN_REPLICAS=$(echo "$MIN_REPLICAS" | awk '{printf "%d", ($1 == int($1) ? $1 : int($1)+1)}') -DEPLOYMENT_STATUS=$(echo $CONTEXT | jq -r ".deployment.status") +DEPLOYMENT_STATUS=$(echo "$CONTEXT" | jq -r ".deployment.status") validate_status() { local action="$1" @@ -44,12 +45,12 @@ validate_status() { expected_status="deleting, rolling_back or cancelling" ;; *) - echo "🔄 Running action '$action', any deployment status is accepted" + log debug "📝 Running action '$action', any deployment status is accepted" return 0 ;; esac - echo "🔄 Running action '$action' (current status: '$status', expected: $expected_status)" + log debug "📝 Running action '$action' (current status: '$status', expected: $expected_status)" case "$action" in start-initial|start-blue-green) @@ -71,10 +72,24 @@ validate_status() { } if ! 
validate_status "$SERVICE_ACTION" "$DEPLOYMENT_STATUS"; then - echo "❌ Invalid deployment status '$DEPLOYMENT_STATUS' for action '$SERVICE_ACTION'" >&2 + log error "❌ Invalid deployment status '$DEPLOYMENT_STATUS' for action '$SERVICE_ACTION'" + log error "💡 Possible causes:" + log error " - Deployment status changed during workflow execution" + log error " - Another action is already running on this deployment" + log error " - Deployment was modified externally" + log error "🔧 How to fix:" + log error " - Wait for any in-progress actions to complete" + log error " - Check the deployment status in the nullplatform dashboard" + log error " - Retry the action once the deployment is in the expected state" exit 1 fi +DEPLOY_STRATEGY=$(get_config_value \ + --env DEPLOY_STRATEGY \ + --provider '.providers["scope-configurations"].deployment.deployment_strategy' \ + --default "blue-green" +) + if [ "$DEPLOY_STRATEGY" = "rolling" ] && [ "$DEPLOYMENT_STATUS" = "running" ]; then GREEN_REPLICAS=$(echo "scale=10; ($GREEN_REPLICAS * $SWITCH_TRAFFIC) / 100" | bc) GREEN_REPLICAS=$(echo "$GREEN_REPLICAS" | awk '{printf "%d", ($1 == int($1) ? $1 : int($1)+1)}') @@ -89,8 +104,23 @@ fi if [[ -n "$PULL_SECRETS" ]]; then IMAGE_PULL_SECRETS=$PULL_SECRETS else - IMAGE_PULL_SECRETS="${IMAGE_PULL_SECRETS:-"{}"}" - IMAGE_PULL_SECRETS=$(echo "$IMAGE_PULL_SECRETS" | jq .) + if [ -n "${IMAGE_PULL_SECRETS:-}" ]; then + IMAGE_PULL_SECRETS=$(echo "$IMAGE_PULL_SECRETS" | jq .) 
+ else + PULL_SECRETS_ENABLED=$(get_config_value \ + --provider '.providers["scope-configurations"].security.image_pull_secrets_enabled' \ + --default "false" + ) + PULL_SECRETS_LIST=$(get_config_value \ + --provider '.providers["scope-configurations"].security.image_pull_secrets | @json' \ + --default "[]" + ) + + IMAGE_PULL_SECRETS=$(jq -n \ + --argjson enabled "$PULL_SECRETS_ENABLED" \ + --argjson secrets "$PULL_SECRETS_LIST" \ + '{ENABLED: $enabled, SECRETS: $secrets}') + fi fi SCOPE_TRAFFIC_PROTOCOL=$(echo "$CONTEXT" | jq -r .scope.capabilities.protocol) @@ -101,15 +131,56 @@ if [[ "$SCOPE_TRAFFIC_PROTOCOL" == "web_sockets" ]]; then TRAFFIC_CONTAINER_VERSION="websocket2" fi -TRAFFIC_CONTAINER_IMAGE=${TRAFFIC_CONTAINER_IMAGE:-"public.ecr.aws/nullplatform/k8s-traffic-manager:$TRAFFIC_CONTAINER_VERSION"} +TRAFFIC_CONTAINER_IMAGE=$(get_config_value \ + --env TRAFFIC_CONTAINER_IMAGE \ + --provider '.providers["scope-configurations"].deployment.traffic_container_image' \ + --default "public.ecr.aws/nullplatform/k8s-traffic-manager:$TRAFFIC_CONTAINER_VERSION" +) # Pod Disruption Budget configuration -PDB_ENABLED=${POD_DISRUPTION_BUDGET_ENABLED:-"false"} -PDB_MAX_UNAVAILABLE=${POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE:-"25%"} - -IAM=${IAM-"{}"} +PDB_ENABLED=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_ENABLED \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_enabled' \ + --default "false" +) +PDB_MAX_UNAVAILABLE=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_max_unavailable' \ + --default "25%" +) + +# IAM configuration - build from flat properties or use env var +if [ -n "${IAM:-}" ]; then + IAM="$IAM" +else + IAM_ENABLED_RAW=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_enabled' \ + --default "false" + ) + IAM_PREFIX=$(get_config_value \ + --provider 
'.providers["scope-configurations"].security.iam_prefix' \ + --default "" + ) + IAM_POLICIES=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_policies | @json' \ + --default "[]" + ) + IAM_BOUNDARY=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_boundary_arn' \ + --default "" + ) + + IAM=$(jq -n \ + --argjson enabled "$IAM_ENABLED_RAW" \ + --arg prefix "$IAM_PREFIX" \ + --argjson policies "$IAM_POLICIES" \ + --arg boundary "$IAM_BOUNDARY" \ + '{ENABLED: $enabled, PREFIX: $prefix, ROLE: {POLICIES: $policies, BOUNDARY_ARN: $boundary}} | + if .ROLE.BOUNDARY_ARN == "" then .ROLE |= del(.BOUNDARY_ARN) else . end | + if .PREFIX == "" then del(.PREFIX) else . end') +fi -IAM_ENABLED=$(echo "$IAM" | jq -r .ENABLED) +IAM_ENABLED=$(echo "$IAM" | jq -r '.ENABLED // false') SERVICE_ACCOUNT_NAME="" @@ -117,17 +188,28 @@ if [[ "$IAM_ENABLED" == "true" ]]; then SERVICE_ACCOUNT_NAME=$(echo "$IAM" | jq -r .PREFIX)-"$SCOPE_ID" fi -TRAFFIC_MANAGER_CONFIG_MAP=${TRAFFIC_MANAGER_CONFIG_MAP:-""} +TRAFFIC_MANAGER_CONFIG_MAP=$(get_config_value \ + --env TRAFFIC_MANAGER_CONFIG_MAP \ + --provider '.providers["scope-configurations"].deployment.traffic_manager_config_map' \ + --default "" +) if [[ -n "$TRAFFIC_MANAGER_CONFIG_MAP" ]]; then - echo "🔍 Validating ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' in namespace '$K8S_NAMESPACE'" + log debug "🔍 Validating ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' in namespace '$K8S_NAMESPACE'" # Check if the ConfigMap exists if ! 
kubectl get configmap "$TRAFFIC_MANAGER_CONFIG_MAP" -n "$K8S_NAMESPACE" &>/dev/null; then - echo "❌ ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' does not exist in namespace '$K8S_NAMESPACE'" + log error "❌ ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' does not exist in namespace '$K8S_NAMESPACE'" + log error "💡 Possible causes:" + log error " - ConfigMap was not created before deployment" + log error " - ConfigMap name is misspelled in values.yaml" + log error " - ConfigMap was deleted or exists in a different namespace" + log error "🔧 How to fix:" + log error " - Create the ConfigMap: kubectl create configmap $TRAFFIC_MANAGER_CONFIG_MAP -n $K8S_NAMESPACE --from-file=nginx.conf --from-file=default.conf" + log error " - Verify the ConfigMap name in your scope configuration" exit 1 fi - echo "✅ ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' exists" + log info "✅ ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' exists" # Check for required keys (subPaths) REQUIRED_KEYS=("nginx.conf" "default.conf") @@ -137,14 +219,51 @@ if [[ -n "$TRAFFIC_MANAGER_CONFIG_MAP" ]]; then for key in "${REQUIRED_KEYS[@]}"; do if ! 
echo "$CONFIGMAP_KEYS" | grep -qx "$key"; then - echo "❌ ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' is missing required key '$key'" - echo "💡 The ConfigMap must contain data entries for: ${REQUIRED_KEYS[*]}" + log error "❌ ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' is missing required key '$key'" + log error "💡 Possible causes:" + log error " - ConfigMap was created without all required files" + log error " - Key name is different from expected: ${REQUIRED_KEYS[*]}" + log error "🔧 How to fix:" + log error " - Update the ConfigMap to include the missing key '$key'" + log error " - Required keys: ${REQUIRED_KEYS[*]}" exit 1 fi - echo "✅ Found required key '$key' in ConfigMap" + log info "✅ Found required key '$key' in ConfigMap" done - echo "🎉 ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' validation successful" + # Read optional resource overrides from ConfigMap + if echo "$CONFIGMAP_KEYS" | grep -qx "container_memory_in_memory"; then + CONTAINER_MEMORY_IN_MEMORY=$(kubectl get configmap "$TRAFFIC_MANAGER_CONFIG_MAP" -n "$K8S_NAMESPACE" -o go-template='{{ index .data "container_memory_in_memory" }}') + log info "✅ Using container_memory_in_memory=$CONTAINER_MEMORY_IN_MEMORY from ConfigMap" + fi + + if echo "$CONFIGMAP_KEYS" | grep -qx "container_cpu_in_millicores"; then + CONTAINER_CPU_IN_MILLICORES=$(kubectl get configmap "$TRAFFIC_MANAGER_CONFIG_MAP" -n "$K8S_NAMESPACE" -o go-template='{{ index .data "container_cpu_in_millicores" }}') + log info "✅ Using container_cpu_in_millicores=$CONTAINER_CPU_IN_MILLICORES from ConfigMap" + fi + + log info "✨ ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' validation successful" +fi + +# Check if blue deployment has K8s services for additional ports +BLUE_ADDITIONAL_PORT_SERVICES="{}" +if [ -n "$BLUE_DEPLOYMENT_ID" ] && [ "$BLUE_DEPLOYMENT_ID" != "null" ]; then + ADDITIONAL_PORTS=$(echo "$CONTEXT" | jq -c '.scope.capabilities.additional_ports // []') + if [ "$ADDITIONAL_PORTS" != "[]" ] && [ "$ADDITIONAL_PORTS" != "null" ]; then + while IFS= read 
-r port_config; do + port=$(echo "$port_config" | jq -r '.port') + type_raw=$(echo "$port_config" | jq -r '.type') + type_lower=$(echo "$type_raw" | tr '[:upper:]' '[:lower:]') + service_name="d-${SCOPE_ID}-${BLUE_DEPLOYMENT_ID}-${type_lower}-${port}" + key="${type_lower}-${port}" + if kubectl get service "$service_name" -n "$K8S_NAMESPACE" &>/dev/null; then + BLUE_ADDITIONAL_PORT_SERVICES=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq --arg key "$key" '. + {($key): true}') + else + BLUE_ADDITIONAL_PORT_SERVICES=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq --arg key "$key" '. + {($key): false}') + log info "Blue deployment service '$service_name' not found - additional port traffic will route to new deployment only" + fi + done < <(echo "$ADDITIONAL_PORTS" | jq -c '.[]') + fi fi CONTEXT=$(echo "$CONTEXT" | jq \ @@ -158,6 +277,9 @@ CONTEXT=$(echo "$CONTEXT" | jq \ --arg pdb_max_unavailable "$PDB_MAX_UNAVAILABLE" \ --arg service_account_name "$SERVICE_ACCOUNT_NAME" \ --arg traffic_manager_config_map "$TRAFFIC_MANAGER_CONFIG_MAP" \ + --arg container_memory_in_memory "$CONTAINER_MEMORY_IN_MEMORY" \ + --arg container_cpu_in_millicores "$CONTAINER_CPU_IN_MILLICORES" \ + --argjson blue_additional_port_services "$BLUE_ADDITIONAL_PORT_SERVICES" \ '. 
+ {blue_deployment_id: $blue_deployment_id, blue_replicas: $blue_replicas, green_replicas: $green_replicas, @@ -167,7 +289,10 @@ CONTEXT=$(echo "$CONTEXT" | jq \ pdb_enabled: $pdb_enabled, pdb_max_unavailable: $pdb_max_unavailable, service_account_name: $service_account_name, - traffic_manager_config_map: $traffic_manager_config_map + traffic_manager_config_map: $traffic_manager_config_map, + container_memory_in_memory: $container_memory_in_memory, + container_cpu_in_millicores: $container_cpu_in_millicores, + blue_additional_port_services: $blue_additional_port_services }') DEPLOYMENT_ID=$(echo "$CONTEXT" | jq -r '.deployment.id') @@ -182,3 +307,6 @@ export DEPLOYMENT_ID export BLUE_DEPLOYMENT_ID mkdir -p "$OUTPUT_DIR" + +log info "✨ Deployment context built successfully" +log debug "📋 Deployment ID: $DEPLOYMENT_ID | Replicas: green=$GREEN_REPLICAS, blue=$BLUE_REPLICAS" diff --git a/k8s/deployment/build_deployment b/k8s/deployment/build_deployment index cf95e1b3..a51bf971 100755 --- a/k8s/deployment/build_deployment +++ b/k8s/deployment/build_deployment @@ -1,5 +1,6 @@ #!/bin/bash + DEPLOYMENT_PATH="$OUTPUT_DIR/deployment-$SCOPE_ID-$DEPLOYMENT_ID.yaml" SECRET_PATH="$OUTPUT_DIR/secret-$SCOPE_ID-$DEPLOYMENT_ID.yaml" SCALING_PATH="$OUTPUT_DIR/scaling-$SCOPE_ID-$DEPLOYMENT_ID.yaml" @@ -7,9 +8,11 @@ SERVICE_TEMPLATE_PATH="$OUTPUT_DIR/service-$SCOPE_ID-$DEPLOYMENT_ID.yaml" PDB_PATH="$OUTPUT_DIR/pdb-$SCOPE_ID-$DEPLOYMENT_ID.yaml" CONTEXT_PATH="$OUTPUT_DIR/context-$SCOPE_ID.json" -echo "$CONTEXT" | jq --arg replicas "$REPLICAS" '. + {replicas: $replicas}' > "$CONTEXT_PATH" +log debug "📝 Building deployment templates..." +log debug "📋 Output directory: $OUTPUT_DIR" +log debug "" -echo "Building Template: $DEPLOYMENT_TEMPLATE to $DEPLOYMENT_PATH" +echo "$CONTEXT" | jq --arg replicas "$REPLICAS" '. 
+ {replicas: $replicas}' > "$CONTEXT_PATH" gomplate -c .="$CONTEXT_PATH" \ --file "$DEPLOYMENT_TEMPLATE" \ @@ -18,11 +21,10 @@ gomplate -c .="$CONTEXT_PATH" \ TEMPLATE_GENERATION_STATUS=$? if [[ $TEMPLATE_GENERATION_STATUS -ne 0 ]]; then - echo "Error building deployment template" + log error " ❌ Failed to build deployment template" exit 1 fi - -echo "Building Template: $SECRET_TEMPLATE to $SECRET_PATH" +log info " ✅ Deployment template: $DEPLOYMENT_PATH" gomplate -c .="$CONTEXT_PATH" \ --file "$SECRET_TEMPLATE" \ @@ -31,11 +33,10 @@ gomplate -c .="$CONTEXT_PATH" \ TEMPLATE_GENERATION_STATUS=$? if [[ $TEMPLATE_GENERATION_STATUS -ne 0 ]]; then - echo "Error building secret template" + log error " ❌ Failed to build secret template" exit 1 fi - -echo "Building Template: $SCALING_TEMPLATE to $SCALING_PATH" +log info " ✅ Secret template: $SECRET_PATH" gomplate -c .="$CONTEXT_PATH" \ --file "$SCALING_TEMPLATE" \ @@ -44,11 +45,10 @@ gomplate -c .="$CONTEXT_PATH" \ TEMPLATE_GENERATION_STATUS=$? if [[ $TEMPLATE_GENERATION_STATUS -ne 0 ]]; then - echo "Error building scaling template" + log error " ❌ Failed to build scaling template" exit 1 fi - -echo "Building Template: $SERVICE_TEMPLATE to $SERVICE_TEMPLATE_PATH" +log info " ✅ Scaling template: $SCALING_PATH" gomplate -c .="$CONTEXT_PATH" \ --file "$SERVICE_TEMPLATE" \ @@ -57,12 +57,12 @@ gomplate -c .="$CONTEXT_PATH" \ TEMPLATE_GENERATION_STATUS=$? if [[ $TEMPLATE_GENERATION_STATUS -ne 0 ]]; then - echo "Error building service template" + log error " ❌ Failed to build service template" exit 1 fi +log info " ✅ Service template: $SERVICE_TEMPLATE_PATH" -echo "Building Template: $PDB_TEMPLATE to $PDB_PATH" - +log debug "📝 Building PDB template..." gomplate -c .="$CONTEXT_PATH" \ --file "$PDB_TEMPLATE" \ --out "$PDB_PATH" @@ -70,8 +70,12 @@ gomplate -c .="$CONTEXT_PATH" \ TEMPLATE_GENERATION_STATUS=$? 
if [[ $TEMPLATE_GENERATION_STATUS -ne 0 ]]; then - echo "Error building PDB template" + log error " ❌ Failed to build PDB template" exit 1 fi +log info " ✅ PDB template: $PDB_PATH" rm "$CONTEXT_PATH" + +log debug "" +log info "✨ All templates built successfully" diff --git a/k8s/deployment/delete_cluster_objects b/k8s/deployment/delete_cluster_objects index 5e069bca..ec2502a1 100755 --- a/k8s/deployment/delete_cluster_objects +++ b/k8s/deployment/delete_cluster_objects @@ -1,12 +1,29 @@ #!/bin/bash + +log debug "🔍 Starting cluster objects cleanup..." + OBJECTS_TO_DELETE="deployment,service,hpa,ingress,pdb,secret,configmap" # Function to delete all resources for a given deployment_id delete_deployment_resources() { local DEPLOYMENT_ID_TO_DELETE="$1" - kubectl delete "$OBJECTS_TO_DELETE" \ - -l deployment_id="$DEPLOYMENT_ID_TO_DELETE" -n "$K8S_NAMESPACE" --cascade=foreground --wait=true + log debug "📝 Deleting resources for deployment_id=$DEPLOYMENT_ID_TO_DELETE..." + + if ! kubectl delete "$OBJECTS_TO_DELETE" \ + -l deployment_id="$DEPLOYMENT_ID_TO_DELETE" -n "$K8S_NAMESPACE" --cascade=foreground --wait=true; then + log error "❌ Failed to delete resources for deployment_id=$DEPLOYMENT_ID_TO_DELETE" + log error "💡 Possible causes:" + log error " - Resources may have finalizers preventing deletion" + log error " - Network connectivity issues with Kubernetes API" + log error " - Insufficient permissions to delete resources" + log error "🔧 How to fix:" + log error " - Check for stuck finalizers: kubectl get all -l deployment_id=$DEPLOYMENT_ID_TO_DELETE -n $K8S_NAMESPACE -o yaml | grep finalizers" + log error " - Verify kubeconfig and cluster connectivity" + log error " - Check RBAC permissions for the service account" + return 1 + fi + log info "✅ Resources deleted for deployment_id=$DEPLOYMENT_ID_TO_DELETE" } CURRENT_ACTIVE=$(echo "$CONTEXT" | jq -r '.scope.current_active_deployment // empty') @@ -15,15 +32,21 @@ if [ "$DEPLOYMENT" = "blue" ]; then # Deleting blue 
(old) deployment, keeping green (new) DEPLOYMENT_TO_CLEAN="$CURRENT_ACTIVE" DEPLOYMENT_TO_KEEP="$DEPLOYMENT_ID" + log debug "📋 Strategy: Deleting blue (old) deployment, keeping green (new)" elif [ "$DEPLOYMENT" = "green" ]; then # Deleting green (new) deployment, keeping blue (old) DEPLOYMENT_TO_CLEAN="$DEPLOYMENT_ID" DEPLOYMENT_TO_KEEP="$CURRENT_ACTIVE" + log debug "📋 Strategy: Deleting green (new) deployment, keeping blue (old)" fi -delete_deployment_resources "$DEPLOYMENT_TO_CLEAN" +log debug "📋 Deployment to clean: $DEPLOYMENT_TO_CLEAN | Deployment to keep: $DEPLOYMENT_TO_KEEP" -echo "Verifying cleanup for scope_id: $SCOPE_ID in namespace: $K8S_NAMESPACE" +if ! delete_deployment_resources "$DEPLOYMENT_TO_CLEAN"; then + exit 1 +fi + +log debug "🔍 Verifying cleanup for scope_id=$SCOPE_ID in namespace=$K8S_NAMESPACE..." # Get all unique deployment_ids for this scope_id ALL_DEPLOYMENT_IDS=$(kubectl get "$OBJECTS_TO_DELETE" -n "$K8S_NAMESPACE" \ @@ -32,12 +55,18 @@ ALL_DEPLOYMENT_IDS=$(kubectl get "$OBJECTS_TO_DELETE" -n "$K8S_NAMESPACE" \ # Delete all deployment_ids except DEPLOYMENT_TO_KEEP if [ -n "$ALL_DEPLOYMENT_IDS" ]; then + EXTRA_COUNT=0 while IFS= read -r EXTRA_DEPLOYMENT_ID; do if [ "$EXTRA_DEPLOYMENT_ID" != "$DEPLOYMENT_TO_KEEP" ]; then + log debug "📝 Found orphaned deployment: $EXTRA_DEPLOYMENT_ID" delete_deployment_resources "$EXTRA_DEPLOYMENT_ID" + EXTRA_COUNT=$((EXTRA_COUNT + 1)) fi done <<< "$ALL_DEPLOYMENT_IDS" + if [ "$EXTRA_COUNT" -gt 0 ]; then + log info "✅ Cleaned up $EXTRA_COUNT orphaned deployment(s)" + fi fi - -echo "Cleanup verification successful: Only deployment_id=$DEPLOYMENT_TO_KEEP remains for scope_id=$SCOPE_ID" \ No newline at end of file +log info "✨ Cluster cleanup completed successfully" +log debug "📋 Only deployment_id=$DEPLOYMENT_TO_KEEP remains for scope_id=$SCOPE_ID" diff --git a/k8s/deployment/delete_ingress_finalizer b/k8s/deployment/delete_ingress_finalizer index 27a72f98..4223529d 100644 --- 
a/k8s/deployment/delete_ingress_finalizer +++ b/k8s/deployment/delete_ingress_finalizer @@ -1,9 +1,25 @@ #!/bin/bash + +log debug "🔍 Checking for ingress finalizers to remove..." + INGRESS_NAME=$(echo "$CONTEXT" | jq -r '"k-8-s-" + .scope.slug + "-" + (.scope.id | tostring) + "-" + .ingress_visibility') +log debug "📋 Ingress name: $INGRESS_NAME" # If the scope uses ingress, remove any finalizers attached to it if kubectl get ingress "$INGRESS_NAME" -n "$K8S_NAMESPACE" &>/dev/null; then - kubectl patch ingress "$INGRESS_NAME" -n "$K8S_NAMESPACE" -p '{"metadata":{"finalizers":[]}}' --type=merge + log debug "📝 Removing finalizers from ingress $INGRESS_NAME..." + if ! kubectl patch ingress "$INGRESS_NAME" -n "$K8S_NAMESPACE" -p '{"metadata":{"finalizers":[]}}' --type=merge; then + log error "❌ Failed to remove finalizers from ingress $INGRESS_NAME" + log error "💡 Possible causes:" + log error " - Ingress was deleted while patching" + log error " - Insufficient permissions to patch ingress" + log error "🔧 How to fix:" + log error " - Verify ingress still exists: kubectl get ingress $INGRESS_NAME -n $K8S_NAMESPACE" + log error " - Check RBAC permissions for patching ingress resources" + exit 1 + fi + log info "✅ Finalizers removed from ingress $INGRESS_NAME" +else + log debug "📋 Ingress $INGRESS_NAME not found, skipping finalizer removal" fi -# Do nothing if the scope does not use ingress (e.x: uses http route or has no network component) \ No newline at end of file diff --git a/k8s/deployment/kill_instances b/k8s/deployment/kill_instances index f7dfd3cc..a11b774c 100755 --- a/k8s/deployment/kill_instances +++ b/k8s/deployment/kill_instances @@ -2,7 +2,8 @@ set -euo pipefail -echo "=== KILL INSTANCES ===" + +log debug "🔍 Starting instance kill operation..." 
DEPLOYMENT_ID=$(echo "$CONTEXT" | jq -r '.parameters.deployment_id // .notification.parameters.deployment_id // empty') INSTANCE_NAME=$(echo "$CONTEXT" | jq -r '.parameters.instance_name // .notification.parameters.instance_name // empty') @@ -16,17 +17,27 @@ if [[ -z "$INSTANCE_NAME" ]] && [[ -n "${NP_ACTION_CONTEXT:-}" ]]; then fi if [[ -z "$DEPLOYMENT_ID" ]]; then - echo "ERROR: deployment_id parameter not found" + log error "❌ deployment_id parameter not found" + log error "💡 Possible causes:" + log error " - Parameter not provided in action request" + log error " - Context structure is different than expected" + log error "🔧 How to fix:" + log error " - Ensure deployment_id is passed in the action parameters" exit 1 fi if [[ -z "$INSTANCE_NAME" ]]; then - echo "ERROR: instance_name parameter not found" + log error "❌ instance_name parameter not found" + log error "💡 Possible causes:" + log error " - Parameter not provided in action request" + log error " - Context structure is different than expected" + log error "🔧 How to fix:" + log error " - Ensure instance_name is passed in the action parameters" exit 1 fi -echo "Deployment ID: $DEPLOYMENT_ID" -echo "Instance name: $INSTANCE_NAME" +log debug "📋 Deployment ID: $DEPLOYMENT_ID" +log debug "📋 Instance name: $INSTANCE_NAME" SCOPE_ID=$(echo "$CONTEXT" | jq -r '.tags.scope_id // .scope.id // .notification.tags.scope_id // empty') @@ -39,86 +50,77 @@ K8S_NAMESPACE=$(echo "$CONTEXT" | jq -r --arg default "$K8S_NAMESPACE" ' ' 2>/dev/null || echo "nullplatform") if [[ -z "$SCOPE_ID" ]]; then - echo "ERROR: scope_id not found in context" + log error "❌ scope_id not found in context" + log error "💡 Possible causes:" + log error " - Context missing scope information" + log error " - Action invoked outside of scope context" + log error "🔧 How to fix:" + log error " - Verify the action is invoked with proper scope context" exit 1 fi -echo "Scope ID: $SCOPE_ID" -echo "Namespace: $K8S_NAMESPACE" +log debug "📋 Scope ID: 
$SCOPE_ID" +log debug "📋 Namespace: $K8S_NAMESPACE" +log debug "🔍 Verifying pod exists..." if ! kubectl get pod "$INSTANCE_NAME" -n "$K8S_NAMESPACE" >/dev/null 2>&1; then - echo "ERROR: Pod $INSTANCE_NAME not found in namespace $K8S_NAMESPACE" + log error "❌ Pod $INSTANCE_NAME not found in namespace $K8S_NAMESPACE" + log error "💡 Possible causes:" + log error " - Pod was already terminated" + log error " - Pod name is incorrect" + log error " - Pod exists in a different namespace" + log error "🔧 How to fix:" + log error " - List pods: kubectl get pods -n $K8S_NAMESPACE -l scope_id=$SCOPE_ID" exit 1 fi -echo "" -echo "=== POD DETAILS ===" +log debug "📋 Fetching pod details..." POD_STATUS=$(kubectl get pod "$INSTANCE_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.status.phase}') POD_NODE=$(kubectl get pod "$INSTANCE_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.nodeName}') POD_START_TIME=$(kubectl get pod "$INSTANCE_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.status.startTime}') -echo "Pod: $INSTANCE_NAME" -echo "Status: $POD_STATUS" -echo "Node: $POD_NODE" -echo "Start time: $POD_START_TIME" +log debug "📋 Pod: $INSTANCE_NAME | Status: $POD_STATUS | Node: $POD_NODE | Started: $POD_START_TIME" DEPLOYMENT_NAME="d-$SCOPE_ID-$DEPLOYMENT_ID" -echo "Expected deployment: $DEPLOYMENT_NAME" POD_DEPLOYMENT=$(kubectl get pod "$INSTANCE_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.metadata.ownerReferences[0].name}' 2>/dev/null || echo "") if [[ -n "$POD_DEPLOYMENT" ]]; then REPLICASET_DEPLOYMENT=$(kubectl get replicaset "$POD_DEPLOYMENT" -n "$K8S_NAMESPACE" -o jsonpath='{.metadata.ownerReferences[0].name}' 2>/dev/null || echo "") - echo "Pod belongs to ReplicaSet: $POD_DEPLOYMENT" - echo "ReplicaSet belongs to Deployment: $REPLICASET_DEPLOYMENT" - + log debug "📋 Pod ownership: ReplicaSet=$POD_DEPLOYMENT -> Deployment=$REPLICASET_DEPLOYMENT" + if [[ "$REPLICASET_DEPLOYMENT" != "$DEPLOYMENT_NAME" ]]; then - echo "WARNING: Pod does not belong to expected deployment $DEPLOYMENT_NAME" - echo 
"Continuing anyway..." + log warn "⚠️ Pod does not belong to expected deployment $DEPLOYMENT_NAME (continuing anyway)" fi else - echo "WARNING: Could not verify pod ownership" + log warn "⚠️ Could not verify pod ownership" fi -echo "" -echo "=== KILLING POD ===" - +log debug "📝 Deleting pod $INSTANCE_NAME with 30s grace period..." kubectl delete pod "$INSTANCE_NAME" -n "$K8S_NAMESPACE" --grace-period=30 -echo "Pod deletion initiated with 30 second grace period" - -echo "Waiting for pod to be terminated..." -kubectl wait --for=delete pod/"$INSTANCE_NAME" -n "$K8S_NAMESPACE" --timeout=60s || echo "Pod deletion timeout reached" +log debug "📝 Waiting for pod termination..." +kubectl wait --for=delete pod/"$INSTANCE_NAME" -n "$K8S_NAMESPACE" --timeout=60s || log warn "⚠️ Pod deletion timeout reached" if kubectl get pod "$INSTANCE_NAME" -n "$K8S_NAMESPACE" >/dev/null 2>&1; then - echo "WARNING: Pod still exists after deletion attempt" POD_STATUS_AFTER=$(kubectl get pod "$INSTANCE_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.status.phase}') - echo "Current pod status: $POD_STATUS_AFTER" + log warn "⚠️ Pod still exists after deletion attempt (status: $POD_STATUS_AFTER)" else - echo "Pod successfully terminated and removed" + log info "✅ Pod successfully terminated and removed" fi -echo "" -echo "=== DEPLOYMENT STATUS AFTER POD DELETION ===" +log debug "📋 Checking deployment status after pod deletion..." 
if kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" >/dev/null 2>&1; then DESIRED_REPLICAS=$(kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.replicas}') READY_REPLICAS=$(kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.status.readyReplicas}') AVAILABLE_REPLICAS=$(kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.status.availableReplicas}') - - echo "Deployment: $DEPLOYMENT_NAME" - echo "Desired replicas: $DESIRED_REPLICAS" - echo "Ready replicas: ${READY_REPLICAS:-0}" - echo "Available replicas: ${AVAILABLE_REPLICAS:-0}" - - # If this is a managed deployment (with HPA or desired replicas > 0), - # Kubernetes will automatically create a new pod to replace the killed one + + log debug "📋 Deployment $DEPLOYMENT_NAME: desired=$DESIRED_REPLICAS, ready=${READY_REPLICAS:-0}, available=${AVAILABLE_REPLICAS:-0}" + if [[ "$DESIRED_REPLICAS" -gt 0 ]]; then - echo "" - echo "Note: Kubernetes will automatically create a new pod to replace the terminated one" - echo "This is expected behavior for managed deployments" + log debug "📋 Kubernetes will automatically create a replacement pod" fi else - echo "WARNING: Deployment $DEPLOYMENT_NAME not found" + log warn "⚠️ Deployment $DEPLOYMENT_NAME not found" fi -echo "" -echo "Instance $INSTANCE_NAME kill operation completed" \ No newline at end of file +log info "✨ Instance kill operation completed for $INSTANCE_NAME" diff --git a/k8s/deployment/networking/gateway/ingress/route_traffic b/k8s/deployment/networking/gateway/ingress/route_traffic index 0969f265..4e890b08 100644 --- a/k8s/deployment/networking/gateway/ingress/route_traffic +++ b/k8s/deployment/networking/gateway/ingress/route_traffic @@ -1,5 +1,6 @@ #!/bin/bash + TEMPLATE="" for arg in "$@"; do @@ -8,15 +9,42 @@ for arg in "$@"; do esac done -echo "Creating $INGRESS_VISIBILITY ingress..." 
+if [ -z "$TEMPLATE" ]; then + log error "❌ Template argument is required" + log error "💡 Possible causes:" + log error " - Missing --template= argument" + log error "🔧 How to fix:" + log error " - Provide template: --template=/path/to/template.yaml" + exit 1 +fi + +log debug "🔍 Creating $INGRESS_VISIBILITY ingress..." INGRESS_FILE="$OUTPUT_DIR/ingress-$SCOPE_ID-$DEPLOYMENT_ID.yaml" CONTEXT_PATH="$OUTPUT_DIR/context-$SCOPE_ID.json" +log debug "📋 Scope: $SCOPE_ID | Deployment: $DEPLOYMENT_ID" +log debug "📋 Template: $TEMPLATE" +log debug "📋 Output: $INGRESS_FILE" + echo "$CONTEXT" > "$CONTEXT_PATH" -gomplate -c .="$CONTEXT_PATH" \ +log debug "📝 Building ingress template..." + +if ! gomplate -c .="$CONTEXT_PATH" \ --file "$TEMPLATE" \ - --out "$INGRESS_FILE" + --out "$INGRESS_FILE" 2>&1; then + log error "❌ Failed to build ingress template" + log error "💡 Possible causes:" + log error " - Template file does not exist or is invalid" + log error " - Scope attributes may be missing" + log error "🔧 How to fix:" + log error " - Verify template exists: ls -la $TEMPLATE" + log error " - Verify that your scope has all required attributes" + rm -f "$CONTEXT_PATH" + exit 1 +fi + +rm "$CONTEXT_PATH" -rm "$CONTEXT_PATH" \ No newline at end of file +log info "✅ Ingress template created: $INGRESS_FILE" diff --git a/k8s/deployment/networking/gateway/rollback_traffic b/k8s/deployment/networking/gateway/rollback_traffic index 4700f880..751b47bd 100644 --- a/k8s/deployment/networking/gateway/rollback_traffic +++ b/k8s/deployment/networking/gateway/rollback_traffic @@ -1,13 +1,20 @@ #!/bin/bash -export NEW_DEPLOYMENT_ID=$DEPLOYMENT_ID +log debug "🔍 Rolling back traffic to previous deployment..." 
+ +export NEW_DEPLOYMENT_ID=$DEPLOYMENT_ID export DEPLOYMENT_ID=$(echo "$CONTEXT" | jq .scope.current_active_deployment -r) +log debug "📋 Current deployment: $NEW_DEPLOYMENT_ID" +log debug "📋 Rollback target: $DEPLOYMENT_ID" + CONTEXT=$(echo "$CONTEXT" | jq \ --arg deployment_id "$DEPLOYMENT_ID" \ '.deployment.id = $deployment_id') +log debug "📝 Creating ingress for rollback deployment..." + source "$SERVICE_PATH/deployment/networking/gateway/route_traffic" export DEPLOYMENT_ID=$NEW_DEPLOYMENT_ID @@ -15,3 +22,5 @@ export DEPLOYMENT_ID=$NEW_DEPLOYMENT_ID CONTEXT=$(echo "$CONTEXT" | jq \ --arg deployment_id "$DEPLOYMENT_ID" \ '.deployment.id = $deployment_id') + +log info "✅ Traffic rollback configuration created" diff --git a/k8s/deployment/networking/gateway/route_traffic b/k8s/deployment/networking/gateway/route_traffic index ff1c80d4..cc1a7841 100755 --- a/k8s/deployment/networking/gateway/route_traffic +++ b/k8s/deployment/networking/gateway/route_traffic @@ -1,16 +1,33 @@ #!/bin/bash -echo "Creating $INGRESS_VISIBILITY ingress..." + +log debug "🔍 Creating $INGRESS_VISIBILITY ingress..." INGRESS_FILE="$OUTPUT_DIR/ingress-$SCOPE_ID-$DEPLOYMENT_ID.yaml" CONTEXT_PATH="$OUTPUT_DIR/context-$SCOPE_ID-$DEPLOYMENT_ID.json" +log debug "📋 Scope: $SCOPE_ID | Deployment: $DEPLOYMENT_ID" +log debug "📋 Template: $TEMPLATE" +log debug "📋 Output: $INGRESS_FILE" + echo "$CONTEXT" > "$CONTEXT_PATH" -echo "Building Template: $TEMPLATE to $INGRESS_FILE" +log debug "📝 Building ingress template..." -gomplate -c .="$CONTEXT_PATH" \ +if ! 
gomplate -c .="$CONTEXT_PATH" \ --file "$TEMPLATE" \ - --out "$INGRESS_FILE" + --out "$INGRESS_FILE" 2>&1; then + log error "❌ Failed to build ingress template" + log error "💡 Possible causes:" + log error " - Template file does not exist or is invalid" + log error " - Scope attributes may be missing" + log error "🔧 How to fix:" + log error " - Verify template exists: ls -la $TEMPLATE" + log error " - Verify that your scope has all required attributes" + rm -f "$CONTEXT_PATH" + exit 1 +fi + +rm "$CONTEXT_PATH" -rm "$CONTEXT_PATH" \ No newline at end of file +log info "✅ Ingress template created: $INGRESS_FILE" diff --git a/k8s/deployment/notify_active_domains b/k8s/deployment/notify_active_domains index 5baacf37..c12580f4 100644 --- a/k8s/deployment/notify_active_domains +++ b/k8s/deployment/notify_active_domains @@ -1,15 +1,38 @@ #!/bin/bash + +log debug "🔍 Checking for custom domains to activate..." + DOMAINS=$(echo "$CONTEXT" | jq .scope.domains) if [[ "$DOMAINS" == "null" || "$DOMAINS" == "[]" ]]; then + log debug "📋 No domains configured, skipping activation" return fi +DOMAIN_COUNT=$(echo "$DOMAINS" | jq length) +log debug "📋 Found $DOMAIN_COUNT custom domain(s) to activate" + echo "$DOMAINS" | jq -r '.[] | "\(.id)|\(.name)"' | while IFS='|' read -r domain_id domain_name; do - echo "Configuring domain: $domain_name" + log debug "📝 Activating custom domain: $domain_name..." + + np_output=$(np scope domain patch --id "$domain_id" --body '{"status": "active"}' --format json 2>&1) + np_status=$? 
+ + if [ $np_status -ne 0 ]; then + log error "❌ Failed to activate custom domain: $domain_name" + log error "📋 Error: $np_output" + log error "💡 Possible causes:" + log error " - Domain ID $domain_id may not exist" + log error " - Insufficient permissions (403 Forbidden)" + log error " - API connectivity issues" + log error "🔧 How to fix:" + log error " - Verify domain exists: np scope domain get --id $domain_id" + log error " - Check API token permissions" + continue + fi - np scope domain patch --id "$domain_id" --body '{"status": "active"}' + log info "✅ Custom domain activated: $domain_name" +done - echo "Successfully configured domain: $domain_name" -done \ No newline at end of file +log info "✨ Custom domain activation completed" diff --git a/k8s/deployment/print_failed_deployment_hints b/k8s/deployment/print_failed_deployment_hints index f688ace6..66ce5d51 100644 --- a/k8s/deployment/print_failed_deployment_hints +++ b/k8s/deployment/print_failed_deployment_hints @@ -1,14 +1,23 @@ #!/bin/bash + HEALTH_CHECK_PATH=$(echo "$CONTEXT" | jq -r .scope.capabilities.health_check.path) REQUESTED_MEMORY=$(echo "$CONTEXT" | jq -r .scope.capabilities.ram_memory) SCOPE_NAME=$(echo "$CONTEXT" | jq -r .scope.name) SCOPE_DIMENSIONS=$(echo "$CONTEXT" | jq -r .scope.dimensions) -echo "⚠️ Application Startup Issue Detected" -echo "We noticed that your application was unable to start within the expected timeframe. Please verify the following configuration settings:" -echo "1. Port Configuration: Ensure your application is configured to listen on port 8080" -echo "2. Health Check Endpoint: Confirm that your application responds correctly to the configured health check path: $HEALTH_CHECK_PATH" -echo "3. Application Logs: We suggest reviewing the application logs for any startup errors, including database connection issues, missing dependencies, or initialization errors" -echo "4. 
Memory Allocation: Verify that sufficient memory resources have been allocated (Current allocation: ${REQUESTED_MEMORY}Mi)" -echo "5. Environment Variables: Confirm that all required environment variables have been properly configured in the parameter section and are correctly applied to scope '$SCOPE_NAME' or the associated scope dimensions: $SCOPE_DIMENSIONS" \ No newline at end of file +log error "" +log error "⚠️ Application Startup Issue Detected" +log error "" +log error "💡 Possible causes:" +log error " Your application was unable to start within the expected timeframe" +log error "" +log error "🔧 How to fix:" +log error " 1. Port Configuration: Ensure your application listens on port 8080" +log error " 2. Health Check Endpoint: Verify your app responds to: $HEALTH_CHECK_PATH" +log error " 3. Application Logs: Review logs for startup errors (database connections," +log error " missing dependencies, or initialization errors)" +log error " 4. Memory Allocation: Current allocation is ${REQUESTED_MEMORY}Mi - increase if needed" +log error " 5. Environment Variables: Verify all required variables are configured in" +log error " parameters for scope '$SCOPE_NAME' or dimensions: $SCOPE_DIMENSIONS" +log error "" diff --git a/k8s/deployment/publish_alb_metrics b/k8s/deployment/publish_alb_metrics new file mode 100755 index 00000000..fd695b77 --- /dev/null +++ b/k8s/deployment/publish_alb_metrics @@ -0,0 +1,141 @@ +#!/bin/bash +# Post-deployment ALB metrics publisher +# Publishes ALB rule count and target group count as custom metrics +# to CloudWatch or Datadog for continuous monitoring and alerting. 
+ +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/../utils/get_config_value" + +ALB_METRICS_PUBLISH_ENABLED=$(get_config_value \ + --env ALB_METRICS_PUBLISH_ENABLED \ + --provider '.providers["scope-configurations"].networking.alb_metrics_enabled' \ + --default "false" +) + +ALB_METRICS_PUBLISH_TARGET=$(get_config_value \ + --env ALB_METRICS_PUBLISH_TARGET \ + --provider '.providers["scope-configurations"].networking.alb_metrics_target' \ + --default "cloudwatch" +) + +if [ "$ALB_METRICS_PUBLISH_ENABLED" != "true" ]; then + return 0 +fi + +ALB_NAME=$(echo "$CONTEXT" | jq -r '.alb_name') +REGION=$(echo "$CONTEXT" | jq -r '.region') + +if [ -z "$ALB_NAME" ] || [ "$ALB_NAME" = "null" ]; then + log warn "⚠️ ALB metrics: ALB name not found in context" + return 0 +fi + +# Resolve ALB ARN +ALB_ARN=$(aws elbv2 describe-load-balancers \ + --names "$ALB_NAME" \ + --region "$REGION" \ + --query 'LoadBalancers[0].LoadBalancerArn' \ + --output text 2>/dev/null) + +if [ $? -ne 0 ] || [ "$ALB_ARN" = "None" ] || [ -z "$ALB_ARN" ]; then + log warn "⚠️ ALB metrics: could not find ALB [$ALB_NAME]" + return 0 +fi + +# Count rules across all listeners +TOTAL_RULES=0 +LISTENERS=$(aws elbv2 describe-listeners \ + --load-balancer-arn "$ALB_ARN" \ + --region "$REGION" \ + --output json 2>/dev/null) + +if [ $? -ne 0 ]; then + log warn "⚠️ ALB metrics: could not retrieve listeners" + return 0 +fi + +LISTENER_ARNS=$(echo "$LISTENERS" | jq -r '.Listeners[].ListenerArn') + +for listener_arn in $LISTENER_ARNS; do + RULES=$(aws elbv2 describe-rules \ + --listener-arn "$listener_arn" \ + --region "$REGION" \ + --output json 2>/dev/null) + + if [ $? 
-eq 0 ]; then + LISTENER_RULE_COUNT=$(echo "$RULES" | jq '[.Rules[] | select(.IsDefault != true)] | length') + TOTAL_RULES=$((TOTAL_RULES + LISTENER_RULE_COUNT)) + fi +done + +# Count target groups +TARGET_GROUPS=$(aws elbv2 describe-target-groups \ + --load-balancer-arn "$ALB_ARN" \ + --region "$REGION" \ + --output json 2>/dev/null) + +TG_COUNT=0 +if [ $? -eq 0 ]; then + TG_COUNT=$(echo "$TARGET_GROUPS" | jq '.TargetGroups | length') +fi + +# Publish metrics +case "$ALB_METRICS_PUBLISH_TARGET" in + cloudwatch) + aws cloudwatch put-metric-data \ + --namespace "nullplatform/ApplicationELB" \ + --metric-data "[ + {\"MetricName\":\"RuleCount\",\"Value\":$TOTAL_RULES,\"Unit\":\"Count\",\"Dimensions\":[{\"Name\":\"ALBName\",\"Value\":\"$ALB_NAME\"}]}, + {\"MetricName\":\"TargetGroupCount\",\"Value\":$TG_COUNT,\"Unit\":\"Count\",\"Dimensions\":[{\"Name\":\"ALBName\",\"Value\":\"$ALB_NAME\"}]} + ]" \ + --region "$REGION" 2>/dev/null + + if [ $? -eq 0 ]; then + log info "✅ ALB metrics published to CloudWatch (rules: $TOTAL_RULES, target_groups: $TG_COUNT)" + else + log error "❌ ALB metrics: failed to publish to CloudWatch" + fi + ;; + + datadog) + if [ -z "$DATADOG_API_KEY" ]; then + log warn "⚠️ ALB metrics: DATADOG_API_KEY not set" + return 0 + fi + + DATADOG_SITE="${DATADOG_SITE:-datadoghq.com}" + TIMESTAMP=$(date +%s) + + RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -X POST "https://api.${DATADOG_SITE}/api/v2/series" \ + -H "DD-API-KEY: $DATADOG_API_KEY" \ + -H "Content-Type: application/json" \ + -d "{ + \"series\": [ + { + \"metric\": \"nullplatform.applicationelb.rule_count\", + \"type\": 1, + \"points\": [{\"timestamp\": $TIMESTAMP, \"value\": $TOTAL_RULES}], + \"tags\": [\"alb_name:$ALB_NAME\", \"region:$REGION\"] + }, + { + \"metric\": \"nullplatform.applicationelb.target_group_count\", + \"type\": 1, + \"points\": [{\"timestamp\": $TIMESTAMP, \"value\": $TG_COUNT}], + \"tags\": [\"alb_name:$ALB_NAME\", \"region:$REGION\"] + } + ] + }" 2>/dev/null) + + if [ 
"$RESPONSE" = "202" ]; then + log info "✅ ALB metrics published to Datadog (rules: $TOTAL_RULES, target_groups: $TG_COUNT)" + else + log error "❌ ALB metrics: failed to publish to Datadog (HTTP $RESPONSE)" + fi + ;; + + *) + log warn "⚠️ ALB metrics: unknown target '$ALB_METRICS_PUBLISH_TARGET'" + ;; +esac + +return 0 diff --git a/k8s/deployment/scale_deployments b/k8s/deployment/scale_deployments index 426f5170..9e703eed 100755 --- a/k8s/deployment/scale_deployments +++ b/k8s/deployment/scale_deployments @@ -1,5 +1,6 @@ #!/bin/bash + GREEN_REPLICAS=$(echo "$CONTEXT" | jq -r .green_replicas) GREEN_DEPLOYMENT_ID=$DEPLOYMENT_ID @@ -8,19 +9,38 @@ BLUE_DEPLOYMENT_ID=$(echo "$CONTEXT" | jq .scope.current_active_deployment -r) if [ "$DEPLOY_STRATEGY" = "rolling" ]; then GREEN_DEPLOYMENT_NAME="d-$SCOPE_ID-$GREEN_DEPLOYMENT_ID" - - kubectl scale deployment "$GREEN_DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" --replicas="$GREEN_REPLICAS" - BLUE_DEPLOYMENT_NAME="d-$SCOPE_ID-$BLUE_DEPLOYMENT_ID" - kubectl scale deployment "$BLUE_DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" --replicas="$BLUE_REPLICAS" + log debug "📝 Scaling deployments for rolling strategy..." + log debug "📋 Green deployment: $GREEN_DEPLOYMENT_NAME -> $GREEN_REPLICAS replicas" + log debug "📋 Blue deployment: $BLUE_DEPLOYMENT_NAME -> $BLUE_REPLICAS replicas" + log debug "" + + log debug "📝 Scaling green deployment..." + if kubectl scale deployment "$GREEN_DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" --replicas="$GREEN_REPLICAS"; then + log info " ✅ Green deployment scaled to $GREEN_REPLICAS replicas" + else + log error " ❌ Failed to scale green deployment" + exit 1 + fi + + log debug "📝 Scaling blue deployment..." 
+ if kubectl scale deployment "$BLUE_DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" --replicas="$BLUE_REPLICAS"; then + log info " ✅ Blue deployment scaled to $BLUE_REPLICAS replicas" + else + log error " ❌ Failed to scale blue deployment" + exit 1 + fi DEFAULT_TIMEOUT_TEN_MINUTES=600 - + export TIMEOUT=${DEPLOYMENT_MAX_WAIT_IN_SECONDS-$DEFAULT_TIMEOUT_TEN_MINUTES} export SKIP_DEPLOYMENT_STATUS_CHECK=true source "$SERVICE_PATH/deployment/wait_blue_deployment_active" unset TIMEOUT unset SKIP_DEPLOYMENT_STATUS_CHECK -fi \ No newline at end of file + + log debug "" + log info "✨ Deployments scaled successfully" +fi diff --git a/k8s/deployment/templates/blue-green-ingress.yaml.tpl b/k8s/deployment/templates/blue-green-ingress.yaml.tpl index f33ca37d..20a0a5b0 100644 --- a/k8s/deployment/templates/blue-green-ingress.yaml.tpl +++ b/k8s/deployment/templates/blue-green-ingress.yaml.tpl @@ -114,11 +114,30 @@ metadata: {{- end }} {{- end }} annotations: +{{- $port_key := "" -}} +{{- if eq .type "HTTP" -}} + {{- $port_key = printf "http-%v" .port -}} +{{- else -}} + {{- $port_key = printf "grpc-%v" .port -}} +{{- end -}} +{{- $blue_svc_exists := true -}} +{{- if $.blue_additional_port_services -}} + {{- if not (index $.blue_additional_port_services $port_key) -}} + {{- $blue_svc_exists = false -}} + {{- end -}} +{{- end -}} +{{- if $blue_svc_exists }} alb.ingress.kubernetes.io/actions.bg-deployment-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}: >- {"type":"forward","forwardConfig":{"targetGroups":[ {"serviceName":"d-{{ $.scope.id }}-{{ $.blue_deployment_id }}-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}","servicePort":{{ .port }},"weight":{{ sub 100 $.deployment.strategy_data.desired_switched_traffic }}}, {"serviceName":"d-{{ $.scope.id }}-{{ $.deployment.id }}-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}","servicePort":{{ .port }},"weight":{{ $.deployment.strategy_data.desired_switched_traffic }}} ]}} +{{- else }} + 
alb.ingress.kubernetes.io/actions.bg-deployment-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}: >- + {"type":"forward","forwardConfig":{"targetGroups":[ + {"serviceName":"d-{{ $.scope.id }}-{{ $.deployment.id }}-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}","servicePort":{{ .port }},"weight":100} + ]}} +{{- end }} alb.ingress.kubernetes.io/actions.response-404: '{"type":"fixed-response","fixedResponseConfig":{"contentType":"text/plain","statusCode":"404","messageBody":"404 scope not found or has not been deployed yet"}}' alb.ingress.kubernetes.io/group.name: {{ $.alb_name }} alb.ingress.kubernetes.io/load-balancer-name: {{ $.alb_name }} diff --git a/k8s/deployment/templates/deployment.yaml.tpl b/k8s/deployment/templates/deployment.yaml.tpl index 5b2bddfd..6acf1c95 100644 --- a/k8s/deployment/templates/deployment.yaml.tpl +++ b/k8s/deployment/templates/deployment.yaml.tpl @@ -145,8 +145,8 @@ spec: value: {{ .scope.capabilities.health_check.path }} resources: limits: - cpu: 93m - memory: 64Mi + cpu: {{ .container_cpu_in_millicores }}m + memory: {{ .container_memory_in_memory }}Mi requests: cpu: 31m livenessProbe: @@ -198,8 +198,8 @@ spec: value: '{{ .port }}' resources: limits: - cpu: 93m - memory: 64Mi + cpu: {{ $.container_cpu_in_millicores }}m + memory: {{ $.container_memory_in_memory }}Mi requests: cpu: 31m livenessProbe: diff --git a/k8s/deployment/tests/apply_templates.bats b/k8s/deployment/tests/apply_templates.bats new file mode 100644 index 00000000..610175d6 --- /dev/null +++ b/k8s/deployment/tests/apply_templates.bats @@ -0,0 +1,162 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for apply_templates - template application with empty file handling +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + # Set required environment variables + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export ACTION="apply" + export DRY_RUN="false" + + # Create temp directory for test files + export OUTPUT_DIR="$(mktemp -d)" + + # Mock kubectl + kubectl() { + return 0 + } + export -f kubectl + + # Mock backup_templates (sourced script) + export MANIFEST_BACKUP='{"ENABLED":"false"}' +} + +teardown() { + rm -rf "$OUTPUT_DIR" + unset OUTPUT_DIR + unset ACTION + unset DRY_RUN + unset SERVICE_PATH + unset MANIFEST_BACKUP + unset -f kubectl +} + +# ============================================================================= +# Header Message Tests +# ============================================================================= +@test "apply_templates: displays applying header message" { + echo "apiVersion: v1" > "$OUTPUT_DIR/valid.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Applying templates..." 
+ assert_contains "$output" "📋 Directory:" + assert_contains "$output" "📋 Action: apply" + assert_contains "$output" "📋 Dry run: false" +} + +# ============================================================================= +# Test: Skips empty files (zero bytes) +# ============================================================================= +@test "apply_templates: skips empty files (zero bytes)" { + # Create an empty file + touch "$OUTPUT_DIR/empty.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Skipping empty template: empty.yaml" +} + +# ============================================================================= +# Test: Skips files with only whitespace +# ============================================================================= +@test "apply_templates: skips files with only whitespace" { + # Create a file with only whitespace + echo " " > "$OUTPUT_DIR/whitespace.yaml" + echo "" >> "$OUTPUT_DIR/whitespace.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Skipping empty template: whitespace.yaml" +} + +# ============================================================================= +# Test: Skips files with only newlines +# ============================================================================= +@test "apply_templates: skips files with only newlines" { + # Create a file with only newlines + printf "\n\n\n" > "$OUTPUT_DIR/newlines.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Skipping empty template: newlines.yaml" +} + +# ============================================================================= +# Test: Applies non-empty files +# ============================================================================= +@test "apply_templates: applies non-empty files" { + echo "apiVersion: v1" > "$OUTPUT_DIR/valid.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + [ "$status" -eq 0 ] + 
assert_contains "$output" "📝 kubectl apply valid.yaml" +} + +# ============================================================================= +# Test: Moves applied files to apply directory +# ============================================================================= +@test "apply_templates: moves applied files to apply directory" { + echo "apiVersion: v1" > "$OUTPUT_DIR/valid.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + [ "$status" -eq 0 ] + assert_file_exists "$OUTPUT_DIR/apply/valid.yaml" + [ ! -f "$OUTPUT_DIR/valid.yaml" ] +} + +# ============================================================================= +# Test: Does not call kubectl for empty files +# ============================================================================= +@test "apply_templates: does not call kubectl for empty files" { + touch "$OUTPUT_DIR/empty.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Skipping empty template: empty.yaml" +} + +# ============================================================================= +# Test: Handles delete action for empty files +# ============================================================================= +@test "apply_templates: handles delete action for empty files" { + export ACTION="delete" + touch "$OUTPUT_DIR/empty.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Skipping empty template" +} + +# ============================================================================= +# Test: Dry run mode still skips empty files +# ============================================================================= +@test "apply_templates: dry run mode still skips empty files" { + export DRY_RUN="true" + touch "$OUTPUT_DIR/empty.yaml" + echo "apiVersion: v1" > "$OUTPUT_DIR/valid.yaml" + + run bash "$SERVICE_PATH/apply_templates" + + # Dry run exits with 1 + [ "$status" -eq 1 ] + assert_contains "$output" "📋 Skipping empty template: 
empty.yaml" + assert_contains "$output" "📋 Dry run mode - no changes were made" +} diff --git a/k8s/deployment/tests/build_blue_deployment.bats b/k8s/deployment/tests/build_blue_deployment.bats new file mode 100644 index 00000000..aecf7cd2 --- /dev/null +++ b/k8s/deployment/tests/build_blue_deployment.bats @@ -0,0 +1,126 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/build_blue_deployment - blue deployment builder +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export DEPLOYMENT_ID="deploy-green-123" + + export CONTEXT='{ + "blue_replicas": 2, + "scope": { + "current_active_deployment": "deploy-old-456" + }, + "deployment": { + "id": "deploy-green-123" + } + }' + + # Track what build_deployment receives + export BUILD_DEPLOYMENT_REPLICAS="" + export BUILD_DEPLOYMENT_DEPLOYMENT_ID="" + + # Mock build_deployment to capture arguments + mkdir -p "$PROJECT_ROOT/k8s/deployment" + cat > "$PROJECT_ROOT/k8s/deployment/build_deployment.mock" << 'MOCK' +BUILD_DEPLOYMENT_REPLICAS="$REPLICAS" +BUILD_DEPLOYMENT_DEPLOYMENT_ID="$DEPLOYMENT_ID" +echo "Building deployment with replicas=$REPLICAS deployment_id=$DEPLOYMENT_ID" +MOCK +} + +teardown() { + rm -f "$PROJECT_ROOT/k8s/deployment/build_deployment.mock" + unset CONTEXT + unset BUILD_DEPLOYMENT_REPLICAS + unset BUILD_DEPLOYMENT_DEPLOYMENT_ID +} + +# ============================================================================= +# Blue Replicas Extraction Tests +# ============================================================================= +@test "build_blue_deployment: extracts blue_replicas from context" { + # Can't easily test sourced script, but we 
verify CONTEXT parsing + replicas=$(echo "$CONTEXT" | jq -r .blue_replicas) + + assert_equal "$replicas" "2" +} + +# ============================================================================= +# Deployment ID Handling Tests +# ============================================================================= +@test "build_blue_deployment: uses current_active_deployment as blue deployment" { + blue_id=$(echo "$CONTEXT" | jq -r .scope.current_active_deployment) + + assert_equal "$blue_id" "deploy-old-456" +} + +@test "build_blue_deployment: preserves green deployment ID" { + # After script runs, DEPLOYMENT_ID should be restored to green + assert_equal "$DEPLOYMENT_ID" "deploy-green-123" +} + +# ============================================================================= +# Context Update Tests +# ============================================================================= +@test "build_blue_deployment: updates context with blue deployment ID" { + # Test that jq command correctly updates deployment.id + updated_context=$(echo "$CONTEXT" | jq \ + --arg deployment_id "deploy-old-456" \ + '.deployment.id = $deployment_id') + + updated_id=$(echo "$updated_context" | jq -r .deployment.id) + + assert_equal "$updated_id" "deploy-old-456" +} + +@test "build_blue_deployment: restores context with green deployment ID" { + # Test that jq command correctly restores deployment.id + updated_context=$(echo "$CONTEXT" | jq \ + --arg deployment_id "deploy-green-123" \ + '.deployment.id = $deployment_id') + + updated_id=$(echo "$updated_context" | jq -r .deployment.id) + + assert_equal "$updated_id" "deploy-green-123" +} + +# ============================================================================= +# Integration Test - Validates build_deployment is called correctly +# ============================================================================= +@test "build_blue_deployment: calls build_deployment with correct replicas and deployment id" { + # Create a mock build_deployment that 
captures the arguments + local mock_dir="$BATS_TEST_TMPDIR/mock_service" + mkdir -p "$mock_dir/deployment" + + # Create mock script that captures REPLICAS, DEPLOYMENT_ID, and args + cat > "$mock_dir/deployment/build_deployment" << 'MOCK_SCRIPT' +#!/bin/bash +# Capture values to a file for verification +echo "CAPTURED_REPLICAS=$REPLICAS" >> "$BATS_TEST_TMPDIR/captured_values" +echo "CAPTURED_DEPLOYMENT_ID=$DEPLOYMENT_ID" >> "$BATS_TEST_TMPDIR/captured_values" +echo "CAPTURED_ARGS=$*" >> "$BATS_TEST_TMPDIR/captured_values" +MOCK_SCRIPT + chmod +x "$mock_dir/deployment/build_deployment" + + # Set SERVICE_PATH to our mock directory + export SERVICE_PATH="$mock_dir" + + # Run the actual build_blue_deployment script + source "$PROJECT_ROOT/k8s/deployment/build_blue_deployment" + + # Read captured values + source "$BATS_TEST_TMPDIR/captured_values" + + # Verify build_deployment was called with blue deployment ID (from current_active_deployment) + assert_equal "$CAPTURED_DEPLOYMENT_ID" "deploy-old-456" "build_deployment should receive blue deployment ID" + + # Verify build_deployment was called with correct replicas from context + assert_equal "$CAPTURED_ARGS" "--replicas=2" "build_deployment should receive --replicas=2" +} diff --git a/k8s/deployment/tests/build_context.bats b/k8s/deployment/tests/build_context.bats new file mode 100644 index 00000000..ce8aa579 --- /dev/null +++ b/k8s/deployment/tests/build_context.bats @@ -0,0 +1,802 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/build_context +# Tests validate_status function, replica calculation, and get_config_value usage +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/utils/get_config_value" + + # Base CONTEXT for tests + export CONTEXT='{ + "deployment": {"status": "creating", "id": "deploy-123"}, + "scope": {"id": "scope-456", "capabilities": {"scaling_type": "fixed", "fixed_instances": 2}} + }' + + # Extract validate_status function from build_context for isolated testing + eval "$(sed -n '/^validate_status()/,/^}/p' "$PROJECT_ROOT/k8s/deployment/build_context")" +} + +teardown() { + unset -f validate_status 2>/dev/null || true + unset CONTEXT DEPLOY_STRATEGY POD_DISRUPTION_BUDGET_ENABLED POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE 2>/dev/null || true + unset TRAFFIC_CONTAINER_IMAGE TRAFFIC_MANAGER_CONFIG_MAP IMAGE_PULL_SECRETS IAM CONTAINER_MEMORY_IN_MEMORY CONTAINER_CPU_IN_MILLICORES 2>/dev/null || true +} + +# ============================================================================= +# validate_status Function Tests +# ============================================================================= +@test "validate_status: accepts valid statuses for start-initial and start-blue-green" { + run validate_status "start-initial" "creating" + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Running action 'start-initial' (current status: 'creating', expected: creating, waiting_for_instances or running)" + + run validate_status "start-initial" "waiting_for_instances" + [ "$status" -eq 0 ] + + run validate_status "start-initial" "running" + [ "$status" -eq 0 ] + + run validate_status "start-blue-green" "creating" + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Running action 'start-blue-green' (current status: 'creating', expected: creating, waiting_for_instances or running)" +} + +@test "validate_status: rejects invalid statuses for start-initial" { + run validate_status "start-initial" "deleting" + [ "$status" -ne 0 ] + + run validate_status 
"start-initial" "failed" + [ "$status" -ne 0 ] +} + +@test "validate_status: accepts valid statuses for switch-traffic" { + run validate_status "switch-traffic" "running" + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Running action 'switch-traffic' (current status: 'running', expected: running or waiting_for_instances)" + + run validate_status "switch-traffic" "waiting_for_instances" + [ "$status" -eq 0 ] +} + +@test "validate_status: rejects invalid statuses for switch-traffic" { + run validate_status "switch-traffic" "creating" + [ "$status" -ne 0 ] +} + +@test "validate_status: accepts valid statuses for rollback-deployment" { + run validate_status "rollback-deployment" "rolling_back" + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Running action 'rollback-deployment' (current status: 'rolling_back', expected: rolling_back or cancelling)" + + run validate_status "rollback-deployment" "cancelling" + [ "$status" -eq 0 ] +} + +@test "validate_status: rejects invalid statuses for rollback-deployment" { + run validate_status "rollback-deployment" "running" + [ "$status" -ne 0 ] +} + +@test "validate_status: accepts valid statuses for finalize-blue-green" { + run validate_status "finalize-blue-green" "finalizing" + [ "$status" -eq 0 ] + + run validate_status "finalize-blue-green" "cancelling" + [ "$status" -eq 0 ] +} + +@test "validate_status: rejects invalid statuses for finalize-blue-green" { + run validate_status "finalize-blue-green" "running" + [ "$status" -ne 0 ] +} + +@test "validate_status: accepts valid statuses for delete-deployment" { + run validate_status "delete-deployment" "deleting" + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Running action 'delete-deployment' (current status: 'deleting', expected: deleting, rolling_back or cancelling)" + + run validate_status "delete-deployment" "cancelling" + [ "$status" -eq 0 ] + + run validate_status "delete-deployment" "rolling_back" + [ "$status" -eq 0 ] +} + +@test "validate_status: rejects 
invalid statuses for delete-deployment" { + run validate_status "delete-deployment" "running" + [ "$status" -ne 0 ] +} + +@test "validate_status: accepts any status for unknown or empty action" { + run validate_status "custom-action" "any_status" + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Running action 'custom-action', any deployment status is accepted" + + run validate_status "" "running" + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Running action '', any deployment status is accepted" +} + +# ============================================================================= +# Replica Calculation Tests +# ============================================================================= +@test "replica calculation: MIN_REPLICAS rounds up correctly" { + # MIN_REPLICAS = ceil(REPLICAS / 10) + + # 15 / 10 = 1.5 -> rounds up to 2 + REPLICAS=15 + MIN_REPLICAS=$(echo "scale=10; $REPLICAS / 10" | bc) + MIN_REPLICAS=$(echo "$MIN_REPLICAS" | awk '{printf "%d", ($1 == int($1) ? $1 : int($1)+1)}') + assert_equal "$MIN_REPLICAS" "2" + + # 10 / 10 = 1.0 -> stays 1 + REPLICAS=10 + MIN_REPLICAS=$(echo "scale=10; $REPLICAS / 10" | bc) + MIN_REPLICAS=$(echo "$MIN_REPLICAS" | awk '{printf "%d", ($1 == int($1) ? $1 : int($1)+1)}') + assert_equal "$MIN_REPLICAS" "1" + + # 5 / 10 = 0.5 -> rounds up to 1 + REPLICAS=5 + MIN_REPLICAS=$(echo "scale=10; $REPLICAS / 10" | bc) + MIN_REPLICAS=$(echo "$MIN_REPLICAS" | awk '{printf "%d", ($1 == int($1) ? $1 : int($1)+1)}') + assert_equal "$MIN_REPLICAS" "1" +} + +@test "replica calculation: GREEN_REPLICAS calculates traffic percentage correctly" { + # 50% of 10 = 5 + REPLICAS=10 + SWITCH_TRAFFIC=50 + GREEN_REPLICAS=$(echo "scale=10; ($REPLICAS * $SWITCH_TRAFFIC) / 100" | bc) + GREEN_REPLICAS=$(echo "$GREEN_REPLICAS" | awk '{printf "%d", ($1 == int($1) ? 
$1 : int($1)+1)}') + assert_equal "$GREEN_REPLICAS" "5" + + # 30% of 7 = 2.1 -> rounds up to 3 + REPLICAS=7 + SWITCH_TRAFFIC=30 + GREEN_REPLICAS=$(echo "scale=10; ($REPLICAS * $SWITCH_TRAFFIC) / 100" | bc) + GREEN_REPLICAS=$(echo "$GREEN_REPLICAS" | awk '{printf "%d", ($1 == int($1) ? $1 : int($1)+1)}') + assert_equal "$GREEN_REPLICAS" "3" +} + +@test "replica calculation: BLUE_REPLICAS respects minimum" { + REPLICAS=10 + GREEN_REPLICAS=10 + MIN_REPLICAS=1 + BLUE_REPLICAS=$(( REPLICAS - GREEN_REPLICAS )) + BLUE_REPLICAS=$(( MIN_REPLICAS > BLUE_REPLICAS ? MIN_REPLICAS : BLUE_REPLICAS )) + assert_equal "$BLUE_REPLICAS" "1" + + # When remainder is larger than minimum, use remainder + GREEN_REPLICAS=6 + BLUE_REPLICAS=$(( REPLICAS - GREEN_REPLICAS )) + BLUE_REPLICAS=$(( MIN_REPLICAS > BLUE_REPLICAS ? MIN_REPLICAS : BLUE_REPLICAS )) + assert_equal "$BLUE_REPLICAS" "4" +} + +@test "replica calculation: GREEN_REPLICAS respects minimum" { + GREEN_REPLICAS=0 + MIN_REPLICAS=1 + GREEN_REPLICAS=$(( MIN_REPLICAS > GREEN_REPLICAS ? 
MIN_REPLICAS : GREEN_REPLICAS )) + assert_equal "$GREEN_REPLICAS" "1" +} + +# ============================================================================= +# Service Account Name Generation Tests +# ============================================================================= +@test "service account: generates name when IAM enabled, empty when disabled" { + SCOPE_ID="scope-123" + + # IAM enabled + IAM='{"ENABLED":"true","PREFIX":"np-role"}' + IAM_ENABLED=$(echo "$IAM" | jq -r .ENABLED) + SERVICE_ACCOUNT_NAME="" + if [[ "$IAM_ENABLED" == "true" ]]; then + SERVICE_ACCOUNT_NAME=$(echo "$IAM" | jq -r .PREFIX)-"$SCOPE_ID" + fi + assert_equal "$SERVICE_ACCOUNT_NAME" "np-role-scope-123" + + # IAM disabled + IAM='{"ENABLED":"false","PREFIX":"np-role"}' + IAM_ENABLED=$(echo "$IAM" | jq -r .ENABLED) + SERVICE_ACCOUNT_NAME="" + if [[ "$IAM_ENABLED" == "true" ]]; then + SERVICE_ACCOUNT_NAME=$(echo "$IAM" | jq -r .PREFIX)-"$SCOPE_ID" + fi + assert_empty "$SERVICE_ACCOUNT_NAME" +} + +# ============================================================================= +# Traffic Container Image Version Tests +# ============================================================================= +@test "traffic container: uses websocket2 for web_sockets, latest for http" { + # web_sockets protocol + SCOPE_TRAFFIC_PROTOCOL="web_sockets" + TRAFFIC_CONTAINER_VERSION="latest" + if [[ "$SCOPE_TRAFFIC_PROTOCOL" == "web_sockets" ]]; then + TRAFFIC_CONTAINER_VERSION="websocket2" + fi + assert_equal "$TRAFFIC_CONTAINER_VERSION" "websocket2" + + # http protocol + SCOPE_TRAFFIC_PROTOCOL="http" + TRAFFIC_CONTAINER_VERSION="latest" + if [[ "$SCOPE_TRAFFIC_PROTOCOL" == "web_sockets" ]]; then + TRAFFIC_CONTAINER_VERSION="websocket2" + fi + assert_equal "$TRAFFIC_CONTAINER_VERSION" "latest" +} + +# ============================================================================= +# Image Pull Secrets Tests +# ============================================================================= +@test "image pull 
secrets: PULL_SECRETS takes precedence over IMAGE_PULL_SECRETS" { + PULL_SECRETS='["secret1"]' + IMAGE_PULL_SECRETS="{}" + + if [[ -n "$PULL_SECRETS" ]]; then + IMAGE_PULL_SECRETS=$PULL_SECRETS + fi + + assert_equal "$IMAGE_PULL_SECRETS" '["secret1"]' +} + +# ============================================================================= +# get_config_value Tests - DEPLOY_STRATEGY +# ============================================================================= +@test "get_config_value: DEPLOY_STRATEGY priority - provider > env > default" { + # Default when nothing set + unset DEPLOY_STRATEGY + result=$(get_config_value \ + --env DEPLOY_STRATEGY \ + --provider '.providers["scope-configurations"].deployment.deployment_strategy' \ + --default "blue-green" + ) + assert_equal "$result" "blue-green" + + # Env var when no provider + export DEPLOY_STRATEGY="rolling" + result=$(get_config_value \ + --env DEPLOY_STRATEGY \ + --provider '.providers["scope-configurations"].deployment.deployment_strategy' \ + --default "blue-green" + ) + assert_equal "$result" "rolling" + + # Provider wins over env var + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = {"deployment": {"deployment_strategy": "canary"}}') + result=$(get_config_value \ + --env DEPLOY_STRATEGY \ + --provider '.providers["scope-configurations"].deployment.deployment_strategy' \ + --default "blue-green" + ) + assert_equal "$result" "canary" +} + +# ============================================================================= +# get_config_value Tests - PDB Configuration +# ============================================================================= +@test "get_config_value: PDB_ENABLED priority - provider > env > default" { + # Default + unset POD_DISRUPTION_BUDGET_ENABLED + result=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_ENABLED \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_enabled' \ + --default "false" + ) + assert_equal "$result" "false" + 
+ # Env var + export POD_DISRUPTION_BUDGET_ENABLED="true" + result=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_ENABLED \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_enabled' \ + --default "false" + ) + assert_equal "$result" "true" + + # Provider wins + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = {"deployment": {"pod_disruption_budget_enabled": "false"}}') + result=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_ENABLED \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_enabled' \ + --default "false" + ) + assert_equal "$result" "false" +} + +@test "get_config_value: PDB_MAX_UNAVAILABLE priority - provider > env > default" { + # Default + unset POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE + result=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_max_unavailable' \ + --default "25%" + ) + assert_equal "$result" "25%" + + # Env var + export POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE="2" + result=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_max_unavailable' \ + --default "25%" + ) + assert_equal "$result" "2" + + # Provider wins + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = {"deployment": {"pod_disruption_budget_max_unavailable": "75%"}}') + result=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_max_unavailable' \ + --default "25%" + ) + assert_equal "$result" "75%" +} + +# ============================================================================= +# get_config_value Tests - TRAFFIC_CONTAINER_IMAGE +# ============================================================================= +@test "get_config_value: TRAFFIC_CONTAINER_IMAGE 
priority - provider > env > default" { + # Default + unset TRAFFIC_CONTAINER_IMAGE + result=$(get_config_value \ + --env TRAFFIC_CONTAINER_IMAGE \ + --provider '.providers["scope-configurations"].deployment.traffic_container_image' \ + --default "public.ecr.aws/nullplatform/k8s-traffic-manager:latest" + ) + assert_equal "$result" "public.ecr.aws/nullplatform/k8s-traffic-manager:latest" + + # Env var + export TRAFFIC_CONTAINER_IMAGE="env.ecr.aws/traffic:custom" + result=$(get_config_value \ + --env TRAFFIC_CONTAINER_IMAGE \ + --provider '.providers["scope-configurations"].deployment.traffic_container_image' \ + --default "public.ecr.aws/nullplatform/k8s-traffic-manager:latest" + ) + assert_equal "$result" "env.ecr.aws/traffic:custom" + + # Provider wins + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = {"deployment": {"traffic_container_image": "provider.ecr.aws/traffic:v3.0"}}') + result=$(get_config_value \ + --env TRAFFIC_CONTAINER_IMAGE \ + --provider '.providers["scope-configurations"].deployment.traffic_container_image' \ + --default "public.ecr.aws/nullplatform/k8s-traffic-manager:latest" + ) + assert_equal "$result" "provider.ecr.aws/traffic:v3.0" +} + +# ============================================================================= +# get_config_value Tests - TRAFFIC_MANAGER_CONFIG_MAP +# ============================================================================= +@test "get_config_value: TRAFFIC_MANAGER_CONFIG_MAP priority - provider > env > default" { + # Default (empty) + unset TRAFFIC_MANAGER_CONFIG_MAP + result=$(get_config_value \ + --env TRAFFIC_MANAGER_CONFIG_MAP \ + --provider '.providers["scope-configurations"].deployment.traffic_manager_config_map' \ + --default "" + ) + assert_empty "$result" + + # Env var + export TRAFFIC_MANAGER_CONFIG_MAP="env-traffic-config" + result=$(get_config_value \ + --env TRAFFIC_MANAGER_CONFIG_MAP \ + --provider '.providers["scope-configurations"].deployment.traffic_manager_config_map' \ 
+ --default "" + ) + assert_equal "$result" "env-traffic-config" + + # Provider wins + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = {"deployment": {"traffic_manager_config_map": "provider-traffic-config"}}') + result=$(get_config_value \ + --env TRAFFIC_MANAGER_CONFIG_MAP \ + --provider '.providers["scope-configurations"].deployment.traffic_manager_config_map' \ + --default "" + ) + assert_equal "$result" "provider-traffic-config" +} + +# ============================================================================= +# get_config_value Tests - IMAGE_PULL_SECRETS +# ============================================================================= +@test "get_config_value: IMAGE_PULL_SECRETS reads from provider" { + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = { + "security": { + "image_pull_secrets_enabled": true, + "image_pull_secrets": ["custom-secret", "ecr-secret"] + } + }') + + enabled=$(get_config_value \ + --provider '.providers["scope-configurations"].security.image_pull_secrets_enabled' \ + --default "false" + ) + secrets=$(get_config_value \ + --provider '.providers["scope-configurations"].security.image_pull_secrets | @json' \ + --default "[]" + ) + + assert_equal "$enabled" "true" + assert_contains "$secrets" "custom-secret" + assert_contains "$secrets" "ecr-secret" +} + +# ============================================================================= +# get_config_value Tests - IAM Configuration +# ============================================================================= +@test "get_config_value: IAM reads from provider" { + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = { + "security": { + "iam_enabled": true, + "iam_prefix": "custom-prefix", + "iam_policies": ["arn:aws:iam::123:policy/test"], + "iam_boundary_arn": "arn:aws:iam::123:policy/boundary" + } + }') + + enabled=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_enabled' \ + 
--default "false" + ) + prefix=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_prefix' \ + --default "" + ) + policies=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_policies | @json' \ + --default "[]" + ) + boundary=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_boundary_arn' \ + --default "" + ) + + assert_equal "$enabled" "true" + assert_equal "$prefix" "custom-prefix" + assert_contains "$policies" "arn:aws:iam::123:policy/test" + assert_equal "$boundary" "arn:aws:iam::123:policy/boundary" +} + +@test "get_config_value: IAM uses defaults when not configured" { + enabled=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_enabled' \ + --default "false" + ) + prefix=$(get_config_value \ + --provider '.providers["scope-configurations"].security.iam_prefix' \ + --default "" + ) + + assert_equal "$enabled" "false" + assert_empty "$prefix" +} + +# ============================================================================= +# get_config_value Tests - Complete Configuration Hierarchy +# ============================================================================= +@test "get_config_value: complete deployment configuration from provider" { + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = { + "deployment": { + "traffic_container_image": "custom.ecr.aws/traffic:v1", + "pod_disruption_budget_enabled": "true", + "pod_disruption_budget_max_unavailable": "1", + "traffic_manager_config_map": "my-config-map", + "deployment_strategy": "rolling" + } + }') + + unset TRAFFIC_CONTAINER_IMAGE POD_DISRUPTION_BUDGET_ENABLED POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE + unset TRAFFIC_MANAGER_CONFIG_MAP DEPLOY_STRATEGY + + traffic_image=$(get_config_value \ + --env TRAFFIC_CONTAINER_IMAGE \ + --provider '.providers["scope-configurations"].deployment.traffic_container_image' \ + --default 
"public.ecr.aws/nullplatform/k8s-traffic-manager:latest" + ) + assert_equal "$traffic_image" "custom.ecr.aws/traffic:v1" + + pdb_enabled=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_ENABLED \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_enabled' \ + --default "false" + ) + assert_equal "$pdb_enabled" "true" + + pdb_max=$(get_config_value \ + --env POD_DISRUPTION_BUDGET_MAX_UNAVAILABLE \ + --provider '.providers["scope-configurations"].deployment.pod_disruption_budget_max_unavailable' \ + --default "25%" + ) + assert_equal "$pdb_max" "1" + + config_map=$(get_config_value \ + --env TRAFFIC_MANAGER_CONFIG_MAP \ + --provider '.providers["scope-configurations"].deployment.traffic_manager_config_map' \ + --default "" + ) + assert_equal "$config_map" "my-config-map" + + strategy=$(get_config_value \ + --env DEPLOY_STRATEGY \ + --provider '.providers["scope-configurations"].deployment.deployment_strategy' \ + --default "blue-green" + ) + assert_equal "$strategy" "rolling" +} + +# ============================================================================= +# Error Handling Tests +# ============================================================================= +@test "error: invalid deployment status shows full troubleshooting info" { + local test_script="$BATS_TEST_TMPDIR/test_invalid_status.sh" + + cat > "$test_script" << 'SCRIPT' +#!/bin/bash +export SERVICE_PATH="$1" +export SERVICE_ACTION="start-initial" +export CONTEXT='{"deployment":{"status":"failed"}}' + +# Mock scope/build_context that sources get_config_value +mkdir -p "$SERVICE_PATH/scope" +cat > "$SERVICE_PATH/scope/build_context" << 'MOCK_SCOPE' +source "$SERVICE_PATH/utils/get_config_value" +MOCK_SCOPE + +source "$SERVICE_PATH/deployment/build_context" +SCRIPT + chmod +x "$test_script" + + local mock_service="$BATS_TEST_TMPDIR/mock_k8s" + mkdir -p "$mock_service/deployment" "$mock_service/utils" + cp "$PROJECT_ROOT/k8s/deployment/build_context" 
"$mock_service/deployment/" + cp "$PROJECT_ROOT/k8s/utils/get_config_value" "$mock_service/utils/" + + run "$test_script" "$mock_service" + + [ "$status" -ne 0 ] + assert_contains "$output" "❌ Invalid deployment status 'failed' for action 'start-initial'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Deployment status changed during workflow execution" + assert_contains "$output" "Another action is already running on this deployment" + assert_contains "$output" "Deployment was modified externally" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Wait for any in-progress actions to complete" + assert_contains "$output" "Check the deployment status in the nullplatform dashboard" + assert_contains "$output" "Retry the action once the deployment is in the expected state" +} + +@test "error: ConfigMap not found shows full troubleshooting info" { + local test_script="$BATS_TEST_TMPDIR/test_configmap_error.sh" + + cat > "$test_script" << 'SCRIPT' +#!/bin/bash +export SERVICE_PATH="$1" +export SERVICE_ACTION="start-initial" +export TRAFFIC_MANAGER_CONFIG_MAP="test-config" +export K8S_NAMESPACE="test-ns" +export CONTEXT='{ + "deployment":{"status":"creating","id":"deploy-123"}, + "scope":{"capabilities":{"scaling_type":"fixed","fixed_instances":1}} +}' + +# Mock scope/build_context that sources get_config_value +mkdir -p "$SERVICE_PATH/scope" +cat > "$SERVICE_PATH/scope/build_context" << 'MOCK_SCOPE' +source "$SERVICE_PATH/utils/get_config_value" +MOCK_SCOPE + +kubectl() { + return 1 +} +export -f kubectl + +source "$SERVICE_PATH/deployment/build_context" +SCRIPT + chmod +x "$test_script" + + local mock_service="$BATS_TEST_TMPDIR/mock_k8s" + mkdir -p "$mock_service/deployment" "$mock_service/utils" + cp "$PROJECT_ROOT/k8s/deployment/build_context" "$mock_service/deployment/" + cp "$PROJECT_ROOT/k8s/utils/get_config_value" "$mock_service/utils/" + + run "$test_script" "$mock_service" + + [ "$status" -ne 0 ] + 
assert_contains "$output" "🔍 Validating ConfigMap 'test-config' in namespace 'test-ns'" + assert_contains "$output" "❌ ConfigMap 'test-config' does not exist in namespace 'test-ns'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "ConfigMap was not created before deployment" + assert_contains "$output" "ConfigMap name is misspelled in values.yaml" + assert_contains "$output" "ConfigMap was deleted or exists in a different namespace" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Create the ConfigMap: kubectl create configmap test-config -n test-ns --from-file=nginx.conf --from-file=default.conf" + assert_contains "$output" "Verify the ConfigMap name in your scope configuration" +} + +# ============================================================================= +# CONTAINER_MEMORY_IN_MEMORY Tests (read from ConfigMap) +# ============================================================================= +@test "container_memory_in_memory: uses values.yaml default when no ConfigMap is configured" { + export CONTAINER_MEMORY_IN_MEMORY="64" + assert_equal "$CONTAINER_MEMORY_IN_MEMORY" "64" +} + +@test "container_memory_in_memory: reads value from ConfigMap when key exists" { + unset CONTAINER_MEMORY_IN_MEMORY + CONFIGMAP_KEYS=$'nginx.conf\ndefault.conf\ncontainer_memory_in_memory' + + kubectl() { + if [[ "$*" == *"go-template"* && "$*" == *"container_memory_in_memory"* ]]; then + echo "128" + return 0 + fi + } + export -f kubectl + + if echo "$CONFIGMAP_KEYS" | grep -qx "container_memory_in_memory"; then + CONTAINER_MEMORY_IN_MEMORY=$(kubectl get configmap "test-cm" -n "test-ns" -o go-template='{{ index .data "container_memory_in_memory" }}') + fi + CONTAINER_MEMORY_IN_MEMORY=${CONTAINER_MEMORY_IN_MEMORY:-64} + + assert_equal "$CONTAINER_MEMORY_IN_MEMORY" "128" +} + +@test "container_memory_in_memory: keeps values.yaml default when ConfigMap exists but key is missing" { + export CONTAINER_MEMORY_IN_MEMORY="64" + 
CONFIGMAP_KEYS=$'nginx.conf\ndefault.conf' + + if echo "$CONFIGMAP_KEYS" | grep -qx "container_memory_in_memory"; then + CONTAINER_MEMORY_IN_MEMORY="should-not-reach" + fi + + assert_equal "$CONTAINER_MEMORY_IN_MEMORY" "64" +} + +# ============================================================================= +# CONTAINER_CPU_IN_MILLICORES Tests (read from ConfigMap) +# ============================================================================= +@test "container_cpu_in_millicores: uses values.yaml default when no ConfigMap is configured" { + export CONTAINER_CPU_IN_MILLICORES="93" + assert_equal "$CONTAINER_CPU_IN_MILLICORES" "93" +} + +@test "container_cpu_in_millicores: reads value from ConfigMap when key exists" { + unset CONTAINER_CPU_IN_MILLICORES + CONFIGMAP_KEYS=$'nginx.conf\ndefault.conf\ncontainer_cpu_in_millicores' + + kubectl() { + if [[ "$*" == *"go-template"* && "$*" == *"container_cpu_in_millicores"* ]]; then + echo "200" + return 0 + fi + } + export -f kubectl + + if echo "$CONFIGMAP_KEYS" | grep -qx "container_cpu_in_millicores"; then + CONTAINER_CPU_IN_MILLICORES=$(kubectl get configmap "test-cm" -n "test-ns" -o go-template='{{ index .data "container_cpu_in_millicores" }}') + fi + CONTAINER_CPU_IN_MILLICORES=${CONTAINER_CPU_IN_MILLICORES:-93} + + assert_equal "$CONTAINER_CPU_IN_MILLICORES" "200" +} + +@test "container_cpu_in_millicores: keeps values.yaml default when ConfigMap exists but key is missing" { + export CONTAINER_CPU_IN_MILLICORES="93" + CONFIGMAP_KEYS=$'nginx.conf\ndefault.conf' + + if echo "$CONFIGMAP_KEYS" | grep -qx "container_cpu_in_millicores"; then + CONTAINER_CPU_IN_MILLICORES="should-not-reach" + fi + + assert_equal "$CONTAINER_CPU_IN_MILLICORES" "93" +} + +# ============================================================================= +# Blue Additional Port Services Detection Tests +# ============================================================================= +@test "blue additional port services: empty map when no 
BLUE_DEPLOYMENT_ID" { + BLUE_DEPLOYMENT_ID="" + BLUE_ADDITIONAL_PORT_SERVICES="{}" + if [ -n "$BLUE_DEPLOYMENT_ID" ] && [ "$BLUE_DEPLOYMENT_ID" != "null" ]; then + BLUE_ADDITIONAL_PORT_SERVICES='{"grpc-9014": true}' + fi + assert_equal "$BLUE_ADDITIONAL_PORT_SERVICES" "{}" +} + +@test "blue additional port services: empty map when BLUE_DEPLOYMENT_ID is null" { + BLUE_DEPLOYMENT_ID="null" + BLUE_ADDITIONAL_PORT_SERVICES="{}" + if [ -n "$BLUE_DEPLOYMENT_ID" ] && [ "$BLUE_DEPLOYMENT_ID" != "null" ]; then + BLUE_ADDITIONAL_PORT_SERVICES='{"grpc-9014": true}' + fi + assert_equal "$BLUE_ADDITIONAL_PORT_SERVICES" "{}" +} + +@test "blue additional port services: empty map when no additional_ports in capabilities" { + BLUE_DEPLOYMENT_ID="deploy-old-456" + export CONTEXT='{"scope":{"capabilities":{}}}' + ADDITIONAL_PORTS=$(echo "$CONTEXT" | jq -c '.scope.capabilities.additional_ports // []') + assert_equal "$ADDITIONAL_PORTS" "[]" +} + +@test "blue additional port services: detects existing service via kubectl" { + kubectl() { + if [[ "$1" == "get" && "$2" == "service" && "$3" == "d-scope-456-deploy-old-789-grpc-9014" ]]; then + return 0 + fi + return 1 + } + export -f kubectl + + SCOPE_ID="scope-456" + BLUE_DEPLOYMENT_ID="deploy-old-789" + K8S_NAMESPACE="test-ns" + service_name="d-${SCOPE_ID}-${BLUE_DEPLOYMENT_ID}-grpc-9014" + + if kubectl get service "$service_name" -n "$K8S_NAMESPACE" &>/dev/null; then + result="true" + else + result="false" + fi + + assert_equal "$result" "true" +} + +@test "blue additional port services: detects missing service via kubectl" { + kubectl() { return 1; } + export -f kubectl + + SCOPE_ID="scope-456" + BLUE_DEPLOYMENT_ID="deploy-old-789" + K8S_NAMESPACE="test-ns" + service_name="d-${SCOPE_ID}-${BLUE_DEPLOYMENT_ID}-grpc-9014" + + if kubectl get service "$service_name" -n "$K8S_NAMESPACE" &>/dev/null; then + result="true" + else + result="false" + fi + + assert_equal "$result" "false" +} + +@test "blue additional port services: builds correct 
map for mixed existing/missing ports" { + kubectl() { + if [[ "$3" == "d-scope-456-deploy-old-789-grpc-9014" ]]; then + return 0 # exists + fi + return 1 # doesn't exist + } + export -f kubectl + + SCOPE_ID="scope-456" + BLUE_DEPLOYMENT_ID="deploy-old-789" + K8S_NAMESPACE="test-ns" + BLUE_ADDITIONAL_PORT_SERVICES="{}" + + ADDITIONAL_PORTS='[{"port":9014,"type":"GRPC"},{"port":8081,"type":"HTTP"}]' + while IFS= read -r port_config; do + port=$(echo "$port_config" | jq -r '.port') + type_raw=$(echo "$port_config" | jq -r '.type') + type_lower=$(echo "$type_raw" | tr '[:upper:]' '[:lower:]') + service_name="d-${SCOPE_ID}-${BLUE_DEPLOYMENT_ID}-${type_lower}-${port}" + key="${type_lower}-${port}" + if kubectl get service "$service_name" -n "$K8S_NAMESPACE" &>/dev/null; then + BLUE_ADDITIONAL_PORT_SERVICES=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq --arg key "$key" '. + {($key): true}') + else + BLUE_ADDITIONAL_PORT_SERVICES=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq --arg key "$key" '. + {($key): false}') + fi + done < <(echo "$ADDITIONAL_PORTS" | jq -c '.[]') + + grpc_exists=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq -r '.["grpc-9014"]') + http_exists=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq -r '.["http-8081"]') + + assert_equal "$grpc_exists" "true" + assert_equal "$http_exists" "false" +} diff --git a/k8s/deployment/tests/build_deployment.bats b/k8s/deployment/tests/build_deployment.bats new file mode 100644 index 00000000..f010afce --- /dev/null +++ b/k8s/deployment/tests/build_deployment.bats @@ -0,0 +1,172 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/build_deployment - template generation +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export OUTPUT_DIR="$(mktemp -d)" + export SCOPE_ID="scope-123" + export DEPLOYMENT_ID="deploy-456" + export REPLICAS="3" + + # Template paths + export DEPLOYMENT_TEMPLATE="$PROJECT_ROOT/k8s/deployment/templates/deployment.yaml.tpl" + export SECRET_TEMPLATE="$PROJECT_ROOT/k8s/deployment/templates/secret.yaml.tpl" + export SCALING_TEMPLATE="$PROJECT_ROOT/k8s/deployment/templates/scaling.yaml.tpl" + export SERVICE_TEMPLATE="$PROJECT_ROOT/k8s/deployment/templates/service.yaml.tpl" + export PDB_TEMPLATE="$PROJECT_ROOT/k8s/deployment/templates/pdb.yaml.tpl" + + export CONTEXT='{}' + + # Mock gomplate + gomplate() { + local out_file="" + while [[ $# -gt 0 ]]; do + case $1 in + --out) out_file="$2"; shift 2 ;; + *) shift ;; + esac + done + echo "apiVersion: v1" > "$out_file" + return 0 + } + export -f gomplate +} + +teardown() { + rm -rf "$OUTPUT_DIR" + unset -f gomplate +} + +# ============================================================================= +# Success Logging Tests +# ============================================================================= +@test "build_deployment: displays all expected log messages on success" { + run bash "$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 0 ] + + # Header messages + assert_contains "$output" "📝 Building deployment templates..." 
+ assert_contains "$output" "📋 Output directory:" + + # Deployment template + assert_contains "$output" "✅ Deployment template:" + + # Secret template + assert_contains "$output" "✅ Secret template:" + + # Scaling template + assert_contains "$output" "✅ Scaling template:" + + # Service template + assert_contains "$output" "✅ Service template:" + + # PDB template + assert_contains "$output" "✅ PDB template:" + + # Summary + assert_contains "$output" "✨ All templates built successfully" +} + +# ============================================================================= +# Error Handling Tests +# ============================================================================= +@test "build_deployment: fails when deployment template generation fails" { + gomplate() { + local file_arg="" + while [[ $# -gt 0 ]]; do + case $1 in + --file) file_arg="$2"; shift 2 ;; + --out) shift 2 ;; + *) shift ;; + esac + done + if [[ "$file_arg" == *"deployment.yaml.tpl" ]]; then + return 1 + fi + return 0 + } + export -f gomplate + + run bash "$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to build deployment template" +} + +@test "build_deployment: fails when secret template generation fails" { + gomplate() { + local file_arg="" + local out_file="" + while [[ $# -gt 0 ]]; do + case $1 in + --file) file_arg="$2"; shift 2 ;; + --out) out_file="$2"; shift 2 ;; + *) shift ;; + esac + done + if [[ "$file_arg" == *"secret.yaml.tpl" ]]; then + return 1 + fi + echo "apiVersion: v1" > "$out_file" + return 0 + } + export -f gomplate + + run bash "$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to build secret template" +} + +# ============================================================================= +# File Creation Tests +# ============================================================================= +@test "build_deployment: creates deployment file with correct name" { + run bash 
"$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 0 ] + assert_file_exists "$OUTPUT_DIR/deployment-scope-123-deploy-456.yaml" +} + +@test "build_deployment: creates secret file with correct name" { + run bash "$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 0 ] + assert_file_exists "$OUTPUT_DIR/secret-scope-123-deploy-456.yaml" +} + +@test "build_deployment: creates scaling file with correct name" { + run bash "$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 0 ] + assert_file_exists "$OUTPUT_DIR/scaling-scope-123-deploy-456.yaml" +} + +@test "build_deployment: creates service file with correct name" { + run bash "$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 0 ] + assert_file_exists "$OUTPUT_DIR/service-scope-123-deploy-456.yaml" +} + +@test "build_deployment: creates pdb file with correct name" { + run bash "$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 0 ] + assert_file_exists "$OUTPUT_DIR/pdb-scope-123-deploy-456.yaml" +} + +@test "build_deployment: removes context file after completion" { + run bash "$BATS_TEST_DIRNAME/../build_deployment" + + [ "$status" -eq 0 ] + [ ! -f "$OUTPUT_DIR/context-scope-123.json" ] +} diff --git a/k8s/deployment/tests/delete_cluster_objects.bats b/k8s/deployment/tests/delete_cluster_objects.bats new file mode 100644 index 00000000..086ff5ac --- /dev/null +++ b/k8s/deployment/tests/delete_cluster_objects.bats @@ -0,0 +1,164 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/delete_cluster_objects - cluster cleanup +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export K8S_NAMESPACE="test-namespace" + export SCOPE_ID="scope-123" + export DEPLOYMENT_ID="deploy-new" + export DEPLOYMENT="blue" + + export CONTEXT='{ + "scope": { + "current_active_deployment": "deploy-old" + } + }' + + kubectl() { + case "$1" in + delete) + echo "kubectl delete $*" + echo "Deleted resources" + return 0 + ;; + get) + # Return empty list for cleanup verification + echo "" + return 0 + ;; + esac + return 0 + } + export -f kubectl +} + +teardown() { + unset CONTEXT + unset -f kubectl +} + +# ============================================================================= +# Blue Deployment Cleanup Tests +# ============================================================================= +@test "delete_cluster_objects: deletes blue deployment and displays correct logging" { + export DEPLOYMENT="blue" + + run bash "$BATS_TEST_DIRNAME/../delete_cluster_objects" + + [ "$status" -eq 0 ] + # Start message + assert_contains "$output" "🔍 Starting cluster objects cleanup..." + # Strategy message + assert_contains "$output" "📋 Strategy: Deleting blue (old) deployment, keeping green (new)" + # Debug info + assert_contains "$output" "📋 Deployment to clean: deploy-old | Deployment to keep: deploy-new" + # Delete action + assert_contains "$output" "📝 Deleting resources for deployment_id=deploy-old..." + assert_contains "$output" "✅ Resources deleted for deployment_id=deploy-old" + # Verification + assert_contains "$output" "🔍 Verifying cleanup for scope_id=scope-123 in namespace=test-namespace..." 
+ # Summary + assert_contains "$output" "✨ Cluster cleanup completed successfully" + assert_contains "$output" "📋 Only deployment_id=deploy-new remains for scope_id=scope-123" +} + +# ============================================================================= +# Green Deployment Cleanup Tests +# ============================================================================= +@test "delete_cluster_objects: deletes green deployment and displays correct logging" { + export DEPLOYMENT="green" + + run bash "$BATS_TEST_DIRNAME/../delete_cluster_objects" + + [ "$status" -eq 0 ] + # Strategy message + assert_contains "$output" "📋 Strategy: Deleting green (new) deployment, keeping blue (old)" + # Debug info + assert_contains "$output" "📋 Deployment to clean: deploy-new | Deployment to keep: deploy-old" + # Delete action + assert_contains "$output" "📝 Deleting resources for deployment_id=deploy-new..." + assert_contains "$output" "✅ Resources deleted for deployment_id=deploy-new" + # Summary + assert_contains "$output" "📋 Only deployment_id=deploy-old remains for scope_id=scope-123" +} + +# ============================================================================= +# Resource Types Tests +# ============================================================================= +@test "delete_cluster_objects: uses correct kubectl options" { + run bash "$BATS_TEST_DIRNAME/../delete_cluster_objects" + + [ "$status" -eq 0 ] + # Check the kubectl delete command includes all resource types + assert_contains "$output" "deployment,service,hpa,ingress,pdb,secret,configmap" + assert_contains "$output" "--cascade=foreground" + assert_contains "$output" "--wait=true" +} + +# ============================================================================= +# Error Handling Tests +# ============================================================================= +@test "delete_cluster_objects: displays error with troubleshooting on kubectl failure" { + kubectl() { + case "$1" in + delete) + return 1 + 
;; + get) + echo "" + return 0 + ;; + esac + return 0 + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../delete_cluster_objects" + + [ "$status" -ne 0 ] + assert_contains "$output" "❌ Failed to delete resources for deployment_id=deploy-old" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Resources may have finalizers preventing deletion" + assert_contains "$output" "Network connectivity issues with Kubernetes API" + assert_contains "$output" "Insufficient permissions to delete resources" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check for stuck finalizers" + assert_contains "$output" "Verify kubeconfig and cluster connectivity" + assert_contains "$output" "Check RBAC permissions for the service account" +} + +# ============================================================================= +# Orphaned Deployment Cleanup Tests +# ============================================================================= +@test "delete_cluster_objects: cleans up orphaned deployments" { + kubectl() { + case "$1" in + delete) + echo "kubectl delete $*" + echo "Deleted resources" + return 0 + ;; + get) + # Return list with orphaned deployment + echo "deploy-new" + echo "deploy-orphan" + return 0 + ;; + esac + return 0 + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../delete_cluster_objects" + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Found orphaned deployment: deploy-orphan" + assert_contains "$output" "✅ Cleaned up 1 orphaned deployment(s)" +} + diff --git a/k8s/deployment/tests/delete_ingress_finalizer.bats b/k8s/deployment/tests/delete_ingress_finalizer.bats new file mode 100644 index 00000000..e409ce00 --- /dev/null +++ b/k8s/deployment/tests/delete_ingress_finalizer.bats @@ -0,0 +1,75 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/delete_ingress_finalizer - ingress finalizer removal +# 
============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export K8S_NAMESPACE="test-namespace" + + export CONTEXT='{ + "scope": { + "slug": "my-app", + "id": 123 + }, + "ingress_visibility": "internet-facing" + }' + + kubectl() { + echo "kubectl $*" + case "$1" in + get) + return 0 # Ingress exists + ;; + patch) + return 0 + ;; + esac + return 0 + } + export -f kubectl +} + +teardown() { + unset CONTEXT + unset -f kubectl +} + +# ============================================================================= +# Success Case +# ============================================================================= +@test "delete_ingress_finalizer: removes finalizer when ingress exists" { + run bash "$BATS_TEST_DIRNAME/../delete_ingress_finalizer" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Checking for ingress finalizers to remove..." + assert_contains "$output" "📋 Ingress name: k-8-s-my-app-123-internet-facing" + assert_contains "$output" "📝 Removing finalizers from ingress k-8-s-my-app-123-internet-facing..." + assert_contains "$output" "✅ Finalizers removed from ingress k-8-s-my-app-123-internet-facing" +} + +# ============================================================================= +# Ingress Not Found Case +# ============================================================================= +@test "delete_ingress_finalizer: skips when ingress not found" { + kubectl() { + case "$1" in + get) + return 1 # Ingress does not exist + ;; + esac + return 0 + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../delete_ingress_finalizer" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Checking for ingress finalizers to remove..." 
+ assert_contains "$output" "📋 Ingress k-8-s-my-app-123-internet-facing not found, skipping finalizer removal" +} + diff --git a/k8s/deployment/tests/kill_instances.bats b/k8s/deployment/tests/kill_instances.bats new file mode 100644 index 00000000..a3f25079 --- /dev/null +++ b/k8s/deployment/tests/kill_instances.bats @@ -0,0 +1,287 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/kill_instances - pod termination +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export K8S_NAMESPACE="test-namespace" + export SCOPE_ID="scope-123" + + export CONTEXT='{ + "parameters": { + "deployment_id": "deploy-456", + "instance_name": "my-pod-abc123" + }, + "tags": { + "scope_id": "scope-123" + }, + "providers": { + "container-orchestration": { + "cluster": { + "namespace": "test-namespace" + } + } + } + }' + + kubectl() { + case "$1" in + get) + case "$2" in + pod) + if [[ "$*" == *"-o jsonpath"* ]]; then + if [[ "$*" == *"phase"* ]]; then + echo "Running" + elif [[ "$*" == *"nodeName"* ]]; then + echo "node-1" + elif [[ "$*" == *"startTime"* ]]; then + echo "2024-01-01T00:00:00Z" + elif [[ "$*" == *"ownerReferences"* ]]; then + echo "my-replicaset-abc" + fi + fi + return 0 + ;; + replicaset) + echo "d-scope-123-deploy-456" + return 0 + ;; + deployment) + if [[ "$*" == *"replicas"* ]]; then + echo "3" + elif [[ "$*" == *"readyReplicas"* ]]; then + echo "2" + elif [[ "$*" == *"availableReplicas"* ]]; then + echo "2" + fi + return 0 + ;; + esac + ;; + delete) + echo "pod deleted" + return 0 + ;; + wait) + return 0 + ;; + esac + return 0 + } + export -f kubectl +} + +teardown() { + unset CONTEXT + unset -f kubectl +} + +# 
============================================================================= +# Success Case +# ============================================================================= +@test "kill_instances: successfully kills pod with correct logging" { + run bash "$BATS_TEST_DIRNAME/../kill_instances" + + [ "$status" -eq 0 ] + # Start message + assert_contains "$output" "🔍 Starting instance kill operation..." + # Parameter display + assert_contains "$output" "📋 Deployment ID: deploy-456" + assert_contains "$output" "📋 Instance name: my-pod-abc123" + assert_contains "$output" "📋 Scope ID: scope-123" + assert_contains "$output" "📋 Namespace: test-namespace" + # Pod verification + assert_contains "$output" "🔍 Verifying pod exists..." + assert_contains "$output" "📋 Fetching pod details..." + # Delete operation + assert_contains "$output" "📝 Deleting pod my-pod-abc123 with 30s grace period..." + assert_contains "$output" "📝 Waiting for pod termination..." + # Deployment status + assert_contains "$output" "📋 Checking deployment status after pod deletion..." 
+ # Completion + assert_contains "$output" "✨ Instance kill operation completed for my-pod-abc123" +} + +# ============================================================================= +# Error Cases +# ============================================================================= +@test "kill_instances: fails with troubleshooting when deployment_id missing" { + export CONTEXT='{ + "parameters": { + "instance_name": "my-pod-abc123" + } + }' + + run bash "$BATS_TEST_DIRNAME/../kill_instances" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ deployment_id parameter not found" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Parameter not provided in action request" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Ensure deployment_id is passed in the action parameters" +} + +@test "kill_instances: fails with troubleshooting when instance_name missing" { + export CONTEXT='{ + "parameters": { + "deployment_id": "deploy-456" + } + }' + + run bash "$BATS_TEST_DIRNAME/../kill_instances" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ instance_name parameter not found" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Parameter not provided in action request" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Ensure instance_name is passed in the action parameters" +} + +@test "kill_instances: fails with troubleshooting when scope_id missing" { + export CONTEXT='{ + "parameters": { + "deployment_id": "deploy-456", + "instance_name": "my-pod-abc123" + } + }' + + run bash "$BATS_TEST_DIRNAME/../kill_instances" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ scope_id not found in context" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Context missing scope information" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify the action is invoked with proper scope context" +} + +@test "kill_instances: 
fails with troubleshooting when pod not found" { + kubectl() { + case "$1" in + get) + if [[ "$2" == "pod" ]] && [[ "$*" != *"-o"* ]]; then + return 1 + fi + ;; + esac + return 0 + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../kill_instances" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Pod my-pod-abc123 not found in namespace test-namespace" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Pod was already terminated" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "kubectl get pods" +} + +# ============================================================================= +# Warning Cases +# ============================================================================= +@test "kill_instances: warns when pod belongs to different deployment" { + kubectl() { + case "$1" in + get) + case "$2" in + pod) + if [[ "$*" == *"-o jsonpath"* ]]; then + if [[ "$*" == *"phase"* ]]; then + echo "Running" + elif [[ "$*" == *"nodeName"* ]]; then + echo "node-1" + elif [[ "$*" == *"startTime"* ]]; then + echo "2024-01-01T00:00:00Z" + elif [[ "$*" == *"ownerReferences"* ]]; then + echo "my-replicaset-abc" + fi + fi + return 0 + ;; + replicaset) + echo "d-scope-123-different-deploy" # Different deployment + return 0 + ;; + deployment) + if [[ "$*" == *"replicas"* ]]; then + echo "3" + fi + return 0 + ;; + esac + ;; + delete) + return 0 + ;; + wait) + return 0 + ;; + esac + return 0 + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../kill_instances" + + [ "$status" -eq 0 ] + assert_contains "$output" "⚠️ Pod does not belong to expected deployment d-scope-123-deploy-456" +} + +@test "kill_instances: warns when pod still exists after deletion" { + local delete_called=0 + kubectl() { + case "$1" in + get) + case "$2" in + pod) + if [[ "$*" == *"-o jsonpath"* ]]; then + if [[ "$*" == *"phase"* ]]; then + echo "Terminating" + elif [[ "$*" == *"nodeName"* ]]; then + echo "node-1" + elif [[ "$*" == *"startTime"* ]]; 
then + echo "2024-01-01T00:00:00Z" + elif [[ "$*" == *"ownerReferences"* ]]; then + echo "my-replicaset-abc" + fi + fi + return 0 # Pod still exists + ;; + replicaset) + echo "d-scope-123-deploy-456" + return 0 + ;; + deployment) + if [[ "$*" == *"replicas"* ]]; then + echo "3" + fi + return 0 + ;; + esac + ;; + delete) + return 0 + ;; + wait) + return 1 # Timeout + ;; + esac + return 0 + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../kill_instances" + + [ "$status" -eq 0 ] + assert_contains "$output" "⚠️ Pod deletion timeout reached" + assert_contains "$output" "⚠️ Pod still exists after deletion attempt" +} diff --git a/k8s/deployment/tests/networking/gateway/ingress/route_traffic.bats b/k8s/deployment/tests/networking/gateway/ingress/route_traffic.bats new file mode 100644 index 00000000..421e58ac --- /dev/null +++ b/k8s/deployment/tests/networking/gateway/ingress/route_traffic.bats @@ -0,0 +1,161 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/networking/gateway/ingress/route_traffic +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export OUTPUT_DIR="$BATS_TEST_TMPDIR" + export SCOPE_ID="scope-123" + export DEPLOYMENT_ID="deploy-456" + export INGRESS_VISIBILITY="internet-facing" + + export CONTEXT='{ + "scope": { + "slug": "my-app", + "domain": "app.example.com" + }, + "deployment": { + "id": "deploy-456" + } + }' + + # Create a mock template + MOCK_TEMPLATE="$BATS_TEST_TMPDIR/ingress-template.yaml" + echo 'apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ .scope.slug }}-ingress' > "$MOCK_TEMPLATE" + export MOCK_TEMPLATE + + # Mock gomplate + gomplate() { + local context_file="" + local template_file="" + local out_file="" + while [[ $# -gt 0 ]]; do + case "$1" in + -c) context_file="$2"; shift 2 ;; + --file) template_file="$2"; shift 2 ;; + --out) out_file="$2"; shift 2 ;; + *) shift ;; + esac + done + # Write mock output + echo "# Generated ingress from $template_file" > "$out_file" + return 0 + } + export -f gomplate +} + +teardown() { + unset CONTEXT + unset -f gomplate +} + +# ============================================================================= +# Success Case +# ============================================================================= +@test "ingress/route_traffic: succeeds with all expected logging" { + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/ingress/route_traffic" --template="$MOCK_TEMPLATE" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Creating internet-facing ingress..." + assert_contains "$output" "📋 Scope: scope-123 | Deployment: deploy-456" + assert_contains "$output" "📋 Template: $MOCK_TEMPLATE" + assert_contains "$output" "📋 Output: $OUTPUT_DIR/ingress-scope-123-deploy-456.yaml" + assert_contains "$output" "📝 Building ingress template..." 
+ assert_contains "$output" "✅ Ingress template created: $OUTPUT_DIR/ingress-scope-123-deploy-456.yaml" +} + +@test "ingress/route_traffic: displays correct visibility type for internal" { + export INGRESS_VISIBILITY="internal" + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/ingress/route_traffic" --template="$MOCK_TEMPLATE" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Creating internal ingress..." +} + +@test "ingress/route_traffic: generates ingress file and cleans up context" { + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/ingress/route_traffic" --template="$MOCK_TEMPLATE" + + [ "$status" -eq 0 ] + [ -f "$OUTPUT_DIR/ingress-$SCOPE_ID-$DEPLOYMENT_ID.yaml" ] + # Uses context-$SCOPE_ID.json (no deployment ID) unlike parent + [ ! -f "$OUTPUT_DIR/context-$SCOPE_ID.json" ] +} + +# ============================================================================= +# Error Cases +# ============================================================================= +@test "ingress/route_traffic: fails with full troubleshooting when template missing" { + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/ingress/route_traffic" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Template argument is required" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Missing --template= argument" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Provide template: --template=/path/to/template.yaml" +} + +@test "ingress/route_traffic: fails with full troubleshooting when gomplate fails" { + gomplate() { + echo "template: template.yaml:5: function 'undefined' not defined" >&2 + return 1 + } + export -f gomplate + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/ingress/route_traffic" --template="$MOCK_TEMPLATE" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Creating internet-facing ingress..." + assert_contains "$output" "📝 Building ingress template..." 
+ assert_contains "$output" "❌ Failed to build ingress template" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Template file does not exist or is invalid" + assert_contains "$output" "- Scope attributes may be missing" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Verify template exists: ls -la $MOCK_TEMPLATE" + assert_contains "$output" "- Verify that your scope has all required attributes" +} + +@test "ingress/route_traffic: cleans up context file on gomplate failure" { + gomplate() { + return 1 + } + export -f gomplate + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/ingress/route_traffic" --template="$MOCK_TEMPLATE" + + [ "$status" -eq 1 ] + [ ! -f "$OUTPUT_DIR/context-$SCOPE_ID.json" ] +} + +# ============================================================================= +# Integration Tests +# ============================================================================= +@test "ingress/route_traffic: parses template argument correctly" { + CAPTURED_TEMPLATE="" + gomplate() { + while [[ $# -gt 0 ]]; do + case "$1" in + --file) CAPTURED_TEMPLATE="$2"; shift 2 ;; + --out) echo "# Generated" > "$2"; shift 2 ;; + *) shift ;; + esac + done + return 0 + } + export -f gomplate + export CAPTURED_TEMPLATE + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/ingress/route_traffic" --template="$MOCK_TEMPLATE" + + [ "$status" -eq 0 ] +} diff --git a/k8s/deployment/tests/networking/gateway/rollback_traffic.bats b/k8s/deployment/tests/networking/gateway/rollback_traffic.bats new file mode 100644 index 00000000..78793a08 --- /dev/null +++ b/k8s/deployment/tests/networking/gateway/rollback_traffic.bats @@ -0,0 +1,121 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/networking/gateway/rollback_traffic - traffic rollback +# ============================================================================= + +setup() 
{ + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export DEPLOYMENT_ID="deploy-new-123" + export OUTPUT_DIR="$BATS_TEST_TMPDIR" + export SCOPE_ID="scope-123" + export INGRESS_VISIBILITY="internet-facing" + export TEMPLATE="$BATS_TEST_TMPDIR/template.yaml" + + export CONTEXT='{ + "scope": { + "slug": "my-app", + "current_active_deployment": "deploy-old-456" + }, + "deployment": { + "id": "deploy-new-123" + } + }' + + # Create a mock template + echo 'kind: Ingress' > "$TEMPLATE" + + # Mock gomplate + gomplate() { + local out_file="" + while [[ $# -gt 0 ]]; do + case "$1" in + --out) out_file="$2"; shift 2 ;; + *) shift ;; + esac + done + echo "# Generated" > "$out_file" + return 0 + } + export -f gomplate +} + +teardown() { + unset CONTEXT + unset -f gomplate +} + +# ============================================================================= +# Success Case +# ============================================================================= +@test "rollback_traffic: succeeds with all expected logging" { + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/rollback_traffic" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Rolling back traffic to previous deployment..." + assert_contains "$output" "📋 Current deployment: deploy-new-123" + assert_contains "$output" "📋 Rollback target: deploy-old-456" + assert_contains "$output" "📝 Creating ingress for rollback deployment..." + assert_contains "$output" "🔍 Creating internet-facing ingress..." 
+ assert_contains "$output" "✅ Traffic rollback configuration created" +} + +@test "rollback_traffic: creates ingress for old deployment" { + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/rollback_traffic" + + [ "$status" -eq 0 ] + [ -f "$OUTPUT_DIR/ingress-$SCOPE_ID-deploy-old-456.yaml" ] +} + +# ============================================================================= +# Error Cases +# ============================================================================= +@test "rollback_traffic: fails with full troubleshooting when route_traffic fails" { + gomplate() { + return 1 + } + export -f gomplate + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/rollback_traffic" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Rolling back traffic to previous deployment..." + assert_contains "$output" "📝 Creating ingress for rollback deployment..." + assert_contains "$output" "❌ Failed to build ingress template" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "🔧 How to fix:" +} + +# ============================================================================= +# Integration Tests +# ============================================================================= +@test "rollback_traffic: calls route_traffic with blue deployment id in context" { + local mock_dir="$BATS_TEST_TMPDIR/mock_service" + mkdir -p "$mock_dir/deployment/networking/gateway" + + cat > "$mock_dir/deployment/networking/gateway/route_traffic" << 'MOCK_SCRIPT' +#!/bin/bash +echo "CAPTURED_DEPLOYMENT_ID=$DEPLOYMENT_ID" >> "$BATS_TEST_TMPDIR/captured_values" +echo "CAPTURED_CONTEXT_DEPLOYMENT_ID=$(echo "$CONTEXT" | jq -r .deployment.id)" >> "$BATS_TEST_TMPDIR/captured_values" +MOCK_SCRIPT + chmod +x "$mock_dir/deployment/networking/gateway/route_traffic" + + run bash -c " + export SERVICE_PATH='$mock_dir' + export DEPLOYMENT_ID='$DEPLOYMENT_ID' + export CONTEXT='$CONTEXT' + export BATS_TEST_TMPDIR='$BATS_TEST_TMPDIR' + source 
'$PROJECT_ROOT/k8s/deployment/networking/gateway/rollback_traffic' + " + + [ "$status" -eq 0 ] + + # Verify route_traffic was called with blue deployment id + source "$BATS_TEST_TMPDIR/captured_values" + assert_equal "$CAPTURED_DEPLOYMENT_ID" "deploy-old-456" + assert_equal "$CAPTURED_CONTEXT_DEPLOYMENT_ID" "deploy-old-456" +} diff --git a/k8s/deployment/tests/networking/gateway/route_traffic.bats b/k8s/deployment/tests/networking/gateway/route_traffic.bats new file mode 100644 index 00000000..8736d271 --- /dev/null +++ b/k8s/deployment/tests/networking/gateway/route_traffic.bats @@ -0,0 +1,148 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/networking/gateway/route_traffic - ingress creation +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export OUTPUT_DIR="$BATS_TEST_TMPDIR" + export SCOPE_ID="scope-123" + export DEPLOYMENT_ID="deploy-456" + export INGRESS_VISIBILITY="internet-facing" + export TEMPLATE="$BATS_TEST_TMPDIR/template.yaml" + + export CONTEXT='{ + "scope": { + "slug": "my-app", + "domain": "app.example.com" + }, + "deployment": { + "id": "deploy-456" + } + }' + + # Create a mock template + echo 'apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ .scope.slug }}-ingress' > "$TEMPLATE" + + # Mock gomplate + gomplate() { + local context_file="" + local template_file="" + local out_file="" + while [[ $# -gt 0 ]]; do + case "$1" in + -c) context_file="$2"; shift 2 ;; + --file) template_file="$2"; shift 2 ;; + --out) out_file="$2"; shift 2 ;; + *) shift ;; + esac + done + # Write mock output + echo "# Generated ingress" > "$out_file" + return 0 + } + export -f gomplate +} + +teardown() { + unset 
CONTEXT + unset -f gomplate +} + +# ============================================================================= +# Success Case +# ============================================================================= +@test "route_traffic: succeeds with all expected logging" { + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/route_traffic" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Creating internet-facing ingress..." + assert_contains "$output" "📋 Scope: scope-123 | Deployment: deploy-456" + assert_contains "$output" "📋 Template: $TEMPLATE" + assert_contains "$output" "📋 Output: $OUTPUT_DIR/ingress-scope-123-deploy-456.yaml" + assert_contains "$output" "📝 Building ingress template..." + assert_contains "$output" "✅ Ingress template created: $OUTPUT_DIR/ingress-scope-123-deploy-456.yaml" +} + +@test "route_traffic: displays correct visibility type for internal" { + export INGRESS_VISIBILITY="internal" + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/route_traffic" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Creating internal ingress..." +} + +@test "route_traffic: generates ingress file and cleans up context" { + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/route_traffic" + + [ "$status" -eq 0 ] + [ -f "$OUTPUT_DIR/ingress-$SCOPE_ID-$DEPLOYMENT_ID.yaml" ] + [ ! -f "$OUTPUT_DIR/context-$SCOPE_ID-$DEPLOYMENT_ID.json" ] +} + +# ============================================================================= +# Error Cases +# ============================================================================= +@test "route_traffic: fails with full troubleshooting when gomplate fails" { + gomplate() { + echo "template: template.yaml:5: function 'undefined' not defined" >&2 + return 1 + } + export -f gomplate + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/route_traffic" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Creating internet-facing ingress..." 
+ assert_contains "$output" "📝 Building ingress template..." + assert_contains "$output" "❌ Failed to build ingress template" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Template file does not exist or is invalid" + assert_contains "$output" "- Scope attributes may be missing" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Verify template exists: ls -la $TEMPLATE" + assert_contains "$output" "- Verify that your scope has all required attributes" +} + +@test "route_traffic: cleans up context file on gomplate failure" { + gomplate() { + return 1 + } + export -f gomplate + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/route_traffic" + + [ "$status" -eq 1 ] + [ ! -f "$OUTPUT_DIR/context-$SCOPE_ID-$DEPLOYMENT_ID.json" ] +} + +# ============================================================================= +# Integration Tests +# ============================================================================= +@test "route_traffic: calls gomplate with correct context file" { + CAPTURED_CONTEXT="" + gomplate() { + while [[ $# -gt 0 ]]; do + case "$1" in + -c) CAPTURED_CONTEXT="$2"; shift 2 ;; + --out) echo "# Generated" > "$2"; shift 2 ;; + *) shift ;; + esac + done + return 0 + } + export -f gomplate + export CAPTURED_CONTEXT + + run bash "$PROJECT_ROOT/k8s/deployment/networking/gateway/route_traffic" + + [ "$status" -eq 0 ] +} diff --git a/k8s/deployment/tests/notify_active_domains.bats b/k8s/deployment/tests/notify_active_domains.bats new file mode 100644 index 00000000..35a284ac --- /dev/null +++ b/k8s/deployment/tests/notify_active_domains.bats @@ -0,0 +1,85 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/notify_active_domains - domain activation +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export CONTEXT='{ + "scope": { + "domains": [ + {"id": "dom-1", "name": "app.example.com"}, + {"id": "dom-2", "name": "api.example.com"} + ] + } + }' + + np() { + echo "np $*" + return 0 + } + export -f np +} + +teardown() { + unset CONTEXT + unset -f np +} + +# ============================================================================= +# Success Case +# ============================================================================= +@test "notify_active_domains: activates domains with correct logging" { + run source "$BATS_TEST_DIRNAME/../notify_active_domains" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Checking for custom domains to activate..." + assert_contains "$output" "📋 Found 2 custom domain(s) to activate" + assert_contains "$output" "📝 Activating custom domain: app.example.com..." + assert_contains "$output" "✅ Custom domain activated: app.example.com" + assert_contains "$output" "📝 Activating custom domain: api.example.com..." + assert_contains "$output" "✅ Custom domain activated: api.example.com" + assert_contains "$output" "✨ Custom domain activation completed" +} + +# ============================================================================= +# No Domains Case +# ============================================================================= +@test "notify_active_domains: skips when no domains configured" { + export CONTEXT='{"scope": {"domains": []}}' + + run source "$BATS_TEST_DIRNAME/../notify_active_domains" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Checking for custom domains to activate..." 
+ assert_contains "$output" "📋 No domains configured, skipping activation" +} + +# ============================================================================= +# Failure Case +# ============================================================================= +@test "notify_active_domains: shows error output and troubleshooting when np fails" { + np() { + echo '{"error": "scope write error: request failed with status 403: Forbidden"}' + return 1 # Simulate failure + } + export -f np + + run source "$BATS_TEST_DIRNAME/../notify_active_domains" + + [ "$status" -eq 0 ] # Script continues with other domains + assert_contains "$output" "❌ Failed to activate custom domain: app.example.com" + assert_contains "$output" '📋 Error: {"error": "scope write error: request failed with status 403: Forbidden"}' + assert_contains "$output" "scope write error" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Domain ID dom-1 may not exist" + assert_contains "$output" "Insufficient permissions (403 Forbidden)" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify domain exists: np scope domain get --id dom-1" + assert_contains "$output" "Check API token permissions" +} + diff --git a/k8s/deployment/tests/print_failed_deployment_hints.bats b/k8s/deployment/tests/print_failed_deployment_hints.bats new file mode 100644 index 00000000..14587515 --- /dev/null +++ b/k8s/deployment/tests/print_failed_deployment_hints.bats @@ -0,0 +1,51 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/print_failed_deployment_hints - error hints display +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export CONTEXT='{ + "scope": { + "name": "my-app", + "dimensions": "production", + "capabilities": { + "health_check": { + "path": "/health" + }, + "ram_memory": 512 + } + } + }' +} + +teardown() { + unset CONTEXT +} + +# ============================================================================= +# Hints Display Test +# ============================================================================= +@test "print_failed_deployment_hints: displays complete troubleshooting hints" { + run bash "$BATS_TEST_DIRNAME/../print_failed_deployment_hints" + + [ "$status" -eq 0 ] + # Main header + assert_contains "$output" "⚠️ Application Startup Issue Detected" + # Possible causes + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Your application was unable to start" + # How to fix section + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "port 8080" + assert_contains "$output" "/health" + assert_contains "$output" "Application Logs" + assert_contains "$output" "512Mi" + assert_contains "$output" "Environment Variables" + assert_contains "$output" "my-app" + assert_contains "$output" "production" +} diff --git a/k8s/deployment/tests/publish_alb_metrics.bats b/k8s/deployment/tests/publish_alb_metrics.bats new file mode 100644 index 00000000..48f85d03 --- /dev/null +++ b/k8s/deployment/tests/publish_alb_metrics.bats @@ -0,0 +1,279 @@ +#!/usr/bin/env bats + +setup() { + PROJECT_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + + export SCRIPT="$PROJECT_ROOT/k8s/deployment/publish_alb_metrics" + + # Default context + export CONTEXT='{"alb_name":"k8s-nullplatform-internet-facing","region":"us-east-1"}' + + # Default config + export ALB_METRICS_PUBLISH_ENABLED="true" + export ALB_METRICS_PUBLISH_TARGET="cloudwatch" + + # Track calls + export AWS_CALLS_LOG="$BATS_TEST_TMPDIR/aws_calls.log" + export CURL_CALLS_LOG="$BATS_TEST_TMPDIR/curl_calls.log" + + # Mock aws CLI + aws() { + echo "$*" >> "$AWS_CALLS_LOG" + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" + ;; + *"describe-listeners"*) + echo '{"Listeners":[{"ListenerArn":"arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/abc/123"}]}' + ;; + *"describe-rules"*) + echo '{"Rules":[{"IsDefault":true},{"IsDefault":false},{"IsDefault":false},{"IsDefault":false}]}' + ;; + *"describe-target-groups"*) + echo '{"TargetGroups":[{},{},{},{},{}]}' + ;; + *"put-metric-data"*) + return 0 + ;; + esac + } + export -f aws + + # Mock curl + curl() { + echo "$*" >> "$CURL_CALLS_LOG" + echo "202" + } + export -f curl + + # Source real get_config_value (uses CONTEXT + env vars already set) + source "$PROJECT_ROOT/k8s/utils/get_config_value" + export -f get_config_value + + # Mock log function (from k8s/logging) + log() { + local level="${1:-info}" + local message="${2:-}" + echo "$message" + } + export -f log +} + +run_script() { + run bash -c 'source "$SCRIPT"' +} + +# ============================================================================= +# Disabled / skipped scenarios +# ============================================================================= + +@test "skips silently when ALB_METRICS_PUBLISH_ENABLED is false" { + export ALB_METRICS_PUBLISH_ENABLED="false" + run_script + assert_equal "$status" "0" + assert_equal "$output" "" +} + +@test "skips silently when 
ALB_METRICS_PUBLISH_ENABLED is not set" { + unset ALB_METRICS_PUBLISH_ENABLED + run_script + assert_equal "$status" "0" + assert_equal "$output" "" +} + +# ============================================================================= +# Error scenarios +# ============================================================================= + +@test "warns when ALB name not found in context" { + export CONTEXT='{"region":"us-east-1"}' + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: ALB name not found in context" +} + +@test "warns when ALB name is null in context" { + export CONTEXT='{"alb_name":null,"region":"us-east-1"}' + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: ALB name not found in context" +} + +@test "warns when ALB not found in AWS" { + aws() { + case "$*" in + *"describe-load-balancers"*) echo "None" ;; + esac + } + export -f aws + + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: could not find ALB" +} + +@test "warns when describe-load-balancers fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) return 1 ;; + esac + } + export -f aws + + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: could not find ALB" +} + +@test "warns when describe-listeners fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;; + *"describe-listeners"*) return 1 ;; + esac + } + export -f aws + + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: could not retrieve listeners" +} + +# ============================================================================= +# CloudWatch success +# ============================================================================= + +@test "publishes to CloudWatch with correct rule and target group counts" { + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics published to 
CloudWatch (rules: 3, target_groups: 5)" +} + +@test "CloudWatch put-metric-data uses correct namespace and dimensions" { + run_script + local calls=$(cat "$AWS_CALLS_LOG") + assert_contains "$calls" "nullplatform/ApplicationELB" + assert_contains "$calls" "k8s-nullplatform-internet-facing" + assert_contains "$calls" "RuleCount" + assert_contains "$calls" "TargetGroupCount" +} + +@test "warns when CloudWatch put-metric-data fails" { + aws() { + echo "$*" >> "$AWS_CALLS_LOG" + case "$*" in + *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;; + *"describe-listeners"*) echo '{"Listeners":[{"ListenerArn":"arn:listener/123"}]}' ;; + *"describe-rules"*) echo '{"Rules":[{"IsDefault":true}]}' ;; + *"describe-target-groups"*) echo '{"TargetGroups":[]}' ;; + *"put-metric-data"*) return 1 ;; + esac + } + export -f aws + + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: failed to publish to CloudWatch" +} + +# ============================================================================= +# Datadog success +# ============================================================================= + +@test "publishes to Datadog with correct counts" { + export ALB_METRICS_PUBLISH_TARGET="datadog" + export DATADOG_API_KEY="test-api-key" + export DATADOG_SITE="datadoghq.com" + + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics published to Datadog (rules: 3, target_groups: 5)" +} + +@test "Datadog request uses correct endpoint and metric names" { + export ALB_METRICS_PUBLISH_TARGET="datadog" + export DATADOG_API_KEY="test-api-key" + export DATADOG_SITE="datadoghq.eu" + + run_script + local calls=$(cat "$CURL_CALLS_LOG") + assert_contains "$calls" "https://api.datadoghq.eu/api/v2/series" + assert_contains "$calls" "nullplatform.applicationelb.rule_count" + assert_contains "$calls" "nullplatform.applicationelb.target_group_count" + assert_contains "$calls" 
"alb_name:k8s-nullplatform-internet-facing" +} + +@test "warns when DATADOG_API_KEY not set" { + export ALB_METRICS_PUBLISH_TARGET="datadog" + unset DATADOG_API_KEY + + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: DATADOG_API_KEY not set" +} + +@test "warns when Datadog returns non-202" { + export ALB_METRICS_PUBLISH_TARGET="datadog" + export DATADOG_API_KEY="test-api-key" + + curl() { + echo "403" + } + export -f curl + + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: failed to publish to Datadog (HTTP 403)" +} + +# ============================================================================= +# Unknown target +# ============================================================================= + +@test "warns on unknown metrics target" { + export ALB_METRICS_PUBLISH_TARGET="prometheus" + run_script + assert_equal "$status" "0" + assert_contains "$output" "ALB metrics: unknown target 'prometheus'" +} + +# ============================================================================= +# Rule counting logic +# ============================================================================= + +@test "excludes default rules from count" { + aws() { + echo "$*" >> "$AWS_CALLS_LOG" + case "$*" in + *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;; + *"describe-listeners"*) echo '{"Listeners":[{"ListenerArn":"arn:listener/123"}]}' ;; + *"describe-rules"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false}]}' ;; + *"describe-target-groups"*) echo '{"TargetGroups":[{}]}' ;; + *"put-metric-data"*) return 0 ;; + esac + } + export -f aws + + run_script + assert_contains "$output" "rules: 1, target_groups: 1" +} + +@test "counts rules across multiple listeners" { + aws() { + echo "$*" >> "$AWS_CALLS_LOG" + case "$*" in + *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;; + *"describe-listeners"*) echo 
'{"Listeners":[{"ListenerArn":"arn:listener/1"},{"ListenerArn":"arn:listener/2"}]}' ;; + *"describe-rules"*"listener/1"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false},{"IsDefault":false}]}' ;; + *"describe-rules"*"listener/2"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false}]}' ;; + *"describe-rules"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false},{"IsDefault":false}]}' ;; + *"describe-target-groups"*) echo '{"TargetGroups":[{},{}]}' ;; + *"put-metric-data"*) return 0 ;; + esac + } + export -f aws + + run_script + assert_contains "$output" "rules: 3, target_groups: 2" +} diff --git a/k8s/deployment/tests/scale_deployments.bats b/k8s/deployment/tests/scale_deployments.bats new file mode 100644 index 00000000..8548622c --- /dev/null +++ b/k8s/deployment/tests/scale_deployments.bats @@ -0,0 +1,243 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/scale_deployments - scale blue/green deployments +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + # Set required environment variables + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export K8S_NAMESPACE="test-namespace" + export SCOPE_ID="scope-123" + export DEPLOYMENT_ID="deploy-new" + export DEPLOY_STRATEGY="rolling" + export DEPLOYMENT_MAX_WAIT_IN_SECONDS=60 + + # Base CONTEXT with required fields + export CONTEXT='{ + "scope": { + "id": "scope-123", + "current_active_deployment": "deploy-old" + }, + "green_replicas": "5", + "blue_replicas": "3" + }' + + # Track kubectl calls + export KUBECTL_CALLS="" + + # Mock kubectl + kubectl() { + KUBECTL_CALLS="$KUBECTL_CALLS|$*" + return 0 + } + export -f kubectl + + # Mock wait_blue_deployment_active + export NP_OUTPUT_DIR="$(mktemp -d)" + mkdir -p "$SERVICE_PATH/deployment" + + # Create a mock wait_blue_deployment_active that captures env vars before they're unset + cat > "$NP_OUTPUT_DIR/wait_blue_deployment_active" << 'EOF' +#!/bin/bash +echo "Mock: wait_blue_deployment_active called" +# Capture the values to global variables so they persist after unset +CAPTURED_TIMEOUT="$TIMEOUT" +CAPTURED_SKIP_DEPLOYMENT_STATUS_CHECK="$SKIP_DEPLOYMENT_STATUS_CHECK" +export CAPTURED_TIMEOUT CAPTURED_SKIP_DEPLOYMENT_STATUS_CHECK +EOF + chmod +x "$NP_OUTPUT_DIR/wait_blue_deployment_active" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + unset KUBECTL_CALLS + unset -f kubectl +} + +# Helper to run scale_deployments with mocked wait +run_scale_deployments() { + # Override the sourced script path + local script_content=$(cat "$PROJECT_ROOT/k8s/deployment/scale_deployments") + # Replace the source line with our mock + script_content=$(echo "$script_content" | sed "s|source \"\$SERVICE_PATH/deployment/wait_blue_deployment_active\"|source \"$NP_OUTPUT_DIR/wait_blue_deployment_active\"|") + + eval "$script_content" +} + +# 
============================================================================= +# Strategy Detection Tests +# ============================================================================= +@test "scale_deployments: only runs for rolling strategy" { + export DEPLOY_STRATEGY="rolling" + + run_scale_deployments + + assert_contains "$KUBECTL_CALLS" "scale deployment" +} + +@test "scale_deployments: skips scaling for blue-green strategy" { + export DEPLOY_STRATEGY="blue-green" + export KUBECTL_CALLS="" + + run_scale_deployments + + # Should not contain scale commands + [[ "$KUBECTL_CALLS" != *"scale deployment"* ]] +} + +@test "scale_deployments: skips scaling for unknown strategy" { + export DEPLOY_STRATEGY="unknown" + export KUBECTL_CALLS="" + + run_scale_deployments + + [[ "$KUBECTL_CALLS" != *"scale deployment"* ]] +} + +# ============================================================================= +# Green Deployment Scaling Tests +# ============================================================================= +@test "scale_deployments: scales green deployment to green_replicas" { + run_scale_deployments + + assert_contains "$KUBECTL_CALLS" "scale deployment d-scope-123-deploy-new" + assert_contains "$KUBECTL_CALLS" "--replicas=5" +} + +@test "scale_deployments: constructs correct green deployment name" { + run_scale_deployments + + assert_contains "$KUBECTL_CALLS" "d-scope-123-deploy-new" +} + +# ============================================================================= +# Blue Deployment Scaling Tests +# ============================================================================= +@test "scale_deployments: scales blue deployment to blue_replicas" { + run_scale_deployments + + assert_contains "$KUBECTL_CALLS" "scale deployment d-scope-123-deploy-old" + assert_contains "$KUBECTL_CALLS" "--replicas=3" +} + +@test "scale_deployments: constructs correct blue deployment name" { + run_scale_deployments + + assert_contains "$KUBECTL_CALLS" "d-scope-123-deploy-old" +} 
+ +# ============================================================================= +# Green and Blue Scaling Tests +# ============================================================================= +@test "scale_deployments: scales green and blue with correct commands" { + export CONTEXT=$(echo "$CONTEXT" | jq '.green_replicas = "7" | .blue_replicas = "2" | .scope.current_active_deployment = "deploy-active-123"') + export K8S_NAMESPACE="custom-namespace" + + run_scale_deployments + + assert_contains "$KUBECTL_CALLS" "scale deployment d-scope-123-deploy-new -n custom-namespace --replicas=7" + + assert_contains "$KUBECTL_CALLS" "scale deployment d-scope-123-deploy-active-123 -n custom-namespace --replicas=2" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "scale_deployments: fails when green deployment scale fails" { + kubectl() { + if [[ "$*" == *"deploy-new"* ]]; then + return 1 # Fail for green deployment + fi + return 0 + } + export -f kubectl + + run bash -c "source '$PROJECT_ROOT/testing/assertions.sh'; \ + export SERVICE_PATH='$SERVICE_PATH' K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' \ + DEPLOYMENT_ID='$DEPLOYMENT_ID' DEPLOY_STRATEGY='$DEPLOY_STRATEGY' CONTEXT='$CONTEXT'; \ + source '$PROJECT_ROOT/k8s/deployment/scale_deployments'" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to scale green deployment" +} + +@test "scale_deployments: fails when blue deployment scale fails" { + kubectl() { + if [[ "$*" == *"deploy-old"* ]]; then + return 1 # Fail for blue deployment + fi + return 0 + } + export -f kubectl + + run bash -c "source '$PROJECT_ROOT/testing/assertions.sh'; \ + export SERVICE_PATH='$SERVICE_PATH' K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' \ + DEPLOYMENT_ID='$DEPLOYMENT_ID' DEPLOY_STRATEGY='$DEPLOY_STRATEGY' CONTEXT='$CONTEXT'; \ + source 
'$PROJECT_ROOT/k8s/deployment/scale_deployments'" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to scale blue deployment" +} + +# ============================================================================= +# Wait Configuration Tests +# ============================================================================= +@test "scale_deployments: sets TIMEOUT from DEPLOYMENT_MAX_WAIT_IN_SECONDS" { + export DEPLOYMENT_MAX_WAIT_IN_SECONDS=120 + + run_scale_deployments + + assert_equal "$CAPTURED_TIMEOUT" "120" +} + +@test "scale_deployments: defaults TIMEOUT to 600 seconds" { + unset DEPLOYMENT_MAX_WAIT_IN_SECONDS + + run_scale_deployments + + assert_equal "$CAPTURED_TIMEOUT" "600" +} + +@test "scale_deployments: sets SKIP_DEPLOYMENT_STATUS_CHECK=true" { + run_scale_deployments + + assert_equal "$CAPTURED_SKIP_DEPLOYMENT_STATUS_CHECK" "true" +} + +# ============================================================================= +# Cleanup Tests +# ============================================================================= +@test "scale_deployments: unsets TIMEOUT after wait" { + run_scale_deployments + + # After the script runs, TIMEOUT should be unset + [ -z "$TIMEOUT" ] +} + +@test "scale_deployments: unsets SKIP_DEPLOYMENT_STATUS_CHECK after wait" { + run_scale_deployments + + [ -z "$SKIP_DEPLOYMENT_STATUS_CHECK" ] +} + +# ============================================================================= +# Order of Operations Tests +# ============================================================================= +@test "scale_deployments: scales green before blue" { + run_scale_deployments + + # Find positions of scale commands + local green_pos=$(echo "$KUBECTL_CALLS" | grep -o ".*deploy-new" | wc -c) + local blue_pos=$(echo "$KUBECTL_CALLS" | grep -o ".*deploy-old" | wc -c) + + # Green should appear first + [ "$green_pos" -lt "$blue_pos" ] +} diff --git a/k8s/deployment/tests/validate_alb_target_group_capacity.bats 
b/k8s/deployment/tests/validate_alb_target_group_capacity.bats new file mode 100644 index 00000000..08d1f28c --- /dev/null +++ b/k8s/deployment/tests/validate_alb_target_group_capacity.bats @@ -0,0 +1,384 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for validate_alb_target_group_capacity +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/utils/get_config_value" + + export SCRIPT="$PROJECT_ROOT/k8s/deployment/validate_alb_target_group_capacity" + + export ALB_NAME="k8s-nullplatform-internet-facing" + export REGION="us-east-1" + export ALB_MAX_TARGET_GROUPS="98" + export DNS_TYPE="route53" + + # Base CONTEXT + export CONTEXT='{ + "providers": {} + }' + + # Mock aws - default: ALB with 40 target groups + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "40" + return 0 + ;; + esac + } + export -f aws +} + +teardown() { + unset -f aws +} + +# ============================================================================= +# Success flow +# ============================================================================= +@test "validate_alb_target_group_capacity: success when under capacity" { + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Validating ALB target group capacity for 'k8s-nullplatform-internet-facing'..." 
+ assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 40 target groups (max: 98)" + assert_contains "$output" "✅ ALB target group capacity validated: 40/98" +} + +@test "validate_alb_target_group_capacity: displays debug info" { + export LOG_LEVEL="debug" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB: k8s-nullplatform-internet-facing | Region: us-east-1 | Max target groups: 98" + assert_contains "$output" "📋 ALB ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" +} + +# ============================================================================= +# Capacity exceeded +# ============================================================================= +@test "validate_alb_target_group_capacity: fails when at capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "98" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 98/98" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Too many services or deployments are attached to this ALB" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Remove unused deployments or services from the ALB" + assert_contains "$output" "Increase ALB_MAX_TARGET_GROUPS in values.yaml or scope-configurations provider (AWS limit is 100)" + assert_contains "$output" "Request an AWS service quota increase for target groups per ALB" + assert_contains "$output" "Consider using a separate ALB for additional deployments" +} + +@test "validate_alb_target_group_capacity: fails when over capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo 
"arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "100" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 100/98" +} + +# ============================================================================= +# Configuration via get_config_value +# ============================================================================= +@test "validate_alb_target_group_capacity: uses default ALB_MAX_TARGET_GROUPS of 98" { + unset ALB_MAX_TARGET_GROUPS + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 40 target groups (max: 98)" +} + +@test "validate_alb_target_group_capacity: ALB_MAX_TARGET_GROUPS from env var" { + export ALB_MAX_TARGET_GROUPS="30" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 40/30" +} + +@test "validate_alb_target_group_capacity: ALB_MAX_TARGET_GROUPS from scope-configurations provider" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_target_groups":"30"}}}}' + export ALB_MAX_TARGET_GROUPS="98" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 40/30" +} + +@test "validate_alb_target_group_capacity: ALB_MAX_TARGET_GROUPS from container-orchestration provider" { + export CONTEXT='{"providers":{"container-orchestration":{"balancer":{"alb_max_target_groups":"30"}}}}' + export ALB_MAX_TARGET_GROUPS="98" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group 
capacity: 40/30" +} + +@test "validate_alb_target_group_capacity: scope-configurations takes priority over container-orchestration" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_target_groups":"100"}},"container-orchestration":{"balancer":{"alb_max_target_groups":"30"}}}}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 40 target groups (max: 100)" +} + +@test "validate_alb_target_group_capacity: provider takes priority over env var" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_target_groups":"100"}}}}' + export ALB_MAX_TARGET_GROUPS="30" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 40 target groups (max: 100)" + assert_contains "$output" "✅ ALB target group capacity validated: 40/100" +} + +# ============================================================================= +# AWS API errors +# ============================================================================= +@test "validate_alb_target_group_capacity: fails when describe-load-balancers fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "An error occurred (LoadBalancerNotFound)" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to find load balancer 'k8s-nullplatform-internet-facing' in region 'us-east-1'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The load balancer may not exist or the agent lacks permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify the ALB exists: aws elbv2 describe-load-balancers --names k8s-nullplatform-internet-facing --region us-east-1" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeLoadBalancers" +} + +@test 
"validate_alb_target_group_capacity: fails when ALB ARN is None" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "None" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Load balancer 'k8s-nullplatform-internet-facing' not found in region 'us-east-1'" +} + +@test "validate_alb_target_group_capacity: fails when describe-target-groups fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "Access Denied" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to describe target groups for ALB 'k8s-nullplatform-internet-facing'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The agent may lack permissions to describe target groups" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeTargetGroups" +} + +# ============================================================================= +# Edge cases +# ============================================================================= +@test "validate_alb_target_group_capacity: handles zero target groups" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "0" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 0 target groups (max: 98)" + assert_contains "$output" "✅ ALB target group capacity validated: 0/98" +} + +@test "validate_alb_target_group_capacity: passes at exactly one below capacity" { + aws() { + case "$*" in + 
*"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "97" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "✅ ALB target group capacity validated: 97/98" +} + +@test "validate_alb_target_group_capacity: fails when target group count is non-numeric" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "WARNING: something unexpected" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Unexpected non-numeric target group count from ALB" + assert_contains "$output" "📋 ALB ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + assert_contains "$output" "📋 Received value: WARNING: something unexpected" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The AWS CLI returned an unexpected response format" +} + +@test "validate_alb_target_group_capacity: fails when ALB_MAX_TARGET_GROUPS is non-numeric" { + export ALB_MAX_TARGET_GROUPS="abc" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB_MAX_TARGET_GROUPS must be a numeric value, got: 'abc'" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Set a numeric value in values.yaml or scope-configurations provider" +} + +@test "validate_alb_target_group_capacity: empty ALB ARN response triggers error" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Load balancer 'k8s-nullplatform-internet-facing' not found in region 
'us-east-1'" +} + +# ============================================================================= +# DNS_TYPE guard +# ============================================================================= +@test "validate_alb_target_group_capacity: skips when DNS_TYPE is external_dns" { + export DNS_TYPE="external_dns" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + [[ "$output" != *"🔍 Validating ALB target group capacity"* ]] +} + +@test "validate_alb_target_group_capacity: skips when DNS_TYPE is azure" { + export DNS_TYPE="azure" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + [[ "$output" != *"🔍 Validating ALB target group capacity"* ]] +} + +@test "validate_alb_target_group_capacity: skips with debug message for non-route53 DNS" { + export DNS_TYPE="external_dns" + export LOG_LEVEL="debug" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "DNS type is 'external_dns', ALB target group validation only applies to route53, skipping" +} + +@test "validate_alb_target_group_capacity: runs when DNS_TYPE is route53" { + export DNS_TYPE="route53" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Validating ALB target group capacity for 'k8s-nullplatform-internet-facing'..." +} diff --git a/k8s/deployment/tests/verify_http_route_reconciliation.bats b/k8s/deployment/tests/verify_http_route_reconciliation.bats new file mode 100644 index 00000000..6ed938d8 --- /dev/null +++ b/k8s/deployment/tests/verify_http_route_reconciliation.bats @@ -0,0 +1,139 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/verify_http_route_reconciliation - HTTPRoute verify +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export K8S_NAMESPACE="test-namespace" + export SCOPE_ID="scope-123" + export INGRESS_VISIBILITY="internet-facing" + export MAX_WAIT_SECONDS=1 + export CHECK_INTERVAL=0 + + export CONTEXT='{ + "scope": { + "slug": "my-app" + } + }' +} + +teardown() { + unset CONTEXT +} + +# Helper to run script with mock kubectl +run_with_mock() { + local mock_response="$1" + run bash -c " + kubectl() { echo '$mock_response'; return 0; } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='$MAX_WAIT_SECONDS' CHECK_INTERVAL='$CHECK_INTERVAL' CONTEXT='$CONTEXT' + source '$BATS_TEST_DIRNAME/../verify_http_route_reconciliation' + " +} + +# ============================================================================= +# Success Case +# ============================================================================= +@test "verify_http_route_reconciliation: succeeds with correct logging" { + run_with_mock '{"status":{"parents":[{"conditions":[{"type":"Accepted","status":"True","reason":"Accepted","message":"Route accepted"},{"type":"ResolvedRefs","status":"True","reason":"ResolvedRefs","message":"Refs resolved"}]}]}}' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Verifying HTTPRoute reconciliation..." 
+ assert_contains "$output" "📋 HTTPRoute: k-8-s-my-app-scope-123-internet-facing | Namespace: test-namespace | Timeout: 1s" + assert_contains "$output" "✅ HTTPRoute successfully reconciled (Accepted: True, ResolvedRefs: True)" +} + +# ============================================================================= +# Error Cases +# ============================================================================= +@test "verify_http_route_reconciliation: fails with full troubleshooting on certificate error" { + run_with_mock '{"status":{"parents":[{"conditions":[{"type":"Accepted","status":"False","reason":"CertificateError","message":"TLS secret not found"},{"type":"ResolvedRefs","status":"True","reason":"ResolvedRefs","message":"Refs resolved"}]}]}}' + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying HTTPRoute reconciliation..." + assert_contains "$output" "❌ Certificate/TLS error detected" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- TLS secret does not exist in namespace test-namespace" + assert_contains "$output" "- Certificate is invalid or expired" + assert_contains "$output" "- Gateway references incorrect certificate secret" + assert_contains "$output" "- Accepted: CertificateError - TLS secret not found" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Verify TLS secret: kubectl get secret -n test-namespace | grep tls" + assert_contains "$output" "- Check certificate validity" + assert_contains "$output" "- Ensure Gateway references the correct secret" +} + +@test "verify_http_route_reconciliation: fails with full troubleshooting on backend error" { + run_with_mock '{"status":{"parents":[{"conditions":[{"type":"Accepted","status":"True","reason":"Accepted","message":"Accepted"},{"type":"ResolvedRefs","status":"False","reason":"BackendNotFound","message":"service my-svc not found"}]}]}}' + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying HTTPRoute reconciliation..." 
+ assert_contains "$output" "❌ Backend service error detected" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Referenced service does not exist" + assert_contains "$output" "- Service name is misspelled in HTTPRoute" + assert_contains "$output" "- Message: service my-svc not found" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- List services: kubectl get svc -n test-namespace" + assert_contains "$output" "- Verify backend service name in HTTPRoute" + assert_contains "$output" "- Ensure service has ready endpoints" +} + +@test "verify_http_route_reconciliation: fails with full troubleshooting when not accepted" { + run_with_mock '{"status":{"parents":[{"conditions":[{"type":"Accepted","status":"False","reason":"NotAccepted","message":"Gateway not found"},{"type":"ResolvedRefs","status":"True","reason":"ResolvedRefs","message":"Refs resolved"}]}]}}' + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying HTTPRoute reconciliation..." + assert_contains "$output" "❌ HTTPRoute not accepted by Gateway" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Reason: NotAccepted" + assert_contains "$output" "- Message: Gateway not found" + assert_contains "$output" "📋 All conditions:" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Check Gateway configuration" + assert_contains "$output" "- Verify HTTPRoute spec matches Gateway requirements" +} + +@test "verify_http_route_reconciliation: fails with full troubleshooting when refs not resolved" { + run_with_mock '{"status":{"parents":[{"conditions":[{"type":"Accepted","status":"True","reason":"Accepted","message":"Accepted"},{"type":"ResolvedRefs","status":"False","reason":"InvalidBackend","message":"Invalid backend port"}]}]}}' + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying HTTPRoute reconciliation..." 
+ assert_contains "$output" "❌ HTTPRoute references could not be resolved" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Reason: InvalidBackend" + assert_contains "$output" "- Message: Invalid backend port" + assert_contains "$output" "📋 All conditions:" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Verify all referenced services exist" + assert_contains "$output" "- Check backend service ports match" +} + +@test "verify_http_route_reconciliation: fails with full troubleshooting on timeout" { + export CHECK_INTERVAL=1 + run bash -c " + kubectl() { echo '{\"status\":{\"parents\":[]}}'; return 0; } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='1' CHECK_INTERVAL='1' CONTEXT='$CONTEXT' + source '$BATS_TEST_DIRNAME/../verify_http_route_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Timeout waiting for HTTPRoute reconciliation after 1s" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Gateway controller is not running" + assert_contains "$output" "- Network policies blocking reconciliation" + assert_contains "$output" "- Resource constraints on controller" + assert_contains "$output" "📋 Current conditions:" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Check Gateway controller logs" + assert_contains "$output" "- Verify Gateway and Istio configuration" +} diff --git a/k8s/deployment/tests/verify_ingress_reconciliation.bats b/k8s/deployment/tests/verify_ingress_reconciliation.bats new file mode 100644 index 00000000..1e216f96 --- /dev/null +++ b/k8s/deployment/tests/verify_ingress_reconciliation.bats @@ -0,0 +1,386 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/verify_ingress_reconciliation - ingress verification +# 
============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export K8S_NAMESPACE="test-namespace" + export SCOPE_ID="scope-123" + export INGRESS_VISIBILITY="internet-facing" + export REGION="us-east-1" + export ALB_RECONCILIATION_ENABLED="false" + export MAX_WAIT_SECONDS=1 + export CHECK_INTERVAL=0 + + export CONTEXT='{ + "scope": { + "slug": "my-app", + "domain": "app.example.com", + "domains": [] + }, + "alb_name": "k8s-test-alb", + "deployment": { + "strategy": "rolling" + } + }' +} + +teardown() { + unset CONTEXT +} + +# ============================================================================= +# Success Case +# ============================================================================= +@test "verify_ingress_reconciliation: succeeds with correct logging" { + run bash -c " + kubectl() { + case \"\$1\" in + get) + if [[ \"\$2\" == \"ingress\" ]]; then + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + return 0 + elif [[ \"\$2\" == \"events\" ]]; then + echo '{\"items\": [{\"type\": \"Normal\", \"reason\": \"SuccessfullyReconciled\", \"message\": \"Ingress reconciled\", \"involvedObject\": {\"resourceVersion\": \"12345\"}, \"lastTimestamp\": \"2024-01-01T00:00:00Z\"}]}' + return 0 + fi + ;; + esac + return 0 + } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='$MAX_WAIT_SECONDS' CHECK_INTERVAL='$CHECK_INTERVAL' CONTEXT='$CONTEXT' + export ALB_RECONCILIATION_ENABLED='$ALB_RECONCILIATION_ENABLED' REGION='$REGION' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Verifying ingress reconciliation..." 
+ assert_contains "$output" "📋 Ingress: k-8-s-my-app-scope-123-internet-facing | Namespace: test-namespace | Timeout: 1s" + assert_contains "$output" "📋 ALB reconciliation disabled, checking cluster events only" + assert_contains "$output" "✅ Ingress successfully reconciled" +} + +@test "verify_ingress_reconciliation: skips for blue-green when ALB disabled" { + local bg_context='{"scope":{"slug":"my-app","domain":"app.example.com"},"deployment":{"strategy":"blue_green"}}' + + run bash -c " + kubectl() { return 0; } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='$MAX_WAIT_SECONDS' CHECK_INTERVAL='$CHECK_INTERVAL' + export ALB_RECONCILIATION_ENABLED='false' REGION='$REGION' + export CONTEXT='$bg_context' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Verifying ingress reconciliation..." + assert_contains "$output" "⚠️ Skipping ALB verification (ALB access needed for blue-green traffic validation)" +} + +# ============================================================================= +# Error Cases +# ============================================================================= +@test "verify_ingress_reconciliation: fails with full troubleshooting on certificate error" { + run bash -c " + kubectl() { + case \"\$1\" in + get) + if [[ \"\$2\" == \"ingress\" ]]; then + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + return 0 + elif [[ \"\$2\" == \"events\" ]]; then + echo '{\"items\": [{\"type\": \"Warning\", \"reason\": \"CertificateError\", \"message\": \"no certificate found for host app.example.com\", \"involvedObject\": {\"resourceVersion\": \"12345\"}, \"lastTimestamp\": \"2024-01-01T00:00:00Z\"}]}' + return 0 + fi + ;; + esac + return 0 + } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export 
MAX_WAIT_SECONDS='$MAX_WAIT_SECONDS' CHECK_INTERVAL='$CHECK_INTERVAL' CONTEXT='$CONTEXT' + export ALB_RECONCILIATION_ENABLED='$ALB_RECONCILIATION_ENABLED' REGION='$REGION' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Certificate error detected" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Ingress hostname does not match any SSL/TLS certificate in ACM" + assert_contains "$output" "- Certificate does not cover the hostname (check wildcards)" + assert_contains "$output" "- Message: no certificate found for host app.example.com" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Verify hostname matches certificate in ACM" + assert_contains "$output" "- Ensure certificate includes exact hostname or matching wildcard" +} + +@test "verify_ingress_reconciliation: fails with full troubleshooting when ingress not found" { + run bash -c " + kubectl() { + case \"\$1\" in + get) + if [[ \"\$2\" == \"ingress\" ]]; then + return 1 + fi + ;; + esac + return 0 + } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='$MAX_WAIT_SECONDS' CHECK_INTERVAL='$CHECK_INTERVAL' CONTEXT='$CONTEXT' + export ALB_RECONCILIATION_ENABLED='$ALB_RECONCILIATION_ENABLED' REGION='$REGION' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to get ingress k-8-s-my-app-scope-123-internet-facing" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- Ingress does not exist yet" + assert_contains "$output" "- Namespace test-namespace is incorrect" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- List ingresses: kubectl get ingress -n test-namespace" +} + +@test "verify_ingress_reconciliation: fails when ALB not found" { + run bash -c " + kubectl() { + 
echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + return 0 + } + aws() { + echo 'An error occurred (LoadBalancerNotFound)' + return 1 + } + export -f kubectl aws + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='$MAX_WAIT_SECONDS' CHECK_INTERVAL='$CHECK_INTERVAL' CONTEXT='$CONTEXT' + export ALB_RECONCILIATION_ENABLED='true' REGION='$REGION' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying ingress reconciliation..." + assert_contains "$output" "📋 ALB validation enabled: k8s-test-alb for domain app.example.com" + assert_contains "$output" "⚠️ Could not find ALB: k8s-test-alb" +} + +@test "verify_ingress_reconciliation: fails when cannot get ALB listeners" { + run bash -c " + kubectl() { + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + return 0 + } + aws() { + case \"\$1\" in + elbv2) + case \"\$2\" in + describe-load-balancers) + echo 'arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/test-alb/abc123' + return 0 + ;; + describe-listeners) + echo 'AccessDenied: User is not authorized' + return 1 + ;; + esac + ;; + esac + return 0 + } + export -f kubectl aws + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='1' CHECK_INTERVAL='1' CONTEXT='$CONTEXT' + export ALB_RECONCILIATION_ENABLED='true' REGION='$REGION' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying ingress reconciliation..." 
+ assert_contains "$output" "📋 ALB validation enabled: k8s-test-alb for domain app.example.com" + assert_contains "$output" "⚠️ Could not get listeners for ALB" +} + +@test "verify_ingress_reconciliation: detects weights mismatch" { + local weights_context='{"scope":{"slug":"my-app","domain":"app.example.com","current_active_deployment":"deploy-old"},"alb_name":"k8s-test-alb","deployment":{"strategy":"rolling","strategy_data":{"desired_switched_traffic":50}}}' + + run bash -c " + kubectl() { + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + return 0 + } + aws() { + case \"\$2\" in + describe-load-balancers) + echo 'arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/test-alb/abc123' + ;; + describe-listeners) + echo '{\"Listeners\":[{\"ListenerArn\":\"arn:aws:listener/123\",\"Port\":443}]}' + ;; + describe-rules) + echo '{\"Rules\":[{\"Conditions\":[{\"Field\":\"host-header\",\"Values\":[\"app.example.com\"]}],\"Actions\":[{\"Type\":\"forward\",\"ForwardConfig\":{\"TargetGroups\":[{\"Weight\":80},{\"Weight\":20}]}}]}]}' + ;; + esac + return 0 + } + export -f kubectl aws + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='1' CHECK_INTERVAL='1' + export ALB_RECONCILIATION_ENABLED='true' VERIFY_WEIGHTS='true' REGION='$REGION' + export CONTEXT='$weights_context' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying ingress reconciliation..." 
+ assert_contains "$output" "📋 ALB validation enabled: k8s-test-alb for domain app.example.com" + assert_contains "$output" "📝 Checking domain: app.example.com" + assert_contains "$output" "✅ Found rule for domain: app.example.com" + assert_contains "$output" "❌ Weights mismatch on listener port 443: expected=50/50 actual=20/80" +} + +@test "verify_ingress_reconciliation: skips weight check on additional port listener when blue has no service" { + # Scenario: gRPC (port 50051) was added to scope AFTER the blue deployment was created. + # The blue deployment has no K8s service for gRPC, so the ingress routes 100% to green. + # The verify script should skip weight verification on the gRPC listener and check the + # primary HTTP listener (port 443) instead. + local ctx='{"scope":{"slug":"my-app","domain":"app.example.com","current_active_deployment":"deploy-old","capabilities":{"additional_ports":[{"port":50051,"type":"GRPC"}]}},"alb_name":"k8s-test-alb","blue_additional_port_services":{"grpc-50051":false},"deployment":{"strategy":"blue_green","strategy_data":{"desired_switched_traffic":10}}}' + + run bash -c " + kubectl() { + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + return 0 + } + aws() { + case \"\$2\" in + describe-load-balancers) + echo 'arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/test-alb/abc123' + ;; + describe-listeners) + echo '{\"Listeners\":[{\"ListenerArn\":\"arn:aws:listener/grpc\",\"Port\":50051},{\"ListenerArn\":\"arn:aws:listener/https\",\"Port\":443}]}' + ;; + describe-rules) + if [[ \"\$4\" == *\"grpc\"* ]]; then + echo '{\"Rules\":[{\"Conditions\":[{\"Field\":\"host-header\",\"Values\":[\"app.example.com\"]}],\"Actions\":[{\"Type\":\"forward\",\"ForwardConfig\":{\"TargetGroups\":[{\"Weight\":100}]}}]}]}' + else + echo 
'{\"Rules\":[{\"Conditions\":[{\"Field\":\"host-header\",\"Values\":[\"app.example.com\"]}],\"Actions\":[{\"Type\":\"forward\",\"ForwardConfig\":{\"TargetGroups\":[{\"Weight\":90},{\"Weight\":10}]}}]}]}' + fi + ;; + esac + return 0 + } + export -f kubectl aws + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='1' CHECK_INTERVAL='1' + export ALB_RECONCILIATION_ENABLED='true' VERIFY_WEIGHTS='true' REGION='$REGION' + export CONTEXT='$ctx' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 0 ] + assert_contains "$output" "Skipping weight check on listener port 50051" + assert_contains "$output" "✅ Weights match on listener port 443" + assert_contains "$output" "✅ ALB configuration validated successfully" +} + +@test "verify_ingress_reconciliation: detects domain not found in ALB rules" { + run bash -c " + kubectl() { + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + return 0 + } + aws() { + case \"\$2\" in + describe-load-balancers) + echo 'arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/test-alb/abc123' + ;; + describe-listeners) + echo '{\"Listeners\":[{\"ListenerArn\":\"arn:aws:listener/123\"}]}' + ;; + describe-rules) + echo '{\"Rules\":[{\"Conditions\":[{\"Field\":\"host-header\",\"Values\":[\"other-domain.com\"]}]}]}' + ;; + esac + return 0 + } + export -f kubectl aws + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='1' CHECK_INTERVAL='1' CONTEXT='$CONTEXT' + export ALB_RECONCILIATION_ENABLED='true' REGION='$REGION' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying ingress reconciliation..." 
+ assert_contains "$output" "📋 ALB validation enabled: k8s-test-alb for domain app.example.com" + assert_contains "$output" "📝 Checking domain: app.example.com" + assert_contains "$output" "❌ Domain not found in ALB rules: app.example.com" + assert_contains "$output" "⚠️ Some domains missing from ALB configuration" +} + +@test "verify_ingress_reconciliation: fails with full troubleshooting on timeout" { + run bash -c " + kubectl() { + case \"\$2\" in + ingress) + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + ;; + events) + echo '{\"items\": []}' + ;; + esac + return 0 + } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='1' CHECK_INTERVAL='1' CONTEXT='$CONTEXT' + export ALB_RECONCILIATION_ENABLED='false' REGION='$REGION' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Timeout waiting for ingress reconciliation after 1s" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "- ALB Ingress Controller not running or unhealthy" + assert_contains "$output" "- Network connectivity issues" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "- Check controller: kubectl logs -n kube-system -l app.kubernetes.io/name=aws-load-balancer-controller" + assert_contains "$output" "- Check ingress: kubectl describe ingress k-8-s-my-app-scope-123-internet-facing -n test-namespace" + assert_contains "$output" "📋 Recent events:" +} + +@test "verify_ingress_reconciliation: fails on Error event type with error messages" { + run bash -c " + kubectl() { + case \"\$2\" in + ingress) + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + ;; + events) + echo '{\"items\": [{\"type\": \"Error\", \"reason\": \"SyncFailed\", \"message\": \"Failed to sync ALB\", \"involvedObject\": {\"resourceVersion\": \"12345\"}, \"lastTimestamp\": \"2024-01-01T00:00:00Z\"}]}' + ;; 
+ esac + return 0 + } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='$MAX_WAIT_SECONDS' CHECK_INTERVAL='$CHECK_INTERVAL' CONTEXT='$CONTEXT' + export ALB_RECONCILIATION_ENABLED='false' REGION='$REGION' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Verifying ingress reconciliation..." + assert_contains "$output" "📋 ALB reconciliation disabled, checking cluster events only" + assert_contains "$output" "❌ Ingress reconciliation failed" + assert_contains "$output" "💡 Error messages:" + assert_contains "$output" "- Failed to sync ALB" +} diff --git a/k8s/deployment/tests/verify_networking_reconciliation.bats b/k8s/deployment/tests/verify_networking_reconciliation.bats new file mode 100644 index 00000000..7972e07e --- /dev/null +++ b/k8s/deployment/tests/verify_networking_reconciliation.bats @@ -0,0 +1,56 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/verify_networking_reconciliation - networking verify +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + + # Mock the sourced scripts + export INGRESS_RECONCILIATION_CALLED="false" + export HTTP_ROUTE_RECONCILIATION_CALLED="false" +} + +teardown() { + unset DNS_TYPE +} + +# ============================================================================= +# DNS Type Routing Tests +# ============================================================================= +@test "verify_networking_reconciliation: shows start message and routes by DNS type" { + export DNS_TYPE="route53" + + local bg_context='{"scope":{"slug":"my-app","domain":"app.example.com"},"deployment":{"strategy":"blue_green"}}' + + run bash -c " + kubectl() { return 0; } + export -f kubectl + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='$MAX_WAIT_SECONDS' CHECK_INTERVAL='$CHECK_INTERVAL' + export ALB_RECONCILIATION_ENABLED='false' REGION='$REGION' + export CONTEXT='$bg_context' + source '$BATS_TEST_DIRNAME/../verify_networking_reconciliation' + " + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Verifying networking reconciliation for DNS type: route53" + assert_contains "$output" "🔍 Verifying ingress reconciliation..." 
+ assert_contains "$output" "⚠️ Skipping ALB verification (ALB access needed for blue-green traffic validation)" +} + +@test "verify_networking_reconciliation: skips for unsupported DNS types" { + export DNS_TYPE="unknown" + + run bash "$BATS_TEST_DIRNAME/../verify_networking_reconciliation" + + [ "$status" -eq 0 ] + + assert_contains "$output" "🔍 Verifying networking reconciliation for DNS type: unknown" + assert_contains "$output" "⚠️ Ingress reconciliation not available for DNS type: unknown, skipping" +} diff --git a/k8s/deployment/tests/wait_blue_deployment_active.bats b/k8s/deployment/tests/wait_blue_deployment_active.bats new file mode 100644 index 00000000..92af84e8 --- /dev/null +++ b/k8s/deployment/tests/wait_blue_deployment_active.bats @@ -0,0 +1,93 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/wait_blue_deployment_active - blue deployment wait +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export DEPLOYMENT_ID="deploy-new-123" + + export CONTEXT='{ + "scope": { + "current_active_deployment": "deploy-old-456" + }, + "deployment": { + "id": "deploy-new-123" + } + }' +} + +teardown() { + unset CONTEXT +} + +# ============================================================================= +# Deployment ID Handling Tests +# ============================================================================= +@test "wait_blue_deployment_active: extracts current_active_deployment as blue" { + blue_id=$(echo "$CONTEXT" | jq -r .scope.current_active_deployment) + + assert_equal "$blue_id" "deploy-old-456" +} + +@test "wait_blue_deployment_active: preserves new deployment ID after" { + # The script should restore DEPLOYMENT_ID to the new deployment + assert_equal "$DEPLOYMENT_ID" "deploy-new-123" +} + +# ============================================================================= +# Context Update Tests +# ============================================================================= +@test "wait_blue_deployment_active: updates context with blue deployment ID" { + updated_context=$(echo "$CONTEXT" | jq \ + --arg deployment_id "deploy-old-456" \ + '.deployment.id = $deployment_id') + + updated_id=$(echo "$updated_context" | jq -r .deployment.id) + + assert_equal "$updated_id" "deploy-old-456" +} + +@test "wait_blue_deployment_active: restores context with new deployment ID" { + updated_context=$(echo "$CONTEXT" | jq \ + --arg deployment_id "deploy-new-123" \ + '.deployment.id = $deployment_id') + + updated_id=$(echo "$updated_context" | jq -r .deployment.id) + + assert_equal "$updated_id" "deploy-new-123" +} + +# ============================================================================= +# Integration Tests +# 
============================================================================= +@test "wait_blue_deployment_active: calls wait_deployment_active with blue deployment id in context" { + local mock_dir="$BATS_TEST_TMPDIR/mock_service" + mkdir -p "$mock_dir/deployment" + + cat > "$mock_dir/deployment/wait_deployment_active" << 'MOCK_SCRIPT' +#!/bin/bash +echo "CAPTURED_DEPLOYMENT_ID=$DEPLOYMENT_ID" >> "$BATS_TEST_TMPDIR/captured_values" +echo "CAPTURED_CONTEXT_DEPLOYMENT_ID=$(echo "$CONTEXT" | jq -r .deployment.id)" >> "$BATS_TEST_TMPDIR/captured_values" +MOCK_SCRIPT + chmod +x "$mock_dir/deployment/wait_deployment_active" + + run bash -c " + export SERVICE_PATH='$mock_dir' + export DEPLOYMENT_ID='$DEPLOYMENT_ID' + export CONTEXT='$CONTEXT' + export BATS_TEST_TMPDIR='$BATS_TEST_TMPDIR' + source '$BATS_TEST_DIRNAME/../wait_blue_deployment_active' + " + + [ "$status" -eq 0 ] + + source "$BATS_TEST_TMPDIR/captured_values" + assert_equal "$CAPTURED_DEPLOYMENT_ID" "deploy-old-456" + assert_equal "$CAPTURED_CONTEXT_DEPLOYMENT_ID" "deploy-old-456" +} diff --git a/k8s/deployment/tests/wait_deployment_active.bats b/k8s/deployment/tests/wait_deployment_active.bats new file mode 100644 index 00000000..5983ec19 --- /dev/null +++ b/k8s/deployment/tests/wait_deployment_active.bats @@ -0,0 +1,347 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for deployment/wait_deployment_active - poll until deployment ready +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export K8S_NAMESPACE="test-namespace" + export SCOPE_ID="scope-123" + export DEPLOYMENT_ID="deploy-456" + export TIMEOUT=30 + export NP_API_KEY="test-api-key" + export SKIP_DEPLOYMENT_STATUS_CHECK="false" + + # Mock np CLI - running by default + np() { + case "$1" in + deployment) + echo "running" + ;; + esac + } + export -f np + + # Mock kubectl - deployment ready by default + kubectl() { + case "$*" in + "get deployment d-scope-123-deploy-456 -n test-namespace -o json") + echo '{ + "spec": {"replicas": 3}, + "status": { + "availableReplicas": 3, + "updatedReplicas": 3, + "readyReplicas": 3 + } + }' + ;; + "get pods"*) + echo "" + ;; + "get events"*) + echo '{"items":[]}' + ;; + *) + return 0 + ;; + esac + } + export -f kubectl +} + +teardown() { + unset -f np + unset -f kubectl +} + +# ============================================================================= +# Success Case +# ============================================================================= +@test "wait_deployment_active: succeeds with all expected logging when replicas ready" { + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Waiting for deployment 'd-scope-123-deploy-456' to become active..." + assert_contains "$output" "📋 Namespace: test-namespace" + assert_contains "$output" "📋 Timeout: 30s (max 3 iterations)" + assert_contains "$output" "📡 Checking deployment status (attempt 1/3)..." + assert_contains "$output" "✅ All pods in deployment 'd-scope-123-deploy-456' are available and ready!" 
+} + +@test "wait_deployment_active: accepts waiting_for_instances status" { + np() { + echo "waiting_for_instances" + } + export -f np + + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 0 ] + assert_contains "$output" "✅ All pods in deployment 'd-scope-123-deploy-456' are available and ready!" +} + +@test "wait_deployment_active: skips NP status check when SKIP_DEPLOYMENT_STATUS_CHECK=true" { + export SKIP_DEPLOYMENT_STATUS_CHECK="true" + + np() { + echo "failed" # Would fail if checked + } + export -f np + + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 0 ] + assert_contains "$output" "✅ All pods in deployment 'd-scope-123-deploy-456' are available and ready!" +} + +# ============================================================================= +# Timeout Error Case +# ============================================================================= +@test "wait_deployment_active: fails with full troubleshooting on timeout" { + # TIMEOUT=5 means MAX_ITERATIONS=0, so first iteration (1 > 0) times out immediately + export TIMEOUT=5 + + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Waiting for deployment 'd-scope-123-deploy-456' to become active..." 
+ assert_contains "$output" "📋 Namespace: test-namespace" + assert_contains "$output" "📋 Timeout: 5s (max 0 iterations)" + assert_contains "$output" "❌ Timeout waiting for deployment" + assert_contains "$output" "📋 Maximum iterations (0) reached" +} + +# ============================================================================= +# NP CLI Error Cases +# ============================================================================= +@test "wait_deployment_active: fails with full troubleshooting when NP CLI fails" { + np() { + echo "Error connecting to API" >&2 + return 1 + } + export -f np + + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Waiting for deployment 'd-scope-123-deploy-456' to become active..." + assert_contains "$output" "📡 Checking deployment status (attempt 1/" + assert_contains "$output" "❌ Failed to read deployment status" + assert_contains "$output" "📋 NP CLI error:" +} + +@test "wait_deployment_active: fails when deployment status is null" { + np() { + echo "null" + } + export -f np + + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Deployment status not found for ID deploy-456" +} + +@test "wait_deployment_active: fails when NP deployment status is not running" { + export SKIP_DEPLOYMENT_STATUS_CHECK="false" + + np() { + echo "failed" + } + export -f np + + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Deployment is no longer running (status: failed)" +} + +# ============================================================================= +# Kubectl Error Cases +# ============================================================================= +@test "wait_deployment_active: fails when K8s deployment not found" { + kubectl() { + case "$*" in + "get deployment"*"-o json"*) + return 1 + ;; + esac + } + export -f kubectl + + run bash 
"$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Deployment 'd-scope-123-deploy-456' not found in namespace 'test-namespace'" +} + +# ============================================================================= +# Replica Status Display Tests +# ============================================================================= +@test "wait_deployment_active: reports replica status correctly" { + run bash -c " + sleep() { :; } # Mock sleep to be instant + export -f sleep + + kubectl() { + case \"\$*\" in + \"get deployment\"*\"-o json\"*) + echo '{ + \"spec\": {\"replicas\": 5}, + \"status\": { + \"availableReplicas\": 3, + \"updatedReplicas\": 4, + \"readyReplicas\": 3 + } + }' + ;; + \"get pods\"*) + echo '' + ;; + \"get events\"*) + echo '{\"items\":[]}' + ;; + esac + } + export -f kubectl + + np() { echo 'running'; } + export -f np + + export SERVICE_PATH='$SERVICE_PATH' K8S_NAMESPACE='$K8S_NAMESPACE' + export SCOPE_ID='$SCOPE_ID' DEPLOYMENT_ID='$DEPLOYMENT_ID' + export TIMEOUT=10 NP_API_KEY='$NP_API_KEY' SKIP_DEPLOYMENT_STATUS_CHECK='false' + bash '$BATS_TEST_DIRNAME/../wait_deployment_active' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "Deployment status - Available: 3/5, Updated: 4/5, Ready: 3/5" + assert_contains "$output" "❌ Timeout waiting for deployment" +} + +@test "wait_deployment_active: handles missing status fields defaults to 0" { + run bash -c " + sleep() { :; } # Mock sleep to be instant + export -f sleep + + kubectl() { + case \"\$*\" in + \"get deployment\"*\"-o json\"*) + echo '{ + \"spec\": {\"replicas\": 3}, + \"status\": {} + }' + ;; + \"get pods\"*) + echo '' + ;; + \"get events\"*) + echo '{\"items\":[]}' + ;; + esac + } + export -f kubectl + + np() { echo 'running'; } + export -f np + + export SERVICE_PATH='$SERVICE_PATH' K8S_NAMESPACE='$K8S_NAMESPACE' + export SCOPE_ID='$SCOPE_ID' DEPLOYMENT_ID='$DEPLOYMENT_ID' + export TIMEOUT=10 NP_API_KEY='$NP_API_KEY' 
SKIP_DEPLOYMENT_STATUS_CHECK='false' + bash '$BATS_TEST_DIRNAME/../wait_deployment_active' + " + + [ "$status" -eq 1 ] + assert_contains "$output" "Available: 0/3" +} + +# ============================================================================= +# Zero Replicas Test +# ============================================================================= +@test "wait_deployment_active: does not succeed with zero desired replicas" { + # Use TIMEOUT=5 for immediate timeout + export TIMEOUT=5 + + kubectl() { + case "$*" in + "get deployment"*"-o json"*) + echo '{ + "spec": {"replicas": 0}, + "status": { + "availableReplicas": 0, + "updatedReplicas": 0, + "readyReplicas": 0 + } + }' + ;; + "get pods"*) + echo "" + ;; + "get events"*) + echo '{"items":[]}' + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + # Should timeout because desired > 0 check fails + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Timeout waiting for deployment" +} + +# ============================================================================= +# Event Collection Tests +# ============================================================================= +@test "wait_deployment_active: collects and displays deployment events" { + kubectl() { + case "$*" in + "get deployment"*"-o json"*) + echo '{ + "spec": {"replicas": 3}, + "status": { + "availableReplicas": 3, + "updatedReplicas": 3, + "readyReplicas": 3 + } + }' + ;; + "get pods"*) + echo "" + ;; + "get events"*"Deployment"*) + echo '{"items":[{"effectiveTimestamp":"2024-01-01T00:00:00Z","type":"Normal","involvedObject":{"kind":"Deployment","name":"d-scope-123-deploy-456"},"reason":"ScalingUp","message":"Scaled up replica set"}]}' + ;; + "get events"*) + echo '{"items":[]}' + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../wait_deployment_active" + + [ "$status" -eq 0 ] + assert_contains "$output" "✅ All pods in deployment 'd-scope-123-deploy-456' are available and ready!" 
+} + +# ============================================================================= +# Iteration Calculation Test +# ============================================================================= +@test "wait_deployment_active: calculates max iterations from timeout correctly" { + export TIMEOUT=60 + + run bash -c ' + MAX_ITERATIONS=$(( TIMEOUT / 10 )) + echo $MAX_ITERATIONS + ' + + [ "$status" -eq 0 ] + assert_equal "$output" "6" +} diff --git a/k8s/deployment/validate_alb_target_group_capacity b/k8s/deployment/validate_alb_target_group_capacity new file mode 100755 index 00000000..9b3fc8de --- /dev/null +++ b/k8s/deployment/validate_alb_target_group_capacity @@ -0,0 +1,114 @@ +#!/bin/bash + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/../utils/get_config_value" + +if [[ "$DNS_TYPE" != "route53" ]]; then + log debug "📋 DNS type is '$DNS_TYPE', ALB target group validation only applies to route53, skipping" + return 0 +fi + +ALB_MAX_TARGET_GROUPS=$(get_config_value \ + --env ALB_MAX_TARGET_GROUPS \ + --provider '.providers["scope-configurations"].networking.alb_max_target_groups' \ + --provider '.providers["container-orchestration"].balancer.alb_max_target_groups' \ + --default "98" +) + +if ! [[ "$ALB_MAX_TARGET_GROUPS" =~ ^[0-9]+$ ]]; then + log error "❌ ALB_MAX_TARGET_GROUPS must be a numeric value, got: '$ALB_MAX_TARGET_GROUPS'" + log error "" + log error "🔧 How to fix:" + log error " • Set a numeric value in values.yaml or scope-configurations provider" + log error "" + exit 1 +fi + +log info "🔍 Validating ALB target group capacity for '$ALB_NAME'..." 
+log debug "📋 ALB: $ALB_NAME | Region: $REGION | Max target groups: $ALB_MAX_TARGET_GROUPS" + +# Get the ALB ARN +ALB_ARN=$(aws elbv2 describe-load-balancers \ + --names "$ALB_NAME" \ + --region "$REGION" \ + --query 'LoadBalancers[0].LoadBalancerArn' \ + --output text \ + --no-paginate 2>&1) || { + log error "❌ Failed to find load balancer '$ALB_NAME' in region '$REGION'" + log error "" + log error "💡 Possible causes:" + log error " The load balancer may not exist or the agent lacks permissions" + log error "" + log error "🔧 How to fix:" + log error " • Verify the ALB exists: aws elbv2 describe-load-balancers --names $ALB_NAME --region $REGION" + log error " • Check IAM permissions for elbv2:DescribeLoadBalancers" + log error "" + exit 1 +} + +if [[ -z "$ALB_ARN" ]] || [[ "$ALB_ARN" == "None" ]]; then + log error "❌ Load balancer '$ALB_NAME' not found in region '$REGION'" + log error "" + log error "💡 Possible causes:" + log error " The load balancer name may be incorrect or it was deleted" + log error "" + log error "🔧 How to fix:" + log error " • List available ALBs: aws elbv2 describe-load-balancers --region $REGION" + log error " • Check the balancer name in values.yaml or scope-configurations provider" + log error "" + exit 1 +fi + +log debug "📋 ALB ARN: $ALB_ARN" + +# Count target groups attached to this ALB +TARGET_GROUP_COUNT=$(aws elbv2 describe-target-groups \ + --load-balancer-arn "$ALB_ARN" \ + --region "$REGION" \ + --query 'length(TargetGroups)' \ + --output text \ + --no-paginate 2>&1) || { + log error "❌ Failed to describe target groups for ALB '$ALB_NAME'" + log error "" + log error "💡 Possible causes:" + log error " The agent may lack permissions to describe target groups" + log error "" + log error "🔧 How to fix:" + log error " • Check IAM permissions for elbv2:DescribeTargetGroups" + log error "" + exit 1 +} + +if ! 
[[ "$TARGET_GROUP_COUNT" =~ ^[0-9]+$ ]]; then + log error "❌ Unexpected non-numeric target group count from ALB" + log error "📋 ALB ARN: $ALB_ARN" + log error "📋 Received value: $TARGET_GROUP_COUNT" + log error "" + log error "💡 Possible causes:" + log error " The AWS CLI returned an unexpected response format" + log error "" + log error "🔧 How to fix:" + log error " • Verify AWS CLI version and credentials are correct" + log error " • Run manually: aws elbv2 describe-target-groups --load-balancer-arn $ALB_ARN --region $REGION --query 'length(TargetGroups)'" + log error "" + exit 1 +fi + +log info "📋 ALB '$ALB_NAME' has $TARGET_GROUP_COUNT target groups (max: $ALB_MAX_TARGET_GROUPS)" + +if [[ "$TARGET_GROUP_COUNT" -ge "$ALB_MAX_TARGET_GROUPS" ]]; then + log error "❌ ALB '$ALB_NAME' has reached target group capacity: $TARGET_GROUP_COUNT/$ALB_MAX_TARGET_GROUPS" + log error "" + log error "💡 Possible causes:" + log error " Too many services or deployments are attached to this ALB" + log error "" + log error "🔧 How to fix:" + log error " • Remove unused deployments or services from the ALB" + log error " • Increase ALB_MAX_TARGET_GROUPS in values.yaml or scope-configurations provider (AWS limit is 100)" + log error " • Request an AWS service quota increase for target groups per ALB" + log error " • Consider using a separate ALB for additional deployments" + log error "" + exit 1 +fi + +log info "✅ ALB target group capacity validated: $TARGET_GROUP_COUNT/$ALB_MAX_TARGET_GROUPS" diff --git a/k8s/deployment/verify_http_route_reconciliation b/k8s/deployment/verify_http_route_reconciliation index 6d70c8d4..5e71e88c 100644 --- a/k8s/deployment/verify_http_route_reconciliation +++ b/k8s/deployment/verify_http_route_reconciliation @@ -1,13 +1,15 @@ #!/bin/bash + SCOPE_SLUG=$(echo "$CONTEXT" | jq -r .scope.slug) HTTPROUTE_NAME="k-8-s-$SCOPE_SLUG-$SCOPE_ID-$INGRESS_VISIBILITY" -MAX_WAIT_SECONDS=120 -CHECK_INTERVAL=10 +MAX_WAIT_SECONDS=${MAX_WAIT_SECONDS:-120} 
+CHECK_INTERVAL=${CHECK_INTERVAL:-10} elapsed=0 -echo "Waiting for HTTPRoute [$HTTPROUTE_NAME] reconciliation..." +log debug "🔍 Verifying HTTPRoute reconciliation..." +log debug "📋 HTTPRoute: $HTTPROUTE_NAME | Namespace: $K8S_NAMESPACE | Timeout: ${MAX_WAIT_SECONDS}s" while [ $elapsed -lt $MAX_WAIT_SECONDS ]; do sleep $CHECK_INTERVAL @@ -17,8 +19,7 @@ while [ $elapsed -lt $MAX_WAIT_SECONDS ]; do parents_count=$(echo "$httproute_json" | jq '.status.parents | length // 0') if [ "$parents_count" -eq 0 ]; then - echo "HTTPRoute is pending sync (no parent status yet). Waiting..." - + log debug "📝 HTTPRoute pending sync (no parent status yet)... (${elapsed}s/${MAX_WAIT_SECONDS}s)" elapsed=$((elapsed + CHECK_INTERVAL)) continue fi @@ -27,7 +28,7 @@ while [ $elapsed -lt $MAX_WAIT_SECONDS ]; do conditions_count=$(echo "$conditions" | jq 'length') if [ "$conditions_count" -eq 0 ]; then - echo "HTTPRoute is pending sync (no conditions yet). Waiting..." + log debug "📝 HTTPRoute pending sync (no conditions yet)... 
(${elapsed}s/${MAX_WAIT_SECONDS}s)" elapsed=$((elapsed + CHECK_INTERVAL)) continue fi @@ -41,76 +42,82 @@ while [ $elapsed -lt $MAX_WAIT_SECONDS ]; do resolved_message=$(echo "$conditions" | jq -r '.[] | select(.type=="ResolvedRefs") | .message') if [ "$accepted_status" == "True" ] && [ "$resolved_status" == "True" ]; then - echo "✓ HTTPRoute was successfully reconciled" - echo " - Accepted: True" - echo " - ResolvedRefs: True" + log info "✅ HTTPRoute successfully reconciled (Accepted: True, ResolvedRefs: True)" return 0 fi # Check for certificate/TLS errors if echo "$accepted_message $resolved_message" | grep -qi "certificate\|tls\|secret.*not found"; then - echo "✗ Certificate/TLS error detected" - echo "Root cause: TLS certificate or secret configuration issue" - if [ "$accepted_status" == "False" ]; then - echo "Accepted condition: $accepted_reason - $accepted_message" - fi - if [ "$resolved_status" == "False" ]; then - echo "ResolvedRefs condition: $resolved_reason - $resolved_message" - fi - echo "" - echo "To fix this issue:" - echo " 1. Verify the TLS secret exists in the correct namespace" - echo " 2. Check the certificate is valid and not expired" - echo " 3. 
Ensure the Gateway references the correct certificate secret" + log error "❌ Certificate/TLS error detected" + log error "💡 Possible causes:" + log error " - TLS secret does not exist in namespace $K8S_NAMESPACE" + log error " - Certificate is invalid or expired" + log error " - Gateway references incorrect certificate secret" + [ "$accepted_status" == "False" ] && log error " - Accepted: $accepted_reason - $accepted_message" + [ "$resolved_status" == "False" ] && log error " - ResolvedRefs: $resolved_reason - $resolved_message" + log error "🔧 How to fix:" + log error " - Verify TLS secret: kubectl get secret -n $K8S_NAMESPACE | grep tls" + log error " - Check certificate validity" + log error " - Ensure Gateway references the correct secret" exit 1 fi # Check for backend service errors if echo "$resolved_message" | grep -qi "service.*not found\|backend.*not found"; then - echo "✗ Backend service error detected" - echo "Root cause: Referenced service does not exist" - echo "Message: $resolved_message" - echo "" - echo "To fix this issue:" - echo " 1. Verify the backend service name is correct" - echo " 2. Check the service exists in the namespace: kubectl get svc -n $K8S_NAMESPACE" - echo " 3. 
Ensure the service has ready endpoints" + log error "❌ Backend service error detected" + log error "💡 Possible causes:" + log error " - Referenced service does not exist" + log error " - Service name is misspelled in HTTPRoute" + log error " - Message: $resolved_message" + log error "🔧 How to fix:" + log error " - List services: kubectl get svc -n $K8S_NAMESPACE" + log error " - Verify backend service name in HTTPRoute" + log error " - Ensure service has ready endpoints" exit 1 fi # Accepted=False is an error if [ "$accepted_status" == "False" ]; then - echo "✗ HTTPRoute was not accepted by the Gateway" - echo "Reason: $accepted_reason" - echo "Message: $accepted_message" - echo "" - echo "All conditions:" - echo "$conditions" | jq -r '.[] | " - \(.type): \(.status) (\(.reason)) - \(.message)"' + log error "❌ HTTPRoute not accepted by Gateway" + log error "💡 Possible causes:" + log error " - Reason: $accepted_reason" + log error " - Message: $accepted_message" + log error "📋 All conditions:" + echo "$conditions" | jq -r '.[] | " - \(.type): \(.status) (\(.reason)) - \(.message)"' >&2 + log error "🔧 How to fix:" + log error " - Check Gateway configuration" + log error " - Verify HTTPRoute spec matches Gateway requirements" exit 1 fi # ResolvedRefs=False is an error if [ "$resolved_status" == "False" ]; then - echo "✗ HTTPRoute references could not be resolved" - echo "Reason: $resolved_reason" - echo "Message: $resolved_message" - echo "" - echo "All conditions:" - echo "$conditions" | jq -r '.[] | " - \(.type): \(.status) (\(.reason)) - \(.message)"' + log error "❌ HTTPRoute references could not be resolved" + log error "💡 Possible causes:" + log error " - Reason: $resolved_reason" + log error " - Message: $resolved_message" + log error "📋 All conditions:" + echo "$conditions" | jq -r '.[] | " - \(.type): \(.status) (\(.reason)) - \(.message)"' >&2 + log error "🔧 How to fix:" + log error " - Verify all referenced services exist" + log error " - Check backend 
service ports match" exit 1 fi - echo "⚠ HTTPRoute is being reconciled..." - echo "Current status:" - echo "$conditions" | jq -r '.[] | " - \(.type): \(.status) (\(.reason))"' - echo "Waiting for reconciliation to complete..." + log debug "📝 HTTPRoute reconciling... (${elapsed}s/${MAX_WAIT_SECONDS}s)" + echo "$conditions" | jq -r '.[] | " - \(.type): \(.status) (\(.reason))"' elapsed=$((elapsed + CHECK_INTERVAL)) done -echo "✗ Timeout waiting for HTTPRoute reconciliation after ${MAX_WAIT_SECONDS} seconds" -echo "Current conditions:" +log error "❌ Timeout waiting for HTTPRoute reconciliation after ${MAX_WAIT_SECONDS}s" +log error "💡 Possible causes:" +log error " - Gateway controller is not running" +log error " - Network policies blocking reconciliation" +log error " - Resource constraints on controller" +log error "📋 Current conditions:" httproute_json=$(kubectl get httproute "$HTTPROUTE_NAME" -n "$K8S_NAMESPACE" -o json) -echo "$httproute_json" | jq -r '.status.parents[0].conditions[] | " - \(.type): \(.status) (\(.reason)) - \(.message)"' -echo "" -echo "Verify your Gateway and Istio configuration" -exit 1 \ No newline at end of file +echo "$httproute_json" | jq -r '.status.parents[0].conditions[] | " - \(.type): \(.status) (\(.reason)) - \(.message)"' >&2 +log error "🔧 How to fix:" +log error " - Check Gateway controller logs" +log error " - Verify Gateway and Istio configuration" +exit 1 diff --git a/k8s/deployment/verify_ingress_reconciliation b/k8s/deployment/verify_ingress_reconciliation index 45a3c701..ee9f3221 100644 --- a/k8s/deployment/verify_ingress_reconciliation +++ b/k8s/deployment/verify_ingress_reconciliation @@ -1,36 +1,41 @@ #!/bin/bash + SCOPE_SLUG=$(echo "$CONTEXT" | jq -r .scope.slug) ALB_NAME=$(echo "$CONTEXT" | jq -r .alb_name) SCOPE_DOMAIN=$(echo "$CONTEXT" | jq -r .scope.domain) INGRESS_NAME="k-8-s-$SCOPE_SLUG-$SCOPE_ID-$INGRESS_VISIBILITY" -MAX_WAIT_SECONDS=120 -CHECK_INTERVAL=10 +MAX_WAIT_SECONDS=${MAX_WAIT_SECONDS:-120} 
+CHECK_INTERVAL=${CHECK_INTERVAL:-10} elapsed=0 - -echo "Waiting for ingress [$INGRESS_NAME] reconciliation..." +log debug "🔍 Verifying ingress reconciliation..." +log debug "📋 Ingress: $INGRESS_NAME | Namespace: $K8S_NAMESPACE | Timeout: ${MAX_WAIT_SECONDS}s" ALB_RECONCILIATION_ENABLED="${ALB_RECONCILIATION_ENABLED:-false}" DEPLOYMENT_STRATEGY=$(echo "$CONTEXT" | jq -r ".deployment.strategy") if [ "$ALB_RECONCILIATION_ENABLED" = "false" ] && [ "$DEPLOYMENT_STRATEGY" = "blue_green" ]; then - echo "⚠ Skipping verification as ALB access needed to validate blue-green and switch traffic reconciliation." - + log warn "⚠️ Skipping ALB verification (ALB access needed for blue-green traffic validation)" return 0 fi if [ "$ALB_RECONCILIATION_ENABLED" = "true" ]; then - echo "Validating ALB [$ALB_NAME] configuration for domain [$SCOPE_DOMAIN]" + log debug "📋 ALB validation enabled: $ALB_NAME for domain $SCOPE_DOMAIN" else - echo "ALB reconciliation disabled, will check cluster events only" + log debug "📋 ALB reconciliation disabled, checking cluster events only" fi INGRESS_JSON=$(kubectl get ingress "$INGRESS_NAME" -n "$K8S_NAMESPACE" -o json 2>/dev/null) if [ $? -ne 0 ]; then - echo "✗ Failed to get ingress $INGRESS_NAME" + log error "❌ Failed to get ingress $INGRESS_NAME" + log error "💡 Possible causes:" + log error " - Ingress does not exist yet" + log error " - Namespace $K8S_NAMESPACE is incorrect" + log error "🔧 How to fix:" + log error " - List ingresses: kubectl get ingress -n $K8S_NAMESPACE" exit 1 fi @@ -54,7 +59,7 @@ if [ "$ALB_RECONCILIATION_ENABLED" = "true" ]; then --output text 2>&1) if [ $? -ne 0 ] || [ "$ALB_ARN" == "None" ] || [ -z "$ALB_ARN" ]; then - echo "⚠ Could not find ALB: $ALB_NAME" + log warn "⚠️ Could not find ALB: $ALB_NAME" return 1 fi fi @@ -64,42 +69,89 @@ validate_alb_config() { --load-balancer-arn "$ALB_ARN" \ --region "$REGION" \ --output json 2>&1) - + if [ $? 
-ne 0 ]; then - echo "⚠ Could not get listeners for ALB" + log warn "⚠️ Could not get listeners for ALB" return 1 fi + # Build a set of additional port numbers where the blue deployment has no K8s service. + # When additional_ports are added to a scope after the initial deployment, the blue + # deployment won't have services for those ports. ALB listeners on those ports will + # have single-target weights (100% green) instead of the standard blue-green split. + local _blue_missing_ports="" + local _additional_ports + _additional_ports=$(echo "$CONTEXT" | jq -c '.scope.capabilities.additional_ports // []') + local _blue_port_svc + _blue_port_svc=$(echo "$CONTEXT" | jq -c '.blue_additional_port_services // {}') + + if [ "$_additional_ports" != "[]" ] && [ "$_additional_ports" != "null" ] && [ "$_blue_port_svc" != "{}" ]; then + while IFS= read -r _pc; do + local _port _type _key _exists + _port=$(echo "$_pc" | jq -r '.port') + _type=$(echo "$_pc" | jq -r '.type' | tr '[:upper:]' '[:lower:]') + _key="${_type}-${_port}" + _exists=$(echo "$_blue_port_svc" | jq -r --arg k "$_key" 'if has($k) then .[$k] else true end') + if [ "$_exists" = "false" ]; then + _blue_missing_ports="${_blue_missing_ports} ${_port}" + log debug "📝 Blue deployment has no service for additional port ${_port} - expecting single-target weights" + fi + done < <(echo "$_additional_ports" | jq -c '.[]') + fi + local all_domains_found=true - + for domain in "${ALL_DOMAINS[@]}"; do - echo "Checking domain: $domain" + log debug "📝 Checking domain: $domain" local domain_found=false - + LISTENER_ARNS=$(echo "$LISTENERS" | jq -r '.Listeners[].ListenerArn') - + for listener_arn in $LISTENER_ARNS; do RULES=$(aws elbv2 describe-rules \ --listener-arn "$listener_arn" \ --region "$REGION" \ --output json 2>&1) - + if [ $? -ne 0 ]; then continue fi - + MATCHING_RULE=$(echo "$RULES" | jq --arg domain "$domain" ' .Rules[] | select( - .Conditions[]? | - select(.Field == "host-header") | + .Conditions[]? 
| + select(.Field == "host-header") | .Values[]? == $domain ) ') - + if [ -n "$MATCHING_RULE" ]; then - echo " ✓ Found rule for domain: $domain" - if [ "${VERIFY_WEIGHTS:-false}" = "true" ]; then + # Determine the listener port to check if this is an additional port + # where the blue deployment has no service (added after initial deploy) + local LISTENER_PORT + LISTENER_PORT=$(echo "$LISTENERS" | jq -r --arg arn "$listener_arn" \ + '.Listeners[] | select(.ListenerArn == $arn) | .Port') + + local is_blue_missing_port=false + for _mp in $_blue_missing_ports; do + if [ "$LISTENER_PORT" = "$_mp" ]; then + is_blue_missing_port=true + break + fi + done + + if [ "$is_blue_missing_port" = "true" ]; then + # Blue deployment was created before this additional port was added + # to the scope config, so there's no blue K8s service for it. + # Skip weight verification on this listener — the ingress correctly + # routes 100% to green. Verify weights on the primary listener instead. + log debug " ⏭️ Skipping weight check on listener port $LISTENER_PORT (blue has no service for this port)" + continue + fi + + log info " ✅ Found rule for domain: $domain" + BLUE_WEIGHT=$((100 - SWITCH_TRAFFIC)) GREEN_WEIGHT=$SWITCH_TRAFFIC @@ -109,46 +161,48 @@ validate_alb_config() { else EXPECTED_WEIGHTS="$GREEN_WEIGHT" fi - + ACTUAL_WEIGHTS=$(echo "$MATCHING_RULE" | jq -r ' - .Actions[]? | - select(.Type == "forward") | - .ForwardConfig.TargetGroups[]? | + .Actions[]? | + select(.Type == "forward") | + .ForwardConfig.TargetGroups[]? 
| "\(.Weight // 1)" ' 2>/dev/null | sort -n) - + if [ -n "$EXPECTED_WEIGHTS" ] && [ -n "$ACTUAL_WEIGHTS" ]; then if [ "$EXPECTED_WEIGHTS" == "$ACTUAL_WEIGHTS" ]; then - echo " ✓ Weights match (GREEN: $GREEN_WEIGHT, BLUE: $BLUE_WEIGHT)" + log info " ✅ Weights match on listener port $LISTENER_PORT (GREEN: $GREEN_WEIGHT, BLUE: $BLUE_WEIGHT)" domain_found=true else - echo " ✗ Weights do not match" - echo " Expected: $EXPECTED_WEIGHTS" - echo " Actual: $ACTUAL_WEIGHTS" + local _exp_fmt _act_fmt + _exp_fmt=$(echo "$EXPECTED_WEIGHTS" | tr '\n' '/' | sed 's/\/$//') + _act_fmt=$(echo "$ACTUAL_WEIGHTS" | tr '\n' '/' | sed 's/\/$//') + log error " ❌ Weights mismatch on listener port $LISTENER_PORT: expected=$_exp_fmt actual=$_act_fmt" domain_found=false fi else - echo " ⚠ Could not extract weights for comparison" + log warn " ⚠️ Could not extract weights for comparison" domain_found=false fi else + log info " ✅ Found rule for domain: $domain" domain_found=true fi break fi done - + if [ "$domain_found" = false ]; then - echo " ✗ Domain not found in ALB rules: $domain" + log error " ❌ Domain not found in ALB rules: $domain" all_domains_found=false fi done - + if [ "$all_domains_found" = true ]; then - echo "✓ All domains are configured in ALB" + log info "✅ All domains configured in ALB" return 0 else - echo "⚠ Some domains are missing from ALB configuration" + log warn "⚠️ Some domains missing from ALB configuration" return 1 fi } @@ -156,13 +210,12 @@ validate_alb_config() { while [ $elapsed -lt $MAX_WAIT_SECONDS ]; do if [ "$ALB_RECONCILIATION_ENABLED" = "true" ]; then if validate_alb_config; then - echo "✓ ALB configuration validated successfully" + log info "✅ ALB configuration validated successfully" return 0 fi - - echo "ALB validation incomplete, checking Kubernetes events..." + log debug "📝 ALB validation incomplete, checking Kubernetes events..." 
fi - + events_json=$(kubectl get events -n "$K8S_NAMESPACE" \ --field-selector "involvedObject.name=$INGRESS_NAME,involvedObject.kind=Ingress" \ -o json) @@ -180,55 +233,52 @@ while [ $elapsed -lt $MAX_WAIT_SECONDS ]; do event_message=$(echo "$newest_event" | jq -r '.message') if [ "$event_reason" == "SuccessfullyReconciled" ]; then - echo "✓ Ingress was successfully reconciled (via event)" + log info "✅ Ingress successfully reconciled" return 0 fi if echo "$event_message" | grep -q "no certificate found for host"; then - echo "✗ Certificate error detected" - echo "Root cause: The ingress hostname does not match any available SSL/TLS certificate" - echo "Message: $event_message" - - echo "To fix this issue:" - echo " 1. Verify the hostname in your ingress matches a certificate in ACM (AWS Certificate Manager)" - echo " 2. Check the 'alb.ingress.kubernetes.io/certificate-arn' annotation points to a valid certificate" - echo " 3. Ensure the certificate includes the exact hostname or a wildcard that covers it" + log error "❌ Certificate error detected" + log error "💡 Possible causes:" + log error " - Ingress hostname does not match any SSL/TLS certificate in ACM" + log error " - Certificate does not cover the hostname (check wildcards)" + log error " - Message: $event_message" + log error "🔧 How to fix:" + log error " - Verify hostname matches certificate in ACM" + log error " - Ensure certificate includes exact hostname or matching wildcard" exit 1 fi if [ "$event_type" == "Error" ]; then - echo "✗ The ingress could not be reconciled" - echo "Error messages:" - echo "$relevant_events" | jq -r '.[] | " - \(.message)"' + log error "❌ Ingress reconciliation failed" + log error "💡 Error messages:" + echo "$relevant_events" | jq -r '.[] | " - \(.message)"' >&2 exit 1 fi if [ "$event_type" == "Warning" ]; then - echo "⚠ There are some potential issues with the ingress" - echo "Warning messages:" - echo "$relevant_events" | jq -r '.[] | " - \(.message)"' + log warn "⚠️ 
Potential issues with ingress:" + echo "$relevant_events" | jq -r '.[] | " - \(.message)"' fi fi - echo "Waiting for ALB reconciliation... (${elapsed}s/${MAX_WAIT_SECONDS}s)" + log debug "📝 Waiting for ALB reconciliation... (${elapsed}s/${MAX_WAIT_SECONDS}s)" sleep $CHECK_INTERVAL elapsed=$((elapsed + CHECK_INTERVAL)) done -# Timeout reached - show diagnostic information -echo "✗ Timeout waiting for ingress reconciliation after ${MAX_WAIT_SECONDS} seconds" -echo "" -echo "Diagnostic information:" -echo "1. Check ALB Ingress Controller logs:" -echo " kubectl logs -n kube-system -l app.kubernetes.io/name=aws-load-balancer-controller" -echo "" -echo "2. Check ingress status:" -echo " kubectl describe ingress $INGRESS_NAME -n $K8S_NAMESPACE" -echo "" -echo "3. Recent events:" +log error "❌ Timeout waiting for ingress reconciliation after ${MAX_WAIT_SECONDS}s" +log error "💡 Possible causes:" +log error " - ALB Ingress Controller not running or unhealthy" +log error " - Network connectivity issues" +log error "🔧 How to fix:" +log error " - Check controller: kubectl logs -n kube-system -l app.kubernetes.io/name=aws-load-balancer-controller" +log error " - Check ingress: kubectl describe ingress $INGRESS_NAME -n $K8S_NAMESPACE" +log error "📋 Recent events:" + events_json=$(kubectl get events -n "$K8S_NAMESPACE" \ --field-selector "involvedObject.name=$INGRESS_NAME,involvedObject.kind=Ingress" \ -o json) echo "$events_json" | jq -r '.items | sort_by(.lastTimestamp) | .[] | " [\(.type)] \(.reason): \(.message)"' | tail -10 -exit 1 \ No newline at end of file +exit 1 diff --git a/k8s/deployment/verify_networking_reconciliation b/k8s/deployment/verify_networking_reconciliation index 28da9432..214c8530 100644 --- a/k8s/deployment/verify_networking_reconciliation +++ b/k8s/deployment/verify_networking_reconciliation @@ -1,11 +1,14 @@ #!/bin/bash + +log debug "🔍 Verifying networking reconciliation for DNS type: $DNS_TYPE" + case "$DNS_TYPE" in route53) source 
"$SERVICE_PATH/deployment/verify_ingress_reconciliation" ;; *) - echo "Ingress reconciliation is not available yet for $DNS_TYPE" + log warn "⚠️ Ingress reconciliation not available for DNS type: $DNS_TYPE, skipping" # source "$SERVICE_PATH/deployment/verify_http_route_reconciliation" ;; -esac \ No newline at end of file +esac diff --git a/k8s/deployment/wait_blue_deployment_active b/k8s/deployment/wait_blue_deployment_active index b1f54115..d26ab4cc 100755 --- a/k8s/deployment/wait_blue_deployment_active +++ b/k8s/deployment/wait_blue_deployment_active @@ -1,5 +1,6 @@ #!/bin/bash + export NEW_DEPLOYMENT_ID=$DEPLOYMENT_ID export DEPLOYMENT_ID=$(echo "$CONTEXT" | jq .scope.current_active_deployment -r) @@ -14,4 +15,4 @@ export DEPLOYMENT_ID=$NEW_DEPLOYMENT_ID CONTEXT=$(echo "$CONTEXT" | jq \ --arg deployment_id "$DEPLOYMENT_ID" \ - '.deployment.id = $deployment_id') \ No newline at end of file + '.deployment.id = $deployment_id') diff --git a/k8s/deployment/wait_deployment_active b/k8s/deployment/wait_deployment_active index 2789ee3f..c242b03f 100755 --- a/k8s/deployment/wait_deployment_active +++ b/k8s/deployment/wait_deployment_active @@ -1,53 +1,62 @@ #!/bin/bash + MAX_ITERATIONS=$(( TIMEOUT / 10 )) K8S_DEPLOYMENT_NAME="d-$SCOPE_ID-$DEPLOYMENT_ID" iteration=0 LATEST_TIMESTAMP="" SKIP_DEPLOYMENT_STATUS_CHECK="${SKIP_DEPLOYMENT_STATUS_CHECK:=false}" +log debug "🔍 Waiting for deployment '$K8S_DEPLOYMENT_NAME' to become active..." +log debug "📋 Namespace: $K8S_NAMESPACE" +log debug "📋 Timeout: ${TIMEOUT}s (max $MAX_ITERATIONS iterations)" +log debug "" + while true; do ((++iteration)) if [ $iteration -gt $MAX_ITERATIONS ]; then - echo "ERROR: Timeout waiting for deployment. Maximum iterations (${MAX_ITERATIONS}) reached." 
+ log error "" + log error "❌ Timeout waiting for deployment" + log error "📋 Maximum iterations ($MAX_ITERATIONS) reached" source "$SERVICE_PATH/deployment/print_failed_deployment_hints" exit 1 fi - - echo "Checking deployment status (attempt $iteration/$MAX_ITERATIONS)..." + + log debug "📡 Checking deployment status (attempt $iteration/$MAX_ITERATIONS)..." D_STATUS=$(np deployment read --id $DEPLOYMENT_ID --api-key $NP_API_KEY --query .status 2>&1) || { - echo "ERROR: Failed to read deployment status" - echo "NP CLI error: $D_STATUS" + log error " ❌ Failed to read deployment status" + log error "📋 NP CLI error: $D_STATUS" exit 1 } - + if [[ -z "$D_STATUS" ]] || [[ "$D_STATUS" == "null" ]]; then - echo "ERROR: Deployment status not found for ID $DEPLOYMENT_ID" + log error " ❌ Deployment status not found for ID $DEPLOYMENT_ID" exit 1 fi if [ "$SKIP_DEPLOYMENT_STATUS_CHECK" != true ]; then if [[ $D_STATUS != "running" && $D_STATUS != "waiting_for_instances" ]]; then - echo "Deployment it's not running anymore [$D_STATUS]" + log error " ❌ Deployment is no longer running (status: $D_STATUS)" exit 1 fi fi deployment_status=$(kubectl get deployment "$K8S_DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" -o json 2>/dev/null) if [ $? 
-ne 0 ]; then - echo "Error: Deployment '$K8S_DEPLOYMENT_NAME' not found in namespace '$K8S_NAMESPACE'" + log error " ❌ Deployment '$K8S_DEPLOYMENT_NAME' not found in namespace '$K8S_NAMESPACE'" exit 1 fi desired=$(echo "$deployment_status" | jq '.spec.replicas') current=$(echo "$deployment_status" | jq '.status.availableReplicas // 0') updated=$(echo "$deployment_status" | jq '.status.updatedReplicas // 0') ready=$(echo "$deployment_status" | jq '.status.readyReplicas // 0') - echo "$(date): Iteration $iteration - Deployment status - Available: $current/$desired, Updated: $updated/$desired, Ready: $ready/$desired" + log debug "🔍 $(date): Iteration $iteration - Deployment status - Available: $current/$desired, Updated: $updated/$desired, Ready: $ready/$desired" if [ "$desired" = "$current" ] && [ "$desired" = "$updated" ] && [ "$desired" = "$ready" ] && [ "$desired" -gt 0 ]; then - echo "Success: All pods in deployment '$K8S_DEPLOYMENT_NAME' are available and ready!" + log debug "" + log info "✅ All pods in deployment '$K8S_DEPLOYMENT_NAME' are available and ready!" break fi @@ -55,46 +64,53 @@ while true; do POD_NAMES=$(kubectl get pods -n $K8S_NAMESPACE -l $POD_SELECTOR -o jsonpath='{.items[*].metadata.name}') # Get events for the deployment first DEPLOYMENT_EVENTS=$(kubectl get events -n $K8S_NAMESPACE --field-selector involvedObject.kind=Deployment,involvedObject.name=$K8S_DEPLOYMENT_NAME -o json) - + ALL_EVENTS="$DEPLOYMENT_EVENTS" for POD in $POD_NAMES; do - echo "Checking events for pod: $POD" + log debug "Checking events for pod: $POD" POD_EVENTS=$(kubectl get events -n $K8S_NAMESPACE --field-selector involvedObject.kind=Pod,involvedObject.name=$POD -o json) # Combine events using jq if [ ! -z "$POD_EVENTS" ] && [ "$POD_EVENTS" != "{}" ]; then ALL_EVENTS=$(echo "$ALL_EVENTS" "$POD_EVENTS" | jq -s '.[0].items = (.[0].items + .[1].items) | .[0]') fi done - + PROCESSED_EVENTS=$(echo "$ALL_EVENTS" | jq '.items = (.items | map(. 
+ { effectiveTimestamp: ( - if .eventTime then .eventTime - elif .lastTimestamp then .lastTimestamp + if .eventTime then .eventTime + elif .lastTimestamp then .lastTimestamp elif .firstTimestamp then .firstTimestamp else .metadata.creationTimestamp end ) }))') - + # Find the newest timestamp in all events NEWEST_TIMESTAMP=$(echo "$PROCESSED_EVENTS" | jq -r '.items | map(.effectiveTimestamp) | max // empty') - + # Process events with jq, showing only events newer than what we've seen + # Output format: TYPEmessage — so we can route Warning events to log warn NEW_EVENTS=$(echo "$PROCESSED_EVENTS" | jq -r --arg timestamp "$LATEST_TIMESTAMP" ' - .items | - sort_by(.effectiveTimestamp) | - .[] | - select($timestamp == "" or (.effectiveTimestamp > $timestamp)) | - "\(.effectiveTimestamp) [\(.type)] \(.involvedObject.kind)/\(.involvedObject.name): \(.reason) - \(.message)" + .items | + sort_by(.effectiveTimestamp) | + .[] | + select($timestamp == "" or (.effectiveTimestamp > $timestamp)) | + "\(.type)\t\(.effectiveTimestamp) [\(.type)] \(.involvedObject.kind)/\(.involvedObject.name): \(.reason) - \(.message)" ') - + # If we have new events, show them and update the timestamp if [ ! 
-z "$NEW_EVENTS" ]; then - echo "$NEW_EVENTS" + while IFS=$'\t' read -r event_type event_line; do + if [ "$event_type" = "Warning" ]; then + log warn "$event_line" + else + log debug "$event_line" + fi + done <<< "$NEW_EVENTS" # Store the newest timestamp for next iteration LATEST_TIMESTAMP="$NEWEST_TIMESTAMP" - echo "Updated timestamp to: $LATEST_TIMESTAMP" + log debug "Updated timestamp to: $LATEST_TIMESTAMP" fi sleep 10 diff --git a/k8s/deployment/workflows/delete.yaml b/k8s/deployment/workflows/delete.yaml index 2e28b167..36e0cf1a 100644 --- a/k8s/deployment/workflows/delete.yaml +++ b/k8s/deployment/workflows/delete.yaml @@ -1,6 +1,15 @@ include: - "$SERVICE_PATH/values.yaml" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string - name: build context type: script file: "$SERVICE_PATH/deployment/build_context" diff --git a/k8s/deployment/workflows/finalize.yaml b/k8s/deployment/workflows/finalize.yaml index 178a396e..c0b827c9 100644 --- a/k8s/deployment/workflows/finalize.yaml +++ b/k8s/deployment/workflows/finalize.yaml @@ -3,6 +3,15 @@ include: configuration: INGRESS_TEMPLATE: "$INITIAL_INGRESS_PATH" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string - name: build context type: script file: "$SERVICE_PATH/deployment/build_context" @@ -45,11 +54,17 @@ steps: type: file file: "$OUTPUT_DIR/ingress-$SCOPE_ID-$DEPLOYMENT_ID.yaml" post: - name: verify_networking_reconciliation - type: script - file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" - configuration: - VERIFY_WEIGHTS: false + name: post_apply_checks + type: workflow + steps: + - name: verify_networking_reconciliation + type: script + file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" + configuration: + VERIFY_WEIGHTS: false + - name: publish_alb_metrics 
+ type: script + file: "$SERVICE_PATH/deployment/publish_alb_metrics" - name: build deployment type: script file: "$SERVICE_PATH/deployment/build_blue_deployment" diff --git a/k8s/deployment/workflows/initial.yaml b/k8s/deployment/workflows/initial.yaml index c00f0435..b7bc8134 100644 --- a/k8s/deployment/workflows/initial.yaml +++ b/k8s/deployment/workflows/initial.yaml @@ -3,6 +3,15 @@ include: configuration: INGRESS_TEMPLATE: "$INITIAL_INGRESS_PATH" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string - name: build context type: script file: "$SERVICE_PATH/deployment/build_context" @@ -19,6 +28,9 @@ steps: type: environment - name: BLUE_DEPLOYMENT_ID type: environment + - name: validate alb target group capacity + type: script + file: "$SERVICE_PATH/deployment/validate_alb_target_group_capacity" - name: route traffic type: script file: "$SERVICE_PATH/deployment/networking/gateway/route_traffic" @@ -62,6 +74,9 @@ steps: file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" configuration: VERIFY_WEIGHTS: false + - name: publish_alb_metrics + type: script + file: "$SERVICE_PATH/deployment/publish_alb_metrics" - name: wait deployment active type: script file: "$SERVICE_PATH/deployment/wait_deployment_active" diff --git a/k8s/deployment/workflows/kill_instances.yaml b/k8s/deployment/workflows/kill_instances.yaml index 3db18899..aa162316 100644 --- a/k8s/deployment/workflows/kill_instances.yaml +++ b/k8s/deployment/workflows/kill_instances.yaml @@ -1,6 +1,15 @@ include: - "$SERVICE_PATH/values.yaml" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string - name: kill instances type: script file: "$SERVICE_PATH/deployment/kill_instances" \ No newline at end of file diff --git a/k8s/deployment/workflows/rollback.yaml 
b/k8s/deployment/workflows/rollback.yaml index be3a98af..729d06f0 100644 --- a/k8s/deployment/workflows/rollback.yaml +++ b/k8s/deployment/workflows/rollback.yaml @@ -3,6 +3,15 @@ include: configuration: INGRESS_TEMPLATE: "$INITIAL_INGRESS_PATH" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string - name: build context type: script file: "$SERVICE_PATH/deployment/build_context" diff --git a/k8s/deployment/workflows/switch_traffic.yaml b/k8s/deployment/workflows/switch_traffic.yaml index 486cee7b..54f90bdf 100644 --- a/k8s/deployment/workflows/switch_traffic.yaml +++ b/k8s/deployment/workflows/switch_traffic.yaml @@ -3,6 +3,15 @@ include: configuration: INGRESS_TEMPLATE: "$BLUE_GREEN_INGRESS_PATH" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string - name: build context type: script file: "$SERVICE_PATH/deployment/build_context" @@ -48,8 +57,14 @@ steps: ACTION: apply DRY_RUN: false post: - name: verify_networking_reconciliation - type: script - file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" - configuration: - VERIFY_WEIGHTS: true + name: post_apply_checks + type: workflow + steps: + - name: verify_networking_reconciliation + type: script + file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" + configuration: + VERIFY_WEIGHTS: true + - name: publish_alb_metrics + type: script + file: "$SERVICE_PATH/deployment/publish_alb_metrics" diff --git a/k8s/diagnose/tests/build_context.bats b/k8s/diagnose/tests/build_context.bats new file mode 100644 index 00000000..46eaa5e2 --- /dev/null +++ b/k8s/diagnose/tests/build_context.bats @@ -0,0 +1,185 @@ +#!/usr/bin/env bats +# Unit tests for diagnose/build_context - diagnostic context preparation + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + + export K8S_NAMESPACE="default-ns" + export SCOPE_ID="scope-123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export NP_ACTION_CONTEXT='{}' + export ALB_CONTROLLER_NAMESPACE="kube-system" + + export CONTEXT='{ + "providers": { + "container-orchestration": { + "cluster": {"namespace": "provider-namespace"} + } + }, + "parameters": {"deployment_id": "deploy-789"} + }' + + kubectl() { + case "$*" in + *"app.kubernetes.io/name=aws-load-balancer-controller"*) echo '{"items":[]}' ;; + *"app=aws-alb-ingress-controller"*) echo '{"items":[]}' ;; + *"get pods"*) echo '{"items":[{"metadata":{"name":"test-pod"}}]}' ;; + *"get services"*) echo '{"items":[{"metadata":{"name":"test-service"}}]}' ;; + *"get endpoints"*) echo '{"items":[]}' ;; + *"get ingress"*) echo '{"items":[]}' ;; + *"get secrets"*) echo '{"items":[]}' ;; + *"get ingressclass"*) echo '{"items":[]}' ;; + *"get events"*) echo '{"items":[]}' ;; + *"logs"*) echo "log line 1" ;; + *) echo '{"items":[]}' ;; + esac + } + export -f kubectl + + notify_results() { return 0; } + export -f notify_results +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + unset K8S_NAMESPACE SCOPE_ID NP_OUTPUT_DIR NP_ACTION_CONTEXT CONTEXT + unset LABEL_SELECTOR SCOPE_LABEL_SELECTOR NAMESPACE ALB_CONTROLLER_NAMESPACE + unset -f kubectl notify_results +} + +run_build_context() { + source "$BATS_TEST_DIRNAME/../build_context" +} + +# ============================================================================= +# Namespace Resolution +# ============================================================================= +@test "build_context: NAMESPACE from provider > K8S_NAMESPACE fallback" { + # Test provider namespace + run_build_context + assert_equal "$NAMESPACE" "provider-namespace" + + # Test fallback + export CONTEXT='{"providers": {}}' + run_build_context + assert_equal "$NAMESPACE" "default-ns" +} + +# ============================================================================= +# Label 
Selectors +# ============================================================================= +@test "build_context: sets label selectors from various deployment_id sources" { + # From parameters.deployment_id (default setup) + run_build_context + assert_equal "$SCOPE_LABEL_SELECTOR" "scope_id=scope-123" + assert_equal "$LABEL_SELECTOR" "scope_id=scope-123,deployment_id=deploy-789" + + # From deployment.id + export CONTEXT='{"providers": {}, "deployment": {"id": "deploy-from-deployment"}}' + run_build_context + assert_equal "$LABEL_SELECTOR" "scope_id=scope-123,deployment_id=deploy-from-deployment" + + # From scope.current_active_deployment + export CONTEXT='{"providers": {}, "scope": {"current_active_deployment": "deploy-active"}}' + run_build_context + assert_equal "$LABEL_SELECTOR" "scope_id=scope-123,deployment_id=deploy-active" + + # No deployment_id - LABEL_SELECTOR equals SCOPE_LABEL_SELECTOR + export CONTEXT='{"providers": {}, "parameters": {}}' + run_build_context + assert_equal "$LABEL_SELECTOR" "scope_id=scope-123" +} + +# ============================================================================= +# Directory and File Creation +# ============================================================================= +@test "build_context: creates data directory and all resource files" { + run_build_context + + assert_directory_exists "$NP_OUTPUT_DIR/data" + assert_directory_exists "$NP_OUTPUT_DIR/data/alb_controller_logs" + + # All resource files should exist and be valid JSON + for file in "$PODS_FILE" "$SERVICES_FILE" "$ENDPOINTS_FILE" "$INGRESSES_FILE" \ + "$SECRETS_FILE" "$INGRESSCLASSES_FILE" "$EVENTS_FILE" "$ALB_CONTROLLER_PODS_FILE"; do + assert_file_exists "$file" + jq . 
"$file" >/dev/null + done +} + +@test "build_context: secrets.json excludes sensitive data field" { + kubectl() { + case "$*" in + *"get secrets"*) + echo '{"items":[{"metadata":{"name":"my-secret"},"data":{"password":"c2VjcmV0"}}]}' + ;; + *) echo '{"items":[]}' ;; + esac + } + export -f kubectl + + run_build_context + + assert_file_exists "$SECRETS_FILE" + has_data=$(jq '.items[0].data // empty' "$SECRETS_FILE") + assert_empty "$has_data" +} + +# ============================================================================= +# Empty Results Handling +# ============================================================================= +@test "build_context: handles kubectl returning empty results" { + kubectl() { echo '{"items":[]}'; } + export -f kubectl + + run_build_context + + assert_file_exists "$PODS_FILE" + items_count=$(jq '.items | length' "$PODS_FILE") + assert_equal "$items_count" "0" +} + +# ============================================================================= +# ALB Controller Discovery +# ============================================================================= +@test "build_context: tries legacy ALB controller label when new one has no pods" { + kubectl() { + case "$*" in + *"app.kubernetes.io/name=aws-load-balancer-controller"*) + echo '{"items":[]}' + ;; + *"app=aws-alb-ingress-controller"*) + echo '{"items":[{"metadata":{"name":"legacy-alb-pod"}}]}' + ;; + *) echo '{"items":[]}' ;; + esac + } + export -f kubectl + + run_build_context + + content=$(cat "$ALB_CONTROLLER_PODS_FILE") + assert_contains "$content" "legacy-alb-pod" +} + +@test "build_context: collects ALB controller logs when pods exist" { + kubectl() { + case "$*" in + *"app.kubernetes.io/name=aws-load-balancer-controller"*) + echo '{"items":[{"metadata":{"name":"alb-controller-pod"}}]}' + ;; + *"logs"*"alb-controller-pod"*) + echo "controller log line" + ;; + *) echo '{"items":[]}' ;; + esac + } + export -f kubectl + + run_build_context + + assert_file_exists 
"$ALB_CONTROLLER_LOGS_DIR/alb-controller-pod.log" + log_content=$(cat "$ALB_CONTROLLER_LOGS_DIR/alb-controller-pod.log") + assert_contains "$log_content" "controller log line" +} diff --git a/k8s/diagnose/tests/diagnose_utils.bats b/k8s/diagnose/tests/diagnose_utils.bats new file mode 100644 index 00000000..4080bd72 --- /dev/null +++ b/k8s/diagnose/tests/diagnose_utils.bats @@ -0,0 +1,299 @@ +#!/usr/bin/env bats +# Unit tests for diagnose/utils/diagnose_utils + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../utils/diagnose_utils" + + export NP_OUTPUT_DIR="$(mktemp -d)" + export NP_ACTION_CONTEXT='{ + "notification": {"id": "action-123", "service": {"id": "service-456"}} + }' + + export SCRIPT_OUTPUT_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export SCRIPT_LOG_FILE="$(mktemp)" + echo "test log line 1" > "$SCRIPT_LOG_FILE" + echo "test log line 2" >> "$SCRIPT_LOG_FILE" + + np() { return 0; } + export -f np +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" "$SCRIPT_LOG_FILE" + unset NP_OUTPUT_DIR NP_ACTION_CONTEXT SCRIPT_OUTPUT_FILE SCRIPT_LOG_FILE + unset -f np +} + +# Strip ANSI color codes from output for clean assertions +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +# ============================================================================= +# Print Functions +# ============================================================================= +@test "print_success: outputs green checkmark with message" { + run print_success "Test message" + + [ "$status" -eq 0 ] + local clean=$(strip_ansi "$output") + assert_contains "$clean" "✓ Test message" +} + +@test "print_error: outputs red X with message" { + run print_error "Error message" + + [ "$status" -eq 0 ] + local clean=$(strip_ansi "$output") + assert_contains "$clean" "✗ Error message" +} + +@test 
"print_warning: outputs yellow warning with message" { + run print_warning "Warning message" + + [ "$status" -eq 0 ] + local clean=$(strip_ansi "$output") + assert_contains "$clean" "⚠ Warning message" +} + +@test "print_info: outputs cyan info with message" { + run print_info "Info message" + + [ "$status" -eq 0 ] + local clean=$(strip_ansi "$output") + assert_contains "$clean" "ℹ Info message" +} + +@test "print_action: outputs wrench emoji with message" { + run print_action "Action message" + + [ "$status" -eq 0 ] + local clean=$(strip_ansi "$output") + assert_contains "$clean" "🔧 Action message" +} + +# ============================================================================= +# require_resources +# ============================================================================= +@test "require_resources: returns 0 when resources exist" { + run require_resources "pods" "pod-1 pod-2" "app=test" "default" + + [ "$status" -eq 0 ] +} + +@test "require_resources: returns 1 and shows skip message when resources empty" { + update_check_result() { return 0; } + export -f update_check_result + + run require_resources "pods" "" "app=test" "default" + + [ "$status" -eq 1 ] + local clean=$(strip_ansi "$output") + assert_contains "$clean" "⚠ No pods found with labels app=test in namespace default, check was skipped." 
+} + +# ============================================================================= +# require_pods / require_services / require_ingresses +# ============================================================================= +@test "require_pods: returns 0 when pods exist, 1 when empty" { + export PODS_FILE="$(mktemp)" + export LABEL_SELECTOR="app=test" + export NAMESPACE="default" + + # Test with pods + echo '{"items":[{"metadata":{"name":"pod-1"}}]}' > "$PODS_FILE" + run require_pods + [ "$status" -eq 0 ] + + # Test without pods + echo '{"items":[]}' > "$PODS_FILE" + update_check_result() { return 0; } + export -f update_check_result + run require_pods + [ "$status" -eq 1 ] + + rm -f "$PODS_FILE" +} + +@test "require_services: returns 0 when services exist, 1 when empty" { + export SERVICES_FILE="$(mktemp)" + export LABEL_SELECTOR="app=test" + export NAMESPACE="default" + + # Test with services + echo '{"items":[{"metadata":{"name":"svc-1"}}]}' > "$SERVICES_FILE" + run require_services + [ "$status" -eq 0 ] + + # Test without services + echo '{"items":[]}' > "$SERVICES_FILE" + update_check_result() { return 0; } + export -f update_check_result + run require_services + [ "$status" -eq 1 ] + + rm -f "$SERVICES_FILE" +} + +@test "require_ingresses: returns 0 when ingresses exist" { + export INGRESSES_FILE="$(mktemp)" + export SCOPE_LABEL_SELECTOR="scope_id=123" + export NAMESPACE="default" + + echo '{"items":[{"metadata":{"name":"ing-1"}}]}' > "$INGRESSES_FILE" + run require_ingresses + [ "$status" -eq 0 ] + + rm -f "$INGRESSES_FILE" +} + +# ============================================================================= +# update_check_result - Basic Operations +# ============================================================================= +@test "update_check_result: updates status and evidence" { + update_check_result --status "success" --evidence '{"key":"value"}' + + status_result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$status_result" "success" + + 
evidence_result=$(jq -r '.evidence.key' "$SCRIPT_OUTPUT_FILE") + assert_equal "$evidence_result" "value" +} + +@test "update_check_result: includes logs from SCRIPT_LOG_FILE" { + update_check_result --status "success" --evidence "{}" + + logs_count=$(jq -r '.logs | length' "$SCRIPT_OUTPUT_FILE") + assert_equal "$logs_count" "2" + + first_log=$(jq -r '.logs[0]' "$SCRIPT_OUTPUT_FILE") + assert_equal "$first_log" "test log line 1" + + second_log=$(jq -r '.logs[1]' "$SCRIPT_OUTPUT_FILE") + assert_equal "$second_log" "test log line 2" +} + +@test "update_check_result: normalizes status to lowercase" { + update_check_result --status "SUCCESS" --evidence "{}" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# update_check_result - Timestamps +# ============================================================================= +@test "update_check_result: sets start_at for running status (ISO 8601 format)" { + update_check_result --status "running" --evidence "{}" + + start_at=$(jq -r '.start_at' "$SCRIPT_OUTPUT_FILE") + assert_not_empty "$start_at" + assert_contains "$start_at" "T" + assert_contains "$start_at" "Z" +} + +@test "update_check_result: sets end_at for success and failed status" { + # Test success + update_check_result --status "success" --evidence "{}" + end_at=$(jq -r '.end_at' "$SCRIPT_OUTPUT_FILE") + assert_not_empty "$end_at" + + # Reset and test failed + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + update_check_result --status "failed" --evidence "{}" + end_at=$(jq -r '.end_at' "$SCRIPT_OUTPUT_FILE") + assert_not_empty "$end_at" +} + +# ============================================================================= +# update_check_result - Error Handling +# ============================================================================= +@test "update_check_result: fails with 'File not found' when output file 
missing" { + rm -f "$SCRIPT_OUTPUT_FILE" + + run update_check_result --status "success" --evidence "{}" + + [ "$status" -eq 1 ] + assert_contains "$output" "Error: File not found: $SCRIPT_OUTPUT_FILE" +} + +@test "update_check_result: fails with 'is evidence valid JSON' for invalid JSON" { + run update_check_result --status "success" --evidence "not-json" + + [ "$status" -eq 1 ] + assert_contains "$output" "Error: Failed to update JSON (is evidence valid JSON?)" +} + +@test "update_check_result: fails with 'status and evidence are required' when missing" { + run update_check_result --evidence "{}" + [ "$status" -eq 1 ] + assert_contains "$output" "Error: status and evidence are required" + + run update_check_result --status "success" + [ "$status" -eq 1 ] + assert_contains "$output" "Error: status and evidence are required" +} + +# ============================================================================= +# update_check_result - Positional Arguments +# ============================================================================= +@test "update_check_result: supports positional arguments (legacy API)" { + update_check_result "success" '{"test":"value"}' + + status_result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$status_result" "success" + + evidence=$(jq -r '.evidence.test' "$SCRIPT_OUTPUT_FILE") + assert_equal "$evidence" "value" +} + +# ============================================================================= +# update_check_result - Log Limits +# ============================================================================= +@test "update_check_result: limits logs to 20 lines" { + for i in {1..30}; do + echo "log line $i" >> "$SCRIPT_LOG_FILE" + done + + update_check_result --status "success" --evidence "{}" + + logs_count=$(jq -r '.logs | length' "$SCRIPT_OUTPUT_FILE") + [ "$logs_count" -le 20 ] +} + +# ============================================================================= +# notify_results +# 
============================================================================= +@test "notify_results: fails with 'No JSON result files found' when empty" { + rm -rf "$NP_OUTPUT_DIR"/* + + run notify_results + + [ "$status" -eq 1 ] + local clean=$(strip_ansi "$output") + assert_contains "$clean" "⚠ No JSON result files found in $NP_OUTPUT_DIR" +} + +@test "notify_results: succeeds when JSON files exist" { + echo '{"category":"scope","status":"success","evidence":{}}' > "$NP_OUTPUT_DIR/test.json" + + run notify_results + + [ "$status" -eq 0 ] +} + +@test "notify_results: excludes files in data directory" { + mkdir -p "$NP_OUTPUT_DIR/data" + echo '{"should":"be excluded"}' > "$NP_OUTPUT_DIR/data/pods.json" + + run notify_results + + [ "$status" -eq 1 ] + local clean=$(strip_ansi "$output") + assert_contains "$clean" "⚠ No JSON result files found in $NP_OUTPUT_DIR" +} diff --git a/k8s/diagnose/tests/networking/alb_capacity_check.bats b/k8s/diagnose/tests/networking/alb_capacity_check.bats new file mode 100644 index 00000000..001713d6 --- /dev/null +++ b/k8s/diagnose/tests/networking/alb_capacity_check.bats @@ -0,0 +1,393 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/networking/alb_capacity_check +# ============================================================================= + +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export SCOPE_LABEL_SELECTOR="scope_id=123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + export SCRIPT_LOG_FILE="$(mktemp)" + export INGRESSES_FILE="$(mktemp)" + export EVENTS_FILE="$(mktemp)" + export ALB_CONTROLLER_PODS_FILE="$(mktemp)" + export ALB_CONTROLLER_LOGS_DIR="$(mktemp -d)" + export ALB_CONTROLLER_NAMESPACE="kube-system" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$INGRESSES_FILE" + rm -f "$EVENTS_FILE" + rm -f "$ALB_CONTROLLER_PODS_FILE" + rm -rf "$ALB_CONTROLLER_LOGS_DIR" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "networking/alb_capacity_check: success when no issues found" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/scheme": "internet-facing", + "alb.ingress.kubernetes.io/subnets": "subnet-1" + } + }, + "spec": { + "rules": [{"host": "app.example.com", "http": {"paths": [{"path": "/", "backend": {"service": {"name": "my-svc", "port": {"number": 80}}}}]}}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No IP exhaustion issues detected" + assert_contains "$stripped" "No critical ALB 
capacity or configuration issues detected" +} + +@test "networking/alb_capacity_check: updates check result to success when no issues" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/scheme": "internet-facing", + "alb.ingress.kubernetes.io/subnets": "subnet-1" + } + }, + "spec": { + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + source "$BATS_TEST_DIRNAME/../../networking/alb_capacity_check" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "networking/alb_capacity_check: detects IP exhaustion in controller logs" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/scheme": "internet-facing" + } + }, + "spec": { + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "ERROR no available ip addresses in subnet" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "ALB subnet IP exhaustion detected" +} + +@test "networking/alb_capacity_check: detects certificate errors in controller logs" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": 
"my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/certificate-arn": "arn:aws:acm:us-east-1:123456:certificate/abc", + "alb.ingress.kubernetes.io/scheme": "internet-facing" + } + }, + "spec": { + "tls": [{"hosts": ["app.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "my-ingress certificate not found error" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Certificate validation errors found" +} + +@test "networking/alb_capacity_check: detects host in rules but not in TLS" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/certificate-arn": "arn:aws:acm:us-east-1:123456:certificate/abc", + "alb.ingress.kubernetes.io/scheme": "internet-facing" + } + }, + "spec": { + "tls": [{"hosts": ["other.example.com"]}], + "rules": [ + {"host": "app.example.com", "http": {"paths": [{"path": "/", "backend": {"service": {"name": "my-svc", "port": {"number": 80}}}}]}}, + {"host": "other.example.com", "http": {"paths": [{"path": "/", "backend": {"service": {"name": "my-svc", "port": {"number": 80}}}}]}} + ] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Host 'app.example.com' 
in rules but not in TLS configuration" +} + +@test "networking/alb_capacity_check: warns when TLS hosts but no certificate ARN" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/scheme": "internet-facing" + } + }, + "spec": { + "tls": [{"hosts": ["app.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "TLS hosts configured but no ACM certificate ARN annotation" +} + +@test "networking/alb_capacity_check: warns when no scheme annotation" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": {} + }, + "spec": { + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No scheme annotation (defaulting to internal)" +} + +@test "networking/alb_capacity_check: detects subnet error events" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/scheme": "internet-facing" + } + }, + "spec": { + "rules": [{"host": "app.example.com"}] + } + }] 
+} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + cat > "$EVENTS_FILE" << 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Warning", + "reason": "FailedDeployModel", + "message": "Failed to find subnet in availability zone us-east-1a", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Subnet configuration issues" +} + +@test "networking/alb_capacity_check: updates check result to failed on issues" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/scheme": "internet-facing" + } + }, + "spec": { + "tls": [{"hosts": ["other.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + source "$BATS_TEST_DIRNAME/../../networking/alb_capacity_check" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= +@test "networking/alb_capacity_check: skips when no ingresses" { + echo '{"items":[]}' > "$INGRESSES_FILE" + echo '{"items":[]}' > "$ALB_CONTROLLER_PODS_FILE" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source 
'$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +@test "networking/alb_capacity_check: reports no SSL/TLS when not configured" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/scheme": "internet-facing" + } + }, + "spec": { + "rules": [{"host": "app.example.com", "http": {"paths": [{"path": "/", "backend": {"service": {"name": "my-svc", "port": {"number": 80}}}}]}}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No SSL/TLS configured (HTTP only)" +} + +@test "networking/alb_capacity_check: shows auto-discovered subnets info when no subnet annotation" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "alb.ingress.kubernetes.io/scheme": "internet-facing" + } + }, + "spec": { + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log line" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + echo '{"items":[]}' > "$EVENTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/alb_capacity_check'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Using auto-discovered subnets" +} diff --git a/k8s/diagnose/tests/networking/ingress_backend_service.bats 
b/k8s/diagnose/tests/networking/ingress_backend_service.bats new file mode 100644 index 00000000..2099fb52 --- /dev/null +++ b/k8s/diagnose/tests/networking/ingress_backend_service.bats @@ -0,0 +1,484 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/networking/ingress_backend_service +# ============================================================================= + +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export SCOPE_LABEL_SELECTOR="scope_id=123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + export SCRIPT_LOG_FILE="$(mktemp)" + export INGRESSES_FILE="$(mktemp)" + export SERVICES_FILE="$(mktemp)" + export ENDPOINTS_FILE="$(mktemp)" + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$INGRESSES_FILE" + rm -f "$SERVICES_FILE" + rm -f "$ENDPOINTS_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "networking/ingress_backend_service: success with backend service having ready endpoints" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com", + "http": { + "paths": [{ + "path": "/", + "backend": {"service": {"name": "my-svc", "port": {"number": 80}}} + }] + } + }] + } + }] +} +EOF + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": 
"test"}, + "ports": [{"port": 80, "targetPort": 8080}] + } + }] +} +EOF + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "addresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], + "ports": [{"port": 8080}] + }] + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Backend: my-svc:80 (1 ready endpoint(s))" + assert_contains "$stripped" "All backend services healthy" +} + +@test "networking/ingress_backend_service: updates check result to success" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com", + "http": { + "paths": [{ + "path": "/", + "backend": {"service": {"name": "my-svc", "port": {"number": 80}}} + }] + } + }] + } + }] +} +EOF + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "test"}, "ports": [{"port": 80, "targetPort": 8080}]} + }] +} +EOF + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{"addresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], "ports": [{"port": 8080}]}] + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "networking/ingress_backend_service: error when default backend service not found" { + cat 
> "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "defaultBackend": {"service": {"name": "missing-svc", "port": {"number": 80}}}, + "rules": [] + } + }] +} +EOF + echo '{"items":[]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$ENDPOINTS_FILE" + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Default backend: Service 'missing-svc' not found" +} + +@test "networking/ingress_backend_service: error when default backend has no endpoints" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "defaultBackend": {"service": {"name": "my-svc", "port": {"number": 80}}}, + "rules": [] + } + }] +} +EOF + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "test"}, "ports": [{"port": 80, "targetPort": 8080}]} + }] +} +EOF + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [] + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Default backend: my-svc:80 (no endpoints)" +} + +@test "networking/ingress_backend_service: warns about not-ready endpoints alongside ready ones" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com", + "http": { + "paths": [{ + "path": "/", + "backend": {"service": {"name": "my-svc", "port": {"number": 80}}} + }] + } + }] + } + }] +} +EOF + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": 
"my-svc"}, + "spec": {"selector": {"app": "test"}, "ports": [{"port": 80, "targetPort": 8080}]} + }] +} +EOF + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "addresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], + "notReadyAddresses": [{"ip": "10.0.0.2", "targetRef": {"name": "pod-2"}}], + "ports": [{"port": 8080}] + }] + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Backend: my-svc:80 (1 ready endpoint(s))" + assert_contains "$stripped" "Also has 1 not ready endpoint(s)" +} + +@test "networking/ingress_backend_service: handles service with multiple ports" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com", + "http": { + "paths": [{ + "path": "/", + "backend": {"service": {"name": "my-svc", "port": {"number": 80}}} + }] + } + }] + } + }] +} +EOF + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "test"}, "ports": [{"port": 80, "targetPort": 8080}, {"port": 443, "targetPort": 8443}]} + }] +} +EOF + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{"addresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], "ports": [{"port": 8080}]}] + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Backend: my-svc:80 (1 ready endpoint(s))" + assert_contains "$stripped" "All backend services healthy" +} + +@test 
"networking/ingress_backend_service: error when port not found in service" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com", + "http": { + "paths": [{ + "path": "/", + "backend": {"service": {"name": "my-svc", "port": {"number": 9090}}} + }] + } + }] + } + }] +} +EOF + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "test"}, "ports": [{"port": 80, "targetPort": 8080}]} + }] +} +EOF + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{"addresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], "ports": [{"port": 8080}]}] + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Backend: Port 9090 not found in service my-svc" +} + +@test "networking/ingress_backend_service: error when backend service not found in namespace" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com", + "http": { + "paths": [{ + "path": "/", + "backend": {"service": {"name": "missing-svc", "port": {"number": 80}}} + }] + } + }] + } + }] +} +EOF + echo '{"items":[]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$ENDPOINTS_FILE" + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Service 'missing-svc' not found in namespace" +} + +@test "networking/ingress_backend_service: warns when no path rules defined" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + 
"items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com" + }] + } + }] +} +EOF + echo '{"items":[]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$ENDPOINTS_FILE" + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No path rules defined" +} + +@test "networking/ingress_backend_service: updates check result to failed on issues" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com", + "http": { + "paths": [{ + "path": "/", + "backend": {"service": {"name": "missing-svc", "port": {"number": 80}}} + }] + } + }] + } + }] +} +EOF + echo '{"items":[]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$ENDPOINTS_FILE" + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= +@test "networking/ingress_backend_service: skips when no ingresses" { + echo '{"items":[]}' > "$INGRESSES_FILE" + echo '{"items":[]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$ENDPOINTS_FILE" + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +@test "networking/ingress_backend_service: shows endpoint details with pod name and IP" { + cat > 
"$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "app.example.com", + "http": { + "paths": [{ + "path": "/", + "backend": {"service": {"name": "my-svc", "port": {"number": 80}}} + }] + } + }] + } + }] +} +EOF + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "test"}, "ports": [{"port": 80, "targetPort": 8080}]} + }] +} +EOF + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "addresses": [ + {"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}, + {"ip": "10.0.0.2", "targetRef": {"name": "pod-2"}} + ], + "ports": [{"port": 8080}] + }] + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_backend_service'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "pod-1 -> 10.0.0.1:8080" + assert_contains "$stripped" "pod-2 -> 10.0.0.2:8080" +} diff --git a/k8s/diagnose/tests/networking/ingress_class_validation.bats b/k8s/diagnose/tests/networking/ingress_class_validation.bats new file mode 100644 index 00000000..18aa2920 --- /dev/null +++ b/k8s/diagnose/tests/networking/ingress_class_validation.bats @@ -0,0 +1,213 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/networking/ingress_class_validation +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export SCOPE_LABEL_SELECTOR="scope_id=123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export INGRESSES_FILE="$(mktemp)" + export INGRESSCLASSES_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$INGRESSES_FILE" + rm -f "$INGRESSCLASSES_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "networking/ingress_class_validation: success with valid ingressClassName" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"ingressClassName": "alb"} + }] +} +EOF + cat > "$INGRESSCLASSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "alb"} + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_class_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "IngressClass 'alb' is valid" +} + +@test "networking/ingress_class_validation: success with default class" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {} + }] +} +EOF + cat > "$INGRESSCLASSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "nginx", + "annotations": { + "ingressclass.kubernetes.io/is-default-class": "true" + } + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_class_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Using default IngressClass" + assert_contains "$output" 
"nginx" +} + +@test "networking/ingress_class_validation: handles deprecated annotation" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "my-ingress", + "annotations": { + "kubernetes.io/ingress.class": "alb" + } + }, + "spec": {} + }] +} +EOF + cat > "$INGRESSCLASSES_FILE" << 'EOF' +{ + "items": [{"metadata": {"name": "alb"}}] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_class_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "deprecated annotation" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "networking/ingress_class_validation: fails when class not found" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"ingressClassName": "nonexistent"} + }] +} +EOF + cat > "$INGRESSCLASSES_FILE" << 'EOF' +{ + "items": [{"metadata": {"name": "alb"}}] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_class_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "IngressClass 'nonexistent' not found" +} + +@test "networking/ingress_class_validation: shows available classes on failure" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"ingressClassName": "wrong"} + }] +} +EOF + cat > "$INGRESSCLASSES_FILE" << 'EOF' +{ + "items": [ + {"metadata": {"name": "alb"}}, + {"metadata": {"name": "nginx"}} + ] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_class_validation'" + + assert_contains "$output" "Available classes:" + assert_contains "$output" "alb" + assert_contains "$output" "nginx" +} + +@test 
"networking/ingress_class_validation: fails when no class and no default" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {} + }] +} +EOF + cat > "$INGRESSCLASSES_FILE" << 'EOF' +{ + "items": [{"metadata": {"name": "alb"}}] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_class_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "No IngressClass specified" + assert_contains "$output" "no default found" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "networking/ingress_class_validation: skips when no ingresses" { + echo '{"items":[]}' > "$INGRESSES_FILE" + echo '{"items":[]}' > "$INGRESSCLASSES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_class_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +# ============================================================================= +# Status Update Tests +# ============================================================================= +@test "networking/ingress_class_validation: updates status to failed on invalid class" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"ingressClassName": "invalid"} + }] +} +EOF + echo '{"items":[]}' > "$INGRESSCLASSES_FILE" + + source "$BATS_TEST_DIRNAME/../../networking/ingress_class_validation" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} diff --git a/k8s/diagnose/tests/networking/ingress_controller_sync.bats b/k8s/diagnose/tests/networking/ingress_controller_sync.bats new file mode 100644 index 00000000..499d01c7 --- /dev/null +++ 
b/k8s/diagnose/tests/networking/ingress_controller_sync.bats @@ -0,0 +1,345 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/networking/ingress_controller_sync +# ============================================================================= + +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export SCOPE_LABEL_SELECTOR="scope_id=123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + export SCRIPT_LOG_FILE="$(mktemp)" + export INGRESSES_FILE="$(mktemp)" + export EVENTS_FILE="$(mktemp)" + export ALB_CONTROLLER_PODS_FILE="$(mktemp)" + export ALB_CONTROLLER_LOGS_DIR="$(mktemp -d)" + export ALB_CONTROLLER_NAMESPACE="kube-system" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$INGRESSES_FILE" + rm -f "$EVENTS_FILE" + rm -f "$ALB_CONTROLLER_PODS_FILE" + rm -rf "$ALB_CONTROLLER_LOGS_DIR" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "networking/ingress_controller_sync: success with SuccessfullyReconciled event and ALB address" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{"host": "app.example.com"}] + }, + "status": { + "loadBalancer": { + "ingress": [{"hostname": "my-alb.us-east-1.elb.amazonaws.com"}] + } + } + }] +} +EOF + cat > "$EVENTS_FILE" << 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Normal", + "reason": 
"SuccessfullyReconciled", + "message": "Successfully reconciled", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "successfully built model for my-ingress" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Successfully reconciled at 2024-01-01T00:00:00Z" + assert_contains "$stripped" "ALB address assigned: my-alb.us-east-1.elb.amazonaws.com" +} + +@test "networking/ingress_controller_sync: updates check result to success" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"rules": [{"host": "app.example.com"}]}, + "status": {"loadBalancer": {"ingress": [{"hostname": "my-alb.us-east-1.elb.amazonaws.com"}]}} + }] +} +EOF + cat > "$EVENTS_FILE" << 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Normal", + "reason": "SuccessfullyReconciled", + "message": "Successfully reconciled", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "successfully built model for my-ingress" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + + source "$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "networking/ingress_controller_sync: warns when no ALB controller pods found" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": 
{"name": "my-ingress"}, + "spec": {"rules": [{"host": "app.example.com"}]}, + "status": {"loadBalancer": {"ingress": [{"hostname": "my-alb.us-east-1.elb.amazonaws.com"}]}} + }] +} +EOF + cat > "$EVENTS_FILE" << 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Normal", + "reason": "SuccessfullyReconciled", + "message": "Successfully reconciled", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + echo '{"items": []}' > "$ALB_CONTROLLER_PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "ALB controller pods not found in namespace kube-system" +} + +@test "networking/ingress_controller_sync: reports error events" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"rules": [{"host": "app.example.com"}]}, + "status": {"loadBalancer": {"ingress": [{"hostname": "my-alb.us-east-1.elb.amazonaws.com"}]}} + }] +} +EOF + cat > "$EVENTS_FILE" << 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Warning", + "reason": "FailedDeployModel", + "message": "Failed to deploy model", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Found error/warning events:" +} + +@test "networking/ingress_controller_sync: warns when no events found for ingress" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + 
"spec": {"rules": [{"host": "app.example.com"}]}, + "status": {"loadBalancer": {"ingress": [{"hostname": "my-alb.us-east-1.elb.amazonaws.com"}]}} + }] +} +EOF + echo '{"items":[]}' > "$EVENTS_FILE" + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No events found for this ingress" +} + +@test "networking/ingress_controller_sync: error when ALB address not assigned" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"rules": [{"host": "app.example.com"}]}, + "status": {} + }] +} +EOF + cat > "$EVENTS_FILE" << 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Normal", + "reason": "SuccessfullyReconciled", + "message": "Successfully reconciled", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "ALB address not assigned yet (sync may be in progress or failing)" +} + +@test "networking/ingress_controller_sync: detects errors in controller logs" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"rules": [{"host": "app.example.com"}]}, + "status": {"loadBalancer": {"ingress": [{"hostname": "my-alb.us-east-1.elb.amazonaws.com"}]}} + }] +} +EOF + cat > "$EVENTS_FILE" 
<< 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Normal", + "reason": "SuccessfullyReconciled", + "message": "Successfully reconciled", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo 'level=error msg="failed to reconcile my-ingress"' > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Found errors in ALB controller logs" +} + +@test "networking/ingress_controller_sync: updates check result to failed on issues" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"rules": [{"host": "app.example.com"}]}, + "status": {} + }] +} +EOF + cat > "$EVENTS_FILE" << 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Normal", + "reason": "SuccessfullyReconciled", + "message": "Successfully reconciled", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "controller-pod"}}]} +EOF + echo "normal log" > "$ALB_CONTROLLER_LOGS_DIR/controller-pod.log" + + source "$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= +@test "networking/ingress_controller_sync: skips when no ingresses" { + echo '{"items":[]}' > "$INGRESSES_FILE" + echo '{"items":[]}' > "$EVENTS_FILE" + echo '{"items":[]}' > "$ALB_CONTROLLER_PODS_FILE" + + run bash -c "source 
'$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +@test "networking/ingress_controller_sync: shows controller pod names" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"rules": [{"host": "app.example.com"}]}, + "status": {"loadBalancer": {"ingress": [{"hostname": "my-alb.us-east-1.elb.amazonaws.com"}]}} + }] +} +EOF + cat > "$EVENTS_FILE" << 'EOF' +{ + "items": [{ + "involvedObject": {"name": "my-ingress", "kind": "Ingress"}, + "type": "Normal", + "reason": "SuccessfullyReconciled", + "message": "Successfully reconciled", + "lastTimestamp": "2024-01-01T00:00:00Z" + }] +} +EOF + cat > "$ALB_CONTROLLER_PODS_FILE" << 'EOF' +{"items": [{"metadata": {"name": "aws-load-balancer-controller-abc123"}}]} +EOF + echo "successfully built model for my-ingress" > "$ALB_CONTROLLER_LOGS_DIR/aws-load-balancer-controller-abc123.log" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_controller_sync'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Found ALB controller pod(s): aws-load-balancer-controller-abc123" +} diff --git a/k8s/diagnose/tests/networking/ingress_existence.bats b/k8s/diagnose/tests/networking/ingress_existence.bats new file mode 100644 index 00000000..0dc51e5f --- /dev/null +++ b/k8s/diagnose/tests/networking/ingress_existence.bats @@ -0,0 +1,120 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/networking/ingress_existence +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export SCOPE_LABEL_SELECTOR="scope_id=123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export INGRESSES_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$INGRESSES_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "networking/ingress_existence: success when ingresses found" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{"host": "api.example.com"}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_existence'" + + [ "$status" -eq 0 ] + assert_contains "$output" "ingress(es)" + assert_contains "$output" "my-ingress" +} + +@test "networking/ingress_existence: shows hosts for each ingress" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [ + {"host": "api.example.com"}, + {"host": "www.example.com"} + ] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_existence'" + + assert_contains "$output" "api.example.com" + assert_contains "$output" "www.example.com" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "networking/ingress_existence: fails when no ingresses" { + echo '{"items":[]}' > 
"$INGRESSES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_existence'" + + [ "$status" -eq 1 ] + assert_contains "$output" "No ingresses found" +} + +@test "networking/ingress_existence: shows action when no ingresses" { + echo '{"items":[]}' > "$INGRESSES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_existence'" + + assert_contains "$output" "🔧" + assert_contains "$output" "Create ingress" +} + +@test "networking/ingress_existence: updates check result to failed" { + echo '{"items":[]}' > "$INGRESSES_FILE" + + source "$BATS_TEST_DIRNAME/../../networking/ingress_existence" || true + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +# ============================================================================= +# Multiple Ingresses Tests +# ============================================================================= +@test "networking/ingress_existence: handles multiple ingresses" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [ + {"metadata": {"name": "ing-1"}, "spec": {"rules": [{"host": "a.com"}]}}, + {"metadata": {"name": "ing-2"}, "spec": {"rules": [{"host": "b.com"}]}} + ] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_existence'" + + [ "$status" -eq 0 ] + assert_contains "$output" "ingress(es)" + assert_contains "$output" "ing-1" + assert_contains "$output" "ing-2" +} diff --git a/k8s/diagnose/tests/networking/ingress_host_rules.bats b/k8s/diagnose/tests/networking/ingress_host_rules.bats new file mode 100644 index 00000000..1ad0cea9 --- /dev/null +++ b/k8s/diagnose/tests/networking/ingress_host_rules.bats @@ -0,0 +1,231 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for 
diagnose/networking/ingress_host_rules +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export SCOPE_LABEL_SELECTOR="scope_id=123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export INGRESSES_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$INGRESSES_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "networking/ingress_host_rules: success with valid host and path" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "api.example.com", + "http": { + "paths": [{ + "path": "/", + "pathType": "Prefix", + "backend": {"service": {"name": "my-svc", "port": {"number": 80}}} + }] + } + }] + }, + "status": { + "loadBalancer": {"ingress": [{"hostname": "lb.example.com"}]} + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Host: api.example.com" + assert_contains "$output" "Path: /" +} + +@test "networking/ingress_host_rules: shows ingress address" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "api.example.com", + "http": {"paths": [{"path": "/", "pathType": "Prefix", "backend": {"service": {"name": "svc", "port": {"number": 80}}}}]} + }] + }, + 
"status": { + "loadBalancer": {"ingress": [{"ip": "1.2.3.4"}]} + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + assert_contains "$output" "Ingress address: 1.2.3.4" +} + +# ============================================================================= +# Warning Tests +# ============================================================================= +@test "networking/ingress_host_rules: warns on catch-all host" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "http": { + "paths": [{"path": "/", "pathType": "Prefix", "backend": {"service": {"name": "svc", "port": {"number": 80}}}}] + } + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + [ "$status" -eq 0 ] + assert_contains "$output" "catch-all" +} + +@test "networking/ingress_host_rules: warns when address not assigned" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "api.example.com", + "http": {"paths": [{"path": "/", "pathType": "Prefix", "backend": {"service": {"name": "svc", "port": {"number": 80}}}}]} + }] + }, + "status": {} + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + assert_contains "$output" "not yet assigned" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "networking/ingress_host_rules: fails when no rules and no default backend" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": {"rules": []} + }] +} +EOF + + run bash -c "source 
'$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + [ "$status" -eq 0 ] + assert_contains "$output" "No rules and no default backend" +} + +@test "networking/ingress_host_rules: fails on invalid pathType" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "api.example.com", + "http": { + "paths": [{ + "path": "/api", + "pathType": "InvalidType", + "backend": {"service": {"name": "svc", "port": {"number": 80}}} + }] + } + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Invalid pathType" +} + +@test "networking/ingress_host_rules: fails when no paths defined" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{ + "host": "api.example.com", + "http": {"paths": []} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + [ "$status" -eq 0 ] + assert_contains "$output" "No paths defined" +} + +# ============================================================================= +# Default Backend Tests +# ============================================================================= +@test "networking/ingress_host_rules: success with default backend only" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "defaultBackend": {"service": {"name": "default-svc", "port": {"number": 80}}}, + "rules": [] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Catch-all rule" + assert_contains "$output" 
"default-svc" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "networking/ingress_host_rules: skips when no ingresses" { + echo '{"items":[]}' > "$INGRESSES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_host_rules'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} diff --git a/k8s/diagnose/tests/networking/ingress_tls_configuration.bats b/k8s/diagnose/tests/networking/ingress_tls_configuration.bats new file mode 100644 index 00000000..e2064c9a --- /dev/null +++ b/k8s/diagnose/tests/networking/ingress_tls_configuration.bats @@ -0,0 +1,253 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/networking/ingress_tls_configuration +# ============================================================================= + +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export SCOPE_LABEL_SELECTOR="scope_id=123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + export SCRIPT_LOG_FILE="$(mktemp)" + export INGRESSES_FILE="$(mktemp)" + export SECRETS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$INGRESSES_FILE" + rm -f "$SECRETS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "networking/ingress_tls_configuration: success when TLS secret exists with correct type" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "tls": [{"secretName": "my-tls-secret", "hosts": ["app.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$SECRETS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-tls-secret", "annotations": {"tls.crt": "true", "tls.key": "true"}}, + "type": "kubernetes.io/tls" + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "TLS Secret: my-tls-secret (valid for hosts: app.example.com)" + assert_contains "$stripped" "TLS configuration valid for all" +} + +@test "networking/ingress_tls_configuration: updates check result to success" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "tls": [{"secretName": "my-tls-secret", "hosts": ["app.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > 
"$SECRETS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-tls-secret", "annotations": {"tls.crt": "true", "tls.key": "true"}}, + "type": "kubernetes.io/tls" + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "networking/ingress_tls_configuration: info when no TLS hosts configured" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + echo '{"items":[]}' > "$SECRETS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No TLS configuration (HTTP only)" +} + +@test "networking/ingress_tls_configuration: error when TLS secret not found" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "tls": [{"secretName": "missing-secret", "hosts": ["app.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + echo '{"items":[]}' > "$SECRETS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "TLS Secret: 'missing-secret' not found in namespace" +} + +@test "networking/ingress_tls_configuration: error when TLS secret has wrong type" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": 
"my-ingress"}, + "spec": { + "tls": [{"secretName": "my-tls-secret", "hosts": ["app.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + cat > "$SECRETS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-tls-secret", "annotations": {}}, + "type": "Opaque" + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "TLS Secret: my-tls-secret has wrong type 'Opaque' (expected kubernetes.io/tls)" +} + +@test "networking/ingress_tls_configuration: updates check result to failed on issues" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "tls": [{"secretName": "missing-secret", "hosts": ["app.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + echo '{"items":[]}' > "$SECRETS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +@test "networking/ingress_tls_configuration: shows action when secret not found" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "tls": [{"secretName": "missing-secret", "hosts": ["app.example.com"]}], + "rules": [{"host": "app.example.com"}] + } + }] +} +EOF + echo '{"items":[]}' > "$SECRETS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Create TLS secret or update ingress configuration" +} + +# ============================================================================= +# Edge Cases +# 
============================================================================= +@test "networking/ingress_tls_configuration: skips when no ingresses" { + echo '{"items":[]}' > "$INGRESSES_FILE" + echo '{"items":[]}' > "$SECRETS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +@test "networking/ingress_tls_configuration: handles multiple TLS entries" { + cat > "$INGRESSES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-ingress"}, + "spec": { + "tls": [ + {"secretName": "secret-1", "hosts": ["app1.example.com"]}, + {"secretName": "secret-2", "hosts": ["app2.example.com"]} + ], + "rules": [ + {"host": "app1.example.com"}, + {"host": "app2.example.com"} + ] + } + }] +} +EOF + cat > "$SECRETS_FILE" << 'EOF' +{ + "items": [ + {"metadata": {"name": "secret-1", "annotations": {"tls.crt": "true", "tls.key": "true"}}, "type": "kubernetes.io/tls"}, + {"metadata": {"name": "secret-2", "annotations": {"tls.crt": "true", "tls.key": "true"}}, "type": "kubernetes.io/tls"} + ] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../networking/ingress_tls_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Checking TLS configuration for ingress: my-ingress" + assert_contains "$stripped" "TLS configuration valid for all" +} diff --git a/k8s/diagnose/tests/notify_check_running.bats b/k8s/diagnose/tests/notify_check_running.bats new file mode 100644 index 00000000..f25866d7 --- /dev/null +++ b/k8s/diagnose/tests/notify_check_running.bats @@ -0,0 +1,52 @@ +#!/usr/bin/env bats +# Unit tests for diagnose/notify_check_running + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../utils/diagnose_utils" + + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" +} + +@test "notify_check_running: sets status to running" { + source "$BATS_TEST_DIRNAME/../notify_check_running" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "running" +} + +@test "notify_check_running: sets empty evidence" { + source "$BATS_TEST_DIRNAME/../notify_check_running" + + result=$(jq -c '.evidence' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "{}" +} + +@test "notify_check_running: sets start_at timestamp" { + source "$BATS_TEST_DIRNAME/../notify_check_running" + + start_at=$(jq -r '.start_at' "$SCRIPT_OUTPUT_FILE") + assert_not_empty "$start_at" + # Should be ISO 8601 format with T and Z + assert_contains "$start_at" "T" + assert_contains "$start_at" "Z" +} + +@test "notify_check_running: fails when SCRIPT_OUTPUT_FILE missing" { + rm -f "$SCRIPT_OUTPUT_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../notify_check_running'" + + [ "$status" -ne 0 ] + assert_contains "$output" "File not found" +} diff --git a/k8s/diagnose/tests/notify_diagnose_results.bats b/k8s/diagnose/tests/notify_diagnose_results.bats new file mode 100644 index 00000000..e8da3e11 --- /dev/null +++ b/k8s/diagnose/tests/notify_diagnose_results.bats @@ -0,0 +1,85 @@ +#!/usr/bin/env bats +# Unit tests for diagnose/notify_diagnose_results + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../utils/diagnose_utils" + + export NP_OUTPUT_DIR="$(mktemp -d)" + export NP_ACTION_CONTEXT='{ + "notification": { + "id": "action-123", + "service": {"id": "service-456"} + } + }' + + # Mock np CLI + np() { + echo "np called with: $*" >&2 + return 0 + } + export -f np +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + unset NP_OUTPUT_DIR + unset NP_ACTION_CONTEXT + unset -f np +} + +@test "notify_diagnose_results: fails when no JSON files exist" { + run bash -c "source '$BATS_TEST_DIRNAME/../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../notify_diagnose_results'" + + [ "$status" -eq 1 ] + assert_contains "$output" "No JSON result files found" +} + +@test "notify_diagnose_results: succeeds when JSON files exist" { + # Create a test JSON result file + echo '{"category":"scope","status":"success","evidence":{}}' > "$NP_OUTPUT_DIR/test_check.json" + + run bash -c "source '$BATS_TEST_DIRNAME/../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../notify_diagnose_results'" + + [ "$status" -eq 0 ] +} + +@test "notify_diagnose_results: calls np service action patch" { + # Create a test JSON result file + echo '{"category":"scope","status":"success","evidence":{}}' > "$NP_OUTPUT_DIR/test_check.json" + + # Capture np calls + np() { + echo "NP_CALLED: $*" + return 0 + } + export -f np + + run bash -c "source '$BATS_TEST_DIRNAME/../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../notify_diagnose_results'" + + [ "$status" -eq 0 ] + assert_contains "$output" "service action patch" +} + +@test "notify_diagnose_results: excludes files in data directory" { + # Create data directory with JSON file that should be excluded + mkdir -p "$NP_OUTPUT_DIR/data" + echo '{"should":"be excluded"}' > "$NP_OUTPUT_DIR/data/pods.json" + + # No other JSON files - should fail + run bash -c "source '$BATS_TEST_DIRNAME/../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../notify_diagnose_results'" + + 
[ "$status" -eq 1 ] + assert_contains "$output" "No JSON result files found" +} + +@test "notify_diagnose_results: processes multiple check results" { + # Create multiple check result files + echo '{"category":"scope","status":"success","evidence":{}}' > "$NP_OUTPUT_DIR/check1.json" + echo '{"category":"service","status":"failed","evidence":{}}' > "$NP_OUTPUT_DIR/check2.json" + + run bash -c "source '$BATS_TEST_DIRNAME/../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../notify_diagnose_results'" + + [ "$status" -eq 0 ] +} diff --git a/k8s/diagnose/tests/scope/container_crash_detection.bats b/k8s/diagnose/tests/scope/container_crash_detection.bats new file mode 100644 index 00000000..c0a17c44 --- /dev/null +++ b/k8s/diagnose/tests/scope/container_crash_detection.bats @@ -0,0 +1,270 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/container_crash_detection +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export PODS_FILE="$(mktemp)" + + # Mock kubectl logs + kubectl() { + echo "Application startup error" + echo "Exception: NullPointerException" + } + export -f kubectl +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$PODS_FILE" + unset -f kubectl +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "scope/container_crash_detection: success when no crashes" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "ready": true, + "restartCount": 0, + "state": {"running": {}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + [ "$status" -eq 0 ] + assert_contains "$output" "running without crashes" +} + +# ============================================================================= +# CrashLoopBackOff Tests +# ============================================================================= +@test "scope/container_crash_detection: detects CrashLoopBackOff" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "ready": false, + "restartCount": 5, + "state": {"waiting": {"reason": "CrashLoopBackOff"}}, + "lastState": {"terminated": {"exitCode": 1, "reason": "Error"}} + }] + } + }] +} +EOF + + run bash -c "source 
'$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + [ "$status" -eq 0 ] + assert_contains "$output" "CrashLoopBackOff" + assert_contains "$output" "pod-1" +} + +@test "scope/container_crash_detection: shows exit code details" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "restartCount": 3, + "state": {"waiting": {"reason": "CrashLoopBackOff"}}, + "lastState": {"terminated": {"exitCode": 137, "reason": "OOMKilled"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + assert_contains "$output" "Exit Code: 137" + assert_contains "$output" "OOMKilled" + assert_contains "$output" "out of memory" +} + +@test "scope/container_crash_detection: explains common exit codes" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "restartCount": 2, + "state": {"waiting": {"reason": "CrashLoopBackOff"}}, + "lastState": {"terminated": {"exitCode": 143, "reason": "SIGTERM"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + assert_contains "$output" "143" + assert_contains "$output" "graceful termination" +} + +# ============================================================================= +# Terminated Container Tests +# ============================================================================= +@test "scope/container_crash_detection: detects terminated containers" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "restartCount": 0, + "state": {"terminated": {"exitCode": 1, "reason": "Error"}} + }] + } + }] +} 
+EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + assert_contains "$output" "Terminated container" +} + +@test "scope/container_crash_detection: handles clean exit (exit 0)" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "job-pod"}, + "status": { + "containerStatuses": [{ + "name": "job", + "restartCount": 0, + "state": {"terminated": {"exitCode": 0, "reason": "Completed"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + assert_contains "$output" "Exit 0" + assert_contains "$output" "Clean exit" +} + +# ============================================================================= +# High Restart Count Tests +# ============================================================================= +@test "scope/container_crash_detection: warns on high restart count" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "ready": true, + "restartCount": 5, + "state": {"running": {}}, + "lastState": {"terminated": {"exitCode": 1, "reason": "Error"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + assert_contains "$output" "high restart count" + assert_contains "$output" "Restarts: 5" +} + +@test "scope/container_crash_detection: shows action for intermittent issues" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "ready": true, + "restartCount": 10, + "state": {"running": {}}, + "lastState": {"terminated": {"exitCode": 137, "reason": "OOMKilled"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && 
source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + assert_contains "$output" "🔧" + assert_contains "$output" "intermittent" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "scope/container_crash_detection: skips when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_crash_detection'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +# ============================================================================= +# Status Update Tests +# ============================================================================= +@test "scope/container_crash_detection: updates status to failed on crash" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "restartCount": 3, + "state": {"waiting": {"reason": "CrashLoopBackOff"}}, + "lastState": {"terminated": {"exitCode": 1}} + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/container_crash_detection" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} diff --git a/k8s/diagnose/tests/scope/container_port_health.bats b/k8s/diagnose/tests/scope/container_port_health.bats new file mode 100644 index 00000000..fe60c920 --- /dev/null +++ b/k8s/diagnose/tests/scope/container_port_health.bats @@ -0,0 +1,505 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/container_port_health +# ============================================================================= + +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "scope/container_port_health: success when ports are listening" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c " + timeout() { shift; \"\$@\"; } + export -f timeout + nc() { return 0; } + export -f nc + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Checking pod pod-1:" + assert_contains "$stripped" "Listening" + assert_contains "$stripped" "Port connectivity verified on 1 container(s)" +} + +@test "scope/container_port_health: success with multiple ports listening" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + 
"containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}, {"containerPort": 9090}] + }] + } + }] +} +EOF + + run bash -c " + timeout() { shift; \"\$@\"; } + export -f timeout + nc() { return 0; } + export -f nc + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Port 8080:" + assert_contains "$stripped" "Port 9090:" + assert_contains "$stripped" "Port connectivity verified on 1 container(s)" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "scope/container_port_health: failed when port not listening" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c " + timeout() { shift; \"\$@\"; } + export -f timeout + nc() { return 1; } + export -f nc + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Port 8080:" + assert_contains "$stripped" "Declared but not listening or unreachable" + assert_contains "$stripped" "Check application configuration and ensure it listens on port 8080" +} + +@test "scope/container_port_health: updates status to failed when port not listening" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": 
true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c " + timeout() { shift; \"\$@\"; } + export -f timeout + nc() { return 1; } + export -f nc + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health' + " + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" + + tested=$(jq -r '.evidence.tested' "$SCRIPT_OUTPUT_FILE") + assert_equal "$tested" "1" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "scope/container_port_health: skips when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +@test "scope/container_port_health: skips pod not running" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "ContainerCreating"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: Not running (phase: Pending), skipping port checks" +} + +@test "scope/container_port_health: skips container in CrashLoopBackOff" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + 
"podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "CrashLoopBackOff", "message": "back-off 5m0s restarting"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Cannot test ports - container is in error state: CrashLoopBackOff" + assert_contains "$stripped" "Message: back-off 5m0s restarting" + assert_contains "$stripped" "Fix container startup issues (check container_crash_detection results)" +} + +@test "scope/container_port_health: skips container terminated" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"terminated": {"exitCode": 1, "reason": "Error"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Cannot test ports - container terminated (Exit: 1, Reason: Error)" + assert_contains "$stripped" "Fix container termination (check container_crash_detection results)" +} + +@test "scope/container_port_health: skips container in ContainerCreating" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "ContainerCreating"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", 
+ "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Container is starting (ContainerCreating) - skipping port checks" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= +@test "scope/container_port_health: warns when running but not ready" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c " + timeout() { shift; \"\$@\"; } + export -f timeout + nc() { return 0; } + export -f nc + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Container is running but not ready - port connectivity may fail" +} + +@test "scope/container_port_health: no ports declared" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app" + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains 
"$stripped" "Container 'app': No ports declared" +} + +@test "scope/container_port_health: all containers skipped sets status skipped" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "CrashLoopBackOff"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/container_port_health" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "skipped" + + skipped=$(jq -r '.evidence.skipped' "$SCRIPT_OUTPUT_FILE") + assert_equal "$skipped" "1" +} + +@test "scope/container_port_health: pod with no IP skips port checks" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": null, + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/container_port_health'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: No IP assigned, skipping port checks" +} + +@test "scope/container_port_health: updates status to success when ports healthy" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + timeout() { shift; "$@"; } + export -f 
timeout + nc() { return 0; } + export -f nc + + source "$BATS_TEST_DIRNAME/../../scope/container_port_health" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" + + tested=$(jq -r '.evidence.tested' "$SCRIPT_OUTPUT_FILE") + assert_equal "$tested" "1" + + unset -f nc timeout +} diff --git a/k8s/diagnose/tests/scope/health_probe_endpoints.bats b/k8s/diagnose/tests/scope/health_probe_endpoints.bats new file mode 100644 index 00000000..8a53364b --- /dev/null +++ b/k8s/diagnose/tests/scope/health_probe_endpoints.bats @@ -0,0 +1,721 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/health_probe_endpoints +# ============================================================================= + +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +# Find bash 4+ (required for ${var,,} syntax used in the source script) +find_modern_bash() { + for candidate in /opt/homebrew/bin/bash /usr/local/bin/bash /usr/bin/bash /bin/bash; do + if [[ -x "$candidate" ]]; then + local ver + ver=$("$candidate" -c 'echo "${BASH_VERSINFO[0]}"' 2>/dev/null) || true + if [[ "$ver" -ge 4 ]] 2>/dev/null; then + echo "$candidate" + return 0 + fi + fi + done + echo "" +} + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export PODS_FILE="$(mktemp)" + + MODERN_BASH=$(find_modern_bash) + export MODERN_BASH +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "scope/health_probe_endpoints: success when readiness probe returns 200" { + [[ -n "$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { echo '200'; return 0; } + export -f curl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Checking pod pod-1:" + assert_contains "$stripped" "Readiness Probe on HTTP://8080/health:" + assert_contains "$stripped" "HTTP 200" + assert_contains "$stripped" "Health probes verified on 1 container(s)" +} + +@test "scope/health_probe_endpoints: success with liveness and readiness probes" { + [[ -n 
"$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/ready", "port": 8080, "scheme": "HTTP"} + }, + "livenessProbe": { + "httpGet": {"path": "/alive", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { echo '200'; return 0; } + export -f curl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Readiness Probe on HTTP://8080/ready:" + assert_contains "$stripped" "Liveness Probe on HTTP://8080/alive:" + assert_contains "$stripped" "Health probes verified on 1 container(s)" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "scope/health_probe_endpoints: failed when readiness probe returns 404" { + [[ -n "$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { echo '404'; return 0; } + export -f curl + 
source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Readiness Probe on HTTP://8080/health:" + assert_contains "$stripped" "HTTP 404 - Health check endpoint not found" + assert_contains "$stripped" "Update probe path or implement the endpoint in application" +} + +@test "scope/health_probe_endpoints: updates status to failed on 404" { + [[ -n "$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { echo '404'; return 0; } + export -f curl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +@test "scope/health_probe_endpoints: warning when probe returns 500" { + [[ -n "$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { 
echo '500'; return 0; } + export -f curl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Readiness Probe on HTTP://8080/health:" + assert_contains "$stripped" "HTTP 500 - Application error" + assert_contains "$stripped" "Check application logs and fix internal errors or dependencies" +} + +@test "scope/health_probe_endpoints: updates status to warning on 500" { + [[ -n "$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { echo '500'; return 0; } + export -f curl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "warning" +} + +# ============================================================================= +# Probe Type Tests +# ============================================================================= +@test "scope/health_probe_endpoints: tcp socket probe shows info message" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "tcpSocket": 
{"port": 8080} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Readiness Probe: TCP Socket on port 8080 (tested in port health check)" +} + +@test "scope/health_probe_endpoints: exec probe shows cannot test directly" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "exec": {"command": ["cat", "/tmp/healthy"]} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Readiness Probe: Exec [cat /tmp/healthy] (cannot test directly)" +} + +# ============================================================================= +# Warning Tests +# ============================================================================= +@test "scope/health_probe_endpoints: warns when no probes configured" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints'" + + [ "$status" 
-eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No health probes configured (recommend adding readiness/liveness probes)" +} + +@test "scope/health_probe_endpoints: container not ready shows info" { + [[ -n "$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { echo '200'; return 0; } + export -f curl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Container is running but not ready - probe checks may show why" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "scope/health_probe_endpoints: skips when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +@test "scope/health_probe_endpoints: skips container not running (CrashLoopBackOff)" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "CrashLoopBackOff"}} + }] + }, + "spec": { + "containers": 
[{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Cannot test probes - container is in error state: CrashLoopBackOff" + assert_contains "$stripped" "Fix container startup issues (check container_crash_detection results)" +} + +@test "scope/health_probe_endpoints: skips container terminated" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"terminated": {"exitCode": 1, "reason": "Error"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Cannot test probes - container terminated (Exit: 1, Reason: Error)" + assert_contains "$stripped" "Fix container termination (check container_crash_detection results)" +} + +@test "scope/health_probe_endpoints: all containers skipped sets status skipped" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "CrashLoopBackOff"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": 
"HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "skipped" + + skipped=$(jq -r '.evidence.skipped' "$SCRIPT_OUTPUT_FILE") + assert_equal "$skipped" "1" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= +@test "scope/health_probe_endpoints: pod with no IP skips probe checks" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": null, + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: No IP assigned, skipping probe checks" +} + +@test "scope/health_probe_endpoints: pod not running skips probe checks" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "ContainerCreating"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source 
'$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: Not running (phase: Pending), skipping probe checks" +} + +@test "scope/health_probe_endpoints: updates status to success when probes healthy" { + [[ -n "$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "readinessProbe": { + "httpGet": {"path": "/health", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { echo '200'; return 0; } + export -f curl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" + + tested=$(jq -r '.evidence.tested' "$SCRIPT_OUTPUT_FILE") + assert_equal "$tested" "1" +} + +@test "scope/health_probe_endpoints: startup probe with httpGet returns 200" { + [[ -n "$MODERN_BASH" ]] || skip "bash 4+ required for \${var,,} syntax" + + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "podIP": "10.0.0.1", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "containers": [{ + "name": "app", + "startupProbe": { + "httpGet": {"path": "/startup", "port": 8080, "scheme": "HTTP"} + }, + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run "$MODERN_BASH" -c " + curl() { echo '200'; return 0; } + export -f curl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' 
&& source '$BATS_TEST_DIRNAME/../../scope/health_probe_endpoints' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Startup Probe on HTTP://8080/startup:" + assert_contains "$stripped" "HTTP 200" +} diff --git a/k8s/diagnose/tests/scope/image_pull_status.bats b/k8s/diagnose/tests/scope/image_pull_status.bats new file mode 100644 index 00000000..60c4719e --- /dev/null +++ b/k8s/diagnose/tests/scope/image_pull_status.bats @@ -0,0 +1,252 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/image_pull_status - image pull verification +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + # Setup required environment + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "scope/image_pull_status: success when all images pulled" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/image_pull_status'" + + [ "$status" -eq 0 ] + 
assert_contains "$output" "images pulled successfully" +} + +@test "scope/image_pull_status: updates check result to success" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "state": {"running": {}} + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/image_pull_status" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests - ImagePullBackOff +# ============================================================================= +@test "scope/image_pull_status: fails on ImagePullBackOff" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + "containers": [{"name": "app", "image": "myregistry/myimage:v1"}] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "state": { + "waiting": { + "reason": "ImagePullBackOff", + "message": "rpc error: code = Unknown desc = unauthorized" + } + } + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/image_pull_status'" + + [ "$status" -eq 0 ] + assert_contains "$output" "ImagePullBackOff" + assert_contains "$output" "pod-1" +} + +@test "scope/image_pull_status: shows image and error message" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + "containers": [{"name": "app", "image": "myregistry/myimage:v1"}] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "state": { + "waiting": { + "reason": "ImagePullBackOff", + "message": "unauthorized access" + } + } + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/image_pull_status'" + + assert_contains "$output" "Image: myregistry/myimage:v1" + assert_contains "$output" "Reason: 
unauthorized access" +} + +@test "scope/image_pull_status: shows action for image pull errors" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + "containers": [{"name": "app", "image": "private/image:v1"}] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "state": {"waiting": {"reason": "ErrImagePull", "message": "pull access denied"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/image_pull_status'" + + assert_contains "$output" "🔧" + assert_contains "$output" "imagePullSecrets" +} + +# ============================================================================= +# Failure Tests - ErrImagePull +# ============================================================================= +@test "scope/image_pull_status: fails on ErrImagePull" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + "containers": [{"name": "app", "image": "nonexistent/image:v1"}] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "state": {"waiting": {"reason": "ErrImagePull", "message": "image not found"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/image_pull_status'" + + [ "$status" -eq 0 ] + assert_contains "$output" "ErrImagePull" +} + +@test "scope/image_pull_status: updates check result to failed on error" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + "containers": [{"name": "app", "image": "bad/image:v1"}] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "state": {"waiting": {"reason": "ImagePullBackOff", "message": "error"}} + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/image_pull_status" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +# 
============================================================================= +# Skip Tests +# ============================================================================= +@test "scope/image_pull_status: skips when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/image_pull_status'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +# ============================================================================= +# Multiple Containers Tests +# ============================================================================= +@test "scope/image_pull_status: detects multiple container failures" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + "containers": [ + {"name": "app", "image": "app:v1"}, + {"name": "sidecar", "image": "sidecar:v1"} + ] + }, + "status": { + "containerStatuses": [ + {"name": "app", "state": {"waiting": {"reason": "ImagePullBackOff", "message": "error1"}}}, + {"name": "sidecar", "state": {"waiting": {"reason": "ErrImagePull", "message": "error2"}}} + ] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/image_pull_status'" + + assert_contains "$output" "app" + assert_contains "$output" "sidecar" +} diff --git a/k8s/diagnose/tests/scope/memory_limits_check.bats b/k8s/diagnose/tests/scope/memory_limits_check.bats new file mode 100644 index 00000000..4c481d06 --- /dev/null +++ b/k8s/diagnose/tests/scope/memory_limits_check.bats @@ -0,0 +1,224 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/memory_limits_check +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "scope/memory_limits_check: success when no OOMKilled" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {}}, + "lastState": {} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/memory_limits_check'" + + [ "$status" -eq 0 ] + assert_contains "$output" "No OOMKilled" +} + +@test "scope/memory_limits_check: updates check result to success" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "containerStatuses": [{ + "name": "app", + "state": {"running": {}}, + "lastState": {} + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/memory_limits_check" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests - OOMKilled +# ============================================================================= +@test "scope/memory_limits_check: detects OOMKilled containers" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + 
"containers": [{ + "name": "app", + "resources": { + "limits": {"memory": "256Mi"}, + "requests": {"memory": "128Mi"} + } + }] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "CrashLoopBackOff"}}, + "lastState": {"terminated": {"reason": "OOMKilled", "exitCode": 137}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/memory_limits_check'" + + [ "$status" -eq 0 ] + assert_contains "$output" "OOMKilled" + assert_contains "$output" "pod-1" +} + +@test "scope/memory_limits_check: shows memory limit and request" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + "containers": [{ + "name": "app", + "resources": { + "limits": {"memory": "512Mi"}, + "requests": {"memory": "256Mi"} + } + }] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "lastState": {"terminated": {"reason": "OOMKilled"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/memory_limits_check'" + + assert_contains "$output" "Memory Limit: 512Mi" + assert_contains "$output" "Memory Request: 256Mi" +} + +@test "scope/memory_limits_check: shows action for OOM" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": { + "containers": [{"name": "app", "resources": {}}] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "lastState": {"terminated": {"reason": "OOMKilled"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/memory_limits_check'" + + assert_contains "$output" "🔧" + assert_contains "$output" "Increase memory limits" +} + +@test "scope/memory_limits_check: shows 'not set' when no limits" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": 
{ + "containers": [{"name": "app"}] + }, + "status": { + "containerStatuses": [{ + "name": "app", + "lastState": {"terminated": {"reason": "OOMKilled"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/memory_limits_check'" + + assert_contains "$output" "not set" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "scope/memory_limits_check: skips when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/memory_limits_check'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +# ============================================================================= +# Status Update Tests +# ============================================================================= +@test "scope/memory_limits_check: updates status to failed on OOM" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "spec": {"containers": [{"name": "app"}]}, + "status": { + "containerStatuses": [{ + "name": "app", + "lastState": {"terminated": {"reason": "OOMKilled"}} + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/memory_limits_check" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} diff --git a/k8s/diagnose/tests/scope/pod_existence.bats b/k8s/diagnose/tests/scope/pod_existence.bats new file mode 100644 index 00000000..ddba06f4 --- /dev/null +++ b/k8s/diagnose/tests/scope/pod_existence.bats @@ -0,0 +1,103 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/pod_existence - pod existence verification +# ============================================================================= + +setup() { 
+ export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + # Setup required environment + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + # Create pods file + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "scope/pod_existence: success when pods found" { + echo '{"items":[{"metadata":{"name":"pod-1"}},{"metadata":{"name":"pod-2"}}]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_existence'" + + [ "$status" -eq 0 ] + assert_contains "$output" "pod(s)" + assert_contains "$output" "pod-1" + assert_contains "$output" "pod-2" +} + +@test "scope/pod_existence: updates check result to success" { + echo '{"items":[{"metadata":{"name":"pod-1"}}]}' > "$PODS_FILE" + + source "$BATS_TEST_DIRNAME/../../scope/pod_existence" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "scope/pod_existence: fails when no pods found" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_existence'" + + [ "$status" -eq 1 ] + assert_contains "$output" "No pods found" + assert_contains "$output" "$LABEL_SELECTOR" + assert_contains 
"$output" "$NAMESPACE" +} + +@test "scope/pod_existence: shows action when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_existence'" + + assert_contains "$output" "🔧" + assert_contains "$output" "Check deployment status" +} + +@test "scope/pod_existence: updates check result to failed when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + source "$BATS_TEST_DIRNAME/../../scope/pod_existence" || true + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= +@test "scope/pod_existence: handles single pod" { + echo '{"items":[{"metadata":{"name":"single-pod"}}]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_existence'" + + [ "$status" -eq 0 ] + assert_contains "$output" "pod(s)" + assert_contains "$output" "single-pod" +} + +@test "scope/pod_existence: handles malformed JSON gracefully" { + echo 'not-valid-json' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_existence'" + + [ "$status" -eq 1 ] + assert_contains "$output" "No pods found" +} diff --git a/k8s/diagnose/tests/scope/pod_readiness.bats b/k8s/diagnose/tests/scope/pod_readiness.bats new file mode 100644 index 00000000..01625e29 --- /dev/null +++ b/k8s/diagnose/tests/scope/pod_readiness.bats @@ -0,0 +1,230 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/pod_readiness - pod readiness verification +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd 
"$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + # Setup required environment + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + # Create pods file + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests - All Pods Ready +# ============================================================================= +@test "scope/pod_readiness: success when all pods running and ready" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "conditions": [{"type": "Ready", "status": "True"}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_readiness'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Running and Ready" + assert_contains "$output" "All pods ready" +} + +@test "scope/pod_readiness: success with Succeeded pods (jobs)" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "job-pod"}, + "status": { + "phase": "Succeeded", + "conditions": [{"type": "Ready", "status": "False"}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_readiness'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Completed successfully" +} + +# ============================================================================= +# Warning Tests - Deployment In Progress +# ============================================================================= 
+@test "scope/pod_readiness: warning when pods terminating (rollout)" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "deletionTimestamp": "2024-01-01T00:00:00Z"}, + "status": { + "phase": "Running", + "conditions": [{"type": "Ready", "status": "True"}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_readiness'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Terminating" + assert_contains "$output" "rollout in progress" +} + +@test "scope/pod_readiness: warning when pods starting up (ContainerCreating)" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "conditions": [{"type": "Ready", "status": "False"}], + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "ContainerCreating"}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_readiness'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Starting up" + assert_contains "$output" "ContainerCreating" +} + +@test "scope/pod_readiness: warning when init containers running" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "conditions": [{"type": "Ready", "status": "False"}], + "initContainerStatuses": [{ + "name": "init", + "state": {"running": {}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_readiness'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Init:" +} + +# ============================================================================= +# Failure Tests - Pods Not Ready +# ============================================================================= +@test "scope/pod_readiness: fails when pods not ready without valid 
reason" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "conditions": [{"type": "Ready", "status": "False", "reason": "ContainersNotReady"}], + "containerStatuses": [{ + "name": "app", + "ready": false, + "restartCount": 0, + "state": {"running": {}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_readiness'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Pods not ready" +} + +@test "scope/pod_readiness: shows container status details" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "conditions": [{"type": "Ready", "status": "False"}], + "containerStatuses": [{ + "name": "app", + "ready": false, + "restartCount": 5, + "state": {"running": {}} + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_readiness'" + + assert_contains "$output" "Container Status" + assert_contains "$output" "app:" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "scope/pod_readiness: skips when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/pod_readiness'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +# ============================================================================= +# Evidence Tests +# ============================================================================= +@test "scope/pod_readiness: includes ready count in evidence" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "conditions": [{"type": "Ready", 
"status": "True"}] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/pod_readiness" + + ready=$(jq -r '.evidence.ready' "$SCRIPT_OUTPUT_FILE") + total=$(jq -r '.evidence.total' "$SCRIPT_OUTPUT_FILE") + assert_equal "$ready" "1" + assert_equal "$total" "1" +} diff --git a/k8s/diagnose/tests/scope/resource_availability.bats b/k8s/diagnose/tests/scope/resource_availability.bats new file mode 100644 index 00000000..95f39693 --- /dev/null +++ b/k8s/diagnose/tests/scope/resource_availability.bats @@ -0,0 +1,216 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/resource_availability +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "scope/resource_availability: success when all pods scheduled" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "conditions": [{"type": "PodScheduled", "status": "True"}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/resource_availability'" + + [ "$status" -eq 0 ] + assert_contains 
"$output" "successfully scheduled" +} + +@test "scope/resource_availability: updates check result to success" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": {"phase": "Running"} + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/resource_availability" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests - Unschedulable +# ============================================================================= +@test "scope/resource_availability: fails on unschedulable pods" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "conditions": [{ + "type": "PodScheduled", + "status": "False", + "reason": "Unschedulable", + "message": "0/3 nodes are available: 3 Insufficient cpu" + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/resource_availability'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Cannot be scheduled" + assert_contains "$output" "Insufficient cpu" +} + +@test "scope/resource_availability: detects insufficient CPU" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "conditions": [{ + "reason": "Unschedulable", + "message": "Insufficient cpu" + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/resource_availability'" + + assert_contains "$output" "Insufficient CPU" +} + +@test "scope/resource_availability: detects insufficient memory" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "conditions": [{ + "reason": "Unschedulable", + "message": "Insufficient memory" + }] + } + }] +} +EOF + + run bash -c 
"source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/resource_availability'" + + assert_contains "$output" "Insufficient memory" +} + +@test "scope/resource_availability: shows action for resource issues" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "conditions": [{ + "reason": "Unschedulable", + "message": "No nodes available" + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/resource_availability'" + + assert_contains "$output" "🔧" + assert_contains "$output" "Reduce resource requests" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "scope/resource_availability: skips when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/resource_availability'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +# ============================================================================= +# Status Update Tests +# ============================================================================= +@test "scope/resource_availability: updates status to failed on unschedulable" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "conditions": [{ + "reason": "Unschedulable", + "message": "No resources" + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../scope/resource_availability" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= 
+@test "scope/resource_availability: ignores running pods even if previously pending" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "conditions": [{"type": "PodScheduled", "status": "True"}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/resource_availability'" + + [ "$status" -eq 0 ] + # Should not contain "Cannot be scheduled" + [[ ! "$output" =~ "Cannot be scheduled" ]] +} diff --git a/k8s/diagnose/tests/scope/storage_mounting.bats b/k8s/diagnose/tests/scope/storage_mounting.bats new file mode 100644 index 00000000..bd710720 --- /dev/null +++ b/k8s/diagnose/tests/scope/storage_mounting.bats @@ -0,0 +1,436 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/scope/storage_mounting +# ============================================================================= + +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$PODS_FILE" + unset -f kubectl 2>/dev/null || true +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "scope/storage_mounting: success when PVC is Bound" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "volumes": [{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + "containers": [{"name": "app"}] + } + }] +} +EOF + + run bash -c " + kubectl() { + case \"\$*\" in + *'get pvc'*'-o jsonpath'*) echo 'Bound' ;; + esac + } + export -f kubectl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: PVC my-pvc is Bound" + assert_contains "$stripped" "All volumes mounted successfully for" + assert_contains "$stripped" "pod(s)" +} + +@test "scope/storage_mounting: success when no PVCs (no volumes)" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] 
+ }, + "spec": { + "containers": [{"name": "app"}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "All volumes mounted successfully for" +} + +@test "scope/storage_mounting: success with multiple PVCs all Bound" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "volumes": [ + {"name": "data", "persistentVolumeClaim": {"claimName": "pvc-data"}}, + {"name": "logs", "persistentVolumeClaim": {"claimName": "pvc-logs"}} + ], + "containers": [{"name": "app"}] + } + }] +} +EOF + + run bash -c " + kubectl() { + case \"\$*\" in + *'get pvc'*'-o jsonpath'*) echo 'Bound' ;; + esac + } + export -f kubectl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: PVC pvc-data is Bound" + assert_contains "$stripped" "Pod pod-1: PVC pvc-logs is Bound" + assert_contains "$stripped" "All volumes mounted successfully for" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "scope/storage_mounting: failed when PVC is Pending" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "volumes": [{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + "containers": [{"name": 
"app"}] + } + }] +} +EOF + + run bash -c " + kubectl() { + case \"\$*\" in + *'get pvc'*'-o jsonpath'*) echo 'Pending' ;; + *'get pvc'*'-o json'*) echo '{\"spec\":{\"storageClassName\":\"gp2\",\"resources\":{\"requests\":{\"storage\":\"10Gi\"}}}}' ;; + esac + } + export -f kubectl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: PVC my-pvc is in Pending state" + assert_contains "$stripped" "Storage Class: gp2" + assert_contains "$stripped" "Requested Size: 10Gi" + assert_contains "$stripped" "Check if StorageClass exists and has available capacity" +} + +@test "scope/storage_mounting: updates status to failed on Pending PVC" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "volumes": [{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + "containers": [{"name": "app"}] + } + }] +} +EOF + + kubectl() { + case "$*" in + *"get pvc"*"-o jsonpath"*) echo "Pending" ;; + *"get pvc"*"-o json"*) echo '{"spec":{"storageClassName":"gp2","resources":{"requests":{"storage":"10Gi"}}}}' ;; + esac + } + export -f kubectl + + source "$BATS_TEST_DIRNAME/../../scope/storage_mounting" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" + + unset -f kubectl +} + +# ============================================================================= +# Warning Tests +# ============================================================================= +@test "scope/storage_mounting: warns ContainerCreating with PVCs" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Pending", + "containerStatuses": [{ + "name": 
"app", + "ready": false, + "state": {"waiting": {"reason": "ContainerCreating"}} + }] + }, + "spec": { + "volumes": [{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + "containers": [{"name": "app"}] + } + }] +} +EOF + + run bash -c " + kubectl() { + case \"\$*\" in + *'get pvc'*'-o jsonpath'*) echo 'Bound' ;; + esac + } + export -f kubectl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: Containers waiting in ContainerCreating (may be waiting for volumes)" +} + +@test "scope/storage_mounting: warns on unknown PVC status" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "volumes": [{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + "containers": [{"name": "app"}] + } + }] +} +EOF + + run bash -c " + kubectl() { + case \"\$*\" in + *'get pvc'*'-o jsonpath'*) echo 'Lost' ;; + esac + } + export -f kubectl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: PVC my-pvc status is Lost" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "scope/storage_mounting: skips when no pods" { + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +# 
============================================================================= +# Edge Cases +# ============================================================================= +@test "scope/storage_mounting: volumes without PVC are ignored" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "volumes": [ + {"name": "config", "configMap": {"name": "my-config"}}, + {"name": "secret", "secret": {"secretName": "my-secret"}} + ], + "containers": [{"name": "app"}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "All volumes mounted successfully for" +} + +@test "scope/storage_mounting: updates status to success when all PVCs bound" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": "2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "volumes": [{"name": "data", "persistentVolumeClaim": {"claimName": "my-pvc"}}], + "containers": [{"name": "app"}] + } + }] +} +EOF + + kubectl() { + case "$*" in + *"get pvc"*"-o jsonpath"*) echo "Bound" ;; + esac + } + export -f kubectl + + source "$BATS_TEST_DIRNAME/../../scope/storage_mounting" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" + + unset -f kubectl +} + +@test "scope/storage_mounting: multiple pods with mixed PVC states" { + cat > "$PODS_FILE" << 'EOF' +{ + "items": [ + { + "metadata": {"name": "pod-1"}, + "status": { + "phase": "Running", + "containerStatuses": [{ + "name": "app", + "ready": true, + "state": {"running": {"startedAt": 
"2024-01-01T00:00:00Z"}} + }] + }, + "spec": { + "volumes": [{"name": "data", "persistentVolumeClaim": {"claimName": "pvc-bound"}}], + "containers": [{"name": "app"}] + } + }, + { + "metadata": {"name": "pod-2"}, + "status": { + "phase": "Pending", + "containerStatuses": [{ + "name": "app", + "ready": false, + "state": {"waiting": {"reason": "ContainerCreating"}} + }] + }, + "spec": { + "volumes": [{"name": "data", "persistentVolumeClaim": {"claimName": "pvc-pending"}}], + "containers": [{"name": "app"}] + } + } + ] +} +EOF + + run bash -c " + kubectl() { + case \"\$*\" in + *'get pvc'*'pvc-bound'*'-o jsonpath'*) echo 'Bound' ;; + *'get pvc'*'pvc-pending'*'-o jsonpath'*) echo 'Pending' ;; + *'get pvc'*'pvc-pending'*'-o json'*) echo '{\"spec\":{\"storageClassName\":\"gp3\",\"resources\":{\"requests\":{\"storage\":\"20Gi\"}}}}' ;; + esac + } + export -f kubectl + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../scope/storage_mounting' + " + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Pod pod-1: PVC pvc-bound is Bound" + assert_contains "$stripped" "Pod pod-2: PVC pvc-pending is in Pending state" + assert_contains "$stripped" "Storage Class: gp3" + assert_contains "$stripped" "Requested Size: 20Gi" + assert_contains "$stripped" "Pod pod-2: Containers waiting in ContainerCreating (may be waiting for volumes)" +} diff --git a/k8s/diagnose/tests/service/service_endpoints.bats b/k8s/diagnose/tests/service/service_endpoints.bats new file mode 100644 index 00000000..b70661eb --- /dev/null +++ b/k8s/diagnose/tests/service/service_endpoints.bats @@ -0,0 +1,201 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/service/service_endpoints +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export SERVICES_FILE="$(mktemp)" + export ENDPOINTS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$SERVICES_FILE" + rm -f "$ENDPOINTS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "service/service_endpoints: success when endpoints exist" { + echo '{"items":[{"metadata":{"name":"my-svc"}}]}' > "$SERVICES_FILE" + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "addresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], + "ports": [{"port": 8080, "name": "http"}] + }] + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_endpoints'" + + [ "$status" -eq 0 ] + assert_contains "$output" "1 ready endpoint" + assert_contains "$output" "pod-1" +} + +@test "service/service_endpoints: shows endpoint details" { + echo '{"items":[{"metadata":{"name":"my-svc"}}]}' > "$SERVICES_FILE" + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "addresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], + "ports": [{"port": 8080}] + }] + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_endpoints'" + + assert_contains "$output" "10.0.0.1:8080" +} + +# 
============================================================================= +# Failure Tests +# ============================================================================= +@test "service/service_endpoints: fails when no endpoints resource" { + echo '{"items":[{"metadata":{"name":"my-svc"}}]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$ENDPOINTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_endpoints'" + + [ "$status" -eq 0 ] + assert_contains "$output" "No endpoints resource found" +} + +@test "service/service_endpoints: fails when no ready endpoints" { + echo '{"items":[{"metadata":{"name":"my-svc"}}]}' > "$SERVICES_FILE" + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "notReadyAddresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], + "ports": [{"port": 8080}] + }] + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_endpoints'" + + [ "$status" -eq 0 ] + # The script counts grep -c which returns 1 for notReadyAddresses entry + # So it shows "0 ready endpoint" but the test data produces different result + # Let's check for "not ready" message instead + assert_contains "$output" "not ready" +} + +@test "service/service_endpoints: shows not ready endpoints count" { + echo '{"items":[{"metadata":{"name":"my-svc"}}]}' > "$SERVICES_FILE" + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "notReadyAddresses": [ + {"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}, + {"ip": "10.0.0.2", "targetRef": {"name": "pod-2"}} + ], + "ports": [{"port": 8080}] + }] + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_endpoints'" + + # Check it shows the not ready endpoints + assert_contains "$output" "not ready" 
+ assert_contains "$output" "pod-1" + assert_contains "$output" "pod-2" +} + +@test "service/service_endpoints: shows action for readiness probe check" { + echo '{"items":[{"metadata":{"name":"my-svc"}}]}' > "$SERVICES_FILE" + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "notReadyAddresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}] + }] + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_endpoints'" + + assert_contains "$output" "🔧" + assert_contains "$output" "readiness probes" +} + +# ============================================================================= +# Mixed State Tests +# ============================================================================= +@test "service/service_endpoints: shows both ready and not ready" { + echo '{"items":[{"metadata":{"name":"my-svc"}}]}' > "$SERVICES_FILE" + cat > "$ENDPOINTS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "subsets": [{ + "addresses": [{"ip": "10.0.0.1", "targetRef": {"name": "pod-1"}}], + "notReadyAddresses": [{"ip": "10.0.0.2", "targetRef": {"name": "pod-2"}}], + "ports": [{"port": 8080}] + }] + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_endpoints'" + + assert_contains "$output" "1 ready endpoint" + assert_contains "$output" "1 not ready" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "service/service_endpoints: skips when no services" { + echo '{"items":[]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$ENDPOINTS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_endpoints'" + + [ "$status" -eq 0 ] + assert_contains "$output" 
"skipped" +} + +# ============================================================================= +# Status Update Tests +# ============================================================================= +@test "service/service_endpoints: updates status to failed when no endpoints" { + echo '{"items":[{"metadata":{"name":"my-svc"}}]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$ENDPOINTS_FILE" + + source "$BATS_TEST_DIRNAME/../../service/service_endpoints" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} diff --git a/k8s/diagnose/tests/service/service_existence.bats b/k8s/diagnose/tests/service/service_existence.bats new file mode 100644 index 00000000..6cb51760 --- /dev/null +++ b/k8s/diagnose/tests/service/service_existence.bats @@ -0,0 +1,93 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/service/service_existence +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export SERVICES_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$SERVICES_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "service/service_existence: success when services found" { + echo '{"items":[{"metadata":{"name":"svc-1"}},{"metadata":{"name":"svc-2"}}]}' > "$SERVICES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_existence'" + + [ "$status" -eq 0 ] + assert_contains "$output" "service(s)" + assert_contains "$output" "svc-1" + assert_contains "$output" "svc-2" +} + +@test "service/service_existence: updates check result to success" { + echo '{"items":[{"metadata":{"name":"svc-1"}}]}' > "$SERVICES_FILE" + + source "$BATS_TEST_DIRNAME/../../service/service_existence" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "service/service_existence: fails when no services found" { + echo '{"items":[]}' > "$SERVICES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_existence'" + + [ "$status" -eq 1 ] + assert_contains "$output" "No services found" + assert_contains "$output" 
"$LABEL_SELECTOR" +} + +@test "service/service_existence: shows action when no services" { + echo '{"items":[]}' > "$SERVICES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_existence'" + + assert_contains "$output" "🔧" + assert_contains "$output" "Create service" +} + +@test "service/service_existence: updates check result to failed" { + echo '{"items":[]}' > "$SERVICES_FILE" + + source "$BATS_TEST_DIRNAME/../../service/service_existence" || true + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= +@test "service/service_existence: handles single service" { + echo '{"items":[{"metadata":{"name":"my-service"}}]}' > "$SERVICES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_existence'" + + [ "$status" -eq 0 ] + assert_contains "$output" "service(s)" + assert_contains "$output" "my-service" +} diff --git a/k8s/diagnose/tests/service/service_port_configuration.bats b/k8s/diagnose/tests/service/service_port_configuration.bats new file mode 100644 index 00000000..9ce62388 --- /dev/null +++ b/k8s/diagnose/tests/service/service_port_configuration.bats @@ -0,0 +1,602 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/service/service_port_configuration +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + export SCRIPT_LOG_FILE="$(mktemp)" + export SERVICES_FILE="$(mktemp)" + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$SERVICES_FILE" + rm -f "$PODS_FILE" +} + +strip_ansi() { + echo "$1" | sed 's/\x1b\[[0-9;]*m//g' +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "service/service_port_configuration: success when numeric targetPort matches container port" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 8080, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + kubectl() { + case "$*" in + *"exec"*) return 0 ;; + esac + } + export -f kubectl + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && kubectl() { return 0; } && export -f kubectl && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Port 80 -> 8080 (http): Configuration OK [container: app]" + assert_contains "$stripped" "Port 8080 is accepting connections" +} + +@test "service/service_port_configuration: success when named targetPort resolves" { + cat > "$SERVICES_FILE" << 'EOF' +{ + 
"items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": "http", "name": "web"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && kubectl() { return 0; } && export -f kubectl && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Resolves to 8080 [container: app]" +} + +@test "service/service_port_configuration: updates status to success when all ports match" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 8080, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + kubectl() { return 0; } + export -f kubectl + + source "$BATS_TEST_DIRNAME/../../service/service_port_configuration" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "success" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "service/service_port_configuration: fails when container port not found" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 9090, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": 
{"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Container port 9090 not found" + assert_contains "$stripped" "Available ports by container:" +} + +@test "service/service_port_configuration: fails when named port not found in containers" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": "grpc", "name": "api"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Named port not found in containers" +} + +@test "service/service_port_configuration: fails when port not accepting connections" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 8080, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && kubectl() { return 1; } && export -f kubectl && source 
'$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Port 8080 is NOT accepting connections" +} + +@test "service/service_port_configuration: updates status to failed when port mismatch" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 9090, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + source "$BATS_TEST_DIRNAME/../../service/service_port_configuration" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +@test "service/service_port_configuration: updates status to failed when connectivity fails" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 8080, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c " + source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' + kubectl() { return 1; } + export -f kubectl + source '$BATS_TEST_DIRNAME/../../service/service_port_configuration' + " + + [ "$status" -eq 0 ] + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} + +@test "service/service_port_configuration: shows action to update targetPort on mismatch" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 9090, 
"name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Update service targetPort to match container port or fix container port" +} + +@test "service/service_port_configuration: shows action for named port not found" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": "grpc", "name": "api"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Define named port in container spec or use numeric targetPort" +} + +# ============================================================================= +# Edge Cases +# ============================================================================= +@test "service/service_port_configuration: no ports defined" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"} + } + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No ports 
defined" +} + +@test "service/service_port_configuration: no selector skips port validation" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "ports": [{"port": 80, "targetPort": 8080, "name": "http"}] + } + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No selector, skipping port validation" +} + +@test "service/service_port_configuration: no matching pods found" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 8080, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "other"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "No pods found to validate ports" +} + +@test "service/service_port_configuration: skips when no services (require_services fails)" { + echo '{"items":[]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +@test "service/service_port_configuration: shows connectivity check info message" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 8080, 
"name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && kubectl() { return 0; } && export -f kubectl && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Testing connectivity to port 8080 in container 'app'" +} + +@test "service/service_port_configuration: shows log check hint when connectivity fails" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 8080, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && kubectl() { return 1; } && export -f kubectl && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Check logs: kubectl logs pod-1 -n test-ns -c app" +} + +@test "service/service_port_configuration: multiple ports with mixed results" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [ + {"port": 80, "targetPort": 8080, "name": "http"}, + {"port": 443, "targetPort": 9999, "name": "https"} + ] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": 
[{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && kubectl() { return 0; } && export -f kubectl && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Port 80 -> 8080 (http): Configuration OK [container: app]" + assert_contains "$stripped" "Container port 9999 not found" +} + +@test "service/service_port_configuration: shows service port configuration header" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "test"}, + "ports": [{"port": 80, "targetPort": 8080, "name": "http"}] + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "pod-1", "labels": {"app": "test"}}, + "spec": { + "containers": [{ + "name": "app", + "ports": [{"containerPort": 8080, "name": "http"}] + }] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && kubectl() { return 0; } && export -f kubectl && source '$BATS_TEST_DIRNAME/../../service/service_port_configuration'" + + [ "$status" -eq 0 ] + stripped=$(strip_ansi "$output") + assert_contains "$stripped" "Service my-svc port configuration:" +} diff --git a/k8s/diagnose/tests/service/service_selector_match.bats b/k8s/diagnose/tests/service/service_selector_match.bats new file mode 100644 index 00000000..50d7b611 --- /dev/null +++ b/k8s/diagnose/tests/service/service_selector_match.bats @@ -0,0 +1,218 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for diagnose/service/service_selector_match +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export DEPLOYMENT_ID="deploy-123" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export SERVICES_FILE="$(mktemp)" + export PODS_FILE="$(mktemp)" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$SERVICES_FILE" + rm -f "$PODS_FILE" +} + +# ============================================================================= +# Success Tests +# ============================================================================= +@test "service/service_selector_match: success when selectors match" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "selector": {"app": "myapp", "version": "v1"} + } + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "pod-1", + "labels": {"app": "myapp", "version": "v1"} + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_selector_match'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Selector matches" + assert_contains "$output" "pod(s)" +} + +@test "service/service_selector_match: matches multiple pods" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "myapp"}} + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [ + {"metadata": {"name": "pod-1", "labels": {"app": "myapp"}}}, + {"metadata": {"name": "pod-2", "labels": {"app": "myapp"}}} + ] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_selector_match'" + + [ "$status" -eq 0 ] + 
assert_contains "$output" "Selector matches" + assert_contains "$output" "2" + assert_contains "$output" "pod(s)" +} + +# ============================================================================= +# Failure Tests +# ============================================================================= +@test "service/service_selector_match: fails when no selector defined" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {} + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_selector_match'" + + [ "$status" -eq 0 ] + assert_contains "$output" "No selector defined" +} + +@test "service/service_selector_match: fails when no pods match" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "myapp"}} + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "pod-1", + "labels": {"app": "different-app", "deployment_id": "deploy-123"} + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_selector_match'" + + [ "$status" -eq 0 ] + assert_contains "$output" "No pods match selector" +} + +@test "service/service_selector_match: shows existing pods when mismatch" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "myapp"}} + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "existing-pod", + "labels": {"app": "other", "deployment_id": "deploy-123"} + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_selector_match'" + + assert_contains "$output" "Existing pods" + assert_contains "$output" "existing-pod" +} + +@test "service/service_selector_match: shows 
action to verify labels" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "myapp"}} + }] +} +EOF + cat > "$PODS_FILE" << 'EOF' +{ + "items": [{ + "metadata": { + "name": "pod-1", + "labels": {"app": "wrong", "deployment_id": "deploy-123"} + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_selector_match'" + + assert_contains "$output" "🔧" + assert_contains "$output" "Verify pod labels" +} + +# ============================================================================= +# Skip Tests +# ============================================================================= +@test "service/service_selector_match: skips when no services" { + echo '{"items":[]}' > "$SERVICES_FILE" + echo '{"items":[]}' > "$PODS_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_selector_match'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} + +# ============================================================================= +# Status Update Tests +# ============================================================================= +@test "service/service_selector_match: updates status to failed on mismatch" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"selector": {"app": "myapp"}} + }] +} +EOF + echo '{"items":[]}' > "$PODS_FILE" + + source "$BATS_TEST_DIRNAME/../../service/service_selector_match" + + result=$(jq -r '.status' "$SCRIPT_OUTPUT_FILE") + assert_equal "$result" "failed" +} diff --git a/k8s/diagnose/tests/service/service_type_validation.bats b/k8s/diagnose/tests/service/service_type_validation.bats new file mode 100644 index 00000000..10a5c38b --- /dev/null +++ b/k8s/diagnose/tests/service/service_type_validation.bats @@ -0,0 +1,213 @@ +#!/usr/bin/env bats +# 
============================================================================= +# Unit tests for diagnose/service/service_type_validation +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + source "$BATS_TEST_DIRNAME/../../utils/diagnose_utils" + + export NAMESPACE="test-ns" + export LABEL_SELECTOR="app=test" + export NP_OUTPUT_DIR="$(mktemp -d)" + export SCRIPT_OUTPUT_FILE="$(mktemp)" + export SCRIPT_LOG_FILE="$(mktemp)" + echo '{"status":"pending","evidence":{},"logs":[]}' > "$SCRIPT_OUTPUT_FILE" + + export SERVICES_FILE="$(mktemp)" + export EVENTS_FILE="$(mktemp)" + echo '{"items":[]}' > "$EVENTS_FILE" +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + rm -f "$SCRIPT_OUTPUT_FILE" + rm -f "$SCRIPT_LOG_FILE" + rm -f "$SERVICES_FILE" + rm -f "$EVENTS_FILE" +} + +# ============================================================================= +# ClusterIP Tests +# ============================================================================= +@test "service/service_type_validation: validates ClusterIP service" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "type": "ClusterIP", + "clusterIP": "10.0.0.1" + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Type=ClusterIP" + assert_contains "$output" "Internal service" + assert_contains "$output" "10.0.0.1" +} + +@test "service/service_type_validation: validates headless service" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "headless-svc"}, + "spec": { + "type": "ClusterIP", + "clusterIP": "None" + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source 
'$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Headless service" +} + +# ============================================================================= +# NodePort Tests +# ============================================================================= +@test "service/service_type_validation: validates NodePort service" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": { + "type": "NodePort", + "ports": [{"port": 80, "nodePort": 30080}] + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Type=NodePort" + assert_contains "$output" "NodePort 30080" +} + +# ============================================================================= +# LoadBalancer Tests +# ============================================================================= +@test "service/service_type_validation: validates LoadBalancer with IP" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"type": "LoadBalancer"}, + "status": { + "loadBalancer": { + "ingress": [{"ip": "1.2.3.4"}] + } + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "LoadBalancer available" + assert_contains "$output" "1.2.3.4" +} + +@test "service/service_type_validation: validates LoadBalancer with hostname" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"type": "LoadBalancer"}, + "status": { + "loadBalancer": { + "ingress": [{"hostname": "my-lb.elb.amazonaws.com"}] + } + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source 
'$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "LoadBalancer available" + assert_contains "$output" "my-lb.elb.amazonaws.com" +} + +@test "service/service_type_validation: warns on pending LoadBalancer" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"type": "LoadBalancer"}, + "status": {} + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Pending" +} + +# ============================================================================= +# ExternalName Tests +# ============================================================================= +@test "service/service_type_validation: validates ExternalName service" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "external-svc"}, + "spec": { + "type": "ExternalName", + "externalName": "api.example.com" + } + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "ExternalName" + assert_contains "$output" "api.example.com" +} + +# ============================================================================= +# Invalid Type Tests +# ============================================================================= +@test "service/service_type_validation: fails on unknown service type" { + cat > "$SERVICES_FILE" << 'EOF' +{ + "items": [{ + "metadata": {"name": "my-svc"}, + "spec": {"type": "InvalidType"} + }] +} +EOF + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "Unknown service type" +} + +# 
============================================================================= +# Skip Tests +# ============================================================================= +@test "service/service_type_validation: skips when no services" { + echo '{"items":[]}' > "$SERVICES_FILE" + + run bash -c "source '$BATS_TEST_DIRNAME/../../utils/diagnose_utils' && source '$BATS_TEST_DIRNAME/../../service/service_type_validation'" + + [ "$status" -eq 0 ] + assert_contains "$output" "skipped" +} diff --git a/k8s/log/kube-logger-go/bin/linux/exec-amd64 b/k8s/log/kube-logger-go/bin/linux/exec-amd64 new file mode 100755 index 00000000..21a4dd3e Binary files /dev/null and b/k8s/log/kube-logger-go/bin/linux/exec-amd64 differ diff --git a/k8s/log/kube-logger-go/bin/linux/exec-arm64 b/k8s/log/kube-logger-go/bin/linux/exec-arm64 new file mode 100755 index 00000000..5e1ead90 Binary files /dev/null and b/k8s/log/kube-logger-go/bin/linux/exec-arm64 differ diff --git a/k8s/log/kube-logger-go/bin/linux/exec-x86_64 b/k8s/log/kube-logger-go/bin/linux/exec-x86_64 index 327e2c97..21a4dd3e 100755 Binary files a/k8s/log/kube-logger-go/bin/linux/exec-x86_64 and b/k8s/log/kube-logger-go/bin/linux/exec-x86_64 differ diff --git a/k8s/log/log b/k8s/log/log index 2652f0af..e32aba89 100644 --- a/k8s/log/log +++ b/k8s/log/log @@ -3,6 +3,8 @@ PLATFORM=$(uname | tr '[:upper:]' '[:lower:]') ARCH=$(uname -m) +[ "$ARCH" = "aarch64" ] && ARCH="arm64" + KUBE_LOGGER_SCRIPT="$SERVICE_PATH/log/kube-logger-go/bin/$PLATFORM/exec-$ARCH" if [ ! 
-f "$KUBE_LOGGER_SCRIPT" ]; then @@ -69,4 +71,4 @@ if [ -n "$START_TIME" ]; then CMD="$CMD --start-time $ISO_DATE" fi -eval "$CMD" \ No newline at end of file +eval "$CMD" diff --git a/k8s/logging b/k8s/logging new file mode 100644 index 00000000..d0df55d7 --- /dev/null +++ b/k8s/logging @@ -0,0 +1,41 @@ +#!/bin/bash + +# Logging utility — log4j-style level filtering +# Usage: log "level" "message" +# Levels: debug < info < warn < error +# Control: LOG_LEVEL env var (default: info) +# +# Example: +# LOG_LEVEL=info +# log debug "verbose details" # suppressed +# log info "deployment done" # shown +log() { + local level="${1:-info}" + local message="${2:-}" + + local -i msg_num threshold + + case "${level,,}" in + debug) msg_num=0 ;; + info) msg_num=1 ;; + warn) msg_num=2 ;; + error) msg_num=3 ;; + *) msg_num=1 ;; + esac + + case "${LOG_LEVEL:-info}" in + debug) threshold=0 ;; + info) threshold=1 ;; + warn) threshold=2 ;; + error) threshold=3 ;; + *) threshold=1 ;; + esac + + if [ "$msg_num" -ge "$threshold" ]; then + if [ "$msg_num" -ge 3 ]; then + echo "$message" >&2 + else + echo "$message" + fi + fi +} diff --git a/k8s/metric/list b/k8s/metric/list index 5f0240e6..b8c7263c 100644 --- a/k8s/metric/list +++ b/k8s/metric/list @@ -25,7 +25,7 @@ echo '{ }, { "name": "system.cpu_usage_percentage", - "title": "Cpu usage", + "title": "CPU usage", "unit": "%", "available_filters": ["scope_id", "instance_id"], "available_group_by": ["instance_id"] diff --git a/k8s/scope/build_context b/k8s/scope/build_context index e60aa4ae..8328eab6 100755 --- a/k8s/scope/build_context +++ b/k8s/scope/build_context @@ -1,50 +1,173 @@ #!/bin/bash -if [ -n "${NAMESPACE_OVERRIDE:-}" ]; then - K8S_NAMESPACE="$NAMESPACE_OVERRIDE" -else - K8S_NAMESPACE=$(echo "$CONTEXT" | jq -r --arg default "$K8S_NAMESPACE" ' - .providers["container-orchestration"].cluster.namespace // $default - ') +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/../utils/get_config_value" + 
+K8S_NAMESPACE=$(get_config_value \ + --env NAMESPACE_OVERRIDE \ + --env K8S_NAMESPACE \ + --provider '.providers["scope-configurations"].cluster.namespace' \ + --provider '.providers["container-orchestration"].cluster.namespace' \ + --default "nullplatform" +) + +# General configuration +DNS_TYPE=$(get_config_value \ + --env DNS_TYPE \ + --provider '.providers["scope-configurations"].networking.dns_type' \ + --default "route53" +) + +# Azure DNS configuration +HOSTED_ZONE_NAME=$(get_config_value \ + --env HOSTED_ZONE_NAME \ + --provider '.providers["scope-configurations"].networking.hosted_zone_name' \ + --default "" +) + +HOSTED_ZONE_RG=$(get_config_value \ + --env HOSTED_ZONE_RG \ + --provider '.providers["scope-configurations"].networking.hosted_zone_rg' \ + --default "" +) + +AZURE_SUBSCRIPTION_ID=$(get_config_value \ + --env AZURE_SUBSCRIPTION_ID \ + --provider '.providers["scope-configurations"].networking.azure_subscription_id' \ + --default "" +) + +RESOURCE_GROUP=$(get_config_value \ + --env RESOURCE_GROUP \ + --provider '.providers["scope-configurations"].networking.resource_group' \ + --default "" +) + +ALB_RECONCILIATION_ENABLED=$(get_config_value \ + --env ALB_RECONCILIATION_ENABLED \ + --provider '.providers["scope-configurations"].networking.alb_reconciliation_enabled' \ + --default "false" +) + +DEPLOYMENT_MAX_WAIT_IN_SECONDS=$(get_config_value \ + --env DEPLOYMENT_MAX_WAIT_IN_SECONDS \ + --provider '.providers["scope-configurations"].deployment.deployment_max_wait_seconds' \ + --default "600" +) + +# Build MANIFEST_BACKUP object from flat properties +MANIFEST_BACKUP_ENABLED=$(get_config_value \ + --provider '.providers["scope-configurations"].deployment.manifest_backup_enabled' \ + --default "false" +) +MANIFEST_BACKUP_TYPE=$(get_config_value \ + --provider '.providers["scope-configurations"].deployment.manifest_backup_type' \ + --default "" +) +MANIFEST_BACKUP_BUCKET=$(get_config_value \ + --provider 
'.providers["scope-configurations"].deployment.manifest_backup_bucket' \ + --default "" +) +MANIFEST_BACKUP_PREFIX=$(get_config_value \ + --provider '.providers["scope-configurations"].deployment.manifest_backup_prefix' \ + --default "" +) + +# Use env var if set, otherwise build from individual properties +if [ -z "${MANIFEST_BACKUP:-}" ]; then + MANIFEST_BACKUP=$(jq -n \ + --argjson enabled "$MANIFEST_BACKUP_ENABLED" \ + --arg type "$MANIFEST_BACKUP_TYPE" \ + --arg bucket "$MANIFEST_BACKUP_BUCKET" \ + --arg prefix "$MANIFEST_BACKUP_PREFIX" \ + '{ENABLED: $enabled, TYPE: $type, BUCKET: $bucket, PREFIX: $prefix} | + with_entries(select(.value != "" and .value != null))') fi -echo "Validating namespace $K8S_NAMESPACE exists" +VAULT_ADDR=$(get_config_value \ + --env VAULT_ADDR \ + --provider '.providers["scope-configurations"].security.vault_address' \ + --default "" +) + +VAULT_TOKEN=$(get_config_value \ + --env VAULT_TOKEN \ + --provider '.providers["scope-configurations"].security.vault_token' \ + --default "" +) + +export DNS_TYPE +export HOSTED_ZONE_NAME +export HOSTED_ZONE_RG +export AZURE_SUBSCRIPTION_ID +export RESOURCE_GROUP +export ALB_RECONCILIATION_ENABLED +export DEPLOYMENT_MAX_WAIT_IN_SECONDS +export MANIFEST_BACKUP +export VAULT_ADDR +export VAULT_TOKEN + +log debug "🔍 Validating namespace '$K8S_NAMESPACE' exists..." if ! kubectl get namespace "$K8S_NAMESPACE" &> /dev/null; then - echo "Namespace '$K8S_NAMESPACE' does not exist in the cluster." - - CREATE_K8S_NAMESPACE_IF_NOT_EXIST="${CREATE_K8S_NAMESPACE_IF_NOT_EXIST:-true}" - + log error " ❌ Namespace '$K8S_NAMESPACE' does not exist in the cluster" + + CREATE_K8S_NAMESPACE_IF_NOT_EXIST=$(get_config_value \ + --env CREATE_K8S_NAMESPACE_IF_NOT_EXIST \ + --provider '.providers["scope-configurations"].cluster.create_namespace_if_not_exist' \ + --default "true" + ) + if [ "$CREATE_K8S_NAMESPACE_IF_NOT_EXIST" = "true" ]; then - echo "Creating namespace '$K8S_NAMESPACE'..." 
- + log debug "📝 Creating namespace '$K8S_NAMESPACE'..." + kubectl create namespace "$K8S_NAMESPACE" --dry-run=client -o yaml | \ kubectl label -f - nullplatform=true --dry-run=client -o yaml | \ kubectl apply -f - - echo "Namespace '$K8S_NAMESPACE' created successfully." + log info " ✅ Namespace '$K8S_NAMESPACE' created successfully" else - echo "Error: Namespace '$K8S_NAMESPACE' does not exist and CREATE_K8S_NAMESPACE_IF_NOT_EXIST is set to false." + log error "" + log error "💡 Possible causes:" + log error " The namespace does not exist and automatic creation is disabled" + log error "" + log error "🔧 How to fix:" + log error " • Create the namespace manually: kubectl create namespace $K8S_NAMESPACE" + log error " • Or set CREATE_K8S_NAMESPACE_IF_NOT_EXIST=true in values.yaml" + log error "" exit 1 fi +else + log info " ✅ Namespace '$K8S_NAMESPACE' exists" fi -USE_ACCOUNT_SLUG=$(echo "$CONTEXT" | jq -r --arg default "$USE_ACCOUNT_SLUG" ' - .providers["cloud-providers"].networking.application_domain // $default -') +USE_ACCOUNT_SLUG=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.application_domain' \ + --provider '.providers["cloud-providers"].networking.application_domain' \ + --default "false" +) -REGION=$(echo "$CONTEXT" | jq -r '.providers["cloud-providers"].account.region // "us-east-1"') +REGION=$(get_config_value \ + --provider '.providers["cloud-providers"].account.region' \ + --default "us-east-1" +) SCOPE_VISIBILITY=$(echo "$CONTEXT" | jq -r '.scope.capabilities.visibility') if [ "$SCOPE_VISIBILITY" = "public" ]; then - DOMAIN=$(echo "$CONTEXT" | jq -r --arg default "$DOMAIN" ' - .providers["cloud-providers"].networking.domain_name // $default - ') + DOMAIN=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.domain_name' \ + --provider '.providers["cloud-providers"].networking.domain_name' \ + --default "nullapps.io" + ) else - DOMAIN=$(echo "$CONTEXT" | jq -r --arg private_default 
"$PRIVATE_DOMAIN" --arg default "$DOMAIN" ' - (.providers["cloud-providers"].networking.private_domain_name // $private_default | if . == "" then empty else . end) // .providers["cloud-providers"].networking.domain_name // $default - ') + DOMAIN=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.private_domain_name' \ + --provider '.providers["cloud-providers"].networking.private_domain_name' \ + --provider '.providers["scope-configurations"].networking.domain_name' \ + --provider '.providers["cloud-providers"].networking.domain_name' \ + --default "nullapps.io" + ) fi SCOPE_DOMAIN=$(echo "$CONTEXT" | jq .scope.domain -r) @@ -63,28 +186,37 @@ export SCOPE_DOMAIN if [ "$SCOPE_VISIBILITY" = "public" ]; then export INGRESS_VISIBILITY="internet-facing" GATEWAY_DEFAULT="${PUBLIC_GATEWAY_NAME:-gateway-public}" - export GATEWAY_NAME=$(echo "$CONTEXT" | jq -r --arg default "$GATEWAY_DEFAULT" '.providers["container-orchestration"].gateway.public_name // $default') + export GATEWAY_NAME=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.gateway_public_name' \ + --provider '.providers["container-orchestration"].gateway.public_name' \ + --default "$GATEWAY_DEFAULT" + ) else export INGRESS_VISIBILITY="internal" GATEWAY_DEFAULT="${PRIVATE_GATEWAY_NAME:-gateway-internal}" - export GATEWAY_NAME=$(echo "$CONTEXT" | jq -r --arg default "$GATEWAY_DEFAULT" '.providers["container-orchestration"].gateway.private_name // $default') + export GATEWAY_NAME=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.gateway_private_name' \ + --provider '.providers["container-orchestration"].gateway.private_name' \ + --default "$GATEWAY_DEFAULT" + ) fi -K8S_MODIFIERS="${K8S_MODIFIERS:-"{}"}" +K8S_MODIFIERS=$(get_config_value \ + --env K8S_MODIFIERS \ + --provider '.providers["scope-configurations"].object_modifiers' \ + --default "{}" +) K8S_MODIFIERS=$(echo "$K8S_MODIFIERS" | jq .) 
-ALB_NAME="k8s-nullplatform-$INGRESS_VISIBILITY" - -if [ "$INGRESS_VISIBILITY" = "internet-facing" ]; then - ALB_NAME=$(echo "$CONTEXT" | jq -r --arg default "$ALB_NAME" '.providers["container-orchestration"].balancer.public_name // $default') -else - ALB_NAME=$(echo "$CONTEXT" | jq -r --arg default "$ALB_NAME" '.providers["container-orchestration"].balancer.private_name // $default') -fi +source "$SCRIPT_DIR/networking/resolve_balancer" NAMESPACE_SLUG=$(echo "$CONTEXT" | jq -r .namespace.slug) APPLICATION_SLUG=$(echo "$CONTEXT" | jq -r .application.slug) COMPONENT=$(echo "$NAMESPACE_SLUG-$APPLICATION_SLUG" | sed -E 's/^(.{0,62}[a-zA-Z0-9]).*/\1/') +log debug "📋 Scope: $SCOPE_ID | Visibility: $SCOPE_VISIBILITY | Domain: $SCOPE_DOMAIN" +log debug "📋 Namespace: $K8S_NAMESPACE | Region: $REGION | Gateway: $GATEWAY_NAME | ALB: $ALB_NAME" + CONTEXT=$(echo "$CONTEXT" | jq \ --arg ingress_visibility "$INGRESS_VISIBILITY" \ --arg k8s_namespace "$K8S_NAMESPACE" \ @@ -105,3 +237,5 @@ export CONTEXT export REGION mkdir -p "$OUTPUT_DIR" + +log info "✅ Scope context built successfully" diff --git a/k8s/scope/iam/build_service_account b/k8s/scope/iam/build_service_account index b3f52676..64a7511b 100644 --- a/k8s/scope/iam/build_service_account +++ b/k8s/scope/iam/build_service_account @@ -2,35 +2,34 @@ set -euo pipefail + IAM=${IAM-"{}"} IAM_ENABLED=$(echo "$IAM" | jq -r .ENABLED) if [[ "$IAM_ENABLED" == "false" || "$IAM_ENABLED" == "null" ]]; then - echo "IAM is not enabled, skipping service account setup" + log debug "📋 IAM is not enabled, skipping service account setup" return fi -echo "Getting AWS account ID..." 
-AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>&1) || { - echo "ERROR: Failed to get AWS account ID" - echo "AWS Error: $AWS_ACCOUNT_ID" - echo "Check if AWS credentials are configured correctly" - exit 1 -} - SERVICE_ACCOUNT_NAME=$(echo "$IAM" | jq -r .PREFIX)-"$SCOPE_ID" -echo "Looking for IAM role: $SERVICE_ACCOUNT_NAME" +log debug "🔍 Looking for IAM role: $SERVICE_ACCOUNT_NAME" ROLE_ARN=$(aws iam get-role --role-name "$SERVICE_ACCOUNT_NAME" --query 'Role.Arn' --output text 2>&1) || { if [[ "${ACTION:-}" == "delete" ]] && [[ "$ROLE_ARN" == *"NoSuchEntity"* ]] && [[ "$ROLE_ARN" == *"cannot be found"* ]]; then - echo "IAM role '$SERVICE_ACCOUNT_NAME' does not exist, skipping service account deletion" + log debug "📋 IAM role '$SERVICE_ACCOUNT_NAME' does not exist, skipping service account deletion" return 0 fi - echo "ERROR: Failed to find IAM role '$SERVICE_ACCOUNT_NAME'" - echo "AWS Error: $ROLE_ARN" - echo "Make sure the role exists and you have IAM permissions" + log error " ❌ Failed to find IAM role '$SERVICE_ACCOUNT_NAME'" + log error "" + log error "💡 Possible causes:" + log error " The IAM role may not exist or the agent lacks IAM permissions" + log error "" + log error "🔧 How to fix:" + log error " • Verify the role exists: aws iam get-role --role-name $SERVICE_ACCOUNT_NAME" + log error " • Check IAM permissions for the agent role" + log error "" exit 1 } @@ -39,16 +38,21 @@ SERVICE_ACCOUNT_PATH="$OUTPUT_DIR/service_account-$SCOPE_ID.yaml" echo "$CONTEXT" | jq --arg role_arn "$ROLE_ARN" --arg service_account_name "$SERVICE_ACCOUNT_NAME" '. + {role_arn: $role_arn, service_account_name: $service_account_name}' > "$CONTEXT_PATH" -echo "Building Template: $SERVICE_ACCOUNT_TEMPLATE to $SERVICE_ACCOUNT_PATH" +log debug "📝 Building service account template: $SERVICE_ACCOUNT_TEMPLATE" gomplate -c .="$CONTEXT_PATH" \ --file "$SERVICE_ACCOUNT_TEMPLATE" \ - --out "$SERVICE_ACCOUNT_PATH" - -TEMPLATE_GENERATION_STATUS=$? 
- -if [[ $TEMPLATE_GENERATION_STATUS -ne 0 ]]; then - echo "Error building secret template" - exit 1 -fi + --out "$SERVICE_ACCOUNT_PATH" || { + log error " ❌ Failed to build service account template" + log error "" + log error "💡 Possible causes:" + log error " The template file may be missing or contain invalid gomplate syntax" + log error "" + log error "🔧 How to fix:" + log error " • Verify template exists: ls -la $SERVICE_ACCOUNT_TEMPLATE" + log error " • Check the template is a valid Kubernetes ServiceAccount YAML with correct gomplate expressions" + log error "" + exit 1 +} -rm "$CONTEXT_PATH" \ No newline at end of file +rm "$CONTEXT_PATH" +log info " ✅ Service account template built successfully" diff --git a/k8s/scope/iam/create_role b/k8s/scope/iam/create_role index 771a084e..cfe3342f 100644 --- a/k8s/scope/iam/create_role +++ b/k8s/scope/iam/create_role @@ -2,34 +2,48 @@ set -euo pipefail + IAM=${IAM-"{}"} IAM_ENABLED=$(echo "$IAM" | jq -r .ENABLED) if [[ "$IAM_ENABLED" == "false" || "$IAM_ENABLED" == "null" ]]; then - echo "No IAM role configuration. 
Skipping role setup" + log debug "📋 IAM is not enabled, skipping role creation" return fi ROLE_NAME=$(echo "$IAM" | jq -r .PREFIX)-"$SCOPE_ID" ROLE_PATH="/nullplatform/custom-scopes/" NAMESPACE=$(echo "$CONTEXT" | jq -r .k8s_namespace) -echo "Getting EKS OIDC provider for cluster: $CLUSTER_NAME" +log debug "🔍 Getting EKS OIDC provider for cluster: $CLUSTER_NAME" OIDC_PROVIDER=$(aws eks describe-cluster --name "$CLUSTER_NAME" --query "cluster.identity.oidc.issuer" --output text 2>&1 | sed -e "s/^https:\/\///") || { - echo "ERROR: Failed to get OIDC provider for EKS cluster '$CLUSTER_NAME'" - echo "AWS Error: $OIDC_PROVIDER" + log error " ❌ Failed to get OIDC provider for EKS cluster '$CLUSTER_NAME'" + log error "" + log error "💡 Possible causes:" + log error " The OIDC provider may not be configured for this EKS cluster" + log error "" + log error "🔧 How to fix:" + log error " • Verify OIDC is enabled: aws eks describe-cluster --name $CLUSTER_NAME --query cluster.identity.oidc" + log error " • Enable OIDC provider: eksctl utils associate-iam-oidc-provider --cluster $CLUSTER_NAME --approve" + log error "" exit 1 } -echo "Getting AWS account ID" +log debug "🔍 Getting AWS account ID..." 
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>&1) || { - echo "ERROR: Failed to get AWS account ID" - echo "AWS Error: $AWS_ACCOUNT_ID" + log error " ❌ Failed to get AWS account ID" + log error "" + log error "💡 Possible causes:" + log error " AWS credentials may not be configured or have expired" + log error "" + log error "🔧 How to fix:" + log error " • Check AWS credentials: aws sts get-caller-identity" + log error " • Verify IAM permissions for the agent role" + log error "" exit 1 } TRUST_POLICY_PATH="$OUTPUT_DIR/trust-policy.json" -# Step 1: Create the IAM trust policy cat > "$TRUST_POLICY_PATH" < "$TRUST_POLICY_PATH" < "$TEMP_POLICY_FILE" aws iam put-role-policy \ --role-name "$ROLE_NAME" \ --policy-name "$POLICY_NAME" \ - --policy-document "file://$TEMP_POLICY_FILE" - - if [[ $? -eq 0 ]]; then - echo "✓ Successfully attached inline policy: $POLICY_NAME" - else - echo "✗ Failed to attach inline policy: $POLICY_NAME" + --policy-document "file://$TEMP_POLICY_FILE" || { + log error " ❌ Failed to attach inline policy: $POLICY_NAME" + log error "" + log error "💡 Possible causes:" + log error " The inline policy JSON may be invalid or the agent lacks IAM permissions" + log error "" + log error "🔧 How to fix:" + log error " • Validate the policy JSON syntax" + log error " • Check IAM permissions for the agent role" + log error "" + rm -f "$TEMP_POLICY_FILE" exit 1 - fi + } + log info " ✅ Successfully attached inline policy: $POLICY_NAME" - # Clean up temp file rm -f "$TEMP_POLICY_FILE" else - echo "⚠ Unknown policy type: $POLICY_TYPE" + log warn "⚠️ Unknown policy type: $POLICY_TYPE, skipping" fi done diff --git a/k8s/scope/iam/delete_role b/k8s/scope/iam/delete_role index 3a9eb826..eac8dbaf 100755 --- a/k8s/scope/iam/delete_role +++ b/k8s/scope/iam/delete_role @@ -2,54 +2,67 @@ set -euo pipefail + IAM=${IAM-"{}"} IAM_ENABLED=$(echo "$IAM" | jq -r .ENABLED) if [[ "$IAM_ENABLED" == "false" || "$IAM_ENABLED" == "null" ]]; then - echo 
"No IAM role configuration. Skipping role setup" + log debug "📋 IAM is not enabled, skipping role deletion" return fi +log debug "🔍 Looking for IAM role: $SERVICE_ACCOUNT_NAME" ROLE_ARN=$(aws iam get-role --role-name "$SERVICE_ACCOUNT_NAME" --query 'Role.Arn' --output text 2>&1) || { if [[ "$ROLE_ARN" == *"NoSuchEntity"* ]] && [[ "$ROLE_ARN" == *"cannot be found"* ]]; then - echo "IAM role '$SERVICE_ACCOUNT_NAME' does not exist, skipping role deletion" + log debug "📋 IAM role '$SERVICE_ACCOUNT_NAME' does not exist, skipping role deletion" return 0 fi - echo "ERROR: Failed to find IAM role '$SERVICE_ACCOUNT_NAME'" - echo "AWS Error: $ROLE_ARN" - echo "Make sure the role exists and you have IAM permissions" + log error " ❌ Failed to find IAM role '$SERVICE_ACCOUNT_NAME'" + log error "" + log error "💡 Possible causes:" + log error " The IAM role may not exist or the agent lacks IAM permissions" + log error "" + log error "🔧 How to fix:" + log error " • Verify the role exists: aws iam get-role --role-name $SERVICE_ACCOUNT_NAME" + log error " • Check IAM permissions for the agent role" + log error "" exit 1 } ROLE_NAME=$(echo "$IAM" | jq -r .PREFIX)-"$SCOPE_ID" -echo "Detaching managed policies..." -# Use tr to convert tabs/spaces to newlines, then filter out empty lines +log debug "📝 Detaching managed policies..." aws iam list-attached-role-policies --role-name "$ROLE_NAME" --query 'AttachedPolicies[].PolicyArn' --output text | \ tr '\t' '\n' | while read policy_arn; do - if [ ! -z "$policy_arn" ]; then - echo "Detaching policy: $policy_arn" + if [ -n "$policy_arn" ]; then + log debug "📋 Detaching policy: $policy_arn" aws iam detach-role-policy --role-name "$ROLE_NAME" --policy-arn "$policy_arn" - echo "Detached policy: $policy_arn" + log info " ✅ Detached policy: $policy_arn" fi done -echo "Deleting inline policies..." -# Use tr to convert tabs/spaces to newlines, then filter out empty lines +log debug "📝 Deleting inline policies..." 
aws iam list-role-policies --role-name "$ROLE_NAME" --query 'PolicyNames' --output text | \ tr '\t' '\n' | while read policy_name; do - if [ ! -z "$policy_name" ]; then - echo "Deleting inline policy: $policy_name" + if [ -n "$policy_name" ]; then + log debug "📋 Deleting inline policy: $policy_name" aws iam delete-role-policy --role-name "$ROLE_NAME" --policy-name "$policy_name" - echo "Deleted inline policy: $policy_name" + log info " ✅ Deleted inline policy: $policy_name" fi done -echo "Deleting role..." -if aws iam delete-role --role-name "$ROLE_NAME"; then - echo "Role $ROLE_NAME deleted successfully" -else - echo "Failed to delete role $ROLE_NAME" -fi \ No newline at end of file +log debug "📝 Deleting IAM role: $ROLE_NAME" +aws iam delete-role --role-name "$ROLE_NAME" 2>&1 || { + log warn " ⚠️ Failed to delete IAM role '$ROLE_NAME'" + log warn "" + log warn "💡 Possible causes:" + log warn " The role may still have attached policies, instance profiles, or was already deleted" + log warn "" + log warn "🔧 How to fix:" + log warn " • Check attached policies: aws iam list-attached-role-policies --role-name $ROLE_NAME" + log warn " • Check instance profiles: aws iam list-instance-profiles-for-role --role-name $ROLE_NAME" + log warn "" +} +log info " ✅ IAM role deletion completed" diff --git a/k8s/scope/networking/dns/az-records/manage_route b/k8s/scope/networking/dns/az-records/manage_route index 2ba49ce6..a39b0ac5 100755 --- a/k8s/scope/networking/dns/az-records/manage_route +++ b/k8s/scope/networking/dns/az-records/manage_route @@ -3,6 +3,8 @@ set -euo pipefail get_azure_token() { + log debug "📡 Fetching Azure access token..." 
+ local token_response=$(curl --http1.1 -s -w "\n__HTTP_CODE__:%{http_code}" -X POST \ "https://login.microsoftonline.com/${AZURE_TENANT_ID}/oauth2/v2.0/token" \ -H "Content-Type: application/x-www-form-urlencoded" \ @@ -10,27 +12,46 @@ get_azure_token() { -d "client_secret=${AZURE_CLIENT_SECRET}" \ -d "scope=https://management.azure.com/.default" \ -d "grant_type=client_credentials" 2>&1) || { - echo "ERROR: Failed to get Azure access token" >&2 + log error "❌ Failed to get Azure access token" + log error "" + log error "💡 Possible causes:" + log error " The Azure credentials may be invalid or the token endpoint is unreachable" + log error "" + log error "🔧 How to fix:" + log error " • Verify AZURE_TENANT_ID, AZURE_CLIENT_ID, and AZURE_CLIENT_SECRET are set correctly" + log error "" return 1 } - + local http_code=$(echo "$token_response" | grep -o "__HTTP_CODE__:[0-9]*" | cut -d: -f2) token_response=$(echo "$token_response" | sed 's/__HTTP_CODE__:[0-9]*//') - + if [ "${http_code:-0}" -ne 200 ]; then - echo "ERROR: Failed to get Azure access token. 
HTTP code: ${http_code:-unknown}" >&2 - echo "Response: $token_response" >&2 + log error "❌ Failed to get Azure access token (HTTP ${http_code:-unknown})" + log error "" + log error "💡 Possible causes:" + log error " The Azure credentials may be invalid or expired" + log error "" + log error "🔧 How to fix:" + log error " • Verify AZURE_TENANT_ID, AZURE_CLIENT_ID, and AZURE_CLIENT_SECRET are set correctly" + log error "" return 1 fi - + local access_token=$(echo "$token_response" | grep -o '"access_token":"[^"]*' | cut -d'"' -f4) - + if [[ -z "$access_token" ]]; then - echo "ERROR: No access token in response" >&2 - echo "Response: $token_response" >&2 + log error "❌ No access token in Azure response" + log error "" + log error "💡 Possible causes:" + log error " The token endpoint returned an unexpected response format" + log error "" + log error "🔧 How to fix:" + log error " • Verify AZURE_TENANT_ID, AZURE_CLIENT_ID, and AZURE_CLIENT_SECRET are set correctly" + log error "" return 1 fi - + echo "$access_token" } @@ -52,40 +73,49 @@ for arg in "$@"; do esac done -# Get IP based on gateway type +log debug "🔍 Managing Azure DNS record..." +log debug "📋 Action: $ACTION | Gateway: $GATEWAY_NAME | Zone: $HOSTED_ZONE_NAME" + if [ "${GATEWAY_TYPE:-istio}" = "aro_cluster" ]; then - # Get IP from OpenShift router service + log debug "📡 Getting IP from ARO router service..." GATEWAY_IP=$(kubectl get svc router-default -n openshift-ingress \ -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null) - + if [ -z "$GATEWAY_IP" ]; then - echo "Error: Could not get IP address from ARO router service" >&2 - echo "Falling back to istio gateway..." >&2 - # Fall back to istio gateway + log warn "⚠️ ARO router IP not found, falling back to istio gateway..." 
GATEWAY_IP=$(kubectl get gateway "$GATEWAY_NAME" -n gateways \ -o jsonpath='{.status.addresses[?(@.type=="IPAddress")].value}' 2>/dev/null) fi else - # Default: Get IP from Gateway resource (istio) + log debug "📡 Getting IP from gateway '$GATEWAY_NAME'..." GATEWAY_IP=$(kubectl get gateway "$GATEWAY_NAME" -n gateways \ -o jsonpath='{.status.addresses[?(@.type=="IPAddress")].value}' 2>/dev/null) fi if [ -z "$GATEWAY_IP" ]; then - echo "Error: Could not get IP address for gateway $GATEWAY_NAME" >&2 + log error "❌ Could not get IP address for gateway '$GATEWAY_NAME'" + log error "" + log error "💡 Possible causes:" + log error " The gateway may not be ready or the name is incorrect" + log error "" + log error "🔧 How to fix:" + log error " • Check gateway status: kubectl get gateway $GATEWAY_NAME -n gateways" + log error "" exit 1 fi +log info "✅ Gateway IP: $GATEWAY_IP" + SCOPE_SUBDOMAIN="${SCOPE_SUBDOMAIN:-}" if [ -z "$SCOPE_SUBDOMAIN" ]; then SCOPE_SUBDOMAIN="${SCOPE_DOMAIN%.$HOSTED_ZONE_NAME}" fi +log debug "📋 Subdomain: $SCOPE_SUBDOMAIN | Zone: $HOSTED_ZONE_NAME | IP: $GATEWAY_IP" + if [ "$ACTION" = "CREATE" ]; then - # Get access token ACCESS_TOKEN=$(get_azure_token) || exit 1 - # Create or update A record RECORD_SET_URL="https://management.azure.com/subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${HOSTED_ZONE_RG}/providers/Microsoft.Network/dnsZones/${HOSTED_ZONE_NAME}/A/${SCOPE_SUBDOMAIN}?api-version=2018-05-01" RECORD_BODY=$(cat <&1) || { - echo "ERROR: Failed to create/update Azure DNS record" >&2 + log error "❌ Failed to create Azure DNS record" + log error "" + log error "💡 Possible causes:" + log error " The Azure API may be unreachable or the credentials are invalid" + log error "" + log error "🔧 How to fix:" + log error " • Verify subscription and resource group are correct" + log error " • Check Azure service principal permissions for DNS zone" + log error "" exit 1 } - - # Extract HTTP code + http_code=$(echo "$AZURE_RESPONSE" | grep -o 
"__HTTP_CODE__:[0-9]*" | cut -d: -f2) AZURE_RESPONSE=$(echo "$AZURE_RESPONSE" | sed 's/__HTTP_CODE__:[0-9]*//') - # Check if response contains error - if echo "$AZURE_RESPONSE" | grep -q '"error"'; then - echo "ERROR: Azure API returned error" >&2 - echo "Response: $AZURE_RESPONSE" >&2 + if echo "$AZURE_RESPONSE" | grep -q '"error"' || [ "${http_code:-0}" -lt 200 ] || [ "${http_code:-0}" -gt 299 ]; then + log error "❌ Azure API returned an error creating DNS record (HTTP ${http_code:-unknown})" + log error "" + log error "💡 Possible causes:" + log error " The DNS zone or resource group may not exist, or permissions are insufficient" + log error "" + log error "🔧 How to fix:" + log error " • Verify DNS zone '$HOSTED_ZONE_NAME' exists in resource group '$HOSTED_ZONE_RG'" + log error " • Check Azure service principal permissions" + log error "" exit 1 fi - - # Check HTTP status code - if [ "${http_code:-0}" -lt 200 ] || [ "${http_code:-0}" -gt 299 ]; then - echo "ERROR: Azure API returned HTTP code: ${http_code:-unknown}" >&2 - echo "Response: $AZURE_RESPONSE" >&2 - exit 1 - fi - - echo "DNS record created: $SCOPE_SUBDOMAIN.$HOSTED_ZONE_NAME -> $GATEWAY_IP" - + + log info "✅ DNS record created: $SCOPE_SUBDOMAIN.$HOSTED_ZONE_NAME -> $GATEWAY_IP" + elif [ "$ACTION" = "DELETE" ]; then - + ACCESS_TOKEN=$(get_azure_token) || exit 1 - + RECORD_SET_URL="https://management.azure.com/subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${HOSTED_ZONE_RG}/providers/Microsoft.Network/dnsZones/${HOSTED_ZONE_NAME}/A/${SCOPE_SUBDOMAIN}?api-version=2018-05-01" - + + log debug "📝 Deleting Azure DNS record..." 
curl --http1.1 -s -X DELETE \ "${RECORD_SET_URL}" \ -H "Authorization: Bearer ${ACCESS_TOKEN}" - - echo "DNS record deleted: $SCOPE_SUBDOMAIN.$HOSTED_ZONE_NAME" + + log info "✅ DNS record deleted: $SCOPE_SUBDOMAIN.$HOSTED_ZONE_NAME" fi diff --git a/k8s/scope/networking/dns/build_dns_context b/k8s/scope/networking/dns/build_dns_context index 5cd476e0..fff8d8bc 100755 --- a/k8s/scope/networking/dns/build_dns_context +++ b/k8s/scope/networking/dns/build_dns_context @@ -1,34 +1,39 @@ #!/bin/bash -# Build DNS context based on DNS_TYPE -# This script sets up the necessary environment variables for DNS management +log debug "🔍 Building DNS context..." +log debug "📋 DNS type: $DNS_TYPE" case "$DNS_TYPE" in route53) - # For Route53, we need to get hosted zone IDs source "$SERVICE_PATH/scope/networking/dns/get_hosted_zones" ;; azure) - # Set default gateway type to istio if not specified GATEWAY_TYPE="${GATEWAY_TYPE:-istio}" export GATEWAY_TYPE - - # from values.yaml: HOSTED_ZONE_NAME, HOSTED_ZONE_RG, etc. 
- echo "Azure DNS context ready" - echo "GATEWAY_TYPE: $GATEWAY_TYPE" - echo "HOSTED_ZONE_NAME: $HOSTED_ZONE_NAME" - echo "HOSTED_ZONE_RG: $HOSTED_ZONE_RG" - echo "AZURE_SUBSCRIPTION_ID: $AZURE_SUBSCRIPTION_ID" - echo "RESOURCE_GROUP: $RESOURCE_GROUP" - echo "PUBLIC_GATEWAY_NAME: $PUBLIC_GATEWAY_NAME" - echo "PRIVATE_GATEWAY_NAME: $PRIVATE_GATEWAY_NAME" + + log debug "📋 Azure DNS configuration:" + log debug " Gateway type: $GATEWAY_TYPE" + log debug " Hosted zone: $HOSTED_ZONE_NAME (RG: $HOSTED_ZONE_RG)" + log debug " Subscription: $AZURE_SUBSCRIPTION_ID" + log debug " Resource group: $RESOURCE_GROUP" + log debug " Public gateway: $PUBLIC_GATEWAY_NAME" + log debug " Private gateway: $PRIVATE_GATEWAY_NAME" ;; external_dns) - echo "external_dns context ready" - echo "DNS records will be managed automatically by External DNS operator" + log debug "📋 DNS records will be managed automatically by External DNS operator" ;; *) - echo "Error: Unsupported DNS type '$DNS_TYPE'" + log error "❌ Unsupported DNS type: '$DNS_TYPE'" + log error "" + log error "💡 Possible causes:" + log error " The DNS_TYPE value in values.yaml is not one of: route53, azure, external_dns" + log error "" + log error "🔧 How to fix:" + log error " • Check DNS_TYPE in values.yaml" + log error " • Supported types: route53, azure, external_dns" + log error "" exit 1 ;; -esac \ No newline at end of file +esac + +log info "✅ DNS context ready" diff --git a/k8s/scope/networking/dns/domain/generate_domain b/k8s/scope/networking/dns/domain/generate_domain index ccb2a3b5..8348a6f7 100755 --- a/k8s/scope/networking/dns/domain/generate_domain +++ b/k8s/scope/networking/dns/domain/generate_domain @@ -1,11 +1,11 @@ #!/bin/bash -echo "Generating domain" +log debug "🔍 Generating scope domain..." 
-ACCOUNT_NAME=$(echo $CONTEXT | jq .account.slug -r) -NAMESPACE_NAME=$(echo $CONTEXT | jq .namespace.slug -r) -APPLICATION_NAME=$(echo $CONTEXT | jq .application.slug -r) -SCOPE_NAME=$(echo $CONTEXT | jq .scope.slug -r) +ACCOUNT_NAME=$(echo "$CONTEXT" | jq .account.slug -r) +NAMESPACE_NAME=$(echo "$CONTEXT" | jq .namespace.slug -r) +APPLICATION_NAME=$(echo "$CONTEXT" | jq .application.slug -r) +SCOPE_NAME=$(echo "$CONTEXT" | jq .scope.slug -r) SCOPE_DOMAIN=$("$SERVICE_PATH/scope/networking/dns/domain/domain-generate" \ --accountSlug="$ACCOUNT_NAME" \ @@ -13,15 +13,27 @@ SCOPE_DOMAIN=$("$SERVICE_PATH/scope/networking/dns/domain/domain-generate" \ --applicationSlug="$APPLICATION_NAME" \ --scopeSlug="$SCOPE_NAME" \ --domain="$DOMAIN" \ - --useAccountSlug="$USE_ACCOUNT_SLUG") - -echo "Generated domain: $SCOPE_DOMAIN" - + --useAccountSlug="$USE_ACCOUNT_SLUG") || { + log error "❌ Failed to generate scope domain" + log error "" + log error "💡 Possible causes:" + log error " The domain-generate binary returned an error" + log error "" + log error "🔧 How to fix:" + log error " • Check the domain-generate binary exists: ls -la $SERVICE_PATH/scope/networking/dns/domain/domain-generate" + log error " • Verify the input slugs are valid" + log error "" + return 1 +} + +log debug "📋 Generated domain: $SCOPE_DOMAIN" + +log debug "📝 Patching scope with domain..." np scope patch --id "$SCOPE_ID" --body "{\"domain\":\"$SCOPE_DOMAIN\"}" +log info "✅ Scope domain updated" CONTEXT=$(echo "$CONTEXT" | jq \ --arg scope_domain "$SCOPE_DOMAIN" \ '.scope.domain = $scope_domain') - export SCOPE_DOMAIN diff --git a/k8s/scope/networking/dns/external_dns/manage_route b/k8s/scope/networking/dns/external_dns/manage_route index 7c8cfdf4..f4fe1045 100644 --- a/k8s/scope/networking/dns/external_dns/manage_route +++ b/k8s/scope/networking/dns/external_dns/manage_route @@ -3,57 +3,64 @@ set -euo pipefail if [ "$ACTION" = "CREATE" ]; then - echo "Building DNSEndpoint manifest for ExternalDNS..." 
- - echo "Getting IP for gateway: $GATEWAY_NAME" + log debug "🔍 Building DNSEndpoint manifest for ExternalDNS..." + log debug "📡 Getting IP for gateway: $GATEWAY_NAME" GATEWAY_IP=$(kubectl get gateway "$GATEWAY_NAME" -n gateways \ -o jsonpath='{.status.addresses[?(@.type=="IPAddress")].value}' 2>/dev/null) if [ -z "$GATEWAY_IP" ]; then - echo "Warning: Could not get gateway IP for $GATEWAY_NAME" + log warn "⚠️ Gateway IP not found, trying service fallback..." GATEWAY_IP=$(kubectl get service "$GATEWAY_NAME" -n gateways \ -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null) fi - + if [ -z "$GATEWAY_IP" ]; then - echo "Warning: Could not determine gateway IP address yet, DNSEndpoint will be created later" + log warn "⚠️ Could not determine gateway IP address yet, DNSEndpoint will be created later" exit 0 fi - - echo "Gateway IP: $GATEWAY_IP" - + + log info "✅ Gateway IP: $GATEWAY_IP" + DNS_ENDPOINT_TEMPLATE="${DNS_ENDPOINT_TEMPLATE:-$SERVICE_PATH/deployment/templates/dns-endpoint.yaml.tpl}" - + if [ -f "$DNS_ENDPOINT_TEMPLATE" ]; then DNS_ENDPOINT_FILE="$OUTPUT_DIR/dns-endpoint-$SCOPE_ID.yaml" CONTEXT_PATH="$OUTPUT_DIR/context-$SCOPE_ID-dns.json" - + echo "$CONTEXT" | jq --arg gateway_ip "$GATEWAY_IP" '. 
+ {gateway_ip: $gateway_ip}' > "$CONTEXT_PATH" - - echo "Building DNSEndpoint Template: $DNS_ENDPOINT_TEMPLATE to $DNS_ENDPOINT_FILE" - + + log debug "📝 Building DNSEndpoint from template: $DNS_ENDPOINT_TEMPLATE" + gomplate -c .="$CONTEXT_PATH" \ --file "$DNS_ENDPOINT_TEMPLATE" \ --out "$DNS_ENDPOINT_FILE" - - echo "DNSEndpoint manifest created at: $DNS_ENDPOINT_FILE" - + + log info "✅ DNSEndpoint manifest created: $DNS_ENDPOINT_FILE" + rm "$CONTEXT_PATH" - + else - echo "Error: DNSEndpoint template not found at $DNS_ENDPOINT_TEMPLATE" + log error "❌ DNSEndpoint template not found: $DNS_ENDPOINT_TEMPLATE" + log error "" + log error "💡 Possible causes:" + log error " The template file may be missing or the path is incorrect" + log error "" + log error "🔧 How to fix:" + log error " • Verify template exists: ls -la $DNS_ENDPOINT_TEMPLATE" + log error "" exit 1 fi elif [ "$ACTION" = "DELETE" ]; then - echo "Deleting DNSEndpoint for external_dns..." + log debug "🔍 Deleting DNSEndpoint for external_dns..." SCOPE_SLUG=$(echo "$CONTEXT" | jq -r '.scope.slug') DNS_ENDPOINT_NAME="k-8-s-${SCOPE_SLUG}-${SCOPE_ID}-dns" - echo "Attempting to delete DNSEndpoint by name: $DNS_ENDPOINT_NAME" - kubectl delete dnsendpoint "$DNS_ENDPOINT_NAME" -n "$K8S_NAMESPACE" || echo "DNSEndpoint may already be deleted" - - echo "DNSEndpoint deletion completed" -fi \ No newline at end of file + log debug "📝 Deleting DNSEndpoint: $DNS_ENDPOINT_NAME in namespace $K8S_NAMESPACE" + kubectl delete dnsendpoint "$DNS_ENDPOINT_NAME" -n "$K8S_NAMESPACE" || { + log warn "⚠️ DNSEndpoint '$DNS_ENDPOINT_NAME' may already be deleted" + } + log info "✅ DNSEndpoint deletion completed" +fi diff --git a/k8s/scope/networking/dns/get_hosted_zones b/k8s/scope/networking/dns/get_hosted_zones index 019707a9..24144536 100755 --- a/k8s/scope/networking/dns/get_hosted_zones +++ b/k8s/scope/networking/dns/get_hosted_zones @@ -1,20 +1,20 @@ #!/bin/bash -echo "Getting hosted zones" +log debug "🔍 Getting hosted zones..." 
HOSTED_PUBLIC_ZONE_ID=$(echo "$CONTEXT" | jq -r '.providers["cloud-providers"].networking.hosted_public_zone_id') HOSTED_PRIVATE_ZONE_ID=$(echo "$CONTEXT" | jq -r '.providers["cloud-providers"].networking.hosted_zone_id') -echo "Public Hosted Zone ID: $HOSTED_PUBLIC_ZONE_ID" -echo "Private Hosted Zone ID: $HOSTED_PRIVATE_ZONE_ID" +log debug "📋 Public Hosted Zone ID: $HOSTED_PUBLIC_ZONE_ID" +log debug "📋 Private Hosted Zone ID: $HOSTED_PRIVATE_ZONE_ID" -# Check if both hosted zones are empty or null if [[ -z "$HOSTED_PUBLIC_ZONE_ID" || "$HOSTED_PUBLIC_ZONE_ID" == "null" ]] && [[ -z "$HOSTED_PRIVATE_ZONE_ID" || "$HOSTED_PRIVATE_ZONE_ID" == "null" ]]; then - echo "Unable to find any hosted zones (neither public nor private)" >&2 + log warn "⚠️ No hosted zones found (neither public nor private)" exit 0 fi export HOSTED_PUBLIC_ZONE_ID export HOSTED_PRIVATE_ZONE_ID -mkdir -p "$SERVICE_PATH/tmp/" -mkdir -p "$SERVICE_PATH/output/" +mkdir -p "$SERVICE_PATH/tmp/" "$SERVICE_PATH/output/" + +log info "✅ Hosted zones loaded" diff --git a/k8s/scope/networking/dns/manage_dns b/k8s/scope/networking/dns/manage_dns index f5fd1202..6d7538c3 100755 --- a/k8s/scope/networking/dns/manage_dns +++ b/k8s/scope/networking/dns/manage_dns @@ -1,22 +1,27 @@ #!/bin/bash - set -euo pipefail -echo "Managing DNS records" -echo "DNS Type: $DNS_TYPE" -echo "Action: $ACTION" -echo "Scope Domain: $SCOPE_DOMAIN" +log debug "🔍 Managing DNS records..." 
+log debug "📋 DNS type: $DNS_TYPE | Action: $ACTION | Domain: $SCOPE_DOMAIN" if [[ "$ACTION" == "DELETE" ]] && [[ -z "${SCOPE_DOMAIN:-}" || "${SCOPE_DOMAIN:-}" == "To be defined" ]]; then - echo "Skipping route53 action as the scope has no domain" + log warn "⚠️ Skipping DNS action — scope has no domain" return 0 fi case "$DNS_TYPE" in route53) - echo "Using Route53 DNS provider" + log debug "📝 Using Route53 DNS provider" source "$SERVICE_PATH/scope/networking/dns/route53/manage_route" --action="$ACTION" || { - echo "ERROR: Route53 DNS management failed" + log error "❌ Route53 DNS management failed" + log error "" + log error "💡 Possible causes:" + log error " The hosted zone may not exist or the agent lacks Route53 permissions" + log error "" + log error "🔧 How to fix:" + log error " • Check hosted zone exists: aws route53 list-hosted-zones" + log error " • Verify IAM permissions for route53:ChangeResourceRecordSets" + log error "" exit 1 } ;; @@ -26,7 +31,7 @@ case "$DNS_TYPE" in else GATEWAY_NAME="$PRIVATE_GATEWAY_NAME" fi - + log debug "📝 Using Azure DNS provider (gateway: $GATEWAY_NAME)" source "$SERVICE_PATH/scope/networking/dns/az-records/manage_route" \ --action="$ACTION" \ --resource-group="$RESOURCE_GROUP" \ @@ -36,15 +41,32 @@ case "$DNS_TYPE" in --hosted-zone-rg="$HOSTED_ZONE_RG" ;; external_dns) - echo "Using external_dns provider" + log debug "📝 Using External DNS provider" source "$SERVICE_PATH/scope/networking/dns/external_dns/manage_route" || { - echo "ERROR: External DNS management failed" + log error "❌ External DNS management failed" + log error "" + log error "💡 Possible causes:" + log error " The External DNS operator may not be running or lacks permissions" + log error "" + log error "🔧 How to fix:" + log error " • Check operator status: kubectl get pods -l app=external-dns" + log error " • Review operator logs: kubectl logs -l app=external-dns" + log error "" exit 1 } ;; *) - echo "Error: Unsupported dns type '$DNS_TYPE'" + log error "❌ 
Unsupported DNS type: '$DNS_TYPE'" + log error "" + log error "💡 Possible causes:" + log error " The DNS_TYPE value in values.yaml is not one of: route53, azure, external_dns" + log error "" + log error "🔧 How to fix:" + log error " • Check DNS_TYPE in values.yaml" + log error " • Supported types: route53, azure, external_dns" + log error "" exit 1 ;; esac +log info "✅ DNS records managed successfully" diff --git a/k8s/scope/networking/dns/route53/manage_route b/k8s/scope/networking/dns/route53/manage_route index 5b6d5238..ab59ff47 100644 --- a/k8s/scope/networking/dns/route53/manage_route +++ b/k8s/scope/networking/dns/route53/manage_route @@ -10,45 +10,63 @@ for arg in "$@"; do esac done -echo "Looking for load balancer: $ALB_NAME in region $REGION" - -# Get load balancer info and check if it exists +log debug "📡 Looking for load balancer: $ALB_NAME in region $REGION..." LB_OUTPUT=$(aws elbv2 describe-load-balancers \ --names "$ALB_NAME" \ --region "$REGION" \ --query 'LoadBalancers[0].[DNSName,CanonicalHostedZoneId]' \ --output text \ --no-paginate 2>&1) || { - echo "ERROR: Failed to find load balancer '$ALB_NAME' in region '$REGION'" - echo "AWS Error: $LB_OUTPUT" + log error "❌ Failed to find load balancer '$ALB_NAME' in region '$REGION'" + log error "" + log error "💡 Possible causes:" + log error " The load balancer may not exist or you lack permissions to describe it" + log error "" + log error "🔧 How to fix:" + log error " • Verify the ALB exists: aws elbv2 describe-load-balancers --names $ALB_NAME" + log error " • Check IAM permissions for elbv2:DescribeLoadBalancers" + log error "" exit 1 } read -r ELB_DNS_NAME ELB_HOSTED_ZONE_ID <<< "$LB_OUTPUT" if [[ -z "$ELB_DNS_NAME" ]] || [[ "$ELB_DNS_NAME" == "None" ]]; then - echo "ERROR: Load balancer '$ALB_NAME' exists but has no DNS name" + log error "❌ Load balancer '$ALB_NAME' exists but has no DNS name" + log error "" + log error "💡 Possible causes:" + log error " The load balancer may still be provisioning" 
+ log error "" + log error "🔧 How to fix:" + log error " • Check ALB status: aws elbv2 describe-load-balancers --names $ALB_NAME" + log error "" exit 1 fi -echo "Found load balancer DNS: $ELB_DNS_NAME" +log info "✅ Found load balancer DNS: $ELB_DNS_NAME" HOSTED_ZONES=() +ZONE_TYPES=() if [[ -n "$HOSTED_PRIVATE_ZONE_ID" ]] && [[ "$HOSTED_PRIVATE_ZONE_ID" != "null" ]]; then HOSTED_ZONES+=("$HOSTED_PRIVATE_ZONE_ID") + ZONE_TYPES+=("private") fi if [[ -n "$HOSTED_PUBLIC_ZONE_ID" ]] && [[ "$HOSTED_PUBLIC_ZONE_ID" != "null" ]]; then if [[ "$HOSTED_PUBLIC_ZONE_ID" != "$HOSTED_PRIVATE_ZONE_ID" ]]; then HOSTED_ZONES+=("$HOSTED_PUBLIC_ZONE_ID") - echo "Will create records in both public and private zones" + ZONE_TYPES+=("public") + log debug "📋 Will create records in both public and private zones" fi fi -for ZONE_ID in "${HOSTED_ZONES[@]}"; do - echo "Creating Route53 record in hosted zone: $ZONE_ID" - echo "Domain: $SCOPE_DOMAIN -> $ELB_DNS_NAME" +for i in "${!HOSTED_ZONES[@]}"; do + ZONE_ID="${HOSTED_ZONES[$i]}" + ZONE_TYPE="${ZONE_TYPES[$i]}" + log info "" + log debug "📝 ${ACTION%E}ING Route53 record in hosted zone: $ZONE_ID" + log debug "📋 Domain: $SCOPE_DOMAIN -> $ELB_DNS_NAME" ROUTE53_OUTPUT=$(aws route53 change-resource-record-sets \ --hosted-zone-id "$ZONE_ID" \ @@ -72,16 +90,25 @@ for ZONE_ID in "${HOSTED_ZONES[@]}"; do }" 2>&1) || { if [[ "$ACTION" == "DELETE" ]] && [[ "$ROUTE53_OUTPUT" == *"InvalidChangeBatch"* ]] && [[ "$ROUTE53_OUTPUT" == *"but it was not found"* ]]; then - echo "Route53 record for $SCOPE_DOMAIN does not exist in zone $ZONE_ID, skipping deletion" + log debug "📋 Route53 record for $SCOPE_DOMAIN does not exist in zone $ZONE_ID, skipping deletion" continue fi - echo "ERROR: Failed to $ACTION Route53 record" - echo "Zone ID: $ZONE_ID" - echo "AWS Error: $ROUTE53_OUTPUT" - echo "This often happens when the agent lacks Route53 permissions" + log error "❌ Failed to $ACTION Route53 record" + log error "📋 Zone ID: $ZONE_ID" + log error "" + log error "💡 
Possible causes:" + log error " The agent may lack Route53 permissions" + log error "" + log error "🔧 How to fix:" + log error " • Check IAM permissions for route53:ChangeResourceRecordSets" + log error " • Verify the hosted zone ID is correct" + log error "" exit 1 } - - echo "Successfully $ACTION Route53 record" -done \ No newline at end of file + + log info "✅ Successfully ${ACTION%E}ED $ZONE_TYPE Route53 record" +done + +log info "" +log info "✨ Route53 DNS configuration completed" diff --git a/k8s/scope/networking/gateway/build_gateway b/k8s/scope/networking/gateway/build_gateway index 8d97e2da..1fec78a0 100755 --- a/k8s/scope/networking/gateway/build_gateway +++ b/k8s/scope/networking/gateway/build_gateway @@ -1,19 +1,30 @@ #!/bin/bash -echo "Creating ingress for scope $SCOPE_ID with domain $SCOPE_DOMAIN" - -echo "Creating $INGRESS_VISIBILITY ingress..." +log debug "🔍 Building gateway ingress..." +log debug "📋 Scope: $SCOPE_ID | Domain: $SCOPE_DOMAIN | Visibility: $INGRESS_VISIBILITY" INGRESS_FILE="$OUTPUT_DIR/ingress-$SCOPE_ID-$INGRESS_VISIBILITY.yaml" CONTEXT_PATH="$OUTPUT_DIR/context-$SCOPE_ID.json" echo "$CONTEXT" > "$CONTEXT_PATH" -echo "Building Template: $TEMPLATE to $INGRESS_FILE" +log debug "📝 Building template: $TEMPLATE" gomplate -c .="$CONTEXT_PATH" \ --file "$TEMPLATE" \ - --out "$INGRESS_FILE" - - -rm "$CONTEXT_PATH" \ No newline at end of file + --out "$INGRESS_FILE" || { + log error "❌ Failed to render ingress template" + log error "" + log error "💡 Possible causes:" + log error " The template file may contain invalid gomplate syntax" + log error "" + log error "🔧 How to fix:" + log error " • Verify template exists: ls -la $TEMPLATE" + log error " • Check the template is valid gomplate YAML" + log error "" + exit 1 +} + +log info "✅ Ingress manifest created: $INGRESS_FILE" + +rm "$CONTEXT_PATH" diff --git a/k8s/scope/networking/resolve_balancer b/k8s/scope/networking/resolve_balancer new file mode 100755 index 00000000..fd07a68f --- /dev/null 
+++ b/k8s/scope/networking/resolve_balancer @@ -0,0 +1,212 @@ +#!/bin/bash + +# Resolves the ALB name to use for the scope's ingress. +# +# Resolution priority (when DNS_TYPE is route53): +# 1. Route53 — if a DNS record already exists for the scope domain, +# use the ALB it points to (ensures DNS/ingress consistency) +# 2. Load balancing — when additional balancers are configured, pick +# the ALB with the fewest HTTPS listener rules +# 3. Provider config — base ALB from scope-configurations or +# container-orchestration provider +# +# For non-route53 DNS types, only priority 3 (provider config) is used. +# +# Inputs (env vars): +# DNS_TYPE - DNS provider type (route53, azure, external_dns) +# INGRESS_VISIBILITY - "internet-facing" or "internal" +# CONTEXT - JSON with provider configuration +# REGION - AWS region (for elbv2 API calls) +# +# Outputs (env vars): +# ALB_NAME - The resolved ALB name to use + +_RESOLVE_BALANCER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if ! type -t log >/dev/null 2>&1; then + source "$_RESOLVE_BALANCER_DIR/../../logging" +fi + +if ! type -t get_config_value >/dev/null 2>&1; then + source "$_RESOLVE_BALANCER_DIR/../../utils/get_config_value" +fi + +# Queries AWS ELBv2 to count listener rules on an ALB's HTTPS (443) listener. +# The default rule is excluded since it always exists. 
+# Usage: get_alb_rule_count +# Returns: integer rule count on stdout, non-zero exit on failure +get_alb_rule_count() { + local alb_name="$1" + + local alb_arn + alb_arn=$(aws elbv2 describe-load-balancers \ + --names "$alb_name" \ + --region "$REGION" \ + --query 'LoadBalancers[0].LoadBalancerArn' \ + --output text 2>/dev/null) || return 1 + + if [ -z "$alb_arn" ] || [ "$alb_arn" = "None" ]; then + return 1 + fi + + local listener_arn + listener_arn=$(aws elbv2 describe-listeners \ + --load-balancer-arn "$alb_arn" \ + --region "$REGION" \ + --query 'Listeners[?Port==`443`].ListenerArn | [0]' \ + --output text 2>/dev/null) || return 1 + + if [ -z "$listener_arn" ] || [ "$listener_arn" = "None" ]; then + return 1 + fi + + local rules_json + rules_json=$(aws elbv2 describe-rules \ + --listener-arn "$listener_arn" \ + --region "$REGION" \ + --output json 2>/dev/null) || return 1 + + # Exclude the default rule (always present, not a routing rule) + echo "$rules_json" | jq '[.Rules[] | select(.IsDefault == false)] | length' +} + +# Looks up the Route53 A-record alias for the scope domain, then resolves +# the ALB DNS name back to an ALB name. +# Returns the ALB name on stdout, or non-zero exit if not found. +get_alb_from_route53() { + local domain="$1" + local region="$2" + + local zone_id + zone_id=$(echo "$CONTEXT" | jq -r ' + .providers["cloud-providers"].networking.hosted_public_zone_id // + .providers["cloud-providers"].networking.hosted_zone_id // + empty + ') || return 1 + + if [ -z "$zone_id" ]; then + return 1 + fi + + # Use --start-record-name to jump directly to the record instead of + # scanning from the beginning (list-resource-record-sets does not + # auto-paginate and returns max 300 per page). + local alias_dns + alias_dns=$(aws route53 list-resource-record-sets \ + --hosted-zone-id "$zone_id" \ + --start-record-name "${domain}." \ + --start-record-type "A" \ + --max-items 1 \ + --query "ResourceRecordSets[?Name=='${domain}.' 
&& Type=='A'].AliasTarget.DNSName | [0]" \ + --output text \ + --region "$region" 2>/dev/null) || return 1 + + if [ -z "$alias_dns" ] || [ "$alias_dns" = "None" ] || [ "$alias_dns" = "null" ]; then + return 1 + fi + + # Strip trailing dot from DNS name + alias_dns="${alias_dns%.}" + + # Reverse-lookup: find ALB name from its DNS name (case-insensitive match) + local alb_name + alb_name=$(aws elbv2 describe-load-balancers \ + --region "$region" \ + --output json 2>/dev/null \ + | jq -r --arg dns "$alias_dns" \ + '.LoadBalancers[] | select(.DNSName | ascii_downcase == ($dns | ascii_downcase)) | .LoadBalancerName' \ + | head -1) || return 1 + + if [ -z "$alb_name" ]; then + return 1 + fi + + echo "$alb_name" +} + +# ============================================================================= +# Main logic +# ============================================================================= + +# Resolve the base ALB name from configuration +ALB_NAME="k8s-nullplatform-$INGRESS_VISIBILITY" + +if [ "$INGRESS_VISIBILITY" = "internet-facing" ]; then + ALB_NAME=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.balancer_public_name' \ + --provider '.providers["container-orchestration"].balancer.public_name' \ + --default "$ALB_NAME" + ) +else + ALB_NAME=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.balancer_private_name' \ + --provider '.providers["container-orchestration"].balancer.private_name' \ + --default "$ALB_NAME" + ) +fi + +if [[ "$DNS_TYPE" == "route53" ]]; then + # Priority 1: Check Route53 for an existing DNS record + SCOPE_DOMAIN_VAL=$(echo "$CONTEXT" | jq -r '.scope.domain // empty') + EXISTING_ALB="" + + if [ -n "$SCOPE_DOMAIN_VAL" ]; then + EXISTING_ALB=$(get_alb_from_route53 "$SCOPE_DOMAIN_VAL" "$REGION" 2>/dev/null) || true + fi + + if [ -n "$EXISTING_ALB" ]; then + log info "📝 Using ALB '$EXISTING_ALB' from Route53 record for $SCOPE_DOMAIN_VAL" + ALB_NAME="$EXISTING_ALB" + else + # Priority 2: If 
additional balancers configured, pick the least-loaded one + ADDITIONAL_BALANCERS="" + if [ "$INGRESS_VISIBILITY" = "internet-facing" ]; then + ADDITIONAL_BALANCERS=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.additional_public_balancers' \ + --provider '.providers["container-orchestration"].balancer.additional_public_names' \ + --default "" + ) + else + ADDITIONAL_BALANCERS=$(get_config_value \ + --provider '.providers["scope-configurations"].networking.additional_private_balancers' \ + --provider '.providers["container-orchestration"].balancer.additional_private_names' \ + --default "" + ) + fi + + if [ -n "$ADDITIONAL_BALANCERS" ] && [ "$ADDITIONAL_BALANCERS" != "null" ] && [ "$ADDITIONAL_BALANCERS" != "[]" ]; then + log debug "🔍 Additional balancers configured, resolving least-loaded ALB..." + + CANDIDATES=$(echo "$ADDITIONAL_BALANCERS" | jq -r --arg base "$ALB_NAME" '[$base] + . | .[]') + + log debug "📋 Candidate balancers: $(echo "$CANDIDATES" | paste -sd ',' - | sed 's/,/, /g')" + + MIN_RULES=-1 + BEST_ALB="$ALB_NAME" + + for CANDIDATE in $CANDIDATES; do + RULE_COUNT=$(get_alb_rule_count "$CANDIDATE" 2>/dev/null) || { + log warn "⚠️ Could not query rules for ALB '$CANDIDATE', skipping" + continue + } + + log debug "📋 ALB '$CANDIDATE': $RULE_COUNT rules" + + if [ "$MIN_RULES" -eq -1 ] || [ "$RULE_COUNT" -lt "$MIN_RULES" ]; then + MIN_RULES=$RULE_COUNT + BEST_ALB="$CANDIDATE" + fi + done + + if [ "$BEST_ALB" != "$ALB_NAME" ]; then + log info "📝 Selected ALB '$BEST_ALB' ($MIN_RULES rules) over default '$ALB_NAME'" + fi + + ALB_NAME="$BEST_ALB" + fi + fi +else + log debug "📋 DNS type is '$DNS_TYPE', skipping Route53 lookup and load balancing" +fi + +export ALB_NAME diff --git a/k8s/scope/pause_autoscaling b/k8s/scope/pause_autoscaling index 5516e11c..35a074cd 100755 --- a/k8s/scope/pause_autoscaling +++ b/k8s/scope/pause_autoscaling @@ -2,6 +2,7 @@ set -euo pipefail + DEPLOYMENT_ID=$(echo "$CONTEXT" | jq 
.scope.current_active_deployment -r) SCOPE_ID=$(echo "$CONTEXT" | jq .scope.id -r) @@ -11,24 +12,21 @@ K8S_NAMESPACE=$(echo "$CONTEXT" | jq -r --arg default "$K8S_NAMESPACE" ' HPA_NAME="hpa-d-$SCOPE_ID-$DEPLOYMENT_ID" -if ! kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" >/dev/null 2>&1; then - echo "HPA $HPA_NAME not found in namespace $K8S_NAMESPACE" - exit 1 -fi +require_hpa "$HPA_NAME" "$K8S_NAMESPACE" "$SCOPE_ID" CURRENT_CONFIG=$(kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" -o json) CURRENT_MIN=$(echo "$CURRENT_CONFIG" | jq -r '.spec.minReplicas') CURRENT_MAX=$(echo "$CURRENT_CONFIG" | jq -r '.spec.maxReplicas') -echo "Current HPA configuration:" -echo " Min replicas: $CURRENT_MIN" -echo " Max replicas: $CURRENT_MAX" +log debug "📋 Current HPA configuration:" +log debug " Min replicas: $CURRENT_MIN" +log debug " Max replicas: $CURRENT_MAX" DEPLOYMENT_NAME="d-$SCOPE_ID-$DEPLOYMENT_ID" CURRENT_REPLICAS=$(kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.replicas}') -echo "Current deployment replicas: $CURRENT_REPLICAS" -echo "Pausing autoscaling at $CURRENT_REPLICAS replicas..." +log debug "📋 Current deployment replicas: $CURRENT_REPLICAS" +log debug "📝 Pausing autoscaling at $CURRENT_REPLICAS replicas..." PATCH=$(jq -n \ --arg originalMin "$CURRENT_MIN" \ @@ -55,9 +53,10 @@ PATCH=$(jq -n \ kubectl patch hpa "$HPA_NAME" -n "$K8S_NAMESPACE" --type='merge' -p "$PATCH" -echo "Autoscaling paused successfully" -echo " HPA: $HPA_NAME" -echo " Namespace: $K8S_NAMESPACE" -echo " Fixed replicas: $CURRENT_REPLICAS" -echo "" -echo "To resume autoscaling, use the resume-autoscaling action or manually patch the HPA." \ No newline at end of file +log info "" +log info "✅ Autoscaling paused successfully" +log debug " HPA: $HPA_NAME" +log debug " Namespace: $K8S_NAMESPACE" +log debug " Fixed replicas: $CURRENT_REPLICAS" +log info "" +log debug "📋 To resume autoscaling, use the resume-autoscaling action or manually patch the HPA." 
diff --git a/k8s/scope/require_resource b/k8s/scope/require_resource new file mode 100644 index 00000000..a3daa10a --- /dev/null +++ b/k8s/scope/require_resource @@ -0,0 +1,75 @@ +#!/bin/bash + +# Shared resource validation functions for scope workflows. +# Loaded as a workflow step, exports functions for subsequent steps. + + +require_hpa() { + local hpa_name="$1" + local namespace="$2" + local scope_id="$3" + + log debug "🔍 Looking for HPA '$hpa_name' in namespace '$namespace'..." + + if ! kubectl get hpa "$hpa_name" -n "$namespace" >/dev/null 2>&1; then + log error " ❌ HPA '$hpa_name' not found in namespace '$namespace'" + log error "" + log error "💡 Possible causes:" + log error " The HPA may not exist or autoscaling is not configured for this deployment" + log error "" + log error "🔧 How to fix:" + log error " • Verify the HPA exists: kubectl get hpa -n $namespace" + log error " • Check that autoscaling is configured for scope $scope_id" + log error "" + exit 1 + fi +} + +require_deployment() { + local deployment_name="$1" + local namespace="$2" + local scope_id="$3" + + log debug "🔍 Looking for deployment '$deployment_name' in namespace '$namespace'..." + + if ! 
kubectl get deployment "$deployment_name" -n "$namespace" >/dev/null 2>&1; then + log error " ❌ Deployment '$deployment_name' not found in namespace '$namespace'" + log error "" + log error "💡 Possible causes:" + log error " The deployment may not exist or was not created yet" + log error "" + log error "🔧 How to fix:" + log error " • Verify the deployment exists: kubectl get deployment -n $namespace" + log error " • Check that scope $scope_id has an active deployment" + log error "" + exit 1 + fi +} + +find_deployment_by_label() { + local scope_id="$1" + local deployment_id="$2" + local namespace="$3" + local label="name=d-$scope_id-$deployment_id" + + log debug "🔍 Looking for deployment with label: $label" + + DEPLOYMENT=$(kubectl get deployment -n "$namespace" -l "$label" -o jsonpath="{.items[0].metadata.name}" 2>&1) || { + log error " ❌ Failed to find deployment with label '$label' in namespace '$namespace'" + log debug "📋 Kubectl error: $DEPLOYMENT" + DEPLOYMENT="" + } + + if [[ -z "$DEPLOYMENT" ]]; then + log error " ❌ No deployment found with label '$label' in namespace '$namespace'" + log error "" + log error "💡 Possible causes:" + log error " The deployment may not exist or was not created yet" + log error "" + log error "🔧 How to fix:" + log error " • Verify the deployment exists: kubectl get deployment -n $namespace -l $label" + log error " • Check that scope $scope_id has an active deployment" + log error "" + exit 1 + fi +} diff --git a/k8s/scope/restart_pods b/k8s/scope/restart_pods index 0a5b5469..0433d294 100755 --- a/k8s/scope/restart_pods +++ b/k8s/scope/restart_pods @@ -2,6 +2,7 @@ set -euo pipefail + DEPLOYMENT_ID=$(echo "$CONTEXT" | jq .scope.current_active_deployment -r) SCOPE_ID=$(echo "$CONTEXT" | jq .scope.id -r) @@ -9,29 +10,34 @@ K8S_NAMESPACE=$(echo "$CONTEXT" | jq -r --arg default "$K8S_NAMESPACE" ' .providers["container-orchestration"].cluster.namespace // $default ') -echo "Looking for deployment with label: 
name=d-$SCOPE_ID-$DEPLOYMENT_ID" -DEPLOYMENT=$(kubectl get deployment -n "$K8S_NAMESPACE" -l "name=d-$SCOPE_ID-$DEPLOYMENT_ID" -o jsonpath="{.items[0].metadata.name}" 2>&1) || { - echo "ERROR: Failed to find deployment" - echo "Namespace: $K8S_NAMESPACE" - echo "Kubectl error: $DEPLOYMENT" - exit 1 -} - -if [[ -z "$DEPLOYMENT" ]]; then - echo "ERROR: No deployment found with label name=d-$SCOPE_ID-$DEPLOYMENT_ID" - exit 1 -fi +find_deployment_by_label "$SCOPE_ID" "$DEPLOYMENT_ID" "$K8S_NAMESPACE" -echo "Restarting deployment: $DEPLOYMENT" +log debug "📝 Restarting deployment: $DEPLOYMENT" kubectl rollout restart -n "$K8S_NAMESPACE" "deployment/$DEPLOYMENT" || { - echo "ERROR: Failed to restart deployment $DEPLOYMENT" + log error " ❌ Failed to restart deployment '$DEPLOYMENT'" + log error "" + log error "💡 Possible causes:" + log error " The deployment may be in a bad state or kubectl lacks permissions" + log error "" + log error "🔧 How to fix:" + log error " • Check deployment status: kubectl describe deployment $DEPLOYMENT -n $K8S_NAMESPACE" + log error "" exit 1 } -echo "Waiting for rollout to complete..." +log debug "🔍 Waiting for rollout to complete..." 
kubectl rollout status -n "$K8S_NAMESPACE" "deployment/$DEPLOYMENT" -w || { - echo "ERROR: Rollout failed or timed out" + log error " ❌ Rollout failed or timed out" + log error "" + log error "💡 Possible causes:" + log error " Pods may be failing to start (image pull errors, crashes, resource limits)" + log error "" + log error "🔧 How to fix:" + log error " • Check pod events: kubectl describe pods -n $K8S_NAMESPACE -l name=d-$SCOPE_ID-$DEPLOYMENT_ID" + log error " • Check pod logs: kubectl logs -n $K8S_NAMESPACE -l name=d-$SCOPE_ID-$DEPLOYMENT_ID --tail=50" + log error "" exit 1 } -echo "Deployment restart completed successfully" +log info "" +log info "✅ Deployment restart completed successfully" diff --git a/k8s/scope/resume_autoscaling b/k8s/scope/resume_autoscaling index 6e410470..9b32c791 100755 --- a/k8s/scope/resume_autoscaling +++ b/k8s/scope/resume_autoscaling @@ -2,6 +2,7 @@ set -euo pipefail + DEPLOYMENT_ID=$(echo "$CONTEXT" | jq .scope.current_active_deployment -r) SCOPE_ID=$(echo "$CONTEXT" | jq .scope.id -r) @@ -11,16 +12,13 @@ K8S_NAMESPACE=$(echo "$CONTEXT" | jq -r --arg default "$K8S_NAMESPACE" ' HPA_NAME="hpa-d-$SCOPE_ID-$DEPLOYMENT_ID" -if ! 
kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" >/dev/null 2>&1; then - echo "HPA $HPA_NAME not found in namespace $K8S_NAMESPACE" - exit 1 -fi +require_hpa "$HPA_NAME" "$K8S_NAMESPACE" "$SCOPE_ID" ANNOTATION_DATA=$(kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.metadata.annotations.nullplatform\.com/autoscaling-paused}' 2>/dev/null || echo "") if [[ -z "$ANNOTATION_DATA" || "$ANNOTATION_DATA" == "null" ]]; then - echo "HPA $HPA_NAME is not currently paused" - exit 1 + log info " ✅ HPA '$HPA_NAME' is already active, no action needed" + exit 0 fi ORIGINAL_MIN=$(echo "$ANNOTATION_DATA" | jq -r '.originalMinReplicas') @@ -28,12 +26,12 @@ ORIGINAL_MAX=$(echo "$ANNOTATION_DATA" | jq -r '.originalMaxReplicas') PAUSED_AT=$(echo "$ANNOTATION_DATA" | jq -r '.pausedAt') -echo "Found paused HPA configuration:" -echo " Original min replicas: $ORIGINAL_MIN" -echo " Original max replicas: $ORIGINAL_MAX" -echo " Paused at: $PAUSED_AT" +log debug "📋 Found paused HPA configuration:" +log debug " Original min replicas: $ORIGINAL_MIN" +log debug " Original max replicas: $ORIGINAL_MAX" +log debug " Paused at: $PAUSED_AT" -echo "Resuming autoscaling..." +log debug "📝 Resuming autoscaling..." 
PATCH=$(jq -n \ --argjson originalMin "$ORIGINAL_MIN" \ @@ -52,8 +50,9 @@ PATCH=$(jq -n \ kubectl patch hpa "$HPA_NAME" -n "$K8S_NAMESPACE" --type='merge' -p "$PATCH" -echo "Autoscaling resumed successfully" -echo " HPA: $HPA_NAME" -echo " Namespace: $K8S_NAMESPACE" -echo " Min replicas: $ORIGINAL_MIN" -echo " Max replicas: $ORIGINAL_MAX" \ No newline at end of file +log info "" +log info "✅ Autoscaling resumed successfully" +log debug " HPA: $HPA_NAME" +log debug " Namespace: $K8S_NAMESPACE" +log debug " Min replicas: $ORIGINAL_MIN" +log debug " Max replicas: $ORIGINAL_MAX" diff --git a/k8s/scope/set_desired_instance_count b/k8s/scope/set_desired_instance_count index 84d3dc49..e0de8845 100755 --- a/k8s/scope/set_desired_instance_count +++ b/k8s/scope/set_desired_instance_count @@ -2,17 +2,24 @@ set -euo pipefail -echo "=== SET DESIRED INSTANCE COUNT ===" + +log debug "📝 Setting desired instance count..." DESIRED_INSTANCES="${ACTION_PARAMETERS_DESIRED_INSTANCES:-}" if [[ -z "$DESIRED_INSTANCES" ]]; then - echo "ERROR: desired_instances parameter not found" - echo "Expected ACTION_PARAMETERS_DESIRED_INSTANCES environment variable" + log error " ❌ desired_instances parameter not found" + log error "" + log error "💡 Possible causes:" + log error " The ACTION_PARAMETERS_DESIRED_INSTANCES environment variable is not set" + log error "" + log error "🔧 How to fix:" + log error " • Set the desired_instances parameter in the action configuration" + log error "" exit 1 fi -echo "Desired instances: $DESIRED_INSTANCES" +log debug "📋 Desired instances: $DESIRED_INSTANCES" DEPLOYMENT_ID=$(echo "$CONTEXT" | jq .scope.current_active_deployment -r) @@ -26,47 +33,42 @@ DEPLOYMENT_NAME="d-$SCOPE_ID-$DEPLOYMENT_ID" HPA_NAME="hpa-d-$SCOPE_ID-$DEPLOYMENT_ID" -echo "Deployment: $DEPLOYMENT_NAME" -echo "Namespace: $K8S_NAMESPACE" +log debug "📋 Deployment: $DEPLOYMENT_NAME" +log debug "📋 Namespace: $K8S_NAMESPACE" -if ! 
kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" >/dev/null 2>&1; then - echo "ERROR: Deployment $DEPLOYMENT_NAME not found in namespace $K8S_NAMESPACE" - exit 1 -fi +require_deployment "$DEPLOYMENT_NAME" "$K8S_NAMESPACE" "$SCOPE_ID" CURRENT_REPLICAS=$(kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.replicas}') -echo "Current replicas: $CURRENT_REPLICAS" +log debug "📋 Current replicas: $CURRENT_REPLICAS" HPA_EXISTS=false HPA_PAUSED=false if kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" >/dev/null 2>&1; then HPA_EXISTS=true - echo "HPA found: $HPA_NAME" - + log debug "📋 HPA found: $HPA_NAME" + PAUSED_ANNOTATION=$(kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.metadata.annotations.nullplatform\.com/autoscaling-paused}' 2>/dev/null || echo "") if [[ -n "$PAUSED_ANNOTATION" && "$PAUSED_ANNOTATION" != "null" ]]; then HPA_PAUSED=true - echo "HPA is currently PAUSED" + log debug "📋 HPA is currently PAUSED" else - echo "HPA is currently ACTIVE" + log debug "📋 HPA is currently ACTIVE" fi else - echo "No HPA found for this deployment" + log debug "📋 No HPA found for this deployment" fi -echo "" +log debug "" if [[ "$HPA_EXISTS" == "true" && "$HPA_PAUSED" == "false" ]]; then - echo "=== UPDATING HPA FOR ACTIVE AUTOSCALING ===" - + log debug "📝 Updating HPA for active autoscaling..." 
+ HPA_MIN=$(kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.minReplicas}') HPA_MAX=$(kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.maxReplicas}') - - echo "Current HPA range: $HPA_MIN - $HPA_MAX replicas" - echo "Setting desired instances to $DESIRED_INSTANCES by updating HPA range" - - # Strategy: Set both min and max to desired count to force that exact replica count - # This effectively "pins" the deployment to the desired instance count + + log debug "📋 Current HPA range: $HPA_MIN - $HPA_MAX replicas" + log debug "📋 Setting desired instances to $DESIRED_INSTANCES by updating HPA range" + PATCH=$(jq -n \ --argjson desired "$DESIRED_INSTANCES" \ '{ @@ -75,42 +77,38 @@ if [[ "$HPA_EXISTS" == "true" && "$HPA_PAUSED" == "false" ]]; then maxReplicas: $desired } }') - + kubectl patch hpa "$HPA_NAME" -n "$K8S_NAMESPACE" --type='merge' -p "$PATCH" - echo "HPA updated: min=$DESIRED_INSTANCES, max=$DESIRED_INSTANCES" - -elif [[ "$HPA_EXISTS" == "true" && "$HPA_PAUSED" == "true" ]]; then - # HPA is paused - just update deployment replicas - echo "=== UPDATING DEPLOYMENT (HPA PAUSED) ===" - - kubectl scale deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" --replicas="$DESIRED_INSTANCES" - echo "Deployment scaled to $DESIRED_INSTANCES replicas" - + log info " ✅ HPA updated: min=$DESIRED_INSTANCES, max=$DESIRED_INSTANCES" + else - # No HPA or fixed scaling - just update deployment replicas - echo "=== UPDATING DEPLOYMENT (NO HPA) ===" - + if [[ "$HPA_PAUSED" == "true" ]]; then + log debug "📝 Updating deployment (HPA paused)..." + else + log debug "📝 Updating deployment (no HPA)..." + fi + kubectl scale deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" --replicas="$DESIRED_INSTANCES" - echo "Deployment scaled to $DESIRED_INSTANCES replicas" + log info " ✅ Deployment scaled to $DESIRED_INSTANCES replicas" fi -echo "" -echo "Waiting for deployment rollout to complete..." 
+log debug "" +log debug "🔍 Waiting for deployment rollout to complete..." kubectl rollout status deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" --timeout=300s -echo "" -echo "=== FINAL STATUS ===" +log debug "" +log debug "📋 Final status:" FINAL_REPLICAS=$(kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.replicas}') READY_REPLICAS=$(kubectl get deployment "$DEPLOYMENT_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.status.readyReplicas}') -echo "Deployment replicas: $FINAL_REPLICAS" -echo "Ready replicas: ${READY_REPLICAS:-0}" +log debug " Deployment replicas: $FINAL_REPLICAS" +log debug " Ready replicas: ${READY_REPLICAS:-0}" if [[ "$HPA_EXISTS" == "true" ]]; then HPA_MIN=$(kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.minReplicas}') HPA_MAX=$(kubectl get hpa "$HPA_NAME" -n "$K8S_NAMESPACE" -o jsonpath='{.spec.maxReplicas}') - echo "HPA range: $HPA_MIN - $HPA_MAX replicas" + log debug " HPA range: $HPA_MIN - $HPA_MAX replicas" fi -echo "" -echo "Instance count successfully set to $DESIRED_INSTANCES" \ No newline at end of file +log info "" +log info "✨ Instance count successfully set to $DESIRED_INSTANCES" diff --git a/k8s/scope/tests/build_context.bats b/k8s/scope/tests/build_context.bats new file mode 100644 index 00000000..bd86e56b --- /dev/null +++ b/k8s/scope/tests/build_context.bats @@ -0,0 +1,387 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for build_context +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/utils/get_config_value" + + export SCRIPT="$PROJECT_ROOT/k8s/scope/build_context" + + # Mock kubectl - namespace exists by default + kubectl() { + case "$1" in + get) + if [ "$2" = "namespace" ]; then + return 0 + fi + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + # Create temp output directory + export NP_OUTPUT_DIR="$(mktemp -d)" + export SERVICE_PATH="$PROJECT_ROOT/k8s" + + # Default values from values.yaml + export K8S_NAMESPACE="nullplatform" + export DOMAIN="nullapps.io" + export USE_ACCOUNT_SLUG="false" + export PUBLIC_GATEWAY_NAME="gateway-public" + export PRIVATE_GATEWAY_NAME="gateway-internal" + export K8S_MODIFIERS="{}" + + # Base CONTEXT with required fields + export CONTEXT='{ + "scope": { + "id": "test-scope-123", + "nrn": "nrn:organization=100:account=200:namespace=300:application=400", + "domain": "test.nullapps.io", + "capabilities": { + "visibility": "public" + } + }, + "namespace": { + "slug": "test-namespace" + }, + "application": { + "slug": "test-app" + }, + "providers": { + "cloud-providers": { + "account": { + "region": "us-east-1" + }, + "networking": { + "domain_name": "cloud-domain.io", + "application_domain": "false" + } + }, + "container-orchestration": { + "cluster": { + "namespace": "default-namespace" + }, + "gateway": { + "public_name": "co-gateway-public", + "private_name": "co-gateway-private" + }, + "balancer": { + "public_name": "co-balancer-public", + "private_name": "co-balancer-private" + } + } + } + }' +} + +teardown() { + rm -rf "$NP_OUTPUT_DIR" + unset NAMESPACE_OVERRIDE + unset K8S_MODIFIERS + unset -f kubectl +} + +# ============================================================================= +# Success flow - logging +# ============================================================================= +@test "build_context: success flow - 
displays all messages" { + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Validating namespace 'default-namespace' exists..." + assert_contains "$output" "✅ Namespace 'default-namespace' exists" + assert_contains "$output" "📋 Scope: test-scope-123 | Visibility: public | Domain: test.nullapps.io" + assert_contains "$output" "📋 Namespace: default-namespace | Region: us-east-1 | Gateway: co-gateway-public | ALB: co-balancer-public" + assert_contains "$output" "✅ Scope context built successfully" +} + +# ============================================================================= +# Full CONTEXT validation (public visibility) +# ============================================================================= +@test "build_context: produces complete CONTEXT with all expected fields (public)" { + source "$SCRIPT" + + local expected_json='{ + "scope": { + "id": "test-scope-123", + "nrn": "nrn:organization=100:account=200:namespace=300:application=400", + "domain": "test.nullapps.io", + "capabilities": { + "visibility": "public" + } + }, + "namespace": { + "slug": "test-namespace" + }, + "application": { + "slug": "test-app" + }, + "providers": { + "cloud-providers": { + "account": { + "region": "us-east-1" + }, + "networking": { + "domain_name": "cloud-domain.io", + "application_domain": "false" + } + }, + "container-orchestration": { + "cluster": { + "namespace": "default-namespace" + }, + "gateway": { + "public_name": "co-gateway-public", + "private_name": "co-gateway-private" + }, + "balancer": { + "public_name": "co-balancer-public", + "private_name": "co-balancer-private" + } + } + }, + "ingress_visibility": "internet-facing", + "k8s_namespace": "default-namespace", + "region": "us-east-1", + "gateway_name": "co-gateway-public", + "alb_name": "co-balancer-public", + "component": "test-namespace-test-app", + "k8s_modifiers": {} + }' + + assert_json_equal "$CONTEXT" "$expected_json" "Complete CONTEXT (public)" +} + +# 
============================================================================= +# Full CONTEXT validation (private visibility) +# ============================================================================= +@test "build_context: produces complete CONTEXT with all expected fields (private)" { + export CONTEXT=$(echo "$CONTEXT" | jq '.scope.capabilities.visibility = "private"') + + source "$SCRIPT" + + local expected_json='{ + "scope": { + "id": "test-scope-123", + "nrn": "nrn:organization=100:account=200:namespace=300:application=400", + "domain": "test.nullapps.io", + "capabilities": { + "visibility": "private" + } + }, + "namespace": { + "slug": "test-namespace" + }, + "application": { + "slug": "test-app" + }, + "providers": { + "cloud-providers": { + "account": { + "region": "us-east-1" + }, + "networking": { + "domain_name": "cloud-domain.io", + "application_domain": "false" + } + }, + "container-orchestration": { + "cluster": { + "namespace": "default-namespace" + }, + "gateway": { + "public_name": "co-gateway-public", + "private_name": "co-gateway-private" + }, + "balancer": { + "public_name": "co-balancer-public", + "private_name": "co-balancer-private" + } + } + }, + "ingress_visibility": "internal", + "k8s_namespace": "default-namespace", + "region": "us-east-1", + "gateway_name": "co-gateway-private", + "alb_name": "co-balancer-private", + "component": "test-namespace-test-app", + "k8s_modifiers": {} + }' + + assert_json_equal "$CONTEXT" "$expected_json" "Complete CONTEXT (private)" +} + +@test "build_context: private visibility displays correct summary" { + export CONTEXT=$(echo "$CONTEXT" | jq '.scope.capabilities.visibility = "private"') + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Scope: test-scope-123 | Visibility: private | Domain: test.nullapps.io" + assert_contains "$output" "📋 Namespace: default-namespace | Region: us-east-1 | Gateway: co-gateway-private | ALB: co-balancer-private" +} + +# 
============================================================================= +# Exported variables +# ============================================================================= +@test "build_context: exports NRN IDs from scope nrn" { + source "$SCRIPT" + + assert_equal "$ORGANIZATION_ID" "100" + assert_equal "$ACCOUNT_ID" "200" + assert_equal "$NAMESPACE_ID" "300" + assert_equal "$APPLICATION_ID" "400" +} + +@test "build_context: exports all expected environment variables" { + source "$SCRIPT" + + assert_equal "$DNS_TYPE" "route53" + assert_equal "$ALB_RECONCILIATION_ENABLED" "false" + assert_equal "$DEPLOYMENT_MAX_WAIT_IN_SECONDS" "600" + assert_equal "$SCOPE_VISIBILITY" "public" + assert_equal "$SCOPE_DOMAIN" "test.nullapps.io" + assert_equal "$INGRESS_VISIBILITY" "internet-facing" + assert_equal "$GATEWAY_NAME" "co-gateway-public" + assert_equal "$REGION" "us-east-1" +} + +@test "build_context: creates OUTPUT_DIR" { + source "$SCRIPT" + + assert_equal "$OUTPUT_DIR" "$NP_OUTPUT_DIR/output/test-scope-123" + assert_directory_exists "$OUTPUT_DIR" +} + +@test "build_context: uses SERVICE_PATH when NP_OUTPUT_DIR is not set" { + unset NP_OUTPUT_DIR + + source "$SCRIPT" + + assert_equal "$OUTPUT_DIR" "$SERVICE_PATH/output/test-scope-123" + assert_directory_exists "$OUTPUT_DIR" +} + +# ============================================================================= +# Namespace validation +# ============================================================================= +@test "build_context: creates namespace when it does not exist and creation is enabled" { + kubectl() { + case "$1" in + get) + if [ "$2" = "namespace" ]; then + return 1 + fi + ;; + *) + echo "kubectl $*" + return 0 + ;; + esac + } + export -f kubectl + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Validating namespace 'default-namespace' exists..." 
+ assert_contains "$output" "❌ Namespace 'default-namespace' does not exist in the cluster" + assert_contains "$output" "📝 Creating namespace 'default-namespace'..." + assert_contains "$output" "✅ Namespace 'default-namespace' created successfully" +} + +@test "build_context: fails when namespace does not exist and creation is disabled" { + kubectl() { + if [ "$1" = "get" ] && [ "$2" = "namespace" ]; then + return 1 + fi + return 0 + } + export -f kubectl + export CREATE_K8S_NAMESPACE_IF_NOT_EXIST="false" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Namespace 'default-namespace' does not exist in the cluster" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The namespace does not exist and automatic creation is disabled" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Create the namespace manually: kubectl create namespace default-namespace" + assert_contains "$output" "Or set CREATE_K8S_NAMESPACE_IF_NOT_EXIST=true in values.yaml" +} + +@test "build_context: CREATE_K8S_NAMESPACE_IF_NOT_EXIST resolves from provider" { + kubectl() { + if [ "$1" = "get" ] && [ "$2" = "namespace" ]; then + return 1 + fi + return 0 + } + export -f kubectl + unset CREATE_K8S_NAMESPACE_IF_NOT_EXIST + + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = { + "cluster": { + "create_namespace_if_not_exist": "false" + } + }') + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Namespace 'default-namespace' does not exist in the cluster" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The namespace does not exist and automatic creation is disabled" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Create the namespace manually: kubectl create namespace default-namespace" + assert_contains "$output" "Or set CREATE_K8S_NAMESPACE_IF_NOT_EXIST=true in values.yaml" +} + +# 
============================================================================= +# COMPONENT truncation +# ============================================================================= +@test "build_context: COMPONENT truncates to 63 chars ending with alphanumeric" { + export CONTEXT=$(echo "$CONTEXT" | jq ' + .namespace.slug = "very-long-namespace-slug-that-goes-on" | + .application.slug = "and-on-with-app-slug-extending-past-limit" + ') + + source "$SCRIPT" + + local component=$(echo "$CONTEXT" | jq -r .component) + [ ${#component} -le 63 ] + [[ "$component" =~ [a-zA-Z0-9]$ ]] +} + +# ============================================================================= +# Scope-configurations override (end-to-end) +# ============================================================================= +@test "build_context: scope-configurations override produces correct CONTEXT" { + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"] = { + "cluster": { + "namespace": "scope-ns" + }, + "networking": { + "domain_name": "scope-domain.io", + "application_domain": "true", + "gateway_public_name": "scope-gw-public", + "balancer_public_name": "scope-alb-public" + } + }') + + source "$SCRIPT" + + assert_equal "$(echo "$CONTEXT" | jq -r .k8s_namespace)" "scope-ns" + assert_equal "$(echo "$CONTEXT" | jq -r .gateway_name)" "scope-gw-public" + assert_equal "$(echo "$CONTEXT" | jq -r .alb_name)" "scope-alb-public" + assert_equal "$GATEWAY_NAME" "scope-gw-public" +} diff --git a/k8s/scope/tests/iam/build_service_account.bats b/k8s/scope/tests/iam/build_service_account.bats new file mode 100644 index 00000000..2e92a9be --- /dev/null +++ b/k8s/scope/tests/iam/build_service_account.bats @@ -0,0 +1,203 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for iam/build_service_account - Service account setup from IAM role +# ============================================================================= + +setup() 
{ + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + # Script under test + export SCRIPT="$BATS_TEST_DIRNAME/../../iam/build_service_account" + + # Default environment variables + export SCOPE_ID="test-scope-123" + export OUTPUT_DIR="$(mktemp -d)" + export SERVICE_ACCOUNT_TEMPLATE="/templates/service_account.yaml" + export CONTEXT='{"namespace":"test-ns","scope":{"id":"123"}}' + + # Mock aws - default success + aws() { + case "$*" in + *"iam get-role"*) + echo "arn:aws:iam::123456789012:role/test-prefix-test-scope-123" + ;; + *) + return 0 + ;; + esac + } + export -f aws + + # Mock gomplate - default success + gomplate() { + return 0 + } + export -f gomplate + + # Mock rm + rm() { + return 0 + } + export -f rm +} + +teardown() { + rm -rf "$OUTPUT_DIR" 2>/dev/null || true + unset -f aws gomplate rm 2>/dev/null || true +} + +# ============================================================================= +# Test: IAM disabled (ENABLED=false) skips service account setup +# ============================================================================= +@test "build_service_account: IAM disabled (ENABLED=false) skips with message" { + export IAM='{"ENABLED":"false"}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping service account setup" +} + +# ============================================================================= +# Test: IAM disabled (ENABLED=null) skips service account setup +# ============================================================================= +@test "build_service_account: IAM disabled (ENABLED=null) skips with message" { + export IAM='{"ENABLED":null}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping service account 
setup" +} + +# ============================================================================= +# Test: IAM not set defaults to empty JSON and skips +# ============================================================================= +@test "build_service_account: IAM not set defaults to empty JSON and skips" { + unset IAM + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping service account setup" +} + +# ============================================================================= +# Test: Success flow - finds role, builds template +# ============================================================================= +@test "build_service_account: success flow verifies all log messages in order" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Looking for IAM role: test-prefix-test-scope-123" + assert_contains "$output" "📝 Building service account template: /templates/service_account.yaml" + assert_contains "$output" "✅ Service account template built successfully" +} + +# ============================================================================= +# Test: Error - aws iam get-role fails (non-delete action) +# ============================================================================= +@test "build_service_account: aws iam get-role failure shows error with hints" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + aws() { + case "$*" in + *"iam get-role"*) + echo "An error occurred (AccessDenied) when calling the GetRole operation" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "🔍 Looking for IAM role: test-prefix-test-scope-123" + assert_contains "$output" "❌ Failed to find IAM role 'test-prefix-test-scope-123'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The 
IAM role may not exist or the agent lacks IAM permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify the role exists: aws iam get-role --role-name test-prefix-test-scope-123" + assert_contains "$output" "• Check IAM permissions for the agent role" +} + +# ============================================================================= +# Test: Delete action with NoSuchEntity skips service account deletion +# ============================================================================= +@test "build_service_account: delete action with NoSuchEntity skips deletion" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + export ACTION="delete" + + aws() { + case "$*" in + *"iam get-role"*) + echo "An error occurred (NoSuchEntity) when calling the GetRole operation: Role with name test-prefix-test-scope-123 cannot be found." >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM role 'test-prefix-test-scope-123' does not exist, skipping service account deletion" +} + +# ============================================================================= +# Test: Non-delete action with NoSuchEntity fails +# ============================================================================= +@test "build_service_account: non-delete action with NoSuchEntity fails with error" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + unset ACTION + + aws() { + case "$*" in + *"iam get-role"*) + echo "An error occurred (NoSuchEntity) when calling the GetRole operation: Role with name test-prefix-test-scope-123 cannot be found." 
>&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to find IAM role 'test-prefix-test-scope-123'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The IAM role may not exist or the agent lacks IAM permissions" + assert_contains "$output" "🔧 How to fix:" +} + +# ============================================================================= +# Test: Error - gomplate template generation fails +# ============================================================================= +@test "build_service_account: gomplate failure shows template error with hints" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + gomplate() { + echo "Error: template rendering failed" >&2 + return 1 + } + export -f gomplate + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "📝 Building service account template: /templates/service_account.yaml" + assert_contains "$output" "❌ Failed to build service account template" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The template file may be missing or contain invalid gomplate syntax" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify template exists: ls -la /templates/service_account.yaml" + assert_contains "$output" "• Check the template is a valid Kubernetes ServiceAccount YAML with correct gomplate expressions" +} diff --git a/k8s/scope/tests/iam/create_role.bats b/k8s/scope/tests/iam/create_role.bats new file mode 100644 index 00000000..ef624dbe --- /dev/null +++ b/k8s/scope/tests/iam/create_role.bats @@ -0,0 +1,367 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for iam/create_role - IAM role creation with policies +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd 
"$BATS_TEST_DIRNAME/../../../.." && pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + # Script under test + export SCRIPT="$BATS_TEST_DIRNAME/../../iam/create_role" + + # Default environment variables + export SCOPE_ID="test-scope-123" + export CLUSTER_NAME="test-cluster" + export OUTPUT_DIR="$(mktemp -d)" + export CONTEXT='{ + "k8s_namespace": "test-ns", + "application": {"id": "app-1", "slug": "test-app"}, + "scope": {"id": "scope-1", "slug": "test-scope", "dimensions": null}, + "account": {"id": "acc-1", "slug": "test-account", "organization_id": "org-1"}, + "namespace": {"id": "ns-1", "slug": "test-namespace"} + }' + + # Mock aws - default success + aws() { + case "$*" in + *"eks describe-cluster"*) + echo "https://oidc.eks.us-east-1.amazonaws.com/id/ABCDEF1234567890" + ;; + *"sts get-caller-identity"*) + echo "123456789012" + ;; + *"iam create-role"*) + echo '{"Role": {"Arn": "arn:aws:iam::123456789012:role/test-prefix-test-scope-123"}}' + ;; + *"iam attach-role-policy"*) + return 0 + ;; + *"iam put-role-policy"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f aws + + # Mock rm + rm() { + command rm "$@" 2>/dev/null || true + } + export -f rm +} + +teardown() { + rm -rf "$OUTPUT_DIR" 2>/dev/null || true + unset -f aws rm 2>/dev/null || true +} + +# ============================================================================= +# Test: IAM disabled (ENABLED=false) skips role setup +# ============================================================================= +@test "create_role: IAM disabled (ENABLED=false) skips with message" { + export IAM='{"ENABLED":"false"}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping role creation" +} + +# ============================================================================= +# Test: IAM disabled (ENABLED=null) skips 
role setup +# ============================================================================= +@test "create_role: IAM disabled (ENABLED=null) skips with message" { + export IAM='{"ENABLED":null}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping role creation" +} + +# ============================================================================= +# Test: IAM not set defaults to empty JSON and skips +# ============================================================================= +@test "create_role: IAM not set defaults to empty JSON and skips" { + unset IAM + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping role creation" +} + +# ============================================================================= +# Test: Success flow with boundary and managed policy +# ============================================================================= +@test "create_role: success flow with boundary and managed policy" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": { + "BOUNDARY_ARN": "arn:aws:iam::123456789012:policy/boundary", + "POLICIES": [ + {"TYPE": "arn", "VALUE": "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"} + ] + } + }' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Getting EKS OIDC provider for cluster: test-cluster" + assert_contains "$output" "🔍 Getting AWS account ID..." 
+ assert_contains "$output" "📝 Creating IAM role: test-prefix-test-scope-123" + assert_contains "$output" "📋 Using permissions boundary: arn:aws:iam::123456789012:policy/boundary" + assert_contains "$output" "✅ IAM role created successfully" + assert_contains "$output" "📋 Processing policy 1: Type=arn" + assert_contains "$output" "📝 Attaching managed policy: arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" + assert_contains "$output" "✅ Successfully attached managed policy: arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" +} + +# ============================================================================= +# Test: Success flow without boundary +# ============================================================================= +@test "create_role: success flow without boundary creates role without permissions-boundary" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": { + "BOUNDARY_ARN": null, + "POLICIES": [] + } + }' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Getting EKS OIDC provider for cluster: test-cluster" + assert_contains "$output" "🔍 Getting AWS account ID..." 
+ assert_contains "$output" "📝 Creating IAM role: test-prefix-test-scope-123" + assert_contains "$output" "✅ IAM role created successfully" +} + +# ============================================================================= +# Test: Error - aws eks describe-cluster fails +# ============================================================================= +@test "create_role: aws eks describe-cluster failure shows error with hints" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": {"BOUNDARY_ARN": null, "POLICIES": []} + }' + + aws() { + case "$*" in + *"eks describe-cluster"*) + echo "An error occurred (ResourceNotFoundException) when calling the DescribeCluster operation" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "🔍 Getting EKS OIDC provider for cluster: test-cluster" + assert_contains "$output" "❌ Failed to get OIDC provider for EKS cluster 'test-cluster'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The OIDC provider may not be configured for this EKS cluster" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify OIDC is enabled: aws eks describe-cluster --name test-cluster --query cluster.identity.oidc" + assert_contains "$output" "• Enable OIDC provider: eksctl utils associate-iam-oidc-provider --cluster test-cluster --approve" +} + +# ============================================================================= +# Test: Error - aws sts get-caller-identity fails +# ============================================================================= +@test "create_role: aws sts get-caller-identity failure shows error with hints" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": {"BOUNDARY_ARN": null, "POLICIES": []} + }' + + aws() { + case "$*" in + *"eks describe-cluster"*) + echo "https://oidc.eks.us-east-1.amazonaws.com/id/ABCDEF1234567890" + ;; + *"sts 
get-caller-identity"*) + echo "Unable to locate credentials" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "🔍 Getting EKS OIDC provider for cluster: test-cluster" + assert_contains "$output" "🔍 Getting AWS account ID..." + assert_contains "$output" "❌ Failed to get AWS account ID" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "AWS credentials may not be configured or have expired" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Check AWS credentials: aws sts get-caller-identity" + assert_contains "$output" "• Verify IAM permissions for the agent role" +} + +# ============================================================================= +# Test: Managed policy attachment (type=arn) with success message +# ============================================================================= +@test "create_role: managed policy attachment logs processing and success" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": { + "BOUNDARY_ARN": null, + "POLICIES": [ + {"TYPE": "arn", "VALUE": "arn:aws:iam::aws:policy/ReadOnlyAccess"} + ] + } + }' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 Processing policy 1: Type=arn" + assert_contains "$output" "📝 Attaching managed policy: arn:aws:iam::aws:policy/ReadOnlyAccess" + assert_contains "$output" "✅ Successfully attached managed policy: arn:aws:iam::aws:policy/ReadOnlyAccess" +} + +# ============================================================================= +# Test: Inline policy attachment (type=inline) with success message +# ============================================================================= +@test "create_role: inline policy attachment logs processing and success" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": { + "BOUNDARY_ARN": null, + "POLICIES": [ + {"TYPE": 
"inline", "VALUE": "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Effect\":\"Allow\",\"Action\":\"s3:GetObject\",\"Resource\":\"*\"}]}"} + ] + } + }' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 Processing policy 1: Type=inline" + assert_contains "$output" "📝 Attaching inline policy: inline-policy-1" + assert_contains "$output" "✅ Successfully attached inline policy: inline-policy-1" +} + +# ============================================================================= +# Test: Unknown policy type shows warning +# ============================================================================= +@test "create_role: unknown policy type shows warning message" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": { + "BOUNDARY_ARN": null, + "POLICIES": [ + {"TYPE": "unknown", "VALUE": "some-value"} + ] + } + }' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 Processing policy 1: Type=unknown" + assert_contains "$output" "⚠️ Unknown policy type: unknown, skipping" +} + +# ============================================================================= +# Test: Multiple policies of different types +# ============================================================================= +@test "create_role: multiple policies are processed in order" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": { + "BOUNDARY_ARN": null, + "POLICIES": [ + {"TYPE": "arn", "VALUE": "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"}, + {"TYPE": "inline", "VALUE": "{\"Version\":\"2012-10-17\",\"Statement\":[]}"}, + {"TYPE": "unknown", "VALUE": "bad-type"} + ] + } + }' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 Processing policy 1: Type=arn" + assert_contains "$output" "📝 Attaching managed policy: arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" + assert_contains "$output" "✅ Successfully attached managed 
policy: arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" + assert_contains "$output" "📋 Processing policy 2: Type=inline" + assert_contains "$output" "📝 Attaching inline policy: inline-policy-2" + assert_contains "$output" "✅ Successfully attached inline policy: inline-policy-2" + assert_contains "$output" "📋 Processing policy 3: Type=unknown" + assert_contains "$output" "⚠️ Unknown policy type: unknown, skipping" +} + +# ============================================================================= +# Test: No policies to attach +# ============================================================================= +@test "create_role: no policies skips policy attachment loop" { + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": { + "BOUNDARY_ARN": null, + "POLICIES": [] + } + }' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Getting EKS OIDC provider for cluster: test-cluster" + assert_contains "$output" "🔍 Getting AWS account ID..." 
+ assert_contains "$output" "📝 Creating IAM role: test-prefix-test-scope-123" + assert_contains "$output" "✅ IAM role created successfully" +} + +# ============================================================================= +# Test: Context with dimensions adds tags +# ============================================================================= +@test "create_role: context with dimensions processes correctly" { + export CONTEXT='{ + "k8s_namespace": "test-ns", + "application": {"id": "app-1", "slug": "test-app"}, + "scope": {"id": "scope-1", "slug": "test-scope", "dimensions": {"env": "production", "region": "us-east-1"}}, + "account": {"id": "acc-1", "slug": "test-account", "organization_id": "org-1"}, + "namespace": {"id": "ns-1", "slug": "test-namespace"} + }' + export IAM='{ + "ENABLED": "true", + "PREFIX": "test-prefix", + "ROLE": { + "BOUNDARY_ARN": null, + "POLICIES": [] + } + }' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Getting EKS OIDC provider for cluster: test-cluster" + assert_contains "$output" "🔍 Getting AWS account ID..." + assert_contains "$output" "📝 Creating IAM role: test-prefix-test-scope-123" + assert_contains "$output" "✅ IAM role created successfully" +} diff --git a/k8s/scope/tests/iam/delete_role.bats b/k8s/scope/tests/iam/delete_role.bats new file mode 100644 index 00000000..429df8af --- /dev/null +++ b/k8s/scope/tests/iam/delete_role.bats @@ -0,0 +1,315 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for iam/delete_role - IAM role deletion with policy cleanup +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." 
&& pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + # Script under test + export SCRIPT="$BATS_TEST_DIRNAME/../../iam/delete_role" + + # Default environment variables + export SCOPE_ID="test-scope-123" + export SERVICE_ACCOUNT_NAME="test-prefix-test-scope-123" + + # Mock aws - default success + aws() { + case "$*" in + *"iam get-role"*) + echo "arn:aws:iam::123456789012:role/test-prefix-test-scope-123" + ;; + *"iam list-attached-role-policies"*) + echo "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" + ;; + *"iam detach-role-policy"*) + return 0 + ;; + *"iam list-role-policies"*) + echo "inline-policy-1" + ;; + *"iam delete-role-policy"*) + return 0 + ;; + *"iam delete-role"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f aws +} + +teardown() { + unset -f aws 2>/dev/null || true +} + +# ============================================================================= +# Test: IAM disabled (ENABLED=false) skips role deletion +# ============================================================================= +@test "delete_role: IAM disabled (ENABLED=false) skips with message" { + export IAM='{"ENABLED":"false"}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping role deletion" +} + +# ============================================================================= +# Test: IAM disabled (ENABLED=null) skips role deletion +# ============================================================================= +@test "delete_role: IAM disabled (ENABLED=null) skips with message" { + export IAM='{"ENABLED":null}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping role deletion" +} + +# ============================================================================= +# Test: IAM not set defaults to empty JSON 
and skips +# ============================================================================= +@test "delete_role: IAM not set defaults to empty JSON and skips" { + unset IAM + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 IAM is not enabled, skipping role deletion" +} + +# ============================================================================= +# Test: Role not found (NoSuchEntity) skips deletion +# ============================================================================= +@test "delete_role: role not found with NoSuchEntity skips deletion" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + aws() { + case "$*" in + *"iam get-role"*) + echo "An error occurred (NoSuchEntity) when calling the GetRole operation: The role with name test-prefix-test-scope-123 cannot be found." >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Looking for IAM role: test-prefix-test-scope-123" + assert_contains "$output" "📋 IAM role 'test-prefix-test-scope-123' does not exist, skipping role deletion" +} + +# ============================================================================= +# Test: Error - get-role fails (not NoSuchEntity) +# ============================================================================= +@test "delete_role: get-role failure (not NoSuchEntity) shows error with hints" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + aws() { + case "$*" in + *"iam get-role"*) + echo "An error occurred (AccessDenied) when calling the GetRole operation: Access denied" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "🔍 Looking for IAM role: test-prefix-test-scope-123" + assert_contains "$output" "❌ Failed to find IAM role 'test-prefix-test-scope-123'" + assert_contains "$output" "💡 Possible causes:" + assert_contains 
"$output" "The IAM role may not exist or the agent lacks IAM permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify the role exists: aws iam get-role --role-name test-prefix-test-scope-123" + assert_contains "$output" "• Check IAM permissions for the agent role" +} + +# ============================================================================= +# Test: Success flow - detach policies, delete inline, delete role +# ============================================================================= +@test "delete_role: success flow detaches managed policies, deletes inline, deletes role" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Looking for IAM role: test-prefix-test-scope-123" + assert_contains "$output" "📝 Detaching managed policies..." + assert_contains "$output" "📋 Detaching policy: arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" + assert_contains "$output" "✅ Detached policy: arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" + assert_contains "$output" "📝 Deleting inline policies..." 
+ assert_contains "$output" "📋 Deleting inline policy: inline-policy-1" + assert_contains "$output" "✅ Deleted inline policy: inline-policy-1" + assert_contains "$output" "📝 Deleting IAM role: test-prefix-test-scope-123" + assert_contains "$output" "✅ IAM role deletion completed" +} + +# ============================================================================= +# Test: Success flow with multiple managed policies +# ============================================================================= +@test "delete_role: detaches multiple managed policies" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + aws() { + case "$*" in + *"iam get-role"*) + echo "arn:aws:iam::123456789012:role/test-prefix-test-scope-123" + ;; + *"iam list-attached-role-policies"*) + echo -e "arn:aws:iam::aws:policy/Policy1\tarn:aws:iam::aws:policy/Policy2" + ;; + *"iam detach-role-policy"*) + return 0 + ;; + *"iam list-role-policies"*) + echo "" + ;; + *"iam delete-role"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📝 Detaching managed policies..." 
+ assert_contains "$output" "📋 Detaching policy: arn:aws:iam::aws:policy/Policy1" + assert_contains "$output" "✅ Detached policy: arn:aws:iam::aws:policy/Policy1" + assert_contains "$output" "📋 Detaching policy: arn:aws:iam::aws:policy/Policy2" + assert_contains "$output" "✅ Detached policy: arn:aws:iam::aws:policy/Policy2" +} + +# ============================================================================= +# Test: Success flow with multiple inline policies +# ============================================================================= +@test "delete_role: deletes multiple inline policies" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + aws() { + case "$*" in + *"iam get-role"*) + echo "arn:aws:iam::123456789012:role/test-prefix-test-scope-123" + ;; + *"iam list-attached-role-policies"*) + echo "" + ;; + *"iam list-role-policies"*) + echo -e "inline-1\tinline-2" + ;; + *"iam delete-role-policy"*) + return 0 + ;; + *"iam delete-role"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 Deleting inline policy: inline-1" + assert_contains "$output" "✅ Deleted inline policy: inline-1" + assert_contains "$output" "📋 Deleting inline policy: inline-2" + assert_contains "$output" "✅ Deleted inline policy: inline-2" +} + +# ============================================================================= +# Test: No policies to detach or delete +# ============================================================================= +@test "delete_role: no policies proceeds directly to role deletion" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + aws() { + case "$*" in + *"iam get-role"*) + echo "arn:aws:iam::123456789012:role/test-prefix-test-scope-123" + ;; + *"iam list-attached-role-policies"*) + echo "" + ;; + *"iam list-role-policies"*) + echo "" + ;; + *"iam delete-role"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f aws + + 
run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📝 Detaching managed policies..." + assert_contains "$output" "📝 Deleting inline policies..." + assert_contains "$output" "📝 Deleting IAM role: test-prefix-test-scope-123" + assert_contains "$output" "✅ IAM role deletion completed" +} + +# ============================================================================= +# Test: Role deletion fails +# ============================================================================= +@test "delete_role: role deletion failure logs warning but does not fail" { + export IAM='{"ENABLED":"true","PREFIX":"test-prefix"}' + + aws() { + case "$*" in + *"iam get-role"*) + echo "arn:aws:iam::123456789012:role/test-prefix-test-scope-123" + ;; + *"iam list-attached-role-policies"*) + echo "" + ;; + *"iam list-role-policies"*) + echo "" + ;; + *"iam delete-role"*) + echo "An error occurred (DeleteConflict)" >&2 + return 1 + ;; + *) + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📝 Deleting IAM role: test-prefix-test-scope-123" + assert_contains "$output" "⚠️ Failed to delete IAM role 'test-prefix-test-scope-123'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The role may still have attached policies, instance profiles, or was already deleted" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Check attached policies: aws iam list-attached-role-policies --role-name test-prefix-test-scope-123" + assert_contains "$output" "• Check instance profiles: aws iam list-instance-profiles-for-role --role-name test-prefix-test-scope-123" + assert_contains "$output" "✅ IAM role deletion completed" +} diff --git a/k8s/scope/tests/networking/dns/az-records/manage_route.bats b/k8s/scope/tests/networking/dns/az-records/manage_route.bats new file mode 100644 index 00000000..f979ae01 --- /dev/null +++ 
b/k8s/scope/tests/networking/dns/az-records/manage_route.bats @@ -0,0 +1,294 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/networking/dns/az-records/manage_route +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export SCRIPT="$SERVICE_PATH/scope/networking/dns/az-records/manage_route" + + # Default environment + export GATEWAY_TYPE="istio" + export SCOPE_DOMAIN="myapp.example.com" + export HOSTED_ZONE_NAME="example.com" + export AZURE_TENANT_ID="tenant-123" + export AZURE_CLIENT_ID="client-123" + export AZURE_CLIENT_SECRET="secret-123" + + # Mock kubectl - default: return gateway IP + kubectl() { + case "$*" in + *"get gateway"*) + echo "10.0.0.1" + ;; + *"get svc router-default"*) + echo "10.0.0.2" + ;; + esac + } + export -f kubectl + + # Mock curl - default: token succeeds, DNS API succeeds + curl() { + if [[ "$*" == *"login.microsoftonline.com"* ]]; then + echo '{"access_token":"mock-token-123","token_type":"Bearer"}' + echo "__HTTP_CODE__:200" + elif [[ "$*" == *"management.azure.com"* ]] && [[ "$*" == *"PUT"* ]]; then + echo '{"id":"/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/dnsZones/example.com/A/myapp"}' + echo "__HTTP_CODE__:200" + elif [[ "$*" == *"management.azure.com"* ]] && [[ "$*" == *"DELETE"* ]]; then + echo "" + fi + } + export -f curl +} + +# ============================================================================= +# CREATE: success with istio gateway +# ============================================================================= +@test "manage_route: CREATE with istio gateway - full success flow" { + run bash "$SCRIPT" \ + --action=CREATE 
\ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Managing Azure DNS record..." + assert_contains "$output" "📋 Action: CREATE | Gateway: gw-public | Zone: example.com" + assert_contains "$output" "📡 Getting IP from gateway 'gw-public'..." + assert_contains "$output" "✅ Gateway IP: 10.0.0.1" + assert_contains "$output" "📋 Subdomain: myapp | Zone: example.com | IP: 10.0.0.1" + assert_contains "$output" "📝 Creating Azure DNS record..." + assert_contains "$output" "✅ DNS record created: myapp.example.com -> 10.0.0.1" +} + +# ============================================================================= +# CREATE: success with ARO cluster +# ============================================================================= +@test "manage_route: CREATE with aro_cluster gateway - uses router service" { + export GATEWAY_TYPE="aro_cluster" + + run bash "$SCRIPT" \ + --action=CREATE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 0 ] + assert_contains "$output" "📡 Getting IP from ARO router service..." 
+ assert_contains "$output" "✅ Gateway IP: 10.0.0.2" +} + +# ============================================================================= +# CREATE: ARO fallback to istio +# ============================================================================= +@test "manage_route: CREATE with aro_cluster - falls back to istio when router has no IP" { + export GATEWAY_TYPE="aro_cluster" + + kubectl() { + case "$*" in + *"get svc router-default"*) + echo "" + ;; + *"get gateway"*) + echo "10.0.0.1" + ;; + esac + } + export -f kubectl + + run bash "$SCRIPT" \ + --action=CREATE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 0 ] + assert_contains "$output" "📡 Getting IP from ARO router service..." + assert_contains "$output" "⚠️ ARO router IP not found, falling back to istio gateway..." + assert_contains "$output" "✅ Gateway IP: 10.0.0.1" +} + +# ============================================================================= +# DELETE: success +# ============================================================================= +@test "manage_route: DELETE - full success flow" { + run bash "$SCRIPT" \ + --action=DELETE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Managing Azure DNS record..." + assert_contains "$output" "📋 Action: DELETE | Gateway: gw-public | Zone: example.com" + assert_contains "$output" "📝 Deleting Azure DNS record..." 
+ assert_contains "$output" "✅ DNS record deleted: myapp.example.com" +} + +# ============================================================================= +# Error: gateway IP not found +# ============================================================================= +@test "manage_route: fails with error details when gateway IP not found" { + kubectl() { echo ""; } + export -f kubectl + + run bash "$SCRIPT" \ + --action=CREATE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Could not get IP address for gateway 'gw-public'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The gateway may not be ready or the name is incorrect" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check gateway status: kubectl get gateway gw-public -n gateways" +} + +# ============================================================================= +# Error: Azure token failure (curl fails) +# ============================================================================= +@test "manage_route: fails with error details when Azure token request fails" { + curl() { + if [[ "$*" == *"login.microsoftonline.com"* ]]; then + return 1 + fi + } + export -f curl + + run bash "$SCRIPT" \ + --action=CREATE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to get Azure access token" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The Azure credentials may be invalid or expired" +} + +# ============================================================================= +# Error: Azure token failure (HTTP error) +# ============================================================================= +@test "manage_route: fails 
with error details when Azure token returns HTTP error" { + curl() { + if [[ "$*" == *"login.microsoftonline.com"* ]]; then + echo '{"error":"invalid_client"}' + echo "__HTTP_CODE__:401" + fi + } + export -f curl + + run bash "$SCRIPT" \ + --action=CREATE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to get Azure access token (HTTP 401)" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The Azure credentials may be invalid or expired" +} + +# ============================================================================= +# Error: Azure DNS API returns error +# ============================================================================= +@test "manage_route: fails with error details when Azure DNS API returns error" { + curl() { + if [[ "$*" == *"login.microsoftonline.com"* ]]; then + echo '{"access_token":"mock-token-123","token_type":"Bearer"}' + echo "__HTTP_CODE__:200" + elif [[ "$*" == *"management.azure.com"* ]] && [[ "$*" == *"PUT"* ]]; then + echo '{"error":{"code":"ResourceNotFound","message":"DNS zone not found"}}' + echo "__HTTP_CODE__:200" + fi + } + export -f curl + + run bash "$SCRIPT" \ + --action=CREATE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Azure API returned an error creating DNS record" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The DNS zone or resource group may not exist, or permissions are insufficient" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify DNS zone 'example.com' exists in resource group 'dns-rg'" +} + +# ============================================================================= +# Error: Azure DNS API returns 
non-2xx HTTP +# ============================================================================= +@test "manage_route: fails with error details when Azure DNS API returns HTTP error" { + curl() { + if [[ "$*" == *"login.microsoftonline.com"* ]]; then + echo '{"access_token":"mock-token-123","token_type":"Bearer"}' + echo "__HTTP_CODE__:200" + elif [[ "$*" == *"management.azure.com"* ]] && [[ "$*" == *"PUT"* ]]; then + echo '{"message":"Forbidden"}' + echo "__HTTP_CODE__:403" + fi + } + export -f curl + + run bash "$SCRIPT" \ + --action=CREATE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Azure API returned an error creating DNS record (HTTP 403)" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The DNS zone or resource group may not exist, or permissions are insufficient" +} + +# ============================================================================= +# Custom SCOPE_SUBDOMAIN +# ============================================================================= +@test "manage_route: uses custom SCOPE_SUBDOMAIN when set" { + export SCOPE_SUBDOMAIN="custom-sub" + + run bash "$SCRIPT" \ + --action=CREATE \ + --resource-group=my-rg \ + --subscription-id=sub-123 \ + --gateway-name=gw-public \ + --hosted-zone-name=example.com \ + --hosted-zone-rg=dns-rg + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Subdomain: custom-sub | Zone: example.com | IP: 10.0.0.1" + assert_contains "$output" "✅ DNS record created: custom-sub.example.com -> 10.0.0.1" +} diff --git a/k8s/scope/tests/networking/dns/build_dns_context.bats b/k8s/scope/tests/networking/dns/build_dns_context.bats new file mode 100644 index 00000000..4b341a8a --- /dev/null +++ b/k8s/scope/tests/networking/dns/build_dns_context.bats @@ -0,0 +1,127 @@ +#!/usr/bin/env bats +# 
============================================================================= +# Unit tests for scope/networking/dns/build_dns_context +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export SCRIPT="$SERVICE_PATH/scope/networking/dns/build_dns_context" + + # Azure defaults + export HOSTED_ZONE_NAME="example.com" + export HOSTED_ZONE_RG="dns-rg" + export AZURE_SUBSCRIPTION_ID="sub-123" + export RESOURCE_GROUP="my-rg" + export PUBLIC_GATEWAY_NAME="gw-public" + export PRIVATE_GATEWAY_NAME="gw-private" + + # Route53 defaults + export CONTEXT='{"providers":{"cloud-providers":{"networking":{"hosted_public_zone_id":"Z123","hosted_zone_id":"Z456"}}}}' +} + +teardown() { + rm -rf "$SERVICE_PATH/tmp" "$SERVICE_PATH/output" +} + +# ============================================================================= +# Azure DNS type +# ============================================================================= +@test "build_dns_context: azure - displays full configuration" { + export DNS_TYPE="azure" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Building DNS context..." 
+ assert_contains "$output" "📋 DNS type: azure" + assert_contains "$output" "📋 Azure DNS configuration:" + assert_contains "$output" "Gateway type: istio" + assert_contains "$output" "Hosted zone: example.com (RG: dns-rg)" + assert_contains "$output" "Subscription: sub-123" + assert_contains "$output" "Resource group: my-rg" + assert_contains "$output" "Public gateway: gw-public" + assert_contains "$output" "Private gateway: gw-private" + assert_contains "$output" "✅ DNS context ready" +} + +@test "build_dns_context: azure - defaults GATEWAY_TYPE to istio when not set" { + export DNS_TYPE="azure" + unset GATEWAY_TYPE + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "Gateway type: istio" +} + +@test "build_dns_context: azure - uses custom GATEWAY_TYPE when set" { + export DNS_TYPE="azure" + export GATEWAY_TYPE="nginx" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "Gateway type: nginx" +} + +# ============================================================================= +# External DNS type +# ============================================================================= +@test "build_dns_context: external_dns - displays context" { + export DNS_TYPE="external_dns" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Building DNS context..." + assert_contains "$output" "📋 DNS type: external_dns" + assert_contains "$output" "📋 DNS records will be managed automatically by External DNS operator" + assert_contains "$output" "✅ DNS context ready" +} + +# ============================================================================= +# Route53 DNS type +# ============================================================================= +@test "build_dns_context: route53 - sources get_hosted_zones" { + export DNS_TYPE="route53" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Building DNS context..." 
+ assert_contains "$output" "📋 DNS type: route53" + assert_contains "$output" "Getting hosted zones" + assert_contains "$output" "Public Hosted Zone ID: Z123" + assert_contains "$output" "Private Hosted Zone ID: Z456" + assert_contains "$output" "✅ DNS context ready" +} + +# ============================================================================= +# Unsupported DNS type +# ============================================================================= +@test "build_dns_context: unsupported type - fails with error details" { + export DNS_TYPE="cloudflare" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Unsupported DNS type: 'cloudflare'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The DNS_TYPE value in values.yaml is not one of: route53, azure, external_dns" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Supported types: route53, azure, external_dns" +} + +@test "build_dns_context: empty DNS_TYPE - fails with error details" { + export DNS_TYPE="" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Unsupported DNS type: ''" +} diff --git a/k8s/scope/tests/networking/dns/domain/domain-generate.bats b/k8s/scope/tests/networking/dns/domain/domain-generate.bats new file mode 100644 index 00000000..2d7d9945 --- /dev/null +++ b/k8s/scope/tests/networking/dns/domain/domain-generate.bats @@ -0,0 +1,244 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/networking/dns/domain/domain-generate +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + + export SCRIPT="$PROJECT_ROOT/k8s/scope/networking/dns/domain/domain-generate" +} + +# ============================================================================= +# Basic domain generation with account slug +# ============================================================================= +@test "domain-generate: generates domain with account slug" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" \ + --domain="nullapps.io" \ + --useAccountSlug="true" + + [ "$status" -eq 0 ] + assert_contains "$output" ".myaccount.nullapps.io" + assert_contains "$output" "prod-webapp-api-" +} + +# ============================================================================= +# Domain generation without account slug +# ============================================================================= +@test "domain-generate: generates domain without account slug" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" \ + --domain="nullapps.io" \ + --useAccountSlug="false" + + [ "$status" -eq 0 ] + assert_contains "$output" ".nullapps.io" + assert_contains "$output" "prod-webapp-api-" + # Should NOT contain account slug in domain + [[ "$output" != *".myaccount."* ]] +} + +# ============================================================================= +# Default domain value +# ============================================================================= +@test "domain-generate: uses default domain nullapps.io when not specified" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" + + [ "$status" -eq 0 ] + assert_contains "$output" ".myaccount.nullapps.io" +} + +# ============================================================================= +# Custom domain +# 
============================================================================= +@test "domain-generate: uses custom domain when specified" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" \ + --domain="example.com" \ + --useAccountSlug="true" + + [ "$status" -eq 0 ] + assert_contains "$output" ".myaccount.example.com" +} + +# ============================================================================= +# Long domain truncation +# ============================================================================= +@test "domain-generate: truncates long domain to safe length" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="very-long-namespace-slug-that-is-quite-extended" \ + --applicationSlug="very-long-application-slug-name" \ + --scopeSlug="very-long-scope-slug-name" \ + --domain="nullapps.io" \ + --useAccountSlug="false" + + [ "$status" -eq 0 ] + # The first_part (namespace-application-scope) should be truncated + # Total first_part before hash should be max 57 chars + local domain_output="$output" + # Extract the part before the hash (everything before the 5-letter hash) + local first_part + first_part=$(echo "$domain_output" | sed 's/-[a-z]\{5\}\..*$//') + local length=${#first_part} + [ "$length" -le 57 ] +} + +@test "domain-generate: strips trailing dashes after truncation" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="aaaaaaaaaaaaaaaaaaaaaaaa" \ + --applicationSlug="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + --scopeSlug="c" \ + --domain="nullapps.io" \ + --useAccountSlug="false" + + [ "$status" -eq 0 ] + # Should not have trailing dash before the hash + [[ "$output" != *"--"*".nullapps.io" ]] +} + +# ============================================================================= +# Required parameters missing +# ============================================================================= +@test "domain-generate: fails when accountSlug 
is missing" { + run bash "$SCRIPT" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" + + [ "$status" -eq 1 ] + assert_contains "$output" "Error: accountSlug, namespaceSlug, applicationSlug, and scopeSlug are required" +} + +@test "domain-generate: fails when namespaceSlug is missing" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --applicationSlug="webapp" \ + --scopeSlug="api" + + [ "$status" -eq 1 ] + assert_contains "$output" "Error: accountSlug, namespaceSlug, applicationSlug, and scopeSlug are required" +} + +@test "domain-generate: fails when applicationSlug is missing" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --scopeSlug="api" + + [ "$status" -eq 1 ] + assert_contains "$output" "Error: accountSlug, namespaceSlug, applicationSlug, and scopeSlug are required" +} + +@test "domain-generate: fails when scopeSlug is missing" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" + + [ "$status" -eq 1 ] + assert_contains "$output" "Error: accountSlug, namespaceSlug, applicationSlug, and scopeSlug are required" +} + +@test "domain-generate: fails when no arguments provided" { + run bash "$SCRIPT" + + [ "$status" -eq 1 ] + assert_contains "$output" "Error: accountSlug, namespaceSlug, applicationSlug, and scopeSlug are required" +} + +# ============================================================================= +# Unknown option +# ============================================================================= +@test "domain-generate: fails on unknown option" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" \ + --unknownFlag="value" + + [ "$status" -eq 1 ] + assert_contains "$output" "Error: Unknown option --unknownFlag=value" +} + +# ============================================================================= +# Help flag +# 
============================================================================= +@test "domain-generate: displays usage with --help" { + run bash "$SCRIPT" --help + + [ "$status" -eq 0 ] + assert_contains "$output" "Usage:" + assert_contains "$output" "--accountSlug=VALUE" + assert_contains "$output" "--namespaceSlug=VALUE" + assert_contains "$output" "--applicationSlug=VALUE" + assert_contains "$output" "--scopeSlug=VALUE" +} + +# ============================================================================= +# Hash consistency +# ============================================================================= +@test "domain-generate: produces consistent hash for same input" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" \ + --domain="nullapps.io" \ + --useAccountSlug="true" + + [ "$status" -eq 0 ] + local first_result="$output" + + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" \ + --domain="nullapps.io" \ + --useAccountSlug="true" + + [ "$status" -eq 0 ] + assert_equal "$output" "$first_result" +} + +@test "domain-generate: produces different hash for different input" { + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="prod" \ + --applicationSlug="webapp" \ + --scopeSlug="api" \ + --domain="nullapps.io" \ + --useAccountSlug="true" + + [ "$status" -eq 0 ] + local first_result="$output" + + run bash "$SCRIPT" \ + --accountSlug="myaccount" \ + --namespaceSlug="dev" \ + --applicationSlug="webapp" \ + --scopeSlug="api" \ + --domain="nullapps.io" \ + --useAccountSlug="true" + + [ "$status" -eq 0 ] + [ "$output" != "$first_result" ] +} diff --git a/k8s/scope/tests/networking/dns/domain/generate_domain.bats b/k8s/scope/tests/networking/dns/domain/generate_domain.bats new file mode 100644 index 00000000..624553ec --- /dev/null +++ b/k8s/scope/tests/networking/dns/domain/generate_domain.bats 
@@ -0,0 +1,113 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/networking/dns/domain/generate_domain +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$(mktemp -d)" + export SCRIPT="$PROJECT_ROOT/k8s/scope/networking/dns/domain/generate_domain" + + # Create mock domain-generate binary + mkdir -p "$SERVICE_PATH/scope/networking/dns/domain" + cat > "$SERVICE_PATH/scope/networking/dns/domain/domain-generate" << 'MOCK' +#!/bin/bash +echo "generated.nullapps.io" +MOCK + chmod +x "$SERVICE_PATH/scope/networking/dns/domain/domain-generate" + + # Mock np + np() { + echo "np called: $*" + return 0 + } + export -f np + + # Default environment + export SCOPE_ID="scope-123" + export DOMAIN="nullapps.io" + export USE_ACCOUNT_SLUG="false" + export CONTEXT='{ + "account": {"slug": "my-account"}, + "namespace": {"slug": "prod"}, + "application": {"slug": "webapp"}, + "scope": {"slug": "api", "domain": ""} + }' +} + +teardown() { + rm -rf "$SERVICE_PATH" + unset -f np +} + +# ============================================================================= +# Success flow +# ============================================================================= +@test "generate_domain: full success flow" { + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Generating scope domain..." + assert_contains "$output" "📋 Generated domain: generated.nullapps.io" + assert_contains "$output" "📝 Patching scope with domain..." 
+ assert_contains "$output" "np called: scope patch --id scope-123 --body {\"domain\":\"generated.nullapps.io\"}" + assert_contains "$output" "✅ Scope domain updated" +} + +# ============================================================================= +# Calls domain-generate with correct params +# ============================================================================= +@test "generate_domain: extracts slugs from CONTEXT and passes correct parameters" { + cat > "$SERVICE_PATH/scope/networking/dns/domain/domain-generate" << 'MOCK' +#!/bin/bash +for arg in "$@"; do + echo "$arg" +done +MOCK + chmod +x "$SERVICE_PATH/scope/networking/dns/domain/domain-generate" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "--accountSlug=my-account" + assert_contains "$output" "--namespaceSlug=prod" + assert_contains "$output" "--applicationSlug=webapp" + assert_contains "$output" "--scopeSlug=api" + assert_contains "$output" "--domain=nullapps.io" + assert_contains "$output" "--useAccountSlug=false" +} + +# ============================================================================= +# domain-generate failure +# ============================================================================= +@test "generate_domain: fails with error details when domain-generate fails" { + cat > "$SERVICE_PATH/scope/networking/dns/domain/domain-generate" << 'MOCK' +#!/bin/bash +echo "Error: generation failed" >&2 +exit 1 +MOCK + chmod +x "$SERVICE_PATH/scope/networking/dns/domain/domain-generate" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -ne 0 ] + assert_contains "$output" "❌ Failed to generate scope domain" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The domain-generate binary returned an error" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify the input slugs are valid" +} + +# ============================================================================= +# Updates CONTEXT with 
scope domain +# ============================================================================= +@test "generate_domain: updates CONTEXT with new scope domain" { + run bash -c 'source "$SCRIPT" && echo "$CONTEXT" | jq -r ".scope.domain"' + + [ "$status" -eq 0 ] + assert_contains "$output" "generated.nullapps.io" +} diff --git a/k8s/scope/tests/networking/dns/external_dns/manage_route.bats b/k8s/scope/tests/networking/dns/external_dns/manage_route.bats new file mode 100644 index 00000000..db1563b4 --- /dev/null +++ b/k8s/scope/tests/networking/dns/external_dns/manage_route.bats @@ -0,0 +1,186 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/networking/dns/external_dns/manage_route +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export SCRIPT="$SERVICE_PATH/scope/networking/dns/external_dns/manage_route" + + # Default environment + export GATEWAY_NAME="gw-public" + export SCOPE_ID="scope-123" + export SCOPE_DOMAIN="myapp.example.com" + export K8S_NAMESPACE="test-ns" + export CONTEXT='{"scope":{"slug":"my-app"}}' + export OUTPUT_DIR="$(mktemp -d)" + + # Mock kubectl - default: gateway returns IP + kubectl() { + case "$*" in + *"get gateway"*) + echo "10.0.0.1" + ;; + *"get service"*) + echo "10.0.0.2" + ;; + *"delete dnsendpoint"*) + echo "dnsendpoint deleted" + ;; + esac + } + export -f kubectl + + # Mock gomplate + gomplate() { + # Just copy template to output + local outfile="" + local infile="" + while [[ $# -gt 0 ]]; do + case "$1" in + --out) outfile="$2"; shift 2 ;; + --file) infile="$2"; shift 2 ;; + *) shift ;; + esac + done + echo "rendered: $infile" > "$outfile" + } + export -f 
gomplate +} + +teardown() { + rm -rf "$OUTPUT_DIR" +} + +# ============================================================================= +# CREATE: success with gateway IP +# ============================================================================= +@test "manage_route: CREATE - full success flow with gateway IP" { + export ACTION="CREATE" + export DNS_ENDPOINT_TEMPLATE="$OUTPUT_DIR/dns-endpoint.yaml.tpl" + echo "template content" > "$DNS_ENDPOINT_TEMPLATE" + + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Building DNSEndpoint manifest for ExternalDNS..." + assert_contains "$output" "📡 Getting IP for gateway: gw-public" + assert_contains "$output" "✅ Gateway IP: 10.0.0.1" + assert_contains "$output" "📝 Building DNSEndpoint from template:" + assert_contains "$output" "✅ DNSEndpoint manifest created:" +} + +# ============================================================================= +# CREATE: fallback to service IP +# ============================================================================= +@test "manage_route: CREATE - falls back to service when gateway has no IP" { + export ACTION="CREATE" + export DNS_ENDPOINT_TEMPLATE="$OUTPUT_DIR/dns-endpoint.yaml.tpl" + echo "template content" > "$DNS_ENDPOINT_TEMPLATE" + + kubectl() { + case "$*" in + *"get gateway"*) + echo "" + ;; + *"get service"*) + echo "10.0.0.2" + ;; + esac + } + export -f kubectl + + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_contains "$output" "⚠️ Gateway IP not found, trying service fallback..." 
+ assert_contains "$output" "✅ Gateway IP: 10.0.0.2" +} + +# ============================================================================= +# CREATE: no IP available - exits 0 +# ============================================================================= +@test "manage_route: CREATE - exits 0 when no IP available" { + kubectl() { echo ""; } + export -f kubectl + + export ACTION="CREATE" + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_contains "$output" "⚠️ Could not determine gateway IP address yet, DNSEndpoint will be created later" +} + +# ============================================================================= +# CREATE: template not found +# ============================================================================= +@test "manage_route: CREATE - fails with error details when template not found" { + export DNS_ENDPOINT_TEMPLATE="/nonexistent/template.yaml.tpl" + + export ACTION="CREATE" + run bash "$SCRIPT" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ DNSEndpoint template not found: /nonexistent/template.yaml.tpl" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The template file may be missing or the path is incorrect" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify template exists: ls -la /nonexistent/template.yaml.tpl" +} + +# ============================================================================= +# CREATE: custom template path +# ============================================================================= +@test "manage_route: CREATE - uses custom DNS_ENDPOINT_TEMPLATE when set" { + export DNS_ENDPOINT_TEMPLATE="$OUTPUT_DIR/custom-template.yaml.tpl" + echo "custom template" > "$DNS_ENDPOINT_TEMPLATE" + + export ACTION="CREATE" + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Building DNSEndpoint from template: $DNS_ENDPOINT_TEMPLATE" + assert_contains "$output" "✅ DNSEndpoint manifest created:" +} + +# 
============================================================================= +# DELETE: success +# ============================================================================= +@test "manage_route: DELETE - full success flow" { + export ACTION="DELETE" + + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Deleting DNSEndpoint for external_dns..." + assert_contains "$output" "📝 Deleting DNSEndpoint: k-8-s-my-app-scope-123-dns in namespace test-ns" + assert_contains "$output" "✅ DNSEndpoint deletion completed" +} + +# ============================================================================= +# DELETE: already deleted (idempotent) +# ============================================================================= +@test "manage_route: DELETE - warns when DNSEndpoint already deleted" { + export ACTION="DELETE" + + kubectl() { + case "$*" in + *"delete dnsendpoint"*) + return 1 + ;; + esac + } + export -f kubectl + + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Deleting DNSEndpoint: k-8-s-my-app-scope-123-dns in namespace test-ns" + assert_contains "$output" "⚠️ DNSEndpoint 'k-8-s-my-app-scope-123-dns' may already be deleted" + assert_contains "$output" "✅ DNSEndpoint deletion completed" +} diff --git a/k8s/scope/tests/networking/dns/get_hosted_zones.bats b/k8s/scope/tests/networking/dns/get_hosted_zones.bats new file mode 100644 index 00000000..527578fc --- /dev/null +++ b/k8s/scope/tests/networking/dns/get_hosted_zones.bats @@ -0,0 +1,118 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/networking/dns/get_hosted_zones +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$(mktemp -d)" + export SCRIPT="$PROJECT_ROOT/k8s/scope/networking/dns/get_hosted_zones" +} + +teardown() { + rm -rf "$SERVICE_PATH" +} + +# ============================================================================= +# Both zones found +# ============================================================================= +@test "get_hosted_zones: both zones found - displays IDs and creates directories" { + export CONTEXT='{"providers":{"cloud-providers":{"networking":{"hosted_public_zone_id":"Z_PUBLIC_123","hosted_zone_id":"Z_PRIVATE_456"}}}}' + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Getting hosted zones..." + assert_contains "$output" "📋 Public Hosted Zone ID: Z_PUBLIC_123" + assert_contains "$output" "📋 Private Hosted Zone ID: Z_PRIVATE_456" + assert_contains "$output" "✅ Hosted zones loaded" + assert_directory_exists "$SERVICE_PATH/tmp" + assert_directory_exists "$SERVICE_PATH/output" +} + +# ============================================================================= +# Only public zone found +# ============================================================================= +@test "get_hosted_zones: only public zone - succeeds and creates directories" { + export CONTEXT='{"providers":{"cloud-providers":{"networking":{"hosted_public_zone_id":"Z_PUBLIC_123","hosted_zone_id":null}}}}' + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Getting hosted zones..." 
+ assert_contains "$output" "📋 Public Hosted Zone ID: Z_PUBLIC_123" + assert_contains "$output" "📋 Private Hosted Zone ID: null" + assert_contains "$output" "✅ Hosted zones loaded" + assert_directory_exists "$SERVICE_PATH/tmp" + assert_directory_exists "$SERVICE_PATH/output" +} + +# ============================================================================= +# Only private zone found +# ============================================================================= +@test "get_hosted_zones: only private zone - succeeds and creates directories" { + export CONTEXT='{"providers":{"cloud-providers":{"networking":{"hosted_public_zone_id":null,"hosted_zone_id":"Z_PRIVATE_456"}}}}' + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Getting hosted zones..." + assert_contains "$output" "📋 Public Hosted Zone ID: null" + assert_contains "$output" "📋 Private Hosted Zone ID: Z_PRIVATE_456" + assert_contains "$output" "✅ Hosted zones loaded" + assert_directory_exists "$SERVICE_PATH/tmp" + assert_directory_exists "$SERVICE_PATH/output" +} + +# ============================================================================= +# Neither zone found +# ============================================================================= +@test "get_hosted_zones: neither zone found - displays warning and exits 0" { + export CONTEXT='{"providers":{"cloud-providers":{"networking":{"hosted_public_zone_id":null,"hosted_zone_id":null}}}}' + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Getting hosted zones..." 
+ assert_contains "$output" "📋 Public Hosted Zone ID: null" + assert_contains "$output" "📋 Private Hosted Zone ID: null" + assert_contains "$output" "⚠️ No hosted zones found (neither public nor private)" +} + +@test "get_hosted_zones: both zones empty strings - displays warning and exits 0" { + export CONTEXT='{"providers":{"cloud-providers":{"networking":{"hosted_public_zone_id":"","hosted_zone_id":""}}}}' + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "⚠️ No hosted zones found (neither public nor private)" +} + +@test "get_hosted_zones: neither zone found - does not create directories" { + export CONTEXT='{"providers":{"cloud-providers":{"networking":{"hosted_public_zone_id":null,"hosted_zone_id":null}}}}' + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + [ ! -d "$SERVICE_PATH/tmp" ] + [ ! -d "$SERVICE_PATH/output" ] +} + +# ============================================================================= +# Missing networking keys +# ============================================================================= +@test "get_hosted_zones: missing networking keys - displays warning and exits 0" { + export CONTEXT='{"providers":{"cloud-providers":{}}}' + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Getting hosted zones..." 
+ assert_contains "$output" "📋 Public Hosted Zone ID: null" + assert_contains "$output" "📋 Private Hosted Zone ID: null" + assert_contains "$output" "⚠️ No hosted zones found (neither public nor private)" +} diff --git a/k8s/scope/tests/networking/dns/manage_dns.bats b/k8s/scope/tests/networking/dns/manage_dns.bats new file mode 100644 index 00000000..f1a33db5 --- /dev/null +++ b/k8s/scope/tests/networking/dns/manage_dns.bats @@ -0,0 +1,237 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/networking/dns/manage_dns +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$(mktemp -d)" + export SCRIPT="$PROJECT_ROOT/k8s/scope/networking/dns/manage_dns" + + # Create mock scripts that succeed by default + mkdir -p "$SERVICE_PATH/scope/networking/dns/route53" + mkdir -p "$SERVICE_PATH/scope/networking/dns/external_dns" + mkdir -p "$SERVICE_PATH/scope/networking/dns/az-records" + + cat > "$SERVICE_PATH/scope/networking/dns/route53/manage_route" << 'MOCK' +echo "route53 manage_route called" +MOCK + + cat > "$SERVICE_PATH/scope/networking/dns/external_dns/manage_route" << 'MOCK' +echo "external_dns manage_route called" +MOCK + + cat > "$SERVICE_PATH/scope/networking/dns/az-records/manage_route" << 'MOCK' +echo "az-records manage_route called" +MOCK + + # Default environment + export DNS_TYPE="route53" + export ACTION="CREATE" + export SCOPE_DOMAIN="test.nullapps.io" + export SCOPE_VISIBILITY="public" + export PUBLIC_GATEWAY_NAME="gw-public" + export PRIVATE_GATEWAY_NAME="gw-private" + export RESOURCE_GROUP="my-rg" + export AZURE_SUBSCRIPTION_ID="sub-123" + export HOSTED_ZONE_NAME="example.com" + export HOSTED_ZONE_RG="dns-rg" 
+} + +teardown() { + rm -rf "$SERVICE_PATH" +} + +# ============================================================================= +# Header messages +# ============================================================================= +@test "manage_dns: displays header messages for route53" { + export DNS_TYPE="route53" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Managing DNS records..." + assert_contains "$output" "📋 DNS type: route53 | Action: CREATE | Domain: test.nullapps.io" +} + +@test "manage_dns: displays header messages for external_dns" { + export DNS_TYPE="external_dns" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Managing DNS records..." + assert_contains "$output" "📋 DNS type: external_dns | Action: CREATE | Domain: test.nullapps.io" +} + +# ============================================================================= +# Route53 dispatching +# ============================================================================= +@test "manage_dns: route53 - dispatches to route53/manage_route" { + export DNS_TYPE="route53" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Using Route53 DNS provider" + assert_contains "$output" "route53 manage_route called" + assert_contains "$output" "✅ DNS records managed successfully" +} + +@test "manage_dns: route53 - fails with error details when manage_route fails" { + export DNS_TYPE="route53" + + cat > "$SERVICE_PATH/scope/networking/dns/route53/manage_route" << 'MOCK' +return 1 +MOCK + + run bash -c 'source "$SCRIPT"' + + [ "$status" -ne 0 ] + assert_contains "$output" "📝 Using Route53 DNS provider" + assert_contains "$output" "❌ Route53 DNS management failed" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The hosted zone may not exist or the agent lacks Route53 permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check hosted zone 
exists: aws route53 list-hosted-zones" +} + +# ============================================================================= +# External DNS dispatching +# ============================================================================= +@test "manage_dns: external_dns - dispatches to external_dns/manage_route" { + export DNS_TYPE="external_dns" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Using External DNS provider" + assert_contains "$output" "external_dns manage_route called" + assert_contains "$output" "✅ DNS records managed successfully" +} + +@test "manage_dns: external_dns - fails with error details when manage_route fails" { + export DNS_TYPE="external_dns" + + cat > "$SERVICE_PATH/scope/networking/dns/external_dns/manage_route" << 'MOCK' +return 1 +MOCK + + run bash -c 'source "$SCRIPT"' + + [ "$status" -ne 0 ] + assert_contains "$output" "📝 Using External DNS provider" + assert_contains "$output" "❌ External DNS management failed" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The External DNS operator may not be running or lacks permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check operator status: kubectl get pods -l app=external-dns" +} + +# ============================================================================= +# DELETE with empty domain - skips +# ============================================================================= +@test "manage_dns: DELETE with empty SCOPE_DOMAIN - skips action" { + export ACTION="DELETE" + export SCOPE_DOMAIN="" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Managing DNS records..." 
+ assert_contains "$output" "⚠️ Skipping DNS action — scope has no domain" +} + +# ============================================================================= +# DELETE with "To be defined" domain - skips +# ============================================================================= +@test "manage_dns: DELETE with 'To be defined' SCOPE_DOMAIN - skips action" { + export ACTION="DELETE" + export SCOPE_DOMAIN="To be defined" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Managing DNS records..." + assert_contains "$output" "⚠️ Skipping DNS action — scope has no domain" +} + +# ============================================================================= +# DELETE with valid domain - does not skip +# ============================================================================= +@test "manage_dns: DELETE with valid SCOPE_DOMAIN - proceeds normally" { + export ACTION="DELETE" + export SCOPE_DOMAIN="test.nullapps.io" + export DNS_TYPE="route53" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Using Route53 DNS provider" + assert_contains "$output" "route53 manage_route called" + assert_contains "$output" "✅ DNS records managed successfully" +} + +# ============================================================================= +# CREATE with empty domain - does not skip (only DELETE skips) +# ============================================================================= +@test "manage_dns: CREATE with empty SCOPE_DOMAIN - proceeds normally" { + export ACTION="CREATE" + export SCOPE_DOMAIN="" + export DNS_TYPE="route53" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Using Route53 DNS provider" + assert_contains "$output" "route53 manage_route called" +} + +# ============================================================================= +# Unsupported DNS type +# ============================================================================= +@test 
"manage_dns: unsupported DNS type - fails with error details" { + export DNS_TYPE="cloudflare" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Unsupported DNS type: 'cloudflare'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The DNS_TYPE value in values.yaml is not one of: route53, azure, external_dns" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Supported types: route53, azure, external_dns" +} + +# ============================================================================= +# Azure dispatching +# ============================================================================= +@test "manage_dns: azure public - dispatches to az-records/manage_route" { + export DNS_TYPE="azure" + export SCOPE_VISIBILITY="public" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 DNS type: azure | Action: CREATE | Domain: test.nullapps.io" + assert_contains "$output" "📝 Using Azure DNS provider (gateway: gw-public)" + assert_contains "$output" "az-records manage_route called" + assert_contains "$output" "✅ DNS records managed successfully" +} + +@test "manage_dns: azure private - dispatches to az-records/manage_route" { + export DNS_TYPE="azure" + export SCOPE_VISIBILITY="private" + + run bash -c 'source "$SCRIPT"' + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Using Azure DNS provider (gateway: gw-private)" + assert_contains "$output" "az-records manage_route called" + assert_contains "$output" "✅ DNS records managed successfully" +} diff --git a/k8s/scope/tests/networking/dns/route53/manage_route.bats b/k8s/scope/tests/networking/dns/route53/manage_route.bats new file mode 100644 index 00000000..36346519 --- /dev/null +++ b/k8s/scope/tests/networking/dns/route53/manage_route.bats @@ -0,0 +1,196 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for 
scope/networking/dns/route53/manage_route +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SERVICE_PATH="$PROJECT_ROOT/k8s" + export SCRIPT="$SERVICE_PATH/scope/networking/dns/route53/manage_route" + + # Default environment + export ALB_NAME="my-alb" + export REGION="us-east-1" + export SCOPE_DOMAIN="test.nullapps.io" + export HOSTED_PRIVATE_ZONE_ID="Z_PRIVATE_123" + export HOSTED_PUBLIC_ZONE_ID="Z_PUBLIC_456" + + # Mock aws CLI - default: describe-load-balancers succeeds, change-resource-record-sets succeeds + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "my-alb-dns.us-east-1.elb.amazonaws.com Z_ELB_789" + ;; + *"change-resource-record-sets"*) + echo '{"ChangeInfo":{"Status":"PENDING"}}' + ;; + esac + } + export -f aws +} + +# ============================================================================= +# Success: both zones +# ============================================================================= +@test "manage_route: creates records in both zones when public != private" { + run bash "$SCRIPT" --action=CREATE + + [ "$status" -eq 0 ] + assert_contains "$output" "📡 Looking for load balancer: my-alb in region us-east-1..." 
+ assert_contains "$output" "✅ Found load balancer DNS: my-alb-dns.us-east-1.elb.amazonaws.com" + assert_contains "$output" "📋 Will create records in both public and private zones" + assert_contains "$output" "📝 CREATING Route53 record in hosted zone: Z_PRIVATE_123" + assert_contains "$output" "📋 Domain: test.nullapps.io -> my-alb-dns.us-east-1.elb.amazonaws.com" + assert_contains "$output" "✅ Successfully CREATED public Route53 record" + assert_contains "$output" "📝 CREATING Route53 record in hosted zone: Z_PUBLIC_456" + assert_contains "$output" "✨ Route53 DNS configuration completed" +} + +# ============================================================================= +# Success: only private zone +# ============================================================================= +@test "manage_route: creates record in private zone only when public is null" { + export HOSTED_PUBLIC_ZONE_ID="null" + + run bash "$SCRIPT" --action=CREATE + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 CREATING Route53 record in hosted zone: Z_PRIVATE_123" + assert_contains "$output" "✅ Successfully CREATED private Route53 record" + + assert_contains "$output" "✨ Route53 DNS configuration completed" +} + +# ============================================================================= +# Success: same zone ID for public and private (no duplicate) +# ============================================================================= +@test "manage_route: creates record once when public == private zone" { + export HOSTED_PUBLIC_ZONE_ID="Z_PRIVATE_123" + + run bash "$SCRIPT" --action=UPSERT + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 UPSERTING Route53 record in hosted zone: Z_PRIVATE_123" + assert_contains "$output" "✨ Route53 DNS configuration completed" +} + +# ============================================================================= +# Error: load balancer not found +# ============================================================================= +@test "manage_route: 
fails with error details when ALB not found" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "An error occurred (LoadBalancerNotFound)" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" --action=CREATE + + [ "$status" -eq 1 ] + assert_contains "$output" "📡 Looking for load balancer: my-alb in region us-east-1..." + assert_contains "$output" "❌ Failed to find load balancer 'my-alb' in region 'us-east-1'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The load balancer may not exist or you lack permissions to describe it" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify the ALB exists: aws elbv2 describe-load-balancers --names my-alb" +} + +# ============================================================================= +# Error: load balancer has no DNS name +# ============================================================================= +@test "manage_route: fails with error details when ALB has no DNS name" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "None None" + ;; + esac + } + export -f aws + + run bash "$SCRIPT" --action=CREATE + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Load balancer 'my-alb' exists but has no DNS name" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The load balancer may still be provisioning" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check ALB status: aws elbv2 describe-load-balancers --names my-alb" +} + +# ============================================================================= +# Error: Route53 change fails +# ============================================================================= +@test "manage_route: fails with error details when Route53 change fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "my-alb-dns.us-east-1.elb.amazonaws.com Z_ELB_789" + ;; + *"change-resource-record-sets"*) + echo "An error occurred 
(AccessDenied)" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" --action=CREATE + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to CREATE Route53 record" + assert_contains "$output" "📋 Zone ID: Z_PRIVATE_123" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The agent may lack Route53 permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check IAM permissions for route53:ChangeResourceRecordSets" +} + +# ============================================================================= +# DELETE: skips when record not found (idempotent) +# ============================================================================= +@test "manage_route: DELETE skips when record not found in zone" { + export HOSTED_PUBLIC_ZONE_ID="null" + + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "my-alb-dns.us-east-1.elb.amazonaws.com Z_ELB_789" + ;; + *"change-resource-record-sets"*) + ROUTE53_OUTPUT="InvalidChangeBatch: it was submitted as part of a batch but it was not found" + echo "$ROUTE53_OUTPUT" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" --action=DELETE + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Route53 record for test.nullapps.io does not exist in zone Z_PRIVATE_123, skipping deletion" + assert_contains "$output" "✨ Route53 DNS configuration completed" +} + +# ============================================================================= +# DELETE: succeeds normally +# ============================================================================= +@test "manage_route: DELETE succeeds when record exists" { + export HOSTED_PUBLIC_ZONE_ID="null" + + run bash "$SCRIPT" --action=DELETE + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 DELETING Route53 record in hosted zone: Z_PRIVATE_123" + assert_contains "$output" "✅ Successfully DELETED private Route53 record" + assert_contains "$output" "✨ Route53 DNS configuration completed" +} diff 
--git a/k8s/scope/tests/networking/gateway/build_gateway.bats b/k8s/scope/tests/networking/gateway/build_gateway.bats new file mode 100644 index 00000000..f2a09157 --- /dev/null +++ b/k8s/scope/tests/networking/gateway/build_gateway.bats @@ -0,0 +1,133 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/networking/gateway/build_gateway +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + export SCRIPT="$PROJECT_ROOT/k8s/scope/networking/gateway/build_gateway" + + # Create temp output directory + export OUTPUT_DIR="$(mktemp -d)" + export SCOPE_ID="scope-123" + export SCOPE_DOMAIN="test.nullapps.io" + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT='{"scope":{"id":"scope-123","domain":"test.nullapps.io"}}' + + # Create a mock template + export TEMPLATE="$(mktemp)" + echo '{{ .scope.domain }}' > "$TEMPLATE" + + # Mock gomplate + gomplate() { + local out_file="" + local in_file="" + while [[ $# -gt 0 ]]; do + case "$1" in + --out) out_file="$2"; shift 2 ;; + --file) in_file="$2"; shift 2 ;; + *) shift ;; + esac + done + if [ -n "$out_file" ]; then + echo "rendered-ingress-content" > "$out_file" + fi + return 0 + } + export -f gomplate +} + +teardown() { + rm -rf "$OUTPUT_DIR" + rm -f "$TEMPLATE" + unset -f gomplate +} + +# ============================================================================= +# Success flow +# ============================================================================= +@test "build_gateway: success flow - displays all messages and renders template" { + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Building gateway ingress..." 
+ assert_contains "$output" "📋 Scope: scope-123 | Domain: test.nullapps.io | Visibility: internet-facing" + assert_contains "$output" "📝 Building template: $TEMPLATE" + assert_contains "$output" "✅ Ingress manifest created: $OUTPUT_DIR/ingress-scope-123-internet-facing.yaml" +} + +@test "build_gateway: generates correct ingress file path" { + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_file_exists "$OUTPUT_DIR/ingress-scope-123-internet-facing.yaml" +} + +@test "build_gateway: cleans up context JSON file after rendering" { + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_file_not_exists "$OUTPUT_DIR/context-scope-123.json" +} + +@test "build_gateway: writes CONTEXT to temporary context file path" { + gomplate() { + local context_file="" + while [[ $# -gt 0 ]]; do + case "$1" in + -c) context_file="${2#*.=}"; shift 2 ;; + --out) + echo "rendered" > "$2"; shift 2 + ;; + *) shift ;; + esac + done + if [ -n "$context_file" ] && [ -f "$context_file" ]; then + local content + content=$(cat "$context_file") + if [[ "$content" == *"scope-123"* ]]; then + return 0 + fi + fi + return 1 + } + export -f gomplate + + run bash "$SCRIPT" + + [ "$status" -eq 0 ] +} + +@test "build_gateway: uses internal visibility in file name" { + export INGRESS_VISIBILITY="internal" + + run bash "$SCRIPT" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Scope: scope-123 | Domain: test.nullapps.io | Visibility: internal" + assert_contains "$output" "✅ Ingress manifest created: $OUTPUT_DIR/ingress-scope-123-internal.yaml" + assert_file_exists "$OUTPUT_DIR/ingress-scope-123-internal.yaml" +} + +# ============================================================================= +# gomplate failure +# ============================================================================= +@test "build_gateway: fails with error details when gomplate fails" { + gomplate() { + return 1 + } + export -f gomplate + + run bash "$SCRIPT" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ Failed to 
render ingress template" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The template file may contain invalid gomplate syntax" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check the template is valid gomplate YAML" +} diff --git a/k8s/scope/tests/networking/resolve_balancer.bats b/k8s/scope/tests/networking/resolve_balancer.bats new file mode 100644 index 00000000..676d8c67 --- /dev/null +++ b/k8s/scope/tests/networking/resolve_balancer.bats @@ -0,0 +1,499 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for networking/resolve_balancer +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../../.." && pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/utils/get_config_value" + + export SCRIPT="$PROJECT_ROOT/k8s/scope/networking/resolve_balancer" + export REGION="us-east-1" + export DNS_TYPE="route53" + + # Default: aws returns failure (no Route53 record, no ALBs) + aws() { return 1; } + export -f aws + + # Temp file for tracking ALB rule counts in mocks + export MOCK_RULES_FILE="$(mktemp)" + + # Base CONTEXT + export CONTEXT='{ + "scope": { + "id": "test-scope-123", + "domain": "test.nullapps.io" + }, + "providers": { + "scope-configurations": {}, + "cloud-providers": { + "networking": { + "hosted_public_zone_id": "Z1234567890", + "hosted_zone_id": "Z0987654321" + } + }, + "container-orchestration": { + "cluster": { + "namespace": "test-ns" + }, + "balancer": { + "public_name": "co-balancer-public", + "private_name": "co-balancer-private" + } + } + } + }' +} + +teardown() { + unset -f aws + unset -f log + unset -f get_config_value + rm -f "$MOCK_RULES_FILE" + unset ALB_NAME + unset ADDITIONAL_BALANCERS +} + +# 
============================================================================= +# Mock helpers +# ============================================================================= + +# Sets up aws mock that returns a Route53 record pointing to a specific ALB. +mock_route53_alb() { + local alb_name="$1" + local alb_dns="${alb_name}-123.us-east-1.elb.amazonaws.com" + + eval "aws() { + case \"\$*\" in + *list-resource-record-sets*) + echo '${alb_dns}.' + return 0 + ;; + *describe-load-balancers*) + echo '{\"LoadBalancers\":[{\"LoadBalancerName\":\"${alb_name}\",\"DNSName\":\"${alb_dns}\"}]}' + return 0 + ;; + *) + return 1 + ;; + esac + } + export -f aws" +} + +# Sets up aws mock with no Route53 record but with rule counts for ALBs. +# Write rule counts to MOCK_RULES_FILE as "alb_name count" lines. +# The mock returns the ALB ARN with the name embedded so describe-rules +# can look up the correct rule count. +mock_alb_rules() { + > "$MOCK_RULES_FILE" + for pair in "$@"; do + echo "$pair" >> "$MOCK_RULES_FILE" + done + local rules_file="$MOCK_RULES_FILE" + + eval "aws() { + case \"\$*\" in + *list-resource-record-sets*) + echo 'None' + return 0 + ;; + *describe-load-balancers*--names*) + local name='' + local prev='' + for arg in \"\$@\"; do + if [ \"\$prev\" = '--names' ]; then name=\"\$arg\"; fi + prev=\"\$arg\" + done + if ! 
grep -q \"^\${name} \" '${rules_file}' 2>/dev/null; then + return 1 + fi + echo \"arn:aws:elasticloadbalancing:us-east-1:123:loadbalancer/app/\${name}/abc\" + return 0 + ;; + *describe-listeners*) + local lb_arn='' + local prev='' + for arg in \"\$@\"; do + if [ \"\$prev\" = '--load-balancer-arn' ]; then lb_arn=\"\$arg\"; fi + prev=\"\$arg\" + done + local alb_name=\$(echo \"\$lb_arn\" | sed 's|.*/app/||;s|/.*||') + echo \"arn:aws:elasticloadbalancing:us-east-1:123:listener/app/\${alb_name}/abc/def\" + return 0 + ;; + *describe-rules*) + local listener_arn='' + local prev='' + for arg in \"\$@\"; do + if [ \"\$prev\" = '--listener-arn' ]; then listener_arn=\"\$arg\"; fi + prev=\"\$arg\" + done + local alb_name=\$(echo \"\$listener_arn\" | sed 's|.*/app/||;s|/.*||') + local count=\$(grep \"^\${alb_name} \" '${rules_file}' | awk '{print \$2}') + if [ -z \"\$count\" ]; then + return 1 + fi + local rules='{\"Rules\": [{\"IsDefault\": true}' + local i=0 + while [ \$i -lt \$count ]; do + rules=\"\${rules}, {\\\"IsDefault\\\": false}\" + i=\$((i + 1)) + done + rules=\"\${rules}]}\" + echo \"\$rules\" + return 0 + ;; + *) + return 1 + ;; + esac + } + export -f aws" +} + +# ============================================================================= +# Default ALB name (no provider config) +# ============================================================================= +@test "resolve_balancer: uses default ALB name when no provider config (public)" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT='{ "providers": {} }' + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "k8s-nullplatform-internet-facing" +} + +@test "resolve_balancer: uses default ALB name when no provider config (private)" { + export INGRESS_VISIBILITY="internal" + export CONTEXT='{ "providers": {} }' + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "k8s-nullplatform-internal" +} + +# ============================================================================= +# Provider overrides - 
container-orchestration +# ============================================================================= +@test "resolve_balancer: resolves public ALB from container-orchestration provider" { + export INGRESS_VISIBILITY="internet-facing" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +@test "resolve_balancer: resolves private ALB from container-orchestration provider" { + export INGRESS_VISIBILITY="internal" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-private" +} + +# ============================================================================= +# Provider overrides - scope-configurations takes priority +# ============================================================================= +@test "resolve_balancer: scope-configurations overrides container-orchestration (public)" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"].networking.balancer_public_name = "scope-alb-public"') + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "scope-alb-public" +} + +@test "resolve_balancer: scope-configurations overrides container-orchestration (private)" { + export INGRESS_VISIBILITY="internal" + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"].networking.balancer_private_name = "scope-alb-private"') + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "scope-alb-private" +} + +# ============================================================================= +# Additional balancers from container-orchestration provider +# ============================================================================= +@test "resolve_balancer: reads additional public balancers from container-orchestration" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["container-orchestration"].balancer.additional_public_names = ["co-extra-1"] + ') + mock_alb_rules "co-balancer-public 45" "co-extra-1 10" + + source 
"$SCRIPT" + + assert_equal "$ALB_NAME" "co-extra-1" +} + +@test "resolve_balancer: reads additional private balancers from container-orchestration" { + export INGRESS_VISIBILITY="internal" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["container-orchestration"].balancer.additional_private_names = ["co-priv-extra-1"] + ') + mock_alb_rules "co-balancer-private 45" "co-priv-extra-1 10" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-priv-extra-1" +} + +@test "resolve_balancer: scope-configurations additional balancers override container-orchestration" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["scope-extra-1"] | + .providers["container-orchestration"].balancer.additional_public_names = ["co-extra-1"] + ') + mock_alb_rules "co-balancer-public 45" "scope-extra-1 10" "co-extra-1 5" + + source "$SCRIPT" + + # scope-configurations wins — co-extra-1 is not even a candidate + assert_equal "$ALB_NAME" "scope-extra-1" +} + +# ============================================================================= +# Priority 1: Route53 lookup takes precedence over everything +# ============================================================================= +@test "resolve_balancer: uses ALB from Route53 when record exists" { + export INGRESS_VISIBILITY="internet-facing" + mock_route53_alb "alb-from-dns" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "alb-from-dns" +} + +@test "resolve_balancer: Route53 takes priority over additional balancers config" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-extra-1", "alb-extra-2"] + ') + mock_route53_alb "alb-from-dns" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "alb-from-dns" +} + +@test "resolve_balancer: Route53 takes priority over provider config" { + export 
INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq '.providers["scope-configurations"].networking.balancer_public_name = "scope-alb-public"') + mock_route53_alb "alb-from-dns" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "alb-from-dns" +} + +@test "resolve_balancer: logs when using Route53 ALB" { + export INGRESS_VISIBILITY="internet-facing" + mock_route53_alb "alb-from-dns" + + run bash -c 'source "$SCRIPT"' + + assert_contains "$output" "📝 Using ALB 'alb-from-dns' from Route53 record for test.nullapps.io" +} + +@test "resolve_balancer: falls through to config when Route53 has no record" { + export INGRESS_VISIBILITY="internet-facing" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +# ============================================================================= +# Priority 2: additional balancers — least-loaded selection +# ============================================================================= +@test "resolve_balancer: selects ALB with fewest rules from candidates (public)" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-extra-1", "alb-extra-2"] + ') + mock_alb_rules "co-balancer-public 45" "alb-extra-1 12" "alb-extra-2 30" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "alb-extra-1" +} + +@test "resolve_balancer: selects ALB with fewest rules from candidates (private)" { + export INGRESS_VISIBILITY="internal" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_private_balancers = ["alb-priv-extra-1", "alb-priv-extra-2"] + ') + mock_alb_rules "co-balancer-private 50" "alb-priv-extra-1 20" "alb-priv-extra-2 5" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "alb-priv-extra-2" +} + +@test "resolve_balancer: keeps default ALB when it has fewest rules" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo 
"$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-extra-1"] + ') + mock_alb_rules "co-balancer-public 5" "alb-extra-1 30" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +@test "resolve_balancer: logs selected ALB when different from default" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-extra-1"] + ') + mock_alb_rules "co-balancer-public 45" "alb-extra-1 12" + + run bash -c 'source "$SCRIPT"' + + assert_contains "$output" "📝 Selected ALB 'alb-extra-1' (12 rules) over default 'co-balancer-public'" +} + +@test "resolve_balancer: logs candidate balancers list" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-extra-1", "alb-extra-2"] + ') + mock_alb_rules "co-balancer-public 10" "alb-extra-1 10" "alb-extra-2 10" + + run bash -c 'export LOG_LEVEL=debug; source "$SCRIPT"' + + assert_contains "$output" "🔍 Additional balancers configured, resolving least-loaded ALB..." 
+ assert_contains "$output" "📋 Candidate balancers: co-balancer-public, alb-extra-1, alb-extra-2" +} + +# ============================================================================= +# AWS API failure handling +# ============================================================================= +@test "resolve_balancer: skips candidate when rule count query fails" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-extra-1", "alb-extra-2"] + ') + # alb-extra-1 not in mock → describe-load-balancers returns 1 + mock_alb_rules "co-balancer-public 45" "alb-extra-2 20" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "alb-extra-2" +} + +@test "resolve_balancer: warns when a candidate fails" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-broken"] + ') + mock_alb_rules "co-balancer-public 10" + + run bash -c 'source "$SCRIPT"' + + assert_contains "$output" "⚠️ Could not query rules for ALB 'alb-broken', skipping" +} + +@test "resolve_balancer: keeps default when all candidates fail" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-broken-1", "alb-broken-2"] + ') + aws() { + case "$*" in + *list-resource-record-sets*) echo "None"; return 0 ;; + *) return 1 ;; + esac + } + export -f aws + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +# ============================================================================= +# No additional balancers — no AWS calls for rule counts +# ============================================================================= +@test "resolve_balancer: does not calculate when no additional balancers configured" { + export INGRESS_VISIBILITY="internet-facing" 
+ + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +@test "resolve_balancer: handles empty additional balancers array gracefully" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = [] + ') + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +# ============================================================================= +# Tie-breaking: first candidate with fewest rules wins +# ============================================================================= +@test "resolve_balancer: picks first candidate on tie" { + export INGRESS_VISIBILITY="internet-facing" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-extra-1", "alb-extra-2"] + ') + mock_alb_rules "co-balancer-public 10" "alb-extra-1 10" "alb-extra-2 10" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +# ============================================================================= +# DNS_TYPE guard — non-route53 skips Route53 lookup and load balancing +# ============================================================================= +@test "resolve_balancer: skips Route53 and load balancing for external_dns" { + export INGRESS_VISIBILITY="internet-facing" + export DNS_TYPE="external_dns" + export CONTEXT=$(echo "$CONTEXT" | jq ' + .providers["scope-configurations"].networking.additional_public_balancers = ["alb-extra-1"] + ') + mock_route53_alb "alb-from-dns" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +@test "resolve_balancer: skips Route53 and load balancing for azure" { + export INGRESS_VISIBILITY="internet-facing" + export DNS_TYPE="azure" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-public" +} + +@test "resolve_balancer: uses provider config for non-route53 (private)" { + export 
INGRESS_VISIBILITY="internal" + export DNS_TYPE="external_dns" + + source "$SCRIPT" + + assert_equal "$ALB_NAME" "co-balancer-private" +} + +@test "resolve_balancer: logs skip message for non-route53 DNS" { + export INGRESS_VISIBILITY="internet-facing" + export DNS_TYPE="external_dns" + + run bash -c 'export LOG_LEVEL=debug; source "$SCRIPT"' + + assert_contains "$output" "DNS type is 'external_dns', skipping Route53 lookup and load balancing" +} diff --git a/k8s/scope/tests/pause_autoscaling.bats b/k8s/scope/tests/pause_autoscaling.bats new file mode 100644 index 00000000..9316255d --- /dev/null +++ b/k8s/scope/tests/pause_autoscaling.bats @@ -0,0 +1,197 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/pause_autoscaling - pause HPA by fixing replicas +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + + # Source assertions and shared functions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/scope/require_resource" + export -f require_hpa require_deployment find_deployment_by_label + + # Default environment + export K8S_NAMESPACE="default-namespace" + + # Base CONTEXT with required fields + export CONTEXT='{ + "scope": { + "id": "scope-123", + "current_active_deployment": "deploy-456" + }, + "providers": { + "container-orchestration": { + "cluster": { + "namespace": "provider-namespace" + } + } + } + }' +} + +teardown() { + unset -f kubectl +} + +# ============================================================================= +# HPA Not Found +# ============================================================================= +@test "pause_autoscaling: fails when HPA does not exist" { + kubectl() { + case "$*" in + "get hpa"*) + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../pause_autoscaling" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Looking for HPA 'hpa-d-scope-123-deploy-456' in namespace 'provider-namespace'..." 
+ assert_contains "$output" "❌ HPA 'hpa-d-scope-123-deploy-456' not found in namespace 'provider-namespace'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The HPA may not exist or autoscaling is not configured for this deployment" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify the HPA exists: kubectl get hpa -n provider-namespace" + assert_contains "$output" "• Check that autoscaling is configured for scope scope-123" +} + +# ============================================================================= +# Successful Pause Flow +# ============================================================================= +@test "pause_autoscaling: complete successful pause flow" { + kubectl() { + case "$*" in + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace -o json") + echo '{"spec":{"minReplicas":3,"maxReplicas":15}}' + ;; + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace") + return 0 + ;; + "get deployment d-scope-123-deploy-456 -n provider-namespace -o jsonpath"*) + echo "7" + ;; + "patch hpa"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../pause_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Looking for HPA 'hpa-d-scope-123-deploy-456' in namespace 'provider-namespace'..." + assert_contains "$output" "📋 Current HPA configuration:" + assert_contains "$output" " Min replicas: 3" + assert_contains "$output" " Max replicas: 15" + assert_contains "$output" "📋 Current deployment replicas: 7" + assert_contains "$output" "📝 Pausing autoscaling at 7 replicas..." + assert_contains "$output" "✅ Autoscaling paused successfully" + assert_contains "$output" " HPA: hpa-d-scope-123-deploy-456" + assert_contains "$output" " Namespace: provider-namespace" + assert_contains "$output" " Fixed replicas: 7" + assert_contains "$output" "📋 To resume autoscaling, use the resume-autoscaling action or manually patch the HPA." 
+} + +@test "pause_autoscaling: stores original config in annotation" { + kubectl() { + case "$*" in + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace") + return 0 + ;; + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace -o json") + echo '{"spec":{"minReplicas":2,"maxReplicas":10}}' + ;; + "get deployment d-scope-123-deploy-456 -n provider-namespace -o jsonpath"*) + echo "5" + ;; + "patch hpa"*) + if [[ "$*" == *"nullplatform.com/autoscaling-paused"* ]]; then + return 0 + fi + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../pause_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "✅ Autoscaling paused successfully" +} + +# ============================================================================= +# Namespace Resolution Tests +# ============================================================================= +@test "pause_autoscaling: uses namespace from provider" { + kubectl() { + case "$*" in + *"-n provider-namespace"*) + case "$*" in + "get hpa"*"-o json"*) + echo '{"spec":{"minReplicas":2,"maxReplicas":10}}' + ;; + "get deployment"*) + echo "5" + ;; + *) + return 0 + ;; + esac + ;; + *) + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../pause_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Looking for HPA 'hpa-d-scope-123-deploy-456' in namespace 'provider-namespace'..." 
+ assert_contains "$output" " Namespace: provider-namespace" +} + +@test "pause_autoscaling: falls back to default namespace" { + export CONTEXT=$(echo "$CONTEXT" | jq 'del(.providers["container-orchestration"].cluster.namespace)') + + kubectl() { + case "$*" in + *"-n default-namespace"*) + case "$*" in + "get hpa"*"-o json"*) + echo '{"spec":{"minReplicas":2,"maxReplicas":10}}' + ;; + "get deployment"*) + echo "5" + ;; + *) + return 0 + ;; + esac + ;; + *) + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../pause_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Looking for HPA 'hpa-d-scope-123-deploy-456' in namespace 'default-namespace'..." + assert_contains "$output" " Namespace: default-namespace" +} diff --git a/k8s/scope/tests/restart_pods.bats b/k8s/scope/tests/restart_pods.bats new file mode 100644 index 00000000..c0f3df8b --- /dev/null +++ b/k8s/scope/tests/restart_pods.bats @@ -0,0 +1,237 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/restart_pods - restart deployment pods via rollout +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + + # Source assertions and shared functions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/scope/require_resource" + export -f require_hpa require_deployment find_deployment_by_label + + # Default environment + export K8S_NAMESPACE="default-namespace" + + # Base CONTEXT with required fields + export CONTEXT='{ + "scope": { + "id": "scope-123", + "current_active_deployment": "deploy-456" + }, + "providers": { + "container-orchestration": { + "cluster": { + "namespace": "provider-namespace" + } + } + } + }' + + # Mock kubectl: success flow by default + kubectl() { + case "$*" in + "get deployment -n provider-namespace -l name=d-scope-123-deploy-456 -o jsonpath={.items[0].metadata.name}") + echo "my-deployment" + return 0 + ;; + "rollout restart -n provider-namespace deployment/my-deployment") + return 0 + ;; + "rollout status -n provider-namespace deployment/my-deployment -w") + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl +} + +teardown() { + unset -f kubectl +} + +# ============================================================================= +# Success Flow Tests +# ============================================================================= +@test "restart_pods: success flow - finds deployment, restarts, waits, completes" { + run bash "$BATS_TEST_DIRNAME/../restart_pods" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Looking for deployment with label: name=d-scope-123-deploy-456" + assert_contains "$output" "📝 Restarting deployment: my-deployment" + assert_contains "$output" "🔍 Waiting for rollout to complete..." 
+ assert_contains "$output" "✅ Deployment restart completed successfully" +} + +# ============================================================================= +# Error: kubectl get deployment fails +# ============================================================================= +@test "restart_pods: error when kubectl get deployment fails" { + kubectl() { + case "$*" in + "get deployment -n provider-namespace -l name=d-scope-123-deploy-456 -o jsonpath={.items[0].metadata.name}") + echo "connection refused" >&2 + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../restart_pods" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Looking for deployment with label: name=d-scope-123-deploy-456" + assert_contains "$output" "❌ Failed to find deployment with label 'name=d-scope-123-deploy-456' in namespace 'provider-namespace'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The deployment may not exist or was not created yet" + assert_contains "$output" "🔧 How to fix:" +} + +# ============================================================================= +# Error: empty deployment name returned +# ============================================================================= +@test "restart_pods: error when empty deployment name returned" { + kubectl() { + case "$*" in + "get deployment -n provider-namespace -l name=d-scope-123-deploy-456 -o jsonpath={.items[0].metadata.name}") + echo "" + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../restart_pods" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ No deployment found with label 'name=d-scope-123-deploy-456' in namespace 'provider-namespace'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "🔧 How to fix:" +} + +# ============================================================================= +# Error: rollout restart fails +# 
============================================================================= +@test "restart_pods: error when rollout restart fails" { + kubectl() { + case "$*" in + "get deployment -n provider-namespace -l name=d-scope-123-deploy-456 -o jsonpath={.items[0].metadata.name}") + echo "my-deployment" + return 0 + ;; + "rollout restart -n provider-namespace deployment/my-deployment") + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../restart_pods" + + [ "$status" -eq 1 ] + assert_contains "$output" "📝 Restarting deployment: my-deployment" + assert_contains "$output" "❌ Failed to restart deployment 'my-deployment'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The deployment may be in a bad state or kubectl lacks permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Check deployment status: kubectl describe deployment my-deployment -n provider-namespace" +} + +# ============================================================================= +# Error: rollout status fails/times out +# ============================================================================= +@test "restart_pods: error when rollout status fails or times out" { + kubectl() { + case "$*" in + "get deployment -n provider-namespace -l name=d-scope-123-deploy-456 -o jsonpath={.items[0].metadata.name}") + echo "my-deployment" + return 0 + ;; + "rollout restart -n provider-namespace deployment/my-deployment") + return 0 + ;; + "rollout status -n provider-namespace deployment/my-deployment -w") + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../restart_pods" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Waiting for rollout to complete..." 
+ assert_contains "$output" "❌ Rollout failed or timed out" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Pods may be failing to start (image pull errors, crashes, resource limits)" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Check pod events: kubectl describe pods -n provider-namespace -l name=d-scope-123-deploy-456" + assert_contains "$output" "• Check pod logs: kubectl logs -n provider-namespace -l name=d-scope-123-deploy-456 --tail=50" +} + +# ============================================================================= +# Namespace Resolution Tests +# ============================================================================= +@test "restart_pods: uses namespace from provider" { + kubectl() { + case "$*" in + *"-n provider-namespace"*) + case "$*" in + "get deployment"*) + echo "my-deployment" + return 0 + ;; + *) + return 0 + ;; + esac + ;; + *) + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../restart_pods" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Looking for deployment with label: name=d-scope-123-deploy-456" + assert_contains "$output" "✅ Deployment restart completed successfully" +} + +@test "restart_pods: falls back to default namespace when provider namespace not set" { + export CONTEXT=$(echo "$CONTEXT" | jq 'del(.providers["container-orchestration"].cluster.namespace)') + + kubectl() { + case "$*" in + *"-n default-namespace"*) + case "$*" in + "get deployment"*) + echo "my-deployment" + return 0 + ;; + *) + return 0 + ;; + esac + ;; + *) + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../restart_pods" + + [ "$status" -eq 0 ] + assert_contains "$output" "✅ Deployment restart completed successfully" +} diff --git a/k8s/scope/tests/resume_autoscaling.bats b/k8s/scope/tests/resume_autoscaling.bats new file mode 100644 index 00000000..853f4179 --- /dev/null +++ b/k8s/scope/tests/resume_autoscaling.bats @@ -0,0 
+1,220 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/resume_autoscaling - restore HPA from paused state +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + + # Source assertions and shared functions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/scope/require_resource" + export -f require_hpa require_deployment find_deployment_by_label + + # Default environment + export K8S_NAMESPACE="default-namespace" + + # Base CONTEXT with required fields + export CONTEXT='{ + "scope": { + "id": "scope-123", + "current_active_deployment": "deploy-456" + }, + "providers": { + "container-orchestration": { + "cluster": { + "namespace": "provider-namespace" + } + } + } + }' +} + +teardown() { + unset -f kubectl +} + +# ============================================================================= +# HPA Not Found +# ============================================================================= +@test "resume_autoscaling: fails when HPA does not exist" { + kubectl() { + case "$*" in + "get hpa"*) + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../resume_autoscaling" + + [ "$status" -eq 1 ] + assert_contains "$output" "🔍 Looking for HPA 'hpa-d-scope-123-deploy-456' in namespace 'provider-namespace'..." 
+ assert_contains "$output" "❌ HPA 'hpa-d-scope-123-deploy-456' not found in namespace 'provider-namespace'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The HPA may not exist or autoscaling is not configured for this deployment" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify the HPA exists: kubectl get hpa -n provider-namespace" + assert_contains "$output" "• Check that autoscaling is configured for scope scope-123" +} + +# ============================================================================= +# HPA Already Active (idempotent) +# ============================================================================= +@test "resume_autoscaling: succeeds when HPA is already active (empty annotation)" { + kubectl() { + case "$*" in + "get hpa"*"-n provider-namespace"*) + if [[ "$*" == *"-o jsonpath"* ]]; then + echo "" + else + return 0 + fi + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../resume_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "✅ HPA 'hpa-d-scope-123-deploy-456' is already active, no action needed" +} + +@test "resume_autoscaling: succeeds when hpa is not paused" { + kubectl() { + case "$*" in + "get hpa"*"-n provider-namespace"*) + if [[ "$*" == *"-o jsonpath"* ]]; then + echo "null" + else + return 0 + fi + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../resume_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "✅ HPA 'hpa-d-scope-123-deploy-456' is already active, no action needed" +} + +# ============================================================================= +# Successful Resume Flow +# ============================================================================= +@test "resume_autoscaling: complete successful resume flow" { + kubectl() { + case "$*" in + "get hpa"*"-n provider-namespace"*) + if [[ "$*" == *"-o jsonpath"* ]]; then + echo 
'{"originalMinReplicas":3,"originalMaxReplicas":15,"pausedAt":"2024-06-15T10:30:00Z"}' + else + return 0 + fi + ;; + "patch hpa"*) + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../resume_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Looking for HPA 'hpa-d-scope-123-deploy-456' in namespace 'provider-namespace'..." + assert_contains "$output" "📋 Found paused HPA configuration:" + assert_contains "$output" " Original min replicas: 3" + assert_contains "$output" " Original max replicas: 15" + assert_contains "$output" " Paused at: 2024-06-15T10:30:00Z" + assert_contains "$output" "📝 Resuming autoscaling..." + assert_contains "$output" "✅ Autoscaling resumed successfully" + assert_contains "$output" " HPA: hpa-d-scope-123-deploy-456" + assert_contains "$output" " Namespace: provider-namespace" + assert_contains "$output" " Min replicas: 3" + assert_contains "$output" " Max replicas: 15" +} + +@test "resume_autoscaling: removes paused annotation" { + kubectl() { + case "$*" in + "get hpa"*"-n provider-namespace"*) + if [[ "$*" == *"-o jsonpath"* ]]; then + echo '{"originalMinReplicas":2,"originalMaxReplicas":10,"pausedAt":"2024-01-01T00:00:00Z"}' + else + return 0 + fi + ;; + "patch hpa"*) + if [[ "$*" == *"null"* ]]; then + return 0 + fi + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../resume_autoscaling" + + [ "$status" -eq 0 ] +} + +# ============================================================================= +# Namespace Resolution Tests +# ============================================================================= +@test "resume_autoscaling: uses namespace from provider" { + kubectl() { + case "$*" in + *"-n provider-namespace"*) + if [[ "$*" == *"-o jsonpath"* ]]; then + echo '{"originalMinReplicas":2,"originalMaxReplicas":10,"pausedAt":"2024-01-01T00:00:00Z"}' + else + return 0 + fi + ;; + *) + return 1 + ;; + esac + } + export -f kubectl + + run bash 
"$BATS_TEST_DIRNAME/../resume_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Looking for HPA 'hpa-d-scope-123-deploy-456' in namespace 'provider-namespace'..." + assert_contains "$output" " Namespace: provider-namespace" +} + +@test "resume_autoscaling: falls back to default namespace" { + export CONTEXT=$(echo "$CONTEXT" | jq 'del(.providers["container-orchestration"].cluster.namespace)') + + kubectl() { + case "$*" in + *"-n default-namespace"*) + if [[ "$*" == *"-o jsonpath"* ]]; then + echo '{"originalMinReplicas":2,"originalMaxReplicas":10,"pausedAt":"2024-01-01T00:00:00Z"}' + else + return 0 + fi + ;; + *) + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../resume_autoscaling" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Looking for HPA 'hpa-d-scope-123-deploy-456' in namespace 'default-namespace'..." + assert_contains "$output" " Namespace: default-namespace" +} diff --git a/k8s/scope/tests/set_desired_instance_count.bats b/k8s/scope/tests/set_desired_instance_count.bats new file mode 100644 index 00000000..628e807e --- /dev/null +++ b/k8s/scope/tests/set_desired_instance_count.bats @@ -0,0 +1,403 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/set_desired_instance_count - set deployment replicas +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + + # Source assertions and shared functions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/scope/require_resource" + export -f require_hpa require_deployment find_deployment_by_label + + # Default environment + export K8S_NAMESPACE="default-namespace" + export ACTION_PARAMETERS_DESIRED_INSTANCES="5" + + # Base CONTEXT with required fields + export CONTEXT='{ + "scope": { + "id": "scope-123", + "current_active_deployment": "deploy-456" + }, + "providers": { + "container-orchestration": { + "cluster": { + "namespace": "provider-namespace" + } + } + } + }' +} + +teardown() { + unset -f kubectl + rm -f "${REPLICAS_COUNTER_FILE:-}" "${HPA_MIN_COUNTER_FILE:-}" "${HPA_MAX_COUNTER_FILE:-}" +} + +# ============================================================================= +# Parameter Validation Tests +# ============================================================================= +@test "set_desired_instance_count: fails when DESIRED_INSTANCES not set" { + unset ACTION_PARAMETERS_DESIRED_INSTANCES + + run bash "$BATS_TEST_DIRNAME/../set_desired_instance_count" + + [ "$status" -eq 1 ] + assert_contains "$output" "📝 Setting desired instance count..." + assert_contains "$output" "❌ desired_instances parameter not found" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The ACTION_PARAMETERS_DESIRED_INSTANCES environment variable is not set" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Set the desired_instances parameter in the action configuration" +} + +@test "set_desired_instance_count: fails when DESIRED_INSTANCES is empty" { + export ACTION_PARAMETERS_DESIRED_INSTANCES="" + + run bash "$BATS_TEST_DIRNAME/../set_desired_instance_count" + + [ "$status" -eq 1 ] + assert_contains "$output" "📝 Setting desired instance count..." 
+ assert_contains "$output" "❌ desired_instances parameter not found" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The ACTION_PARAMETERS_DESIRED_INSTANCES environment variable is not set" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Set the desired_instances parameter in the action configuration" +} + +# ============================================================================= +# Deployment Not Found +# ============================================================================= +@test "set_desired_instance_count: fails when deployment not found" { + kubectl() { + case "$*" in + "get deployment d-scope-123-deploy-456 -n provider-namespace") + return 1 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../set_desired_instance_count" + + [ "$status" -eq 1 ] + assert_contains "$output" "📋 Desired instances: 5" + assert_contains "$output" "📋 Deployment: d-scope-123-deploy-456" + assert_contains "$output" "📋 Namespace: provider-namespace" + assert_contains "$output" "🔍 Looking for deployment 'd-scope-123-deploy-456' in namespace 'provider-namespace'..." 
+ assert_contains "$output" "❌ Deployment 'd-scope-123-deploy-456' not found in namespace 'provider-namespace'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The deployment may not exist or was not created yet" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Verify the deployment exists: kubectl get deployment -n provider-namespace" + assert_contains "$output" "• Check that scope scope-123 has an active deployment" +} + +# ============================================================================= +# No HPA Path - Complete Flow +# ============================================================================= +@test "set_desired_instance_count: complete flow with no HPA" { + export REPLICAS_COUNTER_FILE=$(mktemp) + echo "0" > "$REPLICAS_COUNTER_FILE" + + kubectl() { + case "$*" in + "get deployment d-scope-123-deploy-456 -n provider-namespace") + return 0 + ;; + "get deployment d-scope-123-deploy-456 -n provider-namespace -o jsonpath"*) + if [[ "$*" == *"readyReplicas"* ]]; then + echo "5" + else + local count + count=$(cat "$REPLICAS_COUNTER_FILE") + echo $(( count + 1 )) > "$REPLICAS_COUNTER_FILE" + if [[ "$count" == "0" ]]; then + echo "3" # CURRENT_REPLICAS + else + echo "5" # FINAL_REPLICAS (after scale) + fi + fi + ;; + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace") + return 1 # No HPA + ;; + "scale deployment"*) + return 0 + ;; + "rollout status"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../set_desired_instance_count" + rm -f "$REPLICAS_COUNTER_FILE" + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Setting desired instance count..." 
+ assert_contains "$output" "📋 Desired instances: 5" + assert_contains "$output" "📋 Deployment: d-scope-123-deploy-456" + assert_contains "$output" "📋 Namespace: provider-namespace" + assert_contains "$output" "📋 Current replicas: 3" + assert_contains "$output" "📋 No HPA found for this deployment" + assert_contains "$output" "📝 Updating deployment (no HPA)..." + assert_contains "$output" "✅ Deployment scaled to 5 replicas" + assert_contains "$output" "🔍 Waiting for deployment rollout to complete..." + assert_contains "$output" "📋 Final status:" + assert_contains "$output" " Deployment replicas: 5" + assert_contains "$output" " Ready replicas: 5" + assert_contains "$output" "✨ Instance count successfully set to 5" +} + +# ============================================================================= +# Active HPA Path - Complete Flow +# ============================================================================= +@test "set_desired_instance_count: complete flow with active HPA" { + export REPLICAS_COUNTER_FILE=$(mktemp) + export HPA_MIN_COUNTER_FILE=$(mktemp) + export HPA_MAX_COUNTER_FILE=$(mktemp) + echo "0" > "$REPLICAS_COUNTER_FILE" + echo "0" > "$HPA_MIN_COUNTER_FILE" + echo "0" > "$HPA_MAX_COUNTER_FILE" + + kubectl() { + case "$*" in + "get deployment d-scope-123-deploy-456 -n provider-namespace") + return 0 + ;; + "get deployment d-scope-123-deploy-456 -n provider-namespace -o jsonpath"*) + if [[ "$*" == *"readyReplicas"* ]]; then + echo "5" + else + local count + count=$(cat "$REPLICAS_COUNTER_FILE") + echo $(( count + 1 )) > "$REPLICAS_COUNTER_FILE" + if [[ "$count" == "0" ]]; then + echo "3" # CURRENT_REPLICAS + else + echo "5" # FINAL_REPLICAS + fi + fi + ;; + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace") + return 0 # HPA exists + ;; + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace -o jsonpath"*) + if [[ "$*" == *"autoscaling-paused"* ]]; then + echo "" # Not paused + elif [[ "$*" == *"minReplicas"* ]]; then + local count + 
count=$(cat "$HPA_MIN_COUNTER_FILE") + echo $(( count + 1 )) > "$HPA_MIN_COUNTER_FILE" + if [[ "$count" == "0" ]]; then + echo "2" # Before patch + else + echo "5" # After patch (final status) + fi + elif [[ "$*" == *"maxReplicas"* ]]; then + local count + count=$(cat "$HPA_MAX_COUNTER_FILE") + echo $(( count + 1 )) > "$HPA_MAX_COUNTER_FILE" + if [[ "$count" == "0" ]]; then + echo "10" # Before patch + else + echo "5" # After patch (final status) + fi + fi + ;; + "patch hpa"*) + return 0 + ;; + "rollout status"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../set_desired_instance_count" + rm -f "$REPLICAS_COUNTER_FILE" "$HPA_MIN_COUNTER_FILE" "$HPA_MAX_COUNTER_FILE" + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Setting desired instance count..." + assert_contains "$output" "📋 Desired instances: 5" + assert_contains "$output" "📋 Current replicas: 3" + assert_contains "$output" "📋 HPA found: hpa-d-scope-123-deploy-456" + assert_contains "$output" "📋 HPA is currently ACTIVE" + assert_contains "$output" "📝 Updating HPA for active autoscaling..." + assert_contains "$output" "📋 Current HPA range: 2 - 10 replicas" + assert_contains "$output" "📋 Setting desired instances to 5 by updating HPA range" + assert_contains "$output" "✅ HPA updated: min=5, max=5" + assert_contains "$output" "🔍 Waiting for deployment rollout to complete..." 
+ assert_contains "$output" "📋 Final status:" + assert_contains "$output" " Deployment replicas: 5" + assert_contains "$output" " Ready replicas: 5" + assert_contains "$output" " HPA range: 5 - 5 replicas" + assert_contains "$output" "✨ Instance count successfully set to 5" +} + +# ============================================================================= +# Paused HPA Path - Complete Flow +# ============================================================================= +@test "set_desired_instance_count: complete flow with paused HPA" { + export REPLICAS_COUNTER_FILE=$(mktemp) + echo "0" > "$REPLICAS_COUNTER_FILE" + + kubectl() { + case "$*" in + "get deployment d-scope-123-deploy-456 -n provider-namespace") + return 0 + ;; + "get deployment d-scope-123-deploy-456 -n provider-namespace -o jsonpath"*) + if [[ "$*" == *"readyReplicas"* ]]; then + echo "5" + else + local count + count=$(cat "$REPLICAS_COUNTER_FILE") + echo $(( count + 1 )) > "$REPLICAS_COUNTER_FILE" + if [[ "$count" == "0" ]]; then + echo "3" # CURRENT_REPLICAS + else + echo "5" # FINAL_REPLICAS + fi + fi + ;; + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace") + return 0 # HPA exists + ;; + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace -o jsonpath"*) + if [[ "$*" == *"autoscaling-paused"* ]]; then + echo '{"originalMinReplicas":2,"originalMaxReplicas":10}' # Paused + elif [[ "$*" == *"minReplicas"* ]]; then + echo "5" + elif [[ "$*" == *"maxReplicas"* ]]; then + echo "5" + fi + ;; + "scale deployment"*) + return 0 + ;; + "rollout status"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../set_desired_instance_count" + rm -f "$REPLICAS_COUNTER_FILE" + + [ "$status" -eq 0 ] + assert_contains "$output" "📝 Setting desired instance count..." 
+ assert_contains "$output" "📋 Current replicas: 3" + assert_contains "$output" "📋 HPA found: hpa-d-scope-123-deploy-456" + assert_contains "$output" "📋 HPA is currently PAUSED" + assert_contains "$output" "📝 Updating deployment (HPA paused)..." + assert_contains "$output" "✅ Deployment scaled to 5 replicas" + assert_contains "$output" "🔍 Waiting for deployment rollout to complete..." + assert_contains "$output" "📋 Final status:" + assert_contains "$output" " Deployment replicas: 5" + assert_contains "$output" " Ready replicas: 5" + assert_contains "$output" " HPA range: 5 - 5 replicas" + assert_contains "$output" "✨ Instance count successfully set to 5" +} + +# ============================================================================= +# Namespace Resolution Tests +# ============================================================================= +@test "set_desired_instance_count: uses namespace from provider" { + kubectl() { + case "$*" in + "get deployment d-scope-123-deploy-456 -n provider-namespace") + return 0 + ;; + "get deployment d-scope-123-deploy-456 -n provider-namespace -o jsonpath"*) + if [[ "$*" == *"readyReplicas"* ]]; then + echo "5" + else + echo "3" + fi + ;; + "get hpa hpa-d-scope-123-deploy-456 -n provider-namespace") + return 1 + ;; + "scale deployment"*) + return 0 + ;; + "rollout status"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../set_desired_instance_count" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Namespace: provider-namespace" + assert_contains "$output" "🔍 Looking for deployment 'd-scope-123-deploy-456' in namespace 'provider-namespace'..." 
+} + +@test "set_desired_instance_count: falls back to default namespace" { + export CONTEXT=$(echo "$CONTEXT" | jq 'del(.providers["container-orchestration"].cluster.namespace)') + + kubectl() { + case "$*" in + *"-n default-namespace"*) + case "$*" in + "get deployment"*"-o jsonpath"*) + if [[ "$*" == *"readyReplicas"* ]]; then + echo "5" + else + echo "3" + fi + ;; + "get deployment"*) + return 0 + ;; + *) + return 0 + ;; + esac + ;; + "get hpa"*) + return 1 + ;; + "rollout status"*) + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../set_desired_instance_count" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Namespace: default-namespace" + assert_contains "$output" "🔍 Looking for deployment 'd-scope-123-deploy-456' in namespace 'default-namespace'..." +} diff --git a/k8s/scope/tests/validate_alb_capacity.bats b/k8s/scope/tests/validate_alb_capacity.bats new file mode 100644 index 00000000..af08defd --- /dev/null +++ b/k8s/scope/tests/validate_alb_capacity.bats @@ -0,0 +1,492 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for validate_alb_capacity +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/utils/get_config_value" + + export SCRIPT="$PROJECT_ROOT/k8s/scope/validate_alb_capacity" + + export ALB_NAME="k8s-nullplatform-internet-facing" + export REGION="us-east-1" + export ALB_MAX_CAPACITY="75" + export DNS_TYPE="route53" + + # Base CONTEXT + export CONTEXT='{ + "providers": {} + }' + + # Mock aws - default: ALB with 2 listeners, 30 rules each + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/k8s-nullplatform-internet-facing/abc123/listener1 arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/k8s-nullplatform-internet-facing/abc123/listener2" + return 0 + ;; + *"describe-rules"*) + echo "30" + return 0 + ;; + esac + } + export -f aws +} + +teardown() { + unset -f aws +} + +# ============================================================================= +# Success flow +# ============================================================================= +@test "validate_alb_capacity: success when under capacity" { + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Validating ALB capacity for 'k8s-nullplatform-internet-facing'..." 
+ assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 60 rules (max capacity: 75)" + assert_contains "$output" "✅ ALB capacity validated: 60/75 rules" +} + +@test "validate_alb_capacity: displays debug info" { + export LOG_LEVEL="debug" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB: k8s-nullplatform-internet-facing | Region: us-east-1 | Max capacity: 75 rules" + assert_contains "$output" "📋 ALB ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" +} + +@test "validate_alb_capacity: success with single listener" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "10" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 10 rules (max capacity: 75)" + assert_contains "$output" "✅ ALB capacity validated: 10/75 rules" +} + +# ============================================================================= +# Capacity exceeded +# ============================================================================= +@test "validate_alb_capacity: fails when at capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "75" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 
'k8s-nullplatform-internet-facing' has reached capacity: 75/75 rules" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Too many scopes or ingress rules are configured on this ALB" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Remove unused scopes or ingress rules from the ALB" + assert_contains "$output" "Increase ALB_MAX_CAPACITY in values.yaml or container-orchestration provider (AWS limit is 100 per listener)" + assert_contains "$output" "Request an AWS service quota increase for rules per ALB listener" + assert_contains "$output" "Consider using a separate ALB for additional scopes" +} + +@test "validate_alb_capacity: fails when over capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "90" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 90/75 rules" +} + +# ============================================================================= +# Configuration via get_config_value +# ============================================================================= +@test "validate_alb_capacity: uses default ALB_MAX_CAPACITY of 75" { + unset ALB_MAX_CAPACITY + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 60 rules (max capacity: 75)" +} + +@test "validate_alb_capacity: ALB_MAX_CAPACITY from env var" { + export ALB_MAX_CAPACITY="50" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 60/50 rules" +} + 
+@test "validate_alb_capacity: ALB_MAX_CAPACITY from scope-configurations provider" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_capacity":"50"}}}}' + export ALB_MAX_CAPACITY="75" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 60/50 rules" +} + +@test "validate_alb_capacity: provider takes priority over env var" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_capacity":"100"}}}}' + export ALB_MAX_CAPACITY="50" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 60 rules (max capacity: 100)" + assert_contains "$output" "✅ ALB capacity validated: 60/100 rules" +} + +@test "validate_alb_capacity: ALB_MAX_CAPACITY from container-orchestration provider" { + export CONTEXT='{"providers":{"container-orchestration":{"balancer":{"alb_capacity_threshold":"50"}}}}' + export ALB_MAX_CAPACITY="75" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 60/50 rules" +} + +@test "validate_alb_capacity: scope-configurations takes priority over container-orchestration" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_capacity":"100"}},"container-orchestration":{"balancer":{"alb_capacity_threshold":"50"}}}}' + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 60 rules (max capacity: 100)" +} + +# ============================================================================= +# AWS API errors +# ============================================================================= +@test "validate_alb_capacity: fails when describe-load-balancers fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + 
echo "An error occurred (LoadBalancerNotFound)" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to find load balancer 'k8s-nullplatform-internet-facing' in region 'us-east-1'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The load balancer may not exist or the agent lacks permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify the ALB exists: aws elbv2 describe-load-balancers --names k8s-nullplatform-internet-facing --region us-east-1" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeLoadBalancers" +} + +@test "validate_alb_capacity: fails when ALB ARN is None" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "None" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Load balancer 'k8s-nullplatform-internet-facing' not found in region 'us-east-1'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The load balancer name may be incorrect or it was deleted" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "List available ALBs: aws elbv2 describe-load-balancers --region us-east-1" + assert_contains "$output" "Check the balancer name in values.yaml or scope-configurations provider" +} + +@test "validate_alb_capacity: fails when describe-listeners fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "Access Denied" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to describe listeners for ALB 'k8s-nullplatform-internet-facing'" + assert_contains "$output" "💡 Possible causes:" + 
assert_contains "$output" "The agent may lack permissions to describe listeners" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeListeners" +} + +@test "validate_alb_capacity: skips when no listeners found" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "None" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "⚠️ No listeners found on ALB 'k8s-nullplatform-internet-facing', skipping capacity check" +} + +@test "validate_alb_capacity: fails when describe-rules fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "Access Denied" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to describe rules for listener" + assert_contains "$output" "📋 Listener ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The agent may lack permissions to describe rules" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeRules" +} + +# ============================================================================= +# Edge cases +# ============================================================================= +@test "validate_alb_capacity: handles zero rules" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo 
"arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "0" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 0 rules (max capacity: 75)" + assert_contains "$output" "✅ ALB capacity validated: 0/75 rules" +} + +@test "validate_alb_capacity: passes at exactly one below capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "74" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "✅ ALB capacity validated: 74/75 rules" +} + +@test "validate_alb_capacity: fails when rule count is non-numeric" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "WARNING: something unexpected" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Unexpected non-numeric rule count from listener" + assert_contains "$output" "📋 Listener ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + assert_contains "$output" "📋 Received value: WARNING: something unexpected" + assert_contains "$output" "💡 Possible causes:" + 
assert_contains "$output" "The AWS CLI returned an unexpected response format" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify AWS CLI version and credentials are correct" +} + +@test "validate_alb_capacity: fails when ALB_MAX_CAPACITY is non-numeric" { + export ALB_MAX_CAPACITY="abc" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB_MAX_CAPACITY must be a numeric value, got: 'abc'" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Set a numeric value in values.yaml or scope-configurations provider" +} + +@test "validate_alb_capacity: empty ALB ARN response triggers error" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "" + return 0 + ;; + esac + } + export -f aws + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "1" + assert_contains "$output" "❌ Load balancer 'k8s-nullplatform-internet-facing' not found in region 'us-east-1'" +} + +# ============================================================================= +# DNS_TYPE guard +# ============================================================================= +@test "validate_alb_capacity: skips when DNS_TYPE is external_dns" { + export DNS_TYPE="external_dns" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + [[ "$output" != *"🔍 Validating ALB capacity"* ]] +} + +@test "validate_alb_capacity: skips when DNS_TYPE is azure" { + export DNS_TYPE="azure" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + [[ "$output" != *"🔍 Validating ALB capacity"* ]] +} + +@test "validate_alb_capacity: skips with debug message for non-route53 DNS" { + export DNS_TYPE="external_dns" + export LOG_LEVEL="debug" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "DNS type is 'external_dns', ALB capacity validation only applies to route53, skipping" +} + +@test "validate_alb_capacity: runs when DNS_TYPE is route53" { + export 
DNS_TYPE="route53" + + run bash -c 'source "$SCRIPT"' + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Validating ALB capacity for 'k8s-nullplatform-internet-facing'..." +} diff --git a/k8s/scope/tests/wait_on_balancer.bats b/k8s/scope/tests/wait_on_balancer.bats new file mode 100644 index 00000000..4d111db8 --- /dev/null +++ b/k8s/scope/tests/wait_on_balancer.bats @@ -0,0 +1,223 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for scope/wait_on_balancer - wait for DNS/balancer setup +# ============================================================================= + +setup() { + # Get project root directory + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + + # Default environment + export K8S_NAMESPACE="default-namespace" + export DNS_TYPE="external_dns" + + # Base CONTEXT with required fields + export CONTEXT='{ + "scope": { + "id": "scope-123", + "slug": "my-scope", + "domain": "my-scope.example.com" + } + }' + + # Mock sleep to be instant + sleep() { + return 0 + } + export -f sleep + + # Mock kubectl: DNS endpoint found with status by default + kubectl() { + case "$*" in + "get dnsendpoint k-8-s-my-scope-scope-123-dns -n default-namespace -o jsonpath={.status}") + echo '{"observedGeneration":1}' + return 0 + ;; + *) + return 0 + ;; + esac + } + export -f kubectl + + # Mock nslookup: resolves on first attempt by default + nslookup() { + case "$1" in + "my-scope.example.com") + if [ "$2" = "8.8.8.8" ]; then + echo "Server: 8.8.8.8" + echo "Address: 8.8.8.8#53" + echo "" + echo "Name: my-scope.example.com" + echo "Address: 10.0.0.1" + return 0 + fi + ;; + esac + return 1 + } + export -f nslookup +} + +teardown() { + unset -f kubectl + unset -f nslookup + unset -f sleep +} + +# 
============================================================================= +# external_dns: Success on first attempt +# ============================================================================= +@test "wait_on_balancer: external_dns success on first attempt" { + run bash "$BATS_TEST_DIRNAME/../wait_on_balancer" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Waiting for balancer/DNS setup to complete..." + assert_contains "$output" "📋 Checking ExternalDNS record creation for domain: my-scope.example.com" + assert_contains "$output" "🔍 Checking DNS resolution for my-scope.example.com (attempt 1/" + assert_contains "$output" "📋 Checking DNSEndpoint status: k-8-s-my-scope-scope-123-dns" + assert_contains "$output" "📋 DNSEndpoint status:" + assert_contains "$output" "✅ DNS record for my-scope.example.com is now resolvable" + assert_contains "$output" "✅ Domain my-scope.example.com resolves to:" + assert_contains "$output" "✨ ExternalDNS setup completed successfully" +} + +# ============================================================================= +# external_dns: Success after retries +# ============================================================================= +@test "wait_on_balancer: external_dns success after retries" { + local attempt=0 + nslookup() { + attempt=$((attempt + 1)) + if [ "$attempt" -ge 2 ] && [ "$1" = "my-scope.example.com" ] && [ "$2" = "8.8.8.8" ]; then + echo "Server: 8.8.8.8" + echo "Address: 8.8.8.8#53" + echo "" + echo "Name: my-scope.example.com" + echo "Address: 10.0.0.1" + return 0 + fi + return 1 + } + export -f nslookup + + run bash "$BATS_TEST_DIRNAME/../wait_on_balancer" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Checking DNS resolution for my-scope.example.com (attempt 1/" + assert_contains "$output" "📋 DNS record not yet available, waiting 10s..." 
+ assert_contains "$output" "🔍 Checking DNS resolution for my-scope.example.com (attempt 2/" + assert_contains "$output" "✅ DNS record for my-scope.example.com is now resolvable" + assert_contains "$output" "✨ ExternalDNS setup completed successfully" +} + +# ============================================================================= +# external_dns: Timeout after MAX_ITERATIONS +# ============================================================================= +@test "wait_on_balancer: external_dns timeout after MAX_ITERATIONS" { + export MAX_ITERATIONS=2 + + nslookup() { + return 1 + } + export -f nslookup + + run bash "$BATS_TEST_DIRNAME/../wait_on_balancer" + + [ "$status" -eq 1 ] + assert_contains "$output" "❌ DNS record creation timeout after 20s" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "ExternalDNS may still be processing the DNSEndpoint resource" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "• Check DNSEndpoint resources: kubectl get dnsendpoint -A" + assert_contains "$output" "• Check ExternalDNS logs: kubectl logs -n external-dns -l app=external-dns --tail=50" +} + +# ============================================================================= +# external_dns: DNS endpoint not found but keeps trying +# ============================================================================= +@test "wait_on_balancer: external_dns DNS endpoint not found but keeps trying until resolved" { + kubectl() { + case "$*" in + "get dnsendpoint k-8-s-my-scope-scope-123-dns -n default-namespace -o jsonpath={.status}") + echo "not found" + return 1 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../wait_on_balancer" + + [ "$status" -eq 0 ] + assert_contains "$output" "📋 Checking DNSEndpoint status: k-8-s-my-scope-scope-123-dns" + assert_contains "$output" "✅ DNS record for my-scope.example.com is now resolvable" + assert_contains "$output" "✨ ExternalDNS setup completed successfully" +} + +# 
============================================================================= +# external_dns: DNS endpoint found with status +# ============================================================================= +@test "wait_on_balancer: external_dns DNS endpoint found with status is displayed" { + kubectl() { + case "$*" in + "get dnsendpoint k-8-s-my-scope-scope-123-dns -n default-namespace -o jsonpath={.status}") + echo '{"observedGeneration":2}' + return 0 + ;; + esac + } + export -f kubectl + + run bash "$BATS_TEST_DIRNAME/../wait_on_balancer" + + [ "$status" -eq 0 ] + assert_contains "$output" '📋 DNSEndpoint status: {"observedGeneration":2}' +} + +# ============================================================================= +# route53: Skips check +# ============================================================================= +@test "wait_on_balancer: route53 skips check" { + export DNS_TYPE="route53" + + run bash "$BATS_TEST_DIRNAME/../wait_on_balancer" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Waiting for balancer/DNS setup to complete..." + assert_contains "$output" "📋 DNS Type route53 - DNS should already be configured" + assert_contains "$output" "📋 Skipping DNS wait check" +} + +# ============================================================================= +# azure: Skips check +# ============================================================================= +@test "wait_on_balancer: azure skips check" { + export DNS_TYPE="azure" + + run bash "$BATS_TEST_DIRNAME/../wait_on_balancer" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Waiting for balancer/DNS setup to complete..." 
+ assert_contains "$output" "📋 DNS Type azure - DNS should already be configured" + assert_contains "$output" "📋 Skipping DNS wait check" +} + +# ============================================================================= +# Unknown DNS type: Skips +# ============================================================================= +@test "wait_on_balancer: unknown DNS type skips" { + export DNS_TYPE="cloudflare" + + run bash "$BATS_TEST_DIRNAME/../wait_on_balancer" + + [ "$status" -eq 0 ] + assert_contains "$output" "🔍 Waiting for balancer/DNS setup to complete..." + assert_contains "$output" "📋 Unknown DNS type: cloudflare" + assert_contains "$output" "📋 Skipping DNS wait check" +} diff --git a/k8s/scope/validate_alb_capacity b/k8s/scope/validate_alb_capacity new file mode 100755 index 00000000..fc5eb50e --- /dev/null +++ b/k8s/scope/validate_alb_capacity @@ -0,0 +1,142 @@ +#!/bin/bash + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/../utils/get_config_value" + +if [[ "$DNS_TYPE" != "route53" ]]; then + log debug "📋 DNS type is '$DNS_TYPE', ALB capacity validation only applies to route53, skipping" + return 0 +fi + +ALB_MAX_CAPACITY=$(get_config_value \ + --env ALB_MAX_CAPACITY \ + --provider '.providers["scope-configurations"].networking.alb_max_capacity' \ + --provider '.providers["container-orchestration"].balancer.alb_capacity_threshold' \ + --default "75" +) + +if ! [[ "$ALB_MAX_CAPACITY" =~ ^[0-9]+$ ]]; then + log error "❌ ALB_MAX_CAPACITY must be a numeric value, got: '$ALB_MAX_CAPACITY'" + log error "" + log error "🔧 How to fix:" + log error " • Set a numeric value in values.yaml or scope-configurations provider" + log error "" + exit 1 +fi + +log info "🔍 Validating ALB capacity for '$ALB_NAME'..." 
+log debug "📋 ALB: $ALB_NAME | Region: $REGION | Max capacity: $ALB_MAX_CAPACITY rules" + +ALB_ARN=$(aws elbv2 describe-load-balancers \ + --names "$ALB_NAME" \ + --region "$REGION" \ + --query 'LoadBalancers[0].LoadBalancerArn' \ + --output text \ + --no-paginate 2>&1) || { + log error "❌ Failed to find load balancer '$ALB_NAME' in region '$REGION'" + log error "" + log error "💡 Possible causes:" + log error " The load balancer may not exist or the agent lacks permissions" + log error "" + log error "🔧 How to fix:" + log error " • Verify the ALB exists: aws elbv2 describe-load-balancers --names $ALB_NAME --region $REGION" + log error " • Check IAM permissions for elbv2:DescribeLoadBalancers" + log error "" + exit 1 +} + +if [[ -z "$ALB_ARN" ]] || [[ "$ALB_ARN" == "None" ]]; then + log error "❌ Load balancer '$ALB_NAME' not found in region '$REGION'" + log error "" + log error "💡 Possible causes:" + log error " The load balancer name may be incorrect or it was deleted" + log error "" + log error "🔧 How to fix:" + log error " • List available ALBs: aws elbv2 describe-load-balancers --region $REGION" + log error " • Check the balancer name in values.yaml or scope-configurations provider" + log error "" + exit 1 +fi + +log debug "📋 ALB ARN: $ALB_ARN" + +LISTENER_ARNS=$(aws elbv2 describe-listeners \ + --load-balancer-arn "$ALB_ARN" \ + --region "$REGION" \ + --query 'Listeners[].ListenerArn' \ + --output text \ + --no-paginate 2>&1) || { + log error "❌ Failed to describe listeners for ALB '$ALB_NAME'" + log error "" + log error "💡 Possible causes:" + log error " The agent may lack permissions to describe listeners" + log error "" + log error "🔧 How to fix:" + log error " • Check IAM permissions for elbv2:DescribeListeners" + log error "" + exit 1 +} + +if [[ -z "$LISTENER_ARNS" ]] || [[ "$LISTENER_ARNS" == "None" ]]; then + log warn "⚠️ No listeners found on ALB '$ALB_NAME', skipping capacity check" + return 0 +fi + +# Count rules across all listeners (excluding 
default rules) +TOTAL_RULES=0 + +for LISTENER_ARN in $LISTENER_ARNS; do + RULE_COUNT=$(aws elbv2 describe-rules \ + --listener-arn "$LISTENER_ARN" \ + --region "$REGION" \ + --query 'length(Rules[?!IsDefault])' \ + --output text \ + --no-paginate 2>&1) || { + log error "❌ Failed to describe rules for listener" + log error "📋 Listener ARN: $LISTENER_ARN" + log error "" + log error "💡 Possible causes:" + log error " The agent may lack permissions to describe rules" + log error "" + log error "🔧 How to fix:" + log error " • Check IAM permissions for elbv2:DescribeRules" + log error "" + exit 1 + } + + if ! [[ "$RULE_COUNT" =~ ^[0-9]+$ ]]; then + log error "❌ Unexpected non-numeric rule count from listener" + log error "📋 Listener ARN: $LISTENER_ARN" + log error "📋 Received value: $RULE_COUNT" + log error "" + log error "💡 Possible causes:" + log error " The AWS CLI returned an unexpected response format" + log error "" + log error "🔧 How to fix:" + log error " • Verify AWS CLI version and credentials are correct" + log error " • Run manually: aws elbv2 describe-rules --listener-arn $LISTENER_ARN --region $REGION --query 'length(Rules[?!IsDefault])'" + log error "" + exit 1 + fi + + TOTAL_RULES=$((TOTAL_RULES + RULE_COUNT)) +done + +log info "📋 ALB '$ALB_NAME' has $TOTAL_RULES rules (max capacity: $ALB_MAX_CAPACITY)" + +if [[ "$TOTAL_RULES" -ge "$ALB_MAX_CAPACITY" ]]; then + log error "❌ ALB '$ALB_NAME' has reached capacity: $TOTAL_RULES/$ALB_MAX_CAPACITY rules" + log error "" + log error "💡 Possible causes:" + log error " Too many scopes or ingress rules are configured on this ALB" + log error "" + log error "🔧 How to fix:" + log error " • Remove unused scopes or ingress rules from the ALB" + log error " • Increase ALB_MAX_CAPACITY in values.yaml or container-orchestration provider (AWS limit is 100 per listener)" + log error " • Request an AWS service quota increase for rules per ALB listener" + log error " • Consider using a separate ALB for additional scopes" + log 
error "" + exit 1 +fi + +log info "✅ ALB capacity validated: $TOTAL_RULES/$ALB_MAX_CAPACITY rules" diff --git a/k8s/scope/wait_on_balancer b/k8s/scope/wait_on_balancer index 9f9edf88..972f4c02 100644 --- a/k8s/scope/wait_on_balancer +++ b/k8s/scope/wait_on_balancer @@ -1,6 +1,7 @@ #!/bin/bash -echo "Waiting for balancer/DNS setup to complete..." + +log debug "🔍 Waiting for balancer/DNS setup to complete..." MAX_ITERATIONS=${MAX_ITERATIONS:-30} iteration=0 @@ -10,50 +11,58 @@ case "$DNS_TYPE" in SCOPE_DOMAIN=$(echo "$CONTEXT" | jq -r '.scope.domain') SCOPE_SLUG=$(echo "$CONTEXT" | jq -r '.scope.slug') SCOPE_ID=$(echo "$CONTEXT" | jq -r '.scope.id') - - echo "Checking ExternalDNS record creation for domain: $SCOPE_DOMAIN" - + + log debug "📋 Checking ExternalDNS record creation for domain: $SCOPE_DOMAIN" + while true; do iteration=$((iteration + 1)) if [ $iteration -gt $MAX_ITERATIONS ]; then - echo "⚠️ DNS record creation timeout after $((MAX_ITERATIONS * 10))s" - echo "ExternalDNS may still be processing the DNSEndpoint resource" - echo "You can check manually with: kubectl get dnsendpoint -A" + log error "" + log error " ❌ DNS record creation timeout after $((MAX_ITERATIONS * 10))s" + log error "" + log error "💡 Possible causes:" + log error " ExternalDNS may still be processing the DNSEndpoint resource" + log error "" + log error "🔧 How to fix:" + log error " • Check DNSEndpoint resources: kubectl get dnsendpoint -A" + log error " • Check ExternalDNS logs: kubectl logs -n external-dns -l app=external-dns --tail=50" + log error "" exit 1 fi - - echo "Checking DNS resolution for $SCOPE_DOMAIN (attempt $iteration/$MAX_ITERATIONS)" - + + log debug "🔍 Checking DNS resolution for $SCOPE_DOMAIN (attempt $iteration/$MAX_ITERATIONS)" + DNS_ENDPOINT_NAME="k-8-s-${SCOPE_SLUG}-${SCOPE_ID}-dns" - echo "Checking DNSEndpoint status: $DNS_ENDPOINT_NAME" - + log debug "📋 Checking DNSEndpoint status: $DNS_ENDPOINT_NAME" + DNS_STATUS=$(kubectl get dnsendpoint "$DNS_ENDPOINT_NAME" -n 
"$K8S_NAMESPACE" -o jsonpath='{.status}' 2>/dev/null || echo "not found") if [ "$DNS_STATUS" != "not found" ] && [ -n "$DNS_STATUS" ]; then - echo "DNSEndpoint status: $DNS_STATUS" + log debug "📋 DNSEndpoint status: $DNS_STATUS" fi - + if nslookup "$SCOPE_DOMAIN" 8.8.8.8 >/dev/null 2>&1; then - echo "✓ DNS record for $SCOPE_DOMAIN is now resolvable" - + log info " ✅ DNS record for $SCOPE_DOMAIN is now resolvable" + RESOLVED_IP=$(nslookup "$SCOPE_DOMAIN" 8.8.8.8 | grep -A1 "Name:" | tail -1 | awk '{print $2}' 2>/dev/null || echo "unknown") - echo "✓ Domain $SCOPE_DOMAIN resolves to: $RESOLVED_IP" - + log info " ✅ Domain $SCOPE_DOMAIN resolves to: $RESOLVED_IP" + break fi - - echo "DNS record not yet available, waiting 10s..." + + log debug "📋 DNS record not yet available, waiting 10s..." sleep 10 done - - echo "✓ ExternalDNS setup completed successfully" + + log info "" + log info "✨ ExternalDNS setup completed successfully" ;; route53|azure) - echo "DNS Type $DNS_TYPE - DNS should already be configured" - echo "Skipping DNS wait check" + log debug "📋 DNS Type $DNS_TYPE - DNS should already be configured" + log debug "📋 Skipping DNS wait check" ;; *) - echo "Unknown DNS type: $DNS_TYPE" - echo "Skipping DNS wait check" + log debug "📋 Unknown DNS type: $DNS_TYPE" + log debug "📋 Skipping DNS wait check" ;; -esac \ No newline at end of file +esac diff --git a/k8s/scope/workflows/create.yaml b/k8s/scope/workflows/create.yaml index adb336c5..9c0f3006 100644 --- a/k8s/scope/workflows/create.yaml +++ b/k8s/scope/workflows/create.yaml @@ -1,6 +1,15 @@ include: - "$SERVICE_PATH/values.yaml" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string - name: build context type: script file: "$SERVICE_PATH/scope/build_context" @@ -13,6 +22,9 @@ steps: type: environment - name: OUTPUT_DIR type: environment + - name: validate alb capacity + type: script + file: 
"$SERVICE_PATH/scope/validate_alb_capacity" - name: iam type: workflow steps: diff --git a/k8s/scope/workflows/delete.yaml b/k8s/scope/workflows/delete.yaml index 541f53ad..cf02790d 100644 --- a/k8s/scope/workflows/delete.yaml +++ b/k8s/scope/workflows/delete.yaml @@ -1,6 +1,15 @@ include: - "$SERVICE_PATH/values.yaml" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string - name: build context type: script file: "$SERVICE_PATH/scope/build_context" diff --git a/k8s/scope/workflows/pause-autoscaling.yaml b/k8s/scope/workflows/pause-autoscaling.yaml index 6a18079f..362ef27c 100644 --- a/k8s/scope/workflows/pause-autoscaling.yaml +++ b/k8s/scope/workflows/pause-autoscaling.yaml @@ -1,6 +1,37 @@ include: - "$SERVICE_PATH/values.yaml" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string + - name: load resource helpers + type: script + file: "$SERVICE_PATH/scope/require_resource" + output: + - name: require_hpa + type: function + parameters: + hpa_name: string + namespace: string + scope_id: string + - name: require_deployment + type: function + parameters: + deployment_name: string + namespace: string + scope_id: string + - name: find_deployment_by_label + type: function + parameters: + scope_id: string + deployment_id: string + namespace: string - name: pause autoscaling type: script - file: "$SERVICE_PATH/scope/pause_autoscaling" \ No newline at end of file + file: "$SERVICE_PATH/scope/pause_autoscaling" diff --git a/k8s/scope/workflows/restart-pods.yaml b/k8s/scope/workflows/restart-pods.yaml index f00c207f..7771041a 100644 --- a/k8s/scope/workflows/restart-pods.yaml +++ b/k8s/scope/workflows/restart-pods.yaml @@ -1,6 +1,37 @@ include: - "$SERVICE_PATH/values.yaml" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + 
output: + - name: log + type: function + parameters: + level: string + message: string + - name: load resource helpers + type: script + file: "$SERVICE_PATH/scope/require_resource" + output: + - name: require_hpa + type: function + parameters: + hpa_name: string + namespace: string + scope_id: string + - name: require_deployment + type: function + parameters: + deployment_name: string + namespace: string + scope_id: string + - name: find_deployment_by_label + type: function + parameters: + scope_id: string + deployment_id: string + namespace: string - name: restart pods type: script - file: "$SERVICE_PATH/scope/restart_pods" \ No newline at end of file + file: "$SERVICE_PATH/scope/restart_pods" diff --git a/k8s/scope/workflows/resume-autoscaling.yaml b/k8s/scope/workflows/resume-autoscaling.yaml index e56be5c1..8b155b68 100644 --- a/k8s/scope/workflows/resume-autoscaling.yaml +++ b/k8s/scope/workflows/resume-autoscaling.yaml @@ -1,6 +1,37 @@ include: - "$SERVICE_PATH/values.yaml" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string + - name: load resource helpers + type: script + file: "$SERVICE_PATH/scope/require_resource" + output: + - name: require_hpa + type: function + parameters: + hpa_name: string + namespace: string + scope_id: string + - name: require_deployment + type: function + parameters: + deployment_name: string + namespace: string + scope_id: string + - name: find_deployment_by_label + type: function + parameters: + scope_id: string + deployment_id: string + namespace: string - name: resume autoscaling type: script - file: "$SERVICE_PATH/scope/resume_autoscaling" \ No newline at end of file + file: "$SERVICE_PATH/scope/resume_autoscaling" diff --git a/k8s/scope/workflows/set-desired-instance-count.yaml b/k8s/scope/workflows/set-desired-instance-count.yaml index bff02a1d..03e3ba0f 100644 --- 
a/k8s/scope/workflows/set-desired-instance-count.yaml +++ b/k8s/scope/workflows/set-desired-instance-count.yaml @@ -1,6 +1,37 @@ include: - "$SERVICE_PATH/values.yaml" steps: + - name: load logging + type: script + file: "$SERVICE_PATH/logging" + output: + - name: log + type: function + parameters: + level: string + message: string + - name: load resource helpers + type: script + file: "$SERVICE_PATH/scope/require_resource" + output: + - name: require_hpa + type: function + parameters: + hpa_name: string + namespace: string + scope_id: string + - name: require_deployment + type: function + parameters: + deployment_name: string + namespace: string + scope_id: string + - name: find_deployment_by_label + type: function + parameters: + scope_id: string + deployment_id: string + namespace: string - name: set desired instance count type: script - file: "$SERVICE_PATH/scope/set_desired_instance_count" \ No newline at end of file + file: "$SERVICE_PATH/scope/set_desired_instance_count" diff --git a/k8s/specs/notification-channel.json.tpl b/k8s/specs/notification-channel.json.tpl index ee3c7986..30fad0e3 100644 --- a/k8s/specs/notification-channel.json.tpl +++ b/k8s/specs/notification-channel.json.tpl @@ -1,6 +1,7 @@ { "nrn": "{{ env.Getenv "NRN" }}", "status": "active", + "description": "Channel to handle Containers scopes", "type": "agent", "source": [ "telemetry", diff --git a/k8s/specs/service-spec.json.tpl b/k8s/specs/service-spec.json.tpl index ca47ae5d..f331df10 100644 --- a/k8s/specs/service-spec.json.tpl +++ b/k8s/specs/service-spec.json.tpl @@ -433,7 +433,10 @@ "default":10, "maximum":300, "minimum":1, - "description":"Seconds between health checks" + "description":"Seconds between health checks", + "exclusiveMinimum": { + "$data": "1/timeout_seconds" + } }, "timeout_seconds":{ "type":"integer", @@ -476,7 +479,7 @@ "cpu_millicores":{ "type":"integer", "title":"CPU Millicores", - "default":500, + "default":100, "maximum":4000, "minimum":100, "description":"Amount 
of CPU to allocate (in millicores, 1000m = 1 CPU core)" @@ -630,10 +633,10 @@ }, "name": "Containers", "selectors": { - "category": "any", + "category": "Scope", "imported": false, - "provider": "any", - "sub_category": "any" + "provider": "Agent", + "sub_category": "Containers" }, "type": "scope", "use_default_actions": false, diff --git a/k8s/utils/get_config_value b/k8s/utils/get_config_value new file mode 100755 index 00000000..6e4c2e7e --- /dev/null +++ b/k8s/utils/get_config_value @@ -0,0 +1,62 @@ +#!/bin/bash + +# Function to get configuration value with priority hierarchy +# Priority order (highest to lowest): providers > environment variables > default +# Usage: get_config_value [--provider "jq.path"] ... [--env ENV_VAR] ... [--default "value"] +# Returns the first non-empty value found according to priority order +# Note: The order of arguments does NOT affect priority - providers always win, then env vars (in order), then default +get_config_value() { + local default_value="" + local -a providers=() + local -a env_vars=() + + # First pass: collect all arguments + while [[ $# -gt 0 ]]; do + case "$1" in + --env) + env_vars+=("${2:-}") + shift 2 + ;; + --provider) + providers+=("${2:-}") + shift 2 + ;; + --default) + default_value="${2:-}" + shift 2 + ;; + *) + shift + ;; + esac + done + + # Priority 1: Check all providers in order + for jq_path in "${providers[@]}"; do + if [ -n "$jq_path" ]; then + local provider_value + provider_value=$(echo "$CONTEXT" | jq -r "$jq_path // empty") + if [ -n "$provider_value" ] && [ "$provider_value" != "null" ]; then + echo "$provider_value" + return 0 + fi + fi + done + + # Priority 2: Check environment variables in order + for env_var in "${env_vars[@]}"; do + if [ -n "$env_var" ] && [ -n "${!env_var:-}" ]; then + echo "${!env_var}" + return 0 + fi + done + + # Priority 3: Use default value + if [ -n "$default_value" ]; then + echo "$default_value" + return 0 + fi + + # No value found + echo "" +} \ No newline at end 
of file diff --git a/k8s/utils/tests/get_config_value.bats b/k8s/utils/tests/get_config_value.bats new file mode 100644 index 00000000..47e962a3 --- /dev/null +++ b/k8s/utils/tests/get_config_value.bats @@ -0,0 +1,310 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for get_config_value - configuration value priority hierarchy +# ============================================================================= + +setup() { + # Get project root directory (tests are in k8s/utils/tests, so go up 3 levels) + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + + # Source assertions + source "$PROJECT_ROOT/testing/assertions.sh" + + # Source the get_config_value file we're testing (it's one level up from test directory) + source "$BATS_TEST_DIRNAME/../get_config_value" + + # Setup test CONTEXT for provider tests + export CONTEXT='{ + "providers": { + "scope-configurations": { + "kubernetes": { + "namespace": "scope-config-namespace" + }, + "region": "us-west-2" + }, + "container-orchestration": { + "cluster": { + "namespace": "container-orch-namespace" + } + }, + "cloud-providers": { + "account": { + "region": "eu-west-1" + } + } + } + }' +} + +teardown() { + # Clean up any env vars set during tests + unset TEST_ENV_VAR + unset NAMESPACE_OVERRIDE +} + +# ============================================================================= +# Test: Provider has highest priority over env variable +# ============================================================================= +@test "get_config_value: provider has highest priority over env variable" { + export TEST_ENV_VAR="env-value" + + result=$(get_config_value \ + --env TEST_ENV_VAR \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --default "default-value") + + assert_equal "$result" "scope-config-namespace" +} + +# ============================================================================= +# Test: Provider value 
used when env var is not set +# ============================================================================= +@test "get_config_value: uses provider when env var not set" { + result=$(get_config_value \ + --env NON_EXISTENT_VAR \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --default "default-value") + + assert_equal "$result" "scope-config-namespace" +} + +# ============================================================================= +# Test: Multiple providers - first match wins +# ============================================================================= +@test "get_config_value: first provider match wins" { + result=$(get_config_value \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --provider '.providers["container-orchestration"].cluster.namespace' \ + --default "default-value") + + assert_equal "$result" "scope-config-namespace" +} + +# ============================================================================= +# Test: Falls through to second provider when first doesn't exist +# ============================================================================= +@test "get_config_value: falls through to second provider" { + result=$(get_config_value \ + --provider '.providers["non-existent"].value' \ + --provider '.providers["container-orchestration"].cluster.namespace' \ + --default "default-value") + + assert_equal "$result" "container-orch-namespace" +} + +# ============================================================================= +# Test: Default value used when nothing else matches +# ============================================================================= +@test "get_config_value: uses default when no matches" { + result=$(get_config_value \ + --env NON_EXISTENT_VAR \ + --provider '.providers["non-existent"].value' \ + --default "default-value") + + assert_equal "$result" "default-value" +} + +# ============================================================================= +# Test: 
Complete hierarchy - provider1 > provider2 > env > default +# ============================================================================= +@test "get_config_value: complete hierarchy provider1 > provider2 > env > default" { + # Test 1: First provider wins over everything + export NAMESPACE_OVERRIDE="override-namespace" + result=$(get_config_value \ + --env NAMESPACE_OVERRIDE \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --provider '.providers["container-orchestration"].cluster.namespace' \ + --default "default-namespace") + assert_equal "$result" "scope-config-namespace" + + # Test 2: Second provider wins when first doesn't exist + result=$(get_config_value \ + --env NAMESPACE_OVERRIDE \ + --provider '.providers["non-existent"].value' \ + --provider '.providers["container-orchestration"].cluster.namespace' \ + --default "default-namespace") + assert_equal "$result" "container-orch-namespace" + + # Test 3: Env var wins when no providers exist + result=$(get_config_value \ + --env NAMESPACE_OVERRIDE \ + --provider '.providers["non-existent1"].value' \ + --provider '.providers["non-existent2"].value' \ + --default "default-namespace") + assert_equal "$result" "override-namespace" + + # Test 4: Default wins when nothing else exists + unset NAMESPACE_OVERRIDE + result=$(get_config_value \ + --env NAMESPACE_OVERRIDE \ + --provider '.providers["non-existent1"].value' \ + --provider '.providers["non-existent2"].value' \ + --default "default-namespace") + assert_equal "$result" "default-namespace" +} + +# ============================================================================= +# Test: Returns empty string when no matches and no default +# ============================================================================= +@test "get_config_value: returns empty when no matches and no default" { + result=$(get_config_value \ + --env NON_EXISTENT_VAR \ + --provider '.providers["non-existent"].value') + + assert_empty "$result" +} + +# 
============================================================================= +# Test: Handles null values from jq correctly +# ============================================================================= +@test "get_config_value: ignores null provider values" { + export CONTEXT='{"providers": {"test": {"value": null}}}' + + result=$(get_config_value \ + --provider '.providers["test"].value' \ + --default "default-value") + + assert_equal "$result" "default-value" +} + +# ============================================================================= +# Test: Handles empty string env vars correctly (should use them) +# ============================================================================= +@test "get_config_value: empty env var is not treated as unset" { + export TEST_ENV_VAR="" + + result=$(get_config_value \ + --env TEST_ENV_VAR \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --default "default-value") + + # Empty string from env should NOT be used, falls through to provider + assert_equal "$result" "scope-config-namespace" +} + +# ============================================================================= +# Test: Real-world scenario - region selection (only from cloud-providers) +# ============================================================================= +@test "get_config_value: real-world region selection from cloud-providers only" { + # Scenario: region should only come from cloud-providers, not scope-configuration + result=$(get_config_value \ + --provider '.providers["cloud-providers"].account.region' \ + --default "us-east-1") + + assert_equal "$result" "eu-west-1" +} + +# ============================================================================= +# Test: Real-world scenario - namespace with override (provider wins) +# ============================================================================= +@test "get_config_value: real-world namespace - provider wins over NAMESPACE_OVERRIDE" { + export 
NAMESPACE_OVERRIDE="prod-override" + + result=$(get_config_value \ + --env NAMESPACE_OVERRIDE \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --provider '.providers["container-orchestration"].cluster.namespace' \ + --default "default-ns") + + # Provider wins over env var + assert_equal "$result" "scope-config-namespace" +} + +# ============================================================================= +# Test: Argument order does NOT affect priority - providers always win +# ============================================================================= +@test "get_config_value: argument order does not affect priority - provider first" { + export TEST_ENV_VAR="env-value" + + # Test with provider before env + result=$(get_config_value \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --env TEST_ENV_VAR \ + --default "default-value") + + assert_equal "$result" "scope-config-namespace" +} + +@test "get_config_value: argument order does not affect priority - env first" { + export TEST_ENV_VAR="env-value" + + # Test with env before provider - provider should still win + result=$(get_config_value \ + --env TEST_ENV_VAR \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --default "default-value") + + assert_equal "$result" "scope-config-namespace" +} + +@test "get_config_value: argument order does not affect priority - default first" { + export TEST_ENV_VAR="env-value" + + # Test with default first - provider should still win + result=$(get_config_value \ + --default "default-value" \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --env TEST_ENV_VAR) + + assert_equal "$result" "scope-config-namespace" +} + +@test "get_config_value: argument order does not affect priority - mixed order" { + export TEST_ENV_VAR="env-value" + + # Test with mixed order + result=$(get_config_value \ + --default "default-value" \ + --env TEST_ENV_VAR \ + --provider 
'.providers["scope-configurations"].kubernetes.namespace') + + assert_equal "$result" "scope-config-namespace" +} + +# ============================================================================= +# Test: Env var wins when no providers exist, regardless of argument order +# ============================================================================= +@test "get_config_value: env var wins when no providers - default first" { + export TEST_ENV_VAR="env-value" + + result=$(get_config_value \ + --default "default-value" \ + --env TEST_ENV_VAR \ + --provider '.providers["non-existent"].value') + + assert_equal "$result" "env-value" +} + +@test "get_config_value: env var wins when no providers - env last" { + export TEST_ENV_VAR="env-value" + + result=$(get_config_value \ + --provider '.providers["non-existent"].value' \ + --default "default-value" \ + --env TEST_ENV_VAR) + + assert_equal "$result" "env-value" +} + +# ============================================================================= +# Test: Multiple providers priority order is preserved +# ============================================================================= +@test "get_config_value: multiple providers - order matters among providers" { + # First provider in list should win + result=$(get_config_value \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --provider '.providers["container-orchestration"].cluster.namespace' \ + --default "default-value") + + assert_equal "$result" "scope-config-namespace" +} + +@test "get_config_value: multiple providers - reversed order" { + # First provider in list should still win (container-orchestration comes first) + result=$(get_config_value \ + --provider '.providers["container-orchestration"].cluster.namespace' \ + --provider '.providers["scope-configurations"].kubernetes.namespace' \ + --default "default-value") + + assert_equal "$result" "container-orch-namespace" +} diff --git a/k8s/values.yaml b/k8s/values.yaml index 
56edaa68..020b6059 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -1,6 +1,7 @@ provider_categories: - container-orchestration - cloud-providers + - scope-configurations configuration: K8S_NAMESPACE: nullplatform CREATE_K8S_NAMESPACE_IF_NOT_EXIST: true @@ -9,6 +10,10 @@ configuration: USE_ACCOUNT_SLUG: false DNS_TYPE: route53 # Available values route53 | azure | external_dns ALB_RECONCILIATION_ENABLED: false + ALB_MAX_CAPACITY: 75 + ALB_MAX_TARGET_GROUPS: 98 + ALB_METRICS_PUBLISH_ENABLED: false +# ALB_METRICS_PUBLISH_TARGET: cloudwatch # Available values: cloudwatch | datadog DEPLOYMENT_MAX_WAIT_IN_SECONDS: 600 DEPLOYMENT_TEMPLATE: "$SERVICE_PATH/deployment/templates/deployment.yaml.tpl" SECRET_TEMPLATE: "$SERVICE_PATH/deployment/templates/secret.yaml.tpl" @@ -18,6 +23,9 @@ configuration: INITIAL_INGRESS_PATH: "$SERVICE_PATH/deployment/templates/initial-ingress.yaml.tpl" BLUE_GREEN_INGRESS_PATH: "$SERVICE_PATH/deployment/templates/blue-green-ingress.yaml.tpl" SERVICE_ACCOUNT_TEMPLATE: "$SERVICE_PATH/scope/templates/service-account.yaml.tpl" + CONTAINER_MEMORY_IN_MEMORY: 64 + CONTAINER_CPU_IN_MILLICORES: 93 + LOG_LEVEL: info # TRAFFIC_CONTAINER_IMAGE: "public.ecr.aws/nullplatform/k8s-traffic-manager:latest" # TRAFFIC_MANAGER_CONFIG_MAP: traffic-manager-configuration IMAGE_PULL_SECRETS: diff --git a/makefile b/makefile new file mode 100644 index 00000000..e091370b --- /dev/null +++ b/makefile @@ -0,0 +1,54 @@ +.PHONY: test test-all test-unit test-tofu test-integration help + +# Default test target - shows available options +test: + @echo "Usage: make test-" + @echo "" + @echo "Available test levels:" + @echo " make test-all Run all tests" + @echo " make test-unit Run BATS unit tests" + @echo " make test-tofu Run OpenTofu tests" + @echo " make test-integration Run integration tests" + @echo "" + @echo "You can also run tests for a specific module:" + @echo " make test-unit MODULE=frontend" + +# Run all tests +test-all: test-unit test-tofu test-integration + +# 
Run BATS unit tests +test-unit: +ifdef MODULE + @./testing/run_bats_tests.sh $(MODULE) +else + @./testing/run_bats_tests.sh +endif + +# Run OpenTofu tests +test-tofu: +ifdef MODULE + @./testing/run_tofu_tests.sh $(MODULE) +else + @./testing/run_tofu_tests.sh +endif + +# Run integration tests +test-integration: +ifdef MODULE + @./testing/run_integration_tests.sh $(MODULE) $(if $(VERBOSE),-v) +else + @./testing/run_integration_tests.sh $(if $(VERBOSE),-v) +endif + +# Help +help: + @echo "Test targets:" + @echo " test Show available test options" + @echo " test-all Run all tests" + @echo " test-unit Run BATS unit tests" + @echo " test-tofu Run OpenTofu tests" + @echo " test-integration Run integration tests" + @echo "" + @echo "Options:" + @echo " MODULE= Run tests for specific module (e.g., MODULE=frontend)" + @echo " VERBOSE=1 Show output of passing tests (integration tests only)" diff --git a/scheduled_task/specs/notification-channel.json.tpl b/scheduled_task/specs/notification-channel.json.tpl index f1db58e5..080fdef7 100644 --- a/scheduled_task/specs/notification-channel.json.tpl +++ b/scheduled_task/specs/notification-channel.json.tpl @@ -1,6 +1,7 @@ { "nrn": "{{ env.Getenv "NRN" }}", "status": "active", + "description": "Channel to handle Scheduled tasks scopes", "type": "agent", "source": [ "telemetry", diff --git a/scheduled_task/specs/service-spec.json.tpl b/scheduled_task/specs/service-spec.json.tpl index f6ce2009..4ffc81ac 100644 --- a/scheduled_task/specs/service-spec.json.tpl +++ b/scheduled_task/specs/service-spec.json.tpl @@ -87,7 +87,7 @@ "type": "number" } ], - "default": 500, + "default": 100, "description": "Amount of CPU to allocate (in millicores, 1000m = 1 CPU core)", "title": "CPU Millicores", "type": "integer" @@ -121,6 +121,9 @@ { "const": "0 0 * * *", "title": "Every day (midnight)" + }, + { + "type": "string" } ], "description": "Specify how often the task should run. 
You can select a predefined option or enter a standard cron expression for custom schedules.", @@ -285,10 +288,10 @@ "dimensions": {}, "name": "Scheduled task", "selectors": { - "category": "any", + "category": "Scope", "imported": false, - "provider": "any", - "sub_category": "any" + "provider": "Agent", + "sub_category": "Scheduled task" }, "type": "scope", "use_default_actions": false, diff --git a/testing/assertions.sh b/testing/assertions.sh new file mode 100644 index 00000000..d0abc399 --- /dev/null +++ b/testing/assertions.sh @@ -0,0 +1,325 @@ +#!/usr/bin/env bash +# ============================================================================= +# Shared assertion functions for BATS tests +# +# Usage: Add this line at the top of your .bats file's setup() function: +# source "$PROJECT_ROOT/testing/assertions.sh" +# ============================================================================= + +# ============================================================================= +# Assertion functions +# ============================================================================= +assert_equal() { + local actual="$1" + local expected="$2" + if [ "$actual" != "$expected" ]; then + echo "Expected: '$expected'" + echo "Actual: '$actual'" + return 1 + fi +} + +assert_contains() { + local haystack="$1" + local needle="$2" + if [[ "$haystack" != *"$needle"* ]]; then + echo "Expected string to contain: '$needle'" + echo "Actual: '$haystack'" + return 1 + fi +} + +assert_not_empty() { + local value="$1" + local name="${2:-value}" + if [ -z "$value" ]; then + echo "Expected $name to be non-empty, but it was empty" + return 1 + fi +} + +assert_empty() { + local value="$1" + local name="${2:-value}" + if [ -n "$value" ]; then + echo "Expected $name to be empty" + echo "Actual: '$value'" + return 1 + fi +} + +assert_true() { + local value="$1" + local name="${2:-value}" + if [[ "$value" != "true" ]]; then + echo "Expected $name to be true" + echo "Actual: '$value'" + return 1 
+ fi +} + +assert_false() { + local value="$1" + local name="${2:-value}" + if [[ "$value" != "false" ]]; then + echo "Expected $name to be false" + echo "Actual: '$value'" + return 1 + fi +} + +assert_greater_than() { + local actual="$1" + local expected="$2" + local name="${3:-value}" + if [[ ! "$actual" -gt "$expected" ]]; then + echo "Expected $name to be greater than $expected" + echo "Actual: '$actual'" + return 1 + fi +} + +assert_less_than() { + local actual="$1" + local expected="$2" + local name="${3:-value}" + if [[ ! "$actual" -lt "$expected" ]]; then + echo "Expected $name to be less than $expected" + echo "Actual: '$actual'" + return 1 + fi +} + +# Assert that commands appear in a specific order in a log file +# Usage: assert_command_order "" "command1" "command2" ["command3" ...] +# Example: assert_command_order "$LOG_FILE" "init" "apply" +assert_command_order() { + local log_file="$1" + shift + local commands=("$@") + + if [[ ${#commands[@]} -lt 2 ]]; then + echo "assert_command_order requires at least 2 commands" + return 1 + fi + + if [[ ! -f "$log_file" ]]; then + echo "Log file not found: $log_file" + return 1 + fi + + local prev_line=0 + local prev_cmd="" + + for cmd in "${commands[@]}"; do + local line_num + line_num=$(grep -n "$cmd" "$log_file" | head -1 | cut -d: -f1) + + if [[ -z "$line_num" ]]; then + echo "Command '$cmd' not found in log file" + return 1 + fi + + if [[ $prev_line -gt 0 ]] && [[ $line_num -le $prev_line ]]; then + echo "Expected: '$cmd'" + echo "To be executed after: '$prev_cmd'" + + echo "Actual execution order:" + echo " '$prev_cmd' at line $prev_line" + echo " '$cmd' at line $line_num" + return 1 + fi + + prev_line=$line_num + prev_cmd=$cmd + done +} + +assert_directory_exists() { + local dir="$1" + if [ ! -d "$dir" ]; then + echo "Expected directory to exist: '$dir'" + return 1 + fi +} + +assert_file_exists() { + local file="$1" + if [ ! 
-f "$file" ]; then + echo "Expected file to exist: '$file'" + return 1 + fi +} + +assert_file_not_exists() { + local file="$1" + if [ -f "$file" ]; then + echo "Expected file to not exist: '$file'" + return 1 + fi +} + +assert_json_equal() { + local actual="$1" + local expected="$2" + local name="${3:-JSON}" + + local actual_sorted=$(echo "$actual" | jq -S .) + local expected_sorted=$(echo "$expected" | jq -S .) + + if [ "$actual_sorted" != "$expected_sorted" ]; then + echo "$name does not match expected structure" + echo "" + echo "Diff:" + diff <(echo "$expected_sorted") <(echo "$actual_sorted") || true + echo "" + echo "Expected:" + echo "$expected_sorted" + echo "" + echo "Actual:" + echo "$actual_sorted" + echo "" + return 1 + fi +} + +# ============================================================================= +# Mock helpers +# ============================================================================= + +# Set up a mock response for the np CLI +# Usage: set_np_mock "" [exit_code] +set_np_mock() { + local mock_file="$1" + local exit_code="${2:-0}" + export NP_MOCK_RESPONSE="$mock_file" + export NP_MOCK_EXIT_CODE="$exit_code" +} + + +# Set up a mock response for the aws CLI +# Usage: set_aws_mock "" [exit_code] +# Requires: AWS_MOCKS_DIR to be set in the test setup +set_aws_mock() { + local mock_file="$1" + local exit_code="${2:-0}" + export AWS_MOCK_RESPONSE="$mock_file" + export AWS_MOCK_EXIT_CODE="$exit_code" +} + +# Set up a mock response for the az CLI +# Usage: set_az_mock "" [exit_code] +# Requires: AZURE_MOCKS_DIR to be set in the test setup +set_az_mock() { + local mock_file="$1" + local exit_code="${2:-0}" + export AZ_MOCK_RESPONSE="$mock_file" + export AZ_MOCK_EXIT_CODE="$exit_code" +} + +# ============================================================================= +# Help / Documentation +# ============================================================================= + +# Display help for all available unit test assertion utilities 
+test_help() { + cat <<'EOF' +================================================================================ + Unit Test Assertions Reference +================================================================================ + +VALUE ASSERTIONS +---------------- + assert_equal "" "" + Assert two string values are equal. + Example: assert_equal "$result" "expected_value" + + assert_contains "" "" + Assert a string contains a substring. + Example: assert_contains "$output" "success" + + assert_not_empty "" [""] + Assert a value is not empty. + Example: assert_not_empty "$result" "API response" + + assert_empty "" [""] + Assert a value is empty. + Example: assert_empty "$error" "error message" + + assert_true "" [""] + Assert a value equals the string "true". + Example: assert_true "$enabled" "distribution enabled" + + assert_false "" [""] + Assert a value equals the string "false". + Example: assert_false "$disabled" "feature disabled" + +NUMERIC ASSERTIONS +------------------ + assert_greater_than "" "" [""] + Assert a number is greater than another. + Example: assert_greater_than "$count" "0" "item count" + + assert_less_than "" "" [""] + Assert a number is less than another. + Example: assert_less_than "$errors" "10" "error count" + +COMMAND ORDER ASSERTIONS +------------------------ + assert_command_order "" "cmd1" "cmd2" ["cmd3" ...] + Assert commands appear in order in a log file. + Example: assert_command_order "$LOG" "init" "apply" "output" + +FILE SYSTEM ASSERTIONS +---------------------- + assert_file_exists "" + Assert a file exists. + Example: assert_file_exists "/tmp/output.json" + + assert_file_not_exists "" + Assert a file does not exist. + Example: assert_file_not_exists "/tmp/should_not_exist.json" + + assert_directory_exists "" + Assert a directory exists. + Example: assert_directory_exists "/tmp/output" + +JSON ASSERTIONS +--------------- + assert_json_equal "" "" [""] + Assert two JSON structures are equal (order-independent). 
+ Example: assert_json_equal "$response" '{"status": "ok"}' + +MOCK HELPERS +------------ + set_np_mock "" [exit_code] + Set up a mock response for the np CLI. + Example: set_np_mock "$MOCKS_DIR/provider/success.json" + + set_aws_mock "" [exit_code] + Set up a mock response for the aws CLI. + Example: set_aws_mock "$MOCKS_DIR/route53/success.json" + +BATS BUILT-IN HELPERS +--------------------- + run + Run a command and capture output in $output and exit code in $status. + Example: run my_function "arg1" "arg2" + + [ "$status" -eq 0 ] + Check exit code after 'run'. + + [[ "$output" == *"expected"* ]] + Check output contains expected string. + +USAGE IN TESTS +-------------- + Add this to your test file's setup() function: + + setup() { + source "$PROJECT_ROOT/testing/assertions.sh" + } + +================================================================================ +EOF +} diff --git a/testing/azure-mock-provider/backend_override.tf b/testing/azure-mock-provider/backend_override.tf new file mode 100644 index 00000000..8a04e28e --- /dev/null +++ b/testing/azure-mock-provider/backend_override.tf @@ -0,0 +1,9 @@ +# Backend override for Azure Mock testing +# This configures the azurerm backend to use the mock blob storage + +terraform { + backend "azurerm" { + # These values are overridden at runtime via -backend-config flags + # but we need a backend block for terraform to accept them + } +} diff --git a/testing/azure-mock-provider/provider_override.tf b/testing/azure-mock-provider/provider_override.tf new file mode 100644 index 00000000..6b1a4406 --- /dev/null +++ b/testing/azure-mock-provider/provider_override.tf @@ -0,0 +1,32 @@ +# Override file for Azure Mock testing +# This file is copied into the module directory during integration tests +# to configure the Azure provider to use mock endpoints +# +# This is analogous to the LocalStack provider override for AWS tests. 
+# +# Azure Mock (port 8080): ARM APIs (CDN, DNS, Storage) + Blob Storage API + +provider "azurerm" { + features {} + + # Test subscription ID (mock doesn't validate this) + subscription_id = "mock-subscription-id" + + # Skip provider registration (not needed for mock) + skip_provider_registration = true + + # Use client credentials with mock values + # The mock server accepts any credentials + client_id = "mock-client-id" + client_secret = "mock-client-secret" + tenant_id = "mock-tenant-id" + + # Disable all authentication methods except client credentials + use_msi = false + use_cli = false + use_oidc = false + + default_tags { + tags = var.resource_tags + } +} diff --git a/testing/docker/Dockerfile.test-runner b/testing/docker/Dockerfile.test-runner new file mode 100644 index 00000000..4323fbdb --- /dev/null +++ b/testing/docker/Dockerfile.test-runner @@ -0,0 +1,47 @@ +# ============================================================================= +# Integration Test Runner Container +# +# Contains all tools needed to run integration tests: +# - bats-core (test framework) +# - aws-cli (for LocalStack/Moto assertions) +# - azure-cli (for Azure API calls) +# - jq (JSON processing) +# - curl (HTTP requests) +# - np CLI (nullplatform CLI) +# - opentofu (infrastructure as code) +# ============================================================================= + +FROM alpine:3.19 + +# Install base dependencies +RUN apk add --no-cache \ + bash \ + curl \ + jq \ + git \ + openssh \ + docker-cli \ + aws-cli \ + ca-certificates \ + ncurses \ + python3 \ + py3-pip + +# Install bats-core +RUN apk add --no-cache bats + +# Install OpenTofu +RUN apk add --no-cache --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community opentofu + +# Install Azure CLI +RUN pip3 install --break-system-packages azure-cli + +# Install nullplatform CLI and add to PATH +RUN curl -fsSL https://cli.nullplatform.com/install.sh | sh +ENV PATH="/root/.local/bin:${PATH}" + +# Create workspace 
directory +WORKDIR /workspace + +# Default command - run bats tests +ENTRYPOINT ["/bin/bash"] diff --git a/testing/docker/azure-mock/Dockerfile b/testing/docker/azure-mock/Dockerfile new file mode 100644 index 00000000..0e3d902e --- /dev/null +++ b/testing/docker/azure-mock/Dockerfile @@ -0,0 +1,44 @@ +# Azure Mock API Server +# +# Lightweight mock server that implements Azure REST API endpoints +# for integration testing without requiring real Azure resources. +# +# Build: +# docker build -t azure-mock . +# +# Run: +# docker run -p 8080:8080 azure-mock + +FROM golang:1.21-alpine AS builder + +WORKDIR /app + +# Copy go mod files +COPY go.mod ./ + +# Copy source code +COPY main.go ./ + +# Build the binary +RUN CGO_ENABLED=0 GOOS=linux go build -o azure-mock . + +# Final stage - minimal image +FROM alpine:3.19 + +# Add ca-certificates for HTTPS (if needed) and curl for healthcheck +RUN apk --no-cache add ca-certificates curl + +WORKDIR /app + +# Copy binary from builder +COPY --from=builder /app/azure-mock . + +# Expose port +EXPOSE 8080 + +# Health check +HEALTHCHECK --interval=5s --timeout=3s --retries=10 \ + CMD curl -f http://localhost:8080/health || exit 1 + +# Run the server +CMD ["./azure-mock"] diff --git a/testing/docker/azure-mock/go.mod b/testing/docker/azure-mock/go.mod new file mode 100644 index 00000000..a2f2e22e --- /dev/null +++ b/testing/docker/azure-mock/go.mod @@ -0,0 +1,3 @@ +module azure-mock + +go 1.21 diff --git a/testing/docker/azure-mock/main.go b/testing/docker/azure-mock/main.go new file mode 100644 index 00000000..57c81baf --- /dev/null +++ b/testing/docker/azure-mock/main.go @@ -0,0 +1,3669 @@ +// Azure Mock API Server +// +// A lightweight mock server that implements Azure REST API endpoints +// for integration testing. 
Supports: +// - Azure CDN (profiles and endpoints) +// - Azure DNS (zones and CNAME records) +// - Azure Storage Accounts (read-only for data source) +// +// Usage: +// +// docker run -p 8080:8080 azure-mock +// +// Configure Terraform azurerm provider to use this endpoint. +package main + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "regexp" + "strings" + "sync" + "time" +) + +// ============================================================================= +// In-Memory Store +// ============================================================================= + +type Store struct { + mu sync.RWMutex + cdnProfiles map[string]CDNProfile + cdnEndpoints map[string]CDNEndpoint + cdnCustomDomains map[string]CDNCustomDomain + dnsZones map[string]DNSZone + dnsCNAMERecords map[string]DNSCNAMERecord + storageAccounts map[string]StorageAccount + blobContainers map[string]BlobContainer // key: accountName/containerName + blobs map[string]Blob // key: accountName/containerName/blobName + blobBlocks map[string][]byte // key: blobKey/blockId - staged blocks for block blob uploads + // App Service resources + appServicePlans map[string]AppServicePlan + linuxWebApps map[string]LinuxWebApp + webAppSlots map[string]WebAppSlot + logAnalyticsWorkspaces map[string]LogAnalyticsWorkspace + appInsights map[string]ApplicationInsights + autoscaleSettings map[string]AutoscaleSetting + actionGroups map[string]ActionGroup + metricAlerts map[string]MetricAlert + diagnosticSettings map[string]DiagnosticSetting + trafficRouting map[string][]TrafficRoutingRule +} + +// TrafficRoutingRule represents a traffic routing rule for a slot +type TrafficRoutingRule struct { + ActionHostName string `json:"actionHostName"` + ReroutePercentage int `json:"reroutePercentage"` + Name string `json:"name"` +} + +func NewStore() *Store { + return &Store{ + cdnProfiles: make(map[string]CDNProfile), + cdnEndpoints: make(map[string]CDNEndpoint), + cdnCustomDomains: 
make(map[string]CDNCustomDomain), + dnsZones: make(map[string]DNSZone), + dnsCNAMERecords: make(map[string]DNSCNAMERecord), + storageAccounts: make(map[string]StorageAccount), + blobContainers: make(map[string]BlobContainer), + blobs: make(map[string]Blob), + blobBlocks: make(map[string][]byte), + appServicePlans: make(map[string]AppServicePlan), + linuxWebApps: make(map[string]LinuxWebApp), + webAppSlots: make(map[string]WebAppSlot), + logAnalyticsWorkspaces: make(map[string]LogAnalyticsWorkspace), + appInsights: make(map[string]ApplicationInsights), + autoscaleSettings: make(map[string]AutoscaleSetting), + actionGroups: make(map[string]ActionGroup), + metricAlerts: make(map[string]MetricAlert), + diagnosticSettings: make(map[string]DiagnosticSetting), + trafficRouting: make(map[string][]TrafficRoutingRule), + } +} + +// ============================================================================= +// Azure Resource Models +// ============================================================================= + +// CDN Profile +type CDNProfile struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Sku CDNSku `json:"sku"` + Properties CDNProfileProps `json:"properties"` +} + +type CDNSku struct { + Name string `json:"name"` +} + +type CDNProfileProps struct { + ResourceState string `json:"resourceState"` + ProvisioningState string `json:"provisioningState"` +} + +// CDN Endpoint +type CDNEndpoint struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Properties CDNEndpointProps `json:"properties"` +} + +// CDN Custom Domain +type CDNCustomDomain struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Properties CDNCustomDomainProps `json:"properties"` +} + +type CDNCustomDomainProps struct { + 
HostName string `json:"hostName"` + ResourceState string `json:"resourceState"` + ProvisioningState string `json:"provisioningState"` + ValidationData string `json:"validationData,omitempty"` +} + +type CDNEndpointProps struct { + HostName string `json:"hostName"` + OriginHostHeader string `json:"originHostHeader,omitempty"` + Origins []CDNOrigin `json:"origins"` + OriginPath string `json:"originPath,omitempty"` + IsHttpAllowed bool `json:"isHttpAllowed"` + IsHttpsAllowed bool `json:"isHttpsAllowed"` + IsCompressionEnabled bool `json:"isCompressionEnabled"` + ResourceState string `json:"resourceState"` + ProvisioningState string `json:"provisioningState"` + DeliveryPolicy *CDNDeliveryPolicy `json:"deliveryPolicy,omitempty"` +} + +type CDNOrigin struct { + Name string `json:"name"` + Properties CDNOriginProps `json:"properties"` +} + +type CDNOriginProps struct { + HostName string `json:"hostName"` + HttpPort int `json:"httpPort,omitempty"` + HttpsPort int `json:"httpsPort,omitempty"` +} + +type CDNDeliveryPolicy struct { + Rules []CDNDeliveryRule `json:"rules,omitempty"` +} + +type CDNDeliveryRule struct { + Name string `json:"name"` + Order int `json:"order"` + Actions []interface{} `json:"actions,omitempty"` +} + +// DNS Zone +type DNSZone struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Properties DNSZoneProps `json:"properties"` +} + +type DNSZoneProps struct { + MaxNumberOfRecordSets int `json:"maxNumberOfRecordSets"` + NumberOfRecordSets int `json:"numberOfRecordSets"` + NameServers []string `json:"nameServers"` +} + +// DNS CNAME Record +type DNSCNAMERecord struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Etag string `json:"etag,omitempty"` + Properties DNSCNAMERecordProps `json:"properties"` +} + +type DNSCNAMERecordProps struct { + TTL int `json:"TTL"` + Fqdn string `json:"fqdn,omitempty"` 
+ CNAMERecord *DNSCNAMEValue `json:"CNAMERecord,omitempty"` +} + +type DNSCNAMEValue struct { + Cname string `json:"cname"` +} + +// Storage Account +type StorageAccount struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Kind string `json:"kind"` + Sku StorageSku `json:"sku"` + Properties StorageAccountProps `json:"properties"` +} + +type StorageSku struct { + Name string `json:"name"` + Tier string `json:"tier"` +} + +type StorageAccountProps struct { + PrimaryEndpoints StorageEndpoints `json:"primaryEndpoints"` + ProvisioningState string `json:"provisioningState"` +} + +type StorageEndpoints struct { + Blob string `json:"blob"` + Web string `json:"web"` +} + +// Blob Storage Container +type BlobContainer struct { + Name string `json:"name"` + Properties BlobContainerProps `json:"properties"` +} + +type BlobContainerProps struct { + LastModified string `json:"lastModified"` + Etag string `json:"etag"` +} + +// Blob +type Blob struct { + Name string `json:"name"` + Content []byte `json:"-"` + Properties BlobProps `json:"properties"` + Metadata map[string]string `json:"-"` // x-ms-meta-* headers +} + +type BlobProps struct { + LastModified string `json:"lastModified"` + Etag string `json:"etag"` + ContentLength int `json:"contentLength"` + ContentType string `json:"contentType"` +} + +// ============================================================================= +// App Service Models +// ============================================================================= + +// App Service Plan (serverfarms) +type AppServicePlan struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Kind string `json:"kind,omitempty"` + Sku AppServiceSku `json:"sku"` + Properties AppServicePlanProps `json:"properties"` +} + +type AppServiceSku 
struct { + Name string `json:"name"` + Tier string `json:"tier"` + Size string `json:"size"` + Family string `json:"family"` + Capacity int `json:"capacity"` +} + +type AppServicePlanProps struct { + ProvisioningState string `json:"provisioningState"` + Status string `json:"status"` + MaximumNumberOfWorkers int `json:"maximumNumberOfWorkers"` + NumberOfSites int `json:"numberOfSites"` + PerSiteScaling bool `json:"perSiteScaling"` + ZoneRedundant bool `json:"zoneRedundant"` + Reserved bool `json:"reserved"` // true for Linux +} + +// Linux Web App (sites) +type LinuxWebApp struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Kind string `json:"kind,omitempty"` + Identity *AppIdentity `json:"identity,omitempty"` + Properties LinuxWebAppProps `json:"properties"` +} + +type AppIdentity struct { + Type string `json:"type"` + PrincipalID string `json:"principalId,omitempty"` + TenantID string `json:"tenantId,omitempty"` + UserIDs map[string]string `json:"userAssignedIdentities,omitempty"` +} + +type LinuxWebAppProps struct { + ProvisioningState string `json:"provisioningState"` + State string `json:"state"` + DefaultHostName string `json:"defaultHostName"` + ServerFarmID string `json:"serverFarmId"` + HTTPSOnly bool `json:"httpsOnly"` + ClientAffinityEnabled bool `json:"clientAffinityEnabled"` + OutboundIPAddresses string `json:"outboundIpAddresses"` + PossibleOutboundIPAddresses string `json:"possibleOutboundIpAddresses"` + CustomDomainVerificationID string `json:"customDomainVerificationId"` + SiteConfig *WebAppSiteConfig `json:"siteConfig,omitempty"` +} + +type WebAppSiteConfig struct { + AlwaysOn bool `json:"alwaysOn"` + HTTP20Enabled bool `json:"http20Enabled"` + WebSocketsEnabled bool `json:"webSocketsEnabled"` + FtpsState string `json:"ftpsState"` + MinTLSVersion string `json:"minTlsVersion"` + LinuxFxVersion string `json:"linuxFxVersion"` + 
AppCommandLine string `json:"appCommandLine,omitempty"` + HealthCheckPath string `json:"healthCheckPath,omitempty"` + VnetRouteAllEnabled bool `json:"vnetRouteAllEnabled"` + AutoHealEnabled bool `json:"autoHealEnabled"` + Experiments *WebAppExperiments `json:"experiments,omitempty"` +} + +// WebAppExperiments contains traffic routing configuration +type WebAppExperiments struct { + RampUpRules []RampUpRule `json:"rampUpRules,omitempty"` +} + +// RampUpRule defines traffic routing to a deployment slot +type RampUpRule struct { + ActionHostName string `json:"actionHostName"` + ReroutePercentage float64 `json:"reroutePercentage"` + Name string `json:"name"` +} + +// Web App Slot +type WebAppSlot struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Kind string `json:"kind,omitempty"` + Properties LinuxWebAppProps `json:"properties"` +} + +// Log Analytics Workspace +type LogAnalyticsWorkspace struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Properties LogAnalyticsWorkspaceProps `json:"properties"` +} + +type LogAnalyticsWorkspaceProps struct { + ProvisioningState string `json:"provisioningState"` + CustomerID string `json:"customerId"` + Sku struct { + Name string `json:"name"` + } `json:"sku"` + RetentionInDays int `json:"retentionInDays"` +} + +// Application Insights +type ApplicationInsights struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Kind string `json:"kind"` + Properties ApplicationInsightsProps `json:"properties"` +} + +type ApplicationInsightsProps struct { + ProvisioningState string `json:"provisioningState"` + ApplicationID string `json:"AppId"` + InstrumentationKey string 
`json:"InstrumentationKey"` + ConnectionString string `json:"ConnectionString"` + WorkspaceResourceID string `json:"WorkspaceResourceId,omitempty"` +} + +// Monitor Autoscale Settings +type AutoscaleSetting struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Properties AutoscaleSettingProps `json:"properties"` +} + +type AutoscaleSettingProps struct { + ProvisioningState string `json:"provisioningState,omitempty"` + Enabled bool `json:"enabled"` + TargetResourceURI string `json:"targetResourceUri"` + TargetResourceLocation string `json:"targetResourceLocation,omitempty"` + Profiles []interface{} `json:"profiles"` + Notifications []interface{} `json:"notifications,omitempty"` +} + +// Monitor Action Group +type ActionGroup struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Properties ActionGroupProps `json:"properties"` +} + +type ActionGroupProps struct { + GroupShortName string `json:"groupShortName"` + Enabled bool `json:"enabled"` + EmailReceivers []interface{} `json:"emailReceivers,omitempty"` + WebhookReceivers []interface{} `json:"webhookReceivers,omitempty"` +} + +// Monitor Metric Alert +type MetricAlert struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Location string `json:"location"` + Tags map[string]string `json:"tags,omitempty"` + Properties MetricAlertProps `json:"properties"` +} + +type MetricAlertProps struct { + Description string `json:"description,omitempty"` + Severity int `json:"severity"` + Enabled bool `json:"enabled"` + Scopes []string `json:"scopes"` + EvaluationFrequency string `json:"evaluationFrequency"` + WindowSize string `json:"windowSize"` + Criteria interface{} `json:"criteria"` + Actions []interface{} `json:"actions,omitempty"` +} + +// 
Diagnostic Settings (nested resource) +type DiagnosticSetting struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Properties DiagnosticSettingProps `json:"properties"` +} + +type DiagnosticSettingProps struct { + WorkspaceID string `json:"workspaceId,omitempty"` + Logs []interface{} `json:"logs,omitempty"` + Metrics []interface{} `json:"metrics,omitempty"` +} + +// Azure Error Response +type AzureError struct { + Error AzureErrorDetail `json:"error"` +} + +type AzureErrorDetail struct { + Code string `json:"code"` + Message string `json:"message"` +} + +// ============================================================================= +// Server +// ============================================================================= + +type Server struct { + store *Store +} + +func NewServer() *Server { + return &Server{ + store: NewStore(), + } +} + +func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + method := r.Method + host := r.Host + + log.Printf("%s %s (Host: %s)", method, path, host) + + // Health check + if path == "/health" || path == "/" { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{"status": "ok"}) + return + } + + // Check if this is a Blob Storage request (based on Host header) + if strings.Contains(host, ".blob.core.windows.net") { + s.handleBlobStorage(w, r) + return + } + + w.Header().Set("Content-Type", "application/json") + + // OpenID Connect discovery endpoints (required by MSAL/Azure CLI) + if strings.Contains(path, "/.well-known/openid-configuration") { + s.handleOpenIDConfiguration(w, r) + return + } + + // MSAL instance discovery endpoint + if strings.Contains(path, "/common/discovery/instance") || strings.Contains(path, "/discovery/instance") { + s.handleInstanceDiscovery(w, r) + return + } + + // OAuth token endpoint (Azure AD authentication) + if strings.Contains(path, "/oauth2/token") || strings.Contains(path, 
"/oauth2/v2.0/token") { + s.handleOAuth(w, r) + return + } + + // Subscription endpoint + if matchSubscription(path) { + s.handleSubscription(w, r) + return + } + + // List all providers endpoint (for provider cache) + if matchListProviders(path) { + s.handleListProviders(w, r) + return + } + + // Provider registration endpoint + if matchProviderRegistration(path) { + s.handleProviderRegistration(w, r) + return + } + + // Route to appropriate handler + // Note: More specific routes must come first (operationresults before enableCustomHttps before customDomain, customDomain before endpoint) + switch { + case matchCDNOperationResults(path): + s.handleCDNOperationResults(w, r) + case matchCDNCustomDomainEnableHttps(path): + s.handleCDNCustomDomainHttps(w, r, true) + case matchCDNCustomDomainDisableHttps(path): + s.handleCDNCustomDomainHttps(w, r, false) + case matchCDNCustomDomain(path): + s.handleCDNCustomDomain(w, r) + case matchCDNProfile(path): + s.handleCDNProfile(w, r) + case matchCDNEndpoint(path): + s.handleCDNEndpoint(w, r) + case matchDNSZone(path): + s.handleDNSZone(w, r) + case matchDNSCNAMERecord(path): + s.handleDNSCNAMERecord(w, r) + case matchStorageAccountKeys(path): + s.handleStorageAccountKeys(w, r) + case matchStorageAccount(path): + s.handleStorageAccount(w, r) + // App Service handlers (more specific routes first) + case matchWebAppCheckName(path): + s.handleWebAppCheckName(w, r) + case matchWebAppAuthSettings(path): + s.handleWebAppAuthSettings(w, r) + case matchWebAppAuthSettingsV2(path): + s.handleWebAppAuthSettingsV2(w, r) + case matchWebAppConfigLogs(path): + s.handleWebAppConfigLogs(w, r) + case matchWebAppAppSettings(path): + s.handleWebAppAppSettings(w, r) + case matchWebAppConnStrings(path): + s.handleWebAppConnStrings(w, r) + case matchWebAppStickySettings(path): + s.handleWebAppStickySettings(w, r) + case matchWebAppStorageAccounts(path): + s.handleWebAppStorageAccounts(w, r) + case matchWebAppBackups(path): + s.handleWebAppBackups(w, 
r) + case matchWebAppMetadata(path): + s.handleWebAppMetadata(w, r) + case matchWebAppPubCreds(path): + s.handleWebAppPubCreds(w, r) + case matchWebAppConfig(path): + // Must be before ConfigFallback - /config/web is more specific than /config/[^/]+ + s.handleWebAppConfig(w, r) + case matchWebAppConfigFallback(path): + s.handleWebAppConfigFallback(w, r) + case matchWebAppBasicAuthPolicy(path): + s.handleWebAppBasicAuthPolicy(w, r) + case matchWebAppSlotConfig(path): + s.handleWebAppSlotConfig(w, r) + case matchWebAppSlotConfigFallback(path): + s.handleWebAppSlotConfigFallback(w, r) + case matchWebAppSlotBasicAuthPolicy(path): + s.handleWebAppSlotBasicAuthPolicy(w, r) + case matchWebAppSlot(path): + s.handleWebAppSlot(w, r) + case matchWebAppTrafficRouting(path): + s.handleWebAppTrafficRouting(w, r) + case matchLinuxWebApp(path): + s.handleLinuxWebApp(w, r) + case matchAppServicePlan(path): + s.handleAppServicePlan(w, r) + // Monitoring handlers + case matchLogAnalytics(path): + s.handleLogAnalytics(w, r) + case matchAppInsights(path): + s.handleAppInsights(w, r) + case matchAutoscaleSetting(path): + s.handleAutoscaleSetting(w, r) + case matchActionGroup(path): + s.handleActionGroup(w, r) + case matchMetricAlert(path): + s.handleMetricAlert(w, r) + case matchDiagnosticSetting(path): + s.handleDiagnosticSetting(w, r) + default: + s.notFound(w, path) + } +} + +// ============================================================================= +// Path Matchers +// ============================================================================= + +var ( + subscriptionRegex = regexp.MustCompile(`^/subscriptions/[^/]+$`) + listProvidersRegex = regexp.MustCompile(`^/subscriptions/[^/]+/providers$`) + providerRegistrationRegex = regexp.MustCompile(`/subscriptions/[^/]+/providers/Microsoft\.[^/]+$`) + cdnProfileRegex = regexp.MustCompile(`/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Cdn/profiles/[^/]+$`) + cdnEndpointRegex = 
regexp.MustCompile(`/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Cdn/profiles/[^/]+/endpoints/[^/]+$`) + cdnCustomDomainRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Cdn/profiles/[^/]+/endpoints/[^/]+/customDomains/[^/]+$`) + cdnCustomDomainEnableHttpsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Cdn/profiles/[^/]+/endpoints/[^/]+/customDomains/[^/]+/enableCustomHttps$`) + cdnCustomDomainDisableHttpsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Cdn/profiles/[^/]+/endpoints/[^/]+/customDomains/[^/]+/disableCustomHttps$`) + cdnOperationResultsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Cdn/profiles/[^/]+/endpoints/[^/]+/customDomains/[^/]+/operationresults/`) + dnsZoneRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Network/dnszones/[^/]+$`) + dnsCNAMERecordRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Network/dnszones/[^/]+/CNAME/[^/]+$`) + storageAccountRegex = regexp.MustCompile(`/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Storage/storageAccounts/[^/]+$`) + storageAccountKeysRegex = regexp.MustCompile(`/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Storage/storageAccounts/[^/]+/listKeys$`) + // App Service resources + appServicePlanRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/serverfarms/[^/]+$`) + linuxWebAppRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+$`) + webAppSlotRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/slots/[^/]+$`) + webAppSlotConfigRegex = 
regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/slots/[^/]+/config/web$`) + webAppSlotConfigFallbackRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/slots/[^/]+/config/[^/]+(/list)?$`) + webAppSlotBasicAuthPolicyRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/slots/[^/]+/basicPublishingCredentialsPolicies/(ftp|scm)$`) + webAppConfigRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/web$`) + webAppCheckNameRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/providers/Microsoft\.Web/checknameavailability$`) + webAppAuthSettingsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/authsettings/list$`) + webAppAuthSettingsV2Regex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/authsettingsV2/list$`) + webAppConfigLogsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/logs$`) + webAppAppSettingsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/appSettings/list$`) + webAppConnStringsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/connectionstrings/list$`) + webAppStickySettingsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/slotConfigNames$`) + webAppStorageAccountsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/azurestorageaccounts/list$`) + webAppBackupsRegex = 
regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/backup/list$`) + webAppMetadataRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/metadata/list$`) + webAppPubCredsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/publishingcredentials/list$`) + webAppConfigFallbackRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/config/[^/]+(/list)?$`) + webAppBasicAuthPolicyRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/basicPublishingCredentialsPolicies/(ftp|scm)$`) + webAppTrafficRoutingRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Web/sites/[^/]+/trafficRouting$`) + // Monitoring resources + logAnalyticsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.OperationalInsights/workspaces/[^/]+$`) + appInsightsRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Insights/components/[^/]+$`) + autoscaleSettingRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Insights/autoscalesettings/[^/]+$`) + actionGroupRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Insights/actionGroups/[^/]+$`) + metricAlertRegex = regexp.MustCompile(`(?i)/subscriptions/[^/]+/resourceGroups/[^/]+/providers/Microsoft\.Insights/metricAlerts/[^/]+$`) + diagnosticSettingRegex = regexp.MustCompile(`(?i)/providers/Microsoft\.Insights/diagnosticSettings/[^/]+$`) +) + +func matchSubscription(path string) bool { return subscriptionRegex.MatchString(path) } +func matchListProviders(path string) bool { return listProvidersRegex.MatchString(path) } +func 
matchProviderRegistration(path string) bool { return providerRegistrationRegex.MatchString(path) } +func matchCDNProfile(path string) bool { return cdnProfileRegex.MatchString(path) } +func matchCDNEndpoint(path string) bool { return cdnEndpointRegex.MatchString(path) } +func matchCDNCustomDomain(path string) bool { return cdnCustomDomainRegex.MatchString(path) } +func matchCDNCustomDomainEnableHttps(path string) bool { return cdnCustomDomainEnableHttpsRegex.MatchString(path) } +func matchCDNCustomDomainDisableHttps(path string) bool { return cdnCustomDomainDisableHttpsRegex.MatchString(path) } +func matchCDNOperationResults(path string) bool { return cdnOperationResultsRegex.MatchString(path) } +func matchDNSZone(path string) bool { return dnsZoneRegex.MatchString(path) } +func matchDNSCNAMERecord(path string) bool { return dnsCNAMERecordRegex.MatchString(path) } +func matchStorageAccount(path string) bool { return storageAccountRegex.MatchString(path) } +func matchStorageAccountKeys(path string) bool { return storageAccountKeysRegex.MatchString(path) } +// App Service matchers +func matchAppServicePlan(path string) bool { return appServicePlanRegex.MatchString(path) } +func matchLinuxWebApp(path string) bool { return linuxWebAppRegex.MatchString(path) } +func matchWebAppSlot(path string) bool { return webAppSlotRegex.MatchString(path) } +func matchWebAppSlotConfig(path string) bool { return webAppSlotConfigRegex.MatchString(path) } +func matchWebAppSlotConfigFallback(path string) bool { return webAppSlotConfigFallbackRegex.MatchString(path) } +func matchWebAppSlotBasicAuthPolicy(path string) bool { return webAppSlotBasicAuthPolicyRegex.MatchString(path) } +func matchWebAppConfig(path string) bool { return webAppConfigRegex.MatchString(path) } +func matchWebAppCheckName(path string) bool { return webAppCheckNameRegex.MatchString(path) } +func matchWebAppAuthSettings(path string) bool { return webAppAuthSettingsRegex.MatchString(path) } +func 
matchWebAppAuthSettingsV2(path string) bool { return webAppAuthSettingsV2Regex.MatchString(path) } +func matchWebAppConfigLogs(path string) bool { return webAppConfigLogsRegex.MatchString(path) } +func matchWebAppAppSettings(path string) bool { return webAppAppSettingsRegex.MatchString(path) } +func matchWebAppConnStrings(path string) bool { return webAppConnStringsRegex.MatchString(path) } +func matchWebAppStickySettings(path string) bool { return webAppStickySettingsRegex.MatchString(path) } +func matchWebAppStorageAccounts(path string) bool { return webAppStorageAccountsRegex.MatchString(path) } +func matchWebAppBackups(path string) bool { return webAppBackupsRegex.MatchString(path) } +func matchWebAppMetadata(path string) bool { return webAppMetadataRegex.MatchString(path) } +func matchWebAppPubCreds(path string) bool { return webAppPubCredsRegex.MatchString(path) } +func matchWebAppConfigFallback(path string) bool { return webAppConfigFallbackRegex.MatchString(path) } +func matchWebAppBasicAuthPolicy(path string) bool { return webAppBasicAuthPolicyRegex.MatchString(path) } +func matchWebAppTrafficRouting(path string) bool { return webAppTrafficRoutingRegex.MatchString(path) } +// Monitoring matchers +func matchLogAnalytics(path string) bool { return logAnalyticsRegex.MatchString(path) } +func matchAppInsights(path string) bool { return appInsightsRegex.MatchString(path) } +func matchAutoscaleSetting(path string) bool { return autoscaleSettingRegex.MatchString(path) } +func matchActionGroup(path string) bool { return actionGroupRegex.MatchString(path) } +func matchMetricAlert(path string) bool { return metricAlertRegex.MatchString(path) } +func matchDiagnosticSetting(path string) bool { return diagnosticSettingRegex.MatchString(path) } + +// ============================================================================= +// CDN Profile Handler +// ============================================================================= + +func (s *Server) handleCDNProfile(w 
http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + // Extract components from path + subscriptionID := parts[2] + resourceGroup := parts[4] + profileName := parts[8] + + resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Cdn/profiles/%s", + subscriptionID, resourceGroup, profileName) + + switch r.Method { + case http.MethodPut: + var req struct { + Location string `json:"location"` + Tags map[string]string `json:"tags"` + Sku CDNSku `json:"sku"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.badRequest(w, "Invalid request body") + return + } + + if req.Sku.Name == "" { + s.badRequest(w, "sku.name is required") + return + } + + profile := CDNProfile{ + ID: resourceID, + Name: profileName, + Type: "Microsoft.Cdn/profiles", + Location: req.Location, + Tags: req.Tags, + Sku: req.Sku, + Properties: CDNProfileProps{ + ResourceState: "Active", + ProvisioningState: "Succeeded", + }, + } + + s.store.mu.Lock() + s.store.cdnProfiles[resourceID] = profile + s.store.mu.Unlock() + + w.WriteHeader(http.StatusCreated) + json.NewEncoder(w).Encode(profile) + + case http.MethodGet: + s.store.mu.RLock() + profile, exists := s.store.cdnProfiles[resourceID] + s.store.mu.RUnlock() + + if !exists { + s.resourceNotFound(w, "CDN Profile", profileName) + return + } + + json.NewEncoder(w).Encode(profile) + + case http.MethodDelete: + s.store.mu.Lock() + delete(s.store.cdnProfiles, resourceID) + // Also delete associated endpoints + for k := range s.store.cdnEndpoints { + if strings.HasPrefix(k, resourceID+"/endpoints/") { + delete(s.store.cdnEndpoints, k) + } + } + s.store.mu.Unlock() + + w.WriteHeader(http.StatusOK) + + default: + s.methodNotAllowed(w) + } +} + +// ============================================================================= +// CDN Endpoint Handler +// ============================================================================= + +func (s *Server) handleCDNEndpoint(w 
http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + subscriptionID := parts[2] + resourceGroup := parts[4] + profileName := parts[8] + endpointName := parts[10] + + resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Cdn/profiles/%s/endpoints/%s", + subscriptionID, resourceGroup, profileName, endpointName) + + switch r.Method { + case http.MethodPut: + var req struct { + Location string `json:"location"` + Tags map[string]string `json:"tags"` + Properties CDNEndpointProps `json:"properties"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.badRequest(w, "Invalid request body") + return + } + + if len(req.Properties.Origins) == 0 { + s.badRequest(w, "At least one origin is required") + return + } + + endpoint := CDNEndpoint{ + ID: resourceID, + Name: endpointName, + Type: "Microsoft.Cdn/profiles/endpoints", + Location: req.Location, + Tags: req.Tags, + Properties: CDNEndpointProps{ + HostName: fmt.Sprintf("%s.azureedge.net", endpointName), + OriginHostHeader: req.Properties.OriginHostHeader, + Origins: req.Properties.Origins, + OriginPath: req.Properties.OriginPath, + IsHttpAllowed: req.Properties.IsHttpAllowed, + IsHttpsAllowed: true, + IsCompressionEnabled: req.Properties.IsCompressionEnabled, + ResourceState: "Running", + ProvisioningState: "Succeeded", + DeliveryPolicy: req.Properties.DeliveryPolicy, + }, + } + + s.store.mu.Lock() + s.store.cdnEndpoints[resourceID] = endpoint + s.store.mu.Unlock() + + w.WriteHeader(http.StatusCreated) + json.NewEncoder(w).Encode(endpoint) + + case http.MethodGet: + s.store.mu.RLock() + endpoint, exists := s.store.cdnEndpoints[resourceID] + s.store.mu.RUnlock() + + if !exists { + s.resourceNotFound(w, "CDN Endpoint", endpointName) + return + } + + json.NewEncoder(w).Encode(endpoint) + + case http.MethodDelete: + s.store.mu.Lock() + delete(s.store.cdnEndpoints, resourceID) + s.store.mu.Unlock() + + w.WriteHeader(http.StatusOK) + + 
default: + s.methodNotAllowed(w) + } +} + +// ============================================================================= +// CDN Custom Domain Handler +// ============================================================================= + +func (s *Server) handleCDNCustomDomain(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + subscriptionID := parts[2] + resourceGroup := parts[4] + profileName := parts[8] + endpointName := parts[10] + customDomainName := parts[12] + + resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Cdn/profiles/%s/endpoints/%s/customDomains/%s", + subscriptionID, resourceGroup, profileName, endpointName, customDomainName) + + switch r.Method { + case http.MethodPut: + var req struct { + Properties struct { + HostName string `json:"hostName"` + } `json:"properties"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.badRequest(w, "Invalid request body") + return + } + + if req.Properties.HostName == "" { + s.badRequest(w, "properties.hostName is required") + return + } + + customDomain := CDNCustomDomain{ + ID: resourceID, + Name: customDomainName, + Type: "Microsoft.Cdn/profiles/endpoints/customDomains", + Properties: CDNCustomDomainProps{ + HostName: req.Properties.HostName, + ResourceState: "Active", + ProvisioningState: "Succeeded", + }, + } + + s.store.mu.Lock() + s.store.cdnCustomDomains[resourceID] = customDomain + s.store.mu.Unlock() + + w.WriteHeader(http.StatusCreated) + json.NewEncoder(w).Encode(customDomain) + + case http.MethodGet: + s.store.mu.RLock() + customDomain, exists := s.store.cdnCustomDomains[resourceID] + s.store.mu.RUnlock() + + if !exists { + s.resourceNotFound(w, "CDN Custom Domain", customDomainName) + return + } + + json.NewEncoder(w).Encode(customDomain) + + case http.MethodDelete: + s.store.mu.Lock() + delete(s.store.cdnCustomDomains, resourceID) + s.store.mu.Unlock() + + w.WriteHeader(http.StatusOK) + + default: 
+ s.methodNotAllowed(w) + } +} + +// ============================================================================= +// CDN Custom Domain HTTPS Handler +// ============================================================================= + +func (s *Server) handleCDNOperationResults(w http.ResponseWriter, r *http.Request) { + // Operation results endpoint - returns the status of an async operation + // Always return Succeeded to indicate the operation is complete + + if r.Method != http.MethodGet { + s.methodNotAllowed(w) + return + } + + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.WriteHeader(http.StatusOK) + + response := map[string]interface{}{ + "status": "Succeeded", + "properties": map[string]interface{}{ + "customHttpsProvisioningState": "Enabled", + "customHttpsProvisioningSubstate": "CertificateDeployed", + }, + } + json.NewEncoder(w).Encode(response) +} + +func (s *Server) handleCDNCustomDomainHttps(w http.ResponseWriter, r *http.Request, enable bool) { + // enableCustomHttps and disableCustomHttps endpoints + // These are POST requests to enable/disable HTTPS on a custom domain + + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + // Extract resource info from path for the polling URL + path := r.URL.Path + // Remove /enableCustomHttps or /disableCustomHttps from path to get custom domain path + customDomainPath := strings.TrimSuffix(path, "/enableCustomHttps") + customDomainPath = strings.TrimSuffix(customDomainPath, "/disableCustomHttps") + + // Azure async operations require a Location or Azure-AsyncOperation header for polling + // The Location header should point to the operation status endpoint + operationID := fmt.Sprintf("op-%d", time.Now().UnixNano()) + asyncOperationURL := fmt.Sprintf("https://%s%s/operationresults/%s", r.Host, customDomainPath, operationID) + + w.Header().Set("Azure-AsyncOperation", asyncOperationURL) + w.Header().Set("Location", asyncOperationURL) + 
w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.WriteHeader(http.StatusAccepted) + + // Return a custom domain response with the updated HTTPS state + response := map[string]interface{}{ + "properties": map[string]interface{}{ + "customHttpsProvisioningState": "Enabled", + "customHttpsProvisioningSubstate": "CertificateDeployed", + }, + } + if !enable { + response["properties"].(map[string]interface{})["customHttpsProvisioningState"] = "Disabled" + response["properties"].(map[string]interface{})["customHttpsProvisioningSubstate"] = "" + } + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// DNS Zone Handler +// ============================================================================= + +func (s *Server) handleDNSZone(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + subscriptionID := parts[2] + resourceGroup := parts[4] + zoneName := parts[8] + + resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/dnszones/%s", + subscriptionID, resourceGroup, zoneName) + + switch r.Method { + case http.MethodPut: + var req struct { + Location string `json:"location"` + Tags map[string]string `json:"tags"` + } + json.NewDecoder(r.Body).Decode(&req) + + zone := DNSZone{ + ID: resourceID, + Name: zoneName, + Type: "Microsoft.Network/dnszones", + Location: "global", + Tags: req.Tags, + Properties: DNSZoneProps{ + MaxNumberOfRecordSets: 10000, + NumberOfRecordSets: 2, + NameServers: []string{ + "ns1-01.azure-dns.com.", + "ns2-01.azure-dns.net.", + "ns3-01.azure-dns.org.", + "ns4-01.azure-dns.info.", + }, + }, + } + + s.store.mu.Lock() + s.store.dnsZones[resourceID] = zone + s.store.mu.Unlock() + + w.WriteHeader(http.StatusCreated) + json.NewEncoder(w).Encode(zone) + + case http.MethodGet: + s.store.mu.RLock() + zone, exists := s.store.dnsZones[resourceID] + s.store.mu.RUnlock() + + if 
!exists { + // Return a fake zone for any GET request (like storage account handler) + // This allows data sources to work without pre-creating the zone + zone = DNSZone{ + ID: resourceID, + Name: zoneName, + Type: "Microsoft.Network/dnszones", + Location: "global", + Properties: DNSZoneProps{ + MaxNumberOfRecordSets: 10000, + NumberOfRecordSets: 2, + NameServers: []string{ + "ns1-01.azure-dns.com.", + "ns2-01.azure-dns.net.", + "ns3-01.azure-dns.org.", + "ns4-01.azure-dns.info.", + }, + }, + } + } + + json.NewEncoder(w).Encode(zone) + + case http.MethodDelete: + s.store.mu.Lock() + delete(s.store.dnsZones, resourceID) + s.store.mu.Unlock() + + w.WriteHeader(http.StatusOK) + + default: + s.methodNotAllowed(w) + } +} + +// ============================================================================= +// DNS CNAME Record Handler +// ============================================================================= + +func (s *Server) handleDNSCNAMERecord(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + subscriptionID := parts[2] + resourceGroup := parts[4] + zoneName := parts[8] + recordName := parts[10] + + resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/dnszones/%s/CNAME/%s", + subscriptionID, resourceGroup, zoneName, recordName) + + switch r.Method { + case http.MethodPut: + var req struct { + Properties DNSCNAMERecordProps `json:"properties"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.badRequest(w, "Invalid request body") + return + } + + if req.Properties.CNAMERecord == nil || req.Properties.CNAMERecord.Cname == "" { + s.badRequest(w, "CNAMERecord.cname is required") + return + } + + record := DNSCNAMERecord{ + ID: resourceID, + Name: recordName, + Type: "Microsoft.Network/dnszones/CNAME", + Etag: fmt.Sprintf("etag-%d", time.Now().Unix()), + Properties: DNSCNAMERecordProps{ + TTL: req.Properties.TTL, + Fqdn: fmt.Sprintf("%s.%s.", recordName, 
zoneName), + CNAMERecord: req.Properties.CNAMERecord, + }, + } + + s.store.mu.Lock() + s.store.dnsCNAMERecords[resourceID] = record + s.store.mu.Unlock() + + w.WriteHeader(http.StatusCreated) + json.NewEncoder(w).Encode(record) + + case http.MethodGet: + s.store.mu.RLock() + record, exists := s.store.dnsCNAMERecords[resourceID] + s.store.mu.RUnlock() + + if !exists { + s.resourceNotFound(w, "DNS CNAME Record", recordName) + return + } + + json.NewEncoder(w).Encode(record) + + case http.MethodDelete: + s.store.mu.Lock() + delete(s.store.dnsCNAMERecords, resourceID) + s.store.mu.Unlock() + + w.WriteHeader(http.StatusOK) + + default: + s.methodNotAllowed(w) + } +} + +// ============================================================================= +// Storage Account Handler (Read-only for data source) +// ============================================================================= + +func (s *Server) handleStorageAccount(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + subscriptionID := parts[2] + resourceGroup := parts[4] + accountName := parts[8] + + resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Storage/storageAccounts/%s", + subscriptionID, resourceGroup, accountName) + + switch r.Method { + case http.MethodGet: + // For data sources, we return a pre-configured storage account + // The account "exists" as long as it's queried + account := StorageAccount{ + ID: resourceID, + Name: accountName, + Type: "Microsoft.Storage/storageAccounts", + Location: "eastus", + Kind: "StorageV2", + Sku: StorageSku{ + Name: "Standard_LRS", + Tier: "Standard", + }, + Properties: StorageAccountProps{ + PrimaryEndpoints: StorageEndpoints{ + Blob: fmt.Sprintf("https://%s.blob.core.windows.net/", accountName), + Web: fmt.Sprintf("https://%s.z13.web.core.windows.net/", accountName), + }, + ProvisioningState: "Succeeded", + }, + } + + json.NewEncoder(w).Encode(account) + + case http.MethodPut: + // 
Allow creating storage accounts for completeness + var req struct { + Location string `json:"location"` + Tags map[string]string `json:"tags"` + Kind string `json:"kind"` + Sku StorageSku `json:"sku"` + } + json.NewDecoder(r.Body).Decode(&req) + + account := StorageAccount{ + ID: resourceID, + Name: accountName, + Type: "Microsoft.Storage/storageAccounts", + Location: req.Location, + Kind: req.Kind, + Sku: req.Sku, + Properties: StorageAccountProps{ + PrimaryEndpoints: StorageEndpoints{ + Blob: fmt.Sprintf("https://%s.blob.core.windows.net/", accountName), + Web: fmt.Sprintf("https://%s.z13.web.core.windows.net/", accountName), + }, + ProvisioningState: "Succeeded", + }, + } + + s.store.mu.Lock() + s.store.storageAccounts[resourceID] = account + s.store.mu.Unlock() + + w.WriteHeader(http.StatusCreated) + json.NewEncoder(w).Encode(account) + + default: + s.methodNotAllowed(w) + } +} + +// ============================================================================= +// Storage Account Keys Handler +// ============================================================================= + +func (s *Server) handleStorageAccountKeys(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + // Return mock storage account keys + response := map[string]interface{}{ + "keys": []map[string]interface{}{ + { + "keyName": "key1", + "value": "mock-storage-key-1-base64encodedvalue==", + "permissions": "FULL", + }, + { + "keyName": "key2", + "value": "mock-storage-key-2-base64encodedvalue==", + "permissions": "FULL", + }, + }, + } + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Blob Storage Handler (for azurerm backend state storage) +// ============================================================================= + +func (s *Server) handleBlobStorage(w http.ResponseWriter, r *http.Request) { + host := r.Host + path := r.URL.Path + query := 
r.URL.Query() + + // Extract account name from host (e.g., "devstoreaccount1.blob.core.windows.net" -> "devstoreaccount1") + accountName := strings.Split(host, ".")[0] + + // Remove leading slash and parse path + path = strings.TrimPrefix(path, "/") + parts := strings.SplitN(path, "/", 2) + + containerName := "" + blobName := "" + + if len(parts) >= 1 && parts[0] != "" { + containerName = parts[0] + } + if len(parts) >= 2 { + blobName = parts[1] + } + + log.Printf("Blob Storage: account=%s container=%s blob=%s restype=%s comp=%s", accountName, containerName, blobName, query.Get("restype"), query.Get("comp")) + + // List blobs in container (restype=container&comp=list) + // Must check this BEFORE container operations since ListBlobs also has restype=container + if containerName != "" && query.Get("comp") == "list" { + s.handleListBlobs(w, r, accountName, containerName) + return + } + + // Check if this is a container operation (restype=container without comp=list) + if query.Get("restype") == "container" { + s.handleBlobContainer(w, r, accountName, containerName) + return + } + + // Otherwise, it's a blob operation + if containerName != "" && blobName != "" { + s.handleBlob(w, r, accountName, containerName, blobName) + return + } + + // Unknown operation + w.Header().Set("Content-Type", "application/xml") + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, `InvalidUriThe requested URI does not represent any resource on the server.`) +} + +func (s *Server) handleBlobContainer(w http.ResponseWriter, r *http.Request, accountName, containerName string) { + containerKey := fmt.Sprintf("%s/%s", accountName, containerName) + + switch r.Method { + case http.MethodPut: + // Create container + now := time.Now().UTC().Format(time.RFC1123) + etag := fmt.Sprintf("\"0x%X\"", time.Now().UnixNano()) + + container := BlobContainer{ + Name: containerName, + Properties: BlobContainerProps{ + LastModified: now, + Etag: etag, + }, + } + + s.store.mu.Lock() + 
s.store.blobContainers[containerKey] = container + s.store.mu.Unlock() + + w.Header().Set("ETag", etag) + w.Header().Set("Last-Modified", now) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusCreated) + + case http.MethodGet, http.MethodHead: + // Get container properties + s.store.mu.RLock() + container, exists := s.store.blobContainers[containerKey] + s.store.mu.RUnlock() + + if !exists { + s.blobNotFound(w, "ContainerNotFound", fmt.Sprintf("The specified container does not exist. Container: %s", containerName)) + return + } + + w.Header().Set("ETag", container.Properties.Etag) + w.Header().Set("Last-Modified", container.Properties.LastModified) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.Header().Set("x-ms-lease-status", "unlocked") + w.Header().Set("x-ms-lease-state", "available") + w.Header().Set("x-ms-has-immutability-policy", "false") + w.Header().Set("x-ms-has-legal-hold", "false") + w.WriteHeader(http.StatusOK) + + case http.MethodDelete: + // Delete container + s.store.mu.Lock() + delete(s.store.blobContainers, containerKey) + // Also delete all blobs in the container + for k := range s.store.blobs { + if strings.HasPrefix(k, containerKey+"/") { + delete(s.store.blobs, k) + } + } + s.store.mu.Unlock() + + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusAccepted) + + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } +} + +func (s *Server) handleBlob(w http.ResponseWriter, r *http.Request, accountName, containerName, blobName string) { + containerKey := fmt.Sprintf("%s/%s", accountName, containerName) + blobKey := fmt.Sprintf("%s/%s/%s", accountName, containerName, blobName) + query := r.URL.Query() + + // Handle lease operations + if query.Get("comp") == "lease" { 
+ s.handleBlobLease(w, r, blobKey) + return + } + + // Handle metadata operations (used for state locking) + if query.Get("comp") == "metadata" { + s.handleBlobMetadata(w, r, blobKey) + return + } + + // Handle block blob operations (staged uploads) + if query.Get("comp") == "block" { + s.handlePutBlock(w, r, blobKey) + return + } + + if query.Get("comp") == "blocklist" { + s.handleBlockList(w, r, accountName, containerName, blobName, blobKey) + return + } + + // Handle blob properties + if query.Get("comp") == "properties" { + s.handleBlobProperties(w, r, blobKey) + return + } + + switch r.Method { + case http.MethodPut: + // Upload blob + s.store.mu.RLock() + _, containerExists := s.store.blobContainers[containerKey] + s.store.mu.RUnlock() + + if !containerExists { + s.blobNotFound(w, "ContainerNotFound", fmt.Sprintf("The specified container does not exist. Container: %s", containerName)) + return + } + + // Read request body + body := make([]byte, 0) + if r.Body != nil { + body, _ = io.ReadAll(r.Body) + } + + now := time.Now().UTC().Format(time.RFC1123) + etag := fmt.Sprintf("\"0x%X\"", time.Now().UnixNano()) + contentType := r.Header.Get("Content-Type") + if contentType == "" { + contentType = "application/octet-stream" + } + + // Extract metadata from x-ms-meta-* headers + metadata := make(map[string]string) + for key, values := range r.Header { + lowerKey := strings.ToLower(key) + if strings.HasPrefix(lowerKey, "x-ms-meta-") { + metaKey := strings.TrimPrefix(lowerKey, "x-ms-meta-") + if len(values) > 0 { + metadata[metaKey] = values[0] + } + } + } + + blob := Blob{ + Name: blobName, + Content: body, + Metadata: metadata, + Properties: BlobProps{ + LastModified: now, + Etag: etag, + ContentLength: len(body), + ContentType: contentType, + }, + } + + s.store.mu.Lock() + s.store.blobs[blobKey] = blob + s.store.mu.Unlock() + + w.Header().Set("ETag", etag) + w.Header().Set("Last-Modified", now) + w.Header().Set("Content-MD5", "") + w.Header().Set("x-ms-request-id", 
fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.Header().Set("x-ms-request-server-encrypted", "true") + w.WriteHeader(http.StatusCreated) + + case http.MethodGet: + // Download blob + s.store.mu.RLock() + blob, exists := s.store.blobs[blobKey] + s.store.mu.RUnlock() + + if !exists { + s.blobNotFound(w, "BlobNotFound", fmt.Sprintf("The specified blob does not exist. Blob: %s", blobName)) + return + } + + w.Header().Set("Content-Type", blob.Properties.ContentType) + w.Header().Set("Content-Length", fmt.Sprintf("%d", blob.Properties.ContentLength)) + w.Header().Set("ETag", blob.Properties.Etag) + w.Header().Set("Last-Modified", blob.Properties.LastModified) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.Header().Set("x-ms-blob-type", "BlockBlob") + w.WriteHeader(http.StatusOK) + w.Write(blob.Content) + + case http.MethodHead: + // Get blob properties + s.store.mu.RLock() + blob, exists := s.store.blobs[blobKey] + s.store.mu.RUnlock() + + if !exists { + s.blobNotFound(w, "BlobNotFound", fmt.Sprintf("The specified blob does not exist. 
Blob: %s", blobName)) + return + } + + // Return metadata as x-ms-meta-* headers + for key, value := range blob.Metadata { + w.Header().Set("x-ms-meta-"+key, value) + } + + w.Header().Set("Content-Type", blob.Properties.ContentType) + w.Header().Set("Content-Length", fmt.Sprintf("%d", blob.Properties.ContentLength)) + w.Header().Set("ETag", blob.Properties.Etag) + w.Header().Set("Last-Modified", blob.Properties.LastModified) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.Header().Set("x-ms-blob-type", "BlockBlob") + w.Header().Set("x-ms-lease-status", "unlocked") + w.Header().Set("x-ms-lease-state", "available") + w.WriteHeader(http.StatusOK) + + case http.MethodDelete: + // Delete blob + s.store.mu.Lock() + _, exists := s.store.blobs[blobKey] + if exists { + delete(s.store.blobs, blobKey) + } + s.store.mu.Unlock() + + if !exists { + s.blobNotFound(w, "BlobNotFound", fmt.Sprintf("The specified blob does not exist. 
Blob: %s", blobName)) + return + } + + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.Header().Set("x-ms-delete-type-permanent", "true") + w.WriteHeader(http.StatusAccepted) + + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } +} + +func (s *Server) handleBlobMetadata(w http.ResponseWriter, r *http.Request, blobKey string) { + log.Printf("Blob Metadata: method=%s key=%s", r.Method, blobKey) + + switch r.Method { + case http.MethodPut: + // Set blob metadata - used for state locking + // Extract metadata from x-ms-meta-* headers + metadata := make(map[string]string) + for key, values := range r.Header { + lowerKey := strings.ToLower(key) + if strings.HasPrefix(lowerKey, "x-ms-meta-") { + metaKey := strings.TrimPrefix(lowerKey, "x-ms-meta-") + if len(values) > 0 { + metadata[metaKey] = values[0] + log.Printf("Blob Metadata: storing %s=%s", metaKey, values[0]) + } + } + } + + s.store.mu.Lock() + blob, exists := s.store.blobs[blobKey] + if exists { + blob.Metadata = metadata + s.store.blobs[blobKey] = blob + } else { + // Create a placeholder blob if it doesn't exist (for lock files) + now := time.Now().UTC().Format(time.RFC1123) + etag := fmt.Sprintf("\"0x%X\"", time.Now().UnixNano()) + s.store.blobs[blobKey] = Blob{ + Name: "", + Content: []byte{}, + Metadata: metadata, + Properties: BlobProps{ + LastModified: now, + Etag: etag, + ContentLength: 0, + ContentType: "application/octet-stream", + }, + } + } + s.store.mu.Unlock() + + w.Header().Set("ETag", fmt.Sprintf("\"0x%X\"", time.Now().UnixNano())) + w.Header().Set("Last-Modified", time.Now().UTC().Format(time.RFC1123)) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.Header().Set("x-ms-request-server-encrypted", "true") + w.WriteHeader(http.StatusOK) + + case http.MethodGet, http.MethodHead: + // Get blob metadata + s.store.mu.RLock() + blob, exists 
:= s.store.blobs[blobKey] + s.store.mu.RUnlock() + + if !exists { + s.blobNotFound(w, "BlobNotFound", "The specified blob does not exist.") + return + } + + // Return metadata as x-ms-meta-* headers + for key, value := range blob.Metadata { + w.Header().Set("x-ms-meta-"+key, value) + log.Printf("Blob Metadata: returning x-ms-meta-%s=%s", key, value) + } + + w.Header().Set("ETag", blob.Properties.Etag) + w.Header().Set("Last-Modified", blob.Properties.LastModified) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusOK) + + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } +} + +func (s *Server) handleBlobLease(w http.ResponseWriter, r *http.Request, blobKey string) { + leaseAction := r.Header.Get("x-ms-lease-action") + log.Printf("Blob Lease: action=%s key=%s", leaseAction, blobKey) + + switch leaseAction { + case "acquire": + // Acquire lease - return a mock lease ID + leaseID := fmt.Sprintf("lease-%d", time.Now().UnixNano()) + w.Header().Set("x-ms-lease-id", leaseID) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusCreated) + + case "release", "break": + // Release or break lease + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusOK) + + case "renew": + // Renew lease + leaseID := r.Header.Get("x-ms-lease-id") + w.Header().Set("x-ms-lease-id", leaseID) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusOK) + + default: + w.WriteHeader(http.StatusBadRequest) + } +} + +func (s *Server) handlePutBlock(w http.ResponseWriter, r *http.Request, blobKey string) { + blockID := r.URL.Query().Get("blockid") + log.Printf("Put Block: key=%s blockid=%s", blobKey, blockID) 
+ + if r.Method != http.MethodPut { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + // Read block data + body, _ := io.ReadAll(r.Body) + + // Store the block + blockKey := fmt.Sprintf("%s/%s", blobKey, blockID) + s.store.mu.Lock() + s.store.blobBlocks[blockKey] = body + s.store.mu.Unlock() + + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.Header().Set("x-ms-content-crc64", "") + w.Header().Set("x-ms-request-server-encrypted", "true") + w.WriteHeader(http.StatusCreated) +} + +func (s *Server) handleBlockList(w http.ResponseWriter, r *http.Request, accountName, containerName, blobName, blobKey string) { + log.Printf("Block List: method=%s key=%s", r.Method, blobKey) + + switch r.Method { + case http.MethodPut: + // Commit block list - assemble blocks into final blob + // For simplicity, we just create an empty blob (the actual block assembly would be complex) + // The terraform state is typically small enough to not use block uploads + body, _ := io.ReadAll(r.Body) + log.Printf("Block List body: %s", string(body)) + + now := time.Now().UTC().Format(time.RFC1123) + etag := fmt.Sprintf("\"0x%X\"", time.Now().UnixNano()) + + // Create the blob (simplified - in reality would assemble from blocks) + blob := Blob{ + Name: blobName, + Content: []byte{}, // Would normally assemble from blocks + Properties: BlobProps{ + LastModified: now, + Etag: etag, + ContentLength: 0, + ContentType: "application/octet-stream", + }, + } + + s.store.mu.Lock() + s.store.blobs[blobKey] = blob + // Clean up staged blocks + for k := range s.store.blobBlocks { + if strings.HasPrefix(k, blobKey+"/") { + delete(s.store.blobBlocks, k) + } + } + s.store.mu.Unlock() + + w.Header().Set("ETag", etag) + w.Header().Set("Last-Modified", now) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + 
w.Header().Set("x-ms-request-server-encrypted", "true") + w.WriteHeader(http.StatusCreated) + + case http.MethodGet: + // Get block list + w.Header().Set("Content-Type", "application/xml") + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, ``) + + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } +} + +func (s *Server) handleBlobProperties(w http.ResponseWriter, r *http.Request, blobKey string) { + log.Printf("Blob Properties: method=%s key=%s", r.Method, blobKey) + + s.store.mu.RLock() + blob, exists := s.store.blobs[blobKey] + s.store.mu.RUnlock() + + if !exists { + s.blobNotFound(w, "BlobNotFound", "The specified blob does not exist.") + return + } + + switch r.Method { + case http.MethodPut: + // Set blob properties + w.Header().Set("ETag", blob.Properties.Etag) + w.Header().Set("Last-Modified", blob.Properties.LastModified) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusOK) + + case http.MethodGet, http.MethodHead: + // Get blob properties + w.Header().Set("Content-Type", blob.Properties.ContentType) + w.Header().Set("Content-Length", fmt.Sprintf("%d", blob.Properties.ContentLength)) + w.Header().Set("ETag", blob.Properties.Etag) + w.Header().Set("Last-Modified", blob.Properties.LastModified) + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.Header().Set("x-ms-blob-type", "BlockBlob") + w.WriteHeader(http.StatusOK) + + default: + w.WriteHeader(http.StatusMethodNotAllowed) + } +} + +func (s *Server) handleListBlobs(w http.ResponseWriter, r *http.Request, accountName, containerName string) { + containerKey := fmt.Sprintf("%s/%s", accountName, containerName) + prefix := containerKey + "/" + + s.store.mu.RLock() + _, containerExists := 
s.store.blobContainers[containerKey] + var blobs []Blob + for k, b := range s.store.blobs { + if strings.HasPrefix(k, prefix) { + blobs = append(blobs, b) + } + } + s.store.mu.RUnlock() + + if !containerExists { + s.blobNotFound(w, "ContainerNotFound", fmt.Sprintf("The specified container does not exist. Container: %s", containerName)) + return + } + + w.Header().Set("Content-Type", "application/xml") + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusOK) + + fmt.Fprintf(w, ``, accountName, containerName) + for _, b := range blobs { + fmt.Fprintf(w, `%s%d%s%s%sBlockBlobunlockedavailable`, + b.Name, b.Properties.ContentLength, b.Properties.ContentType, b.Properties.LastModified, b.Properties.Etag) + } + fmt.Fprintf(w, ``) +} + +func (s *Server) blobNotFound(w http.ResponseWriter, code, message string) { + w.Header().Set("Content-Type", "application/xml") + w.Header().Set("x-ms-request-id", fmt.Sprintf("%d", time.Now().UnixNano())) + w.Header().Set("x-ms-version", "2021-06-08") + w.WriteHeader(http.StatusNotFound) + fmt.Fprintf(w, `%s%s`, code, message) +} + +// ============================================================================= +// App Service Plan Handler +// ============================================================================= + +func (s *Server) handleAppServicePlan(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + subscriptionID := parts[2] + resourceGroup := parts[4] + planName := parts[8] + + // Build canonical resource ID (lowercase path for consistent storage key) + resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Web/serverfarms/%s", + subscriptionID, resourceGroup, planName) + // Use lowercase key for storage to handle case-insensitive lookups + storeKey := strings.ToLower(resourceID) + + switch r.Method { + case http.MethodPut: + var req struct { + 
Location string `json:"location"` + Tags map[string]string `json:"tags"` + Kind string `json:"kind"` + Sku AppServiceSku `json:"sku"` + Properties struct { + PerSiteScaling bool `json:"perSiteScaling"` + ZoneRedundant bool `json:"zoneRedundant"` + Reserved bool `json:"reserved"` + } `json:"properties"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.badRequest(w, "Invalid request body") + return + } + + // Derive SKU tier from name + skuTier := "Standard" + if strings.HasPrefix(req.Sku.Name, "P") { + skuTier = "PremiumV3" + } else if strings.HasPrefix(req.Sku.Name, "B") { + skuTier = "Basic" + } else if strings.HasPrefix(req.Sku.Name, "F") { + skuTier = "Free" + } + + plan := AppServicePlan{ + ID: resourceID, + Name: planName, + Type: "Microsoft.Web/serverfarms", + Location: req.Location, + Tags: req.Tags, + Kind: req.Kind, + Sku: AppServiceSku{ + Name: req.Sku.Name, + Tier: skuTier, + Size: req.Sku.Name, + Family: string(req.Sku.Name[0]), + Capacity: 1, + }, + Properties: AppServicePlanProps{ + ProvisioningState: "Succeeded", + Status: "Ready", + MaximumNumberOfWorkers: 10, + NumberOfSites: 0, + PerSiteScaling: req.Properties.PerSiteScaling, + ZoneRedundant: req.Properties.ZoneRedundant, + Reserved: req.Properties.Reserved, + }, + } + + s.store.mu.Lock() + s.store.appServicePlans[storeKey] = plan + s.store.mu.Unlock() + + // Azure SDK for azurerm provider expects 200 for PUT operations + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(plan) + + case http.MethodGet: + s.store.mu.RLock() + plan, exists := s.store.appServicePlans[storeKey] + s.store.mu.RUnlock() + + if !exists { + s.resourceNotFound(w, "App Service Plan", planName) + return + } + + json.NewEncoder(w).Encode(plan) + + case http.MethodDelete: + s.store.mu.Lock() + delete(s.store.appServicePlans, storeKey) + s.store.mu.Unlock() + + w.WriteHeader(http.StatusOK) + + default: + s.methodNotAllowed(w) + } +} + +// 
============================================================================= +// Web App Auth Settings Handler +// ============================================================================= + +func (s *Server) handleWebAppAuthSettings(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + // Return default disabled auth settings + response := map[string]interface{}{ + "id": r.URL.Path, + "name": "authsettings", + "type": "Microsoft.Web/sites/config", + "properties": map[string]interface{}{ + "enabled": false, + "runtimeVersion": "~1", + "unauthenticatedClientAction": "RedirectToLoginPage", + "tokenStoreEnabled": false, + "allowedExternalRedirectUrls": []string{}, + "defaultProvider": "AzureActiveDirectory", + "clientId": nil, + "issuer": nil, + "allowedAudiences": nil, + "additionalLoginParams": nil, + "isAadAutoProvisioned": false, + "aadClaimsAuthorization": nil, + "googleClientId": nil, + "facebookAppId": nil, + "gitHubClientId": nil, + "twitterConsumerKey": nil, + "microsoftAccountClientId": nil, + }, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App Auth Settings V2 Handler +// ============================================================================= + +func (s *Server) handleWebAppAuthSettingsV2(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + // Return default disabled auth settings V2 + response := map[string]interface{}{ + "id": r.URL.Path, + "name": "authsettingsV2", + "type": "Microsoft.Web/sites/config", + "properties": map[string]interface{}{ + "platform": map[string]interface{}{ + "enabled": false, + "runtimeVersion": "~1", + }, + "globalValidation": map[string]interface{}{ + "requireAuthentication": false, + "unauthenticatedClientAction": "RedirectToLoginPage", + }, + "identityProviders": 
map[string]interface{}{}, + "login": map[string]interface{}{ + "routes": map[string]interface{}{}, + "tokenStore": map[string]interface{}{"enabled": false}, + "preserveUrlFragmentsForLogins": false, + }, + "httpSettings": map[string]interface{}{ + "requireHttps": true, + }, + }, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App App Settings Handler +// ============================================================================= + +func (s *Server) handleWebAppAppSettings(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + // Return empty app settings + response := map[string]interface{}{ + "id": r.URL.Path, + "name": "appsettings", + "type": "Microsoft.Web/sites/config", + "properties": map[string]string{}, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App Connection Strings Handler +// ============================================================================= + +func (s *Server) handleWebAppConnStrings(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + // Return empty connection strings + response := map[string]interface{}{ + "id": r.URL.Path, + "name": "connectionstrings", + "type": "Microsoft.Web/sites/config", + "properties": map[string]interface{}{}, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App Sticky Settings Handler +// ============================================================================= + +func (s *Server) handleWebAppStickySettings(w http.ResponseWriter, r *http.Request) { + // Handle both GET and PUT methods + if r.Method != http.MethodGet && 
// =============================================================================
// Web App Config Logs Handler
// =============================================================================

// handleWebAppConfigLogs serves the site "logs" config resource for GET and
// PUT. It always returns the same payload: every log category disabled, which
// matches a freshly created site. PUT responses echo this default rather than
// the submitted body — the mock does not persist logging configuration.
func (s *Server) handleWebAppConfigLogs(w http.ResponseWriter, r *http.Request) {
	// Handle both GET and PUT methods
	if r.Method != http.MethodGet && r.Method != http.MethodPut {
		s.methodNotAllowed(w)
		return
	}

	// Return default logging configuration
	response := map[string]interface{}{
		"id":   r.URL.Path,
		"name": "logs",
		"type": "Microsoft.Web/sites/config",
		"properties": map[string]interface{}{
			// File-system application logging off; no blob/table sinks.
			"applicationLogs": map[string]interface{}{
				"fileSystem": map[string]interface{}{
					"level": "Off",
				},
				"azureBlobStorage":  nil,
				"azureTableStorage": nil,
			},
			// HTTP logging disabled; retention values mirror Azure defaults.
			"httpLogs": map[string]interface{}{
				"fileSystem": map[string]interface{}{
					"retentionInMb":   35,
					"retentionInDays": 0,
					"enabled":         false,
				},
				"azureBlobStorage": nil,
			},
			"failedRequestsTracing": map[string]interface{}{
				"enabled": false,
			},
			"detailedErrorMessages": map[string]interface{}{
				"enabled": false,
			},
		},
	}

	w.WriteHeader(http.StatusOK)
	json.NewEncoder(w).Encode(response)
}
{ + s.methodNotAllowed(w) + return + } + + // Return empty storage accounts + response := map[string]interface{}{ + "id": r.URL.Path, + "name": "azurestorageaccounts", + "type": "Microsoft.Web/sites/config", + "properties": map[string]interface{}{}, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App Backups Handler +// ============================================================================= + +func (s *Server) handleWebAppBackups(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + // Return empty backup config (no backup configured) + response := map[string]interface{}{ + "id": r.URL.Path, + "name": "backup", + "type": "Microsoft.Web/sites/config", + "properties": map[string]interface{}{ + "backupName": nil, + "enabled": false, + "storageAccountUrl": nil, + "backupSchedule": nil, + "databases": []interface{}{}, + }, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App Metadata Handler +// ============================================================================= + +func (s *Server) handleWebAppMetadata(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + // Return empty metadata + response := map[string]interface{}{ + "id": r.URL.Path, + "name": "metadata", + "type": "Microsoft.Web/sites/config", + "properties": map[string]interface{}{}, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App Publishing Credentials Handler +// ============================================================================= + +func (s *Server) handleWebAppPubCreds(w http.ResponseWriter, r 
*http.Request) { + if r.Method != http.MethodPost { + s.methodNotAllowed(w) + return + } + + path := r.URL.Path + parts := strings.Split(path, "/") + appName := parts[8] + + // Return publishing credentials + response := map[string]interface{}{ + "id": path, + "name": "publishingcredentials", + "type": "Microsoft.Web/sites/config", + "properties": map[string]interface{}{ + "name": "$" + appName, + "publishingUserName": "$" + appName, + "publishingPassword": "mock-publishing-password", + "scmUri": fmt.Sprintf("https://%s.scm.azurewebsites.net", appName), + }, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App Config Fallback Handler (for any unhandled config endpoints) +// ============================================================================= + +func (s *Server) handleWebAppConfigFallback(w http.ResponseWriter, r *http.Request) { + // This handles any config endpoint we haven't explicitly implemented + // Return an empty properties response which should work for most cases + path := r.URL.Path + + // Extract config name from path + parts := strings.Split(path, "/") + configName := "unknown" + for i, p := range parts { + if p == "config" && i+1 < len(parts) { + configName = parts[i+1] + break + } + } + + response := map[string]interface{}{ + "id": path, + "name": configName, + "type": "Microsoft.Web/sites/config", + "properties": map[string]interface{}{}, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) +} + +// ============================================================================= +// Web App Basic Auth Policy Handler (ftp/scm publishing credentials) +// ============================================================================= + +func (s *Server) handleWebAppBasicAuthPolicy(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + policyType := 
// =============================================================================
// Web App Traffic Routing Handler
// Handles az webapp traffic-routing set/clear/show commands
// =============================================================================

// handleWebAppTrafficRouting tracks slot traffic-splitting rules per app in
// the in-memory store. GET returns the current rules (empty list when none),
// POST replaces them with a single rule for the submitted slot/percentage,
// and DELETE clears them.
// NOTE(review): POST always overwrites with a one-element rule set, so
// splitting traffic across multiple slots at once is not supported — confirm
// that is sufficient for the callers.
func (s *Server) handleWebAppTrafficRouting(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")

	// Path shape: /subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.Web/sites/{app}/...
	subscriptionID := parts[2]
	resourceGroup := parts[4]
	appName := parts[8]

	// Key for storing traffic routing rules
	routingKey := fmt.Sprintf("%s:%s:%s", subscriptionID, resourceGroup, appName)

	switch r.Method {
	case http.MethodGet:
		// Return current traffic routing rules
		s.store.mu.RLock()
		rules, exists := s.store.trafficRouting[routingKey]
		s.store.mu.RUnlock()

		if !exists {
			// Return empty routing rules
			response := []TrafficRoutingRule{}
			w.WriteHeader(http.StatusOK)
			json.NewEncoder(w).Encode(response)
			return
		}

		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(rules)

	case http.MethodPost:
		// Set traffic routing (from az webapp traffic-routing set)
		var req struct {
			SlotName       string `json:"slotName"`
			TrafficPercent int    `json:"trafficPercent"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		// Store the traffic routing rule; the action host name mirrors the
		// slot's default hostname convention ({app}-{slot}.azurewebsites.net).
		rules := []TrafficRoutingRule{
			{
				ActionHostName:    fmt.Sprintf("%s-%s.azurewebsites.net", appName, req.SlotName),
				ReroutePercentage: req.TrafficPercent,
				Name:              req.SlotName,
			},
		}

		s.store.mu.Lock()
		s.store.trafficRouting[routingKey] = rules
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(rules)

	case http.MethodDelete:
		// Clear traffic routing (from az webapp traffic-routing clear)
		s.store.mu.Lock()
		delete(s.store.trafficRouting, routingKey)
		s.store.mu.Unlock()

		// Return empty array
		response := []TrafficRoutingRule{}
		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(response)

	default:
		s.methodNotAllowed(w)
	}
}
resourceGroup, appName) + // Use lowercase key for storage to handle case-insensitive lookups + storeKey := strings.ToLower(resourceID) + + switch r.Method { + case http.MethodPut: + var req struct { + Location string `json:"location"` + Tags map[string]string `json:"tags"` + Kind string `json:"kind"` + Identity *AppIdentity `json:"identity"` + Properties struct { + ServerFarmID string `json:"serverFarmId"` + HTTPSOnly bool `json:"httpsOnly"` + ClientAffinityEnabled bool `json:"clientAffinityEnabled"` + SiteConfig *WebAppSiteConfig `json:"siteConfig"` + } `json:"properties"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + s.badRequest(w, "Invalid request body") + return + } + + // Generate mock identity if system-assigned requested + var identity *AppIdentity + if req.Identity != nil && (req.Identity.Type == "SystemAssigned" || req.Identity.Type == "SystemAssigned, UserAssigned") { + identity = &AppIdentity{ + Type: req.Identity.Type, + PrincipalID: fmt.Sprintf("principal-%s", appName), + TenantID: "mock-tenant-id", + UserIDs: req.Identity.UserIDs, + } + } else if req.Identity != nil { + identity = req.Identity + } + + app := LinuxWebApp{ + ID: resourceID, + Name: appName, + Type: "Microsoft.Web/sites", + Location: req.Location, + Tags: req.Tags, + Kind: req.Kind, + Identity: identity, + Properties: LinuxWebAppProps{ + ProvisioningState: "Succeeded", + State: "Running", + DefaultHostName: fmt.Sprintf("%s.azurewebsites.net", appName), + ServerFarmID: req.Properties.ServerFarmID, + HTTPSOnly: req.Properties.HTTPSOnly, + ClientAffinityEnabled: req.Properties.ClientAffinityEnabled, + OutboundIPAddresses: "20.42.0.1,20.42.0.2,20.42.0.3", + PossibleOutboundIPAddresses: "20.42.0.1,20.42.0.2,20.42.0.3,20.42.0.4,20.42.0.5", + CustomDomainVerificationID: fmt.Sprintf("verification-id-%s", appName), + SiteConfig: req.Properties.SiteConfig, + }, + } + + s.store.mu.Lock() + s.store.linuxWebApps[storeKey] = app + s.store.mu.Unlock() + + // Azure SDK for 
// =============================================================================
// Web App Config Handler
// =============================================================================

// handleWebAppConfig serves the "web" site-config resource of a Linux web
// app. PUT/PATCH store the submitted SiteConfig on the app record (when the
// app exists) and echo it; GET returns the stored config, synthesizing an
// empty one plus an initialized Experiments block when nothing was stored.
// NOTE(review): PATCH fully replaces the stored SiteConfig instead of merging
// fields — confirm callers never rely on partial-update semantics.
func (s *Server) handleWebAppConfig(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")

	// Path shape: /subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.Web/sites/{app}/config/web
	subscriptionID := parts[2]
	resourceGroup := parts[4]
	appName := parts[8]

	appResourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Web/sites/%s",
		subscriptionID, resourceGroup, appName)
	// Use lowercase key for storage to handle case-insensitive lookups
	storeKey := strings.ToLower(appResourceID)

	switch r.Method {
	case http.MethodPut, http.MethodPatch:
		var req struct {
			Properties WebAppSiteConfig `json:"properties"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		// Silently ignores configs for unknown apps (no 404), echoing the
		// submitted properties either way.
		s.store.mu.Lock()
		if app, exists := s.store.linuxWebApps[storeKey]; exists {
			app.Properties.SiteConfig = &req.Properties
			s.store.linuxWebApps[storeKey] = app
		}
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(map[string]interface{}{
			"properties": req.Properties,
		})

	case http.MethodGet:
		s.store.mu.RLock()
		app, exists := s.store.linuxWebApps[storeKey]
		s.store.mu.RUnlock()

		if !exists {
			s.resourceNotFound(w, "Web App", appName)
			return
		}

		config := app.Properties.SiteConfig
		if config == nil {
			config = &WebAppSiteConfig{}
		}
		// Ensure Experiments is always initialized (Azure CLI expects it for traffic routing)
		if config.Experiments == nil {
			config.Experiments = &WebAppExperiments{
				RampUpRules: []RampUpRule{},
			}
		}

		json.NewEncoder(w).Encode(map[string]interface{}{
			"properties": config,
		})

	default:
		s.methodNotAllowed(w)
	}
}
// =============================================================================
// Web App Slot Config Handler
// =============================================================================

// handleWebAppSlotConfig serves the "web" site-config resource of a
// deployment slot. GET returns the slot's stored SiteConfig (or a Docker-based
// default when none is stored); PUT replaces it and echoes the new value.
// NOTE(review): slots are looked up by the mixed-case resource ID, unlike
// apps/plans which use a lowercased key — slot config lookups are therefore
// case-sensitive; confirm this is intended.
func (s *Server) handleWebAppSlotConfig(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")

	// Path shape: .../sites/{app}/slots/{slot}/config/web
	subscriptionID := parts[2]
	resourceGroup := parts[4]
	appName := parts[8]
	slotName := parts[10]

	slotResourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Web/sites/%s/slots/%s",
		subscriptionID, resourceGroup, appName, slotName)

	switch r.Method {
	case http.MethodGet:
		// Return the site config from the stored slot
		s.store.mu.RLock()
		slot, exists := s.store.webAppSlots[slotResourceID]
		s.store.mu.RUnlock()

		if !exists {
			s.resourceNotFound(w, "Web App Slot", slotName)
			return
		}

		// Return site config
		config := struct {
			ID         string            `json:"id"`
			Name       string            `json:"name"`
			Type       string            `json:"type"`
			Properties *WebAppSiteConfig `json:"properties"`
		}{
			ID:         slotResourceID + "/config/web",
			Name:       "web",
			Type:       "Microsoft.Web/sites/slots/config",
			Properties: slot.Properties.SiteConfig,
		}

		// If no site config stored, return a default
		if config.Properties == nil {
			config.Properties = &WebAppSiteConfig{
				AlwaysOn:          false,
				HTTP20Enabled:     true,
				MinTLSVersion:     "1.2",
				FtpsState:         "Disabled",
				LinuxFxVersion:    "DOCKER|nginx:latest",
				WebSocketsEnabled: false,
			}
		}
		// Ensure Experiments is always initialized (Azure CLI expects it for traffic routing)
		if config.Properties.Experiments == nil {
			config.Properties.Experiments = &WebAppExperiments{
				RampUpRules: []RampUpRule{},
			}
		}

		json.NewEncoder(w).Encode(config)

	case http.MethodPut:
		var req struct {
			Properties *WebAppSiteConfig `json:"properties"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		// Update the slot's site config; unknown slots are silently ignored
		// (the response still echoes the submitted config).
		s.store.mu.Lock()
		if slot, exists := s.store.webAppSlots[slotResourceID]; exists {
			slot.Properties.SiteConfig = req.Properties
			s.store.webAppSlots[slotResourceID] = slot
		}
		s.store.mu.Unlock()

		config := struct {
			ID         string            `json:"id"`
			Name       string            `json:"name"`
			Type       string            `json:"type"`
			Properties *WebAppSiteConfig `json:"properties"`
		}{
			ID:         slotResourceID + "/config/web",
			Name:       "web",
			Type:       "Microsoft.Web/sites/slots/config",
			Properties: req.Properties,
		}

		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(config)

	default:
		s.methodNotAllowed(w)
	}
}
+// ============================================================================= + +func (s *Server) handleWebAppSlotConfigFallback(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + subscriptionID := parts[2] + resourceGroup := parts[4] + appName := parts[8] + slotName := parts[10] + configType := parts[12] + + slotResourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Web/sites/%s/slots/%s", + subscriptionID, resourceGroup, appName, slotName) + + // Check if slot exists + s.store.mu.RLock() + _, exists := s.store.webAppSlots[slotResourceID] + s.store.mu.RUnlock() + + if !exists { + s.resourceNotFound(w, "Web App Slot", slotName) + return + } + + // Return empty/default response for various config types + switch configType { + case "appSettings": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/appSettings", + "name": "appSettings", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]string{}, + }) + case "connectionstrings": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/connectionstrings", + "name": "connectionstrings", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{}, + }) + case "authsettings": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/authsettings", + "name": "authsettings", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{ + "enabled": false, + }, + }) + case "authsettingsV2": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/authsettingsV2", + "name": "authsettingsV2", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{ + "platform": map[string]interface{}{ + "enabled": false, + }, + }, + }) + case "logs": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/logs", 
+ "name": "logs", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{ + "applicationLogs": map[string]interface{}{ + "fileSystem": map[string]interface{}{ + "level": "Off", + }, + }, + "httpLogs": map[string]interface{}{ + "fileSystem": map[string]interface{}{ + "enabled": false, + }, + }, + "detailedErrorMessages": map[string]interface{}{ + "enabled": false, + }, + "failedRequestsTracing": map[string]interface{}{ + "enabled": false, + }, + }, + }) + case "slotConfigNames": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/slotConfigNames", + "name": "slotConfigNames", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{ + "appSettingNames": []string{}, + "connectionStringNames": []string{}, + }, + }) + case "azurestorageaccounts": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/azurestorageaccounts", + "name": "azurestorageaccounts", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{}, + }) + case "backup": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/backup", + "name": "backup", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{ + "enabled": false, + }, + }) + case "metadata": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/metadata", + "name": "metadata", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{}, + }) + case "publishingcredentials": + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": slotResourceID + "/config/publishingcredentials", + "name": "publishingcredentials", + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{ + "publishingUserName": fmt.Sprintf("$%s__%s", appName, slotName), + "publishingPassword": "mock-password", + }, + }) + default: + // Generic empty response for unknown config 
types + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": fmt.Sprintf("%s/config/%s", slotResourceID, configType), + "name": configType, + "type": "Microsoft.Web/sites/slots/config", + "properties": map[string]interface{}{}, + }) + } +} + +// ============================================================================= +// Web App Slot Basic Auth Policy Handler +// Handles /sites/{app}/slots/{slot}/basicPublishingCredentialsPolicies/(ftp|scm) +// ============================================================================= + +func (s *Server) handleWebAppSlotBasicAuthPolicy(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + parts := strings.Split(path, "/") + + subscriptionID := parts[2] + resourceGroup := parts[4] + appName := parts[8] + slotName := parts[10] + policyType := parts[12] // "ftp" or "scm" + + slotResourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Web/sites/%s/slots/%s", + subscriptionID, resourceGroup, appName, slotName) + + policyID := fmt.Sprintf("%s/basicPublishingCredentialsPolicies/%s", slotResourceID, policyType) + + switch r.Method { + case http.MethodGet: + // Return default policy (basic auth allowed) + json.NewEncoder(w).Encode(map[string]interface{}{ + "id": policyID, + "name": policyType, + "type": "Microsoft.Web/sites/slots/basicPublishingCredentialsPolicies", + "properties": map[string]interface{}{ + "allow": true, + }, + }) + + case http.MethodPut: + var req struct { + Properties struct { + Allow bool `json:"allow"` + } `json:"properties"` + } + json.NewDecoder(r.Body).Decode(&req) + + response := map[string]interface{}{ + "id": policyID, + "name": policyType, + "type": "Microsoft.Web/sites/slots/basicPublishingCredentialsPolicies", + "properties": map[string]interface{}{ + "allow": req.Properties.Allow, + }, + } + + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(response) + + default: + s.methodNotAllowed(w) + } +} + +// 
// =============================================================================
// Log Analytics Workspace Handler
// =============================================================================

// handleLogAnalytics mocks PUT/GET/DELETE for
// Microsoft.OperationalInsights/workspaces/{name}. State lives in
// s.store.logAnalyticsWorkspaces, keyed by the full ARM resource ID.
func (s *Server) handleLogAnalytics(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")

	// Path shape: /subscriptions/{sub}/resourceGroups/{rg}/providers/
	//   Microsoft.OperationalInsights/workspaces/{name}
	subscriptionID := parts[2]
	resourceGroup := parts[4]
	workspaceName := parts[8]

	resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.OperationalInsights/workspaces/%s",
		subscriptionID, resourceGroup, workspaceName)

	switch r.Method {
	case http.MethodPut:
		var req struct {
			Location   string            `json:"location"`
			Tags       map[string]string `json:"tags"`
			Properties struct {
				Sku struct {
					Name string `json:"name"`
				} `json:"sku"`
				RetentionInDays int `json:"retentionInDays"`
			} `json:"properties"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		workspace := LogAnalyticsWorkspace{
			ID:       resourceID,
			Name:     workspaceName,
			Type:     "Microsoft.OperationalInsights/workspaces",
			Location: req.Location,
			Tags:     req.Tags,
			Properties: LogAnalyticsWorkspaceProps{
				ProvisioningState: "Succeeded", // mock reports instant success
				CustomerID:        fmt.Sprintf("customer-id-%s", workspaceName),
				Sku: struct {
					Name string `json:"name"`
				}{
					Name: req.Properties.Sku.Name,
				},
				RetentionInDays: req.Properties.RetentionInDays,
			},
		}

		s.store.mu.Lock()
		s.store.logAnalyticsWorkspaces[resourceID] = workspace
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusCreated)
		json.NewEncoder(w).Encode(workspace)

	case http.MethodGet:
		s.store.mu.RLock()
		workspace, exists := s.store.logAnalyticsWorkspaces[resourceID]
		s.store.mu.RUnlock()

		if !exists {
			s.resourceNotFound(w, "Log Analytics Workspace", workspaceName)
			return
		}

		json.NewEncoder(w).Encode(workspace)

	case http.MethodDelete:
		// Idempotent: deleting a missing workspace still returns 200.
		s.store.mu.Lock()
		delete(s.store.logAnalyticsWorkspaces, resourceID)
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusOK)

	default:
		s.methodNotAllowed(w)
	}
}

// =============================================================================
// Application Insights Handler
// =============================================================================

// handleAppInsights mocks PUT/GET/DELETE for Microsoft.Insights/components/{name}.
// The instrumentation key and app ID are derived deterministically from the
// component name so repeated runs are stable.
func (s *Server) handleAppInsights(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")

	subscriptionID := parts[2]
	resourceGroup := parts[4]
	insightsName := parts[8]

	resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Insights/components/%s",
		subscriptionID, resourceGroup, insightsName)

	switch r.Method {
	case http.MethodPut:
		var req struct {
			Location   string            `json:"location"`
			Tags       map[string]string `json:"tags"`
			Kind       string            `json:"kind"`
			Properties struct {
				// Application_Type is decoded but currently not echoed back
				// in the stored properties.
				ApplicationType     string `json:"Application_Type"`
				WorkspaceResourceID string `json:"WorkspaceResourceId"`
			} `json:"properties"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		instrumentationKey := fmt.Sprintf("ikey-%s", insightsName)
		appID := fmt.Sprintf("appid-%s", insightsName)

		insights := ApplicationInsights{
			ID:       resourceID,
			Name:     insightsName,
			Type:     "Microsoft.Insights/components",
			Location: req.Location,
			Tags:     req.Tags,
			Kind:     req.Kind,
			Properties: ApplicationInsightsProps{
				ProvisioningState:   "Succeeded",
				ApplicationID:       appID,
				InstrumentationKey:  instrumentationKey,
				ConnectionString:    fmt.Sprintf("InstrumentationKey=%s;IngestionEndpoint=https://eastus-0.in.applicationinsights.azure.com/", instrumentationKey),
				WorkspaceResourceID: req.Properties.WorkspaceResourceID,
			},
		}

		s.store.mu.Lock()
		s.store.appInsights[resourceID] = insights
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusCreated)
		json.NewEncoder(w).Encode(insights)

	case http.MethodGet:
		s.store.mu.RLock()
		insights, exists := s.store.appInsights[resourceID]
		s.store.mu.RUnlock()

		if !exists {
			s.resourceNotFound(w, "Application Insights", insightsName)
			return
		}

		json.NewEncoder(w).Encode(insights)

	case http.MethodDelete:
		s.store.mu.Lock()
		delete(s.store.appInsights, resourceID)
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusOK)

	default:
		s.methodNotAllowed(w)
	}
}

// =============================================================================
// Autoscale Setting Handler
// =============================================================================

// handleAutoscaleSetting mocks PUT/GET/DELETE for
// Microsoft.Insights/autoscalesettings/{name}.
func (s *Server) handleAutoscaleSetting(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")

	subscriptionID := parts[2]
	resourceGroup := parts[4]
	settingName := parts[8]

	resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Insights/autoscalesettings/%s",
		subscriptionID, resourceGroup, settingName)

	switch r.Method {
	case http.MethodPut:
		var req struct {
			Location   string                `json:"location"`
			Tags       map[string]string     `json:"tags"`
			Properties AutoscaleSettingProps `json:"properties"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		setting := AutoscaleSetting{
			ID:       resourceID,
			Name:     settingName,
			Type:     "Microsoft.Insights/autoscalesettings",
			Location: req.Location,
			Tags:     req.Tags,
			Properties: AutoscaleSettingProps{
				ProvisioningState: "Succeeded",
				Enabled:           req.Properties.Enabled,
				TargetResourceURI: req.Properties.TargetResourceURI,
				// Target location mirrors the setting's own location.
				TargetResourceLocation: req.Location,
				Profiles:               req.Properties.Profiles,
				Notifications:          req.Properties.Notifications,
			},
		}

		s.store.mu.Lock()
		s.store.autoscaleSettings[resourceID] = setting
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusCreated)
		json.NewEncoder(w).Encode(setting)

	case http.MethodGet:
		s.store.mu.RLock()
		setting, exists := s.store.autoscaleSettings[resourceID]
		s.store.mu.RUnlock()

		if !exists {
			s.resourceNotFound(w, "Autoscale Setting", settingName)
			return
		}

		json.NewEncoder(w).Encode(setting)

	case http.MethodDelete:
		s.store.mu.Lock()
		delete(s.store.autoscaleSettings, resourceID)
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusOK)

	default:
		s.methodNotAllowed(w)
	}
}

// =============================================================================
// Action Group Handler
// =============================================================================

// handleActionGroup mocks PUT/GET/DELETE for Microsoft.Insights/actionGroups/{name}.
func (s *Server) handleActionGroup(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")

	subscriptionID := parts[2]
	resourceGroup := parts[4]
	groupName := parts[8]

	resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Insights/actionGroups/%s",
		subscriptionID, resourceGroup, groupName)

	switch r.Method {
	case http.MethodPut:
		var req struct {
			Location   string            `json:"location"`
			Tags       map[string]string `json:"tags"`
			Properties ActionGroupProps  `json:"properties"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		group := ActionGroup{
			ID:   resourceID,
			Name: groupName,
			Type: "Microsoft.Insights/actionGroups",
			// Stored as "global"; the request's location field is ignored.
			Location: "global",
			Tags:     req.Tags,
			Properties: ActionGroupProps{
				GroupShortName:   req.Properties.GroupShortName,
				Enabled:          req.Properties.Enabled,
				EmailReceivers:   req.Properties.EmailReceivers,
				WebhookReceivers: req.Properties.WebhookReceivers,
			},
		}

		s.store.mu.Lock()
		s.store.actionGroups[resourceID] = group
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusCreated)
		json.NewEncoder(w).Encode(group)

	case http.MethodGet:
		s.store.mu.RLock()
		group, exists := s.store.actionGroups[resourceID]
		s.store.mu.RUnlock()

		if !exists {
			s.resourceNotFound(w, "Action Group", groupName)
			return
		}

		json.NewEncoder(w).Encode(group)

	case http.MethodDelete:
		s.store.mu.Lock()
		delete(s.store.actionGroups, resourceID)
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusOK)

	default:
		s.methodNotAllowed(w)
	}
}

// =============================================================================
// Metric Alert Handler
// =============================================================================

// handleMetricAlert mocks PUT/GET/DELETE for Microsoft.Insights/metricAlerts/{name}.
func (s *Server) handleMetricAlert(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")

	subscriptionID := parts[2]
	resourceGroup := parts[4]
	alertName := parts[8]

	resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Insights/metricAlerts/%s",
		subscriptionID, resourceGroup, alertName)

	switch r.Method {
	case http.MethodPut:
		var req struct {
			Location   string            `json:"location"`
			Tags       map[string]string `json:"tags"`
			Properties MetricAlertProps  `json:"properties"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		alert := MetricAlert{
			ID:   resourceID,
			Name: alertName,
			Type: "Microsoft.Insights/metricAlerts",
			// Metric alerts are stored as "global", like action groups.
			Location: "global",
			Tags:     req.Tags,
			Properties: MetricAlertProps{
				Description:         req.Properties.Description,
				Severity:            req.Properties.Severity,
				Enabled:             req.Properties.Enabled,
				Scopes:              req.Properties.Scopes,
				EvaluationFrequency: req.Properties.EvaluationFrequency,
				WindowSize:          req.Properties.WindowSize,
				Criteria:            req.Properties.Criteria,
				Actions:             req.Properties.Actions,
			},
		}

		s.store.mu.Lock()
		s.store.metricAlerts[resourceID] = alert
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusCreated)
		json.NewEncoder(w).Encode(alert)

	case http.MethodGet:
		s.store.mu.RLock()
		alert, exists := s.store.metricAlerts[resourceID]
		s.store.mu.RUnlock()

		if !exists {
			s.resourceNotFound(w, "Metric Alert", alertName)
			return
		}

		json.NewEncoder(w).Encode(alert)

	case http.MethodDelete:
		s.store.mu.Lock()
		delete(s.store.metricAlerts, resourceID)
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusOK)

	default:
		s.methodNotAllowed(w)
	}
}

// =============================================================================
// Diagnostic Setting Handler
// =============================================================================

// handleDiagnosticSetting mocks PUT/GET/DELETE for diagnostic settings.
// Unlike the other handlers, the setting is nested under an arbitrary parent
// resource, so the full request path is used as the storage key.
func (s *Server) handleDiagnosticSetting(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	// Diagnostic settings are nested under resources, extract name from end
	parts := strings.Split(path, "/")
	settingName := parts[len(parts)-1]

	// Use full path as resource ID
	resourceID := path

	switch r.Method {
	case http.MethodPut:
		var req struct {
			Properties DiagnosticSettingProps `json:"properties"`
		}
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			s.badRequest(w, "Invalid request body")
			return
		}

		setting := DiagnosticSetting{
			ID:   resourceID,
			Name: settingName,
			Type: "Microsoft.Insights/diagnosticSettings",
			Properties: DiagnosticSettingProps{
				WorkspaceID: req.Properties.WorkspaceID,
				Logs:        req.Properties.Logs,
				Metrics:     req.Properties.Metrics,
			},
		}

		s.store.mu.Lock()
		s.store.diagnosticSettings[resourceID] = setting
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusCreated)
		json.NewEncoder(w).Encode(setting)

	case http.MethodGet:
		s.store.mu.RLock()
		setting, exists := s.store.diagnosticSettings[resourceID]
		s.store.mu.RUnlock()

		if !exists {
			s.resourceNotFound(w, "Diagnostic Setting", settingName)
			return
		}

		json.NewEncoder(w).Encode(setting)

	case http.MethodDelete:
		s.store.mu.Lock()
		delete(s.store.diagnosticSettings, resourceID)
		s.store.mu.Unlock()

		w.WriteHeader(http.StatusOK)

	default:
		s.methodNotAllowed(w)
	}
}

// =============================================================================
// Error Responses
// =============================================================================

// notFound writes a 404 "PathNotFound" AzureError for unroutable paths.
func (s *Server) notFound(w http.ResponseWriter, path string) {
	w.WriteHeader(http.StatusNotFound)
	json.NewEncoder(w).Encode(AzureError{
		Error: AzureErrorDetail{
			Code:    "PathNotFound",
			Message: fmt.Sprintf("The path '%s' is not a valid Azure API path", path),
		},
	})
}

// resourceNotFound writes a 404 "ResourceNotFound" AzureError for a known
// route whose backing resource does not exist in the store.
func (s *Server) resourceNotFound(w http.ResponseWriter, resourceType, name string) {
	w.WriteHeader(http.StatusNotFound)
	json.NewEncoder(w).Encode(AzureError{
		Error: AzureErrorDetail{
			Code:    "ResourceNotFound",
			Message: fmt.Sprintf("The %s '%s' was not found.", resourceType, name),
		},
	})
}

// badRequest writes a 400 AzureError with the supplied message.
func (s *Server) badRequest(w http.ResponseWriter, message string) {
	w.WriteHeader(http.StatusBadRequest)
	json.NewEncoder(w).Encode(AzureError{
		Error: AzureErrorDetail{
			Code:    "BadRequest",
			Message: message,
		},
	})
}

// methodNotAllowed writes a 405 AzureError.
func (s *Server) methodNotAllowed(w http.ResponseWriter) {
	w.WriteHeader(http.StatusMethodNotAllowed)
	json.NewEncoder(w).Encode(AzureError{
		Error: AzureErrorDetail{
			Code:    "MethodNotAllowed",
			Message: "The HTTP method is not allowed for this resource",
		},
	})
}

// =============================================================================
// OAuth Token Handler (for Azure AD authentication)
// =============================================================================

// OAuthToken is the JSON shape of an AAD token endpoint response.
type OAuthToken struct {
	AccessToken  string `json:"access_token"`
	ExpiresIn    int    `json:"expires_in"`
	ExpiresOn    int64  `json:"expires_on,omitempty"`
	NotBefore    int64  `json:"not_before,omitempty"`
	TokenType    string `json:"token_type"`
	Resource     string `json:"resource,omitempty"`
	Scope        string `json:"scope,omitempty"`
	RefreshToken string `json:"refresh_token,omitempty"`
}

// handleOpenIDConfiguration serves the OpenID Connect discovery document.
// (Function body continues on the following lines of this file.)
func (s *Server) handleOpenIDConfiguration(w http.ResponseWriter, r *http.Request) {
	// Return OpenID Connect configuration document
	// This is required by MSAL for Azure CLI authentication
	// Build endpoints from the request's Host header so the discovery
	// document points back at this mock; fall back to the real AAD host.
	host := r.Host
	if host == "" {
		host = "login.microsoftonline.com"
	}

	config := map[string]interface{}{
		"issuer":                                fmt.Sprintf("https://%s/mock-tenant-id/v2.0", host),
		"authorization_endpoint":                fmt.Sprintf("https://%s/mock-tenant-id/oauth2/v2.0/authorize", host),
		"token_endpoint":                        fmt.Sprintf("https://%s/mock-tenant-id/oauth2/v2.0/token", host),
		"device_authorization_endpoint":         fmt.Sprintf("https://%s/mock-tenant-id/oauth2/v2.0/devicecode", host),
		"userinfo_endpoint":                     fmt.Sprintf("https://%s/oidc/userinfo", host),
		"end_session_endpoint":                  fmt.Sprintf("https://%s/mock-tenant-id/oauth2/v2.0/logout", host),
		"jwks_uri":                              fmt.Sprintf("https://%s/mock-tenant-id/discovery/v2.0/keys", host),
		"response_types_supported":              []string{"code", "id_token", "code id_token", "token id_token", "token"},
		"response_modes_supported":              []string{"query", "fragment", "form_post"},
		"subject_types_supported":               []string{"pairwise"},
		"id_token_signing_alg_values_supported": []string{"RS256"},
		"scopes_supported":                      []string{"openid", "profile", "email", "offline_access"},
		"token_endpoint_auth_methods_supported": []string{"client_secret_post", "client_secret_basic"},
		"claims_supported":                      []string{"sub", "iss", "aud", "exp", "iat", "name", "email"},
		"tenant_region_scope":                   "NA",
		"cloud_instance_name":                   "microsoftonline.com",
		"cloud_graph_host_name":                 "graph.windows.net",
		"msgraph_host":                          "graph.microsoft.com",
	}

	json.NewEncoder(w).Encode(config)
}

// handleInstanceDiscovery serves the MSAL instance discovery metadata.
func (s *Server) handleInstanceDiscovery(w http.ResponseWriter, r *http.Request) {
	// Return instance discovery response for MSAL
	response := map[string]interface{}{
		"tenant_discovery_endpoint": "https://login.microsoftonline.com/mock-tenant-id/v2.0/.well-known/openid-configuration",
		"api-version":               "1.1",
		"metadata": []map[string]interface{}{
			{
				"preferred_network": "login.microsoftonline.com",
				"preferred_cache":   "login.windows.net",
				"aliases":           []string{"login.microsoftonline.com", "login.windows.net", "login.microsoft.com"},
			},
		},
	}

	json.NewEncoder(w).Encode(response)
}

// handleOAuth issues a mock AAD access token in well-formed JWT shape.
// (Function body continues on the following lines of this file.)
func (s *Server) handleOAuth(w http.ResponseWriter, r *http.Request) {
	// Return a mock OAuth token that looks like a valid JWT
	// JWT format: header.payload.signature (all base64url encoded)
	// The Azure SDK parses claims from the token, so it must be valid JWT format

	now := time.Now().Unix()
	exp := now + 3600

	// JWT Header (typ: JWT, alg: RS256)
	header := "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9"

	// JWT Payload with required Azure claims
	// Decoded: {"aud":"https://management.azure.com/","iss":"https://sts.windows.net/mock-tenant-id/","iat":NOW,"nbf":NOW,"exp":EXP,"oid":"mock-object-id","sub":"mock-subject","tid":"mock-tenant-id"}
	payloadJSON := fmt.Sprintf(`{"aud":"https://management.azure.com/","iss":"https://sts.windows.net/mock-tenant-id/","iat":%d,"nbf":%d,"exp":%d,"oid":"mock-object-id","sub":"mock-subject","tid":"mock-tenant-id"}`, now, now, exp)
	payload := base64.RawURLEncoding.EncodeToString([]byte(payloadJSON))

	// Mock signature (doesn't need to be valid, just present)
	signature := "mock-signature-placeholder"

	mockJWT := header + "." + payload + "." +
		signature

	token := OAuthToken{
		AccessToken:  mockJWT,
		ExpiresIn:    3600,
		ExpiresOn:    exp,
		NotBefore:    now,
		TokenType:    "Bearer",
		Resource:     "https://management.azure.com/",
		Scope:        "https://management.azure.com/.default",
		RefreshToken: "mock-refresh-token",
	}
	json.NewEncoder(w).Encode(token)
}

// =============================================================================
// Provider Registration Handler
// =============================================================================

// handleListProviders reports every namespace the azurerm provider probes
// as already registered, so Terraform/OpenTofu skips real registration.
func (s *Server) handleListProviders(w http.ResponseWriter, r *http.Request) {
	// Return a list of registered providers that the azurerm provider needs
	providers := []map[string]interface{}{
		{"namespace": "Microsoft.Cdn", "registrationState": "Registered"},
		{"namespace": "Microsoft.Network", "registrationState": "Registered"},
		{"namespace": "Microsoft.Storage", "registrationState": "Registered"},
		{"namespace": "Microsoft.Resources", "registrationState": "Registered"},
		{"namespace": "Microsoft.Authorization", "registrationState": "Registered"},
		{"namespace": "Microsoft.Web", "registrationState": "Registered"},
		{"namespace": "Microsoft.Insights", "registrationState": "Registered"},
		{"namespace": "Microsoft.OperationalInsights", "registrationState": "Registered"},
	}
	response := map[string]interface{}{
		"value": providers,
	}
	json.NewEncoder(w).Encode(response)
}

// handleProviderRegistration acknowledges any single provider registration check.
func (s *Server) handleProviderRegistration(w http.ResponseWriter, r *http.Request) {
	// Return success for provider registration checks
	response := map[string]interface{}{
		"registrationState": "Registered",
	}
	json.NewEncoder(w).Encode(response)
}

// =============================================================================
// Subscription Handler
// =============================================================================

// handleSubscription echoes a minimal, always-enabled subscription document
// for the subscription ID found in the request path.
func (s *Server) handleSubscription(w http.ResponseWriter, r *http.Request) {
	path := r.URL.Path
	parts := strings.Split(path, "/")
	subscriptionID := parts[2]

	subscription := map[string]interface{}{
		"id":             fmt.Sprintf("/subscriptions/%s", subscriptionID),
		"subscriptionId": subscriptionID,
		"displayName":    "Mock Subscription",
		"state":          "Enabled",
	}
	json.NewEncoder(w).Encode(subscription)
}

// =============================================================================
// Main
// =============================================================================

// main prints a route summary and starts the mock server on :8080.
// NOTE(review): column alignment inside the banner strings may have been
// collapsed by whitespace mangling of this chunk — confirm against the
// original file before relying on exact log output.
func main() {
	server := NewServer()

	log.Println("Azure Mock API Server")
	log.Println("=====================")
	log.Println("ARM Endpoints:")
	log.Println(" OAuth Token: /{tenant}/oauth2/token (POST)")
	log.Println(" Subscriptions: /subscriptions/{sub}")
	log.Println(" CDN Profiles: .../Microsoft.Cdn/profiles/{name}")
	log.Println(" CDN Endpoints: .../Microsoft.Cdn/profiles/{profile}/endpoints/{name}")
	log.Println(" DNS Zones: .../Microsoft.Network/dnszones/{name}")
	log.Println(" DNS CNAME: .../Microsoft.Network/dnszones/{zone}/CNAME/{name}")
	log.Println(" Storage Accounts: .../Microsoft.Storage/storageAccounts/{name}")
	log.Println("")
	log.Println("App Service Endpoints:")
	log.Println(" Service Plans: .../Microsoft.Web/serverfarms/{name}")
	log.Println(" Web Apps: .../Microsoft.Web/sites/{name}")
	log.Println(" Web App Slots: .../Microsoft.Web/sites/{app}/slots/{slot}")
	log.Println(" Web App Config: .../Microsoft.Web/sites/{app}/config/web")
	log.Println("")
	log.Println("Monitoring Endpoints:")
	log.Println(" Log Analytics: .../Microsoft.OperationalInsights/workspaces/{name}")
	log.Println(" App Insights: .../Microsoft.Insights/components/{name}")
	log.Println(" Autoscale: .../Microsoft.Insights/autoscalesettings/{name}")
	log.Println(" Action Groups: .../Microsoft.Insights/actionGroups/{name}")
	log.Println(" Metric Alerts: .../Microsoft.Insights/metricAlerts/{name}")
	log.Println("")
	log.Println("Blob Storage Endpoints (Host: {account}.blob.core.windows.net):")
	log.Println(" Containers: /{container}?restype=container")
	log.Println(" Blobs: /{container}/{blob}")
	log.Println("")
	log.Println("Starting server on :8080...")

	// ListenAndServe blocks; any startup failure aborts the process.
	if err := http.ListenAndServe(":8080", server); err != nil {
		log.Fatalf("Server failed: %v", err)
	}
}
diff --git a/testing/docker/docker-compose.integration.yml b/testing/docker/docker-compose.integration.yml
new file mode 100644
index 00000000..0faeb76c
--- /dev/null
+++ b/testing/docker/docker-compose.integration.yml
@@ -0,0 +1,182 @@
services:
  # =============================================================================
  # LocalStack - AWS services emulator (S3, Route53, DynamoDB, etc.)
  # =============================================================================
  # Services use fixed IPs on 172.28.0.0/16 so test-runner's extra_hosts can
  # pin mocked public hostnames to specific containers.
  localstack:
    image: localstack/localstack:latest
    container_name: integration-localstack
    ports:
      - "4566:4566"
    environment:
      - DEBUG=0
      - SERVICES=s3,route53,sts,iam,dynamodb,acm
      - DEFAULT_REGION=us-east-1
      - AWS_DEFAULT_REGION=us-east-1
      - AWS_ACCESS_KEY_ID=test
      - AWS_SECRET_ACCESS_KEY=test
      - PERSISTENCE=0
      - EAGER_SERVICE_LOADING=1
    volumes:
      - localstack-data:/var/lib/localstack
      - /var/run/docker.sock:/var/run/docker.sock
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:4566/_localstack/health"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      integration-network:
        ipv4_address: 172.28.0.2

  # =============================================================================
  # Moto - CloudFront emulator (LocalStack doesn't support CloudFront well)
  # =============================================================================
  moto:
    image: motoserver/moto:latest
    container_name: integration-moto
    ports:
      - "5555:5000"
    environment:
      - MOTO_PORT=5000
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:5000/moto-api/"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      integration-network:
        ipv4_address: 172.28.0.3

  # =============================================================================
  # Azure Mock - Azure REST API mock server for CDN, DNS, Storage
  # =============================================================================
  azure-mock:
    build:
      context: ./azure-mock
      dockerfile: Dockerfile
    container_name: integration-azure-mock
    ports:
      - "8090:8080"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      integration-network:
        ipv4_address: 172.28.0.4

  # =============================================================================
  # Smocker - API mock server for nullplatform API
  # =============================================================================
  smocker:
    image: thiht/smocker:latest
    container_name: integration-smocker
    ports:
      - "8080:8080" # Mock server port (HTTP)
      - "8081:8081" # Admin API port (configure mocks)
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8081/version"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      integration-network:
        ipv4_address: 172.28.0.11

  # =============================================================================
  # Nginx - HTTPS reverse proxy for smocker (np CLI requires HTTPS)
  # =============================================================================
  nginx-proxy:
    image: nginx:alpine
    container_name: integration-nginx
    ports:
      - "8443:443" # HTTPS port for np CLI
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./certs:/certs:ro
    depends_on:
      - smocker
      - azure-mock
    healthcheck:
      test: ["CMD", "curl", "-fk", "https://localhost:443/mocks"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      integration-network:
        ipv4_address: 172.28.0.10

  # =============================================================================
  # Test Runner - Container that runs the integration tests
  # =============================================================================
  test-runner:
    build:
      context: .
      dockerfile: Dockerfile.test-runner
    container_name: integration-test-runner
    environment:
      # Terminal for BATS pretty formatter
      - TERM=xterm-256color
      # nullplatform CLI configuration
      - NULLPLATFORM_API_KEY=test-api-key
      # AWS Configuration - point to LocalStack
      - AWS_ENDPOINT_URL=http://localstack:4566
      - LOCALSTACK_ENDPOINT=http://localstack:4566
      - MOTO_ENDPOINT=http://moto:5000
      - AWS_ACCESS_KEY_ID=test
      - AWS_SECRET_ACCESS_KEY=test
      - AWS_DEFAULT_REGION=us-east-1
      - AWS_PAGER=
      # Smocker configuration
      - SMOCKER_HOST=http://smocker:8081
      # Azure Mock configuration (handles both ARM API and Blob Storage)
      - AZURE_MOCK_ENDPOINT=http://azure-mock:8080
      # ARM_ACCESS_KEY is required by azurerm backend to build auth headers
      # (azure-mock ignores authentication, but SDK validates base64 format)
      - ARM_ACCESS_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==
      # Azure credentials for mock (azurerm provider)
      - ARM_CLIENT_ID=mock-client-id
      - ARM_CLIENT_SECRET=mock-client-secret
      - ARM_TENANT_ID=mock-tenant-id
      - ARM_SUBSCRIPTION_ID=mock-subscription-id
      - ARM_SKIP_PROVIDER_REGISTRATION=true
      # Azure CLI service principal credentials (same as ARM_*)
      - AZURE_CLIENT_ID=mock-client-id
      - AZURE_CLIENT_SECRET=mock-client-secret
      - AZURE_TENANT_ID=mock-tenant-id
      - AZURE_SUBSCRIPTION_ID=mock-subscription-id
      # Disable TLS verification for np CLI (talking to smocker)
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      # Python/Azure CLI certificate configuration
      - REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
      - CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
      - SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
      - AZURE_CLI_DISABLE_CONNECTION_VERIFICATION=1
      - PATH=/root/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
    extra_hosts:
API to smocker mock server (via nginx-proxy) + - "api.nullplatform.com:172.28.0.10" + # Redirect Azure APIs to azure-mock server (via nginx-proxy for HTTPS) + - "management.azure.com:172.28.0.10" + - "login.microsoftonline.com:172.28.0.10" + # Redirect Azure Blob Storage to azure-mock (via nginx-proxy for HTTPS) + - "devstoreaccount1.blob.core.windows.net:172.28.0.10" + volumes: + # Mount the project for tests + - ../..:/workspace + # Mount the TLS certificate for trusting smocker + - ./certs/cert.pem:/usr/local/share/ca-certificates/smocker.crt:ro + working_dir: /workspace + networks: + - integration-network + +networks: + integration-network: + driver: bridge + ipam: + config: + - subnet: 172.28.0.0/16 + +volumes: + localstack-data: diff --git a/testing/docker/generate-certs.sh b/testing/docker/generate-certs.sh new file mode 100755 index 00000000..02f7f7bf --- /dev/null +++ b/testing/docker/generate-certs.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Generate self-signed certificates for smocker TLS + +CERT_DIR="$(dirname "$0")/certs" +mkdir -p "$CERT_DIR" + +# Generate private key +openssl genrsa -out "$CERT_DIR/key.pem" 2048 2>/dev/null + +# Generate self-signed certificate +openssl req -new -x509 \ + -key "$CERT_DIR/key.pem" \ + -out "$CERT_DIR/cert.pem" \ + -days 365 \ + -subj "/CN=api.nullplatform.com" \ + -addext "subjectAltName=DNS:api.nullplatform.com,DNS:localhost" \ + 2>/dev/null + +echo "Certificates generated in $CERT_DIR" diff --git a/testing/docker/nginx.conf b/testing/docker/nginx.conf new file mode 100644 index 00000000..f3940af1 --- /dev/null +++ b/testing/docker/nginx.conf @@ -0,0 +1,83 @@ +events { + worker_connections 1024; +} + +http { + upstream smocker { + server smocker:8080; + } + + upstream azure_mock { + server azure-mock:8080; + } + + + # nullplatform API proxy + server { + listen 443 ssl; + server_name api.nullplatform.com; + + ssl_certificate /certs/cert.pem; + ssl_certificate_key /certs/key.pem; + + location / { + proxy_pass http://smocker; 
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
    }

    # Azure Resource Manager API proxy
    server {
        listen 443 ssl;
        server_name management.azure.com;

        ssl_certificate /certs/cert.pem;
        ssl_certificate_key /certs/key.pem;

        location / {
            proxy_pass http://azure_mock;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
    }

    # Azure AD OAuth proxy
    server {
        listen 443 ssl;
        server_name login.microsoftonline.com;

        ssl_certificate /certs/cert.pem;
        ssl_certificate_key /certs/key.pem;

        location / {
            proxy_pass http://azure_mock;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
    }

    # Azure Blob Storage proxy (redirect to Azure Mock)
    # Blob storage API is routed to azure-mock which handles it based on Host header
    server {
        listen 443 ssl;
        server_name devstoreaccount1.blob.core.windows.net;

        ssl_certificate /certs/cert.pem;
        ssl_certificate_key /certs/key.pem;

        location / {
            proxy_pass http://azure_mock;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
    }
}
diff --git a/testing/integration_helpers.sh b/testing/integration_helpers.sh
new file mode 100755
index 00000000..c8d620e3
--- /dev/null
+++ b/testing/integration_helpers.sh
@@ -0,0 +1,924 @@
#!/bin/bash
# =============================================================================
# Integration Test Helpers for BATS
#
# Provides helper functions for integration testing with cloud provider support.
+# +# Usage in BATS test files: +# setup_file() { +# load "${PROJECT_ROOT}/testing/integration_helpers.sh" +# integration_setup --cloud-provider aws +# } +# +# teardown_file() { +# integration_teardown +# } +# +# Supported cloud providers: aws, azure, gcp +# ============================================================================= + +# ============================================================================= +# Colors +# ============================================================================= +INTEGRATION_RED='\033[0;31m' +INTEGRATION_GREEN='\033[0;32m' +INTEGRATION_YELLOW='\033[1;33m' +INTEGRATION_CYAN='\033[0;36m' +INTEGRATION_NC='\033[0m' + +# ============================================================================= +# Global State +# ============================================================================= +INTEGRATION_CLOUD_PROVIDER="${INTEGRATION_CLOUD_PROVIDER:-}" +INTEGRATION_COMPOSE_FILE="${INTEGRATION_COMPOSE_FILE:-}" + +# Determine module root from PROJECT_ROOT environment variable +# PROJECT_ROOT is set by the test runner (run_integration_tests.sh) +if [[ -z "${INTEGRATION_MODULE_ROOT:-}" ]]; then + INTEGRATION_MODULE_ROOT="${PROJECT_ROOT:-.}" +fi +export INTEGRATION_MODULE_ROOT + +# Default AWS/LocalStack configuration (can be overridden) +export LOCALSTACK_ENDPOINT="${LOCALSTACK_ENDPOINT:-http://localhost:4566}" +export MOTO_ENDPOINT="${MOTO_ENDPOINT:-http://localhost:5555}" +export AWS_ENDPOINT_URL="${AWS_ENDPOINT_URL:-$LOCALSTACK_ENDPOINT}" +export AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID:-test}" +export AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY:-test}" +export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-east-1}" +export AWS_PAGER="" + +# Default Azure Mock configuration (can be overridden) +export AZURE_MOCK_ENDPOINT="${AZURE_MOCK_ENDPOINT:-http://localhost:8090}" +export ARM_CLIENT_ID="${ARM_CLIENT_ID:-mock-client-id}" +export ARM_CLIENT_SECRET="${ARM_CLIENT_SECRET:-mock-client-secret}" +export 
ARM_TENANT_ID="${ARM_TENANT_ID:-mock-tenant-id}" +export ARM_SUBSCRIPTION_ID="${ARM_SUBSCRIPTION_ID:-mock-subscription-id}" +export ARM_SKIP_PROVIDER_REGISTRATION="${ARM_SKIP_PROVIDER_REGISTRATION:-true}" + +# Smocker configuration for API mocking +export SMOCKER_HOST="${SMOCKER_HOST:-http://localhost:8081}" + +# ============================================================================= +# Setup & Teardown +# ============================================================================= + +integration_setup() { + local cloud_provider="" + + # Parse arguments + while [[ $# -gt 0 ]]; do + case $1 in + --cloud-provider) + cloud_provider="$2" + shift 2 + ;; + *) + echo -e "${INTEGRATION_RED}Unknown argument: $1${INTEGRATION_NC}" + return 1 + ;; + esac + done + + # Validate cloud provider + if [[ -z "$cloud_provider" ]]; then + echo -e "${INTEGRATION_RED}Error: --cloud-provider is required${INTEGRATION_NC}" + echo "Usage: integration_setup --cloud-provider " + return 1 + fi + + case "$cloud_provider" in + aws|azure|gcp) + INTEGRATION_CLOUD_PROVIDER="$cloud_provider" + ;; + *) + echo -e "${INTEGRATION_RED}Error: Unsupported cloud provider: $cloud_provider${INTEGRATION_NC}" + echo "Supported providers: aws, azure, gcp" + return 1 + ;; + esac + + export INTEGRATION_CLOUD_PROVIDER + + # Find docker-compose.yml + INTEGRATION_COMPOSE_FILE=$(find_compose_file) + export INTEGRATION_COMPOSE_FILE + + echo -e "${INTEGRATION_CYAN}Integration Setup${INTEGRATION_NC}" + echo " Cloud Provider: $INTEGRATION_CLOUD_PROVIDER" + echo " Module Root: $INTEGRATION_MODULE_ROOT" + echo "" + + # Call provider-specific setup + case "$INTEGRATION_CLOUD_PROVIDER" in + aws) + _setup_aws + ;; + azure) + _setup_azure + ;; + gcp) + _setup_gcp + ;; + esac +} + +integration_teardown() { + echo "" + echo -e "${INTEGRATION_CYAN}Integration Teardown${INTEGRATION_NC}" + + # Call provider-specific teardown + case "$INTEGRATION_CLOUD_PROVIDER" in + aws) + _teardown_aws + ;; + azure) + _teardown_azure + ;; + 
gcp) + _teardown_gcp + ;; + esac +} + +# ============================================================================= +# AWS Provider (LocalStack + Moto) +# ============================================================================= + +_setup_aws() { + echo " LocalStack: $LOCALSTACK_ENDPOINT" + echo " Moto: $MOTO_ENDPOINT" + echo "" + + # Configure OpenTofu/Terraform S3 backend for LocalStack + # These settings allow the S3 backend to work with LocalStack's S3 emulation + export TOFU_INIT_VARIABLES="${TOFU_INIT_VARIABLES:-}" + TOFU_INIT_VARIABLES="$TOFU_INIT_VARIABLES -backend-config=force_path_style=true" + TOFU_INIT_VARIABLES="$TOFU_INIT_VARIABLES -backend-config=skip_credentials_validation=true" + TOFU_INIT_VARIABLES="$TOFU_INIT_VARIABLES -backend-config=skip_metadata_api_check=true" + TOFU_INIT_VARIABLES="$TOFU_INIT_VARIABLES -backend-config=skip_region_validation=true" + TOFU_INIT_VARIABLES="$TOFU_INIT_VARIABLES -backend-config=endpoints={s3=\"$LOCALSTACK_ENDPOINT\",dynamodb=\"$LOCALSTACK_ENDPOINT\"}" + export TOFU_INIT_VARIABLES + + # Start containers if compose file exists + if [[ -n "$INTEGRATION_COMPOSE_FILE" ]]; then + _start_localstack + else + echo -e "${INTEGRATION_YELLOW}Warning: No docker-compose.yml found, skipping container startup${INTEGRATION_NC}" + fi +} + +_teardown_aws() { + if [[ -n "$INTEGRATION_COMPOSE_FILE" ]]; then + _stop_localstack + fi +} + +_start_localstack() { + echo -e " Starting LocalStack..." + docker compose -f "$INTEGRATION_COMPOSE_FILE" up -d 2>/dev/null + + echo -n " Waiting for LocalStack to be ready" + local max_attempts=30 + local attempt=0 + + while [[ $attempt -lt $max_attempts ]]; do + if curl -s "$LOCALSTACK_ENDPOINT/_localstack/health" 2>/dev/null | jq -e '.services.s3 == "running"' > /dev/null 2>&1; then + echo "" + echo -e " ${INTEGRATION_GREEN}LocalStack is ready${INTEGRATION_NC}" + echo "" + return 0 + fi + attempt=$((attempt + 1)) + sleep 2 + echo -n "." 
+ done + + echo "" + echo -e " ${INTEGRATION_RED}LocalStack failed to start${INTEGRATION_NC}" + return 1 +} + +_stop_localstack() { + echo " Stopping LocalStack..." + docker compose -f "$INTEGRATION_COMPOSE_FILE" down -v 2>/dev/null || true +} + +# ============================================================================= +# Azure Provider (Azure Mock) +# ============================================================================= + +_setup_azure() { + echo " Azure Mock: $AZURE_MOCK_ENDPOINT" + echo "" + + # Azure tests use: + # - Azure Mock for ARM APIs (CDN, DNS, etc.) AND Blob Storage (terraform state) + # - nginx proxy to redirect *.blob.core.windows.net to Azure Mock + + # Install the self-signed certificate for nginx proxy + # This allows the Azure SDK to trust the proxy for blob storage + if [[ -f /usr/local/share/ca-certificates/smocker.crt ]]; then + echo -n " Installing TLS certificate..." + update-ca-certificates >/dev/null 2>&1 || true + # Also set for Python/requests (used by Azure CLI) + export REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt + export CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt + echo -e " ${INTEGRATION_GREEN}done${INTEGRATION_NC}" + fi + + # Start containers if compose file exists + if [[ -n "$INTEGRATION_COMPOSE_FILE" ]]; then + _start_azure_mock + else + echo -e "${INTEGRATION_YELLOW}Warning: No docker-compose.yml found, skipping container startup${INTEGRATION_NC}" + fi + + # Configure Azure CLI to work with mock + _configure_azure_cli +} + +_teardown_azure() { + if [[ -n "$INTEGRATION_COMPOSE_FILE" ]]; then + _stop_azure_mock + fi +} + +_start_azure_mock() { + echo -e " Starting Azure Mock..." 
+ docker compose -f "$INTEGRATION_COMPOSE_FILE" up -d azure-mock nginx-proxy smocker 2>/dev/null + + # Wait for Azure Mock + echo -n " Waiting for Azure Mock to be ready" + local max_attempts=30 + local attempt=0 + + while [[ $attempt -lt $max_attempts ]]; do + if curl -s "$AZURE_MOCK_ENDPOINT/health" 2>/dev/null | jq -e '.status == "ok"' > /dev/null 2>&1; then + echo "" + echo -e " ${INTEGRATION_GREEN}Azure Mock is ready${INTEGRATION_NC}" + break + fi + attempt=$((attempt + 1)) + sleep 2 + echo -n "." + done + + if [[ $attempt -ge $max_attempts ]]; then + echo "" + echo -e " ${INTEGRATION_RED}Azure Mock failed to start${INTEGRATION_NC}" + return 1 + fi + + # Create tfstate container in Azure Mock (required by azurerm backend) + # The account name comes from Host header, path is just /{container} + echo -n " Creating tfstate container..." + curl -s -X PUT "${AZURE_MOCK_ENDPOINT}/tfstate?restype=container" \ + -H "Host: devstoreaccount1.blob.core.windows.net" \ + -H "x-ms-version: 2021-06-08" >/dev/null 2>&1 + echo -e " ${INTEGRATION_GREEN}done${INTEGRATION_NC}" + + # Wait for nginx proxy to be ready (handles blob storage redirect) + echo -n " Waiting for nginx proxy to be ready" + attempt=0 + + while [[ $attempt -lt $max_attempts ]]; do + if curl -sk "https://localhost:443/mocks" >/dev/null 2>&1; then + echo "" + echo -e " ${INTEGRATION_GREEN}nginx proxy is ready${INTEGRATION_NC}" + break + fi + attempt=$((attempt + 1)) + sleep 2 + echo -n "." + done + + if [[ $attempt -ge $max_attempts ]]; then + echo "" + echo -e " ${INTEGRATION_YELLOW}Warning: nginx proxy health check failed, continuing anyway${INTEGRATION_NC}" + fi + + echo "" + return 0 +} + +_stop_azure_mock() { + echo " Stopping Azure Mock..." + docker compose -f "$INTEGRATION_COMPOSE_FILE" down -v 2>/dev/null || true +} + +_configure_azure_cli() { + # Check if Azure CLI is available + if ! 
command -v az &>/dev/null; then + echo -e " ${INTEGRATION_YELLOW}Warning: Azure CLI not installed, skipping configuration${INTEGRATION_NC}" + return 0 + fi + + echo "" + echo -e " ${INTEGRATION_CYAN}Configuring Azure CLI...${INTEGRATION_NC}" + + local azure_dir="$HOME/.azure" + mkdir -p "$azure_dir" + + # Generate timestamps for token + local now=$(date +%s) + local exp=$((now + 86400)) # 24 hours from now + + # Create the azureProfile.json (subscription info) + cat > "$azure_dir/azureProfile.json" << EOF +{ + "installationId": "mock-installation-id", + "subscriptions": [ + { + "id": "${ARM_SUBSCRIPTION_ID}", + "name": "Mock Subscription", + "state": "Enabled", + "user": { + "name": "${ARM_CLIENT_ID}", + "type": "servicePrincipal" + }, + "isDefault": true, + "tenantId": "${ARM_TENANT_ID}", + "environmentName": "AzureCloud" + } + ] +} +EOF + + # Create the service principal secret storage file + # This is where Azure CLI stores secrets for service principals after login + # Format must match what Azure CLI identity.py expects (uses 'tenant' not 'tenant_id') + cat > "$azure_dir/service_principal_entries.json" << EOF +[ + { + "client_id": "${ARM_CLIENT_ID}", + "tenant": "${ARM_TENANT_ID}", + "client_secret": "${ARM_CLIENT_SECRET}" + } +] +EOF + + # Set proper permissions + chmod 600 "$azure_dir"/*.json + + echo -e " ${INTEGRATION_GREEN}Azure CLI configured with mock credentials${INTEGRATION_NC}" + return 0 +} + +# ============================================================================= +# GCP Provider (Fake GCS Server) - Placeholder +# ============================================================================= + +_setup_gcp() { + echo -e "${INTEGRATION_YELLOW}GCP provider setup not yet implemented${INTEGRATION_NC}" + echo " Fake GCS Server endpoint would be configured here" + echo "" +} + +_teardown_gcp() { + echo -e "${INTEGRATION_YELLOW}GCP provider teardown not yet implemented${INTEGRATION_NC}" +} + +# 
============================================================================= +# Utility Functions +# ============================================================================= + +find_compose_file() { + local search_paths=( + "${BATS_TEST_DIRNAME:-}/docker-compose.yml" + "${BATS_TEST_DIRNAME:-}/../docker-compose.yml" + "${INTEGRATION_MODULE_ROOT}/tests/integration/docker-compose.yml" + ) + + for path in "${search_paths[@]}"; do + if [[ -f "$path" ]]; then + echo "$path" + return 0 + fi + done + + # Return success with empty output - compose file is optional + # (containers may already be managed by the test runner) + return 0 +} + +# ============================================================================= +# AWS Local Commands +# ============================================================================= + +# Execute AWS CLI against LocalStack +aws_local() { + aws --endpoint-url="$LOCALSTACK_ENDPOINT" --no-cli-pager --no-cli-auto-prompt "$@" +} + +# Execute AWS CLI against Moto (for CloudFront) +aws_moto() { + aws --endpoint-url="$MOTO_ENDPOINT" --no-cli-pager --no-cli-auto-prompt "$@" +} + +# ============================================================================= +# Azure Mock Commands +# ============================================================================= + +# Execute a GET request against Azure Mock API +# Usage: azure_mock "/subscriptions/sub-id/resourceGroups/rg/providers/Microsoft.Cdn/profiles/profile-name" +azure_mock() { + local path="$1" + curl -s "${AZURE_MOCK_ENDPOINT}${path}" 2>/dev/null +} + +# Execute a PUT request against Azure Mock API +# Usage: azure_mock_put "/path" '{"json": "body"}' +azure_mock_put() { + local path="$1" + local body="$2" + curl -s -X PUT "${AZURE_MOCK_ENDPOINT}${path}" \ + -H "Content-Type: application/json" \ + -d "$body" 2>/dev/null +} + +# Execute a DELETE request against Azure Mock API +# Usage: azure_mock_delete "/path" +azure_mock_delete() { + local path="$1" + curl -s -X DELETE 
"${AZURE_MOCK_ENDPOINT}${path}" 2>/dev/null +} + +# ============================================================================= +# Workflow Execution +# ============================================================================= + +# Run a nullplatform workflow +# Usage: run_workflow "deployment/workflows/initial.yaml" +run_workflow() { + local workflow="$1" + local full_path + + # Resolve path relative to module root + if [[ "$workflow" = /* ]]; then + full_path="$workflow" + else + full_path="$INTEGRATION_MODULE_ROOT/$workflow" + fi + + echo -e "${INTEGRATION_CYAN}Running workflow:${INTEGRATION_NC} $workflow" + np service workflow exec --workflow "$full_path" +} + +# ============================================================================= +# Context Helpers +# ============================================================================= + +# Load context from a JSON file +# Usage: load_context "resources/context.json" +load_context() { + local context_file="$1" + local full_path + + # Resolve path relative to module root + if [[ "$context_file" = /* ]]; then + full_path="$context_file" + else + full_path="$INTEGRATION_MODULE_ROOT/$context_file" + fi + + if [[ ! -f "$full_path" ]]; then + echo -e "${INTEGRATION_RED}Context file not found: $full_path${INTEGRATION_NC}" + return 1 + fi + + export CONTEXT=$(cat "$full_path") + echo -e " ${INTEGRATION_CYAN}Loaded context from:${INTEGRATION_NC} $context_file" +} + +# Override a value in the current CONTEXT +# Usage: override_context "providers.networking.zone_id" "Z1234567890" +override_context() { + local key="$1" + local value="$2" + + if [[ -z "$CONTEXT" ]]; then + echo -e "${INTEGRATION_RED}Error: CONTEXT is not set. 
Call load_context first.${INTEGRATION_NC}" + return 1 + fi + + CONTEXT=$(echo "$CONTEXT" | jq --arg k "$key" --arg v "$value" 'setpath($k | split("."); $v)') + export CONTEXT +} + +# ============================================================================= +# Generic Assertions +# ============================================================================= + +# Assert command succeeds +# Usage: assert_success "aws s3 ls" +assert_success() { + local cmd="$1" + local description="${2:-Command succeeds}" + echo -ne " ${INTEGRATION_CYAN}Assert:${INTEGRATION_NC} ${description} ... " + + if eval "$cmd" >/dev/null 2>&1; then + _assert_result "true" + else + _assert_result "false" + return 1 + fi +} + +# Assert command fails +# Usage: assert_failure "aws s3api head-bucket --bucket nonexistent" +assert_failure() { + local cmd="$1" + local description="${2:-Command fails}" + echo -ne " ${INTEGRATION_CYAN}Assert:${INTEGRATION_NC} ${description} ... " + + if eval "$cmd" >/dev/null 2>&1; then + _assert_result "false" + return 1 + else + _assert_result "true" + fi +} + +# Assert output contains string +# Usage: result=$(some_command); assert_contains "$result" "expected" +assert_contains() { + local haystack="$1" + local needle="$2" + local description="${3:-Output contains '$needle'}" + echo -ne " ${INTEGRATION_CYAN}Assert:${INTEGRATION_NC} ${description} ... " + + if [[ "$haystack" == *"$needle"* ]]; then + _assert_result "true" + else + _assert_result "false" + return 1 + fi +} + +# Assert values are equal +# Usage: assert_equals "$actual" "$expected" "Values match" +assert_equals() { + local actual="$1" + local expected="$2" + local description="${3:-Values are equal}" + echo -ne " ${INTEGRATION_CYAN}Assert:${INTEGRATION_NC} ${description} ... 
" + + if [[ "$actual" == "$expected" ]]; then + _assert_result "true" + else + _assert_result "false" + echo " Expected: $expected" + echo " Actual: $actual" + return 1 + fi +} + +# ============================================================================= +# API Mocking (Smocker) +# +# Smocker is used to mock the nullplatform API (api.nullplatform.com). +# Tests run in a container where api.nullplatform.com resolves to smocker. +# ============================================================================= + +# Clear all mocks from smocker and set up default mocks +# Usage: clear_mocks +clear_mocks() { + curl -s -X POST "${SMOCKER_HOST}/reset" >/dev/null 2>&1 + # Set up default mocks that are always needed + _setup_default_mocks +} + +# Set up default mocks that are always needed for np CLI +# These are internal API calls that np CLI makes automatically +_setup_default_mocks() { + # Token endpoint - np CLI always authenticates before making API calls + local token_mock + token_mock=$(cat <<'EOF' +[{ + "request": { + "method": "POST", + "path": "/token" + }, + "response": { + "status": 200, + "headers": {"Content-Type": "application/json"}, + "body": "{\"access_token\": \"test-integration-token\", \"token_type\": \"Bearer\", \"expires_in\": 3600}" + } +}] +EOF +) + curl -s -X POST "${SMOCKER_HOST}/mocks" \ + -H "Content-Type: application/json" \ + -d "$token_mock" >/dev/null 2>&1 +} + +# Mock an API request +# Usage with file: mock_request "GET" "/providers/123" "responses/provider.json" +# Usage inline: mock_request "POST" "/deployments" 201 '{"id": "new-dep"}' +# +# File format (JSON): +# { +# "status": 200, +# "headers": {"Content-Type": "application/json"}, // optional +# "body": { ... 
} +# } +mock_request() { + local method="$1" + local path="$2" + local status_or_file="$3" + local body="$4" + + local status + local response_body + local headers='{"Content-Type": "application/json"}' + + # Check if third argument is a file or a status code + if [[ -f "$status_or_file" ]]; then + # File mode - read status and body from file + local file_content + file_content=$(cat "$status_or_file") + status=$(echo "$file_content" | jq -r '.status // 200') + response_body=$(echo "$file_content" | jq -c '.body // {}') + local file_headers + file_headers=$(echo "$file_content" | jq -c '.headers // null') + if [[ "$file_headers" != "null" ]]; then + headers="$file_headers" + fi + elif [[ -f "${INTEGRATION_MODULE_ROOT}/$status_or_file" ]]; then + # File mode with relative path + local file_content + file_content=$(cat "${INTEGRATION_MODULE_ROOT}/$status_or_file") + status=$(echo "$file_content" | jq -r '.status // 200') + response_body=$(echo "$file_content" | jq -c '.body // {}') + local file_headers + file_headers=$(echo "$file_content" | jq -c '.headers // null') + if [[ "$file_headers" != "null" ]]; then + headers="$file_headers" + fi + else + # Inline mode - status code and body provided directly + status="$status_or_file" + response_body="$body" + fi + + # Build smocker mock definition + # Note: Smocker expects body as a string, not a JSON object + local mock_definition + mock_definition=$(jq -n \ + --arg method "$method" \ + --arg path "$path" \ + --argjson status "$status" \ + --arg body "$response_body" \ + --argjson headers "$headers" \ + '[{ + "request": { + "method": $method, + "path": $path + }, + "response": { + "status": $status, + "headers": $headers, + "body": $body + } + }]') + + # Register mock with smocker + local result + local http_code + http_code=$(curl -s -w "%{http_code}" -o /tmp/smocker_response.json -X POST "${SMOCKER_HOST}/mocks" \ + -H "Content-Type: application/json" \ + -d "$mock_definition" 2>&1) + result=$(cat 
/tmp/smocker_response.json 2>/dev/null) + + if [[ "$http_code" != "200" ]]; then + local error_msg + error_msg=$(echo "$result" | jq -r '.message // "Unknown error"' 2>/dev/null) + echo -e "${INTEGRATION_RED}Failed to register mock (HTTP ${http_code}): ${error_msg}${INTEGRATION_NC}" + return 1 + fi + + echo -e " ${INTEGRATION_CYAN}Mock:${INTEGRATION_NC} ${method} ${path} -> ${status}" +} + +# Mock a request with query parameters +# Usage: mock_request_with_query "GET" "/providers" "type=assets-repository" 200 '[...]' +mock_request_with_query() { + local method="$1" + local path="$2" + local query="$3" + local status="$4" + local body="$5" + + local mock_definition + mock_definition=$(jq -n \ + --arg method "$method" \ + --arg path "$path" \ + --arg query "$query" \ + --argjson status "$status" \ + --arg body "$body" \ + '[{ + "request": { + "method": $method, + "path": $path, + "query_params": ($query | split("&") | map(split("=") | {(.[0]): [.[1]]}) | add) + }, + "response": { + "status": $status, + "headers": {"Content-Type": "application/json"}, + "body": $body + } + }]') + + curl -s -X POST "${SMOCKER_HOST}/mocks" \ + -H "Content-Type: application/json" \ + -d "$mock_definition" >/dev/null 2>&1 + + echo -e " ${INTEGRATION_CYAN}Mock:${INTEGRATION_NC} ${method} ${path}?${query} -> ${status}" +} + +# Verify that a mock was called +# Usage: assert_mock_called "GET" "/providers/123" +assert_mock_called() { + local method="$1" + local path="$2" + echo -ne " ${INTEGRATION_CYAN}Assert:${INTEGRATION_NC} ${method} ${path} was called ... 
" + + local history + history=$(curl -s "${SMOCKER_HOST}/history" 2>/dev/null) + + local called + called=$(echo "$history" | jq -r \ + --arg method "$method" \ + --arg path "$path" \ + '[.[] | select(.request.method == $method and .request.path == $path)] | length') + + if [[ "$called" -gt 0 ]]; then + _assert_result "true" + else + _assert_result "false" + return 1 + fi +} + +# Get the number of times a mock was called +# Usage: count=$(mock_call_count "GET" "/providers/123") +mock_call_count() { + local method="$1" + local path="$2" + + local history + history=$(curl -s "${SMOCKER_HOST}/history" 2>/dev/null) + + echo "$history" | jq -r \ + --arg method "$method" \ + --arg path "$path" \ + '[.[] | select(.request.method == $method and .request.path == $path)] | length' +} + +# ============================================================================= +# Help / Documentation +# ============================================================================= + +# Display help for all available integration test utilities +test_help() { + cat <<'EOF' +================================================================================ + Integration Test Helpers Reference +================================================================================ + +SETUP & TEARDOWN +---------------- + integration_setup --cloud-provider + Initialize integration test environment for the specified cloud provider. + Call this in setup_file(). + + integration_teardown + Clean up integration test environment. + Call this in teardown_file(). + +AWS LOCAL COMMANDS +------------------ + aws_local + Execute AWS CLI against LocalStack (S3, Route53, DynamoDB, etc.) + Example: aws_local s3 ls + + aws_moto + Execute AWS CLI against Moto (CloudFront) + Example: aws_moto cloudfront list-distributions + +AZURE MOCK COMMANDS +------------------- + azure_mock "" + Execute a GET request against Azure Mock API. 
+ Example: azure_mock "/subscriptions/sub-id/resourceGroups/rg/providers/Microsoft.Cdn/profiles/my-profile" + + azure_mock_put "" '' + Execute a PUT request against Azure Mock API. + Example: azure_mock_put "/subscriptions/.../profiles/my-profile" '{"location": "eastus"}' + + azure_mock_delete "" + Execute a DELETE request against Azure Mock API. + Example: azure_mock_delete "/subscriptions/.../profiles/my-profile" + +WORKFLOW EXECUTION +------------------ + run_workflow "" + Run a nullplatform workflow file. + Path is relative to module root. + Example: run_workflow "frontend/deployment/workflows/initial.yaml" + +CONTEXT HELPERS +--------------- + load_context "" + Load a context JSON file into the CONTEXT environment variable. + Example: load_context "tests/resources/context.json" + + override_context "" "" + Override a value in the current CONTEXT. + Example: override_context "providers.networking.zone_id" "Z1234567890" + +API MOCKING (Smocker) +--------------------- + clear_mocks + Clear all mocks and set up default mocks (token endpoint). + Call this at the start of each test. + + mock_request "" "" "" + Mock an API request using a response file. + File format: { "status": 200, "body": {...} } + Example: mock_request "GET" "/provider/123" "mocks/provider.json" + + mock_request "" "" '' + Mock an API request with inline response. + Example: mock_request "POST" "/deployments" 201 '{"id": "new"}' + + mock_request_with_query "" "" "" '' + Mock a request with query parameters. + Example: mock_request_with_query "GET" "/items" "type=foo" 200 '[...]' + + assert_mock_called "" "" + Assert that a mock endpoint was called. + Example: assert_mock_called "GET" "/provider/123" + + mock_call_count "" "" + Get the number of times a mock was called. + Example: count=$(mock_call_count "GET" "/provider/123") + +AWS ASSERTIONS +-------------- + assert_s3_bucket_exists "" + Assert an S3 bucket exists in LocalStack. 
+ + assert_s3_bucket_not_exists "" + Assert an S3 bucket does not exist. + + assert_cloudfront_exists "" + Assert a CloudFront distribution exists (matched by comment). + + assert_cloudfront_not_exists "" + Assert a CloudFront distribution does not exist. + + assert_route53_record_exists "" "" + Assert a Route53 record exists. + Example: assert_route53_record_exists "app.example.com" "A" + + assert_route53_record_not_exists "" "" + Assert a Route53 record does not exist. + + assert_dynamodb_table_exists "" + Assert a DynamoDB table exists. + + assert_dynamodb_table_not_exists "" + Assert a DynamoDB table does not exist. + +GENERIC ASSERTIONS +------------------ + assert_success "" [""] + Assert a command succeeds (exit code 0). + + assert_failure "" [""] + Assert a command fails (non-zero exit code). + + assert_contains "" "" [""] + Assert a string contains a substring. + + assert_equals "" "" [""] + Assert two values are equal. + +ENVIRONMENT VARIABLES +--------------------- + LOCALSTACK_ENDPOINT LocalStack URL (default: http://localhost:4566) + MOTO_ENDPOINT Moto URL (default: http://localhost:5555) + AZURE_MOCK_ENDPOINT Azure Mock URL (default: http://localhost:8090) + SMOCKER_HOST Smocker admin URL (default: http://localhost:8081) + AWS_ENDPOINT_URL AWS endpoint for CLI (default: $LOCALSTACK_ENDPOINT) + ARM_CLIENT_ID Azure client ID for mock (default: mock-client-id) + ARM_CLIENT_SECRET Azure client secret for mock (default: mock-client-secret) + ARM_TENANT_ID Azure tenant ID for mock (default: mock-tenant-id) + ARM_SUBSCRIPTION_ID Azure subscription ID for mock (default: mock-subscription-id) + INTEGRATION_MODULE_ROOT Root directory of the module being tested + +================================================================================ +EOF +} diff --git a/testing/localstack-provider/provider_override.tf b/testing/localstack-provider/provider_override.tf new file mode 100644 index 00000000..587982c2 --- /dev/null +++ 
b/testing/localstack-provider/provider_override.tf @@ -0,0 +1,38 @@ +# Override file for LocalStack + Moto testing +# This file is copied into the module directory during integration tests +# to configure the AWS provider to use mock endpoints +# +# LocalStack (port 4566): S3, Route53, STS, IAM, DynamoDB, ACM +# Moto (port 5000): CloudFront + +# Set CloudFront endpoint for AWS CLI commands (used by cache invalidation) +variable "distribution_cloudfront_endpoint_url" { + default = "http://moto:5000" +} + +provider "aws" { + region = var.aws_provider.region + access_key = "test" + secret_key = "test" + skip_credentials_validation = true + skip_metadata_api_check = true + skip_requesting_account_id = true + + endpoints { + # LocalStack services (using Docker service name) + s3 = "http://localstack:4566" + route53 = "http://localstack:4566" + sts = "http://localstack:4566" + iam = "http://localstack:4566" + dynamodb = "http://localstack:4566" + acm = "http://localstack:4566" + # Moto services (CloudFront not in LocalStack free tier) + cloudfront = "http://moto:5000" + } + + default_tags { + tags = var.provider_resource_tags_json + } + + s3_use_path_style = true +} diff --git a/testing/run_bats_tests.sh b/testing/run_bats_tests.sh new file mode 100755 index 00000000..d17384e6 --- /dev/null +++ b/testing/run_bats_tests.sh @@ -0,0 +1,194 @@ +#!/bin/bash +# ============================================================================= +# Test runner for all BATS tests across all modules +# +# Usage: +# ./testing/run_bats_tests.sh # Run all tests +# ./testing/run_bats_tests.sh frontend # Run tests for frontend module only +# ./testing/run_bats_tests.sh frontend/deployment/tests # Run specific test directory +# ============================================================================= + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +cd "$PROJECT_ROOT" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +# Track failed tests globally +FAILED_TESTS=() +CURRENT_TEST_FILE="" + +# Check if bats is installed +if ! command -v bats &> /dev/null; then + echo -e "${RED}bats-core is not installed${NC}" + echo "" + echo "Install with:" + echo " brew install bats-core # macOS" + echo " apt install bats # Ubuntu/Debian" + echo " apk add bats # Alpine" + echo " choco install bats # Windows" + exit 1 +fi + +# Check if jq is installed +if ! command -v jq &> /dev/null; then + echo -e "${RED}jq is not installed${NC}" + echo "" + echo "Install with:" + echo " brew install jq # macOS" + echo " apt install jq # Ubuntu/Debian" + echo " apk add jq # Alpine" + echo " choco install jq # Windows" + exit 1 +fi + +# Find all test directories +find_test_dirs() { + find . -mindepth 3 -maxdepth 3 -type d -name "tests" -not -path "*/node_modules/*" 2>/dev/null | sort +} + +# Get module name from test path +get_module_name() { + local path="$1" + echo "$path" | sed 's|^\./||' | cut -d'/' -f1 +} + +# Run tests for a specific directory +run_tests_in_dir() { + local test_dir="$1" + local module_name + module_name=$(get_module_name "$test_dir") + + # Find all .bats files, excluding integration directory (integration tests are run separately) + local bats_files + bats_files=$(find "$test_dir" -name "*.bats" -not -path "*/integration/*" 2>/dev/null) + + if [ -z "$bats_files" ]; then + return 0 + fi + + echo -e "${CYAN}[$module_name]${NC} Running BATS tests in $test_dir" + echo "" + + # Create temp file to capture output + local temp_output + temp_output=$(mktemp) + + local exit_code=0 + ( + cd "$test_dir" + # Use script to force TTY for colored output + # Exclude integration directory - those tests are run by run_integration_tests.sh + # --print-output-on-failure: only show test output when a test fails + script -q /dev/null bats --formatter pretty 
--print-output-on-failure $(find . -name "*.bats" -not -path "*/integration/*" | sort) + ) 2>&1 | tee "$temp_output" || exit_code=$? + + # Extract failed tests from output + # Strip all ANSI escape codes (colors, cursor movements, etc.) + local clean_output + clean_output=$(perl -pe 's/\e\[[0-9;]*[a-zA-Z]//g; s/\e\][^\a]*\a//g' "$temp_output" 2>/dev/null || cat "$temp_output") + + local current_file="" + while IFS= read -r line; do + # Track current test file (lines containing .bats without test markers) + if [[ "$line" == *".bats"* ]] && [[ "$line" != *"✗"* ]] && [[ "$line" != *"✓"* ]]; then + # Extract the file path (e.g., network/route53/setup_test.bats) + current_file=$(echo "$line" | grep -oE '[a-zA-Z0-9_/.-]+\.bats' | head -1) + fi + + # Find failed test lines + if [[ "$line" == *"✗"* ]]; then + # Extract test name: get text after ✗, clean up any remaining control chars + local failed_test_name + failed_test_name=$(echo "$line" | sed 's/.*✗[[:space:]]*//' | sed 's/[[:space:]]*$//' | tr -d '\r') + # Only add if we got a valid test name + if [[ -n "$failed_test_name" ]]; then + FAILED_TESTS+=("${module_name}|${current_file}|${failed_test_name}") + fi + fi + done <<< "$clean_output" + + rm -f "$temp_output" + echo "" + + return $exit_code +} + +echo "" +echo "========================================" +echo " BATS Tests (Unit)" +echo "========================================" +echo "" + +# Print available test helpers reference +source "$SCRIPT_DIR/assertions.sh" +test_help +echo "" + +# Export BASH_ENV to auto-source assertions.sh in all bats test subshells +export BASH_ENV="$SCRIPT_DIR/assertions.sh" + +HAS_FAILURES=0 + +if [ -n "$1" ]; then + # Run tests for specific module or directory + if [ -d "$1" ] && [[ "$1" == *"/tests"* || "$1" == *"/tests" ]]; then + # Direct test directory path + run_tests_in_dir "$1" || HAS_FAILURES=1 + elif [ -d "$1" ]; then + # Module name (e.g., "frontend") - find all test directories under it + module_test_dirs=$(find "$1" 
-mindepth 2 -maxdepth 2 -type d -name "tests" 2>/dev/null | sort) + if [ -z "$module_test_dirs" ]; then + echo -e "${RED}No test directories found in: $1${NC}" + exit 1 + fi + for test_dir in $module_test_dirs; do + run_tests_in_dir "$test_dir" || HAS_FAILURES=1 + done + else + echo -e "${RED}Directory not found: $1${NC}" + echo "" + echo "Available modules with tests:" + for dir in $(find_test_dirs); do + echo " - $(get_module_name "$dir")" + done | sort -u + exit 1 + fi +else + # Run all tests + test_dirs=$(find_test_dirs) + + if [ -z "$test_dirs" ]; then + echo -e "${YELLOW}No test directories found${NC}" + exit 0 + fi + + for test_dir in $test_dirs; do + run_tests_in_dir "$test_dir" || HAS_FAILURES=1 + done +fi + +# Show summary of failed tests +if [ ${#FAILED_TESTS[@]} -gt 0 ]; then + echo "" + echo "========================================" + echo " Failed Tests Summary" + echo "========================================" + echo "" + for failed_test in "${FAILED_TESTS[@]}"; do + # Parse module|file|test_name format + module_name=$(echo "$failed_test" | cut -d'|' -f1) + file_name=$(echo "$failed_test" | cut -d'|' -f2) + test_name=$(echo "$failed_test" | cut -d'|' -f3) + echo -e " ${RED}✗${NC} ${CYAN}[$module_name]${NC} ${RED}$file_name${NC} $test_name" + done + echo "" + exit 1 +fi + +echo -e "${GREEN}All BATS tests passed!${NC}" diff --git a/testing/run_integration_tests.sh b/testing/run_integration_tests.sh new file mode 100755 index 00000000..0a020f60 --- /dev/null +++ b/testing/run_integration_tests.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# ============================================================================= +# Test runner for all integration tests (BATS) across all modules +# +# Tests run inside a Docker container with: +# - LocalStack for AWS emulation +# - Moto for CloudFront emulation +# - Smocker for nullplatform API mocking +# +# Usage: +# ./testing/run_integration_tests.sh # Run all tests +# ./testing/run_integration_tests.sh frontend # Run tests 
for frontend module only +# ./testing/run_integration_tests.sh --build # Rebuild containers before running +# ./testing/run_integration_tests.sh -v|--verbose # Show output of passing tests +# ============================================================================= + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$PROJECT_ROOT" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +# Parse arguments +MODULE="" +BUILD_FLAG="" +VERBOSE="" + +for arg in "$@"; do + case $arg in + --build) + BUILD_FLAG="--build" + ;; + -v|--verbose) + VERBOSE="--show-output-of-passing-tests" + ;; + *) + MODULE="$arg" + ;; + esac +done + +# Docker compose file location +COMPOSE_FILE="$SCRIPT_DIR/docker/docker-compose.integration.yml" + +# Check if docker is installed +if ! command -v docker &> /dev/null; then + echo -e "${RED}docker is not installed${NC}" + echo "" + echo "Install with:" + echo " brew install docker # macOS" + echo " apt install docker.io # Ubuntu/Debian" + echo " apk add docker # Alpine" + echo " choco install docker # Windows" + exit 1 +fi + +# Check if docker compose file exists +if [ ! -f "$COMPOSE_FILE" ]; then + echo -e "${RED}Docker compose file not found: $COMPOSE_FILE${NC}" + exit 1 +fi + +# Generate certificates if they don't exist +CERT_DIR="$SCRIPT_DIR/docker/certs" +if [ ! -f "$CERT_DIR/cert.pem" ] || [ ! -f "$CERT_DIR/key.pem" ]; then + echo -e "${CYAN}Generating TLS certificates...${NC}" + "$SCRIPT_DIR/docker/generate-certs.sh" +fi + +# Find all integration test directories +find_test_dirs() { + find . 
-type d -name "integration" -path "*/tests/*" -not -path "*/node_modules/*" 2>/dev/null | sort +} + +# Get module name from test path +get_module_name() { + local path="$1" + echo "$path" | sed 's|^\./||' | cut -d'/' -f1 +} + +# Cleanup function +cleanup() { + echo "" + echo -e "${CYAN}Stopping containers...${NC}" + docker compose -f "$COMPOSE_FILE" down -v 2>/dev/null || true +} + +echo "" +echo "========================================" +echo " Integration Tests (Containerized)" +echo "========================================" +echo "" + +# Print available test helpers reference +source "$SCRIPT_DIR/integration_helpers.sh" +test_help +echo "" + +# Set trap for cleanup +trap cleanup EXIT + +# Build test runner and azure-mock images if needed +echo -e "${CYAN}Building containers...${NC}" +docker compose -f "$COMPOSE_FILE" build $BUILD_FLAG test-runner azure-mock 2>&1 | grep -v "^$" || true +echo "" + +# Start infrastructure services +echo -e "${CYAN}Starting infrastructure services...${NC}" +docker compose -f "$COMPOSE_FILE" up -d localstack moto azure-mock smocker nginx-proxy 2>&1 | grep -v "^$" || true + +# Wait for services to be healthy +echo -n "Waiting for services to be ready" +max_attempts=30 +attempt=0 + +while [ $attempt -lt $max_attempts ]; do + # Check health via curl (most reliable) + localstack_ok=$(curl -s "http://localhost:4566/_localstack/health" 2>/dev/null | jq -e '.services.s3 == "running"' >/dev/null 2>&1 && echo "yes" || echo "no") + moto_ok=$(curl -s "http://localhost:5555/moto-api/" >/dev/null 2>&1 && echo "yes" || echo "no") + azure_mock_ok=$(curl -s "http://localhost:8090/health" 2>/dev/null | jq -e '.status == "ok"' >/dev/null 2>&1 && echo "yes" || echo "no") + smocker_ok=$(curl -s "http://localhost:8081/version" >/dev/null 2>&1 && echo "yes" || echo "no") + nginx_ok=$(curl -sk "https://localhost:8443/mocks" >/dev/null 2>&1 && echo "yes" || echo "no") + + if [[ "$localstack_ok" == "yes" ]] && [[ "$moto_ok" == "yes" ]] && [[ 
"$azure_mock_ok" == "yes" ]] && [[ "$smocker_ok" == "yes" ]] && [[ "$nginx_ok" == "yes" ]]; then + echo "" + echo -e "${GREEN}All services ready${NC}" + break + fi + + attempt=$((attempt + 1)) + sleep 2 + echo -n "." +done + +if [ $attempt -eq $max_attempts ]; then + echo "" + echo -e "${RED}Services failed to start${NC}" + docker compose -f "$COMPOSE_FILE" logs + exit 1 +fi + +echo "" + +# Get smocker container IP for DNS resolution +SMOCKER_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' integration-smocker 2>/dev/null || echo "172.28.0.10") +export SMOCKER_IP + +# Determine which tests to run +if [ -n "$MODULE" ]; then + if [ -d "$MODULE" ]; then + TEST_PATHS=$(find "$MODULE" -type d -name "integration" -path "*/tests/*" 2>/dev/null | sort) + if [ -z "$TEST_PATHS" ]; then + echo -e "${RED}No integration test directories found in: $MODULE${NC}" + exit 1 + fi + else + echo -e "${RED}Directory not found: $MODULE${NC}" + echo "" + echo "Available modules with integration tests:" + for dir in $(find_test_dirs); do + echo " - $(get_module_name "$dir")" + done | sort -u + exit 1 + fi +else + TEST_PATHS=$(find_test_dirs) + if [ -z "$TEST_PATHS" ]; then + echo -e "${YELLOW}No integration test directories found${NC}" + exit 0 + fi +fi + +# Run tests for each directory +TOTAL_FAILED=0 + +for test_dir in $TEST_PATHS; do + module_name=$(get_module_name "$test_dir") + + # Find .bats files recursively (supports test_cases/ subfolder structure) + bats_files=$(find "$test_dir" -name "*.bats" 2>/dev/null | sort) + if [ -z "$bats_files" ]; then + continue + fi + + echo -e "${CYAN}[$module_name]${NC} Running integration tests in $test_dir" + echo "" + + # Strip leading ./ from test_dir for cleaner paths + container_test_dir="${test_dir#./}" + + # Build list of test files for bats (space-separated, container paths) + container_bats_files="" + for bats_file in $bats_files; do + container_path="/workspace/${bats_file#./}" + 
container_bats_files="$container_bats_files $container_path" + done + + # Run tests inside the container + docker compose -f "$COMPOSE_FILE" run --rm \ + -e PROJECT_ROOT=/workspace \ + -e SMOCKER_HOST=http://smocker:8081 \ + -e LOCALSTACK_ENDPOINT=http://localstack:4566 \ + -e MOTO_ENDPOINT=http://moto:5000 \ + -e AWS_ENDPOINT_URL=http://localstack:4566 \ + test-runner \ + -c "update-ca-certificates 2>/dev/null; bats --formatter pretty $VERBOSE $container_bats_files" || TOTAL_FAILED=$((TOTAL_FAILED + 1)) + + echo "" +done + +if [ $TOTAL_FAILED -gt 0 ]; then + echo -e "${RED}Some integration tests failed${NC}" + exit 1 +else + echo -e "${GREEN}All integration tests passed!${NC}" +fi diff --git a/testing/run_tofu_tests.sh b/testing/run_tofu_tests.sh new file mode 100755 index 00000000..1c1ee77f --- /dev/null +++ b/testing/run_tofu_tests.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# ============================================================================= +# Test runner for all OpenTofu/Terraform tests across all modules +# +# Usage: +# ./testing/run_tofu_tests.sh # Run all tests +# ./testing/run_tofu_tests.sh frontend # Run tests for frontend module only +# ./testing/run_tofu_tests.sh frontend/deployment/provider/aws/modules # Run specific test directory +# ============================================================================= + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$PROJECT_ROOT" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +# Check if tofu is installed +if ! 
command -v tofu &> /dev/null; then + echo -e "${RED}OpenTofu is not installed${NC}" + echo "" + echo "Install with:" + echo " brew install opentofu # macOS" + echo " apt install tofu # Ubuntu/Debian" + echo " apk add opentofu # Alpine" + echo " choco install opentofu # Windows" + echo "" + echo "See https://opentofu.org/docs/intro/install/" + exit 1 +fi + +# Find all directories with .tftest.hcl files +find_test_dirs() { + find . -name "*.tftest.hcl" -not -path "*/node_modules/*" 2>/dev/null | xargs -I{} dirname {} | sort -u +} + +# Get module name from test path +get_module_name() { + local path="$1" + echo "$path" | sed 's|^\./||' | cut -d'/' -f1 +} + +# Run tests for a specific directory +run_tests_in_dir() { + local test_dir="$1" + local module_name=$(get_module_name "$test_dir") + + # Check if there are .tftest.hcl files + if ! ls "$test_dir"/*.tftest.hcl &>/dev/null; then + return 0 + fi + + echo -e "${CYAN}[$module_name]${NC} Running OpenTofu tests in $test_dir" + echo "" + + ( + cd "$test_dir" + + # Initialize if needed (without backend) + if [ ! 
-d ".terraform" ]; then + tofu init -backend=false -input=false >/dev/null 2>&1 || true + fi + + # Run tests + tofu test + ) + + echo "" +} + +echo "" +echo "========================================" +echo " OpenTofu Tests" +echo "========================================" +echo "" + +if [ -n "$1" ]; then + # Run tests for specific module or directory + if [ -d "$1" ] && ls "$1"/*.tftest.hcl &>/dev/null; then + # Direct test directory path with .tftest.hcl files + run_tests_in_dir "$1" + elif [ -d "$1" ]; then + # Module name (e.g., "frontend") - find all test directories under it + module_test_dirs=$(find "$1" -name "*.tftest.hcl" 2>/dev/null | xargs -I{} dirname {} | sort -u) + if [ -z "$module_test_dirs" ]; then + echo -e "${RED}No OpenTofu test files found in: $1${NC}" + exit 1 + fi + for test_dir in $module_test_dirs; do + run_tests_in_dir "$test_dir" + done + else + echo -e "${RED}Directory not found: $1${NC}" + echo "" + echo "Available modules with OpenTofu tests:" + for dir in $(find_test_dirs); do + echo " - $(get_module_name "$dir")" + done | sort -u + exit 1 + fi +else + # Run all tests + test_dirs=$(find_test_dirs) + + if [ -z "$test_dirs" ]; then + echo -e "${YELLOW}No OpenTofu test files found${NC}" + exit 0 + fi + + for test_dir in $test_dirs; do + run_tests_in_dir "$test_dir" + done +fi + +echo -e "${GREEN}All OpenTofu tests passed!${NC}" diff --git a/workflow.schema.json b/workflow.schema.json index 713d27c0..d972e698 100644 --- a/workflow.schema.json +++ b/workflow.schema.json @@ -3,8 +3,9 @@ "title": "Workflow", "additionalProperties": false, "type": "object", - "required": [ - "steps" + "anyOf": [ + { "required": ["steps"] }, + { "required": ["include"] } ], "properties": { "steps": {