From feb7e4f83b53efcd3fb06d033fdf860c49547b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20K=C3=A4mpf?= Date: Fri, 6 Mar 2026 08:00:36 +0100 Subject: [PATCH 1/6] feat: add Large File Support (LFS) subsystem Add transparent large-file offloading for Kafka via an S3-backed proxy. When enabled, oversized records are automatically replaced with compact envelope references pointing to S3 objects, keeping Kafka partitions lean while supporting arbitrarily large payloads. Core components: - pkg/lfs: envelope codec, S3 client, checksum, consumer/producer/resolver - cmd/lfs-proxy: standalone LFS proxy binary with HTTP ingest, SASL, TLS, Swagger UI, and ops tracker - cmd/proxy/lfs_*: feature-flagged LFS module merged into unified proxy - internal/console: LFS consumer, S3 browser, and metrics in web console - pkg/operator: LFS proxy resource management for Kubernetes operator - deploy/helm: Helm templates for lfs-proxy deployment and monitoring Client SDKs (Java, Python, JavaScript, browser): - lfs-client-sdk/: multi-language SDKs for producing/consuming LFS records Iceberg processor addon: - addons/processors/iceberg-processor: reads LFS envelopes and sinks to Apache Iceberg tables Also includes: - Broker fix: send error response instead of dropping TCP connection on handler errors (fixes "fetching message: EOF" with older Fetch versions) - E2E test suite for LFS proxy, SDK, and iceberg processor - CI/Helm/Docker updates for LFS proxy build and deployment Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 43 + .github/workflows/codeql.yml | 6 + .github/workflows/docker.yml | 4 + .gitignore | 29 + Makefile | 361 +- README.md | 7 + .../processors/iceberg-processor/Dockerfile | 13 +- addons/processors/iceberg-processor/Makefile | 29 +- .../iceberg-processor/config/config.yaml | 6 + .../helm/iceberg-processor/config/config.yaml | 6 + .../deploy/helm/iceberg-processor/values.yaml | 6 + addons/processors/iceberg-processor/go.mod | 2 + 
addons/processors/iceberg-processor/go.sum | 206 +- .../internal/config/config.go | 127 +- .../internal/config/config_test.go | 49 + .../internal/metrics/metrics.go | 37 + .../internal/processor/lfs.go | 218 + .../internal/processor/lfs_test.go | 238 + .../internal/processor/processor.go | 64 +- .../internal/sink/iceberg.go | 63 +- .../internal/sink/iceberg_integration_test.go | 65 + .../iceberg-processor/internal/sink/sink.go | 1 + api/lfs-proxy/openapi.yaml | 688 +++ api/v1alpha1/kafscalecluster_types.go | 211 +- cmd/broker/acl_test.go | 4 +- cmd/broker/admin_metrics.go | 18 +- cmd/broker/auth_metrics.go | 8 +- cmd/broker/lag_metrics.go | 6 +- cmd/broker/main.go | 2 +- cmd/broker/metrics_histogram.go | 12 +- cmd/broker/runtime_metrics.go | 36 +- cmd/console/main.go | 94 + cmd/e2e-client/main.go | 55 +- cmd/idoc-explode/main.go | 283 ++ cmd/lfs-proxy/backend_auth.go | 105 + cmd/lfs-proxy/backend_tls.go | 68 + cmd/lfs-proxy/handler.go | 1120 +++++ cmd/lfs-proxy/handler_test.go | 325 ++ cmd/lfs-proxy/http.go | 1013 ++++ cmd/lfs-proxy/http_test.go | 274 ++ cmd/lfs-proxy/http_tls.go | 59 + cmd/lfs-proxy/http_tls_test.go | 39 + cmd/lfs-proxy/main.go | 440 ++ cmd/lfs-proxy/metrics.go | 221 + cmd/lfs-proxy/openapi.yaml | 433 ++ cmd/lfs-proxy/record.go | 277 ++ cmd/lfs-proxy/s3.go | 582 +++ cmd/lfs-proxy/sasl_encode.go | 77 + cmd/lfs-proxy/sasl_encode_test.go | 45 + cmd/lfs-proxy/swagger.go | 73 + cmd/lfs-proxy/tracker.go | 372 ++ cmd/lfs-proxy/tracker_test.go | 383 ++ cmd/lfs-proxy/tracker_types.go | 238 + cmd/lfs-proxy/uuid.go | 22 + cmd/proxy/lfs.go | 503 ++ cmd/proxy/lfs_backend_auth.go | 98 + cmd/proxy/lfs_backend_tls.go | 68 + cmd/proxy/lfs_http.go | 1018 ++++ cmd/proxy/lfs_http_tls.go | 59 + cmd/proxy/lfs_metrics.go | 221 + cmd/proxy/lfs_record.go | 113 + cmd/proxy/lfs_rewrite.go | 358 ++ cmd/proxy/lfs_s3.go | 582 +++ cmd/proxy/lfs_sasl_encode.go | 258 + cmd/proxy/lfs_swagger.go | 71 + cmd/proxy/lfs_test.go | 426 ++ cmd/proxy/lfs_tracker.go | 372 ++ 
cmd/proxy/lfs_tracker_types.go | 238 + cmd/proxy/lfs_uuid.go | 22 + cmd/proxy/main.go | 50 +- cmd/proxy/openapi.yaml | 433 ++ deploy/docker-compose/Makefile | 65 + deploy/docker-compose/README.md | 313 ++ deploy/docker-compose/docker-compose.yaml | 287 ++ deploy/docker/lfs-proxy.Dockerfile | 46 + deploy/helm/kafscale/README.md | 311 ++ .../helm/kafscale/crds/kafscaleclusters.yaml | 76 + .../templates/console-deployment.yaml | 36 + .../templates/lfs-proxy-deployment.yaml | 251 + .../templates/lfs-proxy-http-ingress.yaml | 57 + .../templates/lfs-proxy-metrics-service.yaml | 36 + .../templates/lfs-proxy-prometheusrule.yaml | 46 + .../kafscale/templates/lfs-proxy-service.yaml | 47 + .../templates/lfs-proxy-servicemonitor.yaml | 34 + deploy/helm/kafscale/values-lfs-demo.yaml | 97 + deploy/helm/kafscale/values.yaml | 136 +- go.mod | 3 +- go.sum | 2 + hack/check_coverage.sh | 17 +- hack/check_license_headers.py | 19 +- internal/console/lfs_consumer.go | 206 + internal/console/lfs_consumer_test.go | 167 + internal/console/lfs_handlers.go | 491 ++ internal/console/lfs_handlers_test.go | 549 +++ internal/console/lfs_s3_client.go | 203 + internal/console/lfs_s3_client_test.go | 63 + internal/console/lfs_types.go | 171 + internal/console/metrics_client.go | 4 +- internal/console/metrics_client_test.go | 208 + internal/console/server.go | 27 +- internal/console/server_test.go | 601 ++- internal/mcpserver/tools_handler_test.go | 502 ++ internal/testutil/etcd.go | 2 +- lfs-client-sdk/Makefile | 169 + lfs-client-sdk/java/README.md | 53 + lfs-client-sdk/java/pom.xml | 105 + .../java/org/kafscale/lfs/AwsS3Reader.java | 38 + .../main/java/org/kafscale/lfs/Checksum.java | 37 + .../main/java/org/kafscale/lfs/LfsCodec.java | 47 + .../java/org/kafscale/lfs/LfsConsumer.java | 43 + .../java/org/kafscale/lfs/LfsEnvelope.java | 40 + .../org/kafscale/lfs/LfsHttpException.java | 47 + .../java/org/kafscale/lfs/LfsProducer.java | 149 + .../java/org/kafscale/lfs/LfsResolver.java | 62 + 
.../main/java/org/kafscale/lfs/S3Reader.java | 20 + .../kafscale/lfs/DockerAvailabilityTest.java | 126 + .../java/org/kafscale/lfs/LfsCodecTest.java | 29 + .../lfs/LfsProducerIntegrationTest.java | 239 + .../org/kafscale/lfs/LfsProducerTest.java | 191 + .../org/kafscale/lfs/LfsResolverTest.java | 75 + lfs-client-sdk/js-browser/package.json | 40 + lfs-client-sdk/js-browser/src/envelope.ts | 54 + lfs-client-sdk/js-browser/src/index.ts | 29 + lfs-client-sdk/js-browser/src/producer.ts | 238 + lfs-client-sdk/js-browser/src/resolver.ts | 114 + lfs-client-sdk/js-browser/tsconfig.json | 19 + lfs-client-sdk/js/package-lock.json | 4170 +++++++++++++++++ lfs-client-sdk/js/package.json | 20 + .../js/src/__tests__/envelope.test.ts | 8 + lfs-client-sdk/js/src/envelope.ts | 28 + lfs-client-sdk/js/src/index.ts | 3 + lfs-client-sdk/js/src/producer.ts | 31 + lfs-client-sdk/js/src/resolver.ts | 44 + lfs-client-sdk/js/tsconfig.json | 13 + lfs-client-sdk/python/README.md | 20 + lfs-client-sdk/python/lfs_sdk/__init__.py | 14 + lfs-client-sdk/python/lfs_sdk/envelope.py | 36 + lfs-client-sdk/python/lfs_sdk/producer.py | 169 + lfs-client-sdk/python/lfs_sdk/resolver.py | 40 + lfs-client-sdk/python/pyproject.toml | 36 + lfs-client-sdk/python/tests/test_envelope.py | 6 + pkg/acl/acl_test.go | 162 + pkg/broker/conn_context_test.go | 69 + pkg/broker/proxyproto_test.go | 209 +- pkg/broker/s3_health_test.go | 57 + pkg/broker/server.go | 27 +- pkg/broker/server_test.go | 198 +- pkg/cache/segment_cache_test.go | 121 +- pkg/gen/control/broker.pb.go | 19 +- pkg/gen/control/broker_grpc.pb.go | 17 +- pkg/gen/metadata/metadata.pb.go | 246 +- pkg/idoc/explode.go | 252 + pkg/idoc/explode_test.go | 253 + pkg/lfs/checksum.go | 118 + pkg/lfs/checksum_test.go | 232 + pkg/lfs/consumer.go | 125 + pkg/lfs/consumer_test.go | 306 ++ pkg/lfs/doc.go | 232 + pkg/lfs/envelope.go | 72 + pkg/lfs/envelope_test.go | 189 + pkg/lfs/errors.go | 60 + pkg/lfs/errors_test.go | 78 + pkg/lfs/producer.go | 388 ++ 
pkg/lfs/producer_test.go | 422 ++ pkg/lfs/record.go | 286 ++ pkg/lfs/record_test.go | 412 ++ pkg/lfs/resolver.go | 93 + pkg/lfs/resolver_test.go | 175 + pkg/lfs/s3client.go | 111 + pkg/lfs/s3client_test.go | 151 + pkg/lfs/s3reader.go | 27 + pkg/metadata/etcd_store.go | 4 +- pkg/metadata/etcd_store_test.go | 124 +- pkg/metadata/group_lease_test.go | 36 +- pkg/metadata/group_router_test.go | 34 + pkg/metadata/lease_manager.go | 6 +- pkg/metadata/partition_lease_test.go | 71 +- pkg/metadata/partition_router_test.go | 31 + pkg/metadata/store_test.go | 632 +++ pkg/operator/cluster_controller.go | 8 +- pkg/operator/cluster_controller_test.go | 128 + pkg/operator/etcd_resources.go | 4 +- pkg/operator/helpers_test.go | 1721 +++++++ pkg/operator/lfs_proxy_resources.go | 366 ++ pkg/operator/snapshot.go | 4 +- pkg/protocol/encoding_test.go | 359 ++ pkg/protocol/request_test.go | 127 + pkg/protocol/types.go | 1 + pkg/storage/buffer_test.go | 51 + pkg/storage/index_test.go | 78 +- pkg/storage/log_test.go | 262 +- pkg/storage/s3_aws.go | 21 +- pkg/storage/s3client_test.go | 262 ++ pkg/storage/segment.go | 16 +- test/e2e/README.md | 102 +- test/e2e/franz_test.go | 1 + test/e2e/kafka_cli_test.go | 2 + test/e2e/lfs_iceberg_processor_test.go | 252 + test/e2e/lfs_proxy_broker_test.go | 234 + test/e2e/lfs_proxy_etcd_test.go | 65 + test/e2e/lfs_proxy_http_test.go | 641 +++ test/e2e/lfs_proxy_test.go | 462 ++ test/e2e/lfs_sdk_test.go | 184 + test/e2e/log_test.go | 1 + test/e2e/multi_segment_restart_test.go | 1 + test/e2e/operator_console_test.go | 12 +- test/e2e/ports.go | 33 + test/e2e/process_group_unix.go | 2 + ui/embed_test.go | 68 + ui/public/app.js | 504 ++ ui/public/index.html | 139 + ui/public/style.css | 343 ++ 212 files changed, 37795 insertions(+), 432 deletions(-) create mode 100644 addons/processors/iceberg-processor/internal/processor/lfs.go create mode 100644 addons/processors/iceberg-processor/internal/processor/lfs_test.go create mode 100644 api/lfs-proxy/openapi.yaml 
create mode 100644 cmd/idoc-explode/main.go create mode 100644 cmd/lfs-proxy/backend_auth.go create mode 100644 cmd/lfs-proxy/backend_tls.go create mode 100644 cmd/lfs-proxy/handler.go create mode 100644 cmd/lfs-proxy/handler_test.go create mode 100644 cmd/lfs-proxy/http.go create mode 100644 cmd/lfs-proxy/http_test.go create mode 100644 cmd/lfs-proxy/http_tls.go create mode 100644 cmd/lfs-proxy/http_tls_test.go create mode 100644 cmd/lfs-proxy/main.go create mode 100644 cmd/lfs-proxy/metrics.go create mode 100644 cmd/lfs-proxy/openapi.yaml create mode 100644 cmd/lfs-proxy/record.go create mode 100644 cmd/lfs-proxy/s3.go create mode 100644 cmd/lfs-proxy/sasl_encode.go create mode 100644 cmd/lfs-proxy/sasl_encode_test.go create mode 100644 cmd/lfs-proxy/swagger.go create mode 100644 cmd/lfs-proxy/tracker.go create mode 100644 cmd/lfs-proxy/tracker_test.go create mode 100644 cmd/lfs-proxy/tracker_types.go create mode 100644 cmd/lfs-proxy/uuid.go create mode 100644 cmd/proxy/lfs.go create mode 100644 cmd/proxy/lfs_backend_auth.go create mode 100644 cmd/proxy/lfs_backend_tls.go create mode 100644 cmd/proxy/lfs_http.go create mode 100644 cmd/proxy/lfs_http_tls.go create mode 100644 cmd/proxy/lfs_metrics.go create mode 100644 cmd/proxy/lfs_record.go create mode 100644 cmd/proxy/lfs_rewrite.go create mode 100644 cmd/proxy/lfs_s3.go create mode 100644 cmd/proxy/lfs_sasl_encode.go create mode 100644 cmd/proxy/lfs_swagger.go create mode 100644 cmd/proxy/lfs_test.go create mode 100644 cmd/proxy/lfs_tracker.go create mode 100644 cmd/proxy/lfs_tracker_types.go create mode 100644 cmd/proxy/lfs_uuid.go create mode 100644 cmd/proxy/openapi.yaml create mode 100644 deploy/docker-compose/Makefile create mode 100644 deploy/docker-compose/README.md create mode 100644 deploy/docker-compose/docker-compose.yaml create mode 100644 deploy/docker/lfs-proxy.Dockerfile create mode 100644 deploy/helm/kafscale/README.md create mode 100644 deploy/helm/kafscale/templates/lfs-proxy-deployment.yaml 
create mode 100644 deploy/helm/kafscale/templates/lfs-proxy-http-ingress.yaml create mode 100644 deploy/helm/kafscale/templates/lfs-proxy-metrics-service.yaml create mode 100644 deploy/helm/kafscale/templates/lfs-proxy-prometheusrule.yaml create mode 100644 deploy/helm/kafscale/templates/lfs-proxy-service.yaml create mode 100644 deploy/helm/kafscale/templates/lfs-proxy-servicemonitor.yaml create mode 100644 deploy/helm/kafscale/values-lfs-demo.yaml create mode 100644 internal/console/lfs_consumer.go create mode 100644 internal/console/lfs_consumer_test.go create mode 100644 internal/console/lfs_handlers.go create mode 100644 internal/console/lfs_handlers_test.go create mode 100644 internal/console/lfs_s3_client.go create mode 100644 internal/console/lfs_s3_client_test.go create mode 100644 internal/console/lfs_types.go create mode 100644 internal/mcpserver/tools_handler_test.go create mode 100644 lfs-client-sdk/Makefile create mode 100644 lfs-client-sdk/java/README.md create mode 100644 lfs-client-sdk/java/pom.xml create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/AwsS3Reader.java create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/Checksum.java create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsCodec.java create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsConsumer.java create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsEnvelope.java create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsHttpException.java create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsProducer.java create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsResolver.java create mode 100644 lfs-client-sdk/java/src/main/java/org/kafscale/lfs/S3Reader.java create mode 100644 lfs-client-sdk/java/src/test/java/org/kafscale/lfs/DockerAvailabilityTest.java create mode 100644 lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsCodecTest.java 
create mode 100644 lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsProducerIntegrationTest.java create mode 100644 lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsProducerTest.java create mode 100644 lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsResolverTest.java create mode 100644 lfs-client-sdk/js-browser/package.json create mode 100644 lfs-client-sdk/js-browser/src/envelope.ts create mode 100644 lfs-client-sdk/js-browser/src/index.ts create mode 100644 lfs-client-sdk/js-browser/src/producer.ts create mode 100644 lfs-client-sdk/js-browser/src/resolver.ts create mode 100644 lfs-client-sdk/js-browser/tsconfig.json create mode 100644 lfs-client-sdk/js/package-lock.json create mode 100644 lfs-client-sdk/js/package.json create mode 100644 lfs-client-sdk/js/src/__tests__/envelope.test.ts create mode 100644 lfs-client-sdk/js/src/envelope.ts create mode 100644 lfs-client-sdk/js/src/index.ts create mode 100644 lfs-client-sdk/js/src/producer.ts create mode 100644 lfs-client-sdk/js/src/resolver.ts create mode 100644 lfs-client-sdk/js/tsconfig.json create mode 100644 lfs-client-sdk/python/README.md create mode 100644 lfs-client-sdk/python/lfs_sdk/__init__.py create mode 100644 lfs-client-sdk/python/lfs_sdk/envelope.py create mode 100644 lfs-client-sdk/python/lfs_sdk/producer.py create mode 100644 lfs-client-sdk/python/lfs_sdk/resolver.py create mode 100644 lfs-client-sdk/python/pyproject.toml create mode 100644 lfs-client-sdk/python/tests/test_envelope.py create mode 100644 pkg/broker/conn_context_test.go create mode 100644 pkg/idoc/explode.go create mode 100644 pkg/idoc/explode_test.go create mode 100644 pkg/lfs/checksum.go create mode 100644 pkg/lfs/checksum_test.go create mode 100644 pkg/lfs/consumer.go create mode 100644 pkg/lfs/consumer_test.go create mode 100644 pkg/lfs/doc.go create mode 100644 pkg/lfs/envelope.go create mode 100644 pkg/lfs/envelope_test.go create mode 100644 pkg/lfs/errors.go create mode 100644 pkg/lfs/errors_test.go create mode 
100644 pkg/lfs/producer.go create mode 100644 pkg/lfs/producer_test.go create mode 100644 pkg/lfs/record.go create mode 100644 pkg/lfs/record_test.go create mode 100644 pkg/lfs/resolver.go create mode 100644 pkg/lfs/resolver_test.go create mode 100644 pkg/lfs/s3client.go create mode 100644 pkg/lfs/s3client_test.go create mode 100644 pkg/lfs/s3reader.go create mode 100644 pkg/operator/helpers_test.go create mode 100644 pkg/operator/lfs_proxy_resources.go create mode 100644 pkg/protocol/encoding_test.go create mode 100644 test/e2e/lfs_iceberg_processor_test.go create mode 100644 test/e2e/lfs_proxy_broker_test.go create mode 100644 test/e2e/lfs_proxy_etcd_test.go create mode 100644 test/e2e/lfs_proxy_http_test.go create mode 100644 test/e2e/lfs_proxy_test.go create mode 100644 test/e2e/lfs_sdk_test.go create mode 100644 ui/embed_test.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d56b8ec1..932c1541 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,6 +57,28 @@ jobs: - name: Run go test -race ./... run: go test -race ./... + build-lfs-proxy: + name: Build LFS Proxy + runs-on: ubuntu-latest + env: + GOCACHE: ${{ github.workspace }}/.gocache + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4 + + - uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v5 + with: + go-version-file: go.mod + cache-dependency-path: go.sum + + - name: Prepare Go build cache + run: mkdir -p "$GOCACHE" + + - name: Build lfs-proxy + run: go build -o lfs-proxy ./cmd/lfs-proxy + + - name: Run lfs-proxy tests + run: go test ./cmd/lfs-proxy/... 
+ go-coverage: name: Go Coverage Gate runs-on: ubuntu-latest @@ -76,6 +98,27 @@ jobs: - name: Enforce coverage floor run: bash hack/check_coverage.sh 45 + + e2e-lfs-proxy: + name: LFS Proxy E2E + runs-on: ubuntu-latest + env: + GOCACHE: ${{ github.workspace }}/.gocache + KAFSCALE_E2E: "1" + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4 + + - uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v5 + with: + go-version-file: go.mod + cache-dependency-path: go.sum + + - name: Prepare Go build cache + run: mkdir -p "$GOCACHE" + + - name: Run LFS proxy E2E tests + run: go test -tags=e2e ./test/e2e -run LfsProxy + helm-lint: name: Helm Lint runs-on: ubuntu-latest diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 8361a10f..53d7b6e5 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -51,6 +51,12 @@ jobs: with: languages: ${{ matrix.language }} queries: security-extended,security-and-quality + config: | + paths-ignore: + - '**/node_modules/**' + - '**/target/**' + - '**/*.egg-info/**' + - 'third_party/**' - name: Autobuild uses: github/codeql-action/autobuild@v4 diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 6a50ff9d..c22a7f57 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -36,6 +36,10 @@ jobs: image: kafscale-broker context: . file: deploy/docker/broker.Dockerfile + - name: lfs-proxy + image: kafscale-lfs-proxy + context: . + file: deploy/docker/lfs-proxy.Dockerfile - name: operator image: kafscale-operator context: . diff --git a/.gitignore b/.gitignore index 47666b68..954b01ec 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+Makefile-MK # Ignore build and IDE files + # Binaries /bin/ /dist/ @@ -31,6 +33,9 @@ *.key coverage*.out .build/ +target/ +**/target/ +spark-warehouse/ # Local Go cache (use GOCACHE=.gocache for hermetic builds/tests) .gocache/ @@ -79,3 +84,27 @@ proto/**/*.swagger.json _site/ Gemfile Gemfile.lock + +# Ignore demo node_modules +examples/E50_JS-kafscale-demo/node_modules/ + +# Go compiled binaries (top-level) +/e2e-client +/lfs-proxy +/proxy + +# Java build artifacts +target/ +dependency-reduced-pom.xml + +# JavaScript/Node.js +node_modules/ +package-lock.json + +# Python +__pycache__/ +*.pyc +*.egg-info/ + +# Test output +records.txt diff --git a/Makefile b/Makefile index 4ffa12a6..d8575624 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -.PHONY: proto build test tidy lint generate docker-build docker-build-e2e-client docker-build-etcd-tools docker-clean ensure-minio start-minio stop-containers release-broker-ports test-produce-consume test-produce-consume-debug test-consumer-group test-ops-api test-mcp test-multi-segment-durability test-full test-operator test-acl demo demo-platform demo-platform-bootstrap iceberg-demo kafsql-demo platform-demo help clean-kind-all +.PHONY: proto build test tidy lint generate build-sdk docker-build docker-build-e2e-client docker-build-etcd-tools docker-build-lfs-proxy docker-clean ensure-minio start-minio stop-containers release-broker-ports test-produce-consume test-produce-consume-debug test-consumer-group test-ops-api test-mcp test-multi-segment-durability test-lfs-proxy-broker test-full test-operator test-acl demo demo-long demo-platform demo-platform-bootstrap iceberg-demo kafsql-demo lfs-demo lfs-demo-medical lfs-demo-video lfs-demo-industrial platform-demo lfs-demo-idoc act-runnable demo-guide-pf demo-guide-pf-clean help clean-kind-all REGISTRY ?= ghcr.io/kafscale STAMP_DIR ?= .build @@ -21,11 +21,21 @@ BROKER_IMAGE 
?= $(REGISTRY)/kafscale-broker:dev OPERATOR_IMAGE ?= $(REGISTRY)/kafscale-operator:dev CONSOLE_IMAGE ?= $(REGISTRY)/kafscale-console:dev PROXY_IMAGE ?= $(REGISTRY)/kafscale-proxy:dev +LFS_PROXY_IMAGE ?= $(REGISTRY)/kafscale-lfs-proxy:dev SQL_PROCESSOR_IMAGE ?= $(REGISTRY)/kafscale-sql-processor:dev MCP_IMAGE ?= $(REGISTRY)/kafscale-mcp:dev +SPRING_DEMO_IMAGE ?= $(REGISTRY)/kafscale-spring-demo:dev + +OPERATOR_REPO := $(shell echo $(OPERATOR_IMAGE) | sed 's/:[^:]*$$//') +OPERATOR_TAG := $(shell echo $(OPERATOR_IMAGE) | sed 's/.*://') +CONSOLE_REPO := $(shell echo $(CONSOLE_IMAGE) | sed 's/:[^:]*$$//') +CONSOLE_TAG := $(shell echo $(CONSOLE_IMAGE) | sed 's/.*://') +SPRING_DEMO_REPO := $(shell echo $(SPRING_DEMO_IMAGE) | sed 's/:[^:]*$$//') +SPRING_DEMO_TAG := $(shell echo $(SPRING_DEMO_IMAGE) | sed 's/.*://') E2E_CLIENT_IMAGE ?= $(REGISTRY)/kafscale-e2e-client:dev ETCD_TOOLS_IMAGE ?= $(REGISTRY)/kafscale-etcd-tools:dev ICEBERG_PROCESSOR_IMAGE ?= iceberg-processor:dev +E72_BROWSER_DEMO_IMAGE ?= $(REGISTRY)/kafscale-e72-browser-demo:dev ICEBERG_REST_IMAGE ?= tabulario/iceberg-rest:1.6.0 ICEBERG_REST_PORT ?= 8181 ICEBERG_WAREHOUSE_BUCKET ?= kafscale-snapshots @@ -46,6 +56,11 @@ KAFSQL_DEMO_TOPIC ?= kafsql-demo-topic KAFSQL_DEMO_RECORDS ?= 200 KAFSQL_DEMO_TIMEOUT_SEC ?= 120 KAFSQL_PROCESSOR_RELEASE ?= kafsql-processor-dev +LFS_DEMO_NAMESPACE ?= $(KAFSCALE_DEMO_NAMESPACE) +LFS_DEMO_TOPIC ?= lfs-demo-topic +LFS_DEMO_BLOB_SIZE ?= 524288 +LFS_DEMO_BLOB_COUNT ?= 5 +LFS_DEMO_TIMEOUT_SEC ?= 120 MINIO_CONTAINER ?= kafscale-minio MINIO_IMAGE ?= quay.io/minio/minio:RELEASE.2024-09-22T00-33-43Z MINIO_PORT ?= 9000 @@ -71,6 +86,10 @@ KAFSCALE_DEMO_ETCD_INMEM ?= 1 KAFSCALE_DEMO_ETCD_REPLICAS ?= 3 BROKER_PORT ?= 39092 BROKER_PORTS ?= 39092 39093 39094 +SDK_JAVA_BUILD_CMD ?= mvn -DskipTests clean package +SDK_JS_BUILD_CMD ?= npm install && npm run build +SDK_PY_BUILD_CMD ?= python -m build +SKIP_JS_SDK ?= 1 proto: ## Generate protobuf + gRPC stubs buf generate @@ -80,6 +99,19 @@ 
generate: proto build: ## Build all binaries go build ./... +build-sdk: ## Build all LFS client SDKs + @echo "Building Java SDK..." + @cd lfs-client-sdk/java && $(SDK_JAVA_BUILD_CMD) + @test -d lfs-client-sdk/java/target || { echo "Java SDK target/ missing"; exit 1; } + @if [ "$(SKIP_JS_SDK)" = "1" ]; then \ + echo "Skipping JS SDK build (SKIP_JS_SDK=1)"; \ + else \ + echo "Building JS SDK..."; \ + cd lfs-client-sdk/js && $(SDK_JS_BUILD_CMD); \ + fi + @echo "Building Python SDK..." + @cd lfs-client-sdk/python && $(SDK_PY_BUILD_CMD) + test: ## Run unit tests + vet + race go vet ./... go test -race ./... @@ -87,7 +119,7 @@ test: ## Run unit tests + vet + race test-acl: ## Run ACL e2e test (requires KAFSCALE_E2E=1) KAFSCALE_E2E=1 go test -tags=e2e ./test/e2e -run TestACLsE2E -docker-build: docker-build-broker docker-build-operator docker-build-console docker-build-proxy docker-build-mcp docker-build-e2e-client docker-build-etcd-tools docker-build-sql-processor ## Build all container images +docker-build: docker-build-broker docker-build-operator docker-build-console docker-build-proxy docker-build-mcp docker-build-spring-demo docker-build-e2e-client docker-build-etcd-tools docker-build-sql-processor ## Build all container images @mkdir -p $(STAMP_DIR) DOCKER_BUILD_CMD := $(shell \ @@ -131,6 +163,13 @@ $(STAMP_DIR)/proxy.image: $(PROXY_SRCS) $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(PROXY_IMAGE) -f deploy/docker/proxy.Dockerfile . @touch $(STAMP_DIR)/proxy.image +LFS_PROXY_SRCS := $(shell find cmd/lfs-proxy pkg go.mod go.sum) +docker-build-lfs-proxy: $(STAMP_DIR)/lfs-proxy.image ## Build LFS proxy container image +$(STAMP_DIR)/lfs-proxy.image: $(LFS_PROXY_SRCS) + @mkdir -p $(STAMP_DIR) + $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(LFS_PROXY_IMAGE) -f deploy/docker/lfs-proxy.Dockerfile . 
+ @touch $(STAMP_DIR)/lfs-proxy.image + MCP_SRCS := $(shell find cmd/mcp internal/mcpserver go.mod go.sum) docker-build-mcp: $(STAMP_DIR)/mcp.image ## Build MCP container image $(STAMP_DIR)/mcp.image: $(MCP_SRCS) @@ -159,10 +198,23 @@ $(STAMP_DIR)/sql-processor.image: $(SQL_PROCESSOR_SRCS) $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(SQL_PROCESSOR_IMAGE) -f addons/processors/sql-processor/Dockerfile addons/processors/sql-processor @touch $(STAMP_DIR)/sql-processor.image +SPRING_DEMO_SRCS := $(shell find examples/E20_spring-boot-kafscale-demo -type f) +docker-build-spring-demo: $(STAMP_DIR)/spring-demo.image ## Build Spring Boot demo container image +$(STAMP_DIR)/spring-demo.image: $(SPRING_DEMO_SRCS) + @mkdir -p $(STAMP_DIR) + $(DOCKER_BUILD_CMD) -t $(SPRING_DEMO_IMAGE) examples/E20_spring-boot-kafscale-demo + @touch $(STAMP_DIR)/spring-demo.image + +docker-build-e72-browser-demo: ## Build E72 browser demo container image + $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(E72_BROWSER_DEMO_IMAGE) -f examples/E72_browser-lfs-sdk-demo/Dockerfile examples/E72_browser-lfs-sdk-demo + +docker-build-iceberg-processor: ## Build Iceberg processor container image + $(MAKE) -C addons/processors/iceberg-processor docker-build IMAGE=$(ICEBERG_PROCESSOR_IMAGE) DOCKER_BUILD_ARGS="$(DOCKER_BUILD_ARGS) --build-arg GO_BUILD_FLAGS='$(ICEBERG_PROCESSOR_BUILD_FLAGS)'" + docker-clean: ## Remove local dev images and prune dangling Docker data @echo "WARNING: this resets Docker build caches (buildx/builder) and removes local images." 
@printf "Type YES to continue: "; read ans; [ "$$ans" = "YES" ] || { echo "aborted"; exit 1; } - -docker image rm -f $(BROKER_IMAGE) $(OPERATOR_IMAGE) $(CONSOLE_IMAGE) $(PROXY_IMAGE) $(MCP_IMAGE) $(E2E_CLIENT_IMAGE) $(ETCD_TOOLS_IMAGE) $(SQL_PROCESSOR_IMAGE) + -docker image rm -f $(BROKER_IMAGE) $(OPERATOR_IMAGE) $(CONSOLE_IMAGE) $(PROXY_IMAGE) $(MCP_IMAGE) $(E2E_CLIENT_IMAGE) $(ETCD_TOOLS_IMAGE) $(SQL_PROCESSOR_IMAGE) $(SPRING_DEMO_IMAGE) -rm -rf $(STAMP_DIR) docker system prune --force --volumes docker buildx prune --force @@ -270,6 +322,11 @@ test-multi-segment-durability: release-broker-ports ensure-minio ## Run multi-se KAFSCALE_E2E=1 \ go test -tags=e2e ./test/e2e -run TestMultiSegmentRestartDurability -v + +test-lfs-proxy-broker: ## Run LFS proxy e2e with real broker (embedded etcd + in-memory S3). + KAFSCALE_E2E=1 \ + go test -tags=e2e ./test/e2e -run TestLfsProxyBrokerE2E -v + test-full: ## Run unit tests plus local + MinIO-backed e2e suites. $(MAKE) test $(MAKE) test-consumer-group @@ -536,6 +593,100 @@ kafsql-demo: demo-platform-bootstrap ## Run the KAFSQL processor e2e demo on kin MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ bash scripts/kafsql-demo.sh +lfs-demo: KAFSCALE_DEMO_PROXY=0 +lfs-demo: KAFSCALE_DEMO_CONSOLE=1 +lfs-demo: KAFSCALE_DEMO_BROKER_REPLICAS=1 +lfs-demo: demo-platform-bootstrap ## Run the LFS proxy demo on kind. 
+ $(MAKE) docker-build-lfs-proxy + KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ + KAFSCALE_DEMO_NAMESPACE=$(KAFSCALE_DEMO_NAMESPACE) \ + KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ + LFS_DEMO_NAMESPACE=$(LFS_DEMO_NAMESPACE) \ + LFS_DEMO_TOPIC=$(LFS_DEMO_TOPIC) \ + LFS_DEMO_BLOB_SIZE=$(LFS_DEMO_BLOB_SIZE) \ + LFS_DEMO_BLOB_COUNT=$(LFS_DEMO_BLOB_COUNT) \ + LFS_DEMO_TIMEOUT_SEC=$(LFS_DEMO_TIMEOUT_SEC) \ + LFS_PROXY_IMAGE=$(LFS_PROXY_IMAGE) \ + E2E_CLIENT_IMAGE=$(E2E_CLIENT_IMAGE) \ + MINIO_BUCKET=$(MINIO_BUCKET) \ + MINIO_REGION=$(MINIO_REGION) \ + MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ + MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ + bash scripts/lfs-demo.sh + +lfs-demo-medical: KAFSCALE_DEMO_PROXY=0 +lfs-demo-medical: KAFSCALE_DEMO_CONSOLE=0 +lfs-demo-medical: KAFSCALE_DEMO_BROKER_REPLICAS=1 +lfs-demo-medical: demo-platform-bootstrap ## Run the Medical LFS demo (E60) - healthcare imaging with content explosion. + $(MAKE) docker-build-lfs-proxy + KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ + KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ + LFS_PROXY_IMAGE=$(LFS_PROXY_IMAGE) \ + E2E_CLIENT_IMAGE=$(E2E_CLIENT_IMAGE) \ + MINIO_BUCKET=$(MINIO_BUCKET) \ + MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ + MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ + bash scripts/medical-lfs-demo.sh + +lfs-demo-video: KAFSCALE_DEMO_PROXY=0 +lfs-demo-video: KAFSCALE_DEMO_CONSOLE=0 +lfs-demo-video: KAFSCALE_DEMO_BROKER_REPLICAS=1 +lfs-demo-video: demo-platform-bootstrap ## Run the Video LFS demo (E61) - media streaming with content explosion. 
+ $(MAKE) docker-build-lfs-proxy + KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ + KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ + LFS_PROXY_IMAGE=$(LFS_PROXY_IMAGE) \ + E2E_CLIENT_IMAGE=$(E2E_CLIENT_IMAGE) \ + MINIO_BUCKET=$(MINIO_BUCKET) \ + MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ + MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ + bash scripts/video-lfs-demo.sh + +lfs-demo-industrial: KAFSCALE_DEMO_PROXY=0 +lfs-demo-industrial: KAFSCALE_DEMO_CONSOLE=0 +lfs-demo-industrial: KAFSCALE_DEMO_BROKER_REPLICAS=1 +lfs-demo-industrial: demo-platform-bootstrap ## Run the Industrial LFS demo (E62) - mixed telemetry + images. + $(MAKE) docker-build-lfs-proxy + KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ + KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ + LFS_PROXY_IMAGE=$(LFS_PROXY_IMAGE) \ + E2E_CLIENT_IMAGE=$(E2E_CLIENT_IMAGE) \ + MINIO_BUCKET=$(MINIO_BUCKET) \ + MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ + MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ + bash scripts/industrial-lfs-demo.sh + +e72-browser-demo: ## Run the E72 Browser LFS SDK demo (local, requires port-forward). + @echo "=== E72 Browser LFS SDK Demo (Local) ===" + @echo "Prerequisites: LFS proxy must be port-forwarded to localhost:8080" + @echo " kubectl -n kafscale-demo port-forward svc/lfs-proxy 8080:8080" + @echo "" + cd examples/E72_browser-lfs-sdk-demo && $(MAKE) test + +E72_PROXY_LOCAL_PORT ?= 8080 +E72_MINIO_LOCAL_PORT ?= 9000 +E72_S3_PUBLIC_ENDPOINT ?= http://localhost:$(E72_MINIO_LOCAL_PORT) + +e72-browser-demo-test: ## Rebuild/redeploy LFS proxy, refresh demo, port-forward, and open the SPA. 
+ @echo "=== E72 Browser LFS SDK Demo (Rebuild + Test) ===" + $(MAKE) docker-build-lfs-proxy + kind load docker-image $(LFS_PROXY_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) + kubectl -n $(KAFSCALE_DEMO_NAMESPACE) set env deployment/lfs-proxy KAFSCALE_LFS_PROXY_S3_PUBLIC_ENDPOINT=$(E72_S3_PUBLIC_ENDPOINT) + kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout restart deployment/lfs-proxy + kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout status deployment/lfs-proxy --timeout=60s + kubectl -n $(KAFSCALE_DEMO_NAMESPACE) apply -f examples/E72_browser-lfs-sdk-demo/k8s-deploy.yaml + kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout restart deployment/e72-browser-demo + kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout status deployment/e72-browser-demo --timeout=60s + @pkill -f "port-forward.*$(E72_PROXY_LOCAL_PORT)" 2>/dev/null || true + @pkill -f "port-forward.*$(E72_MINIO_LOCAL_PORT)" 2>/dev/null || true + @kubectl -n $(KAFSCALE_DEMO_NAMESPACE) port-forward svc/lfs-proxy $(E72_PROXY_LOCAL_PORT):8080 >/dev/null 2>&1 & + @kubectl -n $(KAFSCALE_DEMO_NAMESPACE) port-forward svc/minio $(E72_MINIO_LOCAL_PORT):9000 >/dev/null 2>&1 & + @sleep 2 + cd examples/E72_browser-lfs-sdk-demo && $(MAKE) test PORT=3000 + +e72-browser-demo-k8s: ## Run the E72 Browser LFS SDK demo inside the kind cluster. + bash scripts/e72-browser-demo.sh + platform-demo: demo-platform ## Alias for demo-platform. demo: release-broker-ports ensure-minio ## Launch the broker + console demo stack and open the UI (Ctrl-C to stop). @@ -572,11 +723,215 @@ demo-long: release-broker-ports ensure-minio ## Launch the broker + console demo KAFSCALE_S3_SECRET_KEY=$(MINIO_ROOT_PASSWORD) \ go test -count=1 -timeout 0 -tags=e2e ./test/e2e -run TestDemoStack -v +demo-bridge: release-broker-ports ensure-minio ## Launch the broker + console demo stack and open the UI (Ctrl-C to stop) + expose host for docker. 
+ KAFSCALE_E2E=1 \ + KAFSCALE_E2E_DEMO=1 \ + KAFSCALE_E2E_OPEN_UI=1 \ + KAFSCALE_UI_USERNAME=kafscaleadmin \ + KAFSCALE_UI_PASSWORD=kafscale \ + KAFSCALE_CONSOLE_BROKER_METRICS_URL=http://127.0.0.1:39093/metrics \ + KAFSCALE_CONSOLE_OPERATOR_METRICS_URL=http://127.0.0.1:8080/metrics \ + KAFSCALE_S3_BUCKET=$(MINIO_BUCKET) \ + KAFSCALE_S3_REGION=$(MINIO_REGION) \ + KAFSCALE_S3_NAMESPACE=default \ + KAFSCALE_S3_ENDPOINT=http://127.0.0.1:$(MINIO_PORT) \ + KAFSCALE_S3_PATH_STYLE=true \ + KAFSCALE_S3_ACCESS_KEY=$(MINIO_ROOT_USER) \ + KAFSCALE_S3_SECRET_KEY=$(MINIO_ROOT_PASSWORD) \ + KAFSCALE_BROKERS_ADVERTISED_HOST=host.docker.internal \ + KAFSCALE_BROKERS_ADVERTISED_PORT=39092 \ + go test -count=1 -tags=e2e ./test/e2e -run TestDemoStack -v + +demo-guide-pf: docker-build ## Launch a full platform demo on kind. + @command -v docker >/dev/null 2>&1 || { echo "docker is required"; exit 1; } + @command -v kind >/dev/null 2>&1 || { echo "kind is required"; exit 1; } + @command -v kubectl >/dev/null 2>&1 || { echo "kubectl is required"; exit 1; } + @command -v helm >/dev/null 2>&1 || { echo "helm is required"; exit 1; } + + @kind delete cluster --name $(KAFSCALE_KIND_CLUSTER) >/dev/null 2>&1 || true + @kind create cluster --name $(KAFSCALE_KIND_CLUSTER) + + @kind load docker-image $(BROKER_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) + @kind load docker-image $(OPERATOR_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) + @kind load docker-image $(CONSOLE_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) + @kind load docker-image $(SPRING_DEMO_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) + + kubectl apply -f deploy/demo/namespace.yaml + kubectl apply -f deploy/demo/minio.yaml + + kubectl -n kafscale-demo rollout status deployment/minio --timeout=120s + + kubectl apply -f deploy/demo/s3-secret.yaml + + helm upgrade --install kafscale deploy/helm/kafscale \ + --namespace $(KAFSCALE_DEMO_NAMESPACE) \ + --create-namespace \ + --set operator.replicaCount=1 \ + --set operator.image.repository=$(OPERATOR_REPO) \ + 
--set operator.image.tag=$(OPERATOR_TAG) \ + --set operator.image.pullPolicy=IfNotPresent \ + --set console.image.repository=$(CONSOLE_REPO) \ + --set console.image.tag=$(CONSOLE_TAG) \ + --set console.auth.username=admin \ + --set console.auth.password=admin \ + --set operator.etcdEndpoints[0]= + + @echo "[CONSOLE_TAG] CONSOLE_TAG = $(CONSOLE_TAG)" + @echo "[CONSOLE_REPO ] CONSOLE_REPO = $(CONSOLE_REPO)" + @echo "[OPERATOR_REPO] OPERATOR_REPO = $(OPERATOR_REPO)" + @echo "[SPRING_DEMO_REPO] SPRING_DEMO_REPO = $(SPRING_DEMO_REPO)" + + @echo "[CONSOLE_REPO] CONSOLE_REPO =$(CONSOLE_REPO)" + @echo "[OPERATOR_REPO] OPERATOR_REPO =$(OPERATOR_REPO)" + + @echo "[IMAGENAME] BROKER_IMAGE. =$(BROKER_IMAGE)" + @echo "[IMAGENAME] OPERATOR_IMAGE =$(OPERATOR_IMAGE)" + @echo "[IMAGENAME] CONSOLE_IMAGE =$(CONSOLE_IMAGE)" + @echo "[IMAGENAME] SPRING_DEMO_IMAGE = $(SPRING_DEMO_IMAGE)" + + @echo "[CONSOLE_TAG] CONSOLE_TAG =$(CONSOLE_TAG)" + + @bash -c 'set -e; \ + OPERATOR_DEPLOY=$$(kubectl -n kafscale-demo get deployments \ + -l app.kubernetes.io/component=operator \ + -o jsonpath="{.items[0].metadata.name}"); \ + echo "Using operator deployment: $$OPERATOR_DEPLOY"; \ + kubectl -n kafscale-demo set env deployment/$$OPERATOR_DEPLOY \ + BROKER_IMAGE=$(BROKER_IMAGE) \ + KAFSCALE_OPERATOR_ETCD_ENDPOINTS= \ + KAFSCALE_OPERATOR_ETCD_SNAPSHOT_BUCKET=kafscale-snapshots \ + KAFSCALE_OPERATOR_ETCD_SNAPSHOT_CREATE_BUCKET=1 \ + KAFSCALE_OPERATOR_ETCD_SNAPSHOT_PROTECT_BUCKET=1 \ + KAFSCALE_OPERATOR_LEADER_KEY=kafscale-operator-leader \ + KAFSCALE_OPERATOR_ETCD_SNAPSHOT_S3_ENDPOINT=http://minio.kafscale-demo.svc.cluster.local:9000; \ + kubectl -n kafscale-demo rollout status deployment/$$OPERATOR_DEPLOY --timeout=120s; \ + kubectl apply -f deploy/demo/kafscale-cluster.yaml; \ + kubectl apply -f deploy/demo/kafscale-topics.yaml; \ + echo "Waiting for broker deployment to be created ..."; \ + while ! 
kubectl -n kafscale-demo get deployment kafscale-broker >/dev/null 2>&1; do sleep 1; done; \ + kubectl -n kafscale-demo wait --for=condition=available deployment/kafscale-broker --timeout=180s; \ + console_svc=$$(kubectl -n kafscale-demo get svc -l app.kubernetes.io/component=console -o jsonpath="{.items[0].metadata.name}"); \ + echo "Exposing Console at http://localhost:8080/ui"; \ + nohup kubectl -n kafscale-demo port-forward svc/$$console_svc 8080:80 >/tmp/kafscale-demo-console.log 2>&1 & \ + kubectl apply -f deploy/demo/spring-boot-app.yaml; \ + kubectl apply -f deploy/demo/flink-wordcount-app.yaml; \ + kubectl -n kafscale-demo wait --for=condition=available deployment/spring-demo-app --timeout=120s; \ + nohup kubectl -n kafscale-demo port-forward svc/spring-demo-app 8083:8083 >/tmp/kafscale-demo-spring.log 2>&1 & \ + nohup kubectl -n kafscale-demo port-forward svc/kafscale-broker 9093:9093 >/tmp/kafscale-demo-metrics.log 2>&1 & \ + nohup kubectl -n kafscale-demo port-forward svc/kafscale-broker 39092:9092 >/tmp/kafscale-demo-broker.log 2>&1 & \ + echo "Exposing SpringBootApp at http://localhost:8083"; \ + echo "Exposing Metrics at localhost:9093"; \ + echo "Services exposed in background. 
Logs at /tmp/kafscale-demo-*.log"' + +demo-guide-pf-app: docker-build + kubectl apply -f deploy/demo/spring-boot-app.yaml; + kubectl -n kafscale-demo wait --for=condition=available deployment/spring-demo-app --timeout=120s; + # Start Nginx Load Balancer + kubectl apply -f deploy/demo/nginx-lb.yaml; + kubectl -n kafscale-demo wait --for=condition=available deployment/nginx-lb --timeout=120s; + echo "Exposing SpringBootApp at http://localhost:8083"; + nohup kubectl -n kafscale-demo port-forward svc/spring-demo-app 8083:8083 >/tmp/kafscale-demo-spring.log 2>&1 & + echo "Exposing Kafka via Nginx LB at localhost:59092"; + nohup kubectl -n kafscale-demo port-forward svc/nginx-lb 59092:59092 >/tmp/kafscale-demo-nginx.log 2>&1 & + +demo-guide-pf-clean: ## Clean up the platform demo environment + @echo "Cleaning up demo-platform2..." + @pkill -f 'kubectl -n kafscale-demo port-forward' || true + @kind delete cluster --name $(KAFSCALE_KIND_CLUSTER) >/dev/null 2>&1 || true + @echo "Cleanup complete. \nKIND CLUSTER: [$(KAFSCALE_KIND_CLUSTER)] removed." + tidy: go mod tidy lint: golangci-lint run +ACT ?= act +ACT_PLATFORM ?= linux/amd64 +ACT_FLAGS ?= --container-architecture $(ACT_PLATFORM) +ACT_IMAGE ?= local/act-runner:latest +STAGE_REGISTRY ?= 192.168.0.131:5100 +STAGE_TAG ?= stage +STAGE_PLATFORMS ?= linux/amd64,linux/arm64 +STAGE_NO_CACHE ?= 1 +STAGE_SOURCE_REGISTRY ?= ghcr.io/kafscale +STAGE_SOURCE_TAG ?= dev +STAGE_IMAGES ?= kafscale-broker kafscale-lfs-proxy kafscale-operator kafscale-console \ + kafscale-etcd-tools kafscale-iceberg-processor kafscale-sql-processor \ + kafscale-e72-browser-demo + +act-runnable: ## Run runnable GitHub Actions locally (ci.yml, docker.yml) + $(ACT) -W .github/workflows/ci.yml $(ACT_FLAGS) + $(ACT) -W .github/workflows/docker.yml $(ACT_FLAGS) + +act-image: ## Build local act runner image. + docker build -t $(ACT_IMAGE) .devcontainer/act-runner + +stage-release: ## Push stage images to local registry (local buildx). 
+ STAGE_REGISTRY=$(STAGE_REGISTRY) STAGE_TAG=$(STAGE_TAG) STAGE_PLATFORMS=$(STAGE_PLATFORMS) STAGE_NO_CACHE=$(STAGE_NO_CACHE) \ + bash scripts/stage-release-local.sh + +stage-release-push: docker-build docker-build-lfs-proxy docker-build-iceberg-processor docker-build-e72-browser-demo ## Retag and push locally built images to STAGE_REGISTRY. + @set -e; \ + for img in $(STAGE_IMAGES); do \ + dst="$(STAGE_REGISTRY)/kafscale/$${img}:$(STAGE_TAG)"; \ + found=0; \ + for src in \ + "$(STAGE_SOURCE_REGISTRY)/$${img}:$(STAGE_SOURCE_TAG)" \ + "$${img}:$(STAGE_SOURCE_TAG)" \ + "$$(case $$img in \ + kafscale-broker) echo $(BROKER_IMAGE) ;; \ + kafscale-operator) echo $(OPERATOR_IMAGE) ;; \ + kafscale-console) echo $(CONSOLE_IMAGE) ;; \ + kafscale-lfs-proxy) echo $(LFS_PROXY_IMAGE) ;; \ + kafscale-etcd-tools) echo $(ETCD_TOOLS_IMAGE) ;; \ + kafscale-sql-processor) echo $(SQL_PROCESSOR_IMAGE) ;; \ + kafscale-iceberg-processor) echo $(ICEBERG_PROCESSOR_IMAGE) ;; \ + kafscale-e72-browser-demo) echo $(E72_BROWSER_DEMO_IMAGE) ;; \ + *) echo "" ;; \ + esac)"; do \ + [ -z "$$src" ] && continue; \ + if docker image inspect "$$src" >/dev/null 2>&1; then \ + echo "Pushing $$src -> $$dst"; \ + docker tag "$$src" "$$dst"; \ + docker push "$$dst"; \ + found=1; \ + break; \ + fi; \ + done; \ + if [ "$$found" -ne 1 ]; then \ + echo "Skipping $$img (source image not found)"; \ + fi; \ + done + +stage-release-clean: ## Remove stage release builder and prune local stage images. + @docker buildx rm stage-release-builder >/dev/null 2>&1 || true + @docker image rm -f $(E72_BROWSER_DEMO_IMAGE) $(BROKER_IMAGE) $(OPERATOR_IMAGE) $(CONSOLE_IMAGE) \ + $(LFS_PROXY_IMAGE) $(ETCD_TOOLS_IMAGE) $(SQL_PROCESSOR_IMAGE) $(ICEBERG_PROCESSOR_IMAGE) >/dev/null 2>&1 || true + +stage-release-act: act-image ## Push stage images to local registry via workflow (containerized act). 
+ docker run --rm \ + --privileged \ + --network host \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v $(PWD):/workspace \ + -w /workspace \ + $(ACT_IMAGE) \ + -W .github/workflows/stage-release.yml $(ACT_FLAGS) \ + -P ubuntu-latest=catthehacker/ubuntu:act-latest \ + --input registry=$(STAGE_REGISTRY) --input tag=$(STAGE_TAG) + +IDOC_EXPLODE_BIN ?= bin/idoc-explode + +lfs-demo-idoc: ensure-minio ## Run IDoc explode demo — uploads IDoc XML to S3 via LFS, then explodes into topic streams. + @mkdir -p bin + go build -o $(IDOC_EXPLODE_BIN) ./cmd/idoc-explode + MINIO_PORT=$(MINIO_PORT) \ + MINIO_BUCKET=$(MINIO_BUCKET) \ + MINIO_REGION=$(MINIO_REGION) \ + MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ + MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ + ./scripts/idoc-explode-demo.sh + help: ## Show targets @grep -E '^[a-zA-Z_-]+:.*?##' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "%-20s %s\n", $$1, $$2}' diff --git a/README.md b/README.md index e3af2b51..39a47cd6 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,13 @@ For the technical specification and data formats, see `kafscale-spec.md`. A detailed architecture overview and design rationale are available here: https://www.novatechflow.com/p/kafscale.html +## Examples + +- Quickstart guide: `examples/101_kafscale-dev-guide/README.md` +- Spring Boot app demo (E20): `examples/E20_spring-boot-kafscale-demo/README.md` +- Flink demo (E30): `examples/E30_flink-kafscale-demo/README.md` +- Spark demo (E40): `examples/E40_spark-kafscale-demo/README.md` + ## Community - License: Apache 2.0 (`LICENSE`) diff --git a/addons/processors/iceberg-processor/Dockerfile b/addons/processors/iceberg-processor/Dockerfile index 6c14a27d..ad5d56f4 100644 --- a/addons/processors/iceberg-processor/Dockerfile +++ b/addons/processors/iceberg-processor/Dockerfile @@ -16,12 +16,21 @@ FROM golang:1.25-alpine AS build RUN apk add --no-cache git +ARG REPO_ROOT=. +ARG MODULE_DIR=. 
WORKDIR /src -COPY go.mod go.sum ./ +COPY ${REPO_ROOT} /src +WORKDIR /src/${MODULE_DIR} +ARG USE_LOCAL_PLATFORM=0 +RUN if [ "${USE_LOCAL_PLATFORM}" != "1" ]; then \ + go mod edit -dropreplace github.com/KafScale/platform || true; \ + fi +RUN if [ "${USE_LOCAL_PLATFORM}" != "1" ]; then \ + go mod download github.com/KafScale/platform@v1.5.0 || true; \ + fi RUN --mount=type=cache,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ go mod download -COPY . ./ ARG GO_BUILD_FLAGS= RUN --mount=type=cache,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ diff --git a/addons/processors/iceberg-processor/Makefile b/addons/processors/iceberg-processor/Makefile index 31a9e697..a405b9d8 100644 --- a/addons/processors/iceberg-processor/Makefile +++ b/addons/processors/iceberg-processor/Makefile @@ -19,6 +19,30 @@ BINARY := iceberg-processor BUILD_DIR := bin IMAGE ?= $(BINARY):dev DOCKER_BUILD_ARGS ?= +DOCKER_BUILD_ARGS_LOCAL = --build-arg USE_LOCAL_PLATFORM=1 --build-arg REPO_ROOT=. --build-arg MODULE_DIR=addons/processors/iceberg-processor +RSYNC_EXCLUDES = \ + --exclude ".dockerignore" \ + --exclude ".git" \ + --exclude ".build" \ + --exclude ".gocache" \ + --exclude ".idea" \ + --exclude ".vscode" \ + --exclude "_site" \ + --exclude "bin" \ + --exclude "coverage.out" \ + --exclude "dist" \ + --exclude "docs" \ + --exclude "deploy/helm" \ + --exclude "test" \ + --exclude "tmp" \ + --exclude "**/.DS_Store" \ + --exclude "**/*.log" \ + --exclude "**/*.swp" \ + --exclude "**/*_test.go" \ + --exclude "**/node_modules" \ + --exclude "ui/.next" \ + --exclude "ui/dist" \ + --exclude "ui/build" DOCKER_BUILD_CMD := $(shell \ if command -v docker >/dev/null 2>&1 && docker buildx version >/dev/null 2>&1; then \ @@ -46,4 +70,7 @@ clean: rm -rf $(BUILD_DIR) docker-build: - $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(IMAGE) . + @tmp=$$(mktemp -d); \ + rsync -a --delete $(RSYNC_EXCLUDES) ../../.. 
"$$tmp/"; \ + $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) $(DOCKER_BUILD_ARGS_LOCAL) -t $(IMAGE) -f Dockerfile "$$tmp"; \ + rm -rf "$$tmp" diff --git a/addons/processors/iceberg-processor/config/config.yaml b/addons/processors/iceberg-processor/config/config.yaml index 25117402..664f546b 100644 --- a/addons/processors/iceberg-processor/config/config.yaml +++ b/addons/processors/iceberg-processor/config/config.yaml @@ -62,3 +62,9 @@ mappings: type: string required: false allow_type_widening: true + lfs: + mode: off + max_inline_size: 1048576 + store_metadata: false + validate_checksum: true + resolve_concurrency: 4 diff --git a/addons/processors/iceberg-processor/deploy/helm/iceberg-processor/config/config.yaml b/addons/processors/iceberg-processor/deploy/helm/iceberg-processor/config/config.yaml index d9cd9b94..a8988200 100644 --- a/addons/processors/iceberg-processor/deploy/helm/iceberg-processor/config/config.yaml +++ b/addons/processors/iceberg-processor/deploy/helm/iceberg-processor/config/config.yaml @@ -57,3 +57,9 @@ mappings: type: string required: false allow_type_widening: true + lfs: + mode: off + max_inline_size: 1048576 + store_metadata: false + validate_checksum: true + resolve_concurrency: 4 diff --git a/addons/processors/iceberg-processor/deploy/helm/iceberg-processor/values.yaml b/addons/processors/iceberg-processor/deploy/helm/iceberg-processor/values.yaml index c313c227..10159f8d 100644 --- a/addons/processors/iceberg-processor/deploy/helm/iceberg-processor/values.yaml +++ b/addons/processors/iceberg-processor/deploy/helm/iceberg-processor/values.yaml @@ -89,6 +89,12 @@ config: type: string required: false allow_type_widening: true + lfs: + mode: off + max_inline_size: 1048576 + store_metadata: false + validate_checksum: true + resolve_concurrency: 4 s3: credentialsSecretRef: "" diff --git a/addons/processors/iceberg-processor/go.mod b/addons/processors/iceberg-processor/go.mod index 86ccccd3..07ec1587 100644 --- 
a/addons/processors/iceberg-processor/go.mod +++ b/addons/processors/iceberg-processor/go.mod @@ -155,3 +155,5 @@ require ( google.golang.org/grpc v1.78.0 // indirect google.golang.org/protobuf v1.36.11 // indirect ) + +replace github.com/KafScale/platform => ../../.. diff --git a/addons/processors/iceberg-processor/go.sum b/addons/processors/iceberg-processor/go.sum index 5507e6b2..5ccf627f 100644 --- a/addons/processors/iceberg-processor/go.sum +++ b/addons/processors/iceberg-processor/go.sum @@ -6,8 +6,8 @@ atomicgo.dev/keyboard v0.2.9 h1:tOsIid3nlPLZ3lwgG8KZMp/SFmr7P0ssEN5JUsm78K8= atomicgo.dev/keyboard v0.2.9/go.mod h1:BC4w9g00XkxH/f1HXhW2sXmJFOCWbKn9xrOunSFtExQ= atomicgo.dev/schedule v0.1.0 h1:nTthAbhZS5YZmgYbb2+DH8uQIZcTlIrd4eYr3UQxEjs= atomicgo.dev/schedule v0.1.0/go.mod h1:xeUa3oAkiuHYh8bKiQBRojqAMq3PXXbJujjb0hw8pEU= -cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= -cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= +cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= +cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= cloud.google.com/go v0.121.6 h1:waZiuajrI28iAf40cWgycWNgaXPO06dupuS+sgibK6c= cloud.google.com/go v0.121.6/go.mod h1:coChdst4Ea5vUpiALcYKXEpR1S9ZgXbhEzzMcMR66vI= cloud.google.com/go/auth v0.16.5 h1:mFWNQ2FEVWAliEQWpAdH80omXFokmrnbDhUS9cBywsI= @@ -66,10 +66,6 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0 github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.53.0/go.mod h1:jUZ5LYlw40WMd07qxcQJD5M40aUxrfwqQX1g7zxYnrQ= github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 h1:Ron4zCA/yk6U7WOBXhTJcDpsUBG9npumK6xw2auFltQ= github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0/go.mod h1:cSgYe11MCNYunTnRXrKiR/tHc0eoKjICUuWpNZoVCOo= -github.com/KafScale/platform v1.4.2 h1:se8dIXEILnsIpY7VkqOE3UPEVGhohVZDTZUAdrq+bLE= 
-github.com/KafScale/platform v1.4.2/go.mod h1:8HBfHD7GBslKj+1ymFt9BSsqOC2mAItrFBmYJjFIMhY= -github.com/KafScale/platform v1.5.0 h1:2hZNeNG6nXN+XB6Wx/tSeKhyORcEhJe+ZfAGmJb0rz8= -github.com/KafScale/platform v1.5.0/go.mod h1:H6eTVqlZ7baK+b1kQgiRkdsPY85VA2XVUj+twtzKZC4= github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs= github.com/MarvinJWendt/testza v0.2.1/go.mod h1:God7bhG8n6uQxwdScay+gjm9/LnO4D3kkcZX4hv9Rp8= github.com/MarvinJWendt/testza v0.2.8/go.mod h1:nwIcjmr0Zz+Rcwfh3/4UhBp7ePKVhuBExvZqnKYWlII= @@ -100,48 +96,48 @@ github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmms github.com/atomicgo/cursor v0.0.1/go.mod h1:cBON2QmmrysudxNBFthvMtN32r3jxVRIvzkUiF/RuIk= github.com/aws/aws-sdk-go v1.55.7 h1:UJrkFq7es5CShfBwlWAC8DA077vp8PyVbQd3lqLiztE= github.com/aws/aws-sdk-go v1.55.7/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= -github.com/aws/aws-sdk-go-v2 v1.41.1 h1:ABlyEARCDLN034NhxlRUSZr4l71mh+T5KAeGh6cerhU= -github.com/aws/aws-sdk-go-v2 v1.41.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0= +github.com/aws/aws-sdk-go-v2 v1.41.2 h1:LuT2rzqNQsauaGkPK/7813XxcZ3o3yePY0Iy891T2ls= +github.com/aws/aws-sdk-go-v2 v1.41.2/go.mod h1:IvvlAZQXvTXznUPfRVfryiG1fbzE2NGK6m9u39YQ+S4= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4= -github.com/aws/aws-sdk-go-v2/config v1.32.7 h1:vxUyWGUwmkQ2g19n7JY/9YL8MfAIl7bTesIUykECXmY= -github.com/aws/aws-sdk-go-v2/config v1.32.7/go.mod h1:2/Qm5vKUU/r7Y+zUk/Ptt2MDAEKAfUtKc1+3U1Mo3oY= -github.com/aws/aws-sdk-go-v2/credentials v1.19.7 h1:tHK47VqqtJxOymRrNtUXN5SP/zUTvZKeLx4tH6PGQc8= -github.com/aws/aws-sdk-go-v2/credentials v1.19.7/go.mod h1:qOZk8sPDrxhf+4Wf4oT2urYJrYt3RejHSzgAquYeppw= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 h1:I0GyV8wiYrP8XpA70g1HBcQO1JlQxCMTW9npl5UbDHY= 
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17/go.mod h1:tyw7BOl5bBe/oqvoIeECFJjMdzXoa/dfVz3QQ5lgHGA= +github.com/aws/aws-sdk-go-v2/config v1.32.9 h1:ktda/mtAydeObvJXlHzyGpK1xcsLaP16zfUPDGoW90A= +github.com/aws/aws-sdk-go-v2/config v1.32.9/go.mod h1:U+fCQ+9QKsLW786BCfEjYRj34VVTbPdsLP3CHSYXMOI= +github.com/aws/aws-sdk-go-v2/credentials v1.19.10 h1:EEhmEUFCE1Yhl7vDhNOI5OCL/iKMdkkYFTRpZXNw7m8= +github.com/aws/aws-sdk-go-v2/credentials v1.19.10/go.mod h1:RnnlFCAlxQCkN2Q379B67USkBMu1PipEEiibzYN5UTE= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.18 h1:Ii4s+Sq3yDfaMLpjrJsqD6SmG/Wq/P5L/hw2qa78UAY= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.18/go.mod h1:6x81qnY++ovptLE6nWQeWrpXxbnlIex+4H4eYYGcqfc= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.84 h1:cTXRdLkpBanlDwISl+5chq5ui1d1YWg4PWMR9c3kXyw= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.84/go.mod h1:kwSy5X7tfIHN39uucmjQVs2LvDdXEjQucgQQEqCggEo= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 h1:xOLELNKGp2vsiteLsvLPwxC+mYmO6OZ8PYgiuPJzF8U= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17/go.mod h1:5M5CI3D12dNOtH3/mk6minaRwI2/37ifCURZISxA/IQ= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 h1:WWLqlh79iO48yLkj1v3ISRNiv+3KdQoZ6JWyfcsyQik= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17/go.mod h1:EhG22vHRrvF8oXSTYStZhJc1aUgKtnJe+aOiFEV90cM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.18 h1:F43zk1vemYIqPAwhjTjYIz0irU2EY7sOb/F5eJ3HuyM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.18/go.mod h1:w1jdlZXrGKaJcNoL+Nnrj+k5wlpGXqnNrKoP22HvAug= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.18 h1:xCeWVjj0ki0l3nruoyP2slHsGArMxeiiaoPN5QZH6YQ= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.18/go.mod h1:r/eLGuGCBw6l36ZRWiw6PaZwPXb6YOj+i/7MizNl5/k= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod 
h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17 h1:JqcdRG//czea7Ppjb+g/n4o8i/R50aTBHkA7vu0lK+k= github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17/go.mod h1:CO+WeGmIdj/MlPel2KwID9Gt7CNq4M65HUfBW97liM0= github.com/aws/aws-sdk-go-v2/service/glue v1.129.1 h1:43/6Yay8BWMwCq5Ow9pSTcumKROQdqe5DxnS/44LODQ= github.com/aws/aws-sdk-go-v2/service/glue v1.129.1/go.mod h1:iH5M4d6X8IdmFUwOVdnoCEt7eqhjYZuw4gEI0ebsQjs= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.5 h1:CeY9LUdur+Dxoeldqoun6y4WtJ3RQtzk0JMP2gfUay0= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.5/go.mod h1:AZLZf2fMaahW5s/wMRciu1sYbdsikT/UHwbUjOdEVTc= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8 h1:Z5EiPIzXKewUQK0QTMkutjiaPVeVYXX7KIqhXu/0fXs= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8/go.mod h1:FsTpJtvC4U1fyDXk7c71XoDv3HlRm8V3NiYLeYLh5YE= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 h1:RuNSMoozM8oXlgLG/n6WLaFGoea7/CddrCfIiSA+xdY= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17/go.mod h1:F2xxQ9TZz5gDWsclCtPQscGpP0VUOc8RqgFM3vDENmU= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.18 h1:LTRCYFlnnKFlKsyIQxKhJuDuA3ZkrDQMRYm6rXiHlLY= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.18/go.mod h1:XhwkgGG6bHSd00nO/mexWTcTjgd6PjuvWQMqSn2UaEk= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17 h1:bGeHBsGZx0Dvu/eJC0Lh9adJa3M1xREcndxLNZlve2U= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17/go.mod h1:dcW24lbU0CzHusTE8LLHhRLI42ejmINN8Lcr22bwh/g= -github.com/aws/aws-sdk-go-v2/service/s3 v1.95.1 h1:C2dUPSnEpy4voWFIq3JNd8gN0Y5vYGDo44eUE58a/p8= 
-github.com/aws/aws-sdk-go-v2/service/s3 v1.95.1/go.mod h1:5jggDlZ2CLQhwJBiZJb4vfk4f0GxWdEDruWKEJ1xOdo= -github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 h1:VrhDvQib/i0lxvr3zqlUwLwJP4fpmpyD9wYG1vfSu+Y= -github.com/aws/aws-sdk-go-v2/service/signin v1.0.5/go.mod h1:k029+U8SY30/3/ras4G/Fnv/b88N4mAfliNn08Dem4M= -github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 h1:v6EiMvhEYBoHABfbGB4alOYmCIrcgyPPiBE1wZAEbqk= -github.com/aws/aws-sdk-go-v2/service/sso v1.30.9/go.mod h1:yifAsgBxgJWn3ggx70A3urX2AN49Y5sJTD1UQFlfqBw= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 h1:gd84Omyu9JLriJVCbGApcLzVR3XtmC4ZDPcAI6Ftvds= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13/go.mod h1:sTGThjphYE4Ohw8vJiRStAcu3rbjtXRsdNB0TvZ5wwo= -github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 h1:5fFjR/ToSOzB2OQ/XqWpZBmNvmP/pJ1jOWYlFDJTjRQ= -github.com/aws/aws-sdk-go-v2/service/sts v1.41.6/go.mod h1:qgFDZQSD/Kys7nJnVqYlWKnh0SSdMjAi0uSwON4wgYQ= -github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk= -github.com/aws/smithy-go v1.24.0/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= +github.com/aws/aws-sdk-go-v2/service/s3 v1.96.0 h1:oeu8VPlOre74lBA/PMhxa5vewaMIMmILM+RraSyB8KA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.96.0/go.mod h1:5jggDlZ2CLQhwJBiZJb4vfk4f0GxWdEDruWKEJ1xOdo= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.6 h1:MzORe+J94I+hYu2a6XmV5yC9huoTv8NRcCrUNedDypQ= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.6/go.mod h1:hXzcHLARD7GeWnifd8j9RWqtfIgxj4/cAtIVIK7hg8g= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.11 h1:7oGD8KPfBOJGXiCoRKrrrQkbvCp8N++u36hrLMPey6o= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.11/go.mod h1:0DO9B5EUJQlIDif+XJRWCljZRKsAFKh3gpFz7UnDtOo= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.15 h1:edCcNp9eGIUDUCrzoCu1jWAXLGFIizeqkdkKgRlJwWc= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.15/go.mod h1:lyRQKED9xWfgkYC/wmmYfv7iVIM68Z5OQ88ZdcV1QbU= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.7 
h1:NITQpgo9A5NrDZ57uOWj+abvXSb83BbyggcUBVksN7c= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.7/go.mod h1:sks5UWBhEuWYDPdwlnRFn1w7xWdH29Jcpe+/PJQefEs= +github.com/aws/smithy-go v1.24.1 h1:VbyeNfmYkWoxMVpGUAbQumkODcYmfMRfZ8yQiH30SK0= +github.com/aws/smithy-go v1.24.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/buger/goterm v1.0.4 h1:Z9YvGmOih81P0FbVtEYTFF6YsSgxSUKEhf/f9bTMXbY= @@ -152,8 +148,8 @@ github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7 github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f h1:Y8xYupdHxryycyPlc9Y+bSQAYZnetRJ70VMVKm5CKI0= -github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f/go.mod h1:HlzOvOjVBOfTGSRXRyY0OiCS/3J1akRGQQpRO/7zyF4= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= github.com/cockroachdb/apd/v3 v3.2.1 h1:U+8j7t0axsIgvQUqthuNm82HIrYXodOV2iWLWtEaIwg= github.com/cockroachdb/apd/v3 v3.2.1/go.mod h1:klXJcjp+FffLTHlhIG69tezTDvdP065naDsHzKhYSqc= github.com/compose-spec/compose-go/v2 v2.6.0 h1:/+oBD2ixSENOeN/TlJqWZmUak0xM8A7J08w/z661Wd4= @@ -223,14 +219,14 @@ github.com/eiannone/keyboard v0.0.0-20220611211555-0d226195f203 h1:XBBHcIb256gUJ github.com/eiannone/keyboard v0.0.0-20220611211555-0d226195f203/go.mod h1:E1jcSv8FaEny+OP/5k9UxZVw9YFWGj7eI4KR/iOBqCg= github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= github.com/emicklei/go-restful/v3 
v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/envoyproxy/go-control-plane v0.13.5-0.20251024222203-75eaa193e329 h1:K+fnvUM0VZ7ZFJf0n4L/BRlnsb9pL/GuDG6FqaH+PwM= -github.com/envoyproxy/go-control-plane v0.13.5-0.20251024222203-75eaa193e329/go.mod h1:Alz8LEClvR7xKsrq3qzoc4N0guvVNSS8KmSChGYr9hs= -github.com/envoyproxy/go-control-plane/envoy v1.35.0 h1:ixjkELDE+ru6idPxcHLj8LBVc2bFP7iBytj353BoHUo= -github.com/envoyproxy/go-control-plane/envoy v1.35.0/go.mod h1:09qwbGVuSWWAyN5t/b3iyVfz5+z8QWGrzkoqm/8SbEs= +github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= +github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU= +github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= +github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= -github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= -github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= +github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4= +github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -351,8 +347,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod 
h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= -github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= -github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= +github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c= +github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.10/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= @@ -450,8 +446,8 @@ github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJw github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= -github.com/pierrec/lz4/v4 v4.1.23 h1:oJE7T90aYBGtFNrI8+KbETnPymobAhzRrR8Mu8n1yfU= -github.com/pierrec/lz4/v4 v4.1.23/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4= +github.com/pierrec/lz4/v4 v4.1.25 h1:kocOqRffaIbU5djlIBr7Wh+cx82C0vtFb0fOurZHqD0= +github.com/pierrec/lz4/v4 v4.1.25/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -556,7 +552,7 @@ github.com/tonistiigi/units v0.0.0-20180711220420-6950e57a87ea h1:SXhTLE6pb6eld/ 
github.com/tonistiigi/units v0.0.0-20180711220420-6950e57a87ea/go.mod h1:WPnis/6cRcDZSUvVmezrxJPkiO87ThFYsoUiMwWNDJk= github.com/tonistiigi/vt100 v0.0.0-20240514184818-90bafcd6abab h1:H6aJ0yKQ0gF49Qb2z5hI1UHxSQt4JMyxebFR15KnApw= github.com/tonistiigi/vt100 v0.0.0-20240514184818-90bafcd6abab/go.mod h1:ulncasL3N9uLrVann0m+CDlJKWsIAP34MPcOJF6VRvc= -github.com/twmb/franz-go v1.20.6 h1:TpQTt4QcixJ1cHEmQGPOERvTzo99s8jAutmS7rbSD6w= +github.com/twmb/franz-go v1.20.7 h1:P4MGSXJjjAPP3NRGPCks/Lrq+j+twWMVl1qYCVgNmWY= github.com/twmb/franz-go/pkg/kmsg v1.12.0 h1:CbatD7ers1KzDNgJqPbKOq0Bz/WLBdsTH75wgzeVaPc= github.com/twmb/franz-go/pkg/kmsg v1.12.0/go.mod h1:+DPt4NC8RmI6hqb8G09+3giKObE6uD2Eya6CfqBpeJY= github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg= @@ -611,30 +607,30 @@ github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo= go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E= -go.etcd.io/etcd/api/v3 v3.6.7 h1:7BNJ2gQmc3DNM+9cRkv7KkGQDayElg8x3X+tFDYS+E0= -go.etcd.io/etcd/api/v3 v3.6.7/go.mod h1:xJ81TLj9hxrYYEDmXTeKURMeY3qEDN24hqe+q7KhbnI= -go.etcd.io/etcd/client/pkg/v3 v3.6.7 h1:vvzgyozz46q+TyeGBuFzVuI53/yd133CHceNb/AhBVs= -go.etcd.io/etcd/client/pkg/v3 v3.6.7/go.mod h1:2IVulJ3FZ/czIGl9T4lMF1uxzrhRahLqe+hSgy+Kh7Q= -go.etcd.io/etcd/client/v3 v3.6.7 h1:9WqA5RpIBtdMxAy1ukXLAdtg2pAxNqW5NUoO2wQrE6U= -go.etcd.io/etcd/client/v3 v3.6.7/go.mod h1:2XfROY56AXnUqGsvl+6k29wrwsSbEh1lAouQB1vHpeE= -go.etcd.io/etcd/pkg/v3 v3.6.6 h1:wylOivS/UxXTZ0Le5fOdxCjatW5ql9dcWEggQQHSorw= -go.etcd.io/etcd/pkg/v3 v3.6.6/go.mod h1:9TKZL7WUEVHXYM3srP3ESZfIms34s1G72eNtWA9YKg4= -go.etcd.io/etcd/server/v3 v3.6.6 h1:YSRWGJPzU+lIREwUQI4MfyLZrkUyzjJOVpMxJvZePaY= -go.etcd.io/etcd/server/v3 v3.6.6/go.mod h1:A1OQ1x3PaiENDLywMjCiMwV1pwJSpb0h9Z5ORP2dv6I= +go.etcd.io/etcd/api/v3 v3.6.8 
h1:gqb1VN92TAI6G2FiBvWcqKtHiIjr4SU2GdXxTwyexbM= +go.etcd.io/etcd/api/v3 v3.6.8/go.mod h1:qyQj1HZPUV3B5cbAL8scG62+fyz5dSxxu0w8pn28N6Q= +go.etcd.io/etcd/client/pkg/v3 v3.6.8 h1:Qs/5C0LNFiqXxYf2GU8MVjYUEXJ6sZaYOz0zEqQgy50= +go.etcd.io/etcd/client/pkg/v3 v3.6.8/go.mod h1:GsiTRUZE2318PggZkAo6sWb6l8JLVrnckTNfbG8PWtw= +go.etcd.io/etcd/client/v3 v3.6.8 h1:B3G76t1UykqAOrbio7s/EPatixQDkQBevN8/mwiplrY= +go.etcd.io/etcd/client/v3 v3.6.8/go.mod h1:MVG4BpSIuumPi+ELF7wYtySETmoTWBHVcDoHdVupwt8= +go.etcd.io/etcd/pkg/v3 v3.6.8 h1:Xe+LIL974spy8b4nEx3H0KMr1ofq3r0kh6FbU3aw4es= +go.etcd.io/etcd/pkg/v3 v3.6.8/go.mod h1:TRibVNe+FqJIe1abOAA1PsuQ4wqO87ZaOoprg09Tn8c= +go.etcd.io/etcd/server/v3 v3.6.8 h1:U2strdSEy1U8qcSzRIdkYpvOPtBy/9i/IfaaCI9flZ4= +go.etcd.io/etcd/server/v3 v3.6.8/go.mod h1:88dCtwUnSirkUoJbflQxxWXqtBSZa6lSG0Kuej+dois= go.etcd.io/raft/v3 v3.6.0 h1:5NtvbDVYpnfZWcIHgGRk9DyzkBIXOi8j+DDp1IcnUWQ= go.etcd.io/raft/v3 v3.6.0/go.mod h1:nLvLevg6+xrVtHUmVaTcTz603gQPHfh7kUAwV6YpfGo= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/detectors/gcp v1.38.0 h1:ZoYbqX7OaA/TAikspPl3ozPI6iY6LiIY9I8cUfm+pJs= -go.opentelemetry.io/contrib/detectors/gcp v1.38.0/go.mod h1:SU+iU7nu5ud4oCb3LQOhIZ3nRLj6FNVrKgtflbaf2ts= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0 h1:kWRNZMsfBHZ+uHjiH4y7Etn2FK26LAGkNFw7RHv1DhE= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.62.0 h1:rbRJ8BBoVMsQShESYZ0FkvcITu8X8QNwJogcLUmDNNw= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.62.0/go.mod h1:ru6KHrNtNHxM4nD/vd6QrLVWgKhxPYgblq4VAtNawTQ= go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 h1:4BZHA+B1wXEQoGNHxW8mURaLhcdGwvRnmhGbm+odRbc= 
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0/go.mod h1:3qi2EEwMgB4xnKgPLqsDP3j9qxnHDZeHsnAxfjQqTko= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 h1:Hf9xI/XLML9ElpiHVDNwvqI0hIFlzV8dgIr35kV1kRU= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0/go.mod h1:NfchwuyNoMcZ5MLHwPrODwUF1HWCXWrL31s8gSAdIKY= -go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= -go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= +go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0 h1:zG8GlgXCJQd5BU98C0hZnBbElszTmUgCNCfYneaDL0A= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0/go.mod h1:hOfBCz8kv/wuq73Mx2H2QnWokh/kHZxkh6SNF2bdKtw= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.31.0 h1:ZsXq73BERAiNuuFXYqP4MR5hBrjXfMGSO+Cx7qoOZiM= @@ -647,16 +643,16 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.37.0 h1:6VjV6Et+1Hd2iLZEPtdV7vie80Yyqf7oikJLjQ/myi0= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.37.0/go.mod h1:u8hcp8ji5gaM/RfcOo8z9NMnf1pVLfVY7lBY2VOGuUU= -go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= -go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= -go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= -go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= -go.opentelemetry.io/otel/sdk/metric v1.38.0 
h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= -go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= -go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= -go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= -go.opentelemetry.io/proto/otlp v1.7.0 h1:jX1VolD6nHuFzOYso2E73H85i92Mv8JQYk0K9vz09os= -go.opentelemetry.io/proto/otlp v1.7.0/go.mod h1:fSKjH6YJ7HDlwzltzyMj036AJ3ejJLCgCSHGj4efDDo= +go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= +go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= +go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= +go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= +go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= +go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= +go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= +go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU= @@ -677,8 +673,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= golang.org/x/crypto v0.18.0/go.mod 
h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= -golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= -golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= +golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= +golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= golang.org/x/exp v0.0.0-20250711185948-6ae5c78190dc h1:TS73t7x3KarrNd5qAipmspBDS1rkMcgVG/fS1aRb4Rc= golang.org/x/exp v0.0.0-20250711185948-6ae5c78190dc/go.mod h1:A+z0yzpGtvnG90cToK5n2tu8UJVP2XUATh+r+sfOOOc= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -687,8 +683,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= -golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= +golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -699,8 +695,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= -golang.org/x/net v0.48.0 
h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= -golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= +golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -728,10 +724,10 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= -golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/telemetry v0.0.0-20251203150158-8fff8a5912fc h1:bH6xUXay0AIFMElXG2rQ4uiE+7ncwtiOdPfYK1NK2XA= -golang.org/x/telemetry v0.0.0-20251203150158-8fff8a5912fc/go.mod h1:hKdjCMrbv9skySur+Nek8Hd0uJ0GuxJIoIX2payrIdQ= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/telemetry v0.0.0-20260109210033-bd525da824e2 h1:O1cMQHRfwNpDfDJerqRoE2oD+AFlyid87D40L/OkkJo= +golang.org/x/telemetry v0.0.0-20260109210033-bd525da824e2/go.mod h1:b7fPSJ0pKZ3ccUh8gnTONJxhn3c/PS6tyzQvyqw4iA8= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -740,8 +736,8 @@ 
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= -golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= -golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= +golang.org/x/term v0.40.0 h1:36e4zGLqU4yhjlmxEaagx2KuYbJq3EwY8K943ZsHcvg= +golang.org/x/term v0.40.0/go.mod h1:w2P8uVp06p2iyKKuvXIm7N/y0UCRt3UfJTfZ7oOpglM= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= @@ -749,8 +745,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= -golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= +golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -761,8 +757,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools 
v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= -golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= -golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= +golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= +golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -779,8 +775,8 @@ google.golang.org/genproto/googleapis/api v0.0.0-20251213004720-97cd9d5aeac2 h1: google.golang.org/genproto/googleapis/api v0.0.0-20251213004720-97cd9d5aeac2/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= google.golang.org/genproto/googleapis/rpc v0.0.0-20251213004720-97cd9d5aeac2 h1:2I6GHUeJ/4shcDpoUlLs/2WPnhg7yJwvXtqcMJt9liA= google.golang.org/genproto/googleapis/rpc v0.0.0-20251213004720-97cd9d5aeac2/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= -google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= -google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= +google.golang.org/grpc v1.79.1 h1:zGhSi45ODB9/p3VAawt9a+O/MULLl9dpizzNNpq7flY= +google.golang.org/grpc v1.79.1/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/cenkalti/backoff.v1 v1.1.0 h1:Arh75ttbsvlpVA7WtVpH4u9h6Zl46xuptxqLxPiSo4Y= @@ -789,8 +785,8 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 
v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= @@ -806,18 +802,18 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= -k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= -k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= -k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= -k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= -k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= +k8s.io/api v0.35.1 h1:0PO/1FhlK/EQNVK5+txc4FuhQibV25VLSdLMmGpDE/Q= +k8s.io/api v0.35.1/go.mod h1:28uR9xlXWml9eT0uaGo6y71xK86JBELShLy4wR1XtxM= +k8s.io/apimachinery v0.35.1 h1:yxO6gV555P1YV0SANtnTjXYfiivaTPvCTKX6w6qdDsU= +k8s.io/apimachinery v0.35.1/go.mod 
h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns= +k8s.io/client-go v0.35.1 h1:+eSfZHwuo/I19PaSxqumjqZ9l5XiTEKbIaJ+j1wLcLM= +k8s.io/client-go v0.35.1/go.mod h1:1p1KxDt3a0ruRfc/pG4qT/3oHmUj1AhSHEcxNSGg+OA= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= -k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= +k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= +k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= modernc.org/libc v1.66.3 h1:cfCbjTUcdsKyyZZfEUKfoHcP3S0Wkvz3jgSzByEWVCQ= modernc.org/libc v1.66.3/go.mod h1:XD9zO8kt59cANKvHPXpx7yS2ELPheAey0vjIuZOhOU8= modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= @@ -826,12 +822,12 @@ modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= modernc.org/sqlite v1.38.0 h1:+4OrfPQ8pxHKuWG4md1JpR/EYAh3Md7TdejuuzE7EUI= modernc.org/sqlite v1.38.0/go.mod h1:1Bj+yES4SVvBZ4cBOpVZ6QgesMCKpJZDq0nxYzOpmNE= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/json 
v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= -sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482 h1:2WOzJpHUBVrrkDjU4KBT8n5LDcj824eX0I5UKcgeRUs= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= tags.cncf.io/container-device-interface v1.0.1 h1:KqQDr4vIlxwfYh0Ed/uJGVgX+CHAkahrgabg6Q8GYxc= diff --git a/addons/processors/iceberg-processor/internal/config/config.go b/addons/processors/iceberg-processor/internal/config/config.go index f453c581..f672b2fd 100644 --- a/addons/processors/iceberg-processor/internal/config/config.go +++ b/addons/processors/iceberg-processor/internal/config/config.go @@ -18,6 +18,7 @@ package config import ( "fmt" "os" + "strconv" "strings" "gopkg.in/yaml.v3" @@ -75,11 +76,12 @@ type RegistryConfig struct { } type Mapping struct { - Topic string `yaml:"topic"` - Table string `yaml:"table"` - Mode string `yaml:"mode"` - CreateTableIfAbsent bool `yaml:"create_table_if_missing"` + Topic string `yaml:"topic"` + Table string `yaml:"table"` + Mode string `yaml:"mode"` + CreateTableIfAbsent bool `yaml:"create_table_if_missing"` Schema MappingSchemaConfig `yaml:"schema"` + Lfs LfsConfig `yaml:"lfs"` } type MappingSchemaConfig struct { @@ -94,6 +96,14 @@ type Column struct { Required bool `yaml:"required"` } +type 
LfsConfig struct { + Mode string `yaml:"mode"` + MaxInlineSize int64 `yaml:"max_inline_size"` + StoreMetadata bool `yaml:"store_metadata"` + ValidateChecksum *bool `yaml:"validate_checksum"` + ResolveConcurrency int `yaml:"resolve_concurrency"` +} + type IcebergConfig struct { Catalog CatalogConfig `yaml:"catalog"` Warehouse string `yaml:"warehouse"` @@ -151,6 +161,7 @@ func Load(path string) (Config, error) { if cfg.Offsets.Backend == "etcd" && len(cfg.Etcd.Endpoints) == 0 { return Config{}, fmt.Errorf("etcd.endpoints is required for offsets.backend=etcd") } + applyLfsEnvOverrides(&cfg) for i, mapping := range cfg.Mappings { if mapping.Topic == "" { return Config{}, fmt.Errorf("mappings[%d].topic is required", i) @@ -164,6 +175,10 @@ func Load(path string) (Config, error) { if mapping.Mode != "append" { return Config{}, fmt.Errorf("mappings[%d].mode must be append", i) } + applyLfsDefaults(&mapping) + if err := validateLfsConfig(mapping.Lfs, i); err != nil { + return Config{}, err + } if mapping.Schema.Source == "" { if len(mapping.Schema.Columns) > 0 { mapping.Schema.Source = "mapping" @@ -203,6 +218,110 @@ func Load(path string) (Config, error) { return cfg, nil } +func (l LfsConfig) ChecksumEnabled() bool { + if l.ValidateChecksum == nil { + return true + } + return *l.ValidateChecksum +} + +func applyLfsDefaults(mapping *Mapping) { + if mapping.Lfs.Mode == "" { + mapping.Lfs.Mode = "off" + } + if mapping.Lfs.ResolveConcurrency == 0 { + mapping.Lfs.ResolveConcurrency = 4 + } + if mapping.Lfs.ValidateChecksum == nil { + value := true + mapping.Lfs.ValidateChecksum = &value + } +} + +func validateLfsConfig(lfsCfg LfsConfig, idx int) error { + switch lfsCfg.Mode { + case "off", "resolve", "reference", "skip", "hybrid": + default: + return fmt.Errorf("mappings[%d].lfs.mode %q is not supported", idx, lfsCfg.Mode) + } + if lfsCfg.Mode == "hybrid" && lfsCfg.MaxInlineSize <= 0 { + return fmt.Errorf("mappings[%d].lfs.max_inline_size must be > 0 for mode=hybrid", idx) + 
} + if lfsCfg.ResolveConcurrency < 0 { + return fmt.Errorf("mappings[%d].lfs.resolve_concurrency must be >= 0", idx) + } + return nil +} + +func applyLfsEnvOverrides(cfg *Config) { + mode := envString("KAFSCALE_ICEBERG_LFS_MODE") + maxInline := envInt64("KAFSCALE_ICEBERG_LFS_MAX_INLINE_SIZE") + storeMetadata := envBool("KAFSCALE_ICEBERG_LFS_STORE_METADATA") + validateChecksum := envBool("KAFSCALE_ICEBERG_LFS_VALIDATE_CHECKSUM") + resolveConcurrency := envInt("KAFSCALE_ICEBERG_LFS_RESOLVE_CONCURRENCY") + for i := range cfg.Mappings { + if mode != nil { + cfg.Mappings[i].Lfs.Mode = *mode + } + if maxInline != nil { + cfg.Mappings[i].Lfs.MaxInlineSize = *maxInline + } + if storeMetadata != nil { + cfg.Mappings[i].Lfs.StoreMetadata = *storeMetadata + } + if validateChecksum != nil { + cfg.Mappings[i].Lfs.ValidateChecksum = validateChecksum + } + if resolveConcurrency != nil { + cfg.Mappings[i].Lfs.ResolveConcurrency = *resolveConcurrency + } + } +} + +func envString(key string) *string { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return nil + } + return &value +} + +func envBool(key string) *bool { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return nil + } + parsed, err := strconv.ParseBool(value) + if err != nil { + return nil + } + return &parsed +} + +func envInt64(key string) *int64 { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return nil + } + parsed, err := strconv.ParseInt(value, 10, 64) + if err != nil { + return nil + } + return &parsed +} + +func envInt(key string) *int { + value := strings.TrimSpace(os.Getenv(key)) + if value == "" { + return nil + } + parsed, err := strconv.Atoi(value) + if err != nil { + return nil + } + return &parsed +} + func isSupportedColumnType(value string) bool { switch strings.ToLower(value) { case "boolean", "int", "long", "float", "double", "string", "binary", "timestamp", "date": diff --git a/addons/processors/iceberg-processor/internal/config/config_test.go 
b/addons/processors/iceberg-processor/internal/config/config_test.go index 49a19650..97979c4e 100644 --- a/addons/processors/iceberg-processor/internal/config/config_test.go +++ b/addons/processors/iceberg-processor/internal/config/config_test.go @@ -153,3 +153,52 @@ func TestLoadRejectsRegistrySourceWithoutBaseURL(t *testing.T) { t.Fatalf("expected error for schema.source=registry without base_url") } } + +func TestLoadRejectsInvalidLfsMode(t *testing.T) { + data := []byte("s3:\n bucket: test-bucket\niceberg:\n catalog:\n type: rest\n uri: http://catalog\netcd:\n endpoints:\n - http://etcd:2379\nschema:\n mode: \"off\"\nmappings:\n - topic: orders\n table: prod.orders\n lfs:\n mode: nope\n") + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + if err := os.WriteFile(path, data, 0644); err != nil { + t.Fatalf("write config: %v", err) + } + + if _, err := Load(path); err == nil { + t.Fatalf("expected error for invalid lfs mode") + } +} + +func TestLoadRejectsHybridWithoutMaxInlineSize(t *testing.T) { + data := []byte("s3:\n bucket: test-bucket\niceberg:\n catalog:\n type: rest\n uri: http://catalog\netcd:\n endpoints:\n - http://etcd:2379\nschema:\n mode: \"off\"\nmappings:\n - topic: orders\n table: prod.orders\n lfs:\n mode: hybrid\n") + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + if err := os.WriteFile(path, data, 0644); err != nil { + t.Fatalf("write config: %v", err) + } + + if _, err := Load(path); err == nil { + t.Fatalf("expected error for hybrid without max_inline_size") + } +} + +func TestLoadDefaultsLfsConfig(t *testing.T) { + data := []byte("s3:\n bucket: test-bucket\niceberg:\n catalog:\n type: rest\n uri: http://catalog\netcd:\n endpoints:\n - http://etcd:2379\nschema:\n mode: \"off\"\nmappings:\n - topic: orders\n table: prod.orders\n") + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + if err := os.WriteFile(path, data, 0644); err != nil { + t.Fatalf("write config: %v", err) + } + + cfg, err := 
Load(path) + if err != nil { + t.Fatalf("load config: %v", err) + } + if cfg.Mappings[0].Lfs.Mode != "off" { + t.Fatalf("expected lfs mode off, got %q", cfg.Mappings[0].Lfs.Mode) + } + if cfg.Mappings[0].Lfs.ResolveConcurrency != 4 { + t.Fatalf("expected default resolve_concurrency 4, got %d", cfg.Mappings[0].Lfs.ResolveConcurrency) + } + if !cfg.Mappings[0].Lfs.ChecksumEnabled() { + t.Fatalf("expected checksum enabled by default") + } +} diff --git a/addons/processors/iceberg-processor/internal/metrics/metrics.go b/addons/processors/iceberg-processor/internal/metrics/metrics.go index ae79cbb7..01ab6965 100644 --- a/addons/processors/iceberg-processor/internal/metrics/metrics.go +++ b/addons/processors/iceberg-processor/internal/metrics/metrics.go @@ -77,6 +77,39 @@ var ( }, []string{"topic", "partition"}, ) + LfsResolvedTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "lfs_resolved_total", + Help: "Total LFS blobs resolved per topic.", + }, + []string{"topic"}, + ) + LfsResolvedBytesTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "lfs_resolved_bytes_total", + Help: "Total bytes resolved from LFS per topic.", + }, + []string{"topic"}, + ) + LfsResolutionErrorsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "lfs_resolution_errors_total", + Help: "Total LFS resolution errors per topic and reason.", + }, + []string{"topic", "reason"}, + ) + LfsResolutionDurationSeconds = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: namespace, + Name: "lfs_resolution_duration_seconds", + Help: "LFS resolution duration in seconds.", + Buckets: prometheus.DefBuckets, + }, + []string{"topic"}, + ) ) func init() { @@ -88,5 +121,9 @@ func init() { LastOffset, WatermarkOffset, WatermarkTimestamp, + LfsResolvedTotal, + LfsResolvedBytesTotal, + LfsResolutionErrorsTotal, + LfsResolutionDurationSeconds, ) } diff --git 
a/addons/processors/iceberg-processor/internal/processor/lfs.go b/addons/processors/iceberg-processor/internal/processor/lfs.go new file mode 100644 index 00000000..4054ed9b --- /dev/null +++ b/addons/processors/iceberg-processor/internal/processor/lfs.go @@ -0,0 +1,218 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package processor + +import ( + "context" + "fmt" + "log" + "sync" + "time" + + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/config" + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/metrics" + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/sink" + "github.com/KafScale/platform/pkg/lfs" +) + +const ( + lfsModeOff = "off" + lfsModeResolve = "resolve" + lfsModeReference = "reference" + lfsModeSkip = "skip" + lfsModeHybrid = "hybrid" +) + +type lfsJob struct { + idx int + record sink.Record +} + +type lfsResult struct { + idx int + record sink.Record + keep bool + resolved bool + resolvedBytes int64 + err error +} + +func (p *Processor) resolveLfsRecords(ctx context.Context, records []sink.Record, lfsCfg config.LfsConfig, topic string) ([]sink.Record, error) { + if len(records) == 0 || lfsCfg.Mode == lfsModeOff { + return records, nil + } + if p.lfsS3 == nil && (lfsCfg.Mode == lfsModeResolve || lfsCfg.Mode == lfsModeHybrid) { + return nil, fmt.Errorf("lfs s3 reader not configured") + } + + out := make([]*sink.Record, len(records)) + jobs := make(chan lfsJob) + results := make(chan lfsResult, len(records)) + workers := lfsCfg.ResolveConcurrency + if workers <= 0 { + workers = 1 + } + + resolver := lfs.NewResolver(lfs.ResolverConfig{ + MaxSize: lfsCfg.MaxInlineSize, + ValidateChecksum: lfsCfg.ChecksumEnabled(), + }, p.lfsS3) + + var wg sync.WaitGroup + for i := 0; i < workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for job := range jobs { + res := resolveLfsRecord(ctx, resolver, job.record, lfsCfg) + res.idx = job.idx + results <- res + } + }() + } + + for idx, record := range records { + if !lfs.IsLfsEnvelope(record.Value) { + out[idx] = &record + continue + } + if lfsCfg.Mode == lfsModeSkip { + metrics.RecordsTotal.WithLabelValues(topic, "skipped_lfs").Inc() + continue + } + env, err := lfs.DecodeEnvelope(record.Value) + if err != nil { + 
metrics.LfsResolutionErrorsTotal.WithLabelValues(topic, "decode").Inc() + continue + } + + switch lfsCfg.Mode { + case lfsModeReference: + if lfsCfg.StoreMetadata { + record = attachLfsMetadata(record, lfsMetadataFromEnvelope(env)) + } + out[idx] = &record + case lfsModeHybrid: + if env.Size > 0 && env.Size <= lfsCfg.MaxInlineSize { + jobs <- lfsJob{idx: idx, record: record} + continue + } + if lfsCfg.StoreMetadata { + record = attachLfsMetadata(record, lfsMetadataFromEnvelope(env)) + } + out[idx] = &record + case lfsModeResolve: + jobs <- lfsJob{idx: idx, record: record} + default: + out[idx] = &record + } + } + close(jobs) + + go func() { + wg.Wait() + close(results) + }() + + for res := range results { + if res.err != nil { + metrics.LfsResolutionErrorsTotal.WithLabelValues(topic, "resolve").Inc() + log.Printf("lfs resolve failed topic=%s offset=%d: %v", topic, res.record.Offset, res.err) + continue + } + if res.keep { + out[res.idx] = &res.record + } + if res.resolved { + metrics.LfsResolvedTotal.WithLabelValues(topic).Inc() + metrics.LfsResolvedBytesTotal.WithLabelValues(topic).Add(float64(res.resolvedBytes)) + } + } + + filtered := make([]sink.Record, 0, len(records)) + for _, record := range out { + if record == nil { + continue + } + filtered = append(filtered, *record) + } + return filtered, nil +} + +func resolveLfsRecord(ctx context.Context, resolver *lfs.Resolver, record sink.Record, lfsCfg config.LfsConfig) lfsResult { + start := time.Now() + resolved, ok, err := resolver.Resolve(ctx, record.Value) + if err != nil { + return lfsResult{record: record, err: err} + } + if !ok { + return lfsResult{record: record, keep: true} + } + record.Value = resolved.Payload + if lfsCfg.StoreMetadata { + record = attachLfsMetadata(record, lfsMetadataFromResolved(resolved)) + } + metrics.LfsResolutionDurationSeconds.WithLabelValues(record.Topic).Observe(time.Since(start).Seconds()) + return lfsResult{record: record, keep: true, resolved: true, resolvedBytes: 
resolved.BlobSize} +} + +func attachLfsMetadata(record sink.Record, values map[string]interface{}) sink.Record { + if record.Columns == nil { + record.Columns = make(map[string]interface{}, len(values)) + } + for key, value := range values { + record.Columns[key] = value + } + return record +} + +func lfsMetadataFromEnvelope(env lfs.Envelope) map[string]interface{} { + checksum := env.Checksum + checksumAlg := env.ChecksumAlg + if checksum == "" { + checksum = env.SHA256 + if checksumAlg == "" { + checksumAlg = "sha256" + } + } + return map[string]interface{}{ + "lfs_content_type": env.ContentType, + "lfs_blob_size": env.Size, + "lfs_checksum": checksum, + "lfs_checksum_alg": checksumAlg, + "lfs_bucket": env.Bucket, + "lfs_key": env.Key, + } +} + +func lfsMetadataFromResolved(resolved lfs.ResolvedRecord) map[string]interface{} { + checksum := resolved.Checksum + checksumAlg := resolved.ChecksumAlg + if checksum == "" { + checksum = resolved.Envelope.SHA256 + if checksumAlg == "" { + checksumAlg = "sha256" + } + } + return map[string]interface{}{ + "lfs_content_type": resolved.ContentType, + "lfs_blob_size": resolved.BlobSize, + "lfs_checksum": checksum, + "lfs_checksum_alg": checksumAlg, + "lfs_bucket": resolved.Envelope.Bucket, + "lfs_key": resolved.Envelope.Key, + } +} diff --git a/addons/processors/iceberg-processor/internal/processor/lfs_test.go b/addons/processors/iceberg-processor/internal/processor/lfs_test.go new file mode 100644 index 00000000..99cf0af9 --- /dev/null +++ b/addons/processors/iceberg-processor/internal/processor/lfs_test.go @@ -0,0 +1,238 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package processor + +import ( + "bytes" + "context" + "io" + "testing" + "time" + + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/config" + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/decoder" + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/discovery" + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/sink" + "github.com/KafScale/platform/pkg/lfs" +) + +type fakeS3Reader struct { + payloads map[string][]byte +} + +func (f *fakeS3Reader) Fetch(ctx context.Context, key string) ([]byte, error) { + return f.payloads[key], nil +} + +func (f *fakeS3Reader) Stream(ctx context.Context, key string) (io.ReadCloser, int64, error) { + return io.NopCloser(bytes.NewReader(nil)), 0, nil +} + +func TestResolveLfsRecordsResolveMode(t *testing.T) { + payload := []byte("hello") + checksum, err := lfs.ComputeChecksum(lfs.ChecksumSHA256, payload) + if err != nil { + t.Fatalf("checksum: %v", err) + } + envBytes := mustEnvelope(t, lfs.Envelope{ + Version: 1, + Bucket: "bucket", + Key: "key", + Size: int64(len(payload)), + SHA256: checksum, + }) + + p := &Processor{lfsS3: &fakeS3Reader{payloads: map[string][]byte{"key": payload}}} + cfg := config.LfsConfig{ + Mode: lfsModeResolve, + StoreMetadata: true, + ResolveConcurrency: 1, + ValidateChecksum: boolPtr(true), + } + + out, err := p.resolveLfsRecords(context.Background(), []sink.Record{{Topic: "t", Offset: 1, Value: envBytes}}, cfg, "t") + if err != nil { + t.Fatalf("resolve: %v", err) + } + if len(out) 
!= 1 { + t.Fatalf("expected 1 record, got %d", len(out)) + } + if string(out[0].Value) != string(payload) { + t.Fatalf("expected resolved payload") + } + if out[0].Columns["lfs_key"] != "key" { + t.Fatalf("expected lfs metadata") + } +} + +func TestResolveLfsRecordsReferenceMode(t *testing.T) { + envBytes := mustEnvelope(t, lfs.Envelope{ + Version: 1, + Bucket: "bucket", + Key: "key", + Size: 10, + SHA256: "abc", + }) + p := &Processor{} + cfg := config.LfsConfig{Mode: lfsModeReference, StoreMetadata: true} + + out, err := p.resolveLfsRecords(context.Background(), []sink.Record{{Topic: "t", Offset: 1, Value: envBytes}}, cfg, "t") + if err != nil { + t.Fatalf("resolve: %v", err) + } + if len(out) != 1 { + t.Fatalf("expected 1 record, got %d", len(out)) + } + if out[0].Columns["lfs_bucket"] != "bucket" { + t.Fatalf("expected metadata from envelope") + } + if string(out[0].Value) != string(envBytes) { + t.Fatalf("expected envelope value to remain") + } +} + +func TestResolveLfsRecordsSkipMode(t *testing.T) { + envBytes := mustEnvelope(t, lfs.Envelope{Version: 1, Bucket: "b", Key: "k", Size: 1, SHA256: "abc"}) + p := &Processor{} + cfg := config.LfsConfig{Mode: lfsModeSkip} + + out, err := p.resolveLfsRecords(context.Background(), []sink.Record{{Topic: "t", Offset: 1, Value: envBytes}}, cfg, "t") + if err != nil { + t.Fatalf("resolve: %v", err) + } + if len(out) != 0 { + t.Fatalf("expected 0 records, got %d", len(out)) + } +} + +func TestResolveLfsRecordsHybridMode(t *testing.T) { + payload := []byte("hello") + checksum, err := lfs.ComputeChecksum(lfs.ChecksumSHA256, payload) + if err != nil { + t.Fatalf("checksum: %v", err) + } + envBytes := mustEnvelope(t, lfs.Envelope{Version: 1, Bucket: "b", Key: "k", Size: int64(len(payload)), SHA256: checksum}) + + p := &Processor{lfsS3: &fakeS3Reader{payloads: map[string][]byte{"k": payload}}} + cfg := config.LfsConfig{Mode: lfsModeHybrid, MaxInlineSize: int64(len(payload)), StoreMetadata: true, ResolveConcurrency: 1, 
ValidateChecksum: boolPtr(true)} + + out, err := p.resolveLfsRecords(context.Background(), []sink.Record{{Topic: "t", Offset: 1, Value: envBytes}}, cfg, "t") + if err != nil { + t.Fatalf("resolve: %v", err) + } + if len(out) != 1 { + t.Fatalf("expected 1 record, got %d", len(out)) + } + if string(out[0].Value) != string(payload) { + t.Fatalf("expected resolved payload") + } +} + +func mustEnvelope(t *testing.T, env lfs.Envelope) []byte { + t.Helper() + data, err := lfs.EncodeEnvelope(env) + if err != nil { + t.Fatalf("encode envelope: %v", err) + } + return data +} + +func boolPtr(value bool) *bool { + return &value +} + +func TestProcessorResolvesLfsRecords(t *testing.T) { + payload := []byte("hello") + checksum, err := lfs.ComputeChecksum(lfs.ChecksumSHA256, payload) + if err != nil { + t.Fatalf("checksum: %v", err) + } + envBytes := mustEnvelope(t, lfs.Envelope{ + Version: 1, + Bucket: "bucket", + Key: "key", + Size: int64(len(payload)), + SHA256: checksum, + }) + + segments := []discovery.SegmentRef{ + { + Topic: "orders", + Partition: 0, + BaseOffset: 0, + SegmentKey: "segment-0", + IndexKey: "index-0", + }, + } + records := map[string][]decoder.Record{ + "segment-0": { + {Topic: "orders", Partition: 0, Offset: 10, Timestamp: 1, Value: envBytes}, + }, + } + + store := &testStore{} + sinkWriter := &testSink{writes: make(chan struct{}, 1)} + p := &Processor{ + cfg: config.Config{ + Processor: config.ProcessorConfig{PollIntervalSeconds: 1}, + Mappings: []config.Mapping{ + { + Topic: "orders", + Table: "prod.orders", + Mode: "append", + CreateTableIfAbsent: true, + Lfs: config.LfsConfig{ + Mode: lfsModeResolve, + StoreMetadata: true, + ResolveConcurrency: 1, + ValidateChecksum: boolPtr(true), + }, + }, + }, + }, + discover: &testLister{segments: segments}, + decode: &testDecoder{records: records}, + store: store, + sink: sinkWriter, + validator: nil, + lfsS3: &fakeS3Reader{payloads: map[string][]byte{"key": payload}}, + mappingByTopic: 
map[string]config.Mapping{"orders": {Topic: "orders", Lfs: config.LfsConfig{Mode: lfsModeResolve, StoreMetadata: true, ResolveConcurrency: 1, ValidateChecksum: boolPtr(true)}}}, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go func() { + _ = p.Run(ctx) + }() + + select { + case <-sinkWriter.writes: + case <-time.After(2 * time.Second): + t.Fatalf("timed out waiting for sink write") + } + cancel() + + if len(sinkWriter.all) != 1 { + t.Fatalf("expected 1 record, got %d", len(sinkWriter.all)) + } + if string(sinkWriter.all[0].Value) != string(payload) { + t.Fatalf("expected resolved payload") + } + if sinkWriter.all[0].Columns["lfs_key"] != "key" { + t.Fatalf("expected lfs metadata") + } +} diff --git a/addons/processors/iceberg-processor/internal/processor/processor.go b/addons/processors/iceberg-processor/internal/processor/processor.go index 6355e566..da824e04 100644 --- a/addons/processors/iceberg-processor/internal/processor/processor.go +++ b/addons/processors/iceberg-processor/internal/processor/processor.go @@ -29,18 +29,21 @@ import ( "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/metrics" "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/schema" "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/sink" + "github.com/KafScale/platform/pkg/lfs" ) var leaseRenewInterval = 10 * time.Second // Processor wires discovery, decoding, checkpointing, and sink writing. 
type Processor struct { - cfg config.Config - discover discovery.Lister - decode decoder.Decoder - store checkpoint.Store - sink sink.Writer - validator schema.Validator + cfg config.Config + discover discovery.Lister + decode decoder.Decoder + store checkpoint.Store + sink sink.Writer + validator schema.Validator + lfsS3 lfs.S3Reader + mappingByTopic map[string]config.Mapping } func New(cfg config.Config) (*Processor, error) { @@ -65,13 +68,41 @@ func New(cfg config.Config) (*Processor, error) { return nil, err } + mappingByTopic := make(map[string]config.Mapping, len(cfg.Mappings)) + lfsEnabled := false + for _, mapping := range cfg.Mappings { + mappingByTopic[mapping.Topic] = mapping + if mapping.Lfs.Mode != "off" { + lfsEnabled = true + } + } + + var lfsS3 lfs.S3Reader + if lfsEnabled { + if cfg.S3.Region == "" { + return nil, fmt.Errorf("s3.region is required when lfs is enabled") + } + s3Client, err := lfs.NewS3Client(context.Background(), lfs.S3Config{ + Bucket: cfg.S3.Bucket, + Region: cfg.S3.Region, + Endpoint: cfg.S3.Endpoint, + ForcePathStyle: cfg.S3.PathStyle, + }) + if err != nil { + return nil, fmt.Errorf("lfs s3 client: %w", err) + } + lfsS3 = s3Client + } + return &Processor{ - cfg: cfg, - discover: lister, - decode: decoderClient, - store: store, - sink: writer, - validator: validator, + cfg: cfg, + discover: lister, + decode: decoderClient, + store: store, + sink: writer, + validator: validator, + lfsS3: lfsS3, + mappingByTopic: mappingByTopic, }, nil } @@ -167,6 +198,14 @@ func (p *Processor) Run(ctx context.Context) error { metrics.RecordsTotal.WithLabelValues(seg.Topic, "dropped").Add(float64(dropped)) } } + if mapping, ok := p.mappingByTopic[seg.Topic]; ok { + resolved, err := p.resolveLfsRecords(ctx, records, mapping.Lfs, seg.Topic) + if err != nil { + metrics.ErrorsTotal.WithLabelValues("lfs").Inc() + continue + } + records = resolved + } records, invalid, err := validateRecords(ctx, records, p.validator) if err != nil { 
metrics.ErrorsTotal.WithLabelValues("schema").Inc() @@ -235,6 +274,7 @@ func mapRecords(records []decoder.Record) []sink.Record { Key: record.Key, Value: record.Value, Headers: record.Headers, + Columns: nil, }) } return out diff --git a/addons/processors/iceberg-processor/internal/sink/iceberg.go b/addons/processors/iceberg-processor/internal/sink/iceberg.go index cef798aa..f1564d88 100644 --- a/addons/processors/iceberg-processor/internal/sink/iceberg.go +++ b/addons/processors/iceberg-processor/internal/sink/iceberg.go @@ -31,6 +31,8 @@ import ( "sync" "time" + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/config" + "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/decoder" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/array" "github.com/apache/arrow-go/v18/arrow/memory" @@ -39,8 +41,6 @@ import ( restcatalog "github.com/apache/iceberg-go/catalog/rest" iceio "github.com/apache/iceberg-go/io" "github.com/apache/iceberg-go/table" - "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/config" - "github.com/KafScale/platform/addons/processors/iceberg-processor/internal/decoder" ) const defaultTableSchemaID = 1 @@ -103,6 +103,7 @@ func New(cfg config.Config) (Writer, error) { autoCreate: mapping.CreateTableIfAbsent, mode: mapping.Mode, schema: mapping.Schema, + lfs: mapping.Lfs, } } @@ -121,6 +122,7 @@ type tableMapping struct { autoCreate bool mode string schema config.MappingSchemaConfig + lfs config.LfsConfig } type icebergWriter struct { @@ -289,7 +291,7 @@ func (w *icebergWriter) topicLock(topic string) *sync.Mutex { return lock } -func (w *icebergWriter) createTable(ctx context.Context, ident table.Identifier, schemaCfg config.MappingSchemaConfig, topic string) (*table.Table, error) { +func (w *icebergWriter) createTable(ctx context.Context, ident table.Identifier, schemaCfg config.MappingSchemaConfig, mappingLfs config.LfsConfig, topic string) 
(*table.Table, error) { if len(ident) > 1 { namespace := ident[:len(ident)-1] if _, isRest := w.catalog.(*restcatalog.Catalog); isRest { @@ -327,7 +329,7 @@ func (w *icebergWriter) createTable(ctx context.Context, ident table.Identifier, opts = append(opts, catalog.WithLocation(location)) } - desired, _, err := w.buildDesiredSchema(ctx, schemaCfg, topic, nil) + desired, _, err := w.buildDesiredSchema(ctx, schemaCfg, mappingLfs, topic, nil) if err != nil { return nil, err } @@ -341,7 +343,7 @@ func (w *icebergWriter) createTableWithRetry(ctx context.Context, mapping tableM return nil, err } log.Printf("iceberg: create table attempt %d/%d for %v", i+1, attempts, mapping.identifier) - tbl, err := w.createTable(ctx, mapping.identifier, mapping.schema, topic) + tbl, err := w.createTable(ctx, mapping.identifier, mapping.schema, mapping.lfs, topic) if err == nil { log.Printf("iceberg: create table %v succeeded", mapping.identifier) return tbl, nil @@ -482,7 +484,7 @@ func (w *icebergWriter) ensureSchema(ctx context.Context, tbl *table.Table, mapp tbl = updated current = tbl.Schema() } - desired, columns, err := w.buildDesiredSchema(ctx, mapping.schema, topic, current) + desired, columns, err := w.buildDesiredSchema(ctx, mapping.schema, mapping.lfs, topic, current) if err != nil { return nil, nil, err } @@ -510,7 +512,7 @@ func (w *icebergWriter) ensureSchema(ctx context.Context, tbl *table.Table, mapp } tbl = reloaded current = tbl.Schema() - desired, columns, err = w.buildDesiredSchema(ctx, mapping.schema, topic, current) + desired, columns, err = w.buildDesiredSchema(ctx, mapping.schema, mapping.lfs, topic, current) if err != nil { return nil, nil, err } @@ -605,11 +607,14 @@ func (w *icebergWriter) ensureTablePaths(ctx context.Context, tbl *table.Table, return nil, fmt.Errorf("failed to update table paths for %v", ident) } -func (w *icebergWriter) buildDesiredSchema(ctx context.Context, schemaCfg config.MappingSchemaConfig, topic string, existing *iceberg.Schema) 
(*iceberg.Schema, []config.Column, error) { +func (w *icebergWriter) buildDesiredSchema(ctx context.Context, schemaCfg config.MappingSchemaConfig, lfsCfg config.LfsConfig, topic string, existing *iceberg.Schema) (*iceberg.Schema, []config.Column, error) { columns, err := resolveColumns(ctx, w.registry, schemaCfg, topic) if err != nil { return nil, nil, err } + if lfsCfg.StoreMetadata { + columns = mergeColumns(columns, lfsMetadataColumns()) + } fieldIDs := map[string]int{} maxID := 0 @@ -663,6 +668,38 @@ func (w *icebergWriter) buildDesiredSchema(ctx context.Context, schemaCfg config return iceberg.NewSchema(schemaID, fields...), columns, nil } +func lfsMetadataColumns() []config.Column { + return []config.Column{ + {Name: "lfs_content_type", Type: "string"}, + {Name: "lfs_blob_size", Type: "long"}, + {Name: "lfs_checksum", Type: "string"}, + {Name: "lfs_checksum_alg", Type: "string"}, + {Name: "lfs_bucket", Type: "string"}, + {Name: "lfs_key", Type: "string"}, + } +} + +func mergeColumns(existing []config.Column, extra []config.Column) []config.Column { + if len(extra) == 0 { + return existing + } + if len(existing) == 0 { + return extra + } + seen := make(map[string]struct{}, len(existing)) + for _, col := range existing { + seen[col.Name] = struct{}{} + } + merged := append([]config.Column{}, existing...) 
+ for _, col := range extra { + if _, ok := seen[col.Name]; ok { + continue + } + merged = append(merged, col) + } + return merged +} + func resolveColumns(ctx context.Context, registry config.SchemaConfig, schemaCfg config.MappingSchemaConfig, topic string) ([]config.Column, error) { switch schemaCfg.Source { case "mapping": @@ -885,7 +922,7 @@ func recordsToArrow(schema *arrow.Schema, columns []config.Column, records []Rec headersBuilder.Append(serializeHeaders(record.Headers)) if len(columnBuilders) > 0 { values := extractJSONValues(record.Value) - appendColumnValues(columnBuilders, values) + appendColumnValues(columnBuilders, values, record.Columns) } } @@ -1095,8 +1132,14 @@ func extractJSONValues(payload []byte) map[string]interface{} { return out } -func appendColumnValues(builders []columnBuilder, values map[string]interface{}) { +func appendColumnValues(builders []columnBuilder, values map[string]interface{}, extras map[string]interface{}) { for _, col := range builders { + if extras != nil { + if val, ok := extras[col.name]; ok { + col.append(val) + continue + } + } if values == nil { col.appendNull() continue diff --git a/addons/processors/iceberg-processor/internal/sink/iceberg_integration_test.go b/addons/processors/iceberg-processor/internal/sink/iceberg_integration_test.go index d271f01f..1e48c209 100644 --- a/addons/processors/iceberg-processor/internal/sink/iceberg_integration_test.go +++ b/addons/processors/iceberg-processor/internal/sink/iceberg_integration_test.go @@ -76,3 +76,68 @@ func TestIcebergWriteSmoke(t *testing.T) { t.Fatalf("Write: %v", err) } } + +func TestIcebergWriteWithLfsMetadata(t *testing.T) { + catalogURI := os.Getenv("ICEBERG_PROCESSOR_CATALOG_URI") + if catalogURI == "" { + t.Skip("ICEBERG_PROCESSOR_CATALOG_URI not set") + } + catalogType := os.Getenv("ICEBERG_PROCESSOR_CATALOG_TYPE") + if catalogType == "" { + catalogType = "rest" + } + warehouse := os.Getenv("ICEBERG_PROCESSOR_WAREHOUSE") + if warehouse == "" { + 
t.Skip("ICEBERG_PROCESSOR_WAREHOUSE not set") + } + + cfg := config.Config{ + Iceberg: config.IcebergConfig{ + Catalog: config.CatalogConfig{ + Type: catalogType, + URI: catalogURI, + Token: os.Getenv("ICEBERG_PROCESSOR_CATALOG_TOKEN"), + }, + Warehouse: warehouse, + }, + Mappings: []config.Mapping{ + { + Topic: "orders", + Table: "default.orders_lfs", + Mode: "append", + CreateTableIfAbsent: true, + Lfs: config.LfsConfig{ + Mode: "resolve", + StoreMetadata: true, + }, + }, + }, + } + + writer, err := New(cfg) + if err != nil { + t.Fatalf("New writer: %v", err) + } + + records := []Record{ + { + Topic: "orders", + Partition: 0, + Offset: 2, + Timestamp: time.Now().UnixMilli(), + Key: []byte("k2"), + Value: []byte(`{"id":2}`), + Columns: map[string]interface{}{ + "lfs_bucket": "bucket", + "lfs_key": "key", + "lfs_blob_size": int64(5), + "lfs_checksum": "abc", + "lfs_checksum_alg": "sha256", + "lfs_content_type": "application/octet-stream", + }, + }, + } + if err := writer.Write(context.Background(), records); err != nil { + t.Fatalf("Write: %v", err) + } +} diff --git a/addons/processors/iceberg-processor/internal/sink/sink.go b/addons/processors/iceberg-processor/internal/sink/sink.go index 09d2f841..bca1e819 100644 --- a/addons/processors/iceberg-processor/internal/sink/sink.go +++ b/addons/processors/iceberg-processor/internal/sink/sink.go @@ -30,6 +30,7 @@ type Record struct { Key []byte Value []byte Headers []decoder.Header + Columns map[string]interface{} } // Writer writes records to a downstream system (Iceberg, OLAP, etc). diff --git a/api/lfs-proxy/openapi.yaml b/api/lfs-proxy/openapi.yaml new file mode 100644 index 00000000..d330dec4 --- /dev/null +++ b/api/lfs-proxy/openapi.yaml @@ -0,0 +1,688 @@ +# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +openapi: 3.0.3 +info: + title: KafScale LFS Proxy HTTP API + version: 1.0.0 + description: | + The KafScale LFS (Large File Support) Proxy provides HTTP endpoints for producing + large binary objects to Kafka via S3-backed storage. Instead of sending large payloads + directly through Kafka, clients upload blobs to S3 and receive an envelope (pointer) + that is stored in Kafka. + + ## Authentication + + When API key authentication is enabled (via `KAFSCALE_LFS_PROXY_HTTP_API_KEY`), + requests must include one of: + - `X-API-Key` header with the API key + - `Authorization: Bearer ` header + + ## CORS + + The API supports CORS for browser-based clients. Preflight OPTIONS requests are handled automatically. + + ## Request Tracing + + All requests can include an optional `X-Request-ID` header for tracing. If not provided, + the proxy generates one and returns it in the response. + contact: + name: KafScale + url: https://github.com/KafScale/platform + license: + name: Apache 2.0 + url: https://www.apache.org/licenses/LICENSE-2.0 +servers: + - url: http://localhost:8080 + description: Local development + - url: http://lfs-proxy:8080 + description: Kubernetes in-cluster +tags: + - name: LFS + description: Large File Support operations +paths: + /lfs/produce: + post: + tags: + - LFS + summary: Upload and produce an LFS record + description: | + Streams a binary payload to the LFS proxy, which: + 1. 
Uploads the blob to S3 storage + 2. Computes checksums (SHA256 by default) + 3. Creates an LFS envelope with blob metadata + 4. Produces the envelope to the specified Kafka topic + + The response contains the full LFS envelope that was stored in Kafka. + operationId: lfsProduce + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: header + name: X-Kafka-Topic + required: true + schema: + type: string + pattern: '^[a-zA-Z0-9._-]+$' + maxLength: 249 + description: Target Kafka topic name (alphanumeric, dots, underscores, hyphens only) + example: video-uploads + - in: header + name: X-Kafka-Key + required: false + schema: + type: string + description: Base64-encoded Kafka record key for partitioning + example: dXNlci0xMjM= + - in: header + name: X-Kafka-Partition + required: false + schema: + type: integer + format: int32 + minimum: 0 + description: Explicit partition number (overrides key-based partitioning) + example: 0 + - in: header + name: X-LFS-Checksum + required: false + schema: + type: string + description: Expected checksum of the payload for verification + example: abc123def456... 
+ - in: header + name: X-LFS-Checksum-Alg + required: false + schema: + type: string + enum: [sha256, md5, crc32, none] + default: sha256 + description: Checksum algorithm for verification + - in: header + name: X-Request-ID + required: false + schema: + type: string + format: uuid + description: Request correlation ID for tracing + - in: header + name: Content-Type + required: false + schema: + type: string + description: MIME type of the payload (stored in envelope) + example: video/mp4 + requestBody: + required: true + description: Binary payload to upload + content: + application/octet-stream: + schema: + type: string + format: binary + '*/*': + schema: + type: string + format: binary + responses: + "200": + description: LFS envelope successfully produced to Kafka + headers: + X-Request-ID: + schema: + type: string + description: Request correlation ID + content: + application/json: + schema: + $ref: "#/components/schemas/LfsEnvelope" + example: + kfs_lfs: 1 + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + size: 10485760 + sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum_alg: sha256 + content_type: video/mp4 + created_at: "2026-02-05T10:30:00Z" + proxy_id: lfs-proxy-0 + "400": + description: Invalid request (missing topic, invalid checksum, etc.) 
+ content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + examples: + missing_topic: + value: + code: missing_topic + message: missing topic + request_id: abc-123 + checksum_mismatch: + value: + code: checksum_mismatch + message: "expected abc123, got def456" + request_id: abc-123 + "401": + description: Unauthorized - API key required or invalid + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "502": + description: Upstream storage or Kafka failure + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "503": + description: Proxy not ready (backends unavailable) + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + options: + tags: + - LFS + summary: CORS preflight for produce endpoint + description: Handles CORS preflight requests for browser clients + responses: + "204": + description: CORS headers returned + headers: + Access-Control-Allow-Origin: + schema: + type: string + Access-Control-Allow-Methods: + schema: + type: string + Access-Control-Allow-Headers: + schema: + type: string + + /lfs/uploads: + post: + tags: + - LFS + summary: Initiate a resumable multipart upload + description: | + Starts a multipart upload session for large, resumable uploads. + Clients upload parts to `/lfs/uploads/{upload_id}/parts/{part_number}` + and finalize with `/lfs/uploads/{upload_id}/complete`. 
+ operationId: lfsUploadInit + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/UploadInitRequest" + responses: + "200": + description: Upload session created + content: + application/json: + schema: + $ref: "#/components/schemas/UploadInitResponse" + "400": + description: Invalid request + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "401": + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + + /lfs/uploads/{upload_id}/parts/{part_number}: + put: + tags: + - LFS + summary: Upload a multipart chunk + description: | + Uploads a single part for an existing upload session. Parts are identified + by `part_number` (1..10000) and must be at least 5MB except the final part. + operationId: lfsUploadPart + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: path + name: upload_id + required: true + schema: + type: string + - in: path + name: part_number + required: true + schema: + type: integer + format: int32 + minimum: 1 + maximum: 10000 + - in: header + name: Content-Range + required: false + schema: + type: string + description: Optional byte range for the part (e.g., bytes 0-16777215/6442450944) + requestBody: + required: true + content: + application/octet-stream: + schema: + type: string + format: binary + '*/*': + schema: + type: string + format: binary + responses: + "200": + description: Part uploaded + content: + application/json: + schema: + $ref: "#/components/schemas/UploadPartResponse" + "400": + description: Invalid part or session + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "404": + description: Upload session not found + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + + /lfs/uploads/{upload_id}/complete: + post: + tags: + - LFS + summary: 
Complete a multipart upload + description: | + Finalizes the multipart upload and produces the LFS envelope to Kafka. + The request must include the ordered list of part numbers and ETags. + operationId: lfsUploadComplete + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: path + name: upload_id + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/UploadCompleteRequest" + responses: + "200": + description: Upload completed and envelope produced + content: + application/json: + schema: + $ref: "#/components/schemas/LfsEnvelope" + "400": + description: Invalid completion request + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "404": + description: Upload session not found + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + + /lfs/uploads/{upload_id}: + delete: + tags: + - LFS + summary: Abort a multipart upload + description: Aborts an in-progress upload and deletes partial parts. + operationId: lfsUploadAbort + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: path + name: upload_id + required: true + schema: + type: string + responses: + "204": + description: Upload aborted + "404": + description: Upload session not found + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + + /lfs/download: + post: + tags: + - LFS + summary: Download an LFS object + description: | + Retrieves an LFS object from S3 storage. Supports two modes: + + - **presign**: Returns a presigned S3 URL for direct download (default) + - **stream**: Streams the object content through the proxy + + For presign mode, the URL TTL is capped by server configuration. 
+ operationId: lfsDownload + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: header + name: X-Request-ID + required: false + schema: + type: string + format: uuid + description: Request correlation ID for tracing + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DownloadRequest" + examples: + presign: + summary: Get presigned URL + value: + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + mode: presign + expires_seconds: 300 + stream: + summary: Stream content + value: + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + mode: stream + responses: + "200": + description: Presigned URL or streamed object content + content: + application/json: + schema: + $ref: "#/components/schemas/DownloadResponse" + example: + mode: presign + url: https://s3.amazonaws.com/kafscale-lfs/... + expires_at: "2026-02-05T10:35:00Z" + application/octet-stream: + schema: + type: string + format: binary + description: Streamed object content (when mode=stream) + "400": + description: Invalid request + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "401": + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "502": + description: Upstream storage failure + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "503": + description: Proxy not ready + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + options: + tags: + - LFS + summary: CORS preflight for download endpoint + responses: + "204": + description: CORS headers returned + +components: + securitySchemes: + ApiKeyAuth: + type: apiKey + in: header + name: X-API-Key + description: API key for authentication + BearerAuth: + type: http + scheme: bearer + description: Bearer token authentication (same API key) + + schemas: + LfsEnvelope: + type: 
object + description: LFS envelope containing blob metadata and S3 location + properties: + kfs_lfs: + type: integer + format: int32 + description: LFS envelope version + example: 1 + bucket: + type: string + description: S3 bucket name + example: kafscale-lfs + key: + type: string + description: S3 object key + example: default/video-uploads/lfs/2026/02/05/abc123 + size: + type: integer + format: int64 + description: Blob size in bytes + example: 10485760 + sha256: + type: string + description: SHA256 hash of the blob + example: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum: + type: string + description: Checksum value (algorithm depends on checksum_alg) + checksum_alg: + type: string + description: Checksum algorithm used + enum: [sha256, md5, crc32, none] + example: sha256 + content_type: + type: string + description: MIME type of the blob + example: video/mp4 + created_at: + type: string + format: date-time + description: Timestamp when the blob was created + example: "2026-02-05T10:30:00Z" + proxy_id: + type: string + description: ID of the proxy instance that handled the upload + example: lfs-proxy-0 + + DownloadRequest: + type: object + required: [bucket, key] + description: Request to download an LFS object + properties: + bucket: + type: string + description: S3 bucket name (must match proxy's configured bucket) + example: kafscale-lfs + key: + type: string + description: S3 object key from the LFS envelope + example: default/video-uploads/lfs/2026/02/05/abc123 + mode: + type: string + enum: [presign, stream] + default: presign + description: | + Download mode: + - presign: Return a presigned URL for direct S3 download + - stream: Stream content through the proxy + expires_seconds: + type: integer + format: int32 + default: 120 + minimum: 1 + maximum: 3600 + description: Requested presign URL TTL in seconds (capped by server) + + DownloadResponse: + type: object + description: Response for presign download mode + properties: + 
mode: + type: string + enum: [presign] + description: Download mode used + url: + type: string + format: uri + description: Presigned S3 URL for direct download + expires_at: + type: string + format: date-time + description: URL expiration timestamp + + UploadInitRequest: + type: object + required: [topic, content_type, size_bytes] + properties: + topic: + type: string + description: Target Kafka topic + example: browser-uploads + key: + type: string + description: Optional Kafka key (base64) + example: dXNlci0xMjM= + partition: + type: integer + format: int32 + description: Optional Kafka partition + content_type: + type: string + description: MIME type of the object + example: video/mp4 + size_bytes: + type: integer + format: int64 + description: Total object size in bytes + checksum: + type: string + description: Optional checksum for validation + checksum_alg: + type: string + enum: [sha256, md5, crc32, none] + default: sha256 + + UploadInitResponse: + type: object + properties: + upload_id: + type: string + description: Upload session ID + s3_key: + type: string + description: S3 object key reserved for this upload + part_size: + type: integer + format: int64 + description: Recommended part size in bytes + expires_at: + type: string + format: date-time + description: Upload session expiration time + + UploadPartResponse: + type: object + properties: + upload_id: + type: string + part_number: + type: integer + format: int32 + etag: + type: string + description: S3 ETag for the uploaded part + + UploadCompleteRequest: + type: object + required: [parts] + properties: + parts: + type: array + description: Ordered list of uploaded parts + items: + type: object + required: [part_number, etag] + properties: + part_number: + type: integer + format: int32 + etag: + type: string + + ErrorResponse: + type: object + description: Error response returned for all error conditions + properties: + code: + type: string + description: Machine-readable error code + example: 
missing_topic + message: + type: string + description: Human-readable error message + example: missing topic + request_id: + type: string + description: Request correlation ID for support/debugging + example: abc-123-def-456 diff --git a/api/v1alpha1/kafscalecluster_types.go b/api/v1alpha1/kafscalecluster_types.go index ae9041a8..3a98e9a4 100644 --- a/api/v1alpha1/kafscalecluster_types.go +++ b/api/v1alpha1/kafscalecluster_types.go @@ -23,10 +23,11 @@ import ( // KafscaleClusterSpec defines the desired state of a Kafscale cluster. type KafscaleClusterSpec struct { - Brokers BrokerSpec `json:"brokers"` - S3 S3Spec `json:"s3"` - Etcd EtcdSpec `json:"etcd"` - Config ClusterConfigSpec `json:"config,omitempty"` + Brokers BrokerSpec `json:"brokers"` + S3 S3Spec `json:"s3"` + Etcd EtcdSpec `json:"etcd"` + Config ClusterConfigSpec `json:"config,omitempty"` + LfsProxy LfsProxySpec `json:"lfsProxy,omitempty"` } type BrokerSpec struct { @@ -73,6 +74,54 @@ type ClusterConfigSpec struct { CacheSize string `json:"cacheSize,omitempty"` } +type LfsProxySpec struct { + Enabled bool `json:"enabled,omitempty"` + Replicas *int32 `json:"replicas,omitempty"` + Image string `json:"image,omitempty"` + ImagePullPolicy string `json:"imagePullPolicy,omitempty"` + Backends []string `json:"backends,omitempty"` + AdvertisedHost string `json:"advertisedHost,omitempty"` + AdvertisedPort *int32 `json:"advertisedPort,omitempty"` + BackendCacheTTLSeconds *int32 `json:"backendCacheTTLSeconds,omitempty"` + Service LfsProxyServiceSpec `json:"service,omitempty"` + HTTP LfsProxyHTTPSpec `json:"http,omitempty"` + Metrics LfsProxyMetricsSpec `json:"metrics,omitempty"` + Health LfsProxyHealthSpec `json:"health,omitempty"` + S3 LfsProxyS3Spec `json:"s3,omitempty"` +} + +type LfsProxyServiceSpec struct { + Type string `json:"type,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` + LoadBalancerSourceRanges []string `json:"loadBalancerSourceRanges,omitempty"` + Port *int32 
`json:"port,omitempty"` +} + +type LfsProxyHTTPSpec struct { + Enabled *bool `json:"enabled,omitempty"` + Port *int32 `json:"port,omitempty"` + APIKeySecretRef string `json:"apiKeySecretRef,omitempty"` + APIKeySecretKey string `json:"apiKeySecretKey,omitempty"` +} + +type LfsProxyMetricsSpec struct { + Enabled *bool `json:"enabled,omitempty"` + Port *int32 `json:"port,omitempty"` +} + +type LfsProxyHealthSpec struct { + Enabled *bool `json:"enabled,omitempty"` + Port *int32 `json:"port,omitempty"` +} + +type LfsProxyS3Spec struct { + Namespace string `json:"namespace,omitempty"` + MaxBlobSize *int64 `json:"maxBlobSize,omitempty"` + ChunkSize *int64 `json:"chunkSize,omitempty"` + ForcePathStyle *bool `json:"forcePathStyle,omitempty"` + EnsureBucket *bool `json:"ensureBucket,omitempty"` +} + // KafscaleClusterStatus captures observed state. type KafscaleClusterStatus struct { Phase string `json:"phase,omitempty"` @@ -104,6 +153,159 @@ func init() { SchemeBuilder.Register(&KafscaleCluster{}, &KafscaleClusterList{}) } +func (in *LfsProxyServiceSpec) DeepCopyInto(out *LfsProxyServiceSpec) { + *out = *in + if in.Annotations != nil { + out.Annotations = make(map[string]string, len(in.Annotations)) + for key, val := range in.Annotations { + out.Annotations[key] = val + } + } + if in.LoadBalancerSourceRanges != nil { + out.LoadBalancerSourceRanges = make([]string, len(in.LoadBalancerSourceRanges)) + copy(out.LoadBalancerSourceRanges, in.LoadBalancerSourceRanges) + } + if in.Port != nil { + out.Port = new(int32) + *out.Port = *in.Port + } +} + +func (in *LfsProxyServiceSpec) DeepCopy() *LfsProxyServiceSpec { + if in == nil { + return nil + } + out := new(LfsProxyServiceSpec) + in.DeepCopyInto(out) + return out +} + +func (in *LfsProxyHTTPSpec) DeepCopyInto(out *LfsProxyHTTPSpec) { + *out = *in + if in.Enabled != nil { + out.Enabled = new(bool) + *out.Enabled = *in.Enabled + } + if in.Port != nil { + out.Port = new(int32) + *out.Port = *in.Port + } +} + +func (in 
*LfsProxyHTTPSpec) DeepCopy() *LfsProxyHTTPSpec { + if in == nil { + return nil + } + out := new(LfsProxyHTTPSpec) + in.DeepCopyInto(out) + return out +} + +func (in *LfsProxyMetricsSpec) DeepCopyInto(out *LfsProxyMetricsSpec) { + *out = *in + if in.Enabled != nil { + out.Enabled = new(bool) + *out.Enabled = *in.Enabled + } + if in.Port != nil { + out.Port = new(int32) + *out.Port = *in.Port + } +} + +func (in *LfsProxyMetricsSpec) DeepCopy() *LfsProxyMetricsSpec { + if in == nil { + return nil + } + out := new(LfsProxyMetricsSpec) + in.DeepCopyInto(out) + return out +} + +func (in *LfsProxyHealthSpec) DeepCopyInto(out *LfsProxyHealthSpec) { + *out = *in + if in.Enabled != nil { + out.Enabled = new(bool) + *out.Enabled = *in.Enabled + } + if in.Port != nil { + out.Port = new(int32) + *out.Port = *in.Port + } +} + +func (in *LfsProxyHealthSpec) DeepCopy() *LfsProxyHealthSpec { + if in == nil { + return nil + } + out := new(LfsProxyHealthSpec) + in.DeepCopyInto(out) + return out +} + +func (in *LfsProxyS3Spec) DeepCopyInto(out *LfsProxyS3Spec) { + *out = *in + if in.MaxBlobSize != nil { + out.MaxBlobSize = new(int64) + *out.MaxBlobSize = *in.MaxBlobSize + } + if in.ChunkSize != nil { + out.ChunkSize = new(int64) + *out.ChunkSize = *in.ChunkSize + } + if in.ForcePathStyle != nil { + out.ForcePathStyle = new(bool) + *out.ForcePathStyle = *in.ForcePathStyle + } + if in.EnsureBucket != nil { + out.EnsureBucket = new(bool) + *out.EnsureBucket = *in.EnsureBucket + } +} + +func (in *LfsProxyS3Spec) DeepCopy() *LfsProxyS3Spec { + if in == nil { + return nil + } + out := new(LfsProxyS3Spec) + in.DeepCopyInto(out) + return out +} + +func (in *LfsProxySpec) DeepCopyInto(out *LfsProxySpec) { + *out = *in + if in.Replicas != nil { + out.Replicas = new(int32) + *out.Replicas = *in.Replicas + } + if in.AdvertisedPort != nil { + out.AdvertisedPort = new(int32) + *out.AdvertisedPort = *in.AdvertisedPort + } + if in.BackendCacheTTLSeconds != nil { + out.BackendCacheTTLSeconds = 
new(int32) + *out.BackendCacheTTLSeconds = *in.BackendCacheTTLSeconds + } + if in.Backends != nil { + out.Backends = make([]string, len(in.Backends)) + copy(out.Backends, in.Backends) + } + in.Service.DeepCopyInto(&out.Service) + in.HTTP.DeepCopyInto(&out.HTTP) + in.Metrics.DeepCopyInto(&out.Metrics) + in.Health.DeepCopyInto(&out.Health) + in.S3.DeepCopyInto(&out.S3) +} + +func (in *LfsProxySpec) DeepCopy() *LfsProxySpec { + if in == nil { + return nil + } + out := new(LfsProxySpec) + in.DeepCopyInto(out) + return out +} + func (in *BrokerResources) DeepCopyInto(out *BrokerResources) { *out = *in if in.Requests != nil { @@ -202,6 +404,7 @@ func (in *KafscaleClusterSpec) DeepCopyInto(out *KafscaleClusterSpec) { out.S3 = in.S3 out.Etcd = in.Etcd out.Config = in.Config + in.LfsProxy.DeepCopyInto(&out.LfsProxy) } func (in *KafscaleClusterSpec) DeepCopy() *KafscaleClusterSpec { diff --git a/cmd/broker/acl_test.go b/cmd/broker/acl_test.go index 3150aac0..503689bc 100644 --- a/cmd/broker/acl_test.go +++ b/cmd/broker/acl_test.go @@ -215,8 +215,8 @@ func TestACLProxyAddrProduceAllowed(t *testing.T) { handler := newTestHandler(store) conn, peer := net.Pipe() - defer conn.Close() - defer peer.Close() + defer func() { _ = conn.Close() }() + defer func() { _ = peer.Close() }() go func() { _, _ = peer.Write([]byte("PROXY TCP4 10.0.0.1 10.0.0.2 12345 9092\r\n")) }() diff --git a/cmd/broker/admin_metrics.go b/cmd/broker/admin_metrics.go index 630acd16..d677620b 100644 --- a/cmd/broker/admin_metrics.go +++ b/cmd/broker/admin_metrics.go @@ -65,21 +65,21 @@ func (m *adminMetrics) writePrometheus(w io.Writer) { } m.mu.Lock() defer m.mu.Unlock() - fmt.Fprintln(w, "# HELP kafscale_admin_requests_total Total admin API requests.") - fmt.Fprintln(w, "# TYPE kafscale_admin_requests_total counter") - fmt.Fprintln(w, "# HELP kafscale_admin_request_errors_total Total admin API requests that returned an error.") - fmt.Fprintln(w, "# TYPE kafscale_admin_request_errors_total counter") - 
fmt.Fprintln(w, "# HELP kafscale_admin_request_latency_ms_avg Average admin API request latency in milliseconds.") - fmt.Fprintln(w, "# TYPE kafscale_admin_request_latency_ms_avg gauge") + _, _ = fmt.Fprintln(w, "# HELP kafscale_admin_requests_total Total admin API requests.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_admin_requests_total counter") + _, _ = fmt.Fprintln(w, "# HELP kafscale_admin_request_errors_total Total admin API requests that returned an error.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_admin_request_errors_total counter") + _, _ = fmt.Fprintln(w, "# HELP kafscale_admin_request_latency_ms_avg Average admin API request latency in milliseconds.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_admin_request_latency_ms_avg gauge") for apiKey, entry := range m.data { name := adminAPIName(apiKey) avg := 0.0 if entry.count > 0 { avg = float64(entry.latencySum.Milliseconds()) / float64(entry.count) } - fmt.Fprintf(w, "kafscale_admin_requests_total{api=%q} %d\n", name, entry.count) - fmt.Fprintf(w, "kafscale_admin_request_errors_total{api=%q} %d\n", name, entry.errorCount) - fmt.Fprintf(w, "kafscale_admin_request_latency_ms_avg{api=%q} %.3f\n", name, avg) + _, _ = fmt.Fprintf(w, "kafscale_admin_requests_total{api=%q} %d\n", name, entry.count) + _, _ = fmt.Fprintf(w, "kafscale_admin_request_errors_total{api=%q} %d\n", name, entry.errorCount) + _, _ = fmt.Fprintf(w, "kafscale_admin_request_latency_ms_avg{api=%q} %.3f\n", name, avg) } } diff --git a/cmd/broker/auth_metrics.go b/cmd/broker/auth_metrics.go index 75626242..676b47a9 100644 --- a/cmd/broker/auth_metrics.go +++ b/cmd/broker/auth_metrics.go @@ -50,15 +50,15 @@ func (m *authMetrics) writePrometheus(w io.Writer) { return } total := atomic.LoadUint64(&m.deniedTotal) - fmt.Fprintln(w, "# HELP kafscale_authz_denied_total Authorization denials across broker APIs.") - fmt.Fprintln(w, "# TYPE kafscale_authz_denied_total counter") - fmt.Fprintf(w, "kafscale_authz_denied_total %d\n", total) + _, _ = 
fmt.Fprintln(w, "# HELP kafscale_authz_denied_total Authorization denials across broker APIs.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_authz_denied_total counter") + _, _ = fmt.Fprintf(w, "kafscale_authz_denied_total %d\n", total) m.mu.Lock() defer m.mu.Unlock() for key, count := range m.byKey { action, resource := splitAuthMetricKey(key) - fmt.Fprintf(w, "kafscale_authz_denied_total{action=%q,resource=%q} %d\n", action, resource, count) + _, _ = fmt.Fprintf(w, "kafscale_authz_denied_total{action=%q,resource=%q} %d\n", action, resource, count) } } diff --git a/cmd/broker/lag_metrics.go b/cmd/broker/lag_metrics.go index 722c9f98..96c6006a 100644 --- a/cmd/broker/lag_metrics.go +++ b/cmd/broker/lag_metrics.go @@ -71,7 +71,7 @@ func (m *lagMetrics) WritePrometheus(w io.Writer) { if hist != nil { hist.WritePrometheus(w, "kafscale_consumer_lag", "Consumer lag in records.") } - fmt.Fprintf(w, "# HELP kafscale_consumer_lag_max Maximum consumer lag in records.\n") - fmt.Fprintf(w, "# TYPE kafscale_consumer_lag_max gauge\n") - fmt.Fprintf(w, "kafscale_consumer_lag_max %f\n", max) + _, _ = fmt.Fprintf(w, "# HELP kafscale_consumer_lag_max Maximum consumer lag in records.\n") + _, _ = fmt.Fprintf(w, "# TYPE kafscale_consumer_lag_max gauge\n") + _, _ = fmt.Fprintf(w, "kafscale_consumer_lag_max %f\n", max) } diff --git a/cmd/broker/main.go b/cmd/broker/main.go index 971295e8..c1ba1088 100644 --- a/cmd/broker/main.go +++ b/cmd/broker/main.go @@ -1731,7 +1731,7 @@ func (h *handler) handleListOffsets(ctx context.Context, header *protocol.Reques } func (h *handler) handleFetch(ctx context.Context, header *protocol.RequestHeader, req *kmsg.FetchRequest) ([]byte, error) { - if header.APIVersion < 11 || header.APIVersion > 13 { + if header.APIVersion > 13 { return nil, fmt.Errorf("fetch version %d not supported", header.APIVersion) } topicResponses := make([]kmsg.FetchResponseTopic, 0, len(req.Topics)) diff --git a/cmd/broker/metrics_histogram.go b/cmd/broker/metrics_histogram.go 
index 76b34220..fd606c1d 100644 --- a/cmd/broker/metrics_histogram.go +++ b/cmd/broker/metrics_histogram.go @@ -70,17 +70,17 @@ func (h *histogram) WritePrometheus(w io.Writer, name, help string) { return } buckets, counts, sum, count := h.Snapshot() - fmt.Fprintf(w, "# HELP %s %s\n", name, help) - fmt.Fprintf(w, "# TYPE %s histogram\n", name) + _, _ = fmt.Fprintf(w, "# HELP %s %s\n", name, help) + _, _ = fmt.Fprintf(w, "# TYPE %s histogram\n", name) var cumulative int64 for i, upper := range buckets { cumulative += counts[i] - fmt.Fprintf(w, "%s_bucket{le=%q} %d\n", name, formatFloat(upper), cumulative) + _, _ = fmt.Fprintf(w, "%s_bucket{le=%q} %d\n", name, formatFloat(upper), cumulative) } cumulative += counts[len(counts)-1] - fmt.Fprintf(w, "%s_bucket{le=\"+Inf\"} %d\n", name, cumulative) - fmt.Fprintf(w, "%s_sum %f\n", name, sum) - fmt.Fprintf(w, "%s_count %d\n", name, count) + _, _ = fmt.Fprintf(w, "%s_bucket{le=\"+Inf\"} %d\n", name, cumulative) + _, _ = fmt.Fprintf(w, "%s_sum %f\n", name, sum) + _, _ = fmt.Fprintf(w, "%s_count %d\n", name, count) } func formatFloat(val float64) string { diff --git a/cmd/broker/runtime_metrics.go b/cmd/broker/runtime_metrics.go index 57108ec7..9dad04d8 100644 --- a/cmd/broker/runtime_metrics.go +++ b/cmd/broker/runtime_metrics.go @@ -34,27 +34,27 @@ func (h *handler) writeRuntimeMetrics(w io.Writer) { cpuPercent = h.cpuTracker.Percent() } - fmt.Fprintln(w, "# HELP kafscale_broker_uptime_seconds Seconds since broker start.") - fmt.Fprintln(w, "# TYPE kafscale_broker_uptime_seconds gauge") - fmt.Fprintf(w, "kafscale_broker_uptime_seconds %f\n", uptime) + _, _ = fmt.Fprintln(w, "# HELP kafscale_broker_uptime_seconds Seconds since broker start.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_broker_uptime_seconds gauge") + _, _ = fmt.Fprintf(w, "kafscale_broker_uptime_seconds %f\n", uptime) - fmt.Fprintln(w, "# HELP kafscale_broker_mem_alloc_bytes Bytes of allocated heap objects.") - fmt.Fprintln(w, "# TYPE 
kafscale_broker_mem_alloc_bytes gauge") - fmt.Fprintf(w, "kafscale_broker_mem_alloc_bytes %d\n", stats.Alloc) + _, _ = fmt.Fprintln(w, "# HELP kafscale_broker_mem_alloc_bytes Bytes of allocated heap objects.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_broker_mem_alloc_bytes gauge") + _, _ = fmt.Fprintf(w, "kafscale_broker_mem_alloc_bytes %d\n", stats.Alloc) - fmt.Fprintln(w, "# HELP kafscale_broker_mem_sys_bytes Bytes of memory obtained from the OS.") - fmt.Fprintln(w, "# TYPE kafscale_broker_mem_sys_bytes gauge") - fmt.Fprintf(w, "kafscale_broker_mem_sys_bytes %d\n", stats.Sys) + _, _ = fmt.Fprintln(w, "# HELP kafscale_broker_mem_sys_bytes Bytes of memory obtained from the OS.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_broker_mem_sys_bytes gauge") + _, _ = fmt.Fprintf(w, "kafscale_broker_mem_sys_bytes %d\n", stats.Sys) - fmt.Fprintln(w, "# HELP kafscale_broker_heap_inuse_bytes Bytes in in-use spans.") - fmt.Fprintln(w, "# TYPE kafscale_broker_heap_inuse_bytes gauge") - fmt.Fprintf(w, "kafscale_broker_heap_inuse_bytes %d\n", stats.HeapInuse) + _, _ = fmt.Fprintln(w, "# HELP kafscale_broker_heap_inuse_bytes Bytes in in-use spans.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_broker_heap_inuse_bytes gauge") + _, _ = fmt.Fprintf(w, "kafscale_broker_heap_inuse_bytes %d\n", stats.HeapInuse) - fmt.Fprintln(w, "# HELP kafscale_broker_cpu_percent Approximate CPU usage percent since last scrape.") - fmt.Fprintln(w, "# TYPE kafscale_broker_cpu_percent gauge") - fmt.Fprintf(w, "kafscale_broker_cpu_percent %f\n", cpuPercent) + _, _ = fmt.Fprintln(w, "# HELP kafscale_broker_cpu_percent Approximate CPU usage percent since last scrape.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_broker_cpu_percent gauge") + _, _ = fmt.Fprintf(w, "kafscale_broker_cpu_percent %f\n", cpuPercent) - fmt.Fprintln(w, "# HELP kafscale_broker_goroutines Number of goroutines.") - fmt.Fprintln(w, "# TYPE kafscale_broker_goroutines gauge") - fmt.Fprintf(w, "kafscale_broker_goroutines %d\n", 
runtime.NumGoroutine()) + _, _ = fmt.Fprintln(w, "# HELP kafscale_broker_goroutines Number of goroutines.") + _, _ = fmt.Fprintln(w, "# TYPE kafscale_broker_goroutines gauge") + _, _ = fmt.Fprintf(w, "kafscale_broker_goroutines %d\n", runtime.NumGoroutine()) } diff --git a/cmd/console/main.go b/cmd/console/main.go index 94c5867c..c02cc809 100644 --- a/cmd/console/main.go +++ b/cmd/console/main.go @@ -20,6 +20,7 @@ import ( "log" "os" "os/signal" + "strconv" "strings" "syscall" @@ -44,6 +45,16 @@ func main() { if metricsProvider := buildMetricsProvider(store); metricsProvider != nil { opts.Metrics = metricsProvider } + + // Initialize LFS components if enabled + if lfsHandlers, lfsConsumer := buildLFSComponents(ctx); lfsHandlers != nil { + opts.LFSHandlers = lfsHandlers + if lfsConsumer != nil { + lfsConsumer.Start() + defer func() { _ = lfsConsumer.Close() }() + } + } + if err := consolepkg.StartServer(ctx, addr, opts); err != nil { log.Fatalf("console server failed: %v", err) } @@ -119,3 +130,86 @@ func consoleEtcdConfigFromEnv() (metadata.EtcdStoreConfig, bool) { Password: os.Getenv("KAFSCALE_CONSOLE_ETCD_PASSWORD"), }, true } + +func buildLFSComponents(ctx context.Context) (*consolepkg.LFSHandlers, *consolepkg.LFSConsumer) { + enabled := strings.EqualFold(strings.TrimSpace(os.Getenv("KAFSCALE_CONSOLE_LFS_ENABLED")), "true") + if !enabled { + return nil, nil + } + + // LFS configuration + lfsCfg := consolepkg.LFSConfig{ + Enabled: true, + TrackerTopic: envOrDefault("KAFSCALE_LFS_TRACKER_TOPIC", "__lfs_ops_state"), + KafkaBrokers: splitCSV(os.Getenv("KAFSCALE_CONSOLE_KAFKA_BROKERS")), + S3Bucket: strings.TrimSpace(os.Getenv("KAFSCALE_CONSOLE_LFS_S3_BUCKET")), + S3Region: strings.TrimSpace(os.Getenv("KAFSCALE_CONSOLE_LFS_S3_REGION")), + S3Endpoint: strings.TrimSpace(os.Getenv("KAFSCALE_CONSOLE_LFS_S3_ENDPOINT")), + S3AccessKey: strings.TrimSpace(os.Getenv("KAFSCALE_CONSOLE_LFS_S3_ACCESS_KEY")), + S3SecretKey: 
strings.TrimSpace(os.Getenv("KAFSCALE_CONSOLE_LFS_S3_SECRET_KEY")), + PresignTTL: 300, // 5 minutes default + } + + if ttl := strings.TrimSpace(os.Getenv("KAFSCALE_CONSOLE_LFS_S3_PRESIGN_TTL")); ttl != "" { + if parsed, err := strconv.Atoi(ttl); err == nil && parsed > 0 { + lfsCfg.PresignTTL = parsed + } + } + + // Create handlers + handlers := consolepkg.NewLFSHandlers(lfsCfg, log.Default()) + + // Create S3 client if configured + if lfsCfg.S3Bucket != "" { + s3Cfg := consolepkg.LFSS3Config{ + Bucket: lfsCfg.S3Bucket, + Region: lfsCfg.S3Region, + Endpoint: lfsCfg.S3Endpoint, + AccessKey: lfsCfg.S3AccessKey, + SecretKey: lfsCfg.S3SecretKey, + ForcePathStyle: lfsCfg.S3Endpoint != "", + } + if s3Client, err := consolepkg.NewLFSS3Client(ctx, s3Cfg, log.Default()); err == nil && s3Client != nil { + handlers.SetS3Client(s3Client) + } else if err != nil { + log.Printf("lfs s3 client init failed: %v", err) + } + } + + // Create consumer if Kafka brokers configured + var consumer *consolepkg.LFSConsumer + if len(lfsCfg.KafkaBrokers) > 0 { + consumerCfg := consolepkg.LFSConsumerConfig{ + Brokers: lfsCfg.KafkaBrokers, + Topic: lfsCfg.TrackerTopic, + GroupID: "kafscale-console-lfs", + } + var err error + consumer, err = consolepkg.NewLFSConsumer(ctx, consumerCfg, handlers, log.Default()) + if err != nil { + log.Printf("lfs consumer init failed: %v", err) + } else if consumer != nil { + handlers.SetConsumer(consumer) + } + } + + log.Printf("lfs console components initialized: s3_bucket=%s tracker_topic=%s", + lfsCfg.S3Bucket, lfsCfg.TrackerTopic) + + return handlers, consumer +} + +func splitCSV(raw string) []string { + if strings.TrimSpace(raw) == "" { + return nil + } + parts := strings.Split(raw, ",") + out := make([]string, 0, len(parts)) + for _, part := range parts { + val := strings.TrimSpace(part) + if val != "" { + out = append(out, val) + } + } + return out +} diff --git a/cmd/e2e-client/main.go b/cmd/e2e-client/main.go index b03d76ad..f482353a 100644 --- 
a/cmd/e2e-client/main.go +++ b/cmd/e2e-client/main.go @@ -17,6 +17,7 @@ package main import ( "context" + "crypto/rand" "errors" "fmt" "io" @@ -38,6 +39,8 @@ func main() { topic := strings.TrimSpace(os.Getenv("KAFSCALE_E2E_TOPIC")) count := parseEnvInt("KAFSCALE_E2E_COUNT", 1) timeout := time.Duration(parseEnvInt("KAFSCALE_E2E_TIMEOUT_SEC", 40)) * time.Second + printValues := parseEnvBool("KAFSCALE_E2E_PRINT_VALUES", false) + printLimit := parseEnvInt("KAFSCALE_E2E_PRINT_LIMIT", 512) switch mode { case "produce": @@ -47,6 +50,8 @@ func main() { if count <= 0 { log.Fatalf("KAFSCALE_E2E_COUNT must be > 0") } + lfsBlob := parseEnvBool("KAFSCALE_E2E_LFS_BLOB", false) + msgSize := parseEnvInt("KAFSCALE_E2E_MSG_SIZE", 1024) client, err := kgo.NewClient( kgo.SeedBrokers(brokerAddr), kgo.AllowAutoTopicCreation(), @@ -57,10 +62,14 @@ func main() { defer client.Close() produceCtx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() - if err := produceMessages(produceCtx, client, topic, count); err != nil { + if err := produceMessages(produceCtx, client, topic, count, lfsBlob, msgSize); err != nil { log.Fatalf("produce: %v", err) } - log.Printf("produced %d messages to %s", count, topic) + if lfsBlob { + log.Printf("produced %d LFS messages (%d bytes each) to %s", count, msgSize, topic) + } else { + log.Printf("produced %d messages to %s", count, topic) + } case "consume": if brokerAddr == "" || topic == "" { log.Fatalf("KAFSCALE_E2E_BROKER_ADDR and KAFSCALE_E2E_TOPIC are required") @@ -94,7 +103,7 @@ func main() { log.Fatalf("create consumer client: %v", err) } defer client.Close() - if err := consumeMessages(context.Background(), client, topic, count, timeout); err != nil { + if err := consumeMessages(context.Background(), client, topic, count, timeout, printValues, printLimit); err != nil { log.Fatalf("consume: %v", err) } log.Printf("consumed %d messages from %s", count, topic) @@ -127,10 +136,27 @@ func main() { } } -func produceMessages(ctx 
context.Context, client *kgo.Client, topic string, count int) error { +func produceMessages(ctx context.Context, client *kgo.Client, topic string, count int, lfsBlob bool, msgSize int) error { for i := 0; i < count; i++ { - msg := fmt.Sprintf("restart-%d", i) - res := client.ProduceSync(ctx, &kgo.Record{Topic: topic, Value: []byte(msg)}) + var value []byte + if lfsBlob && msgSize > 0 { + value = make([]byte, msgSize) + if _, err := rand.Read(value); err != nil { + return fmt.Errorf("generate random payload: %w", err) + } + } else { + value = []byte(fmt.Sprintf("restart-%d", i)) + } + + record := &kgo.Record{Topic: topic, Value: value} + if lfsBlob { + record.Headers = append(record.Headers, kgo.RecordHeader{ + Key: "LFS_BLOB", + Value: nil, // presence signals LFS, value can be checksum for validation + }) + } + + res := client.ProduceSync(ctx, record) if err := res.FirstErr(); err != nil { return err } @@ -138,7 +164,7 @@ func produceMessages(ctx context.Context, client *kgo.Client, topic string, coun return nil } -func consumeMessages(ctx context.Context, client *kgo.Client, topic string, count int, timeout time.Duration) error { +func consumeMessages(ctx context.Context, client *kgo.Client, topic string, count int, timeout time.Duration, printValues bool, printLimit int) error { deadline := time.Now().Add(timeout) received := 0 for received < count { @@ -156,6 +182,13 @@ func consumeMessages(ctx context.Context, client *kgo.Client, topic string, coun } fetches.EachRecord(func(record *kgo.Record) { received++ + if printValues { + value := record.Value + if printLimit > 0 && len(value) > printLimit { + value = value[:printLimit] + } + fmt.Printf("record\t%d\t%d\t%s\n", received, len(record.Value), string(value)) + } }) } return nil @@ -279,3 +312,11 @@ func parseEnvInt64(name string, fallback int64) int64 { } return parsed } + +func parseEnvBool(name string, fallback bool) bool { + val := strings.ToLower(strings.TrimSpace(os.Getenv(name))) + if val == "" { + 
return fallback + } + return val == "true" || val == "1" || val == "yes" +} diff --git a/cmd/idoc-explode/main.go b/cmd/idoc-explode/main.go new file mode 100644 index 00000000..6c218783 --- /dev/null +++ b/cmd/idoc-explode/main.go @@ -0,0 +1,283 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bufio" + "context" + "errors" + "flag" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/KafScale/platform/pkg/idoc" + "github.com/KafScale/platform/pkg/lfs" +) + +func main() { + inputPath := flag.String("input", "", "Path to input file (XML or JSONL envelopes). 
Empty reads stdin.") + outputDir := flag.String("out", envOrDefault("KAFSCALE_IDOC_OUTPUT_DIR", "idoc-output"), "Output directory for topic files") + flag.Parse() + + ctx := context.Background() + resolver, err := buildResolver(ctx) + if err != nil { + fmt.Fprintf(os.Stderr, "resolver: %v\n", err) + os.Exit(1) + } + + input, err := openInput(*inputPath) + if err != nil { + fmt.Fprintf(os.Stderr, "input: %v\n", err) + os.Exit(1) + } + defer func() { _ = input.Close() }() + + cfg := idoc.ExplodeConfig{ + ItemSegments: parseCSV(envOrDefault("KAFSCALE_IDOC_ITEM_SEGMENTS", "E1EDP01,E1EDP19")), + PartnerSegments: parseCSV(envOrDefault("KAFSCALE_IDOC_PARTNER_SEGMENTS", "E1EDKA1")), + StatusSegments: parseCSV(envOrDefault("KAFSCALE_IDOC_STATUS_SEGMENTS", "E1STATS")), + DateSegments: parseCSV(envOrDefault("KAFSCALE_IDOC_DATE_SEGMENTS", "E1EDK03")), + } + topics := idoc.TopicConfig{ + Header: envOrDefault("KAFSCALE_IDOC_TOPIC_HEADER", "idoc-headers"), + Segments: envOrDefault("KAFSCALE_IDOC_TOPIC_SEGMENTS", "idoc-segments"), + Items: envOrDefault("KAFSCALE_IDOC_TOPIC_ITEMS", "idoc-items"), + Partners: envOrDefault("KAFSCALE_IDOC_TOPIC_PARTNERS", "idoc-partners"), + Statuses: envOrDefault("KAFSCALE_IDOC_TOPIC_STATUS", "idoc-status"), + Dates: envOrDefault("KAFSCALE_IDOC_TOPIC_DATES", "idoc-dates"), + } + + writer := newTopicWriter(*outputDir) + if err := writer.ensureDir(); err != nil { + fmt.Fprintf(os.Stderr, "output: %v\n", err) + os.Exit(1) + } + + if isXMLInput(*inputPath) { + payload, err := io.ReadAll(input) + if err != nil { + fmt.Fprintf(os.Stderr, "read xml: %v\n", err) + os.Exit(1) + } + processPayload(ctx, resolver, payload, cfg, topics, writer) + return + } + + scanner := bufio.NewScanner(input) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { + continue + } + processPayload(ctx, resolver, []byte(line), cfg, topics, writer) + } + if err := scanner.Err(); err != nil { + fmt.Fprintf(os.Stderr, "scan: %v\n", err) + os.Exit(1) + } 
+} + +func buildResolver(ctx context.Context) (*lfs.Resolver, error) { + bucket := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_BUCKET")) + region := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_REGION")) + endpoint := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ENDPOINT")) + accessKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ACCESS_KEY")) + secretKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SECRET_KEY")) + sessionToken := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SESSION_TOKEN")) + forcePathStyle := envBoolDefault("KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE", endpoint != "") + maxSize := envInt64("KAFSCALE_IDOC_MAX_BLOB_SIZE", 0) + validate := envBoolDefault("KAFSCALE_IDOC_VALIDATE_CHECKSUM", true) + + if bucket == "" || region == "" { + return lfs.NewResolver(lfs.ResolverConfig{MaxSize: maxSize, ValidateChecksum: validate}, nil), nil + } + + s3Client, err := lfs.NewS3Client(ctx, lfs.S3Config{ + Bucket: bucket, + Region: region, + Endpoint: endpoint, + AccessKeyID: accessKey, + SecretAccessKey: secretKey, + SessionToken: sessionToken, + ForcePathStyle: forcePathStyle, + }) + if err != nil { + return nil, err + } + return lfs.NewResolver(lfs.ResolverConfig{MaxSize: maxSize, ValidateChecksum: validate}, s3Client), nil +} + +func resolvePayload(ctx context.Context, resolver *lfs.Resolver, raw []byte) ([]byte, error) { + trimmed := strings.TrimSpace(string(raw)) + if strings.HasPrefix(trimmed, "<") { + return raw, nil + } + if resolver == nil { + return nil, errors.New("resolver not configured") + } + res, ok, err := resolver.Resolve(ctx, raw) + if err != nil { + return nil, err + } + if !ok { + return raw, nil + } + return res.Payload, nil +} + +func openInput(path string) (*os.File, error) { + if strings.TrimSpace(path) == "" { + return os.Stdin, nil + } + return os.Open(path) +} + +type topicWriter struct { + base string +} + +func newTopicWriter(base string) *topicWriter { + return &topicWriter{base: base} +} + 
+func (w *topicWriter) ensureDir() error { + return os.MkdirAll(w.base, 0o755) +} + +func (w *topicWriter) write(records idoc.TopicRecords) error { + for topic, entries := range records { + if len(entries) == 0 { + continue + } + path := filepath.Join(w.base, fmt.Sprintf("%s.jsonl", topic)) + f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644) + if err != nil { + return err + } + writer := bufio.NewWriter(f) + for _, entry := range entries { + if _, err := writer.Write(entry); err != nil { + _ = f.Close() + return err + } + if err := writer.WriteByte('\n'); err != nil { + _ = f.Close() + return err + } + } + if err := writer.Flush(); err != nil { + _ = f.Close() + return err + } + if err := f.Close(); err != nil { + return err + } + } + return nil +} + +func parseCSV(raw string) []string { + if strings.TrimSpace(raw) == "" { + return nil + } + parts := strings.Split(raw, ",") + out := make([]string, 0, len(parts)) + for _, part := range parts { + val := strings.TrimSpace(part) + if val != "" { + out = append(out, val) + } + } + return out +} + +func envOrDefault(key, fallback string) string { + if val := strings.TrimSpace(os.Getenv(key)); val != "" { + return val + } + return fallback +} + +func envBoolDefault(key string, fallback bool) bool { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + switch strings.ToLower(val) { + case "1", "true", "yes", "y", "on": + return true + case "0", "false", "no", "n", "off": + return false + default: + return fallback + } +} + +func envInt64(key string, fallback int64) int64 { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + parsed, err := parseInt64(val) + if err != nil { + return fallback + } + return parsed +} + +func parseInt64(raw string) (int64, error) { + var out int64 + for _, ch := range strings.TrimSpace(raw) { + if ch < '0' || ch > '9' { + return 0, fmt.Errorf("invalid integer") + } + out = out*10 + int64(ch-'0') + } + return out, 
nil +} + +func processPayload(ctx context.Context, resolver *lfs.Resolver, raw []byte, cfg idoc.ExplodeConfig, topics idoc.TopicConfig, writer *topicWriter) { + payload, err := resolvePayload(ctx, resolver, raw) + if err != nil { + fmt.Fprintf(os.Stderr, "resolve payload: %v\n", err) + return + } + result, err := idoc.ExplodeXML(payload, cfg) + if err != nil { + fmt.Fprintf(os.Stderr, "explode: %v\n", err) + return + } + records, err := result.ToTopicRecords(topics) + if err != nil { + fmt.Fprintf(os.Stderr, "records: %v\n", err) + return + } + if err := writer.write(records); err != nil { + fmt.Fprintf(os.Stderr, "write: %v\n", err) + return + } +} + +func isXMLInput(path string) bool { + if strings.TrimSpace(path) == "" { + return false + } + return strings.HasSuffix(strings.ToLower(path), ".xml") +} diff --git a/cmd/lfs-proxy/backend_auth.go b/cmd/lfs-proxy/backend_auth.go new file mode 100644 index 00000000..bfb0f784 --- /dev/null +++ b/cmd/lfs-proxy/backend_auth.go @@ -0,0 +1,105 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "context" + "crypto/tls" + "errors" + "fmt" + "net" + "strings" + "time" + + "github.com/KafScale/platform/pkg/protocol" +) + +const ( + apiKeySaslHandshake int16 = 17 + apiKeySaslAuthenticate int16 = 36 +) + +func (p *lfsProxy) wrapBackendTLS(ctx context.Context, conn net.Conn, addr string) (net.Conn, error) { + if p.backendTLSConfig == nil { + return conn, nil + } + cfg := p.backendTLSConfig.Clone() + if cfg.ServerName == "" { + if host, _, err := net.SplitHostPort(addr); err == nil { + cfg.ServerName = host + } + } + tlsConn := tls.Client(conn, cfg) + deadline := time.Now().Add(p.dialTimeout) + if ctxDeadline, ok := ctx.Deadline(); ok { + deadline = ctxDeadline + } + _ = tlsConn.SetDeadline(deadline) + if err := tlsConn.Handshake(); err != nil { + return nil, err + } + _ = tlsConn.SetDeadline(time.Time{}) + return tlsConn, nil +} + +func (p *lfsProxy) performBackendSASL(ctx context.Context, conn net.Conn) error { + mech := strings.TrimSpace(p.backendSASLMechanism) + if mech == "" { + return nil + } + if strings.ToUpper(mech) != "PLAIN" { + return fmt.Errorf("unsupported SASL mechanism %q", mech) + } + if p.backendSASLUsername == "" { + return errors.New("backend SASL username required") + } + + // 1) Handshake + correlationID := int32(1) + handshakeReq, err := encodeSaslHandshakeRequest(&protocol.RequestHeader{ + APIKey: apiKeySaslHandshake, + APIVersion: 1, + CorrelationID: correlationID, + }, mech) + if err != nil { + return err + } + if err := protocol.WriteFrame(conn, handshakeReq); err != nil { + return err + } + if err := readSaslResponse(conn); err != nil { + return fmt.Errorf("sasl handshake failed: %w", err) + } + + // 2) Authenticate + authBytes := buildSaslPlainAuthBytes(p.backendSASLUsername, p.backendSASLPassword) + authReq, err := encodeSaslAuthenticateRequest(&protocol.RequestHeader{ + APIKey: apiKeySaslAuthenticate, + APIVersion: 1, + CorrelationID: correlationID + 1, + }, authBytes) + if err != nil { + return err + 
} + if err := protocol.WriteFrame(conn, authReq); err != nil { + return err + } + if err := readSaslResponse(conn); err != nil { + return fmt.Errorf("sasl authenticate failed: %w", err) + } + + return nil +} diff --git a/cmd/lfs-proxy/backend_tls.go b/cmd/lfs-proxy/backend_tls.go new file mode 100644 index 00000000..a691cdfa --- /dev/null +++ b/cmd/lfs-proxy/backend_tls.go @@ -0,0 +1,68 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "crypto/tls" + "crypto/x509" + "errors" + "os" + "strings" +) + +func buildBackendTLSConfig() (*tls.Config, error) { + enabled := envBoolDefault("KAFSCALE_LFS_PROXY_BACKEND_TLS_ENABLED", false) + if !enabled { + return nil, nil + } + caFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_CA_FILE")) + certFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_CERT_FILE")) + keyFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_KEY_FILE")) + serverName := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_SERVER_NAME")) + insecureSkip := envBoolDefault("KAFSCALE_LFS_PROXY_BACKEND_TLS_INSECURE_SKIP_VERIFY", false) + + var rootCAs *x509.CertPool + if caFile != "" { + caPEM, err := os.ReadFile(caFile) + if err != nil { + return nil, err + } + rootCAs = x509.NewCertPool() + if !rootCAs.AppendCertsFromPEM(caPEM) { + return nil, errors.New("failed to parse backend TLS CA file") + } + } + + var certs []tls.Certificate + if certFile != "" || keyFile != "" { + if certFile == "" || keyFile == "" { + return nil, errors.New("backend TLS cert and key must both be set") + } + cert, err := tls.LoadX509KeyPair(certFile, keyFile) + if err != nil { + return nil, err + } + certs = append(certs, cert) + } + + return &tls.Config{ + RootCAs: rootCAs, + Certificates: certs, + ServerName: serverName, + InsecureSkipVerify: insecureSkip, + MinVersion: tls.VersionTLS12, + }, nil +} diff --git a/cmd/lfs-proxy/handler.go b/cmd/lfs-proxy/handler.go new file mode 100644 index 00000000..ad302b36 --- /dev/null +++ b/cmd/lfs-proxy/handler.go @@ -0,0 +1,1120 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "errors" + "fmt" + "hash/crc32" + "log/slog" + "net" + "net/http" + "strings" + "sync/atomic" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/KafScale/platform/pkg/metadata" + "github.com/KafScale/platform/pkg/protocol" + "github.com/twmb/franz-go/pkg/kgo" + "github.com/twmb/franz-go/pkg/kmsg" +) + +func (p *lfsProxy) listenAndServe(ctx context.Context) error { + ln, err := net.Listen("tcp", p.addr) + if err != nil { + return err + } + p.logger.Info("lfs proxy listening", "addr", ln.Addr().String()) + + go func() { + <-ctx.Done() + _ = ln.Close() + }() + + for { + conn, err := ln.Accept() + if err != nil { + select { + case <-ctx.Done(): + return nil + default: + } + if ne, ok := err.(net.Error); ok && !ne.Timeout() { + p.logger.Warn("accept temporary error", "error", err) + continue + } + return err + } + p.logger.Debug("connection accepted", "remote", conn.RemoteAddr().String()) + go p.handleConnection(ctx, conn) + } +} + +func (p *lfsProxy) setReady(ready bool) { + prev := atomic.LoadUint32(&p.ready) + if ready { + atomic.StoreUint32(&p.ready, 1) + if prev == 0 { + p.logger.Info("proxy ready state changed", "ready", true) + } + return + } + atomic.StoreUint32(&p.ready, 0) + if prev == 1 { + p.logger.Warn("proxy ready state changed", "ready", false) + } +} + +func (p *lfsProxy) isReady() bool { + readyFlag := atomic.LoadUint32(&p.ready) == 1 + cacheFresh := p.cacheFresh() + s3Healthy := p.isS3Healthy() + ready := readyFlag && cacheFresh && s3Healthy + if !ready { + 
p.logger.Debug("ready check failed", "readyFlag", readyFlag, "cacheFresh", cacheFresh, "s3Healthy", s3Healthy) + } + return ready +} + +func (p *lfsProxy) markS3Healthy(ok bool) { + if ok { + atomic.StoreUint32(&p.s3Healthy, 1) + return + } + atomic.StoreUint32(&p.s3Healthy, 0) +} + +func (p *lfsProxy) isS3Healthy() bool { + return atomic.LoadUint32(&p.s3Healthy) == 1 +} + +func (p *lfsProxy) startS3HealthCheck(ctx context.Context, interval time.Duration) { + if interval <= 0 { + interval = time.Duration(defaultS3HealthIntervalSec) * time.Second + } + ticker := time.NewTicker(interval) + go func() { + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + err := p.s3Uploader.HeadBucket(ctx) + wasHealthy := p.isS3Healthy() + p.markS3Healthy(err == nil) + if err != nil && wasHealthy { + p.logger.Warn("s3 health check failed", "error", err) + } else if err == nil && !wasHealthy { + p.logger.Info("s3 health check recovered") + } + } + } + }() +} + +func (p *lfsProxy) setCachedBackends(backends []string) { + if len(backends) == 0 { + return + } + copied := make([]string, len(backends)) + copy(copied, backends) + p.cacheMu.Lock() + p.cachedBackends = copied + p.cacheMu.Unlock() +} + +func (p *lfsProxy) cachedBackendsSnapshot() []string { + p.cacheMu.RLock() + if len(p.cachedBackends) == 0 { + p.cacheMu.RUnlock() + return nil + } + copied := make([]string, len(p.cachedBackends)) + copy(copied, p.cachedBackends) + p.cacheMu.RUnlock() + return copied +} + +func (p *lfsProxy) touchHealthy() { + atomic.StoreInt64(&p.lastHealthy, time.Now().UnixNano()) +} + +func (p *lfsProxy) cacheFresh() bool { + // Static backends are always fresh (no TTL expiry) + if len(p.backends) > 0 { + return true + } + last := atomic.LoadInt64(&p.lastHealthy) + if last == 0 { + return false + } + return time.Since(time.Unix(0, last)) <= p.cacheTTL +} + +func (p *lfsProxy) startBackendRefresh(ctx context.Context, backoff time.Duration, interval time.Duration) { + if 
p.store == nil || len(p.backends) > 0 { + p.logger.Debug("backend refresh disabled", "hasStore", p.store != nil, "staticBackends", len(p.backends)) + return + } + if backoff <= 0 { + backoff = time.Duration(defaultBackendBackoffMs) * time.Millisecond + } + if interval <= 0 { + interval = time.Duration(defaultBackendRefreshIntervalSec) * time.Second + } + ticker := time.NewTicker(interval) + go func() { + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + backends, err := p.refreshBackends(ctx) + if err != nil { + p.logger.Warn("backend refresh failed", "error", err) + if !p.cacheFresh() { + p.setReady(false) + } + time.Sleep(backoff) + } else { + p.logger.Debug("backend refresh succeeded", "count", len(backends)) + } + } + } + }() +} + +func (p *lfsProxy) refreshBackends(ctx context.Context) ([]string, error) { + backends, err := p.currentBackends(ctx) + if err != nil { + return nil, err + } + if len(backends) > 0 { + p.touchHealthy() + p.setReady(true) + } + return backends, nil +} + +func (p *lfsProxy) startHealthServer(ctx context.Context, addr string) { + mux := http.NewServeMux() + mux.HandleFunc("/readyz", func(w http.ResponseWriter, _ *http.Request) { + if p.isReady() || (len(p.cachedBackendsSnapshot()) > 0 && p.cacheFresh() && p.isS3Healthy()) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ready\n")) + return + } + http.Error(w, "not ready", http.StatusServiceUnavailable) + }) + mux.HandleFunc("/livez", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok\n")) + }) + srv := &http.Server{ + Addr: addr, + Handler: mux, + ReadTimeout: p.httpReadTimeout, + WriteTimeout: p.httpWriteTimeout, + IdleTimeout: p.httpIdleTimeout, + ReadHeaderTimeout: p.httpHeaderTimeout, + MaxHeaderBytes: p.httpMaxHeaderBytes, + } + go func() { + <-ctx.Done() + shutdownCtx, cancel := context.WithTimeout(context.Background(), p.httpShutdownTimeout) + defer cancel() + _ = 
srv.Shutdown(shutdownCtx) + }() + go func() { + p.logger.Info("lfs proxy health listening", "addr", addr) + if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + p.logger.Warn("lfs proxy health server error", "error", err) + } + }() +} + +func (p *lfsProxy) handleConnection(ctx context.Context, conn net.Conn) { + defer func() { _ = conn.Close() }() + var backendConn net.Conn + var backendAddr string + + for { + frame, err := protocol.ReadFrame(conn) + if err != nil { + p.logger.Debug("connection read ended", "remote", conn.RemoteAddr().String(), "error", err) + return + } + header, _, err := protocol.ParseRequestHeader(frame.Payload) + if err != nil { + p.logger.Warn("parse request header failed", "error", err) + return + } + p.logger.Debug("request received", "apiKey", header.APIKey, "correlationId", header.CorrelationID, "remote", conn.RemoteAddr().String()) + + if header.APIKey == protocol.APIKeyApiVersion { + resp, err := p.handleApiVersions(header) + if err != nil { + p.logger.Warn("api versions handling failed", "error", err) + return + } + if err := protocol.WriteFrame(conn, resp); err != nil { + p.logger.Warn("write api versions response failed", "error", err) + return + } + continue + } + + if !p.isReady() { + p.logger.Warn("rejecting request: proxy not ready", "apiKey", header.APIKey, "remote", conn.RemoteAddr().String()) + resp, ok, err := p.buildNotReadyResponse(header, frame.Payload) + if err != nil { + p.logger.Warn("not-ready response build failed", "error", err) + return + } + if ok { + if err := protocol.WriteFrame(conn, resp); err != nil { + p.logger.Warn("write not-ready response failed", "error", err) + } + } + return + } + + switch header.APIKey { + case protocol.APIKeyMetadata: + resp, err := p.handleMetadata(ctx, header, frame.Payload) + if err != nil { + p.logger.Warn("metadata handling failed", "error", err) + return + } + if err := protocol.WriteFrame(conn, resp); err != nil { + p.logger.Warn("write metadata 
response failed", "error", err) + return + } + continue + case protocol.APIKeyFindCoordinator: + resp, err := p.handleFindCoordinator(header) + if err != nil { + p.logger.Warn("find coordinator handling failed", "error", err) + return + } + if err := protocol.WriteFrame(conn, resp); err != nil { + p.logger.Warn("write coordinator response failed", "error", err) + return + } + continue + case protocol.APIKeyProduce: + resp, handled, err := p.handleProduce(ctx, header, frame.Payload) + if err != nil { + p.logger.Warn("produce handling failed", "error", err) + if resp != nil { + _ = protocol.WriteFrame(conn, resp) + } + return + } + if handled { + if err := protocol.WriteFrame(conn, resp); err != nil { + p.logger.Warn("write produce response failed", "error", err) + } + continue + } + default: + } + + if backendConn == nil { + backendConn, backendAddr, err = p.connectBackend(ctx) + if err != nil { + p.logger.Error("backend connect failed", "error", err) + p.respondBackendError(conn, header, frame.Payload) + return + } + } + + resp, err := p.forwardToBackend(ctx, backendConn, backendAddr, frame.Payload) + if err != nil { + _ = backendConn.Close() + backendConn, backendAddr, err = p.connectBackend(ctx) + if err != nil { + p.logger.Warn("backend reconnect failed", "error", err) + p.respondBackendError(conn, header, frame.Payload) + return + } + resp, err = p.forwardToBackend(ctx, backendConn, backendAddr, frame.Payload) + if err != nil { + p.logger.Warn("backend forward failed", "error", err) + p.respondBackendError(conn, header, frame.Payload) + return + } + } + if err := protocol.WriteFrame(conn, resp); err != nil { + p.logger.Warn("write response failed", "error", err) + return + } + } +} + +func (p *lfsProxy) handleApiVersions(header *protocol.RequestHeader) ([]byte, error) { + resp := kmsg.NewPtrApiVersionsResponse() + resp.ErrorCode = protocol.NONE + resp.ApiKeys = p.apiVersions + return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp), nil +} 
+ +func (p *lfsProxy) respondBackendError(conn net.Conn, header *protocol.RequestHeader, payload []byte) { + resp, ok, err := p.buildNotReadyResponse(header, payload) + if err != nil || !ok { + return + } + _ = protocol.WriteFrame(conn, resp) +} + +func (p *lfsProxy) handleMetadata(ctx context.Context, header *protocol.RequestHeader, payload []byte) ([]byte, error) { + _, req, err := protocol.ParseRequest(payload) + if err != nil { + return nil, err + } + metaReq, ok := req.(*kmsg.MetadataRequest) + if !ok { + return nil, fmt.Errorf("unexpected metadata request type %T", req) + } + + meta, err := p.loadMetadata(ctx, metaReq) + if err != nil { + return nil, err + } + p.logger.Debug("metadata response", "advertisedHost", p.advertisedHost, "advertisedPort", p.advertisedPort, "topics", len(meta.Topics)) + resp := buildProxyMetadataResponse(meta, header.CorrelationID, header.APIVersion, p.advertisedHost, p.advertisedPort) + return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp), nil +} + +func (p *lfsProxy) handleFindCoordinator(header *protocol.RequestHeader) ([]byte, error) { + resp := kmsg.NewPtrFindCoordinatorResponse() + resp.ErrorCode = protocol.NONE + resp.NodeID = 0 + resp.Host = p.advertisedHost + resp.Port = p.advertisedPort + return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp), nil +} + +func (p *lfsProxy) loadMetadata(ctx context.Context, req *kmsg.MetadataRequest) (*metadata.ClusterMetadata, error) { + var zeroID [16]byte + useIDs := false + var topicNames []string + if req.Topics != nil { + for _, t := range req.Topics { + if t.TopicID != zeroID { + useIDs = true + break + } + if t.Topic != nil { + topicNames = append(topicNames, *t.Topic) + } + } + } + if !useIDs { + return p.store.Metadata(ctx, topicNames) + } + all, err := p.store.Metadata(ctx, nil) + if err != nil { + return nil, err + } + index := make(map[[16]byte]protocol.MetadataTopic, len(all.Topics)) + for _, topic := range all.Topics { + 
index[topic.TopicID] = topic + } + filtered := make([]protocol.MetadataTopic, 0, len(req.Topics)) + for _, t := range req.Topics { + if t.TopicID == zeroID { + continue + } + if topic, ok := index[t.TopicID]; ok { + filtered = append(filtered, topic) + } else { + filtered = append(filtered, protocol.MetadataTopic{ + ErrorCode: protocol.UNKNOWN_TOPIC_ID, + TopicID: t.TopicID, + }) + } + } + return &metadata.ClusterMetadata{ + Brokers: all.Brokers, + ClusterID: all.ClusterID, + ControllerID: all.ControllerID, + Topics: filtered, + }, nil +} + +func (p *lfsProxy) handleProduce(ctx context.Context, header *protocol.RequestHeader, payload []byte) ([]byte, bool, error) { + start := time.Now() + _, req, err := protocol.ParseRequest(payload) + if err != nil { + return nil, false, err + } + prodReq, ok := req.(*kmsg.ProduceRequest) + if !ok { + return nil, false, fmt.Errorf("unexpected produce request type %T", req) + } + + p.logger.Debug("handling produce request", "topics", topicsFromProduce(prodReq)) + lfsResult, err := p.rewriteProduceRecords(ctx, header, prodReq) + if err != nil { + for _, topic := range topicsFromProduce(prodReq) { + p.metrics.IncRequests(topic, "error", "lfs") + } + resp, errResp := buildProduceErrorResponse(prodReq, header.CorrelationID, header.APIVersion, protocol.UNKNOWN_SERVER_ERROR) + if errResp != nil { + return nil, true, err + } + return resp, true, err + } + if !lfsResult.modified { + for _, topic := range topicsFromProduce(prodReq) { + p.metrics.IncRequests(topic, "ok", "passthrough") + } + return nil, false, nil + } + for topic := range lfsResult.topics { + p.metrics.IncRequests(topic, "ok", "lfs") + } + p.metrics.ObserveUploadDuration(time.Since(start).Seconds()) + p.metrics.AddUploadBytes(lfsResult.uploadBytes) + + backendConn, backendAddr, err := p.connectBackend(ctx) + if err != nil { + p.trackOrphans(lfsResult.orphans) + return nil, true, err + } + defer func() { _ = backendConn.Close() }() + + resp, err := p.forwardToBackend(ctx, 
backendConn, backendAddr, lfsResult.payload) + if err != nil { + p.trackOrphans(lfsResult.orphans) + } + return resp, true, err +} + +func (p *lfsProxy) rewriteProduceRecords(ctx context.Context, header *protocol.RequestHeader, req *kmsg.ProduceRequest) (rewriteResult, error) { + if p.logger == nil { + p.logger = slog.Default() + } + + if req == nil { + return rewriteResult{}, errors.New("nil produce request") + } + + modified := false + uploadBytes := int64(0) + decompressor := kgo.DefaultDecompressor() + topics := make(map[string]struct{}) + orphans := make([]orphanInfo, 0, 4) + + for ti := range req.Topics { + topic := &req.Topics[ti] + for pi := range topic.Partitions { + partition := &topic.Partitions[pi] + if len(partition.Records) == 0 { + continue + } + batches, err := decodeRecordBatches(partition.Records) + if err != nil { + return rewriteResult{}, err + } + batchModified := false + for bi := range batches { + batch := &batches[bi] + records, codec, err := decodeBatchRecords(batch, decompressor) + if err != nil { + return rewriteResult{}, err + } + if len(records) == 0 { + continue + } + recordChanged := false + for ri := range records { + rec := &records[ri] + headers := rec.Headers + lfsValue, ok := findHeaderValue(headers, "LFS_BLOB") + if !ok { + continue + } + recordChanged = true + modified = true + topics[topic.Topic] = struct{}{} + checksumHeader := strings.TrimSpace(string(lfsValue)) + algHeader, _ := findHeaderValue(headers, "LFS_BLOB_ALG") + alg, err := p.resolveChecksumAlg(string(algHeader)) + if err != nil { + return rewriteResult{}, err + } + if checksumHeader != "" && alg == lfs.ChecksumNone { + return rewriteResult{}, errors.New("checksum provided but checksum algorithm is none") + } + payload := rec.Value + p.logger.Info("LFS blob detected", "topic", topic.Topic, "size", len(payload)) + if int64(len(payload)) > p.maxBlob { + p.logger.Error("blob exceeds max size", "size", len(payload), "max", p.maxBlob) + return rewriteResult{}, 
fmt.Errorf("blob size %d exceeds max %d", len(payload), p.maxBlob)
+ }
+ key := p.buildObjectKey(topic.Topic)
+ sha256Hex, checksum, checksumAlg, err := p.s3Uploader.Upload(ctx, key, payload, alg)
+ if err != nil {
+ p.metrics.IncS3Errors()
+ return rewriteResult{}, err
+ }
+ // Client-supplied checksum disagrees with what was uploaded: delete the
+ // S3 object again; if that delete fails the key is tracked as an orphan.
+ if checksumHeader != "" && checksum != "" && !strings.EqualFold(checksumHeader, checksum) {
+ if err := p.s3Uploader.DeleteObject(ctx, key); err != nil {
+ p.trackOrphans([]orphanInfo{{Topic: topic.Topic, Key: key, RequestID: "", Reason: "checksum_mismatch_delete_failed"}})
+ return rewriteResult{}, fmt.Errorf("checksum mismatch; delete failed: %w", err)
+ }
+ return rewriteResult{}, &lfs.ChecksumError{Expected: checksumHeader, Actual: checksum}
+ }
+ env := lfs.Envelope{
+ Version: 1,
+ Bucket: p.s3Bucket,
+ Key: key,
+ Size: int64(len(payload)),
+ SHA256: sha256Hex,
+ Checksum: checksum,
+ ChecksumAlg: checksumAlg,
+ ContentType: headerValue(headers, "content-type"),
+ OriginalHeaders: headersToMap(headers),
+ CreatedAt: time.Now().UTC().Format(time.RFC3339),
+ ProxyID: p.proxyID,
+ }
+ encoded, err := lfs.EncodeEnvelope(env)
+ if err != nil {
+ return rewriteResult{}, err
+ }
+ rec.Value = encoded
+ rec.Headers = dropHeader(headers, "LFS_BLOB")
+ uploadBytes += int64(len(payload))
+ // Remember the uploaded key so it can be reclaimed if the downstream
+ // Kafka produce later fails.
+ orphans = append(orphans, orphanInfo{Topic: topic.Topic, Key: key, RequestID: "", Reason: "kafka_produce_failed"})
+ }
+ if !recordChanged {
+ continue
+ }
+ newRecords := encodeRecords(records)
+ compressedRecords, usedCodec, err := compressRecords(codec, newRecords)
+ if err != nil {
+ return rewriteResult{}, err
+ }
+ batch.Records = compressedRecords
+ batch.NumRecords = int32(len(records))
+ batch.Attributes = (batch.Attributes &^ 0x0007) | int16(usedCodec)
+ // Length and CRC are computed over the serialized form, so the batch is
+ // serialized, the fields patched, then serialized again. The CRC covers
+ // bytes from offset 21 onward (past base offset, length, epoch, magic,
+ // and the CRC field itself) using the Castagnoli (CRC-32C) table.
+ batch.Length = 0
+ batch.CRC = 0
+ batchBytes := batch.AppendTo(nil)
+ batch.Length = int32(len(batchBytes) - 12)
+ batchBytes = batch.AppendTo(nil)
+ batch.CRC = int32(crc32.Checksum(batchBytes[21:], crc32cTable))
+ batchBytes = batch.AppendTo(nil)
+ 
batch.Raw = batchBytes
+ batchModified = true
+ }
+ if !batchModified {
+ continue
+ }
+ partition.Records = joinRecordBatches(batches)
+ }
+ }
+ if !modified {
+ return rewriteResult{modified: false}, nil
+ }
+
+ payloadBytes, err := encodeProduceRequest(header, req)
+ if err != nil {
+ return rewriteResult{}, err
+ }
+ return rewriteResult{modified: true, payload: payloadBytes, uploadBytes: uploadBytes, topics: topics, orphans: orphans}, nil
+}
+
+// buildObjectKey builds a namespaced, date-partitioned S3 object key of the
+// form <ns>/<topic>/lfs/YYYY/MM/DD/obj-<uuid> (UTC date). A blank configured
+// namespace falls back to "default".
+func (p *lfsProxy) buildObjectKey(topic string) string {
+ ns := strings.TrimSpace(p.s3Namespace)
+ if ns == "" {
+ ns = "default"
+ }
+ now := time.Now().UTC()
+ return fmt.Sprintf("%s/%s/lfs/%04d/%02d/%02d/obj-%s", ns, topic, now.Year(), now.Month(), now.Day(), newUUID())
+}
+
+// connectBackend dials one of the current Kafka backends (round-robin via
+// p.rr), wraps the connection in TLS and performs backend SASL when those
+// are configured. Retry count and backoff come from
+// KAFSCALE_LFS_PROXY_BACKEND_RETRIES / KAFSCALE_LFS_PROXY_BACKEND_BACKOFF_MS;
+// when discovery fails, a fresh cached backend list is used as fallback.
+// NOTE(review): time.Sleep between attempts ignores ctx cancellation —
+// consider selecting on ctx.Done() as well.
+func (p *lfsProxy) connectBackend(ctx context.Context) (net.Conn, string, error) {
+ retries := envInt("KAFSCALE_LFS_PROXY_BACKEND_RETRIES", 6)
+ if retries < 1 {
+ retries = 1
+ }
+ backoff := time.Duration(envInt("KAFSCALE_LFS_PROXY_BACKEND_BACKOFF_MS", 500)) * time.Millisecond
+ if backoff <= 0 {
+ backoff = time.Duration(defaultBackendBackoffMs) * time.Millisecond
+ }
+ var lastErr error
+ for attempt := 0; attempt < retries; attempt++ {
+ backends, err := p.currentBackends(ctx)
+ if err != nil || len(backends) == 0 {
+ if cached := p.cachedBackendsSnapshot(); len(cached) > 0 && p.cacheFresh() {
+ backends = cached
+ err = nil
+ }
+ }
+ if err != nil || len(backends) == 0 {
+ lastErr = err
+ time.Sleep(backoff)
+ continue
+ }
+ index := atomic.AddUint32(&p.rr, 1)
+ addr := backends[int(index)%len(backends)]
+ dialer := net.Dialer{Timeout: p.dialTimeout}
+ conn, dialErr := dialer.DialContext(ctx, "tcp", addr)
+ if dialErr == nil {
+ wrapped, err := p.wrapBackendTLS(ctx, conn, addr)
+ if err != nil {
+ _ = conn.Close()
+ lastErr = err
+ time.Sleep(backoff)
+ continue
+ }
+ if err := p.performBackendSASL(ctx, wrapped); err != nil {
+ _ = wrapped.Close()
+ lastErr = err
+ time.Sleep(backoff)
+ continue
+ }
+ return wrapped, addr, nil
+ }
+ 
lastErr = dialErr
+ time.Sleep(backoff)
+ }
+ if lastErr == nil {
+ lastErr = errors.New("no backends available")
+ }
+ return nil, "", lastErr
+}
+
+// currentBackends returns the statically configured backend list when one is
+// set; otherwise it discovers broker host:port addresses from store metadata.
+// A non-empty discovery result refreshes the cache, health timestamp and
+// readiness flag.
+func (p *lfsProxy) currentBackends(ctx context.Context) ([]string, error) {
+ if len(p.backends) > 0 {
+ return p.backends, nil
+ }
+ meta, err := p.store.Metadata(ctx, nil)
+ if err != nil {
+ return nil, err
+ }
+ addrs := make([]string, 0, len(meta.Brokers))
+ for _, broker := range meta.Brokers {
+ if broker.Host == "" || broker.Port == 0 {
+ continue
+ }
+ addrs = append(addrs, fmt.Sprintf("%s:%d", broker.Host, broker.Port))
+ }
+ if len(addrs) > 0 {
+ p.setCachedBackends(addrs)
+ p.touchHealthy()
+ p.setReady(true)
+ }
+ return addrs, nil
+}
+
+// forwardToBackend writes one Kafka protocol frame to conn and reads back a
+// single response frame.
+// NOTE(review): ctx and backendAddr are currently unused, and no read/write
+// deadline is applied to conn — confirm this is intentional.
+func (p *lfsProxy) forwardToBackend(ctx context.Context, conn net.Conn, backendAddr string, payload []byte) ([]byte, error) {
+ if err := protocol.WriteFrame(conn, payload); err != nil {
+ return nil, err
+ }
+ frame, err := protocol.ReadFrame(conn)
+ if err != nil {
+ return nil, err
+ }
+ return frame.Payload, nil
+}
+
+// buildProxyMetadataResponse rewrites cluster metadata so every healthy
+// topic/partition appears to be led by the single proxy node (NodeID 0 at
+// host:port); topics carrying an error code are passed through unchanged.
+func buildProxyMetadataResponse(meta *metadata.ClusterMetadata, correlationID int32, version int16, host string, port int32) *kmsg.MetadataResponse {
+ brokers := []protocol.MetadataBroker{{
+ NodeID: 0,
+ Host: host,
+ Port: port,
+ }}
+ topics := make([]protocol.MetadataTopic, 0, len(meta.Topics))
+ for _, topic := range meta.Topics {
+ if topic.ErrorCode != protocol.NONE {
+ topics = append(topics, topic)
+ continue
+ }
+ partitions := make([]protocol.MetadataPartition, 0, len(topic.Partitions))
+ for _, part := range topic.Partitions {
+ partitions = append(partitions, protocol.MetadataPartition{
+ ErrorCode: part.ErrorCode,
+ Partition: part.Partition,
+ Leader: 0,
+ LeaderEpoch: part.LeaderEpoch,
+ Replicas: []int32{0},
+ ISR: []int32{0},
+ })
+ }
+ topics = append(topics, protocol.MetadataTopic{
+ ErrorCode: topic.ErrorCode,
+ Topic: topic.Topic,
+ TopicID: topic.TopicID,
+ IsInternal: topic.IsInternal,
+ Partitions: partitions,
+ })
+ }
+ resp := 
kmsg.NewPtrMetadataResponse()
+ resp.Brokers = brokers
+ resp.ClusterID = meta.ClusterID
+ resp.ControllerID = 0
+ resp.Topics = topics
+ return resp
+}
+
+// buildNotReadyResponse synthesizes a REQUEST_TIMED_OUT response for
+// Metadata, FindCoordinator and Produce requests received while the proxy is
+// not ready. The bool result reports whether a response was produced; for
+// any other API key it returns (nil, false, nil) so the caller can fall back
+// to other handling.
+func (p *lfsProxy) buildNotReadyResponse(header *protocol.RequestHeader, payload []byte) ([]byte, bool, error) {
+ _, req, err := protocol.ParseRequest(payload)
+ if err != nil {
+ return nil, false, err
+ }
+ encode := func(resp kmsg.Response) ([]byte, bool, error) {
+ return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp), true, nil
+ }
+ switch header.APIKey {
+ case protocol.APIKeyMetadata:
+ metaReq := req.(*kmsg.MetadataRequest)
+ resp := kmsg.NewPtrMetadataResponse()
+ resp.ControllerID = -1
+ for _, t := range metaReq.Topics {
+ mt := kmsg.NewMetadataResponseTopic()
+ mt.ErrorCode = protocol.REQUEST_TIMED_OUT
+ mt.Topic = t.Topic
+ mt.TopicID = t.TopicID
+ resp.Topics = append(resp.Topics, mt)
+ }
+ return encode(resp)
+ case protocol.APIKeyFindCoordinator:
+ resp := kmsg.NewPtrFindCoordinatorResponse()
+ resp.ErrorCode = protocol.REQUEST_TIMED_OUT
+ resp.NodeID = -1
+ return encode(resp)
+ case protocol.APIKeyProduce:
+ prodReq := req.(*kmsg.ProduceRequest)
+ resp := kmsg.NewPtrProduceResponse()
+ for _, topic := range prodReq.Topics {
+ rt := kmsg.NewProduceResponseTopic()
+ rt.Topic = topic.Topic
+ for _, part := range topic.Partitions {
+ rp := kmsg.NewProduceResponseTopicPartition()
+ rp.Partition = part.Partition
+ rp.ErrorCode = protocol.REQUEST_TIMED_OUT
+ rp.BaseOffset = -1
+ rp.LogAppendTime = -1
+ rp.LogStartOffset = -1
+ rt.Partitions = append(rt.Partitions, rp)
+ }
+ resp.Topics = append(resp.Topics, rt)
+ }
+ return encode(resp)
+ default:
+ return nil, false, nil
+ }
+}
+
+// buildProduceErrorResponse fabricates a ProduceResponse carrying `code` for
+// every topic/partition of the request, with BaseOffset/LogAppendTime/
+// LogStartOffset set to -1. The error result is always nil as written.
+func buildProduceErrorResponse(req *kmsg.ProduceRequest, correlationID int32, version int16, code int16) ([]byte, error) {
+ resp := kmsg.NewPtrProduceResponse()
+ for _, topic := range req.Topics {
+ rt := kmsg.NewProduceResponseTopic()
+ rt.Topic = topic.Topic
+ for _, part := range 
topic.Partitions {
+ rp := kmsg.NewProduceResponseTopicPartition()
+ rp.Partition = part.Partition
+ rp.ErrorCode = code
+ rp.BaseOffset = -1
+ rp.LogAppendTime = -1
+ rp.LogStartOffset = -1
+ rt.Partitions = append(rt.Partitions, rp)
+ }
+ resp.Topics = append(resp.Topics, rt)
+ }
+ return protocol.EncodeResponse(correlationID, version, resp), nil
+}
+
+// generateProxyApiVersions advertises the API key version ranges this proxy
+// supports; keys listed in `unsupported` are reported with Min/MaxVersion -1
+// so clients do not attempt them.
+func generateProxyApiVersions() []kmsg.ApiVersionsResponseApiKey {
+ supported := []struct {
+ key int16
+ min, max int16
+ }{
+ {key: protocol.APIKeyApiVersion, min: 0, max: 4},
+ {key: protocol.APIKeyMetadata, min: 0, max: 12},
+ {key: protocol.APIKeyProduce, min: 0, max: 9},
+ {key: protocol.APIKeyFetch, min: 11, max: 13},
+ {key: protocol.APIKeyFindCoordinator, min: 3, max: 3},
+ {key: protocol.APIKeyListOffsets, min: 0, max: 4},
+ {key: protocol.APIKeyJoinGroup, min: 4, max: 4},
+ {key: protocol.APIKeySyncGroup, min: 4, max: 4},
+ {key: protocol.APIKeyHeartbeat, min: 4, max: 4},
+ {key: protocol.APIKeyLeaveGroup, min: 4, max: 4},
+ {key: protocol.APIKeyOffsetCommit, min: 3, max: 3},
+ {key: protocol.APIKeyOffsetFetch, min: 5, max: 5},
+ {key: protocol.APIKeyDescribeGroups, min: 5, max: 5},
+ {key: protocol.APIKeyListGroups, min: 5, max: 5},
+ {key: protocol.APIKeyOffsetForLeaderEpoch, min: 3, max: 3},
+ {key: protocol.APIKeyDescribeConfigs, min: 4, max: 4},
+ {key: protocol.APIKeyAlterConfigs, min: 1, max: 1},
+ {key: protocol.APIKeyCreatePartitions, min: 0, max: 3},
+ {key: protocol.APIKeyCreateTopics, min: 0, max: 2},
+ {key: protocol.APIKeyDeleteTopics, min: 0, max: 2},
+ {key: protocol.APIKeyDeleteGroups, min: 0, max: 2},
+ }
+ unsupported := []int16{4, 5, 6, 7, 21, 22, 24, 25, 26}
+ entries := make([]kmsg.ApiVersionsResponseApiKey, 0, len(supported)+len(unsupported))
+ for _, entry := range supported {
+ entries = append(entries, kmsg.ApiVersionsResponseApiKey{
+ ApiKey: entry.key,
+ MinVersion: entry.min,
+ MaxVersion: entry.max,
+ })
+ }
+ for _, key := range unsupported {
+ entries = append(entries, 
kmsg.ApiVersionsResponseApiKey{
+ ApiKey: key,
+ MinVersion: -1,
+ MaxVersion: -1,
+ })
+ }
+ return entries
+}
+
+// topicsFromProduce returns the deduplicated topic names of a produce
+// request, in first-seen order; a nil request yields nil and an empty topic
+// list yields ["unknown"] so metrics always have a label.
+func topicsFromProduce(req *kmsg.ProduceRequest) []string {
+ if req == nil {
+ return nil
+ }
+ seen := make(map[string]struct{}, len(req.Topics))
+ out := make([]string, 0, len(req.Topics))
+ for _, topic := range req.Topics {
+ if _, ok := seen[topic.Topic]; ok {
+ continue
+ }
+ seen[topic.Topic] = struct{}{}
+ out = append(out, topic.Topic)
+ }
+ if len(out) == 0 {
+ return []string{"unknown"}
+ }
+ return out
+}
+
+// recordBatch pairs a decoded kmsg.RecordBatch with the exact raw bytes it
+// was parsed from, so unmodified batches can be re-emitted verbatim.
+type recordBatch struct {
+ kmsg.RecordBatch
+ Raw []byte
+}
+
+// rewriteResult reports the outcome of rewriteProduceRecords: whether the
+// request changed, the re-encoded payload, upload volume, affected topics,
+// and S3 keys that become orphans if the produce ultimately fails.
+type rewriteResult struct {
+ modified bool
+ payload []byte
+ uploadBytes int64
+ topics map[string]struct{}
+ orphans []orphanInfo
+}
+
+// orphanInfo identifies an uploaded S3 object whose referencing Kafka record
+// was never durably produced.
+type orphanInfo struct {
+ Topic string
+ Key string
+ RequestID string
+ Reason string
+}
+
+// trackOrphans bumps the orphan metric, logs each orphaned object, and emits
+// an orphan_detected tracker event per entry (reason defaults to
+// "kafka_produce_failed" when empty).
+// NOTE(review): orphan.RequestID is passed twice to EmitOrphanDetected (1st
+// and 6th argument) — confirm the tracker signature expects both.
+func (p *lfsProxy) trackOrphans(orphans []orphanInfo) {
+ if len(orphans) == 0 {
+ return
+ }
+ p.metrics.IncOrphans(len(orphans))
+ for _, orphan := range orphans {
+ p.logger.Warn("lfs orphaned object", "topic", orphan.Topic, "key", orphan.Key, "reason", orphan.Reason)
+ // Emit orphan_detected event
+ reason := orphan.Reason
+ if reason == "" {
+ reason = "kafka_produce_failed"
+ }
+ p.tracker.EmitOrphanDetected(orphan.RequestID, "upload_failure", orphan.Topic, p.s3Bucket, orphan.Key, orphan.RequestID, reason, 0)
+ }
+}
+
+// decodeRecordBatches splits a partition's Records bytes into individual
+// record batches. Each batch's 12-byte prefix (base offset + length) frames
+// it; malformed framing yields an error rather than a partial result.
+func decodeRecordBatches(records []byte) ([]recordBatch, error) {
+ out := make([]recordBatch, 0, 4)
+ buf := records
+ for len(buf) > 0 {
+ if len(buf) < 12 {
+ return nil, fmt.Errorf("record batch too short: %d", len(buf))
+ }
+ length := int(int32FromBytes(buf[8:12]))
+ total := 12 + length
+ if length < 0 || len(buf) < total {
+ return nil, fmt.Errorf("invalid record batch length %d", length)
+ }
+ batchBytes := buf[:total]
+ var batch kmsg.RecordBatch
+ if err := batch.ReadFrom(batchBytes); err != nil {
+ return nil, err
+ }
+ out = append(out, recordBatch{RecordBatch: batch, Raw: batchBytes})
+ buf = buf[total:]
+ }
+ return out, nil
+}
+
+// joinRecordBatches concatenates the raw bytes of all batches back into a
+// single Records buffer, pre-sizing the output to the exact total.
+func joinRecordBatches(batches []recordBatch) []byte {
+ if len(batches) == 0 {
+ return nil
+ }
+ size := 0
+ for _, batch := range batches {
+ size += len(batch.Raw)
+ }
+ out := make([]byte, 0, size)
+ for _, batch := range batches {
+ out = append(out, batch.Raw...)
+ }
+ return out
+}
+
+// decodeBatchRecords decompresses a batch's records (codec taken from the
+// low 3 bits of Attributes) and parses them into kmsg.Record values.
+func decodeBatchRecords(batch *recordBatch, decompressor kgo.Decompressor) ([]kmsg.Record, kgo.CompressionCodecType, error) {
+ codec := kgo.CompressionCodecType(batch.Attributes & 0x0007)
+ rawRecords := batch.Records
+ if codec != kgo.CodecNone {
+ var err error
+ rawRecords, err = decompressor.Decompress(rawRecords, codec)
+ if err != nil {
+ return nil, codec, err
+ }
+ }
+ numRecords := int(batch.NumRecords)
+ records := make([]kmsg.Record, numRecords)
+ records = readRawRecordsInto(records, rawRecords)
+ return records, codec, nil
+}
+
+// readRawRecordsInto parses varint-length-prefixed records from `in` into
+// the pre-sized slice. On malformed or truncated input it returns the
+// records parsed so far — the remainder is silently dropped, not reported.
+func readRawRecordsInto(rs []kmsg.Record, in []byte) []kmsg.Record {
+ for i := range rs {
+ length, used := varint(in)
+ total := used + int(length)
+ if used == 0 || length < 0 || len(in) < total {
+ return rs[:i]
+ }
+ if err := (&rs[i]).ReadFrom(in[:total]); err != nil {
+ rs[i] = kmsg.Record{}
+ return rs[:i]
+ }
+ in = in[total:]
+ }
+ return rs
+}
+
+// compressRecords re-compresses raw record bytes with the requested codec.
+// Unknown codecs — and compressor construction failures — fall back to
+// returning the uncompressed bytes tagged CodecNone (the error, if any, is
+// returned alongside for the caller to decide).
+func compressRecords(codec kgo.CompressionCodecType, raw []byte) ([]byte, kgo.CompressionCodecType, error) {
+ if codec == kgo.CodecNone {
+ return raw, kgo.CodecNone, nil
+ }
+ var comp kgo.Compressor
+ var err error
+ switch codec {
+ case kgo.CodecGzip:
+ comp, err = kgo.DefaultCompressor(kgo.GzipCompression())
+ case kgo.CodecSnappy:
+ comp, err = kgo.DefaultCompressor(kgo.SnappyCompression())
+ case kgo.CodecLz4:
+ comp, err = kgo.DefaultCompressor(kgo.Lz4Compression())
+ case kgo.CodecZstd:
+ comp, err = kgo.DefaultCompressor(kgo.ZstdCompression())
+ default:
+ return raw, kgo.CodecNone, nil
+ }
+ if err != nil || comp == nil {
+ return raw, kgo.CodecNone, err
+ }
+ out, usedCodec := comp.Compress(bytes.NewBuffer(nil), raw)
+ return out, usedCodec, nil
+}
+
+// findHeaderValue returns the value of the first header whose key matches
+// exactly (case-sensitive), and whether it was found.
+func findHeaderValue(headers []kmsg.Header, key string) ([]byte, bool) {
+ for _, header := range headers {
+ if header.Key == key {
+ return header.Value, true
+ }
+ }
+ return nil, false
+}
+
+// headerValue returns the first exactly-matching header's value as a string,
+// or "" when the key is absent.
+func headerValue(headers []kmsg.Header, key string) string {
+ for _, header := range headers {
+ if header.Key == key {
+ return string(header.Value)
+ }
+ }
+ return ""
+}
+
+// safeHeaderAllowlist defines headers that are safe to include in the LFS envelope.
+// Headers not in this list are redacted to prevent leaking sensitive information.
+var safeHeaderAllowlist = map[string]bool{
+ "content-type": true,
+ "content-encoding": true,
+ "correlation-id": true,
+ "message-id": true,
+ "x-correlation-id": true,
+ "x-request-id": true,
+ "traceparent": true, // W3C trace context
+ "tracestate": true, // W3C trace context
+}
+
+// headersToMap copies allowlisted record headers into a map for the LFS
+// envelope. Keys are matched case-insensitively against the allowlist but
+// emitted with their original casing; nil is returned when nothing survives.
+func headersToMap(headers []kmsg.Header) map[string]string {
+ if len(headers) == 0 {
+ return nil
+ }
+ out := make(map[string]string)
+ for _, header := range headers {
+ key := strings.ToLower(header.Key)
+ // Only include safe headers in the envelope
+ if safeHeaderAllowlist[key] {
+ out[header.Key] = string(header.Value)
+ }
+ }
+ if len(out) == 0 {
+ return nil
+ }
+ return out
+}
+
+// dropHeader removes every header whose key matches exactly.
+// NOTE(review): this filters in place (out := headers[:0]), mutating the
+// caller's backing array — callers must not reuse the input slice.
+func dropHeader(headers []kmsg.Header, key string) []kmsg.Header {
+ if len(headers) == 0 {
+ return headers
+ }
+ out := headers[:0]
+ for _, header := range headers {
+ if header.Key == key {
+ continue
+ }
+ out = append(out, header)
+ }
+ return out
+}
+
+// int32FromBytes decodes a big-endian int32 from the first four bytes of b.
+func int32FromBytes(b []byte) int32 {
+ return int32(uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]))
+}
+
+// crc32cTable is the Castagnoli (CRC-32C) table used for Kafka record batch
+// checksums.
+var crc32cTable = crc32.MakeTable(crc32.Castagnoli)
+
+// resolveChecksumAlg normalizes the requested checksum algorithm, falling
+// back to the proxy's configured default when raw is blank.
+func (p *lfsProxy) resolveChecksumAlg(raw string) (lfs.ChecksumAlg, error) {
+ if strings.TrimSpace(raw) == "" {
+ return lfs.NormalizeChecksumAlg(p.checksumAlg)
+ }
+ return lfs.NormalizeChecksumAlg(raw)
+}
diff --git a/cmd/lfs-proxy/handler_test.go b/cmd/lfs-proxy/handler_test.go
new file mode 100644
index 00000000..d338f1c2
--- /dev/null +++ b/cmd/lfs-proxy/handler_test.go @@ -0,0 +1,325 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "io" + "testing" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/KafScale/platform/pkg/protocol" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/twmb/franz-go/pkg/kgo" + "github.com/twmb/franz-go/pkg/kmsg" +) + +type fakeS3API struct{} + +func (fakeS3API) CreateMultipartUpload(ctx context.Context, params *s3.CreateMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error) { + return &s3.CreateMultipartUploadOutput{UploadId: aws.String("upload")}, nil +} +func (fakeS3API) UploadPart(ctx context.Context, params *s3.UploadPartInput, optFns ...func(*s3.Options)) (*s3.UploadPartOutput, error) { + return &s3.UploadPartOutput{ETag: aws.String("etag")}, nil +} +func (fakeS3API) CompleteMultipartUpload(ctx context.Context, params *s3.CompleteMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) { + return &s3.CompleteMultipartUploadOutput{}, nil +} +func (fakeS3API) AbortMultipartUpload(ctx context.Context, params *s3.AbortMultipartUploadInput, optFns ...func(*s3.Options)) 
(*s3.AbortMultipartUploadOutput, error) { + return &s3.AbortMultipartUploadOutput{}, nil +} +func (fakeS3API) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) { + return &s3.PutObjectOutput{}, nil +} +func (fakeS3API) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + body := io.NopCloser(bytes.NewReader([]byte("payload"))) + return &s3.GetObjectOutput{ + Body: body, + ContentLength: aws.Int64(int64(len("payload"))), + ContentType: aws.String("application/octet-stream"), + }, nil +} + +func (fakeS3API) DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) { + return &s3.DeleteObjectOutput{}, nil +} +func (fakeS3API) HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) { + return &s3.HeadBucketOutput{}, nil +} +func (fakeS3API) CreateBucket(ctx context.Context, params *s3.CreateBucketInput, optFns ...func(*s3.Options)) (*s3.CreateBucketOutput, error) { + return &s3.CreateBucketOutput{}, nil +} + +type failingS3API struct { + err error +} + +func (f failingS3API) CreateMultipartUpload(ctx context.Context, params *s3.CreateMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error) { + return nil, f.err +} +func (f failingS3API) UploadPart(ctx context.Context, params *s3.UploadPartInput, optFns ...func(*s3.Options)) (*s3.UploadPartOutput, error) { + return nil, f.err +} +func (f failingS3API) CompleteMultipartUpload(ctx context.Context, params *s3.CompleteMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) { + return nil, f.err +} +func (f failingS3API) AbortMultipartUpload(ctx context.Context, params *s3.AbortMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.AbortMultipartUploadOutput, error) { + return nil, f.err +} 
+func (f failingS3API) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) { + return nil, f.err +} +func (f failingS3API) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + return nil, f.err +} + +func (f failingS3API) DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) { + return nil, f.err +} +func (f failingS3API) HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) { + return nil, f.err +} +func (f failingS3API) CreateBucket(ctx context.Context, params *s3.CreateBucketInput, optFns ...func(*s3.Options)) (*s3.CreateBucketOutput, error) { + return nil, f.err +} + +func TestRewriteProduceRecords(t *testing.T) { + proxy := &lfsProxy{ + s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: fakeS3API{}}, + s3Bucket: "bucket", + s3Namespace: "ns", + maxBlob: 1024 * 1024, + proxyID: "proxy-1", + metrics: newLfsMetrics(), + } + + rec := kmsg.Record{ + TimestampDelta64: 0, + OffsetDelta: 0, + Value: []byte("payload"), + Headers: []kmsg.Header{ + {Key: "LFS_BLOB", Value: nil}, + {Key: "content-type", Value: []byte("application/octet-stream")}, + }, + } + batchBytes := buildRecordBatch([]kmsg.Record{rec}) + + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 1000, + Topics: []kmsg.ProduceRequestTopic{ + { + Topic: "topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }, + }, + } + header := &protocol.RequestHeader{ + APIKey: protocol.APIKeyProduce, + APIVersion: 9, + CorrelationID: 1, + ClientID: strPtr("client"), + } + + result, err := proxy.rewriteProduceRecords(context.Background(), header, req) + if err != nil { + t.Fatalf("rewriteProduceRecords error: %v", err) + } + if !result.modified { + t.Fatalf("expected modified payload") + 
} + parsedHeader, parsedReq, err := protocol.ParseRequest(result.payload) + if err != nil { + t.Fatalf("parse rewritten request: %v", err) + } + if parsedHeader.APIKey != protocol.APIKeyProduce { + t.Fatalf("unexpected api key %d", parsedHeader.APIKey) + } + prodReq := parsedReq.(*kmsg.ProduceRequest) + batches, err := decodeRecordBatches(prodReq.Topics[0].Partitions[0].Records) + if err != nil { + t.Fatalf("decode record batches: %v", err) + } + records, _, err := decodeBatchRecords(&batches[0], kgo.DefaultDecompressor()) + if err != nil { + t.Fatalf("decode records: %v", err) + } + var env lfs.Envelope + if err := json.Unmarshal(records[0].Value, &env); err != nil { + t.Fatalf("unmarshal envelope: %v", err) + } + if env.Bucket != "bucket" || env.Key == "" || env.Version != 1 { + t.Fatalf("unexpected envelope: %+v", env) + } +} + +func TestRewriteProduceRecordsPassthrough(t *testing.T) { + proxy := &lfsProxy{ + s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: fakeS3API{}}, + s3Bucket: "bucket", + s3Namespace: "ns", + maxBlob: 1024 * 1024, + metrics: newLfsMetrics(), + } + + rec := kmsg.Record{ + TimestampDelta64: 0, + OffsetDelta: 0, + Value: []byte("payload"), + Headers: nil, + } + batchBytes := buildRecordBatch([]kmsg.Record{rec}) + + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 1000, + Topics: []kmsg.ProduceRequestTopic{ + { + Topic: "topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }, + }, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + + result, err := proxy.rewriteProduceRecords(context.Background(), header, req) + if err != nil { + t.Fatalf("rewriteProduceRecords error: %v", err) + } + if result.modified { + t.Fatalf("expected passthrough") + } +} + +func TestRewriteProduceRecordsS3Failure(t *testing.T) { + proxy := &lfsProxy{ + s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: failingS3API{err: 
errors.New("boom")}}, + s3Bucket: "bucket", + s3Namespace: "ns", + maxBlob: 1024 * 1024, + metrics: newLfsMetrics(), + } + + rec := kmsg.Record{ + TimestampDelta64: 0, + OffsetDelta: 0, + Value: []byte("payload"), + Headers: []kmsg.Header{{Key: "LFS_BLOB", Value: nil}}, + } + batchBytes := buildRecordBatch([]kmsg.Record{rec}) + + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 1000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: "topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }}, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + + _, err := proxy.rewriteProduceRecords(context.Background(), header, req) + if err == nil { + t.Fatalf("expected error") + } +} + +func TestRewriteProduceRecordsChecksumMismatch(t *testing.T) { + proxy := &lfsProxy{ + s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: fakeS3API{}}, + s3Bucket: "bucket", + s3Namespace: "ns", + maxBlob: 1024 * 1024, + metrics: newLfsMetrics(), + } + + rec := kmsg.Record{ + TimestampDelta64: 0, + OffsetDelta: 0, + Value: []byte("payload"), + Headers: []kmsg.Header{{Key: "LFS_BLOB", Value: []byte("deadbeef")}}, + } + batchBytes := buildRecordBatch([]kmsg.Record{rec}) + + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 1000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: "topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }}, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + + _, err := proxy.rewriteProduceRecords(context.Background(), header, req) + if err == nil { + t.Fatalf("expected error") + } +} + +func TestRewriteProduceRecordsMaxBlobSize(t *testing.T) { + proxy := &lfsProxy{ + s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: fakeS3API{}}, + s3Bucket: "bucket", + s3Namespace: "ns", + maxBlob: 3, + metrics: newLfsMetrics(), + } + + 
rec := kmsg.Record{ + TimestampDelta64: 0, + OffsetDelta: 0, + Value: []byte("payload"), + Headers: []kmsg.Header{{Key: "LFS_BLOB", Value: nil}}, + } + batchBytes := buildRecordBatch([]kmsg.Record{rec}) + + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 1000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: "topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }}, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + + _, err := proxy.rewriteProduceRecords(context.Background(), header, req) + if err == nil { + t.Fatalf("expected error") + } +} + +func strPtr(v string) *string { return &v } diff --git a/cmd/lfs-proxy/http.go b/cmd/lfs-proxy/http.go new file mode 100644 index 00000000..27c16325 --- /dev/null +++ b/cmd/lfs-proxy/http.go @@ -0,0 +1,1013 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "context" + "crypto/sha256" + "crypto/subtle" + "encoding/base64" + "encoding/hex" + "encoding/json" + "errors" + "io" + "math" + "net/http" + "regexp" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/KafScale/platform/pkg/protocol" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/twmb/franz-go/pkg/kmsg" +) + +const ( + headerTopic = "X-Kafka-Topic" + headerKey = "X-Kafka-Key" + headerPartition = "X-Kafka-Partition" + headerChecksum = "X-LFS-Checksum" + headerChecksumAlg = "X-LFS-Checksum-Alg" + headerRequestID = "X-Request-ID" +) + +// validTopicPattern matches valid Kafka topic names (alphanumeric, dots, underscores, hyphens) +var validTopicPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+$`) + +type errorResponse struct { + Code string `json:"code"` + Message string `json:"message"` + RequestID string `json:"request_id"` +} + +type downloadRequest struct { + Bucket string `json:"bucket"` + Key string `json:"key"` + Mode string `json:"mode"` + ExpiresSeconds int `json:"expires_seconds"` +} + +type downloadResponse struct { + Mode string `json:"mode"` + URL string `json:"url"` + ExpiresAt string `json:"expires_at"` +} + +type uploadInitRequest struct { + Topic string `json:"topic"` + Key string `json:"key"` + Partition *int32 `json:"partition,omitempty"` + ContentType string `json:"content_type"` + SizeBytes int64 `json:"size_bytes"` + Checksum string `json:"checksum,omitempty"` + ChecksumAlg string `json:"checksum_alg,omitempty"` +} + +type uploadInitResponse struct { + UploadID string `json:"upload_id"` + S3Key string `json:"s3_key"` + PartSize int64 `json:"part_size"` + ExpiresAt string `json:"expires_at"` +} + +type uploadPartResponse struct { + UploadID string `json:"upload_id"` + PartNumber int32 `json:"part_number"` + ETag string `json:"etag"` +} + +type uploadCompleteRequest struct { + Parts []struct { + 
PartNumber int32 `json:"part_number"` + ETag string `json:"etag"` + } `json:"parts"` +} + +type uploadSession struct { + mu sync.Mutex + ID string + Topic string + S3Key string + UploadID string + ContentType string + SizeBytes int64 + KeyBytes []byte + Partition int32 + Checksum string + ChecksumAlg lfs.ChecksumAlg + CreatedAt time.Time + ExpiresAt time.Time + PartSize int64 + NextPart int32 + TotalUploaded int64 + Parts map[int32]string + PartSizes map[int32]int64 + sha256Hasher hashWriter + checksumHasher hashWriter +} + +type hashWriter interface { + Write([]byte) (int, error) + Sum([]byte) []byte +} + +func (p *lfsProxy) startHTTPServer(ctx context.Context, addr string) { + mux := http.NewServeMux() + mux.HandleFunc("/lfs/produce", p.corsMiddleware(p.handleHTTPProduce)) + mux.HandleFunc("/lfs/download", p.corsMiddleware(p.handleHTTPDownload)) + mux.HandleFunc("/lfs/uploads", p.corsMiddleware(p.handleHTTPUploadInit)) + mux.HandleFunc("/lfs/uploads/", p.corsMiddleware(p.handleHTTPUploadSession)) + // Swagger UI and OpenAPI spec endpoints + mux.HandleFunc("/swagger", p.handleSwaggerUI) + mux.HandleFunc("/swagger/", p.handleSwaggerUI) + mux.HandleFunc("/api/openapi.yaml", p.handleOpenAPISpec) + srv := &http.Server{ + Addr: addr, + Handler: mux, + ReadTimeout: p.httpReadTimeout, + WriteTimeout: p.httpWriteTimeout, + IdleTimeout: p.httpIdleTimeout, + ReadHeaderTimeout: p.httpHeaderTimeout, + MaxHeaderBytes: p.httpMaxHeaderBytes, + } + go func() { + <-ctx.Done() + shutdownCtx, cancel := context.WithTimeout(context.Background(), p.httpShutdownTimeout) + defer cancel() + _ = srv.Shutdown(shutdownCtx) + }() + go func() { + p.logger.Info("lfs proxy http listening", "addr", addr, "tls", p.httpTLSConfig != nil) + var err error + if p.httpTLSConfig != nil { + srv.TLSConfig = p.httpTLSConfig + err = srv.ListenAndServeTLS(p.httpTLSCertFile, p.httpTLSKeyFile) + } else { + err = srv.ListenAndServe() + } + if err != nil && err != http.ErrServerClosed { + p.logger.Warn("lfs proxy 
http server error", "error", err) + } + }() +} + +// corsMiddleware adds CORS headers to allow browser-based clients. +func (p *lfsProxy) corsMiddleware(next http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Set CORS headers for all responses + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Allow-Methods", "POST, PUT, DELETE, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Range, X-Kafka-Topic, X-Kafka-Key, X-Kafka-Partition, X-LFS-Checksum, X-LFS-Checksum-Alg, X-LFS-Size, X-LFS-Mode, X-Request-ID, X-API-Key, Authorization") + w.Header().Set("Access-Control-Expose-Headers", "X-Request-ID") + + // Handle preflight OPTIONS request + if r.Method == http.MethodOptions { + w.WriteHeader(http.StatusNoContent) + return + } + + next(w, r) + } +} + +func (p *lfsProxy) handleHTTPProduce(w http.ResponseWriter, r *http.Request) { + requestID := strings.TrimSpace(r.Header.Get(headerRequestID)) + if requestID == "" { + requestID = newUUID() + } + w.Header().Set(headerRequestID, requestID) + if r.Method != http.MethodPost { + p.writeHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed") + return + } + if p.httpAPIKey != "" && !p.validateHTTPAPIKey(r) { + p.writeHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized") + return + } + if !p.isReady() { + p.writeHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready") + return + } + topic := strings.TrimSpace(r.Header.Get(headerTopic)) + if topic == "" { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "missing_topic", "missing topic") + return + } + if !p.isValidTopicName(topic) { + p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_topic", "invalid topic name") + return + } + + var keyBytes []byte + if keyHeader := strings.TrimSpace(r.Header.Get(headerKey)); keyHeader != "" 
{ + decoded, err := base64.StdEncoding.DecodeString(keyHeader) + if err != nil { + p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_key", "invalid key") + return + } + keyBytes = decoded + } + + partition := int32(0) + if partitionHeader := strings.TrimSpace(r.Header.Get(headerPartition)); partitionHeader != "" { + parsed, err := strconv.ParseInt(partitionHeader, 10, 32) + if err != nil { + p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_partition", "invalid partition") + return + } + partition = int32(parsed) + } + + checksumHeader := strings.TrimSpace(r.Header.Get(headerChecksum)) + checksumAlgHeader := strings.TrimSpace(r.Header.Get(headerChecksumAlg)) + alg, err := p.resolveChecksumAlg(checksumAlgHeader) + if err != nil { + p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_request", err.Error()) + return + } + if checksumHeader != "" && alg == lfs.ChecksumNone { + p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_checksum", "checksum provided but checksum algorithm is none") + return + } + objectKey := p.buildObjectKey(topic) + clientIP := getClientIP(r) + contentType := r.Header.Get("Content-Type") + + start := time.Now() + + // Emit upload_started event + p.tracker.EmitUploadStarted(requestID, topic, partition, objectKey, contentType, clientIP, "http", r.ContentLength) + + sha256Hex, checksum, checksumAlg, size, err := p.s3Uploader.UploadStream(r.Context(), objectKey, r.Body, p.maxBlob, alg) + if err != nil { + p.metrics.IncRequests(topic, "error", "lfs") + p.metrics.IncS3Errors() + status, code := statusForUploadError(err) + p.tracker.EmitUploadFailed(requestID, topic, objectKey, code, err.Error(), "s3_upload", 0, time.Since(start)) + p.writeHTTPError(w, requestID, topic, status, code, err.Error()) + return + } + if checksumHeader != "" && checksum != "" && !strings.EqualFold(checksumHeader, checksum) { + if err := p.s3Uploader.DeleteObject(r.Context(), objectKey); err != nil 
{
+ // NOTE(review): Reason here should almost certainly be
+ // "checksum_mismatch_delete_failed", not "kafka_produce_failed" — this is
+ // the checksum-mismatch cleanup path where the S3 delete failed, and the
+ // equivalent Kafka-path code in handler.go uses
+ // "checksum_mismatch_delete_failed" for the same condition. The mismatched
+ // reason makes orphan events/logs misleading.
+ p.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}})
+ p.metrics.IncRequests(topic, "error", "lfs")
+ p.tracker.EmitUploadFailed(requestID, topic, objectKey, "checksum_mismatch", "checksum mismatch; delete failed", "validation", size, time.Since(start))
+ p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "checksum_mismatch", "checksum mismatch; delete failed")
+ return
+ }
+ p.metrics.IncRequests(topic, "error", "lfs")
+ p.tracker.EmitUploadFailed(requestID, topic, objectKey, "checksum_mismatch", (&lfs.ChecksumError{Expected: checksumHeader, Actual: checksum}).Error(), "validation", size, time.Since(start))
+ p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "checksum_mismatch", (&lfs.ChecksumError{Expected: checksumHeader, Actual: checksum}).Error())
+ return
+ }
+
+ // Build the compact envelope that replaces the blob in Kafka. Unlike the
+ // Kafka-path envelope in handler.go, no OriginalHeaders are recorded here
+ // — HTTP ingest carries only Content-Type.
+ env := lfs.Envelope{
+ Version: 1,
+ Bucket: p.s3Bucket,
+ Key: objectKey,
+ Size: size,
+ SHA256: sha256Hex,
+ Checksum: checksum,
+ ChecksumAlg: checksumAlg,
+ ContentType: r.Header.Get("Content-Type"),
+ CreatedAt: time.Now().UTC().Format(time.RFC3339),
+ ProxyID: p.proxyID,
+ }
+ encoded, err := lfs.EncodeEnvelope(env)
+ if err != nil {
+ p.metrics.IncRequests(topic, "error", "lfs")
+ p.writeHTTPError(w, requestID, topic, http.StatusInternalServerError, "encode_failed", err.Error())
+ return
+ }
+
+ record := kmsg.Record{
+ TimestampDelta64: 0,
+ OffsetDelta: 0,
+ Key: keyBytes,
+ Value: encoded,
+ }
+ batchBytes := buildRecordBatch([]kmsg.Record{record})
+
+ // Produce the envelope record with acks=1 and a fixed 15s broker timeout.
+ produceReq := &kmsg.ProduceRequest{
+ Acks: 1,
+ TimeoutMillis: 15000,
+ Topics: []kmsg.ProduceRequestTopic{{
+ Topic: topic,
+ Partitions: []kmsg.ProduceRequestTopicPartition{{
+ Partition: partition,
+ Records: batchBytes,
+ }},
+ }},
+ }
+
+ correlationID := int32(atomic.AddUint32(&p.corrID, 1))
+ reqHeader := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: correlationID}
+ payload, err := 
encodeProduceRequest(reqHeader, produceReq) + if err != nil { + p.metrics.IncRequests(topic, "error", "lfs") + p.writeHTTPError(w, requestID, topic, http.StatusInternalServerError, "encode_failed", err.Error()) + return + } + + backendConn, backendAddr, err := p.connectBackend(r.Context()) + if err != nil { + p.metrics.IncRequests(topic, "error", "lfs") + p.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}}) + p.tracker.EmitUploadFailed(requestID, topic, objectKey, "backend_unavailable", err.Error(), "kafka_produce", size, time.Since(start)) + p.writeHTTPError(w, requestID, topic, http.StatusServiceUnavailable, "backend_unavailable", err.Error()) + return + } + defer func() { _ = backendConn.Close() }() + + _, err = p.forwardToBackend(r.Context(), backendConn, backendAddr, payload) + if err != nil { + p.metrics.IncRequests(topic, "error", "lfs") + p.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}}) + p.tracker.EmitUploadFailed(requestID, topic, objectKey, "backend_error", err.Error(), "kafka_produce", size, time.Since(start)) + p.writeHTTPError(w, requestID, topic, http.StatusBadGateway, "backend_error", err.Error()) + return + } + + p.metrics.IncRequests(topic, "ok", "lfs") + p.metrics.AddUploadBytes(size) + p.metrics.ObserveUploadDuration(time.Since(start).Seconds()) + + // Emit upload_completed event + p.tracker.EmitUploadCompleted(requestID, topic, partition, 0, p.s3Bucket, objectKey, size, sha256Hex, checksum, checksumAlg, contentType, time.Since(start)) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(env) +} + +func (p *lfsProxy) handleHTTPDownload(w http.ResponseWriter, r *http.Request) { + requestID := strings.TrimSpace(r.Header.Get(headerRequestID)) + if requestID == "" { + requestID = newUUID() + } + w.Header().Set(headerRequestID, requestID) + if r.Method != 
http.MethodPost { + p.writeHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed") + return + } + if p.httpAPIKey != "" && !p.validateHTTPAPIKey(r) { + p.writeHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized") + return + } + if !p.isReady() { + p.writeHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready") + return + } + + var req downloadRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "invalid JSON body") + return + } + req.Bucket = strings.TrimSpace(req.Bucket) + req.Key = strings.TrimSpace(req.Key) + if req.Bucket == "" || req.Key == "" { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "bucket and key required") + return + } + // Only the proxy's own bucket may be read through this endpoint. + if req.Bucket != p.s3Bucket { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_bucket", "bucket not allowed") + return + } + if err := p.validateObjectKey(req.Key); err != nil { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_key", err.Error()) + return + } + + // Download mode defaults to presign; "stream" proxies the object bytes through this process. + mode := strings.ToLower(strings.TrimSpace(req.Mode)) + if mode == "" { + mode = "presign" + } + if mode != "presign" && mode != "stream" { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_mode", "mode must be presign or stream") + return + } + + clientIP := getClientIP(r) + start := time.Now() + + // Emit download_requested event + ttlSeconds := 0 + if mode == "presign" { + ttlSeconds = req.ExpiresSeconds + if ttlSeconds <= 0 { + ttlSeconds = int(p.downloadTTLMax.Seconds()) + } + } + p.tracker.EmitDownloadRequested(requestID, req.Bucket, req.Key, mode, clientIP, ttlSeconds) + + switch mode { + case "presign": + // The requested TTL is clamped to the configured maximum. + ttl := p.downloadTTLMax + if req.ExpiresSeconds > 0 { + requested := time.Duration(req.ExpiresSeconds) * time.Second + if requested < ttl { + ttl = requested + } + } 
+ url, err := p.s3Uploader.PresignGetObject(r.Context(), req.Key, ttl) + if err != nil { + p.metrics.IncS3Errors() + p.writeHTTPError(w, requestID, "", http.StatusBadGateway, "s3_presign_failed", err.Error()) + return + } + // Emit download_completed for presign (URL generated) + p.tracker.EmitDownloadCompleted(requestID, req.Key, mode, time.Since(start), 0) + + resp := downloadResponse{ + Mode: "presign", + URL: url, + ExpiresAt: time.Now().UTC().Add(ttl).Format(time.RFC3339), + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(resp) + case "stream": + obj, err := p.s3Uploader.GetObject(r.Context(), req.Key) + if err != nil { + p.metrics.IncS3Errors() + p.writeHTTPError(w, requestID, "", http.StatusBadGateway, "s3_get_failed", err.Error()) + return + } + defer func() { _ = obj.Body.Close() }() + contentType := "application/octet-stream" + if obj.ContentType != nil && *obj.ContentType != "" { + contentType = *obj.ContentType + } + w.Header().Set("Content-Type", contentType) + var size int64 + if obj.ContentLength != nil { + size = *obj.ContentLength + w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) + } + // NOTE(review): headers are already written, so a mid-stream copy failure can only be logged, not reported to the client. + if _, err := io.Copy(w, obj.Body); err != nil { + p.logger.Warn("download stream failed", "error", err) + } + // Emit download_completed for stream + p.tracker.EmitDownloadCompleted(requestID, req.Key, mode, time.Since(start), size) + } +} + +// handleHTTPUploadInit serves POST /lfs/uploads: validates the declared upload and opens an S3 multipart session. +func (p *lfsProxy) handleHTTPUploadInit(w http.ResponseWriter, r *http.Request) { + requestID := strings.TrimSpace(r.Header.Get(headerRequestID)) + if requestID == "" { + requestID = newUUID() + } + w.Header().Set(headerRequestID, requestID) + if r.Method != http.MethodPost { + p.writeHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed") + return + } + if p.httpAPIKey != "" && !p.validateHTTPAPIKey(r) { + p.writeHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized") + 
return + } + if !p.isReady() { + p.writeHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready") + return + } + + var req uploadInitRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "invalid JSON body") + return + } + + // Validate the declared upload before any S3 state is created. + req.Topic = strings.TrimSpace(req.Topic) + req.ContentType = strings.TrimSpace(req.ContentType) + req.Checksum = strings.TrimSpace(req.Checksum) + req.ChecksumAlg = strings.TrimSpace(req.ChecksumAlg) + if req.Topic == "" { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "missing_topic", "missing topic") + return + } + if !p.isValidTopicName(req.Topic) { + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_topic", "invalid topic name") + return + } + if req.ContentType == "" { + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "missing_content_type", "content_type required") + return + } + if req.SizeBytes <= 0 { + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_size", "size_bytes must be > 0") + return + } + if p.maxBlob > 0 && req.SizeBytes > p.maxBlob { + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "payload_too_large", "payload exceeds max size") + return + } + + keyBytes := []byte(nil) + if req.Key != "" { + decoded, err := base64.StdEncoding.DecodeString(req.Key) + if err != nil { + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_key", "invalid key") + return + } + keyBytes = decoded + } + + partition := int32(0) + if req.Partition != nil { + partition = *req.Partition + if partition < 0 { + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_partition", "invalid partition") + return + } + } + + alg, err := p.resolveChecksumAlg(req.ChecksumAlg) + if err != nil { + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_request", err.Error()) + 
return + } + if req.Checksum != "" && alg == lfs.ChecksumNone { + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_checksum", "checksum provided but checksum algorithm is none") + return + } + + objectKey := p.buildObjectKey(req.Topic) + uploadID, err := p.s3Uploader.StartMultipartUpload(r.Context(), objectKey, req.ContentType) + if err != nil { + p.metrics.IncS3Errors() + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error()) + return + } + p.logger.Info("http chunked upload init", "requestId", requestID, "topic", req.Topic, "s3Key", objectKey, "uploadId", uploadID, "sizeBytes", req.SizeBytes, "partSize", p.chunkSize) + + partSize := normalizeChunkSize(p.chunkSize) + session := &uploadSession{ + ID: newUUID(), + Topic: req.Topic, + S3Key: objectKey, + UploadID: uploadID, + ContentType: req.ContentType, + SizeBytes: req.SizeBytes, + KeyBytes: keyBytes, + Partition: partition, + Checksum: req.Checksum, + ChecksumAlg: alg, + CreatedAt: time.Now().UTC(), + ExpiresAt: time.Now().UTC().Add(p.uploadSessionTTL), + PartSize: partSize, + NextPart: 1, + Parts: make(map[int32]string), + PartSizes: make(map[int32]int64), + sha256Hasher: sha256.New(), + } + if alg != lfs.ChecksumNone { + if alg == lfs.ChecksumSHA256 { + // SHA-256 doubles as the payload checksum; reuse the hasher so the body is digested once. + session.checksumHasher = session.sha256Hasher + } else if h, err := lfs.NewChecksumHasher(alg); err == nil { + session.checksumHasher = h + } else { + // FIX: abort the just-created multipart upload before failing, otherwise the + // S3 upload is orphaned with no session left to abort it. (The previous + // "else if err != nil" was also redundant: err is always non-nil here.) + _ = p.s3Uploader.AbortMultipartUpload(r.Context(), session.S3Key, session.UploadID) + p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_checksum", err.Error()) + return + } + } + + p.storeUploadSession(session) + p.tracker.EmitUploadStarted(requestID, req.Topic, partition, objectKey, req.ContentType, getClientIP(r), "http-chunked", req.SizeBytes) + + resp := uploadInitResponse{ + UploadID: session.ID, + S3Key: session.S3Key, + PartSize: session.PartSize, + ExpiresAt: session.ExpiresAt.Format(time.RFC3339), + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + 
_ = json.NewEncoder(w).Encode(resp) +} + +// handleHTTPUploadSession routes /lfs/uploads/{id}[/parts/{n}|/complete] by path shape and HTTP method. +func (p *lfsProxy) handleHTTPUploadSession(w http.ResponseWriter, r *http.Request) { + requestID := strings.TrimSpace(r.Header.Get(headerRequestID)) + if requestID == "" { + requestID = newUUID() + } + w.Header().Set(headerRequestID, requestID) + if p.httpAPIKey != "" && !p.validateHTTPAPIKey(r) { + p.writeHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized") + return + } + if !p.isReady() { + p.writeHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready") + return + } + + path := strings.TrimPrefix(r.URL.Path, "/lfs/uploads/") + parts := strings.Split(strings.Trim(path, "/"), "/") + if len(parts) == 0 || parts[0] == "" { + p.writeHTTPError(w, requestID, "", http.StatusNotFound, "not_found", "not found") + return + } + uploadID := parts[0] + + switch { + case len(parts) == 1 && r.Method == http.MethodDelete: + p.handleHTTPUploadAbort(w, r, requestID, uploadID) + return + case len(parts) == 2 && parts[1] == "complete" && r.Method == http.MethodPost: + p.handleHTTPUploadComplete(w, r, requestID, uploadID) + return + case len(parts) == 3 && parts[1] == "parts" && r.Method == http.MethodPut: + partNum, err := strconv.ParseInt(parts[2], 10, 32) + if err != nil || partNum <= 0 || partNum > math.MaxInt32 { + p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_part", "invalid part number") + return + } + p.handleHTTPUploadPart(w, r, requestID, uploadID, int32(partNum)) + return + default: + p.writeHTTPError(w, requestID, "", http.StatusNotFound, "not_found", "not found") + return + } +} + +// handleHTTPUploadPart stores one sequential part of a chunked upload under the session lock. +func (p *lfsProxy) handleHTTPUploadPart(w http.ResponseWriter, r *http.Request, requestID, sessionID string, partNumber int32) { + session, ok := p.getUploadSession(sessionID) + if !ok { + p.writeHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found") + return + } + + session.mu.Lock() + defer session.mu.Unlock() + if 
time.Now().UTC().After(session.ExpiresAt) { + // NOTE(review): the expired session is dropped without aborting the S3 multipart upload — confirm cleanup elsewhere or this leaks parts in S3. + p.deleteUploadSession(sessionID) + p.writeHTTPError(w, requestID, session.Topic, http.StatusGone, "upload_expired", "upload session expired") + return + } + + // Idempotent retry: a part that was already stored returns its recorded ETag. + if etag, exists := session.Parts[partNumber]; exists { + _, _ = io.Copy(io.Discard, r.Body) + p.logger.Info("http chunked upload part already received", "requestId", requestID, "uploadId", sessionID, "part", partNumber, "etag", etag) + resp := uploadPartResponse{UploadID: sessionID, PartNumber: partNumber, ETag: etag} + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(resp) + return + } + + if partNumber != session.NextPart { + p.writeHTTPError(w, requestID, session.Topic, http.StatusConflict, "out_of_order", "part out of order") + return + } + + // Read one byte past PartSize so an oversized part is detected without buffering it all. + limit := session.PartSize + 1 + body, err := io.ReadAll(io.LimitReader(r.Body, limit)) + if err != nil { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", err.Error()) + return + } + if int64(len(body)) == 0 { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "empty part") + return + } + if int64(len(body)) > session.PartSize { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part too large") + return + } + if session.TotalUploaded+int64(len(body)) > session.SizeBytes { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part exceeds declared size") + return + } + // Every part except the final one must meet S3's minimum multipart chunk size. + if session.TotalUploaded+int64(len(body)) < session.SizeBytes && int64(len(body)) < minMultipartChunkSize { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part too small") + return + } + + if _, err := session.sha256Hasher.Write(body); err != nil { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "hash_error", err.Error()) + return + } + if session.checksumHasher != nil && 
session.checksumHasher != session.sha256Hasher { + // Secondary checksum hasher is distinct from the SHA-256 hasher here; feed it separately. + if _, err := session.checksumHasher.Write(body); err != nil { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "hash_error", err.Error()) + return + } + } + + etag, err := p.s3Uploader.UploadPart(r.Context(), session.S3Key, session.UploadID, partNumber, body) + if err != nil { + p.metrics.IncS3Errors() + p.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "s3_upload_failed", err.Error(), "upload_part", session.TotalUploaded, 0) + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error()) + return + } + p.logger.Info("http chunked upload part stored", "requestId", requestID, "uploadId", sessionID, "part", partNumber, "etag", etag, "bytes", len(body)) + + // Record the part and advance the expected sequence number (still under session.mu). + session.Parts[partNumber] = etag + session.PartSizes[partNumber] = int64(len(body)) + session.TotalUploaded += int64(len(body)) + session.NextPart++ + + resp := uploadPartResponse{UploadID: sessionID, PartNumber: partNumber, ETag: etag} + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(resp) +} + +// handleHTTPUploadComplete finalizes a chunked upload: completes the S3 multipart upload, +// verifies checksums, and produces the envelope record to Kafka. +func (p *lfsProxy) handleHTTPUploadComplete(w http.ResponseWriter, r *http.Request, requestID, sessionID string) { + session, ok := p.getUploadSession(sessionID) + if !ok { + p.writeHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found") + return + } + + session.mu.Lock() + defer session.mu.Unlock() + if time.Now().UTC().After(session.ExpiresAt) { + p.deleteUploadSession(sessionID) + p.writeHTTPError(w, requestID, session.Topic, http.StatusGone, "upload_expired", "upload session expired") + return + } + if session.TotalUploaded != session.SizeBytes { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "incomplete_upload", "not all bytes uploaded") + return + } + + var req uploadCompleteRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + 
p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_request", "invalid JSON body") + return + } + if len(req.Parts) == 0 { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_request", "parts required") + return + } + + // Cross-check every client-reported part against the ETags recorded during upload. + completed := make([]types.CompletedPart, 0, len(req.Parts)) + for _, part := range req.Parts { + etag, ok := session.Parts[part.PartNumber] + if !ok || etag == "" || part.ETag == "" || etag != part.ETag { + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part etag mismatch") + return + } + completed = append(completed, types.CompletedPart{ + ETag: aws.String(part.ETag), + PartNumber: aws.Int32(part.PartNumber), + }) + } + + if err := p.s3Uploader.CompleteMultipartUpload(r.Context(), session.S3Key, session.UploadID, completed); err != nil { + p.metrics.IncS3Errors() + p.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "s3_upload_failed", err.Error(), "upload_complete", session.TotalUploaded, 0) + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error()) + return + } + p.logger.Info("http chunked upload completed", "requestId", requestID, "uploadId", sessionID, "parts", len(completed), "bytes", session.TotalUploaded) + + shaHex := hex.EncodeToString(session.sha256Hasher.Sum(nil)) + checksum := "" + if session.ChecksumAlg != lfs.ChecksumNone { + if session.ChecksumAlg == lfs.ChecksumSHA256 { + checksum = shaHex + } else if session.checksumHasher != nil { + checksum = hex.EncodeToString(session.checksumHasher.Sum(nil)) + } + } + if session.Checksum != "" && checksum != "" && !strings.EqualFold(session.Checksum, checksum) { + // FIX: the multipart upload has already completed successfully, so AbortMultipartUpload + // is a no-op at this point and the mismatched object would be left in S3. Delete the + // assembled object and drop the now-useless session instead. + _ = p.s3Uploader.DeleteObject(r.Context(), session.S3Key) + p.deleteUploadSession(sessionID) + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "checksum_mismatch", "checksum mismatch") + return + } + + env := lfs.Envelope{ + Version: 1, + Bucket: p.s3Bucket, + Key: 
session.S3Key, + Size: session.TotalUploaded, + SHA256: shaHex, + Checksum: checksum, + ChecksumAlg: string(session.ChecksumAlg), + ContentType: session.ContentType, + CreatedAt: time.Now().UTC().Format(time.RFC3339), + ProxyID: p.proxyID, + } + encoded, err := lfs.EncodeEnvelope(env) + if err != nil { + p.writeHTTPError(w, requestID, session.Topic, http.StatusInternalServerError, "encode_failed", err.Error()) + return + } + + record := kmsg.Record{ + TimestampDelta64: 0, + OffsetDelta: 0, + Key: session.KeyBytes, + Value: encoded, + } + batchBytes := buildRecordBatch([]kmsg.Record{record}) + + produceReq := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 15000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: session.Topic, + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: session.Partition, + Records: batchBytes, + }}, + }}, + } + + // Produce the envelope with a fresh correlation ID over a dedicated backend connection. + correlationID := int32(atomic.AddUint32(&p.corrID, 1)) + reqHeader := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: correlationID} + payload, err := encodeProduceRequest(reqHeader, produceReq) + if err != nil { + p.writeHTTPError(w, requestID, session.Topic, http.StatusInternalServerError, "encode_failed", err.Error()) + return + } + + backendConn, backendAddr, err := p.connectBackend(r.Context()) + if err != nil { + p.trackOrphans([]orphanInfo{{Topic: session.Topic, Key: session.S3Key, RequestID: requestID, Reason: "kafka_produce_failed"}}) + p.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "backend_unavailable", err.Error(), "kafka_produce", session.TotalUploaded, 0) + p.writeHTTPError(w, requestID, session.Topic, http.StatusServiceUnavailable, "backend_unavailable", err.Error()) + return + } + defer func() { _ = backendConn.Close() }() + + if _, err := p.forwardToBackend(r.Context(), backendConn, backendAddr, payload); err != nil { + p.trackOrphans([]orphanInfo{{Topic: session.Topic, Key: session.S3Key, RequestID: requestID, Reason: "kafka_produce_failed"}}) 
+ p.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "backend_error", err.Error(), "kafka_produce", session.TotalUploaded, 0) + p.writeHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "backend_error", err.Error()) + return + } + + p.metrics.IncRequests(session.Topic, "ok", "lfs") + p.metrics.AddUploadBytes(session.TotalUploaded) + + p.tracker.EmitUploadCompleted(requestID, session.Topic, session.Partition, 0, p.s3Bucket, session.S3Key, session.TotalUploaded, shaHex, checksum, string(session.ChecksumAlg), session.ContentType, 0) + + p.deleteUploadSession(sessionID) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(env) +} + +// handleHTTPUploadAbort cancels a chunked upload, aborting the S3 multipart upload and dropping the session. +func (p *lfsProxy) handleHTTPUploadAbort(w http.ResponseWriter, r *http.Request, requestID, sessionID string) { + session, ok := p.getUploadSession(sessionID) + if !ok { + p.writeHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found") + return + } + session.mu.Lock() + defer session.mu.Unlock() + _ = p.s3Uploader.AbortMultipartUpload(r.Context(), session.S3Key, session.UploadID) + p.deleteUploadSession(sessionID) + w.WriteHeader(http.StatusNoContent) +} + +// storeUploadSession registers a chunked-upload session, evicting expired sessions first. +func (p *lfsProxy) storeUploadSession(session *uploadSession) { + if session == nil { + return + } + p.uploadMu.Lock() + defer p.uploadMu.Unlock() + p.cleanupUploadSessionsLocked() + p.uploadSessions[session.ID] = session +} + +func (p *lfsProxy) getUploadSession(id string) (*uploadSession, bool) { + p.uploadMu.Lock() + defer p.uploadMu.Unlock() + p.cleanupUploadSessionsLocked() + session, ok := p.uploadSessions[id] + return session, ok +} + +func (p *lfsProxy) deleteUploadSession(id string) { + p.uploadMu.Lock() + defer p.uploadMu.Unlock() + delete(p.uploadSessions, id) +} + +// cleanupUploadSessionsLocked drops expired sessions; caller must hold p.uploadMu. +// NOTE(review): expired sessions are removed without aborting their S3 multipart uploads — confirm a background reaper exists, otherwise parts leak in S3. +func (p *lfsProxy) cleanupUploadSessionsLocked() { + now := time.Now().UTC() + for id, session := range p.uploadSessions { + if session.ExpiresAt.Before(now) { + 
delete(p.uploadSessions, id) + } + } +} + +// statusForUploadError maps a streaming-upload error onto an HTTP status and error code. +// NOTE(review): matching on error-message substrings is brittle — sentinel errors checked with errors.Is would be safer. +func statusForUploadError(err error) (int, string) { + msg := err.Error() + switch { + case strings.Contains(msg, "exceeds max"): + return http.StatusBadRequest, "payload_too_large" + case strings.Contains(msg, "empty upload"): + return http.StatusBadRequest, "empty_upload" + case strings.Contains(msg, "s3 key required"): + return http.StatusBadRequest, "invalid_key" + case strings.Contains(msg, "reader required"): + return http.StatusBadRequest, "invalid_reader" + default: + return http.StatusBadGateway, "s3_upload_failed" + } +} + +// writeHTTPError logs the failure and writes a JSON error body carrying the request ID. +// NOTE(review): the log message reads "http produce failed" even for download and session endpoints — confirm intended. +func (p *lfsProxy) writeHTTPError(w http.ResponseWriter, requestID, topic string, status int, code, message string) { + if topic != "" { + p.logger.Warn("http produce failed", "status", status, "code", code, "requestId", requestID, "topic", topic, "error", message) + } else { + p.logger.Warn("http produce failed", "status", status, "code", code, "requestId", requestID, "error", message) + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(errorResponse{ + Code: code, + Message: message, + RequestID: requestID, + }) +} + +// validateHTTPAPIKey accepts the configured key via X-API-Key or an Authorization bearer token. +func (p *lfsProxy) validateHTTPAPIKey(r *http.Request) bool { + if r == nil { + return false + } + key := strings.TrimSpace(r.Header.Get("X-API-Key")) + if key == "" { + auth := strings.TrimSpace(r.Header.Get("Authorization")) + if strings.HasPrefix(strings.ToLower(auth), "bearer ") { + key = strings.TrimSpace(auth[len("bearer "):]) + } + } + if key == "" { + return false + } + // Use constant-time comparison to prevent timing attacks + return subtle.ConstantTimeCompare([]byte(key), []byte(p.httpAPIKey)) == 1 +} + +// validateObjectKey rejects keys that could escape the proxy's S3 namespace. +func (p *lfsProxy) validateObjectKey(key string) error { + if strings.HasPrefix(key, "/") { + return errors.New("key must be relative") + } + if strings.Contains(key, "..") { + return errors.New("key must not contain '..'") + } + ns := strings.TrimSpace(p.s3Namespace) + if ns != "" && 
!strings.HasPrefix(key, ns+"/") { + return errors.New("key outside namespace") + } + if !strings.Contains(key, "/lfs/") { + return errors.New("key must include /lfs/ segment") + } + return nil +} + + // isValidTopicName validates a Kafka topic name. + // Topics must be 1-249 characters, containing only alphanumeric, dots, underscores, or hyphens. + func (p *lfsProxy) isValidTopicName(topic string) bool { + if len(topic) == 0 || len(topic) > p.topicMaxLength { + return false + } + return validTopicPattern.MatchString(topic) + } + + // getClientIP extracts the client IP address from the request. + // It checks X-Forwarded-For and X-Real-IP headers first, then falls back to RemoteAddr. + // NOTE(review): both headers are client-controlled; only trust them behind a proxy that sets them. + func getClientIP(r *http.Request) string { + if xff := r.Header.Get("X-Forwarded-For"); xff != "" { + // X-Forwarded-For can contain multiple IPs; take the first one + if idx := strings.Index(xff, ","); idx > 0 { + return strings.TrimSpace(xff[:idx]) + } + return strings.TrimSpace(xff) + } + if xri := r.Header.Get("X-Real-IP"); xri != "" { + return strings.TrimSpace(xri) + } + // FIX: RemoteAddr is "host:port"; split on the LAST colon so bracketed IPv6 + // addresses such as "[::1]:8080" keep their full host part. (strings.Cut on the + // first colon returned "[" for IPv6 and misnamed its bool result "err".) + if idx := strings.LastIndex(r.RemoteAddr, ":"); idx > 0 { + return strings.Trim(r.RemoteAddr[:idx], "[]") + } + return r.RemoteAddr + } diff --git a/cmd/lfs-proxy/http_test.go b/cmd/lfs-proxy/http_test.go new file mode 100644 index 00000000..fbae909f --- /dev/null +++ b/cmd/lfs-proxy/http_test.go @@ -0,0 +1,274 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "io" + "net/http" + "net/http/httptest" + "testing" + "time" + + "log/slog" + + "github.com/aws/aws-sdk-go-v2/aws/signer/v4" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +// fakePresignAPI stubs the S3 presign client with a fixed URL or error. +type fakePresignAPI struct { + url string + err error +} + +func (f fakePresignAPI) PresignGetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.PresignOptions)) (*v4.PresignedHTTPRequest, error) { + if f.err != nil { + return nil, f.err + } + return &v4.PresignedHTTPRequest{URL: f.url}, nil +} + +// newReadyProxy builds a minimal lfsProxy wired to the given S3 API stub, marked ready and healthy. +func newReadyProxy(api s3API) *lfsProxy { + proxy := &lfsProxy{ + logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + topicMaxLength: defaultTopicMaxLength, + cacheTTL: time.Minute, + metrics: newLfsMetrics(), + s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: minMultipartChunkSize, api: api, presign: fakePresignAPI{url: "https://example.com/object"}}, + s3Bucket: "bucket", + s3Namespace: "default", + downloadTTLMax: 2 * time.Minute, + } + proxy.setReady(true) + proxy.markS3Healthy(true) + proxy.touchHealthy() + return proxy +} + +func TestHTTPProduceNotReadyReturnsJSON(t *testing.T) { + proxy := &lfsProxy{ + logger: slog.New(slog.NewTextHandler(io.Discard, nil)), + topicMaxLength: defaultTopicMaxLength, + cacheTTL: time.Minute, + metrics: newLfsMetrics(), + } + + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) + req.Header.Set(headerTopic, "lfs-demo-topic") + rec := httptest.NewRecorder() + + proxy.handleHTTPProduce(rec, 
req) + + resp := rec.Result() + if resp.StatusCode != http.StatusServiceUnavailable { + t.Fatalf("expected status %d, got %d", http.StatusServiceUnavailable, resp.StatusCode) + } + if got := resp.Header.Get(headerRequestID); got == "" { + t.Fatalf("expected %s header to be set", headerRequestID) + } + var body errorResponse + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + t.Fatalf("decode response: %v", err) + } + if body.Code != "proxy_not_ready" { + t.Fatalf("unexpected code: %s", body.Code) + } + if body.RequestID == "" { + t.Fatalf("expected request_id in body") + } +} + +func TestHTTPProduceInvalidTopic(t *testing.T) { + proxy := newReadyProxy(fakeS3API{}) + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) + req.Header.Set(headerTopic, "bad topic") + rec := httptest.NewRecorder() + + proxy.handleHTTPProduce(rec, req) + + resp := rec.Result() + if resp.StatusCode != http.StatusBadRequest { + t.Fatalf("expected status %d, got %d", http.StatusBadRequest, resp.StatusCode) + } + var body errorResponse + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + t.Fatalf("decode response: %v", err) + } + if body.Code != "invalid_topic" { + t.Fatalf("unexpected code: %s", body.Code) + } +} + +func TestHTTPProduceUploadFailureReturnsBadGateway(t *testing.T) { + proxy := newReadyProxy(failingS3API{err: errors.New("boom")}) + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) + req.Header.Set(headerTopic, "lfs-demo-topic") + rec := httptest.NewRecorder() + + proxy.handleHTTPProduce(rec, req) + + resp := rec.Result() + if resp.StatusCode != http.StatusBadGateway { + t.Fatalf("expected status %d, got %d", http.StatusBadGateway, resp.StatusCode) + } + var body errorResponse + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + t.Fatalf("decode response: %v", err) + } + if body.Code != "s3_upload_failed" { + t.Fatalf("unexpected code: %s", 
body.Code) + } +} + +func TestHTTPProduceRequestIDPreserved(t *testing.T) { + proxy := newReadyProxy(failingS3API{err: errors.New("boom")}) + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) + req.Header.Set(headerTopic, "lfs-demo-topic") + req.Header.Set(headerRequestID, "req-123") + rec := httptest.NewRecorder() + + proxy.handleHTTPProduce(rec, req) + + resp := rec.Result() + if got := resp.Header.Get(headerRequestID); got != "req-123" { + t.Fatalf("expected request id to be preserved, got %q", got) + } + var body errorResponse + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + t.Fatalf("decode response: %v", err) + } + if body.RequestID != "req-123" { + t.Fatalf("expected request_id in body to be preserved, got %q", body.RequestID) + } +} + +func TestHTTPProduceUnauthorized(t *testing.T) { + proxy := newReadyProxy(fakeS3API{}) + proxy.httpAPIKey = "secret" + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) + req.Header.Set(headerTopic, "lfs-demo-topic") + rec := httptest.NewRecorder() + + proxy.handleHTTPProduce(rec, req) + + resp := rec.Result() + if resp.StatusCode != http.StatusUnauthorized { + t.Fatalf("expected status %d, got %d", http.StatusUnauthorized, resp.StatusCode) + } +} + +func TestHTTPProduceMethodNotAllowed(t *testing.T) { + proxy := newReadyProxy(fakeS3API{}) + req := httptest.NewRequest(http.MethodGet, "/lfs/produce", nil) + rec := httptest.NewRecorder() + + proxy.handleHTTPProduce(rec, req) + + resp := rec.Result() + if resp.StatusCode != http.StatusMethodNotAllowed { + t.Fatalf("expected status %d, got %d", http.StatusMethodNotAllowed, resp.StatusCode) + } +} + +func TestHTTPDownloadMethodNotAllowed(t *testing.T) { + proxy := newReadyProxy(fakeS3API{}) + req := httptest.NewRequest(http.MethodGet, "/lfs/download", nil) + rec := httptest.NewRecorder() + + proxy.handleHTTPDownload(rec, req) + + if rec.Result().StatusCode != 
http.StatusMethodNotAllowed { + t.Fatalf("expected status %d, got %d", http.StatusMethodNotAllowed, rec.Result().StatusCode) + } +} + +func TestHTTPDownloadUnauthorized(t *testing.T) { + proxy := newReadyProxy(fakeS3API{}) + proxy.httpAPIKey = "secret" + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader([]byte(`{"bucket":"bucket","key":"default/topic/lfs/2026/02/03/obj-1"}`))) + rec := httptest.NewRecorder() + + proxy.handleHTTPDownload(rec, req) + + if rec.Result().StatusCode != http.StatusUnauthorized { + t.Fatalf("expected status %d, got %d", http.StatusUnauthorized, rec.Result().StatusCode) + } +} + +func TestHTTPDownloadInvalidKey(t *testing.T) { + proxy := newReadyProxy(fakeS3API{}) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader([]byte(`{"bucket":"bucket","key":"other/topic/obj-1"}`))) + rec := httptest.NewRecorder() + + proxy.handleHTTPDownload(rec, req) + + if rec.Result().StatusCode != http.StatusBadRequest { + t.Fatalf("expected status %d, got %d", http.StatusBadRequest, rec.Result().StatusCode) + } + var body errorResponse + if err := json.NewDecoder(rec.Body).Decode(&body); err != nil { + t.Fatalf("decode response: %v", err) + } + if body.Code != "invalid_key" { + t.Fatalf("unexpected code: %s", body.Code) + } +} + +func TestHTTPDownloadPresignOK(t *testing.T) { + proxy := newReadyProxy(fakeS3API{}) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader([]byte(`{"bucket":"bucket","key":"default/topic/lfs/2026/02/03/obj-1","mode":"presign","expires_seconds":120}`))) + rec := httptest.NewRecorder() + + proxy.handleHTTPDownload(rec, req) + + resp := rec.Result() + if resp.StatusCode != http.StatusOK { + t.Fatalf("expected status %d, got %d", http.StatusOK, resp.StatusCode) + } + var body downloadResponse + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + t.Fatalf("decode response: %v", err) + } + if body.URL == "" || body.Mode != "presign" { + 
t.Fatalf("expected presign response, got %+v", body) + } +} + +func TestHTTPDownloadStreamOK(t *testing.T) { + proxy := newReadyProxy(fakeS3API{}) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader([]byte(`{"bucket":"bucket","key":"default/topic/lfs/2026/02/03/obj-1","mode":"stream"}`))) + rec := httptest.NewRecorder() + + proxy.handleHTTPDownload(rec, req) + + resp := rec.Result() + if resp.StatusCode != http.StatusOK { + t.Fatalf("expected status %d, got %d", http.StatusOK, resp.StatusCode) + } + if resp.Header.Get("Content-Type") == "" { + t.Fatalf("expected content-type header to be set") + } + payload, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("read body: %v", err) + } + if string(payload) == "" { + t.Fatalf("expected body payload") + } +} diff --git a/cmd/lfs-proxy/http_tls.go b/cmd/lfs-proxy/http_tls.go new file mode 100644 index 00000000..d659dcd0 --- /dev/null +++ b/cmd/lfs-proxy/http_tls.go @@ -0,0 +1,59 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "crypto/tls" + "crypto/x509" + "errors" + "os" + "strings" +) + +func buildHTTPServerTLSConfig() (*tls.Config, string, string, error) { + enabled := envBoolDefault("KAFSCALE_LFS_PROXY_HTTP_TLS_ENABLED", false) + if !enabled { + return nil, "", "", nil + } + certFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_CERT_FILE")) + keyFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_KEY_FILE")) + clientCA := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_CLIENT_CA_FILE")) + requireClient := envBoolDefault("KAFSCALE_LFS_PROXY_HTTP_TLS_REQUIRE_CLIENT_CERT", false) + + if certFile == "" || keyFile == "" { + return nil, "", "", errors.New("http TLS cert and key must be set when enabled") + } + + cfg := &tls.Config{MinVersion: tls.VersionTLS12} + if clientCA != "" { + caPEM, err := os.ReadFile(clientCA) + if err != nil { + return nil, "", "", err + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(caPEM) { + return nil, "", "", errors.New("failed to parse http TLS client CA file") + } + cfg.ClientCAs = pool + if requireClient { + cfg.ClientAuth = tls.RequireAndVerifyClientCert + } else { + cfg.ClientAuth = tls.VerifyClientCertIfGiven + } + } + + return cfg, certFile, keyFile, nil +} diff --git a/cmd/lfs-proxy/http_tls_test.go b/cmd/lfs-proxy/http_tls_test.go new file mode 100644 index 00000000..ac2813a1 --- /dev/null +++ b/cmd/lfs-proxy/http_tls_test.go @@ -0,0 +1,39 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "testing" + +func TestBuildHTTPServerTLSConfigDisabled(t *testing.T) { + t.Setenv("KAFSCALE_LFS_PROXY_HTTP_TLS_ENABLED", "false") + cfg, certFile, keyFile, err := buildHTTPServerTLSConfig() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg != nil || certFile != "" || keyFile != "" { + t.Fatalf("expected empty TLS config when disabled") + } +} + +func TestBuildHTTPServerTLSConfigMissingCert(t *testing.T) { + t.Setenv("KAFSCALE_LFS_PROXY_HTTP_TLS_ENABLED", "true") + t.Setenv("KAFSCALE_LFS_PROXY_HTTP_TLS_CERT_FILE", "") + t.Setenv("KAFSCALE_LFS_PROXY_HTTP_TLS_KEY_FILE", "") + _, _, _, err := buildHTTPServerTLSConfig() + if err == nil { + t.Fatal("expected error when cert/key missing") + } +} diff --git a/cmd/lfs-proxy/main.go b/cmd/lfs-proxy/main.go new file mode 100644 index 00000000..79df2e29 --- /dev/null +++ b/cmd/lfs-proxy/main.go @@ -0,0 +1,440 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "crypto/tls" + "errors" + "log/slog" + "net" + "net/http" + "os" + "os/signal" + "strconv" + "strings" + "sync" + "syscall" + "time" + + "github.com/KafScale/platform/pkg/metadata" + "github.com/twmb/franz-go/pkg/kmsg" +) + +const ( + defaultProxyAddr = ":9092" + defaultMaxBlob = int64(5 << 30) + defaultChunkSize = int64(5 << 20) + defaultDialTimeoutMs = 5000 + defaultBackendBackoffMs = 500 + defaultBackendRefreshIntervalSec = 3 + defaultS3HealthIntervalSec = 30 + defaultHTTPReadTimeoutSec = 30 + defaultHTTPWriteTimeoutSec = 300 + defaultHTTPIdleTimeoutSec = 60 + defaultHTTPHeaderTimeoutSec = 10 + defaultHTTPMaxHeaderBytes = 1 << 20 + defaultHTTPShutdownTimeoutSec = 10 + defaultTopicMaxLength = 249 + defaultDownloadTTLSec = 120 + defaultUploadSessionTTLSec = 3600 +) + +type lfsProxy struct { + addr string + advertisedHost string + advertisedPort int32 + store metadata.Store + backends []string + logger *slog.Logger + rr uint32 + dialTimeout time.Duration + httpReadTimeout time.Duration + httpWriteTimeout time.Duration + httpIdleTimeout time.Duration + httpHeaderTimeout time.Duration + httpMaxHeaderBytes int + httpShutdownTimeout time.Duration + topicMaxLength int + downloadTTLMax time.Duration + checksumAlg string + backendTLSConfig *tls.Config + backendSASLMechanism string + backendSASLUsername string + backendSASLPassword string + httpTLSConfig *tls.Config + httpTLSCertFile string + httpTLSKeyFile string + ready uint32 + lastHealthy int64 + cacheTTL time.Duration + cacheMu sync.RWMutex + cachedBackends []string + apiVersions []kmsg.ApiVersionsResponseApiKey + metrics *lfsMetrics + + s3Uploader *s3Uploader + s3Bucket string + s3Namespace string + maxBlob int64 + chunkSize int64 + proxyID string + s3Healthy uint32 + corrID uint32 + httpAPIKey string + + // LFS Operations Tracker + tracker *LfsOpsTracker + + 
uploadSessionTTL time.Duration + uploadMu sync.Mutex + uploadSessions map[string]*uploadSession +} + +func main() { + ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + defer cancel() + + logLevel := slog.LevelInfo + if strings.EqualFold(os.Getenv("KAFSCALE_LFS_PROXY_LOG_LEVEL"), "debug") { + logLevel = slog.LevelDebug + } + logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: logLevel})) + + logger.Warn("DEPRECATED: standalone lfs-proxy is deprecated; use the unified proxy with KAFSCALE_PROXY_LFS_ENABLED=true instead") + + addr := envOrDefault("KAFSCALE_LFS_PROXY_ADDR", defaultProxyAddr) + healthAddr := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HEALTH_ADDR")) + metricsAddr := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_METRICS_ADDR")) + httpAddr := envOrDefault("KAFSCALE_LFS_PROXY_HTTP_ADDR", ":8080") + httpAPIKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_API_KEY")) + advertisedHost := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_ADVERTISED_HOST")) + advertisedPort := envPort("KAFSCALE_LFS_PROXY_ADVERTISED_PORT", portFromAddr(addr, 9092)) + logger.Info("advertised address configured", "host", advertisedHost, "port", advertisedPort) + backends := splitCSV(os.Getenv("KAFSCALE_LFS_PROXY_BACKENDS")) + backendBackoff := time.Duration(envInt("KAFSCALE_LFS_PROXY_BACKEND_BACKOFF_MS", defaultBackendBackoffMs)) * time.Millisecond + backendRefreshInterval := time.Duration(envInt("KAFSCALE_LFS_PROXY_BACKEND_REFRESH_INTERVAL_SEC", defaultBackendRefreshIntervalSec)) * time.Second + cacheTTL := time.Duration(envInt("KAFSCALE_LFS_PROXY_BACKEND_CACHE_TTL_SEC", 60)) * time.Second + if cacheTTL <= 0 { + cacheTTL = 60 * time.Second + } + + s3Bucket := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_BUCKET")) + s3Region := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_REGION")) + s3Endpoint := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ENDPOINT")) + s3PublicURL := 
strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_PUBLIC_ENDPOINT")) + s3AccessKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ACCESS_KEY")) + s3SecretKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SECRET_KEY")) + s3SessionToken := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SESSION_TOKEN")) + forcePathStyle := envBoolDefault("KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE", s3Endpoint != "") + s3EnsureBucket := envBoolDefault("KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET", false) + maxBlob := envInt64("KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE", defaultMaxBlob) + chunkSize := envInt64("KAFSCALE_LFS_PROXY_CHUNK_SIZE", defaultChunkSize) + proxyID := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_ID")) + s3Namespace := envOrDefault("KAFSCALE_S3_NAMESPACE", "default") + dialTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_DIAL_TIMEOUT_MS", defaultDialTimeoutMs)) * time.Millisecond + s3HealthInterval := time.Duration(envInt("KAFSCALE_LFS_PROXY_S3_HEALTH_INTERVAL_SEC", defaultS3HealthIntervalSec)) * time.Second + httpReadTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_READ_TIMEOUT_SEC", defaultHTTPReadTimeoutSec)) * time.Second + httpWriteTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_WRITE_TIMEOUT_SEC", defaultHTTPWriteTimeoutSec)) * time.Second + httpIdleTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_IDLE_TIMEOUT_SEC", defaultHTTPIdleTimeoutSec)) * time.Second + httpHeaderTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_HEADER_TIMEOUT_SEC", defaultHTTPHeaderTimeoutSec)) * time.Second + httpMaxHeaderBytes := envInt("KAFSCALE_LFS_PROXY_HTTP_MAX_HEADER_BYTES", defaultHTTPMaxHeaderBytes) + httpShutdownTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_SHUTDOWN_TIMEOUT_SEC", defaultHTTPShutdownTimeoutSec)) * time.Second + uploadSessionTTL := time.Duration(envInt("KAFSCALE_LFS_PROXY_UPLOAD_SESSION_TTL_SEC", defaultUploadSessionTTLSec)) * time.Second + topicMaxLength := envInt("KAFSCALE_LFS_PROXY_TOPIC_MAX_LENGTH", 
defaultTopicMaxLength) + downloadTTLSec := envInt("KAFSCALE_LFS_PROXY_DOWNLOAD_TTL_SEC", defaultDownloadTTLSec) + if downloadTTLSec <= 0 { + downloadTTLSec = defaultDownloadTTLSec + } + checksumAlg := envOrDefault("KAFSCALE_LFS_PROXY_CHECKSUM_ALGO", "sha256") + backendTLSConfig, err := buildBackendTLSConfig() + if err != nil { + logger.Error("backend tls config failed", "error", err) + os.Exit(1) + } + backendSASLMechanism := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_MECHANISM")) + backendSASLUsername := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_USERNAME")) + backendSASLPassword := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_PASSWORD")) + httpTLSConfig, httpTLSCertFile, httpTLSKeyFile, err := buildHTTPServerTLSConfig() + if err != nil { + logger.Error("http tls config failed", "error", err) + os.Exit(1) + } + + store, err := buildMetadataStore(ctx) + if err != nil { + logger.Error("metadata store init failed", "error", err) + os.Exit(1) + } + if store == nil { + logger.Error("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS not set; proxy cannot build metadata responses") + os.Exit(1) + } + + if advertisedHost == "" { + logger.Warn("KAFSCALE_LFS_PROXY_ADVERTISED_HOST not set; clients may not resolve the proxy address") + } + + s3Uploader, err := newS3Uploader(ctx, s3Config{ + Bucket: s3Bucket, + Region: s3Region, + Endpoint: s3Endpoint, + PublicEndpoint: s3PublicURL, + AccessKeyID: s3AccessKey, + SecretAccessKey: s3SecretKey, + SessionToken: s3SessionToken, + ForcePathStyle: forcePathStyle, + ChunkSize: chunkSize, + }) + if err != nil { + logger.Error("s3 client init failed", "error", err) + os.Exit(1) + } + if s3EnsureBucket { + if err := s3Uploader.EnsureBucket(ctx); err != nil { + logger.Error("s3 bucket ensure failed", "error", err) + } + } + + metrics := newLfsMetrics() + + // LFS Ops Tracker configuration + trackerEnabled := envBoolDefault("KAFSCALE_LFS_TRACKER_ENABLED", true) + trackerTopic := 
envOrDefault("KAFSCALE_LFS_TRACKER_TOPIC", defaultTrackerTopic) + trackerBatchSize := envInt("KAFSCALE_LFS_TRACKER_BATCH_SIZE", defaultTrackerBatchSize) + trackerFlushMs := envInt("KAFSCALE_LFS_TRACKER_FLUSH_MS", defaultTrackerFlushMs) + trackerEnsureTopic := envBoolDefault("KAFSCALE_LFS_TRACKER_ENSURE_TOPIC", true) + trackerPartitions := envInt("KAFSCALE_LFS_TRACKER_PARTITIONS", defaultTrackerPartitions) + trackerReplication := envInt("KAFSCALE_LFS_TRACKER_REPLICATION_FACTOR", defaultTrackerReplication) + + trackerCfg := TrackerConfig{ + Enabled: trackerEnabled, + Topic: trackerTopic, + Brokers: backends, + BatchSize: trackerBatchSize, + FlushMs: trackerFlushMs, + ProxyID: proxyID, + EnsureTopic: trackerEnsureTopic, + Partitions: trackerPartitions, + ReplicationFactor: trackerReplication, + } + + tracker, err := NewLfsOpsTracker(ctx, trackerCfg, logger) + if err != nil { + logger.Warn("lfs ops tracker init failed, continuing without tracker", "error", err) + tracker = &LfsOpsTracker{config: trackerCfg, logger: logger} + } + + p := &lfsProxy{ + addr: addr, + advertisedHost: advertisedHost, + advertisedPort: advertisedPort, + store: store, + backends: backends, + logger: logger, + dialTimeout: dialTimeout, + cacheTTL: cacheTTL, + apiVersions: generateProxyApiVersions(), + metrics: metrics, + s3Uploader: s3Uploader, + s3Bucket: s3Bucket, + s3Namespace: s3Namespace, + maxBlob: maxBlob, + chunkSize: chunkSize, + proxyID: proxyID, + httpAPIKey: httpAPIKey, + httpReadTimeout: httpReadTimeout, + httpWriteTimeout: httpWriteTimeout, + httpIdleTimeout: httpIdleTimeout, + httpHeaderTimeout: httpHeaderTimeout, + httpMaxHeaderBytes: httpMaxHeaderBytes, + httpShutdownTimeout: httpShutdownTimeout, + topicMaxLength: topicMaxLength, + downloadTTLMax: time.Duration(downloadTTLSec) * time.Second, + checksumAlg: checksumAlg, + backendTLSConfig: backendTLSConfig, + backendSASLMechanism: backendSASLMechanism, + backendSASLUsername: backendSASLUsername, + backendSASLPassword: 
backendSASLPassword, + httpTLSConfig: httpTLSConfig, + httpTLSCertFile: httpTLSCertFile, + httpTLSKeyFile: httpTLSKeyFile, + tracker: tracker, + uploadSessionTTL: uploadSessionTTL, + uploadSessions: make(map[string]*uploadSession), + } + if len(backends) > 0 { + p.setCachedBackends(backends) + p.touchHealthy() + p.setReady(true) + } + p.markS3Healthy(true) + p.startBackendRefresh(ctx, backendBackoff, backendRefreshInterval) + p.startS3HealthCheck(ctx, s3HealthInterval) + if healthAddr != "" { + p.startHealthServer(ctx, healthAddr) + } + if metricsAddr != "" { + p.startMetricsServer(ctx, metricsAddr) + } + if httpAddr != "" { + p.startHTTPServer(ctx, httpAddr) + } + if err := p.listenAndServe(ctx); err != nil && !errors.Is(err, context.Canceled) { + logger.Error("lfs proxy server error", "error", err) + os.Exit(1) + } + + // Graceful shutdown of tracker + if p.tracker != nil { + if err := p.tracker.Close(); err != nil { + logger.Warn("tracker close error", "error", err) + } + } +} + +func envOrDefault(key, fallback string) string { + if val := os.Getenv(key); val != "" { + return val + } + return fallback +} + +func envPort(key string, fallback int32) int32 { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + parsed, err := strconv.ParseInt(val, 10, 32) + if err != nil || parsed <= 0 { + return fallback + } + return int32(parsed) +} + +func envInt(key string, fallback int) int { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + parsed, err := strconv.Atoi(val) + if err != nil { + return fallback + } + return parsed +} + +func envInt64(key string, fallback int64) int64 { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + parsed, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return fallback + } + return parsed +} + +func envBoolDefault(key string, fallback bool) bool { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + switch 
strings.ToLower(val) { + case "1", "true", "yes", "y", "on": + return true + case "0", "false", "no", "n", "off": + return false + default: + return fallback + } +} + +func portFromAddr(addr string, fallback int32) int32 { + _, portStr, err := net.SplitHostPort(addr) + if err != nil { + return fallback + } + port, err := strconv.ParseInt(portStr, 10, 32) + if err != nil || port <= 0 || port > 65535 { + return fallback + } + return int32(port) +} + +func splitCSV(raw string) []string { + if strings.TrimSpace(raw) == "" { + return nil + } + parts := strings.Split(raw, ",") + out := make([]string, 0, len(parts)) + for _, part := range parts { + val := strings.TrimSpace(part) + if val != "" { + out = append(out, val) + } + } + return out +} + +func buildMetadataStore(ctx context.Context) (metadata.Store, error) { + cfg, ok := proxyEtcdConfigFromEnv() + if !ok { + return nil, nil + } + return metadata.NewEtcdStore(ctx, metadata.ClusterMetadata{}, cfg) +} + +func proxyEtcdConfigFromEnv() (metadata.EtcdStoreConfig, bool) { + endpoints := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS")) + if endpoints == "" { + return metadata.EtcdStoreConfig{}, false + } + return metadata.EtcdStoreConfig{ + Endpoints: strings.Split(endpoints, ","), + Username: os.Getenv("KAFSCALE_LFS_PROXY_ETCD_USERNAME"), + Password: os.Getenv("KAFSCALE_LFS_PROXY_ETCD_PASSWORD"), + }, true +} + +func (p *lfsProxy) startMetricsServer(ctx context.Context, addr string) { + mux := http.NewServeMux() + mux.HandleFunc("/metrics", func(w http.ResponseWriter, _ *http.Request) { + p.metrics.WritePrometheus(w) + }) + srv := &http.Server{ + Addr: addr, + Handler: mux, + ReadTimeout: p.httpReadTimeout, + WriteTimeout: p.httpWriteTimeout, + IdleTimeout: p.httpIdleTimeout, + ReadHeaderTimeout: p.httpHeaderTimeout, + MaxHeaderBytes: p.httpMaxHeaderBytes, + } + go func() { + <-ctx.Done() + shutdownCtx, cancel := context.WithTimeout(context.Background(), p.httpShutdownTimeout) + defer cancel() + _ = 
srv.Shutdown(shutdownCtx) + }() + go func() { + p.logger.Info("lfs proxy metrics listening", "addr", addr) + if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { + p.logger.Warn("lfs proxy metrics server error", "error", err) + } + }() +} diff --git a/cmd/lfs-proxy/metrics.go b/cmd/lfs-proxy/metrics.go new file mode 100644 index 00000000..a24c3030 --- /dev/null +++ b/cmd/lfs-proxy/metrics.go @@ -0,0 +1,221 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "fmt" + "io" + "runtime" + "sort" + "sync" + "sync/atomic" +) + +type lfsMetrics struct { + uploadDuration *histogram + uploadBytes uint64 + s3Errors uint64 + orphans uint64 + mu sync.Mutex + requests map[string]*topicCounters +} + +func newLfsMetrics() *lfsMetrics { + buckets := []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30} + return &lfsMetrics{ + uploadDuration: newHistogram(buckets), + requests: make(map[string]*topicCounters), + } +} + +func (m *lfsMetrics) ObserveUploadDuration(seconds float64) { + if m == nil || m.uploadDuration == nil { + return + } + m.uploadDuration.Observe(seconds) +} + +func (m *lfsMetrics) AddUploadBytes(n int64) { + if m == nil || n <= 0 { + return + } + atomic.AddUint64(&m.uploadBytes, uint64(n)) +} + +func (m *lfsMetrics) IncRequests(topic, status, typ string) { + if m == nil { + return + } + if topic == "" { + topic = "unknown" + } + m.mu.Lock() + counters := m.requests[topic] + if counters == nil { + counters = &topicCounters{} + m.requests[topic] = counters + } + m.mu.Unlock() + switch { + case status == "ok" && typ == "lfs": + atomic.AddUint64(&counters.okLfs, 1) + case status == "error" && typ == "lfs": + atomic.AddUint64(&counters.errLfs, 1) + case status == "ok" && typ == "passthrough": + atomic.AddUint64(&counters.okPas, 1) + case status == "error" && typ == "passthrough": + atomic.AddUint64(&counters.errPas, 1) + } +} + +func (m *lfsMetrics) IncS3Errors() { + if m == nil { + return + } + atomic.AddUint64(&m.s3Errors, 1) +} + +func (m *lfsMetrics) IncOrphans(count int) { + if m == nil || count <= 0 { + return + } + atomic.AddUint64(&m.orphans, uint64(count)) +} + +func (m *lfsMetrics) WritePrometheus(w io.Writer) { + if m == nil { + return + } + m.uploadDuration.WritePrometheus(w, "kafscale_lfs_proxy_upload_duration_seconds", "LFS proxy upload durations in seconds") + _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_upload_bytes_total Total bytes uploaded via LFS\n") + _, _ = 
fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_upload_bytes_total counter\n") + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_upload_bytes_total %d\n", atomic.LoadUint64(&m.uploadBytes)) + _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_requests_total LFS proxy requests\n") + _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_requests_total counter\n") + topics := m.snapshotTopics() + for _, topic := range topics { + counters := m.requests[topic] + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"ok\",type=\"lfs\"} %d\n", topic, atomic.LoadUint64(&counters.okLfs)) + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"error\",type=\"lfs\"} %d\n", topic, atomic.LoadUint64(&counters.errLfs)) + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"ok\",type=\"passthrough\"} %d\n", topic, atomic.LoadUint64(&counters.okPas)) + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"error\",type=\"passthrough\"} %d\n", topic, atomic.LoadUint64(&counters.errPas)) + } + _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_s3_errors_total Total S3 errors\n") + _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_s3_errors_total counter\n") + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_s3_errors_total %d\n", atomic.LoadUint64(&m.s3Errors)) + _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_orphan_objects_total LFS objects uploaded but not committed to Kafka\n") + _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_orphan_objects_total counter\n") + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_orphan_objects_total %d\n", atomic.LoadUint64(&m.orphans)) + + // Runtime metrics + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_goroutines Number of goroutines\n") + _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_goroutines gauge\n") + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_goroutines %d\n", runtime.NumGoroutine()) + _, _ = 
fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_memory_alloc_bytes Bytes allocated and in use\n") + _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_memory_alloc_bytes gauge\n") + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_memory_alloc_bytes %d\n", memStats.Alloc) + _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_memory_sys_bytes Bytes obtained from system\n") + _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_memory_sys_bytes gauge\n") + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_memory_sys_bytes %d\n", memStats.Sys) + _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_gc_pause_total_ns Total GC pause time in nanoseconds\n") + _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_gc_pause_total_ns counter\n") + _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_gc_pause_total_ns %d\n", memStats.PauseTotalNs) +} + +func (m *lfsMetrics) snapshotTopics() []string { + m.mu.Lock() + defer m.mu.Unlock() + out := make([]string, 0, len(m.requests)) + for topic := range m.requests { + out = append(out, topic) + } + sort.Strings(out) + return out +} + +type topicCounters struct { + okLfs uint64 + errLfs uint64 + okPas uint64 + errPas uint64 +} + +type histogram struct { + mu sync.Mutex + buckets []float64 + counts []int64 + sum float64 + count int64 +} + +func newHistogram(buckets []float64) *histogram { + if len(buckets) == 0 { + buckets = []float64{1, 2, 5, 10, 25, 50, 100} + } + cp := append([]float64(nil), buckets...) + sort.Float64s(cp) + return &histogram{ + buckets: cp, + counts: make([]int64, len(cp)+1), + } +} + +func (h *histogram) Observe(value float64) { + if h == nil { + return + } + h.mu.Lock() + defer h.mu.Unlock() + h.sum += value + h.count++ + idx := sort.SearchFloat64s(h.buckets, value) + h.counts[idx]++ +} + +func (h *histogram) Snapshot() ([]float64, []int64, float64, int64) { + if h == nil { + return nil, nil, 0, 0 + } + h.mu.Lock() + defer h.mu.Unlock() + buckets := append([]float64(nil), h.buckets...) + counts := append([]int64(nil), h.counts...) 
+ return buckets, counts, h.sum, h.count +} + +func (h *histogram) WritePrometheus(w io.Writer, name, help string) { + if h == nil { + return + } + buckets, counts, sum, count := h.Snapshot() + _, _ = fmt.Fprintf(w, "# HELP %s %s\n", name, help) + _, _ = fmt.Fprintf(w, "# TYPE %s histogram\n", name) + var cumulative int64 + for i, upper := range buckets { + cumulative += counts[i] + _, _ = fmt.Fprintf(w, "%s_bucket{le=%q} %d\n", name, formatFloat(upper), cumulative) + } + cumulative += counts[len(counts)-1] + _, _ = fmt.Fprintf(w, "%s_bucket{le=\"+Inf\"} %d\n", name, cumulative) + _, _ = fmt.Fprintf(w, "%s_sum %f\n", name, sum) + _, _ = fmt.Fprintf(w, "%s_count %d\n", name, count) +} + +func formatFloat(val float64) string { + return fmt.Sprintf("%g", val) +} diff --git a/cmd/lfs-proxy/openapi.yaml b/cmd/lfs-proxy/openapi.yaml new file mode 100644 index 00000000..065ad0e1 --- /dev/null +++ b/cmd/lfs-proxy/openapi.yaml @@ -0,0 +1,433 @@ +# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +openapi: 3.0.3 +info: + title: KafScale LFS Proxy HTTP API + version: 1.0.0 + description: | + The KafScale LFS (Large File Support) Proxy provides HTTP endpoints for producing + large binary objects to Kafka via S3-backed storage. 
Instead of sending large payloads + directly through Kafka, clients upload blobs to S3 and receive an envelope (pointer) + that is stored in Kafka. + + ## Authentication + + When API key authentication is enabled (via `KAFSCALE_LFS_PROXY_HTTP_API_KEY`), + requests must include one of: + - `X-API-Key` header with the API key + - `Authorization: Bearer ` header + + ## CORS + + The API supports CORS for browser-based clients. Preflight OPTIONS requests are handled automatically. + + ## Request Tracing + + All requests can include an optional `X-Request-ID` header for tracing. If not provided, + the proxy generates one and returns it in the response. + contact: + name: KafScale + url: https://github.com/KafScale/platform + license: + name: Apache 2.0 + url: https://www.apache.org/licenses/LICENSE-2.0 +servers: + - url: http://localhost:8080 + description: Local development + - url: http://lfs-proxy:8080 + description: Kubernetes in-cluster +tags: + - name: LFS + description: Large File Support operations +paths: + /lfs/produce: + post: + tags: + - LFS + summary: Upload and produce an LFS record + description: | + Streams a binary payload to the LFS proxy, which: + 1. Uploads the blob to S3 storage + 2. Computes checksums (SHA256 by default) + 3. Creates an LFS envelope with blob metadata + 4. Produces the envelope to the specified Kafka topic + + The response contains the full LFS envelope that was stored in Kafka. 
+ operationId: lfsProduce + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: header + name: X-Kafka-Topic + required: true + schema: + type: string + pattern: '^[a-zA-Z0-9._-]+$' + maxLength: 249 + description: Target Kafka topic name (alphanumeric, dots, underscores, hyphens only) + example: video-uploads + - in: header + name: X-Kafka-Key + required: false + schema: + type: string + description: Base64-encoded Kafka record key for partitioning + example: dXNlci0xMjM= + - in: header + name: X-Kafka-Partition + required: false + schema: + type: integer + format: int32 + minimum: 0 + description: Explicit partition number (overrides key-based partitioning) + example: 0 + - in: header + name: X-LFS-Checksum + required: false + schema: + type: string + description: Expected checksum of the payload for verification + example: abc123def456... + - in: header + name: X-LFS-Checksum-Alg + required: false + schema: + type: string + enum: [sha256, md5, crc32, none] + default: sha256 + description: Checksum algorithm for verification + - in: header + name: X-Request-ID + required: false + schema: + type: string + format: uuid + description: Request correlation ID for tracing + - in: header + name: Content-Type + required: false + schema: + type: string + description: MIME type of the payload (stored in envelope) + example: video/mp4 + requestBody: + required: true + description: Binary payload to upload + content: + application/octet-stream: + schema: + type: string + format: binary + '*/*': + schema: + type: string + format: binary + responses: + "200": + description: LFS envelope successfully produced to Kafka + headers: + X-Request-ID: + schema: + type: string + description: Request correlation ID + content: + application/json: + schema: + $ref: "#/components/schemas/LfsEnvelope" + example: + kfs_lfs: 1 + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + size: 10485760 + sha256: 
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum_alg: sha256 + content_type: video/mp4 + created_at: "2026-02-05T10:30:00Z" + proxy_id: lfs-proxy-0 + "400": + description: Invalid request (missing topic, invalid checksum, etc.) + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + examples: + missing_topic: + value: + code: missing_topic + message: missing topic + request_id: abc-123 + checksum_mismatch: + value: + code: checksum_mismatch + message: "expected abc123, got def456" + request_id: abc-123 + "401": + description: Unauthorized - API key required or invalid + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "502": + description: Upstream storage or Kafka failure + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "503": + description: Proxy not ready (backends unavailable) + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + options: + tags: + - LFS + summary: CORS preflight for produce endpoint + description: Handles CORS preflight requests for browser clients + responses: + "204": + description: CORS headers returned + headers: + Access-Control-Allow-Origin: + schema: + type: string + Access-Control-Allow-Methods: + schema: + type: string + Access-Control-Allow-Headers: + schema: + type: string + + /lfs/download: + post: + tags: + - LFS + summary: Download an LFS object + description: | + Retrieves an LFS object from S3 storage. Supports two modes: + + - **presign**: Returns a presigned S3 URL for direct download (default) + - **stream**: Streams the object content through the proxy + + For presign mode, the URL TTL is capped by server configuration. 
+ operationId: lfsDownload + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: header + name: X-Request-ID + required: false + schema: + type: string + format: uuid + description: Request correlation ID for tracing + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DownloadRequest" + examples: + presign: + summary: Get presigned URL + value: + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + mode: presign + expires_seconds: 300 + stream: + summary: Stream content + value: + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + mode: stream + responses: + "200": + description: Presigned URL or streamed object content + content: + application/json: + schema: + $ref: "#/components/schemas/DownloadResponse" + example: + mode: presign + url: https://s3.amazonaws.com/kafscale-lfs/... + expires_at: "2026-02-05T10:35:00Z" + application/octet-stream: + schema: + type: string + format: binary + description: Streamed object content (when mode=stream) + "400": + description: Invalid request + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "401": + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "502": + description: Upstream storage failure + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "503": + description: Proxy not ready + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + options: + tags: + - LFS + summary: CORS preflight for download endpoint + responses: + "204": + description: CORS headers returned + +components: + securitySchemes: + ApiKeyAuth: + type: apiKey + in: header + name: X-API-Key + description: API key for authentication + BearerAuth: + type: http + scheme: bearer + description: Bearer token authentication (same API key) + + schemas: + LfsEnvelope: + type: 
object + description: LFS envelope containing blob metadata and S3 location + properties: + kfs_lfs: + type: integer + format: int32 + description: LFS envelope version + example: 1 + bucket: + type: string + description: S3 bucket name + example: kafscale-lfs + key: + type: string + description: S3 object key + example: default/video-uploads/lfs/2026/02/05/abc123 + size: + type: integer + format: int64 + description: Blob size in bytes + example: 10485760 + sha256: + type: string + description: SHA256 hash of the blob + example: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum: + type: string + description: Checksum value (algorithm depends on checksum_alg) + checksum_alg: + type: string + description: Checksum algorithm used + enum: [sha256, md5, crc32, none] + example: sha256 + content_type: + type: string + description: MIME type of the blob + example: video/mp4 + created_at: + type: string + format: date-time + description: Timestamp when the blob was created + example: "2026-02-05T10:30:00Z" + proxy_id: + type: string + description: ID of the proxy instance that handled the upload + example: lfs-proxy-0 + + DownloadRequest: + type: object + required: [bucket, key] + description: Request to download an LFS object + properties: + bucket: + type: string + description: S3 bucket name (must match proxy's configured bucket) + example: kafscale-lfs + key: + type: string + description: S3 object key from the LFS envelope + example: default/video-uploads/lfs/2026/02/05/abc123 + mode: + type: string + enum: [presign, stream] + default: presign + description: | + Download mode: + - presign: Return a presigned URL for direct S3 download + - stream: Stream content through the proxy + expires_seconds: + type: integer + format: int32 + default: 120 + minimum: 1 + maximum: 3600 + description: Requested presign URL TTL in seconds (capped by server) + + DownloadResponse: + type: object + description: Response for presign download mode + properties: + 
mode: + type: string + enum: [presign] + description: Download mode used + url: + type: string + format: uri + description: Presigned S3 URL for direct download + expires_at: + type: string + format: date-time + description: URL expiration timestamp + + ErrorResponse: + type: object + description: Error response returned for all error conditions + properties: + code: + type: string + description: Machine-readable error code + example: missing_topic + message: + type: string + description: Human-readable error message + example: missing topic + request_id: + type: string + description: Request correlation ID for support/debugging + example: abc-123-def-456 diff --git a/cmd/lfs-proxy/record.go b/cmd/lfs-proxy/record.go new file mode 100644 index 00000000..447da53f --- /dev/null +++ b/cmd/lfs-proxy/record.go @@ -0,0 +1,277 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "encoding/binary" + "errors" + "hash/crc32" + + "github.com/KafScale/platform/pkg/protocol" + "github.com/twmb/franz-go/pkg/kmsg" +) + +type byteWriter struct { + buf []byte +} + +func newByteWriter(capacity int) *byteWriter { + return &byteWriter{buf: make([]byte, 0, capacity)} +} + +func (w *byteWriter) write(b []byte) { + w.buf = append(w.buf, b...) 
+}
+
+// Int16 appends v in big-endian (Kafka wire) order.
+func (w *byteWriter) Int16(v int16) {
+	var tmp [2]byte
+	binary.BigEndian.PutUint16(tmp[:], uint16(v))
+	w.write(tmp[:])
+}
+
+// Int32 appends v in big-endian order.
+func (w *byteWriter) Int32(v int32) {
+	var tmp [4]byte
+	binary.BigEndian.PutUint32(tmp[:], uint32(v))
+	w.write(tmp[:])
+}
+
+// Int64 appends v in big-endian order.
+func (w *byteWriter) Int64(v int64) {
+	var tmp [8]byte
+	binary.BigEndian.PutUint64(tmp[:], uint64(v))
+	w.write(tmp[:])
+}
+
+// String appends a classic (non-compact) Kafka STRING: int16 length + bytes.
+func (w *byteWriter) String(v string) {
+	w.Int16(int16(len(v)))
+	if len(v) > 0 {
+		w.write([]byte(v))
+	}
+}
+
+// NullableString appends a NULLABLE_STRING; nil is encoded as length -1.
+func (w *byteWriter) NullableString(v *string) {
+	if v == nil {
+		w.Int16(-1)
+		return
+	}
+	w.String(*v)
+}
+
+// CompactString appends a COMPACT_STRING: uvarint(len+1) + bytes (KIP-482).
+func (w *byteWriter) CompactString(v string) {
+	w.compactLength(len(v))
+	if len(v) > 0 {
+		w.write([]byte(v))
+	}
+}
+
+// CompactNullableString appends a COMPACT_NULLABLE_STRING; nil encodes as uvarint 0.
+func (w *byteWriter) CompactNullableString(v *string) {
+	if v == nil {
+		w.compactLength(-1)
+		return
+	}
+	w.CompactString(*v)
+}
+
+// BytesWithLength appends classic BYTES: int32 length + bytes.
+// NOTE(review): a nil b encodes as length 0, not -1 — callers that need
+// NULLABLE_BYTES semantics must handle nil themselves.
+func (w *byteWriter) BytesWithLength(b []byte) {
+	w.Int32(int32(len(b)))
+	w.write(b)
+}
+
+// CompactBytes appends COMPACT_NULLABLE_BYTES; nil encodes as uvarint 0.
+func (w *byteWriter) CompactBytes(b []byte) {
+	if b == nil {
+		w.compactLength(-1)
+		return
+	}
+	w.compactLength(len(b))
+	w.write(b)
+}
+
+// UVarint appends v as an unsigned varint.
+func (w *byteWriter) UVarint(v uint64) {
+	var tmp [binary.MaxVarintLen64]byte
+	n := binary.PutUvarint(tmp[:], v)
+	w.write(tmp[:n])
+}
+
+// CompactArrayLen appends a COMPACT_ARRAY length: uvarint(len+1), with
+// negative (null) arrays encoded as 0.
+// NOTE(review): duplicate of compactLength below — consider delegating to it.
+func (w *byteWriter) CompactArrayLen(length int) {
+	if length < 0 {
+		w.UVarint(0)
+		return
+	}
+	w.UVarint(uint64(length) + 1)
+}
+
+// WriteTaggedFields appends the tagged-field section of a flexible message.
+// NOTE(review): for count > 0 this writes only the field count, not the
+// tag/size/value triples, producing a malformed section — callers must only
+// ever pass 0 (which is the sole usage visible in this package).
+func (w *byteWriter) WriteTaggedFields(count int) {
+	if count == 0 {
+		w.UVarint(0)
+		return
+	}
+	w.UVarint(uint64(count))
+}
+
+// compactLength appends a compact length prefix: uvarint(len+1), null as 0.
+func (w *byteWriter) compactLength(length int) {
+	if length < 0 {
+		w.UVarint(0)
+		return
+	}
+	w.UVarint(uint64(length) + 1)
+}
+
+// Bytes returns the accumulated buffer (not a copy).
+func (w *byteWriter) Bytes() []byte {
+	return w.buf
+}
+
+// encodeProduceRequest serializes a complete Produce request frame payload:
+// a manually built request header (classic or flexible, depending on the
+// negotiated version) followed by the kmsg-encoded body.
+func encodeProduceRequest(header *protocol.RequestHeader, req *kmsg.ProduceRequest) ([]byte, error) {
+	if header == nil || req == nil {
+		return nil, errors.New("nil header or request")
+	}
+	req.SetVersion(header.APIVersion)
+	// Build header manually (kmsg doesn't handle request headers)
+	// Flexible (KIP-482) request headers append a tagged-field section;
+	// note ClientID stays a non-compact NULLABLE_STRING even in header v2.
+	flexible := req.IsFlexible()
+	w := newByteWriter(0)
+	w.Int16(header.APIKey)
+	w.Int16(header.APIVersion)
+	w.Int32(header.CorrelationID)
+	w.NullableString(header.ClientID)
+	if flexible {
+		w.WriteTaggedFields(0)
+	}
+	// Append kmsg-encoded body
+	body := req.AppendTo(nil)
+	w.write(body)
+	return w.Bytes(), nil
+}
+
+// isFlexibleRequest reports whether the request header for (apiKey, version)
+// uses the flexible (KIP-482 tagged-field) format.
+// NOTE(review): this table duplicates kmsg's per-message IsFlexible (used in
+// encodeProduceRequest above) — keep the cut-off versions in sync with the
+// broker's supported ranges, or derive them from kmsg to avoid drift.
+func isFlexibleRequest(apiKey, version int16) bool {
+	switch apiKey {
+	case protocol.APIKeyApiVersion:
+		return version >= 3
+	case protocol.APIKeyProduce:
+		return version >= 9
+	case protocol.APIKeyMetadata:
+		return version >= 9
+	case protocol.APIKeyFetch:
+		return version >= 12
+	case protocol.APIKeyFindCoordinator:
+		return version >= 3
+	case protocol.APIKeySyncGroup:
+		return version >= 4
+	case protocol.APIKeyHeartbeat:
+		return version >= 4
+	case protocol.APIKeyListGroups:
+		return version >= 3
+	case protocol.APIKeyDescribeGroups:
+		return version >= 5
+	// NOTE(review): upstream Kafka never made OffsetForLeaderEpoch flexible
+	// (v0-v4 are all classic) — confirm this entry against the broker's
+	// protocol tables before relying on it.
+	case protocol.APIKeyOffsetForLeaderEpoch:
+		return version >= 4
+	case protocol.APIKeyDescribeConfigs:
+		return version >= 4
+	case protocol.APIKeyAlterConfigs:
+		return version >= 2
+	case protocol.APIKeyCreatePartitions:
+		return version >= 2
+	case protocol.APIKeyDeleteGroups:
+		return version >= 2
+	default:
+		return false
+	}
+}
+
+// encodeRecords concatenates the varint-framed encoding of each record,
+// producing the "records" section of a v2 record batch.
+func encodeRecords(records []kmsg.Record) []byte {
+	if len(records) == 0 {
+		return nil
+	}
+	out := make([]byte, 0, 256)
+	for _, record := range records {
+		out = append(out, encodeRecord(record)...)
+	}
+	return out
+}
+
+// encodeRecord serializes one Kafka v2 record: a varint total-length prefix
+// followed by attributes (1 byte), timestamp delta (varlong), offset delta
+// (varint), key, value, and headers — all byte sections varint
+// length-prefixed, with -1 denoting null.
+func encodeRecord(record kmsg.Record) []byte {
+	body := make([]byte, 0, 128)
+	body = append(body, byte(record.Attributes))
+	body = appendVarlong(body, record.TimestampDelta64)
+	body = appendVarint(body, record.OffsetDelta)
+	body = appendVarintBytes(body, record.Key)
+	body = appendVarintBytes(body, record.Value)
+	body = appendVarint(body, int32(len(record.Headers)))
+	for _, header := range record.Headers {
+		body = appendVarintString(body, header.Key)
+		body = appendVarintBytes(body, header.Value)
+	}
+
+	cap64 := int64(len(body)) + int64(binary.MaxVarintLen32)
+	out := make([]byte, 0, cap64)
+	out = appendVarint(out, int32(len(body)))
+	out = append(out, body...)
+	return out
+}
+
+// appendVarint appends v as a zigzag-encoded varint (Kafka record fields).
+func appendVarint(dst []byte, v int32) []byte {
+	var tmp [binary.MaxVarintLen32]byte
+	n := binary.PutVarint(tmp[:], int64(v))
+	return append(dst, tmp[:n]...)
+}
+
+// appendVarlong appends v as a zigzag-encoded varlong.
+func appendVarlong(dst []byte, v int64) []byte {
+	var tmp [binary.MaxVarintLen64]byte
+	n := binary.PutVarint(tmp[:], v)
+	return append(dst, tmp[:n]...)
+}
+
+// appendVarintBytes appends a varint length (-1 for nil) followed by b.
+func appendVarintBytes(dst []byte, b []byte) []byte {
+	if b == nil {
+		dst = appendVarint(dst, -1)
+		return dst
+	}
+	dst = appendVarint(dst, int32(len(b)))
+	return append(dst, b...)
+}
+
+// appendVarintString appends a varint length followed by s (record header
+// keys are never null, so no -1 case is needed here).
+func appendVarintString(dst []byte, s string) []byte {
+	dst = appendVarint(dst, int32(len(s)))
+	return append(dst, s...)
+}
+
+// varint decodes a zigzag varint from buf; returns (0, 0) when the input is
+// truncated or overflows, collapsing binary.Varint's two error cases.
+func varint(buf []byte) (int32, int) {
+	val, n := binary.Varint(buf)
+	if n <= 0 {
+		return 0, 0
+	}
+	return int32(val), n
+}
+
+// buildRecordBatch wraps the given records in a single magic-v2 record batch.
+// The batch Length field excludes the 12-byte (FirstOffset + Length) prefix,
+// and the CRC is CRC-32C computed over everything from the attributes field
+// (byte offset 21 = 8 offset + 4 length + 4 leader epoch + 1 magic + 4 crc)
+// to the end — the CRC field itself is excluded, so hashing the encoding
+// that still carries CRC=0 is correct.
+// NOTE(review): crc32cTable (declared elsewhere in this package) must be the
+// Castagnoli table; Kafka requires CRC-32C for v2 batches. Also, the batch
+// is encoded three times — the length and CRC could be patched into a single
+// buffer if this ever shows up on a hot path.
+// NOTE(review): FirstTimestamp/MaxTimestamp are fixed at 0, so record
+// timestamps resolve to their raw deltas — confirm producers set
+// TimestampDelta64 to an absolute-milliseconds value or that consumers
+// ignore timestamps.
+func buildRecordBatch(records []kmsg.Record) []byte {
+	encoded := encodeRecords(records)
+	batch := kmsg.RecordBatch{
+		FirstOffset:          0,
+		PartitionLeaderEpoch: -1,
+		Magic:                2,
+		Attributes:           0,
+		LastOffsetDelta:      int32(len(records) - 1),
+		FirstTimestamp:       0,
+		MaxTimestamp:         0,
+		ProducerID:           -1,
+		ProducerEpoch:        -1,
+		FirstSequence:        0,
+		NumRecords:           int32(len(records)),
+		Records:              encoded,
+	}
+	batchBytes := batch.AppendTo(nil)
+	batch.Length = int32(len(batchBytes) - 12)
+	batchBytes = batch.AppendTo(nil)
+	batch.CRC = int32(crc32.Checksum(batchBytes[21:], crc32cTable))
+	return batch.AppendTo(nil)
+}
diff --git a/cmd/lfs-proxy/s3.go b/cmd/lfs-proxy/s3.go
new file mode 100644
index 00000000..a66bc449
--- /dev/null
+++ b/cmd/lfs-proxy/s3.go
@@ -0,0 +1,582 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package main + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "io" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/signer/v4" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/aws/smithy-go" +) + +const minMultipartChunkSize int64 = 5 * 1024 * 1024 + +type s3Config struct { + Bucket string + Region string + Endpoint string + PublicEndpoint string + AccessKeyID string + SecretAccessKey string + SessionToken string + ForcePathStyle bool + ChunkSize int64 +} + +type s3API interface { + CreateMultipartUpload(ctx context.Context, params *s3.CreateMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error) + UploadPart(ctx context.Context, params *s3.UploadPartInput, optFns ...func(*s3.Options)) (*s3.UploadPartOutput, error) + CompleteMultipartUpload(ctx context.Context, params *s3.CompleteMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) + AbortMultipartUpload(ctx context.Context, params *s3.AbortMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.AbortMultipartUploadOutput, error) + PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) + GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) + DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) + HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) + CreateBucket(ctx context.Context, params *s3.CreateBucketInput, optFns ...func(*s3.Options)) (*s3.CreateBucketOutput, error) +} + +type s3PresignAPI interface { + 
PresignGetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.PresignOptions)) (*v4.PresignedHTTPRequest, error) +} + +type s3Uploader struct { + bucket string + region string + chunkSize int64 + api s3API + presign s3PresignAPI +} + +func normalizeChunkSize(chunk int64) int64 { + if chunk <= 0 { + chunk = defaultChunkSize + } + if chunk < minMultipartChunkSize { + chunk = minMultipartChunkSize + } + return chunk +} + +func newS3Uploader(ctx context.Context, cfg s3Config) (*s3Uploader, error) { + if cfg.Bucket == "" { + return nil, errors.New("s3 bucket required") + } + if cfg.Region == "" { + return nil, errors.New("s3 region required") + } + cfg.ChunkSize = normalizeChunkSize(cfg.ChunkSize) + + loadOpts := []func(*config.LoadOptions) error{ + config.WithRegion(cfg.Region), + } + if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { + loadOpts = append(loadOpts, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(cfg.AccessKeyID, cfg.SecretAccessKey, cfg.SessionToken))) + } + awsCfg, err := config.LoadDefaultConfig(ctx, loadOpts...) 
+ if err != nil { + return nil, fmt.Errorf("load aws config: %w", err) + } + client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } + o.UsePathStyle = cfg.ForcePathStyle + }) + presignEndpoint := cfg.Endpoint + if cfg.PublicEndpoint != "" { + presignEndpoint = cfg.PublicEndpoint + } + presignClient := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if presignEndpoint != "" { + o.BaseEndpoint = aws.String(presignEndpoint) + } + o.UsePathStyle = cfg.ForcePathStyle + }) + presigner := s3.NewPresignClient(presignClient) + + return &s3Uploader{ + bucket: cfg.Bucket, + region: cfg.Region, + chunkSize: cfg.ChunkSize, + api: client, + presign: presigner, + }, nil +} + +func (u *s3Uploader) PresignGetObject(ctx context.Context, key string, ttl time.Duration) (string, error) { + if key == "" { + return "", errors.New("s3 key required") + } + if u.presign == nil { + return "", errors.New("presign client not configured") + } + out, err := u.presign.PresignGetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + }, func(opts *s3.PresignOptions) { + opts.Expires = ttl + }) + if err != nil { + return "", err + } + return out.URL, nil +} + +func (u *s3Uploader) GetObject(ctx context.Context, key string) (*s3.GetObjectOutput, error) { + if key == "" { + return nil, errors.New("s3 key required") + } + return u.api.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + }) +} + +func (u *s3Uploader) HeadBucket(ctx context.Context) error { + _, err := u.api.HeadBucket(ctx, &s3.HeadBucketInput{Bucket: aws.String(u.bucket)}) + if err == nil { + return nil + } + return err +} + +func (u *s3Uploader) EnsureBucket(ctx context.Context) error { + if err := u.HeadBucket(ctx); err == nil { + return nil + } + input := &s3.CreateBucketInput{Bucket: aws.String(u.bucket)} + if u.region != "" && u.region != "us-east-1" { + 
input.CreateBucketConfiguration = &types.CreateBucketConfiguration{LocationConstraint: types.BucketLocationConstraint(u.region)}
+	}
+	_, err := u.api.CreateBucket(ctx, input)
+	if err != nil {
+		var apiErr smithy.APIError
+		if errors.As(err, &apiErr) {
+			// Racing creators (or a pre-existing bucket) are success, not failure.
+			switch apiErr.ErrorCode() {
+			case "BucketAlreadyOwnedByYou", "BucketAlreadyExists":
+				return nil
+			}
+		}
+		return fmt.Errorf("create bucket %s: %w", u.bucket, err)
+	}
+	return nil
+}
+
+// Upload stores payload at key and returns (sha256Hex, checksum,
+// checksumAlgName, error). SHA-256 is always computed; the reported checksum
+// follows alg (defaulting to SHA-256, empty for ChecksumNone). Payloads that
+// fit in one chunk go through a single PutObject; larger ones use multipart.
+func (u *s3Uploader) Upload(ctx context.Context, key string, payload []byte, alg lfs.ChecksumAlg) (string, string, string, error) {
+	if key == "" {
+		return "", "", "", errors.New("s3 key required")
+	}
+	shaHasher := sha256.New()
+	if _, err := shaHasher.Write(payload); err != nil {
+		return "", "", "", err
+	}
+	shaHex := hex.EncodeToString(shaHasher.Sum(nil))
+
+	checksumAlg := alg
+	if checksumAlg == "" {
+		checksumAlg = lfs.ChecksumSHA256
+	}
+	var checksum string
+	if checksumAlg != lfs.ChecksumNone {
+		if checksumAlg == lfs.ChecksumSHA256 {
+			// Reuse the hash we already computed instead of hashing twice.
+			checksum = shaHex
+		} else {
+			computed, err := lfs.ComputeChecksum(checksumAlg, payload)
+			if err != nil {
+				return "", "", "", err
+			}
+			checksum = computed
+		}
+	}
+
+	size := int64(len(payload))
+	if size <= u.chunkSize {
+		_, err := u.api.PutObject(ctx, &s3.PutObjectInput{
+			Bucket:        aws.String(u.bucket),
+			Key:           aws.String(key),
+			Body:          bytes.NewReader(payload),
+			ContentLength: aws.Int64(size),
+		})
+		return shaHex, checksum, string(checksumAlg), err
+	}
+	return shaHex, checksum, string(checksumAlg), u.multipartUpload(ctx, key, payload)
+}
+
+// UploadStream uploads from reader without buffering the whole payload,
+// returning (sha256Hex, checksum, checksumAlgName, bytesRead, error).
+// maxSize > 0 enforces an upper bound; exceeding it aborts the multipart
+// upload. Small streams (< minMultipartChunkSize after the first read) are
+// sent with a single PutObject.
+func (u *s3Uploader) UploadStream(ctx context.Context, key string, reader io.Reader, maxSize int64, alg lfs.ChecksumAlg) (string, string, string, int64, error) {
+	if key == "" {
+		return "", "", "", 0, errors.New("s3 key required")
+	}
+	if reader == nil {
+		return "", "", "", 0, errors.New("reader required")
+	}
+	// NOTE(review): this mutates shared receiver state from a request path —
+	// a data race if one s3Uploader serves concurrent requests. The
+	// constructor already normalizes ChunkSize; prefer a local
+	// `chunkSize := normalizeChunkSize(u.chunkSize)` here.
+	u.chunkSize = normalizeChunkSize(u.chunkSize)
+
+	checksumAlg := alg
+	if checksumAlg == "" {
+		checksumAlg =
lfs.ChecksumSHA256 + } + + // Read first chunk to determine if we need multipart upload + firstBuf := make([]byte, u.chunkSize) + firstN, firstErr := io.ReadFull(reader, firstBuf) + if firstErr != nil && firstErr != io.EOF && firstErr != io.ErrUnexpectedEOF { + return "", "", "", 0, firstErr + } + if firstN == 0 { + return "", "", "", 0, errors.New("empty upload") + } + + firstReadHitEOF := firstErr == io.EOF || firstErr == io.ErrUnexpectedEOF + + // If data fits in one chunk and is smaller than minMultipartChunkSize, use PutObject + if firstReadHitEOF && int64(firstN) < minMultipartChunkSize { + data := firstBuf[:firstN] + shaHasher := sha256.New() + shaHasher.Write(data) + shaHex := hex.EncodeToString(shaHasher.Sum(nil)) + + checksum := "" + if checksumAlg != lfs.ChecksumNone { + if checksumAlg == lfs.ChecksumSHA256 { + checksum = shaHex + } else { + computed, err := lfs.ComputeChecksum(checksumAlg, data) + if err != nil { + return "", "", "", 0, err + } + checksum = computed + } + } + + _, err := u.api.PutObject(ctx, &s3.PutObjectInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + Body: bytes.NewReader(data), + ContentLength: aws.Int64(int64(firstN)), + }) + if err != nil { + return "", "", "", 0, fmt.Errorf("put object: %w", err) + } + return shaHex, checksum, string(checksumAlg), int64(firstN), nil + } + + // Use multipart upload for larger files + createResp, err := u.api.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + }) + if err != nil { + return "", "", "", 0, fmt.Errorf("create multipart upload: %w", err) + } + uploadID := createResp.UploadId + if uploadID == nil { + return "", "", "", 0, errors.New("missing upload id") + } + + shaHasher := sha256.New() + var checksumHasher interface { + Write([]byte) (int, error) + Sum([]byte) []byte + } + if checksumAlg != lfs.ChecksumNone { + if checksumAlg == lfs.ChecksumSHA256 { + checksumHasher = shaHasher + } else { + h, err := 
lfs.NewChecksumHasher(checksumAlg) + if err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", 0, err + } + checksumHasher = h + } + } + parts := make([]types.CompletedPart, 0, 4) + partNum := int32(1) + var total int64 + + // Upload first chunk + total += int64(firstN) + if maxSize > 0 && total > maxSize { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, fmt.Errorf("blob size %d exceeds max %d", total, maxSize) + } + shaHasher.Write(firstBuf[:firstN]) + if checksumHasher != nil && checksumHasher != shaHasher { + _, _ = checksumHasher.Write(firstBuf[:firstN]) + } + partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: uploadID, + PartNumber: aws.Int32(partNum), + Body: bytes.NewReader(firstBuf[:firstN]), + }) + if err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, fmt.Errorf("upload part %d: %w", partNum, err) + } + parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)}) + partNum++ + + // Continue reading remaining chunks + buf := make([]byte, u.chunkSize) + for { + n, readErr := io.ReadFull(reader, buf) + if n > 0 { + total += int64(n) + if maxSize > 0 && total > maxSize { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, fmt.Errorf("blob size %d exceeds max %d", total, maxSize) + } + if _, err := shaHasher.Write(buf[:n]); err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, err + } + if checksumHasher != nil && checksumHasher != shaHasher { + if _, err := checksumHasher.Write(buf[:n]); err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, err + } + } + partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: uploadID, + PartNumber: aws.Int32(partNum), + Body: bytes.NewReader(buf[:n]), + }) + if err != nil { + _ = u.abortUpload(ctx, key, 
*uploadID) + return "", "", "", total, fmt.Errorf("upload part %d: %w", partNum, err) + } + parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)}) + partNum++ + } + if readErr == io.EOF { + break + } + if readErr == io.ErrUnexpectedEOF { + break + } + if readErr != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, readErr + } + } + + _, err = u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: uploadID, + MultipartUpload: &types.CompletedMultipartUpload{ + Parts: parts, + }, + }) + if err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, fmt.Errorf("complete multipart upload: %w", err) + } + shaHex := hex.EncodeToString(shaHasher.Sum(nil)) + checksum := "" + if checksumAlg != lfs.ChecksumNone { + if checksumAlg == lfs.ChecksumSHA256 { + checksum = shaHex + } else if checksumHasher != nil { + checksum = hex.EncodeToString(checksumHasher.Sum(nil)) + } + } + return shaHex, checksum, string(checksumAlg), total, nil +} + +func (u *s3Uploader) StartMultipartUpload(ctx context.Context, key, contentType string) (string, error) { + if key == "" { + return "", errors.New("s3 key required") + } + input := &s3.CreateMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + } + if contentType != "" { + input.ContentType = aws.String(contentType) + } + resp, err := u.api.CreateMultipartUpload(ctx, input) + if err != nil { + return "", fmt.Errorf("create multipart upload: %w", err) + } + if resp.UploadId == nil || *resp.UploadId == "" { + return "", errors.New("missing upload id") + } + return *resp.UploadId, nil +} + +func (u *s3Uploader) UploadPart(ctx context.Context, key, uploadID string, partNumber int32, payload []byte) (string, error) { + if key == "" { + return "", errors.New("s3 key required") + } + if uploadID == "" { + return "", errors.New("upload id required") + } + 
resp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: aws.String(uploadID), + PartNumber: aws.Int32(partNumber), + Body: bytes.NewReader(payload), + }) + if err != nil { + return "", fmt.Errorf("upload part %d: %w", partNumber, err) + } + if resp.ETag == nil || *resp.ETag == "" { + return "", errors.New("missing etag") + } + return *resp.ETag, nil +} + +func (u *s3Uploader) CompleteMultipartUpload(ctx context.Context, key, uploadID string, parts []types.CompletedPart) error { + if key == "" { + return errors.New("s3 key required") + } + if uploadID == "" { + return errors.New("upload id required") + } + _, err := u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: aws.String(uploadID), + MultipartUpload: &types.CompletedMultipartUpload{ + Parts: parts, + }, + }) + if err != nil { + return fmt.Errorf("complete multipart upload: %w", err) + } + return nil +} + +func (u *s3Uploader) AbortMultipartUpload(ctx context.Context, key, uploadID string) error { + if key == "" { + return errors.New("s3 key required") + } + if uploadID == "" { + return errors.New("upload id required") + } + _, err := u.api.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: aws.String(uploadID), + }) + return err +} + +func (u *s3Uploader) multipartUpload(ctx context.Context, key string, payload []byte) error { + createResp, err := u.api.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + }) + if err != nil { + return fmt.Errorf("create multipart upload: %w", err) + } + uploadID := createResp.UploadId + if uploadID == nil { + return errors.New("missing upload id") + } + + numParts := int64(len(payload))/u.chunkSize + 1 + parts := make([]types.CompletedPart, 0, numParts) + reader := bytes.NewReader(payload) 
+ partNum := int32(1) + buf := make([]byte, u.chunkSize) + for { + n, readErr := io.ReadFull(reader, buf) + if readErr == io.EOF || readErr == io.ErrUnexpectedEOF { + if n == 0 { + break + } + } + if n > 0 { + partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: uploadID, + PartNumber: aws.Int32(partNum), + Body: bytes.NewReader(buf[:n]), + }) + if err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return fmt.Errorf("upload part %d: %w", partNum, err) + } + parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)}) + partNum++ + } + if readErr == io.EOF { + break + } + if readErr != nil && readErr != io.ErrUnexpectedEOF { + _ = u.abortUpload(ctx, key, *uploadID) + return fmt.Errorf("read payload: %w", readErr) + } + if readErr == io.ErrUnexpectedEOF { + break + } + } + + _, err = u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: uploadID, + MultipartUpload: &types.CompletedMultipartUpload{ + Parts: parts, + }, + }) + if err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return fmt.Errorf("complete multipart upload: %w", err) + } + return nil +} + +func (u *s3Uploader) abortUpload(ctx context.Context, key, uploadID string) error { + _, err := u.api.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: aws.String(uploadID), + }) + return err +} + +func (u *s3Uploader) DeleteObject(ctx context.Context, key string) error { + if key == "" { + return errors.New("s3 key required") + } + _, err := u.api.DeleteObject(ctx, &s3.DeleteObjectInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + }) + return err +} diff --git a/cmd/lfs-proxy/sasl_encode.go b/cmd/lfs-proxy/sasl_encode.go new file mode 100644 index 00000000..8729099d --- /dev/null +++ b/cmd/lfs-proxy/sasl_encode.go @@ -0,0 +1,77 
@@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+
+	"github.com/KafScale/platform/pkg/protocol"
+)
+
+// encodeSaslHandshakeRequest serializes a SaslHandshake request body:
+// a classic (non-flexible) request header — api key, api version,
+// correlation id, nullable client id — followed by the mechanism name.
+func encodeSaslHandshakeRequest(header *protocol.RequestHeader, mechanism string) ([]byte, error) {
+	if header == nil {
+		return nil, errors.New("nil header")
+	}
+	w := newByteWriter(0)
+	w.Int16(header.APIKey)
+	w.Int16(header.APIVersion)
+	w.Int32(header.CorrelationID)
+	w.NullableString(header.ClientID)
+	w.String(mechanism)
+	return w.Bytes(), nil
+}
+
+// encodeSaslAuthenticateRequest serializes a SaslAuthenticate request body:
+// the same non-flexible header followed by the length-prefixed auth bytes.
+// NOTE(review): both encoders assume non-flexible header versions — confirm
+// callers never negotiate flexible (tagged) header versions for these APIs.
+func encodeSaslAuthenticateRequest(header *protocol.RequestHeader, authBytes []byte) ([]byte, error) {
+	if header == nil {
+		return nil, errors.New("nil header")
+	}
+	w := newByteWriter(0)
+	w.Int16(header.APIKey)
+	w.Int16(header.APIVersion)
+	w.Int32(header.CorrelationID)
+	w.NullableString(header.ClientID)
+	w.BytesWithLength(authBytes)
+	return w.Bytes(), nil
+}
+
+// buildSaslPlainAuthBytes builds the SASL/PLAIN initial response per
+// RFC 4616: [authzid] NUL authcid NUL passwd. The leading NUL encodes an
+// empty authorization identity (derive it from the username).
+func buildSaslPlainAuthBytes(username, password string) []byte {
+	// PLAIN: 0x00 + username + 0x00 + password
+	buf := make([]byte, 0, len(username)+len(password)+2)
+	buf = append(buf, 0)
+	buf = append(buf, []byte(username)...)
+	buf = append(buf, 0)
+	buf = append(buf, []byte(password)...)
+	return buf
+}
+
+// readSaslResponse reads a single response frame and returns an error if the
+// frame is too short or carries a non-zero Kafka error code. The error code
+// is read at a fixed offset right after the correlation ID, which matches
+// non-flexible response headers only.
+func readSaslResponse(r io.Reader) error {
+	frame, err := protocol.ReadFrame(r)
+	if err != nil {
+		return err
+	}
+	if len(frame.Payload) < 6 {
+		return fmt.Errorf("invalid SASL response length %d", len(frame.Payload))
+	}
+	// First 4 bytes are correlation ID
+	errorCode := int16(binary.BigEndian.Uint16(frame.Payload[4:6]))
+	if errorCode != 0 {
+		return fmt.Errorf("sasl error code %d", errorCode)
+	}
+	return nil
+}
diff --git a/cmd/lfs-proxy/sasl_encode_test.go b/cmd/lfs-proxy/sasl_encode_test.go
new file mode 100644
index 00000000..a32baa3d
--- /dev/null
+++ b/cmd/lfs-proxy/sasl_encode_test.go
@@ -0,0 +1,45 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package main + +import ( + "bytes" + "encoding/binary" + "testing" + + "github.com/KafScale/platform/pkg/protocol" +) + +func TestBuildSaslPlainAuthBytes(t *testing.T) { + got := buildSaslPlainAuthBytes("user", "pass") + want := []byte{0, 'u', 's', 'e', 'r', 0, 'p', 'a', 's', 's'} + if !bytes.Equal(got, want) { + t.Fatalf("auth bytes mismatch: got %v want %v", got, want) + } +} + +func TestReadSaslResponseError(t *testing.T) { + buf := &bytes.Buffer{} + payload := make([]byte, 6) + binary.BigEndian.PutUint32(payload[:4], 1) + binary.BigEndian.PutUint16(payload[4:6], 1) // error code 1 + if err := protocol.WriteFrame(buf, payload); err != nil { + t.Fatalf("write frame: %v", err) + } + if err := readSaslResponse(buf); err == nil { + t.Fatal("expected error") + } +} diff --git a/cmd/lfs-proxy/swagger.go b/cmd/lfs-proxy/swagger.go new file mode 100644 index 00000000..32fb14c3 --- /dev/null +++ b/cmd/lfs-proxy/swagger.go @@ -0,0 +1,73 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + _ "embed" + "net/http" +) + +//go:embed openapi.yaml +var openAPISpec []byte + +const swaggerUIHTML = ` + + + + KafScale LFS Proxy - API Documentation + + + + +
+ + + + + +` + +// handleSwaggerUI serves the Swagger UI HTML page. +func (p *lfsProxy) handleSwaggerUI(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(swaggerUIHTML)) +} + +// handleOpenAPISpec serves the OpenAPI specification YAML file. +func (p *lfsProxy) handleOpenAPISpec(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/yaml") + w.Header().Set("Access-Control-Allow-Origin", "*") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(openAPISpec) +} diff --git a/cmd/lfs-proxy/tracker.go b/cmd/lfs-proxy/tracker.go new file mode 100644 index 00000000..dd7c4e4d --- /dev/null +++ b/cmd/lfs-proxy/tracker.go @@ -0,0 +1,372 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package main
+
+import (
+	"context"
+	"errors"
+	"log/slog"
+	"math"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/twmb/franz-go/pkg/kadm"
+	"github.com/twmb/franz-go/pkg/kerr"
+	"github.com/twmb/franz-go/pkg/kgo"
+)
+
+const (
+	defaultTrackerTopic       = "__lfs_ops_state"
+	defaultTrackerBatchSize   = 100   // records per produced batch
+	defaultTrackerFlushMs     = 100   // flush interval, milliseconds
+	defaultTrackerChanSize    = 10000 // buffered event channel capacity
+	defaultTrackerPartitions  = 3
+	defaultTrackerReplication = 1
+)
+
+// TrackerConfig holds configuration for the LFS operations tracker.
+type TrackerConfig struct {
+	Enabled           bool     // master switch; when false the tracker is a no-op
+	Topic             string   // destination topic (defaults to __lfs_ops_state)
+	Brokers           []string // Kafka seed brokers; empty disables the tracker
+	BatchSize         int      // records per batch before an early flush
+	FlushMs           int      // max time between flushes, in milliseconds
+	ProxyID           string   // stamped into every emitted event
+	EnsureTopic       bool     // best-effort topic creation at startup
+	Partitions        int      // partitions used when creating the topic
+	ReplicationFactor int      // replication factor used when creating the topic
+}
+
+// LfsOpsTracker tracks LFS operations by emitting events to a Kafka topic.
+// Emission is asynchronous: Emit enqueues onto eventCh and runBatcher
+// produces batches in the background.
+type LfsOpsTracker struct {
+	config  TrackerConfig
+	client  *kgo.Client // nil when the tracker is disabled
+	logger  *slog.Logger
+	eventCh chan TrackerEvent
+	wg      sync.WaitGroup
+	ctx     context.Context
+	cancel  context.CancelFunc
+
+	// Circuit breaker state (all accessed atomically)
+	circuitOpen      uint32 // 1 while the breaker is open and events are dropped
+	failures         uint32 // consecutive failed batches
+	lastSuccess      int64  // unix nanos of last successful batch
+	failureThreshold uint32 // failures needed to open the breaker
+	resetTimeout     time.Duration // how long the breaker stays open
+
+	// Metrics (accessed atomically)
+	eventsEmitted uint64
+	eventsDropped uint64
+	batchesSent   uint64
+}
+
+// NewLfsOpsTracker creates a new tracker instance.
+func NewLfsOpsTracker(ctx context.Context, cfg TrackerConfig, logger *slog.Logger) (*LfsOpsTracker, error) {
+	// Disabled configuration returns a stub with a nil client; all Emit
+	// paths and IsEnabled check for that and become no-ops.
+	if !cfg.Enabled {
+		logger.Info("lfs ops tracker disabled")
+		return &LfsOpsTracker{config: cfg, logger: logger}, nil
+	}
+
+	// Apply defaults for any unset numeric/string fields.
+	if cfg.Topic == "" {
+		cfg.Topic = defaultTrackerTopic
+	}
+	if cfg.BatchSize <= 0 {
+		cfg.BatchSize = defaultTrackerBatchSize
+	}
+	if cfg.FlushMs <= 0 {
+		cfg.FlushMs = defaultTrackerFlushMs
+	}
+	if cfg.Partitions <= 0 {
+		cfg.Partitions = defaultTrackerPartitions
+	}
+	if cfg.ReplicationFactor <= 0 {
+		cfg.ReplicationFactor = defaultTrackerReplication
+	}
+	// No brokers is treated like disabled rather than an error, so a
+	// misconfigured tracker never blocks proxy startup.
+	if len(cfg.Brokers) == 0 {
+		logger.Warn("lfs ops tracker: no brokers configured, tracker disabled")
+		return &LfsOpsTracker{config: cfg, logger: logger}, nil
+	}
+
+	// Tracking events are best-effort telemetry: leader-only acks and no
+	// idempotence trade delivery guarantees for lower overhead.
+	opts := []kgo.Opt{
+		kgo.SeedBrokers(cfg.Brokers...),
+		kgo.DefaultProduceTopic(cfg.Topic),
+		kgo.ProducerBatchMaxBytes(1024 * 1024), // 1MB max batch
+		kgo.ProducerLinger(time.Duration(cfg.FlushMs) * time.Millisecond),
+		kgo.RequiredAcks(kgo.LeaderAck()),
+		kgo.DisableIdempotentWrite(), // Not required for tracking events
+	}
+
+	client, err := kgo.NewClient(opts...)
+	if err != nil {
+		return nil, err
+	}
+
+	// Topic creation is best-effort: a failure is logged, not fatal.
+	if cfg.EnsureTopic {
+		if err := ensureTrackerTopic(ctx, client, cfg, logger); err != nil {
+			logger.Warn("lfs ops tracker: ensure topic failed", "topic", cfg.Topic, "error", err)
+		}
+	}
+
+	trackerCtx, cancel := context.WithCancel(ctx)
+	t := &LfsOpsTracker{
+		config:           cfg,
+		client:           client,
+		logger:           logger,
+		eventCh:          make(chan TrackerEvent, defaultTrackerChanSize),
+		ctx:              trackerCtx,
+		cancel:           cancel,
+		failureThreshold: 5,
+		resetTimeout:     30 * time.Second,
+	}
+
+	t.wg.Add(1)
+	go t.runBatcher()
+
+	logger.Info("lfs ops tracker started", "topic", cfg.Topic, "brokers", cfg.Brokers)
+	return t, nil
+}
+
+// Emit sends a tracker event to the channel for async processing.
+// Emit enqueues a tracker event for asynchronous batching. It never blocks:
+// when the breaker is open or the channel is full the event is dropped and
+// counted in eventsDropped.
+func (t *LfsOpsTracker) Emit(event TrackerEvent) {
+	if t == nil || !t.config.Enabled || t.client == nil {
+		return
+	}
+
+	// Circuit breaker check. lastSuccess doubles as the breaker's cooldown
+	// anchor: it is refreshed on every successful batch AND at the moment
+	// the breaker opens (see runBatcher), so an open breaker always holds
+	// for at least resetTimeout before a half-open reset is attempted.
+	if atomic.LoadUint32(&t.circuitOpen) == 1 {
+		if time.Now().UnixNano()-atomic.LoadInt64(&t.lastSuccess) > t.resetTimeout.Nanoseconds() {
+			atomic.StoreUint32(&t.circuitOpen, 0)
+			atomic.StoreUint32(&t.failures, 0)
+			t.logger.Info("lfs ops tracker: circuit breaker reset")
+		} else {
+			atomic.AddUint64(&t.eventsDropped, 1)
+			return
+		}
+	}
+
+	select {
+	case t.eventCh <- event:
+		atomic.AddUint64(&t.eventsEmitted, 1)
+	default:
+		// Channel full, drop the event rather than block the caller.
+		atomic.AddUint64(&t.eventsDropped, 1)
+		t.logger.Debug("lfs ops tracker: event dropped, channel full")
+	}
+}
+
+// runBatcher processes events from the channel and sends them in batches,
+// flushing when the batch is full, on a periodic tick, and once on shutdown.
+func (t *LfsOpsTracker) runBatcher() {
+	defer t.wg.Done()
+
+	batch := make([]*kgo.Record, 0, t.config.BatchSize)
+	flushInterval := time.Duration(t.config.FlushMs) * time.Millisecond
+	ticker := time.NewTicker(flushInterval)
+	defer ticker.Stop()
+
+	// flush produces the pending batch using the supplied context so the
+	// final flush during shutdown is not aborted by the tracker's own
+	// (already cancelled) context.
+	flush := func(ctx context.Context) {
+		if len(batch) == 0 {
+			return
+		}
+
+		// Produce batch
+		results := t.client.ProduceSync(ctx, batch...)
+		hasError := false
+		for _, result := range results {
+			if result.Err != nil {
+				hasError = true
+				t.logger.Warn("lfs ops tracker: produce failed", "error", result.Err)
+			}
+		}
+
+		if hasError {
+			failures := atomic.AddUint32(&t.failures, 1)
+			if failures >= t.failureThreshold {
+				if atomic.CompareAndSwapUint32(&t.circuitOpen, 0, 1) {
+					// Anchor the cooldown at open time. Without this, a stale
+					// (or zero, if the brokers were never reachable) lastSuccess
+					// lets Emit reset the breaker immediately, defeating it.
+					atomic.StoreInt64(&t.lastSuccess, time.Now().UnixNano())
+					t.logger.Warn("lfs ops tracker: circuit breaker opened", "failures", failures)
+				}
+			}
+		} else {
+			atomic.StoreUint32(&t.failures, 0)
+			atomic.StoreInt64(&t.lastSuccess, time.Now().UnixNano())
+			atomic.AddUint64(&t.batchesSent, 1)
+		}
+
+		batch = batch[:0]
+	}
+
+	for {
+		select {
+		case <-t.ctx.Done():
+			// Best-effort final flush under a short independent deadline;
+			// t.ctx is already cancelled here and would fail immediately.
+			shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+			flush(shutdownCtx)
+			cancel()
+			return
+
+		case event := <-t.eventCh:
+			record, err := t.eventToRecord(event)
+			if err != nil {
+				t.logger.Warn("lfs ops tracker: failed to serialize event", "error", err, "type", event.GetEventType())
+				continue
+			}
+			batch = append(batch, record)
+			if len(batch) >= t.config.BatchSize {
+				flush(t.ctx)
+			}
+
+		case <-ticker.C:
+			flush(t.ctx)
+		}
+	}
+}
+
+// eventToRecord converts a TrackerEvent to a Kafka record.
+func (t *LfsOpsTracker) eventToRecord(event TrackerEvent) (*kgo.Record, error) {
+	value, err := event.Marshal()
+	if err != nil {
+		return nil, err
+	}
+
+	// Key the record by the event's source topic so events for the same
+	// topic hash to the same partition (events without a source topic get
+	// an empty key — presumably round-robin; confirm against partitioner).
+	return &kgo.Record{
+		Key:   []byte(event.GetTopic()),
+		Value: value,
+	}, nil
+}
+
+// ensureTrackerTopic creates the tracker topic if missing. Partition and
+// replication values are clamped into int32/int16 range before use.
+func ensureTrackerTopic(ctx context.Context, client *kgo.Client, cfg TrackerConfig, logger *slog.Logger) error {
+	admin := kadm.NewClient(client)
+	var partitions int32 = defaultTrackerPartitions
+	if cfg.Partitions > 0 && cfg.Partitions <= math.MaxInt32 {
+		partitions = int32(cfg.Partitions) //nolint:gosec // bounds checked
+	}
+	var replication int16 = defaultTrackerReplication
+	if cfg.ReplicationFactor > 0 && cfg.ReplicationFactor <= math.MaxInt16 {
+		replication = int16(cfg.ReplicationFactor) //nolint:gosec // bounds checked
+	}
+	responses, err := admin.CreateTopics(ctx, partitions, replication, nil, cfg.Topic)
+	if err != nil {
+		return err
+	}
+	resp, ok := responses[cfg.Topic]
+	if !ok {
+		return errors.New("tracker topic response missing")
+	}
+	// TopicAlreadyExists is success: another proxy instance won the race.
+	if resp.Err == nil || errors.Is(resp.Err, kerr.TopicAlreadyExists) {
+		// NOTE(review): this logs the requested cfg values, not the clamped
+		// partitions/replication actually sent — harmless but can mislead.
+		logger.Info("lfs ops tracker topic ready", "topic", cfg.Topic, "partitions", cfg.Partitions, "replication", cfg.ReplicationFactor)
+		return nil
+	}
+	return resp.Err
+}
+
+// Close gracefully shuts down the tracker. Order matters: cancel stops the
+// batcher loop, Wait lets its final flush finish, then the client is closed.
+func (t *LfsOpsTracker) Close() error {
+	if t == nil || t.client == nil {
+		return nil
+	}
+
+	t.cancel()
+	t.wg.Wait()
+	t.client.Close()
+
+	t.logger.Info("lfs ops tracker closed",
+		"events_emitted", atomic.LoadUint64(&t.eventsEmitted),
+		"events_dropped", atomic.LoadUint64(&t.eventsDropped),
+		"batches_sent", atomic.LoadUint64(&t.batchesSent),
+	)
+	return nil
+}
+
+// Stats returns tracker statistics.
+func (t *LfsOpsTracker) Stats() TrackerStats {
+	// Safe on a nil receiver so callers need no guard.
+	if t == nil {
+		return TrackerStats{}
+	}
+	return TrackerStats{
+		Enabled:       t.config.Enabled,
+		Topic:         t.config.Topic,
+		EventsEmitted: atomic.LoadUint64(&t.eventsEmitted),
+		EventsDropped: atomic.LoadUint64(&t.eventsDropped),
+		BatchesSent:   atomic.LoadUint64(&t.batchesSent),
+		CircuitOpen:   atomic.LoadUint32(&t.circuitOpen) == 1,
+	}
+}
+
+// TrackerStats holds statistics about the tracker.
+type TrackerStats struct {
+	Enabled       bool   `json:"enabled"`
+	Topic         string `json:"topic"`
+	EventsEmitted uint64 `json:"events_emitted"`
+	EventsDropped uint64 `json:"events_dropped"`
+	BatchesSent   uint64 `json:"batches_sent"`
+	CircuitOpen   bool   `json:"circuit_open"`
+}
+
+// IsEnabled returns true if the tracker is enabled and ready.
+// A nil tracker or one constructed without brokers reports false.
+func (t *LfsOpsTracker) IsEnabled() bool {
+	return t != nil && t.config.Enabled && t.client != nil
+}
+
+// EmitUploadStarted emits an upload started event.
+// All Emit* helpers below are nil-safe no-ops when the tracker is disabled.
+func (t *LfsOpsTracker) EmitUploadStarted(requestID, topic string, partition int32, s3Key, contentType, clientIP, apiType string, expectedSize int64) {
+	if !t.IsEnabled() {
+		return
+	}
+	event := NewUploadStartedEvent(t.config.ProxyID, requestID, topic, partition, s3Key, contentType, clientIP, apiType, expectedSize)
+	t.Emit(event)
+}
+
+// EmitUploadCompleted emits an upload completed event.
+func (t *LfsOpsTracker) EmitUploadCompleted(requestID, topic string, partition int32, kafkaOffset int64, s3Bucket, s3Key string, size int64, sha256, checksum, checksumAlg, contentType string, duration time.Duration) {
+	if !t.IsEnabled() {
+		return
+	}
+	event := NewUploadCompletedEvent(t.config.ProxyID, requestID, topic, partition, kafkaOffset, s3Bucket, s3Key, size, sha256, checksum, checksumAlg, contentType, duration.Milliseconds())
+	t.Emit(event)
+}
+
+// EmitUploadFailed emits an upload failed event.
+func (t *LfsOpsTracker) EmitUploadFailed(requestID, topic, s3Key, errorCode, errorMessage, stage string, sizeUploaded int64, duration time.Duration) {
+	if !t.IsEnabled() {
+		return
+	}
+	event := NewUploadFailedEvent(t.config.ProxyID, requestID, topic, s3Key, errorCode, errorMessage, stage, sizeUploaded, duration.Milliseconds())
+	t.Emit(event)
+}
+
+// EmitDownloadRequested emits a download requested event.
+func (t *LfsOpsTracker) EmitDownloadRequested(requestID, s3Bucket, s3Key, mode, clientIP string, ttlSeconds int) {
+	if !t.IsEnabled() {
+		return
+	}
+	event := NewDownloadRequestedEvent(t.config.ProxyID, requestID, s3Bucket, s3Key, mode, clientIP, ttlSeconds)
+	t.Emit(event)
+}
+
+// EmitDownloadCompleted emits a download completed event.
+func (t *LfsOpsTracker) EmitDownloadCompleted(requestID, s3Key, mode string, duration time.Duration, size int64) {
+	if !t.IsEnabled() {
+		return
+	}
+	event := NewDownloadCompletedEvent(t.config.ProxyID, requestID, s3Key, mode, duration.Milliseconds(), size)
+	t.Emit(event)
+}
+
+// EmitOrphanDetected emits an orphan detected event.
+func (t *LfsOpsTracker) EmitOrphanDetected(requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason string, size int64) {
+	if !t.IsEnabled() {
+		return
+	}
+	event := NewOrphanDetectedEvent(t.config.ProxyID, requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason, size)
+	t.Emit(event)
+}
diff --git a/cmd/lfs-proxy/tracker_test.go b/cmd/lfs-proxy/tracker_test.go
new file mode 100644
index 00000000..91441fab
--- /dev/null
+++ b/cmd/lfs-proxy/tracker_test.go
@@ -0,0 +1,383 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "encoding/json" + "log/slog" + "os" + "testing" + "time" +) + +func TestTrackerEventTypes(t *testing.T) { + proxyID := "test-proxy" + requestID := "req-123" + + t.Run("UploadStartedEvent", func(t *testing.T) { + event := NewUploadStartedEvent(proxyID, requestID, "test-topic", 0, "s3/key", "application/json", "127.0.0.1", "http", 1024) + + if event.EventType != EventTypeUploadStarted { + t.Errorf("expected event type %s, got %s", EventTypeUploadStarted, event.EventType) + } + if event.Topic != "test-topic" { + t.Errorf("expected topic test-topic, got %s", event.Topic) + } + if event.ProxyID != proxyID { + t.Errorf("expected proxy ID %s, got %s", proxyID, event.ProxyID) + } + if event.RequestID != requestID { + t.Errorf("expected request ID %s, got %s", requestID, event.RequestID) + } + if event.Version != TrackerEventVersion { + t.Errorf("expected version %d, got %d", TrackerEventVersion, event.Version) + } + + // Test marshaling + data, err := event.Marshal() + if err != nil { + t.Fatalf("failed to marshal event: %v", err) + } + var decoded UploadStartedEvent + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("failed to unmarshal event: %v", err) + } + if decoded.Topic != event.Topic { + t.Errorf("decoded topic mismatch: %s vs %s", decoded.Topic, event.Topic) + } + }) + + t.Run("UploadCompletedEvent", func(t *testing.T) { + event := NewUploadCompletedEvent(proxyID, requestID, "test-topic", 0, 42, "bucket", "s3/key", 1024, "sha256hex", "checksum", "sha256", "application/json", 500) + + if 
event.EventType != EventTypeUploadCompleted { + t.Errorf("expected event type %s, got %s", EventTypeUploadCompleted, event.EventType) + } + if event.KafkaOffset != 42 { + t.Errorf("expected kafka offset 42, got %d", event.KafkaOffset) + } + if event.Size != 1024 { + t.Errorf("expected size 1024, got %d", event.Size) + } + if event.DurationMs != 500 { + t.Errorf("expected duration 500ms, got %d", event.DurationMs) + } + + data, err := event.Marshal() + if err != nil { + t.Fatalf("failed to marshal event: %v", err) + } + var decoded UploadCompletedEvent + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("failed to unmarshal event: %v", err) + } + }) + + t.Run("UploadFailedEvent", func(t *testing.T) { + event := NewUploadFailedEvent(proxyID, requestID, "test-topic", "s3/key", "s3_error", "connection refused", "s3_upload", 512, 250) + + if event.EventType != EventTypeUploadFailed { + t.Errorf("expected event type %s, got %s", EventTypeUploadFailed, event.EventType) + } + if event.ErrorCode != "s3_error" { + t.Errorf("expected error code s3_error, got %s", event.ErrorCode) + } + if event.Stage != "s3_upload" { + t.Errorf("expected stage s3_upload, got %s", event.Stage) + } + + data, err := event.Marshal() + if err != nil { + t.Fatalf("failed to marshal event: %v", err) + } + var decoded UploadFailedEvent + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("failed to unmarshal event: %v", err) + } + }) + + t.Run("DownloadRequestedEvent", func(t *testing.T) { + event := NewDownloadRequestedEvent(proxyID, requestID, "bucket", "s3/key", "presign", "192.168.1.1", 120) + + if event.EventType != EventTypeDownloadRequested { + t.Errorf("expected event type %s, got %s", EventTypeDownloadRequested, event.EventType) + } + if event.Mode != "presign" { + t.Errorf("expected mode presign, got %s", event.Mode) + } + if event.TTLSeconds != 120 { + t.Errorf("expected TTL 120, got %d", event.TTLSeconds) + } + + data, err := event.Marshal() + if err != nil 
{ + t.Fatalf("failed to marshal event: %v", err) + } + var decoded DownloadRequestedEvent + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("failed to unmarshal event: %v", err) + } + }) + + t.Run("DownloadCompletedEvent", func(t *testing.T) { + event := NewDownloadCompletedEvent(proxyID, requestID, "s3/key", "stream", 150, 2048) + + if event.EventType != EventTypeDownloadCompleted { + t.Errorf("expected event type %s, got %s", EventTypeDownloadCompleted, event.EventType) + } + if event.DurationMs != 150 { + t.Errorf("expected duration 150ms, got %d", event.DurationMs) + } + if event.Size != 2048 { + t.Errorf("expected size 2048, got %d", event.Size) + } + + data, err := event.Marshal() + if err != nil { + t.Fatalf("failed to marshal event: %v", err) + } + var decoded DownloadCompletedEvent + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("failed to unmarshal event: %v", err) + } + }) + + t.Run("OrphanDetectedEvent", func(t *testing.T) { + event := NewOrphanDetectedEvent(proxyID, requestID, "upload_failure", "test-topic", "bucket", "s3/key", "orig-req-456", "kafka_produce_failed", 4096) + + if event.EventType != EventTypeOrphanDetected { + t.Errorf("expected event type %s, got %s", EventTypeOrphanDetected, event.EventType) + } + if event.DetectionSource != "upload_failure" { + t.Errorf("expected detection source upload_failure, got %s", event.DetectionSource) + } + if event.Reason != "kafka_produce_failed" { + t.Errorf("expected reason kafka_produce_failed, got %s", event.Reason) + } + if event.OriginalRequestID != "orig-req-456" { + t.Errorf("expected original request ID orig-req-456, got %s", event.OriginalRequestID) + } + + data, err := event.Marshal() + if err != nil { + t.Fatalf("failed to marshal event: %v", err) + } + var decoded OrphanDetectedEvent + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("failed to unmarshal event: %v", err) + } + }) +} + +func TestTrackerDisabled(t *testing.T) { + logger := 
slog.New(slog.NewTextHandler(os.Stdout, nil)) + ctx := context.Background() + + cfg := TrackerConfig{ + Enabled: false, + ProxyID: "test-proxy", + } + + tracker, err := NewLfsOpsTracker(ctx, cfg, logger) + if err != nil { + t.Fatalf("failed to create disabled tracker: %v", err) + } + + if tracker.IsEnabled() { + t.Error("expected tracker to be disabled") + } + + // Should not panic when emitting to disabled tracker + tracker.EmitUploadStarted("req-1", "topic", 0, "key", "ct", "ip", "http", 100) + tracker.EmitUploadCompleted("req-1", "topic", 0, 0, "bucket", "key", 100, "sha", "cs", "alg", "ct", time.Second) + tracker.EmitUploadFailed("req-1", "topic", "key", "code", "msg", "stage", 0, time.Second) + tracker.EmitDownloadRequested("req-1", "bucket", "key", "presign", "ip", 60) + tracker.EmitDownloadCompleted("req-1", "key", "presign", time.Second, 100) + tracker.EmitOrphanDetected("req-1", "source", "topic", "bucket", "key", "orig", "reason", 100) + + stats := tracker.Stats() + if stats.Enabled { + t.Error("expected stats.Enabled to be false") + } +} + +func TestTrackerNoBrokers(t *testing.T) { + logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) + ctx := context.Background() + + cfg := TrackerConfig{ + Enabled: true, + Topic: "__lfs_ops_state", + Brokers: nil, // No brokers + ProxyID: "test-proxy", + } + + tracker, err := NewLfsOpsTracker(ctx, cfg, logger) + if err != nil { + t.Fatalf("failed to create tracker without brokers: %v", err) + } + + if tracker.IsEnabled() { + t.Error("expected tracker to be disabled when no brokers configured") + } +} + +func TestTrackerConfigDefaults(t *testing.T) { + logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) + ctx := context.Background() + + cfg := TrackerConfig{ + Enabled: true, + Topic: "", // Should default to __lfs_ops_state + Brokers: []string{"localhost:9092"}, + BatchSize: 0, // Should default + FlushMs: 0, // Should default + ProxyID: "test-proxy", + } + + // This will fail to connect but should not error on 
config defaults + tracker, err := NewLfsOpsTracker(ctx, cfg, logger) + if err != nil { + // May fail to connect, but defaults should be set + t.Logf("tracker creation returned error (expected if Kafka not running): %v", err) + } + if tracker != nil { + defer func() { _ = tracker.Close() }() + } +} + +func TestEventToRecordUsesTopicKey(t *testing.T) { + tracker := &LfsOpsTracker{} + event := NewUploadCompletedEvent( + "proxy-1", + "req-1", + "topic-a", + 0, + 10, + "bucket", + "key", + 123, + "sha", + "chk", + "sha256", + "application/octet-stream", + 10, + ) + + record, err := tracker.eventToRecord(event) + if err != nil { + t.Fatalf("eventToRecord error: %v", err) + } + if string(record.Key) != "topic-a" { + t.Fatalf("expected record key topic-a, got %q", string(record.Key)) + } + if record.Partition != 0 { + t.Fatalf("expected partition 0 (unset), got %d", record.Partition) + } +} + +func TestTrackerStats(t *testing.T) { + logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) + + tracker := &LfsOpsTracker{ + config: TrackerConfig{ + Enabled: true, + Topic: "__lfs_ops_state", + }, + logger: logger, + } + + stats := tracker.Stats() + if !stats.Enabled { + t.Error("expected stats.Enabled to be true") + } + if stats.Topic != "__lfs_ops_state" { + t.Errorf("expected topic __lfs_ops_state, got %s", stats.Topic) + } +} + +func TestNilTrackerSafe(t *testing.T) { + var tracker *LfsOpsTracker + + // All these should not panic on nil tracker + tracker.Emit(nil) + tracker.EmitUploadStarted("", "", 0, "", "", "", "", 0) + tracker.EmitUploadCompleted("", "", 0, 0, "", "", 0, "", "", "", "", 0) + tracker.EmitUploadFailed("", "", "", "", "", "", 0, 0) + tracker.EmitDownloadRequested("", "", "", "", "", 0) + tracker.EmitDownloadCompleted("", "", "", 0, 0) + tracker.EmitOrphanDetected("", "", "", "", "", "", "", 0) + + if tracker.IsEnabled() { + t.Error("nil tracker should not be enabled") + } + + stats := tracker.Stats() + if stats.Enabled { + t.Error("nil tracker stats should 
show disabled") + } + + // Close should not panic + err := tracker.Close() + if err != nil { + t.Errorf("nil tracker close should not error: %v", err) + } +} + +func TestGetTopic(t *testing.T) { + tests := []struct { + event TrackerEvent + expected string + }{ + {&UploadStartedEvent{Topic: "topic-a"}, "topic-a"}, + {&UploadCompletedEvent{Topic: "topic-b"}, "topic-b"}, + {&UploadFailedEvent{Topic: "topic-c"}, "topic-c"}, + {&DownloadRequestedEvent{}, ""}, + {&DownloadCompletedEvent{}, ""}, + {&OrphanDetectedEvent{Topic: "topic-d"}, "topic-d"}, + } + + for _, tt := range tests { + result := tt.event.GetTopic() + if result != tt.expected { + t.Errorf("GetTopic() = %q, expected %q", result, tt.expected) + } + } +} + +func TestBaseEventFields(t *testing.T) { + base := newBaseEvent("test_event", "proxy-1", "req-abc") + + if base.EventType != "test_event" { + t.Errorf("expected event type test_event, got %s", base.EventType) + } + if base.ProxyID != "proxy-1" { + t.Errorf("expected proxy ID proxy-1, got %s", base.ProxyID) + } + if base.RequestID != "req-abc" { + t.Errorf("expected request ID req-abc, got %s", base.RequestID) + } + if base.Version != TrackerEventVersion { + t.Errorf("expected version %d, got %d", TrackerEventVersion, base.Version) + } + if base.EventID == "" { + t.Error("expected non-empty event ID") + } + if base.Timestamp == "" { + t.Error("expected non-empty timestamp") + } +} diff --git a/cmd/lfs-proxy/tracker_types.go b/cmd/lfs-proxy/tracker_types.go new file mode 100644 index 00000000..455a5835 --- /dev/null +++ b/cmd/lfs-proxy/tracker_types.go @@ -0,0 +1,238 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"encoding/json"
+	"time"
+)
+
+// Event types for LFS operations tracking.
+const (
+	EventTypeUploadStarted     = "upload_started"
+	EventTypeUploadCompleted   = "upload_completed"
+	EventTypeUploadFailed      = "upload_failed"
+	EventTypeDownloadRequested = "download_requested"
+	EventTypeDownloadCompleted = "download_completed"
+	EventTypeOrphanDetected    = "orphan_detected"
+)
+
+// TrackerEventVersion is the current schema version for tracker events.
+// Bump when the JSON shape of any event changes incompatibly.
+const TrackerEventVersion = 1
+
+// BaseEvent contains common fields for all tracker events.
+// Populated by newBaseEvent; Timestamp is UTC RFC3339Nano.
+type BaseEvent struct {
+	EventType string `json:"event_type"`
+	EventID   string `json:"event_id"`
+	Timestamp string `json:"timestamp"`
+	ProxyID   string `json:"proxy_id"`
+	RequestID string `json:"request_id"`
+	Version   int    `json:"version"`
+}
+
+// UploadStartedEvent is emitted when an upload operation begins.
+type UploadStartedEvent struct {
+	BaseEvent
+	Topic        string `json:"topic"`
+	Partition    int32  `json:"partition"`
+	S3Key        string `json:"s3_key"`
+	ContentType  string `json:"content_type,omitempty"`
+	ExpectedSize int64  `json:"expected_size,omitempty"`
+	ClientIP     string `json:"client_ip,omitempty"`
+	APIType      string `json:"api_type"` // "http" or "kafka"
+}
+
+// UploadCompletedEvent is emitted when an upload operation succeeds.
+type UploadCompletedEvent struct {
+	BaseEvent
+	Topic       string `json:"topic"`
+	Partition   int32  `json:"partition"`
+	KafkaOffset int64  `json:"kafka_offset,omitempty"`
+	S3Bucket    string `json:"s3_bucket"`
+	S3Key       string `json:"s3_key"`
+	Size        int64  `json:"size"`
+	SHA256      string `json:"sha256"`
+	Checksum    string `json:"checksum,omitempty"`
+	ChecksumAlg string `json:"checksum_alg,omitempty"`
+	DurationMs  int64  `json:"duration_ms"`
+	ContentType string `json:"content_type,omitempty"`
+}
+
+// UploadFailedEvent is emitted when an upload operation fails.
+type UploadFailedEvent struct {
+	BaseEvent
+	Topic        string `json:"topic"`
+	S3Key        string `json:"s3_key,omitempty"`
+	ErrorCode    string `json:"error_code"`
+	ErrorMessage string `json:"error_message"`
+	Stage        string `json:"stage"` // "validation", "s3_upload", "kafka_produce"
+	SizeUploaded int64  `json:"size_uploaded,omitempty"`
+	DurationMs   int64  `json:"duration_ms"`
+}
+
+// DownloadRequestedEvent is emitted when a download operation is requested.
+type DownloadRequestedEvent struct {
+	BaseEvent
+	S3Bucket   string `json:"s3_bucket"`
+	S3Key      string `json:"s3_key"`
+	Mode       string `json:"mode"` // "presign" or "stream"
+	ClientIP   string `json:"client_ip,omitempty"`
+	TTLSeconds int    `json:"ttl_seconds,omitempty"`
+}
+
+// DownloadCompletedEvent is emitted when a download operation completes.
+type DownloadCompletedEvent struct {
+	BaseEvent
+	S3Key      string `json:"s3_key"`
+	Mode       string `json:"mode"`
+	DurationMs int64  `json:"duration_ms"`
+	Size       int64  `json:"size,omitempty"`
+}
+
+// OrphanDetectedEvent is emitted when an orphaned S3 object is detected.
+type OrphanDetectedEvent struct { + BaseEvent + DetectionSource string `json:"detection_source"` // "upload_failure", "reconciliation" + Topic string `json:"topic"` + S3Bucket string `json:"s3_bucket"` + S3Key string `json:"s3_key"` + Size int64 `json:"size,omitempty"` + OriginalRequestID string `json:"original_request_id,omitempty"` + Reason string `json:"reason"` // "kafka_produce_failed", "checksum_mismatch", etc. +} + +// TrackerEvent is a union type that can hold any tracker event. +type TrackerEvent interface { + GetEventType() string + GetTopic() string + Marshal() ([]byte, error) +} + +// GetEventType returns the event type. +func (e *BaseEvent) GetEventType() string { + return e.EventType +} + +// GetTopic returns the topic for partitioning. +func (e *UploadStartedEvent) GetTopic() string { return e.Topic } +func (e *UploadCompletedEvent) GetTopic() string { return e.Topic } +func (e *UploadFailedEvent) GetTopic() string { return e.Topic } +func (e *DownloadRequestedEvent) GetTopic() string { return "" } +func (e *DownloadCompletedEvent) GetTopic() string { return "" } +func (e *OrphanDetectedEvent) GetTopic() string { return e.Topic } + +// Marshal serializes the event to JSON. +func (e *UploadStartedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *UploadCompletedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *UploadFailedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *DownloadRequestedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *DownloadCompletedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *OrphanDetectedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } + +// newBaseEvent creates a new base event with common fields. 
+func newBaseEvent(eventType, proxyID, requestID string) BaseEvent { + return BaseEvent{ + EventType: eventType, + EventID: newUUID(), + Timestamp: time.Now().UTC().Format(time.RFC3339Nano), + ProxyID: proxyID, + RequestID: requestID, + Version: TrackerEventVersion, + } +} + +// NewUploadStartedEvent creates a new upload started event. +func NewUploadStartedEvent(proxyID, requestID, topic string, partition int32, s3Key, contentType, clientIP, apiType string, expectedSize int64) *UploadStartedEvent { + return &UploadStartedEvent{ + BaseEvent: newBaseEvent(EventTypeUploadStarted, proxyID, requestID), + Topic: topic, + Partition: partition, + S3Key: s3Key, + ContentType: contentType, + ExpectedSize: expectedSize, + ClientIP: clientIP, + APIType: apiType, + } +} + +// NewUploadCompletedEvent creates a new upload completed event. +func NewUploadCompletedEvent(proxyID, requestID, topic string, partition int32, kafkaOffset int64, s3Bucket, s3Key string, size int64, sha256, checksum, checksumAlg, contentType string, durationMs int64) *UploadCompletedEvent { + return &UploadCompletedEvent{ + BaseEvent: newBaseEvent(EventTypeUploadCompleted, proxyID, requestID), + Topic: topic, + Partition: partition, + KafkaOffset: kafkaOffset, + S3Bucket: s3Bucket, + S3Key: s3Key, + Size: size, + SHA256: sha256, + Checksum: checksum, + ChecksumAlg: checksumAlg, + DurationMs: durationMs, + ContentType: contentType, + } +} + +// NewUploadFailedEvent creates a new upload failed event. +func NewUploadFailedEvent(proxyID, requestID, topic, s3Key, errorCode, errorMessage, stage string, sizeUploaded, durationMs int64) *UploadFailedEvent { + return &UploadFailedEvent{ + BaseEvent: newBaseEvent(EventTypeUploadFailed, proxyID, requestID), + Topic: topic, + S3Key: s3Key, + ErrorCode: errorCode, + ErrorMessage: errorMessage, + Stage: stage, + SizeUploaded: sizeUploaded, + DurationMs: durationMs, + } +} + +// NewDownloadRequestedEvent creates a new download requested event. 
+func NewDownloadRequestedEvent(proxyID, requestID, s3Bucket, s3Key, mode, clientIP string, ttlSeconds int) *DownloadRequestedEvent { + return &DownloadRequestedEvent{ + BaseEvent: newBaseEvent(EventTypeDownloadRequested, proxyID, requestID), + S3Bucket: s3Bucket, + S3Key: s3Key, + Mode: mode, + ClientIP: clientIP, + TTLSeconds: ttlSeconds, + } +} + +// NewDownloadCompletedEvent creates a new download completed event. +func NewDownloadCompletedEvent(proxyID, requestID, s3Key, mode string, durationMs, size int64) *DownloadCompletedEvent { + return &DownloadCompletedEvent{ + BaseEvent: newBaseEvent(EventTypeDownloadCompleted, proxyID, requestID), + S3Key: s3Key, + Mode: mode, + DurationMs: durationMs, + Size: size, + } +} + +// NewOrphanDetectedEvent creates a new orphan detected event. +func NewOrphanDetectedEvent(proxyID, requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason string, size int64) *OrphanDetectedEvent { + return &OrphanDetectedEvent{ + BaseEvent: newBaseEvent(EventTypeOrphanDetected, proxyID, requestID), + DetectionSource: detectionSource, + Topic: topic, + S3Bucket: s3Bucket, + S3Key: s3Key, + Size: size, + OriginalRequestID: originalRequestID, + Reason: reason, + } +} diff --git a/cmd/lfs-proxy/uuid.go b/cmd/lfs-proxy/uuid.go new file mode 100644 index 00000000..aa1fa49c --- /dev/null +++ b/cmd/lfs-proxy/uuid.go @@ -0,0 +1,22 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "github.com/google/uuid" + +func newUUID() string { + return uuid.NewString() +} diff --git a/cmd/proxy/lfs.go b/cmd/proxy/lfs.go new file mode 100644 index 00000000..91034d71 --- /dev/null +++ b/cmd/proxy/lfs.go @@ -0,0 +1,503 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "context" + "crypto/tls" + "fmt" + "log/slog" + "net" + "os" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/KafScale/platform/pkg/protocol" + "github.com/google/uuid" +) + +const ( + defaultLFSMaxBlob = int64(5 << 30) + defaultLFSChunkSize = int64(5 << 20) + defaultLFSDialTimeoutMs = 5000 + defaultLFSBackendBackoffMs = 500 + defaultLFSS3HealthIntervalSec = 30 + defaultLFSHTTPReadTimeoutSec = 30 + defaultLFSHTTPWriteTimeoutSec = 300 + defaultLFSHTTPIdleTimeoutSec = 60 + defaultLFSHTTPHeaderTimeoutSec = 10 + defaultLFSHTTPMaxHeaderBytes = 1 << 20 + defaultLFSHTTPShutdownSec = 10 + defaultLFSTopicMaxLength = 249 + defaultLFSDownloadTTLSec = 120 + defaultLFSUploadSessionTTLSec = 3600 +) + +// lfsModule encapsulates LFS functionality as a feature-flagged module +// inside the existing proxy. When enabled, it intercepts produce requests +// to detect LFS_BLOB headers, uploads payloads to S3, and replaces record +// values with JSON envelopes — all before the existing partition-aware fan-out. 
+type lfsModule struct {
+    logger      *slog.Logger
+    s3Uploader  *s3Uploader
+    s3Bucket    string
+    s3Namespace string // key prefix namespace used by buildObjectKey
+    maxBlob     int64
+    chunkSize   int64
+    checksumAlg string // default checksum algorithm when a request names none
+    proxyID     string
+    metrics     *lfsMetrics
+    tracker     *LfsOpsTracker
+    s3Healthy   uint32 // accessed atomically: 1 healthy, 0 unhealthy
+    corrID      uint32 // NOTE(review): not referenced in this file's visible code — confirm use
+    httpAPIKey  string
+    httpReadTimeout time.Duration
+    httpWriteTimeout time.Duration
+    httpIdleTimeout time.Duration
+    httpHeaderTimeout time.Duration
+    httpMaxHeaderBytes int
+    httpShutdownTimeout time.Duration
+    topicMaxLength int
+    downloadTTLMax time.Duration
+    dialTimeout time.Duration
+    backendTLSConfig *tls.Config
+    backendSASLMechanism string
+    backendSASLUsername string
+    backendSASLPassword string
+    httpTLSConfig *tls.Config
+    httpTLSCertFile string
+    httpTLSKeyFile string
+    uploadSessionTTL time.Duration
+    uploadMu sync.Mutex // guards uploadSessions
+    uploadSessions map[string]*uploadSession
+
+    // backends and connectivity for the HTTP API path (which needs its own
+    // backend connections, independent of the proxy's connection pool)
+    backendRetries int
+    backendBackoff time.Duration
+    backends []string
+    cacheMu sync.RWMutex // guards cachedBackends
+    cachedBackends []string
+    rr uint32 // round-robin counter, advanced atomically
+}
+
+// initLFSModule reads all KAFSCALE_LFS_* environment configuration, creates
+// the S3 uploader and ops tracker, assembles the module, and starts the
+// periodic S3 health check. Tracker init failure is non-fatal (logged, the
+// module continues with an inert tracker); S3/TLS config errors are fatal.
+func initLFSModule(ctx context.Context, logger *slog.Logger) (*lfsModule, error) {
+    // --- S3 configuration ---
+    s3Bucket := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_BUCKET"))
+    s3Region := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_REGION"))
+    s3Endpoint := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ENDPOINT"))
+    s3PublicURL := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_PUBLIC_ENDPOINT"))
+    s3AccessKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ACCESS_KEY"))
+    s3SecretKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SECRET_KEY"))
+    s3SessionToken := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SESSION_TOKEN"))
+    // Path-style addressing defaults on when a custom endpoint (MinIO etc.) is set.
+    forcePathStyle := lfsEnvBoolDefault("KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE", s3Endpoint != "")
+    s3EnsureBucket := lfsEnvBoolDefault("KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET", false)
+    maxBlob := lfsEnvInt64("KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE", defaultLFSMaxBlob)
+    chunkSize := lfsEnvInt64("KAFSCALE_LFS_PROXY_CHUNK_SIZE", defaultLFSChunkSize)
+    proxyID := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_ID"))
+    s3Namespace := lfsEnvOrDefault("KAFSCALE_S3_NAMESPACE", "default")
+    checksumAlg := lfsEnvOrDefault("KAFSCALE_LFS_PROXY_CHECKSUM_ALGO", "sha256")
+    httpAPIKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_API_KEY"))
+    // --- backend connectivity knobs (clamped to sane minimums) ---
+    dialTimeout := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_DIAL_TIMEOUT_MS", defaultLFSDialTimeoutMs)) * time.Millisecond
+    backendRetries := lfsEnvInt("KAFSCALE_LFS_PROXY_BACKEND_RETRIES", 6)
+    if backendRetries < 1 {
+        backendRetries = 1
+    }
+    backendBackoff := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_BACKEND_BACKOFF_MS", defaultLFSBackendBackoffMs)) * time.Millisecond
+    if backendBackoff <= 0 {
+        backendBackoff = time.Duration(defaultLFSBackendBackoffMs) * time.Millisecond
+    }
+    // --- HTTP server tuning ---
+    httpReadTimeout := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_HTTP_READ_TIMEOUT_SEC", defaultLFSHTTPReadTimeoutSec)) * time.Second
+    httpWriteTimeout := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_HTTP_WRITE_TIMEOUT_SEC", defaultLFSHTTPWriteTimeoutSec)) * time.Second
+    httpIdleTimeout := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_HTTP_IDLE_TIMEOUT_SEC", defaultLFSHTTPIdleTimeoutSec)) * time.Second
+    httpHeaderTimeout := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_HTTP_HEADER_TIMEOUT_SEC", defaultLFSHTTPHeaderTimeoutSec)) * time.Second
+    httpMaxHeaderBytes := lfsEnvInt("KAFSCALE_LFS_PROXY_HTTP_MAX_HEADER_BYTES", defaultLFSHTTPMaxHeaderBytes)
+    httpShutdownTimeout := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_HTTP_SHUTDOWN_TIMEOUT_SEC", defaultLFSHTTPShutdownSec)) * time.Second
+    uploadSessionTTL := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_UPLOAD_SESSION_TTL_SEC", defaultLFSUploadSessionTTLSec)) * time.Second
+    topicMaxLength := lfsEnvInt("KAFSCALE_LFS_PROXY_TOPIC_MAX_LENGTH", defaultLFSTopicMaxLength)
+    downloadTTLSec := lfsEnvInt("KAFSCALE_LFS_PROXY_DOWNLOAD_TTL_SEC", defaultLFSDownloadTTLSec)
+    if downloadTTLSec <= 0 {
+        downloadTTLSec = defaultLFSDownloadTTLSec
+    }
+
+    // --- TLS / SASL toward the backend brokers and for the HTTP listener ---
+    backendTLSConfig, err := lfsBuildBackendTLSConfig()
+    if err != nil {
+        return nil, fmt.Errorf("backend tls config: %w", err)
+    }
+    backendSASLMechanism := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_MECHANISM"))
+    backendSASLUsername := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_USERNAME"))
+    backendSASLPassword := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_PASSWORD"))
+    httpTLSConfig, httpTLSCertFile, httpTLSKeyFile, err := lfsBuildHTTPServerTLSConfig()
+    if err != nil {
+        return nil, fmt.Errorf("http tls config: %w", err)
+    }
+
+    uploader, err := newS3Uploader(ctx, s3Config{
+        Bucket:          s3Bucket,
+        Region:          s3Region,
+        Endpoint:        s3Endpoint,
+        PublicEndpoint:  s3PublicURL,
+        AccessKeyID:     s3AccessKey,
+        SecretAccessKey: s3SecretKey,
+        SessionToken:    s3SessionToken,
+        ForcePathStyle:  forcePathStyle,
+        ChunkSize:       chunkSize,
+    })
+    if err != nil {
+        return nil, fmt.Errorf("s3 client init: %w", err)
+    }
+    if s3EnsureBucket {
+        // Best-effort: failure is logged but does not abort startup.
+        if err := uploader.EnsureBucket(ctx); err != nil {
+            logger.Error("lfs s3 bucket ensure failed", "error", err)
+        }
+    }
+
+    metrics := newLfsMetrics()
+
+    // Tracker
+    backends := splitCSV(os.Getenv("KAFSCALE_LFS_PROXY_BACKENDS"))
+    trackerEnabled := lfsEnvBoolDefault("KAFSCALE_LFS_TRACKER_ENABLED", true)
+    trackerTopic := lfsEnvOrDefault("KAFSCALE_LFS_TRACKER_TOPIC", defaultTrackerTopic)
+    trackerBatchSize := lfsEnvInt("KAFSCALE_LFS_TRACKER_BATCH_SIZE", defaultTrackerBatchSize)
+    trackerFlushMs := lfsEnvInt("KAFSCALE_LFS_TRACKER_FLUSH_MS", defaultTrackerFlushMs)
+    trackerEnsureTopic := lfsEnvBoolDefault("KAFSCALE_LFS_TRACKER_ENSURE_TOPIC", true)
+    trackerPartitions := lfsEnvInt("KAFSCALE_LFS_TRACKER_PARTITIONS", defaultTrackerPartitions)
+    trackerReplication := lfsEnvInt("KAFSCALE_LFS_TRACKER_REPLICATION_FACTOR", defaultTrackerReplication)
+
+    trackerCfg := TrackerConfig{
+        Enabled:           trackerEnabled,
+        Topic:             trackerTopic,
+        Brokers:           backends,
+        BatchSize:         trackerBatchSize,
+        FlushMs:           trackerFlushMs,
+        ProxyID:           proxyID,
+        EnsureTopic:       trackerEnsureTopic,
+        Partitions:        trackerPartitions,
+        ReplicationFactor: trackerReplication,
+    }
+    tracker, err := NewLfsOpsTracker(ctx, trackerCfg, logger)
+    if err != nil {
+        // Non-fatal: fall back to a bare tracker so callers never see nil.
+        logger.Warn("lfs ops tracker init failed, continuing without tracker", "error", err)
+        tracker = &LfsOpsTracker{config: trackerCfg, logger: logger}
+    }
+
+    m := &lfsModule{
+        logger:               logger,
+        s3Uploader:           uploader,
+        s3Bucket:             s3Bucket,
+        s3Namespace:          s3Namespace,
+        maxBlob:              maxBlob,
+        chunkSize:            chunkSize,
+        checksumAlg:          checksumAlg,
+        proxyID:              proxyID,
+        metrics:              metrics,
+        tracker:              tracker,
+        httpAPIKey:           httpAPIKey,
+        httpReadTimeout:      httpReadTimeout,
+        httpWriteTimeout:     httpWriteTimeout,
+        httpIdleTimeout:      httpIdleTimeout,
+        httpHeaderTimeout:    httpHeaderTimeout,
+        httpMaxHeaderBytes:   httpMaxHeaderBytes,
+        httpShutdownTimeout:  httpShutdownTimeout,
+        topicMaxLength:       topicMaxLength,
+        downloadTTLMax:       time.Duration(downloadTTLSec) * time.Second,
+        dialTimeout:          dialTimeout,
+        backendRetries:       backendRetries,
+        backendBackoff:       backendBackoff,
+        backendTLSConfig:     backendTLSConfig,
+        backendSASLMechanism: backendSASLMechanism,
+        backendSASLUsername:  backendSASLUsername,
+        backendSASLPassword:  backendSASLPassword,
+        httpTLSConfig:        httpTLSConfig,
+        httpTLSCertFile:      httpTLSCertFile,
+        httpTLSKeyFile:       httpTLSKeyFile,
+        uploadSessionTTL:     uploadSessionTTL,
+        uploadSessions:       make(map[string]*uploadSession),
+        backends:             backends,
+    }
+
+    // Mark S3 healthy initially and start health check loop
+    m.markS3Healthy(true)
+    s3HealthInterval := time.Duration(lfsEnvInt("KAFSCALE_LFS_PROXY_S3_HEALTH_INTERVAL_SEC", defaultLFSS3HealthIntervalSec)) * time.Second
+    m.startS3HealthCheck(ctx, s3HealthInterval)
+
+    return m, nil
+}
+
+// rewriteProduceRequest is the integration point called from handleProduceRouting.
+// It scans produce records for LFS_BLOB headers, uploads blobs to S3, and +// replaces record values with LFS envelope JSON — all in-place on the parsed +// ProduceRequest struct. Returns true if any records were rewritten, along with +// orphan candidates (S3 objects that should be tracked if the downstream Kafka +// produce fails). +func (m *lfsModule) rewriteProduceRequest(ctx context.Context, header *protocol.RequestHeader, req *protocol.ProduceRequest) (bool, []orphanInfo, error) { + result, err := m.rewriteProduceRecords(ctx, header, req) + if err != nil { + for _, topic := range lfsTopicsFromProduce(req) { + m.metrics.IncRequests(topic, "error", "lfs") + } + return false, nil, err + } + if !result.modified { + return false, nil, nil + } + for topic := range result.topics { + m.metrics.IncRequests(topic, "ok", "lfs") + } + m.metrics.ObserveUploadDuration(result.duration) + m.metrics.AddUploadBytes(result.uploadBytes) + return true, result.orphans, nil +} + +// Shutdown gracefully shuts down the LFS module. 
+func (m *lfsModule) Shutdown() { + if m == nil { + return + } + if m.tracker != nil { + if err := m.tracker.Close(); err != nil { + m.logger.Warn("lfs tracker close error", "error", err) + } + } +} + +func (m *lfsModule) markS3Healthy(ok bool) { + if ok { + atomic.StoreUint32(&m.s3Healthy, 1) + return + } + atomic.StoreUint32(&m.s3Healthy, 0) +} + +func (m *lfsModule) isS3Healthy() bool { + return atomic.LoadUint32(&m.s3Healthy) == 1 +} + +func (m *lfsModule) startS3HealthCheck(ctx context.Context, interval time.Duration) { + if interval <= 0 { + interval = time.Duration(defaultLFSS3HealthIntervalSec) * time.Second + } + ticker := time.NewTicker(interval) + go func() { + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + err := m.s3Uploader.HeadBucket(ctx) + wasHealthy := m.isS3Healthy() + m.markS3Healthy(err == nil) + if err != nil && wasHealthy { + m.logger.Warn("lfs s3 health check failed", "error", err) + } else if err == nil && !wasHealthy { + m.logger.Info("lfs s3 health check recovered") + } + } + } + }() +} + +func (m *lfsModule) buildObjectKey(topic string) string { + ns := strings.TrimSpace(m.s3Namespace) + if ns == "" { + ns = "default" + } + now := time.Now().UTC() + return fmt.Sprintf("%s/%s/lfs/%04d/%02d/%02d/obj-%s", ns, topic, now.Year(), now.Month(), now.Day(), newLFSUUID()) +} + +func (m *lfsModule) resolveChecksumAlg(raw string) (lfs.ChecksumAlg, error) { + if strings.TrimSpace(raw) == "" { + return lfs.NormalizeChecksumAlg(m.checksumAlg) + } + return lfs.NormalizeChecksumAlg(raw) +} + +func (m *lfsModule) setCachedBackends(backends []string) { + if len(backends) == 0 { + return + } + copied := make([]string, len(backends)) + copy(copied, backends) + m.cacheMu.Lock() + m.cachedBackends = copied + m.cacheMu.Unlock() +} + +func (m *lfsModule) cachedBackendsSnapshot() []string { + m.cacheMu.RLock() + if len(m.cachedBackends) == 0 { + m.cacheMu.RUnlock() + return nil + } + copied := make([]string, 
len(m.cachedBackends)) + copy(copied, m.cachedBackends) + m.cacheMu.RUnlock() + return copied +} + +// connectBackend dials a backend broker for the HTTP API path. +func (m *lfsModule) connectBackend(ctx context.Context) (net.Conn, string, error) { + var lastErr error + for attempt := 0; attempt < m.backendRetries; attempt++ { + backends := m.backends + if len(backends) == 0 { + if cached := m.cachedBackendsSnapshot(); len(cached) > 0 { + backends = cached + } + } + if len(backends) == 0 { + lastErr = fmt.Errorf("no backends available") + time.Sleep(m.backendBackoff) + continue + } + index := atomic.AddUint32(&m.rr, 1) + addr := backends[int(index)%len(backends)] + dialer := net.Dialer{Timeout: m.dialTimeout} + conn, dialErr := dialer.DialContext(ctx, "tcp", addr) + if dialErr == nil { + wrapped, err := m.wrapBackendTLS(ctx, conn, addr) + if err != nil { + _ = conn.Close() + lastErr = err + time.Sleep(m.backendBackoff) + continue + } + if err := m.performBackendSASL(ctx, wrapped); err != nil { + _ = wrapped.Close() + lastErr = err + time.Sleep(m.backendBackoff) + continue + } + return wrapped, addr, nil + } + lastErr = dialErr + time.Sleep(m.backendBackoff) + } + if lastErr == nil { + lastErr = fmt.Errorf("no backends available") + } + return nil, "", lastErr +} + +// forwardToBackend writes a frame and reads the response. 
+func (m *lfsModule) forwardToBackend(ctx context.Context, conn net.Conn, payload []byte) ([]byte, error) { + deadline := time.Now().Add(m.dialTimeout) + if ctxDeadline, ok := ctx.Deadline(); ok && ctxDeadline.Before(deadline) { + deadline = ctxDeadline + } + _ = conn.SetDeadline(deadline) + defer func() { _ = conn.SetDeadline(time.Time{}) }() + if err := protocol.WriteFrame(conn, payload); err != nil { + return nil, err + } + frame, err := protocol.ReadFrame(conn) + if err != nil { + return nil, err + } + return frame.Payload, nil +} + +func (m *lfsModule) trackOrphans(orphans []orphanInfo) { + if len(orphans) == 0 { + return + } + m.metrics.IncOrphans(len(orphans)) + for _, orphan := range orphans { + m.logger.Warn("lfs orphaned object", "topic", orphan.Topic, "key", orphan.Key, "reason", orphan.Reason) + reason := orphan.Reason + if reason == "" { + reason = "kafka_produce_failed" + } + m.tracker.EmitOrphanDetected(orphan.RequestID, "upload_failure", orphan.Topic, m.s3Bucket, orphan.Key, orphan.RequestID, reason, 0) + } +} + +// Helper functions scoped to the LFS module to avoid collisions with +// identically-named helpers in the existing proxy package. 
+ +func newLFSUUID() string { + return uuid.NewString() +} + +func lfsEnvBoolDefault(key string, fallback bool) bool { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + switch strings.ToLower(val) { + case "1", "true", "yes", "y", "on": + return true + case "0", "false", "no", "n", "off": + return false + default: + return fallback + } +} + +func lfsEnvOrDefault(key, fallback string) string { + if val := os.Getenv(key); val != "" { + return val + } + return fallback +} + +func lfsEnvInt(key string, fallback int) int { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + parsed, err := strconv.Atoi(val) + if err != nil { + return fallback + } + return parsed +} + +func lfsEnvInt64(key string, fallback int64) int64 { + val := strings.TrimSpace(os.Getenv(key)) + if val == "" { + return fallback + } + parsed, err := strconv.ParseInt(val, 10, 64) + if err != nil { + return fallback + } + return parsed +} + +func lfsTopicsFromProduce(req *protocol.ProduceRequest) []string { + if req == nil { + return nil + } + seen := make(map[string]struct{}, len(req.Topics)) + out := make([]string, 0, len(req.Topics)) + for _, topic := range req.Topics { + if _, ok := seen[topic.Topic]; ok { + continue + } + seen[topic.Topic] = struct{}{} + out = append(out, topic.Topic) + } + if len(out) == 0 { + return []string{"unknown"} + } + return out +} diff --git a/cmd/proxy/lfs_backend_auth.go b/cmd/proxy/lfs_backend_auth.go new file mode 100644 index 00000000..a9be641d --- /dev/null +++ b/cmd/proxy/lfs_backend_auth.go @@ -0,0 +1,98 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+    "context"
+    "crypto/tls"
+    "errors"
+    "fmt"
+    "net"
+    "strings"
+    "time"
+
+    "github.com/KafScale/platform/pkg/protocol"
+)
+
+// wrapBackendTLS upgrades conn to TLS when backend TLS is configured,
+// deriving ServerName from addr when the config does not set one. The
+// handshake is bounded by the dial timeout, tightened by any earlier ctx
+// deadline (matching forwardToBackend — previously a later ctx deadline
+// could loosen the bound), and is context-aware via HandshakeContext.
+func (m *lfsModule) wrapBackendTLS(ctx context.Context, conn net.Conn, addr string) (net.Conn, error) {
+    if m.backendTLSConfig == nil {
+        return conn, nil
+    }
+    cfg := m.backendTLSConfig.Clone()
+    if cfg.ServerName == "" {
+        if host, _, err := net.SplitHostPort(addr); err == nil {
+            cfg.ServerName = host
+        }
+    }
+    tlsConn := tls.Client(conn, cfg)
+    deadline := time.Now().Add(m.dialTimeout)
+    if ctxDeadline, ok := ctx.Deadline(); ok && ctxDeadline.Before(deadline) {
+        deadline = ctxDeadline
+    }
+    _ = tlsConn.SetDeadline(deadline)
+    if err := tlsConn.HandshakeContext(ctx); err != nil {
+        return nil, err
+    }
+    _ = tlsConn.SetDeadline(time.Time{})
+    return tlsConn, nil
+}
+
+// performBackendSASL runs the SASL handshake/authenticate exchange on a
+// freshly dialed backend connection. Only PLAIN is supported; an empty
+// mechanism means no authentication and returns nil immediately.
+func (m *lfsModule) performBackendSASL(ctx context.Context, conn net.Conn) error {
+    mech := strings.TrimSpace(m.backendSASLMechanism)
+    if mech == "" {
+        return nil
+    }
+    if strings.ToUpper(mech) != "PLAIN" {
+        return fmt.Errorf("unsupported SASL mechanism %q", mech)
+    }
+    if m.backendSASLUsername == "" {
+        return errors.New("backend SASL username required")
+    }
+
+    // SaslHandshake v1 announces the mechanism...
+    correlationID := int32(1)
+    handshakeReq, err := lfsEncodeSaslHandshakeRequest(&protocol.RequestHeader{
+        APIKey:        lfsAPIKeySaslHandshake,
+        APIVersion:    1,
+        CorrelationID: correlationID,
+    }, mech)
+    if err != nil {
+        return err
+    }
+    if err := protocol.WriteFrame(conn, handshakeReq); err != nil {
+        return err
+    }
+    if err := lfsReadSaslResponse(conn); err != nil {
+        return fmt.Errorf("sasl handshake failed: %w", err)
+    }
+
+    // ...then SaslAuthenticate v1 carries the PLAIN credential bytes.
+    authBytes := lfsBuildSaslPlainAuthBytes(m.backendSASLUsername, m.backendSASLPassword)
+    authReq, err := lfsEncodeSaslAuthenticateRequest(&protocol.RequestHeader{
+        APIKey:        lfsAPIKeySaslAuthenticate,
+        APIVersion:    1,
+        CorrelationID: correlationID + 1,
+    }, authBytes)
+    if err != nil {
+        return err
+    }
+    if err := protocol.WriteFrame(conn, authReq); err != nil {
+        return err
+    }
+    if err := lfsReadSaslResponse(conn); err != nil {
+        return fmt.Errorf("sasl authenticate failed: %w", err)
+    }
+
+    return nil
+}
diff --git a/cmd/proxy/lfs_backend_tls.go b/cmd/proxy/lfs_backend_tls.go
new file mode 100644
index 00000000..93da6d1a
--- /dev/null
+++ b/cmd/proxy/lfs_backend_tls.go
@@ -0,0 +1,68 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package main + +import ( + "crypto/tls" + "crypto/x509" + "errors" + "os" + "strings" +) + +func lfsBuildBackendTLSConfig() (*tls.Config, error) { + enabled := lfsEnvBoolDefault("KAFSCALE_LFS_PROXY_BACKEND_TLS_ENABLED", false) + if !enabled { + return nil, nil + } + caFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_CA_FILE")) + certFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_CERT_FILE")) + keyFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_KEY_FILE")) + serverName := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_SERVER_NAME")) + insecureSkip := lfsEnvBoolDefault("KAFSCALE_LFS_PROXY_BACKEND_TLS_INSECURE_SKIP_VERIFY", false) + + var rootCAs *x509.CertPool + if caFile != "" { + caPEM, err := os.ReadFile(caFile) + if err != nil { + return nil, err + } + rootCAs = x509.NewCertPool() + if !rootCAs.AppendCertsFromPEM(caPEM) { + return nil, errors.New("failed to parse backend TLS CA file") + } + } + + var certs []tls.Certificate + if certFile != "" || keyFile != "" { + if certFile == "" || keyFile == "" { + return nil, errors.New("backend TLS cert and key must both be set") + } + cert, err := tls.LoadX509KeyPair(certFile, keyFile) + if err != nil { + return nil, err + } + certs = append(certs, cert) + } + + return &tls.Config{ + RootCAs: rootCAs, + Certificates: certs, + ServerName: serverName, + InsecureSkipVerify: insecureSkip, + MinVersion: tls.VersionTLS12, + }, nil +} diff --git a/cmd/proxy/lfs_http.go b/cmd/proxy/lfs_http.go new file mode 100644 index 00000000..c4ab824e --- /dev/null +++ b/cmd/proxy/lfs_http.go @@ -0,0 +1,1018 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"crypto/sha256"
+	"crypto/subtle"
+	"encoding/base64"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"io"
+	"math"
+	"net/http"
+	"regexp"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/KafScale/platform/pkg/lfs"
+	"github.com/KafScale/platform/pkg/protocol"
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/service/s3/types"
+	"github.com/twmb/franz-go/pkg/kmsg"
+)
+
+// HTTP header names understood by the LFS ingest/download endpoints.
+const (
+	lfsHeaderTopic       = "X-Kafka-Topic"
+	lfsHeaderKey         = "X-Kafka-Key"
+	lfsHeaderPartition   = "X-Kafka-Partition"
+	lfsHeaderChecksum    = "X-LFS-Checksum"
+	lfsHeaderChecksumAlg = "X-LFS-Checksum-Alg"
+	lfsHeaderRequestID   = "X-Request-ID"
+)
+
+// lfsValidTopicPattern restricts topic names to Kafka's legal character set.
+var lfsValidTopicPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+$`)
+
+// lfsErrorResponse is the JSON body returned on every HTTP error.
+type lfsErrorResponse struct {
+	Code      string `json:"code"`
+	Message   string `json:"message"`
+	RequestID string `json:"request_id"`
+}
+
+// lfsDownloadRequest asks for access to a previously uploaded object,
+// either as a presigned URL ("presign") or streamed through the proxy
+// ("stream").
+type lfsDownloadRequest struct {
+	Bucket         string `json:"bucket"`
+	Key            string `json:"key"`
+	Mode           string `json:"mode"`
+	ExpiresSeconds int    `json:"expires_seconds"`
+}
+
+type lfsDownloadResponse struct {
+	Mode      string `json:"mode"`
+	URL       string `json:"url"`
+	ExpiresAt string `json:"expires_at"`
+}
+
+// lfsUploadInitRequest starts a chunked (multipart) upload session.
+type lfsUploadInitRequest struct {
+	Topic       string `json:"topic"`
+	Key         string `json:"key"`
+	Partition   *int32 `json:"partition,omitempty"`
+	ContentType string `json:"content_type"`
+	SizeBytes   int64  `json:"size_bytes"`
+	Checksum    string `json:"checksum,omitempty"`
+	ChecksumAlg string `json:"checksum_alg,omitempty"`
+}
+
+type lfsUploadInitResponse struct {
+	UploadID  string `json:"upload_id"`
+	S3Key     string `json:"s3_key"`
+	PartSize  int64  `json:"part_size"`
+	ExpiresAt string `json:"expires_at"`
+}
+
+type lfsUploadPartResponse struct {
+	UploadID   string `json:"upload_id"`
+	PartNumber int32  `json:"part_number"`
+	ETag       string `json:"etag"`
+}
+
+type lfsUploadCompleteRequest struct {
+	Parts []struct {
+		PartNumber int32  `json:"part_number"`
+		ETag       string `json:"etag"`
+	} `json:"parts"`
+}
+
+// uploadSession tracks one in-flight chunked upload. All mutable state is
+// guarded by mu; parts must arrive strictly in order (NextPart).
+type uploadSession struct {
+	mu             sync.Mutex
+	ID             string
+	Topic          string
+	S3Key          string
+	UploadID       string
+	ContentType    string
+	SizeBytes      int64
+	KeyBytes       []byte
+	Partition      int32
+	Checksum       string
+	ChecksumAlg    lfs.ChecksumAlg
+	CreatedAt      time.Time
+	ExpiresAt      time.Time
+	PartSize       int64
+	NextPart       int32
+	TotalUploaded  int64
+	Parts          map[int32]string
+	PartSizes      map[int32]int64
+	sha256Hasher   lfsHashWriter
+	checksumHasher lfsHashWriter
+}
+
+// lfsHashWriter is the minimal hashing surface the session needs
+// (satisfied by hash.Hash).
+type lfsHashWriter interface {
+	Write([]byte) (int, error)
+	Sum([]byte) []byte
+}
+
+// startHTTPServer starts the LFS ingest HTTP (or HTTPS) listener and shuts
+// it down when ctx is canceled.
+func (m *lfsModule) startHTTPServer(ctx context.Context, addr string) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/lfs/produce", m.lfsCORSMiddleware(m.handleHTTPProduce))
+	mux.HandleFunc("/lfs/download", m.lfsCORSMiddleware(m.handleHTTPDownload))
+	mux.HandleFunc("/lfs/uploads", m.lfsCORSMiddleware(m.handleHTTPUploadInit))
+	mux.HandleFunc("/lfs/uploads/", m.lfsCORSMiddleware(m.handleHTTPUploadSession))
+	mux.HandleFunc("/swagger", m.lfsHandleSwaggerUI)
+	mux.HandleFunc("/swagger/", m.lfsHandleSwaggerUI)
+	mux.HandleFunc("/api/openapi.yaml", m.lfsHandleOpenAPISpec)
+	srv := &http.Server{
+		Addr:              addr,
+		Handler:           mux,
+		ReadTimeout:       m.httpReadTimeout,
+		WriteTimeout:      m.httpWriteTimeout,
+		IdleTimeout:       m.httpIdleTimeout,
+		ReadHeaderTimeout: m.httpHeaderTimeout,
+		MaxHeaderBytes:    m.httpMaxHeaderBytes,
+	}
+	// Graceful shutdown tied to module lifecycle.
+	go func() {
+		<-ctx.Done()
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), m.httpShutdownTimeout)
+		defer cancel()
+		_ = srv.Shutdown(shutdownCtx)
+	}()
+	go func() {
+		m.logger.Info("lfs http listening", "addr", addr, "tls", m.httpTLSConfig != nil)
+		var err error
+		if m.httpTLSConfig != nil {
+			srv.TLSConfig = m.httpTLSConfig
+			err = srv.ListenAndServeTLS(m.httpTLSCertFile, m.httpTLSKeyFile)
+		} else {
+			err = srv.ListenAndServe()
+		}
+		if err != nil && err != http.ErrServerClosed {
+			m.logger.Warn("lfs http server error", "error", err)
+		}
+	}()
+}
+
+// startMetricsServer exposes Prometheus metrics on a dedicated listener and
+// shuts it down when ctx is canceled.
+func (m *lfsModule) startMetricsServer(ctx context.Context, addr string) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/metrics", func(w http.ResponseWriter, _ *http.Request) {
+		m.metrics.WritePrometheus(w)
+	})
+	srv := &http.Server{
+		Addr:              addr,
+		Handler:           mux,
+		ReadTimeout:       m.httpReadTimeout,
+		WriteTimeout:      m.httpWriteTimeout,
+		IdleTimeout:       m.httpIdleTimeout,
+		ReadHeaderTimeout: m.httpHeaderTimeout,
+		MaxHeaderBytes:    m.httpMaxHeaderBytes,
+	}
+	go func() {
+		<-ctx.Done()
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), m.httpShutdownTimeout)
+		defer cancel()
+		_ = srv.Shutdown(shutdownCtx)
+	}()
+	go func() {
+		m.logger.Info("lfs metrics listening", "addr", addr)
+		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+			m.logger.Warn("lfs metrics server error", "error", err)
+		}
+	}()
+}
+
+// lfsCORSMiddleware adds permissive CORS headers (browser SDK support) and
+// short-circuits preflight OPTIONS requests.
+func (m *lfsModule) lfsCORSMiddleware(next http.HandlerFunc) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Access-Control-Allow-Origin", "*")
+		w.Header().Set("Access-Control-Allow-Methods", "POST, PUT, DELETE, OPTIONS")
+		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Range, X-Kafka-Topic, X-Kafka-Key, X-Kafka-Partition, X-LFS-Checksum, X-LFS-Checksum-Alg, X-LFS-Size, X-LFS-Mode, X-Request-ID, X-API-Key, Authorization")
+		w.Header().Set("Access-Control-Expose-Headers", "X-Request-ID")
+		if r.Method == http.MethodOptions {
+			w.WriteHeader(http.StatusNoContent)
+			return
+		}
+		next(w, r)
+	}
+}
+
+// handleHTTPProduce accepts a single-shot upload: the request body is
+// streamed to S3, then a compact LFS envelope referencing the object is
+// produced to the target Kafka topic. On Kafka failure the S3 object is
+// recorded as an orphan for later cleanup.
+func (m *lfsModule) handleHTTPProduce(w http.ResponseWriter, r *http.Request) {
+	requestID := strings.TrimSpace(r.Header.Get(lfsHeaderRequestID))
+	if requestID == "" {
+		requestID = newLFSUUID()
+	}
+	w.Header().Set(lfsHeaderRequestID, requestID)
+	if r.Method != http.MethodPost {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed")
+		return
+	}
+	if m.httpAPIKey != "" && !m.lfsValidateHTTPAPIKey(r) {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized")
+		return
+	}
+	if !m.isS3Healthy() {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready")
+		return
+	}
+	topic := strings.TrimSpace(r.Header.Get(lfsHeaderTopic))
+	if topic == "" {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "missing_topic", "missing topic")
+		return
+	}
+	if !m.lfsIsValidTopicName(topic) {
+		m.lfsWriteHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_topic", "invalid topic name")
+		return
+	}
+
+	// Optional record key, transported base64-encoded in a header.
+	var keyBytes []byte
+	if keyHeader := strings.TrimSpace(r.Header.Get(lfsHeaderKey)); keyHeader != "" {
+		decoded, err := base64.StdEncoding.DecodeString(keyHeader)
+		if err != nil {
+			m.lfsWriteHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_key", "invalid key")
+			return
+		}
+		keyBytes = decoded
+	}
+
+	partition := int32(0)
+	if partitionHeader := strings.TrimSpace(r.Header.Get(lfsHeaderPartition)); partitionHeader != "" {
+		parsed, err := strconv.ParseInt(partitionHeader, 10, 32)
+		if err != nil {
+			m.lfsWriteHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_partition", "invalid partition")
+			return
+		}
+		partition = int32(parsed)
+	}
+
+	checksumHeader := strings.TrimSpace(r.Header.Get(lfsHeaderChecksum))
+	checksumAlgHeader := strings.TrimSpace(r.Header.Get(lfsHeaderChecksumAlg))
+	alg, err := m.resolveChecksumAlg(checksumAlgHeader)
+	if err != nil {
+		m.lfsWriteHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_request", err.Error())
+		return
+	}
+	if checksumHeader != "" && alg == lfs.ChecksumNone {
+		m.lfsWriteHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_checksum", "checksum provided but checksum algorithm is none")
+		return
+	}
+	objectKey := m.buildObjectKey(topic)
+	clientIP := lfsGetClientIP(r)
+	contentType := r.Header.Get("Content-Type")
+
+	start := time.Now()
+	m.tracker.EmitUploadStarted(requestID, topic, partition, objectKey, contentType, clientIP, "http", r.ContentLength)
+
+	// Stream the body straight to S3; hashes are computed on the fly.
+	sha256Hex, checksum, checksumAlg, size, err := m.s3Uploader.UploadStream(r.Context(), objectKey, r.Body, m.maxBlob, alg)
+	if err != nil {
+		m.metrics.IncRequests(topic, "error", "lfs")
+		m.metrics.IncS3Errors()
+		status, code := lfsStatusForUploadError(err)
+		m.tracker.EmitUploadFailed(requestID, topic, objectKey, code, err.Error(), "s3_upload", 0, time.Since(start))
+		m.lfsWriteHTTPError(w, requestID, topic, status, code, err.Error())
+		return
+	}
+	// Client-supplied checksum must match what we computed while streaming;
+	// otherwise delete the just-written object.
+	if checksumHeader != "" && checksum != "" && !strings.EqualFold(checksumHeader, checksum) {
+		if err := m.s3Uploader.DeleteObject(r.Context(), objectKey); err != nil {
+			// NOTE(review): the orphan reason says "kafka_produce_failed" but
+			// this is a checksum-mismatch cleanup failure — confirm tracker
+			// consumers before renaming the reason string.
+			m.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}})
+			m.metrics.IncRequests(topic, "error", "lfs")
+			m.tracker.EmitUploadFailed(requestID, topic, objectKey, "checksum_mismatch", "checksum mismatch; delete failed", "validation", size, time.Since(start))
+			m.lfsWriteHTTPError(w, requestID, topic, http.StatusBadRequest, "checksum_mismatch", "checksum mismatch; delete failed")
+			return
+		}
+		m.metrics.IncRequests(topic, "error", "lfs")
+		m.tracker.EmitUploadFailed(requestID, topic, objectKey, "checksum_mismatch", (&lfs.ChecksumError{Expected: checksumHeader, Actual: checksum}).Error(), "validation", size, time.Since(start))
+		m.lfsWriteHTTPError(w, requestID, topic, http.StatusBadRequest, "checksum_mismatch", (&lfs.ChecksumError{Expected: checksumHeader, Actual: checksum}).Error())
+		return
+	}
+
+	// Build the envelope record that replaces the payload in Kafka.
+	env := lfs.Envelope{
+		Version:     1,
+		Bucket:      m.s3Bucket,
+		Key:         objectKey,
+		Size:        size,
+		SHA256:      sha256Hex,
+		Checksum:    checksum,
+		ChecksumAlg: checksumAlg,
+		ContentType: r.Header.Get("Content-Type"),
+		CreatedAt:   time.Now().UTC().Format(time.RFC3339),
+		ProxyID:     m.proxyID,
+	}
+	encoded, err := lfs.EncodeEnvelope(env)
+	if err != nil {
+		m.metrics.IncRequests(topic, "error", "lfs")
+		m.lfsWriteHTTPError(w, requestID, topic, http.StatusInternalServerError, "encode_failed", err.Error())
+		return
+	}
+
+	record := kmsg.Record{
+		TimestampDelta64: 0,
+		OffsetDelta:      0,
+		Key:              keyBytes,
+		Value:            encoded,
+	}
+	batchBytes := lfsBuildRecordBatch([]kmsg.Record{record})
+
+	produceReq := &kmsg.ProduceRequest{
+		Acks:          1,
+		TimeoutMillis: 15000,
+		Topics: []kmsg.ProduceRequestTopic{{
+			Topic: topic,
+			Partitions: []kmsg.ProduceRequestTopicPartition{{
+				Partition: partition,
+				Records:   batchBytes,
+			}},
+		}},
+	}
+
+	correlationID := int32(atomic.AddUint32(&m.corrID, 1))
+	reqHeader := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: correlationID}
+	payload, err := lfsEncodeProduceRequest(reqHeader, produceReq)
+	if err != nil {
+		m.metrics.IncRequests(topic, "error", "lfs")
+		m.lfsWriteHTTPError(w, requestID, topic, http.StatusInternalServerError, "encode_failed", err.Error())
+		return
+	}
+
+	backendConn, _, err := m.connectBackend(r.Context())
+	if err != nil {
+		m.metrics.IncRequests(topic, "error", "lfs")
+		m.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}})
+		m.tracker.EmitUploadFailed(requestID, topic, objectKey, "backend_unavailable", err.Error(), "kafka_produce", size, time.Since(start))
+		m.lfsWriteHTTPError(w, requestID, topic, http.StatusServiceUnavailable, "backend_unavailable", err.Error())
+		return
+	}
+	defer func() { _ = backendConn.Close() }()
+
+	_, err = m.forwardToBackend(r.Context(), backendConn, payload)
+	if err != nil {
+		m.metrics.IncRequests(topic, "error", "lfs")
+		m.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}})
+		m.tracker.EmitUploadFailed(requestID, topic, objectKey, "backend_error", err.Error(), "kafka_produce", size, time.Since(start))
+		m.lfsWriteHTTPError(w, requestID, topic, http.StatusBadGateway, "backend_error", err.Error())
+		return
+	}
+
+	m.metrics.IncRequests(topic, "ok", "lfs")
+	m.metrics.AddUploadBytes(size)
+	m.metrics.ObserveUploadDuration(time.Since(start).Seconds())
+	m.tracker.EmitUploadCompleted(requestID, topic, partition, 0, m.s3Bucket, objectKey, size, sha256Hex, checksum, checksumAlg, contentType, time.Since(start))
+
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(env)
+}
+
+// handleHTTPDownload hands out access to a stored object, either as a
+// presigned S3 URL (default) or by streaming the bytes through the proxy.
+// Only keys inside the configured bucket/namespace are allowed.
+func (m *lfsModule) handleHTTPDownload(w http.ResponseWriter, r *http.Request) {
+	requestID := strings.TrimSpace(r.Header.Get(lfsHeaderRequestID))
+	if requestID == "" {
+		requestID = newLFSUUID()
+	}
+	w.Header().Set(lfsHeaderRequestID, requestID)
+	if r.Method != http.MethodPost {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed")
+		return
+	}
+	if m.httpAPIKey != "" && !m.lfsValidateHTTPAPIKey(r) {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized")
+		return
+	}
+	if !m.isS3Healthy() {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready")
+		return
+	}
+
+	var req lfsDownloadRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "invalid JSON body")
+		return
+	}
+	req.Bucket = strings.TrimSpace(req.Bucket)
+	req.Key = strings.TrimSpace(req.Key)
+	if req.Bucket == "" || req.Key == "" {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "bucket and key required")
+		return
+	}
+	if req.Bucket != m.s3Bucket {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_bucket", "bucket not allowed")
+		return
+	}
+	if err := m.lfsValidateObjectKey(req.Key); err != nil {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_key", err.Error())
+		return
+	}
+
+	mode := strings.ToLower(strings.TrimSpace(req.Mode))
+	if mode == "" {
+		mode = "presign"
+	}
+	if mode != "presign" && mode != "stream" {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_mode", "mode must be presign or stream")
+		return
+	}
+
+	clientIP := lfsGetClientIP(r)
+	start := time.Now()
+	ttlSeconds := 0
+	if mode == "presign" {
+		ttlSeconds = req.ExpiresSeconds
+		if ttlSeconds <= 0 {
+			ttlSeconds = int(m.downloadTTLMax.Seconds())
+		}
+	}
+	m.tracker.EmitDownloadRequested(requestID, req.Bucket, req.Key, mode, clientIP, ttlSeconds)
+
+	switch mode {
+	case "presign":
+		// Requested TTL is clamped to the configured maximum.
+		ttl := m.downloadTTLMax
+		if req.ExpiresSeconds > 0 {
+			requested := time.Duration(req.ExpiresSeconds) * time.Second
+			if requested < ttl {
+				ttl = requested
+			}
+		}
+		url, err := m.s3Uploader.PresignGetObject(r.Context(), req.Key, ttl)
+		if err != nil {
+			m.metrics.IncS3Errors()
+			m.lfsWriteHTTPError(w, requestID, "", http.StatusBadGateway, "s3_presign_failed", err.Error())
+			return
+		}
+		m.tracker.EmitDownloadCompleted(requestID, req.Key, mode, time.Since(start), 0)
+
+		resp := lfsDownloadResponse{
+			Mode:      "presign",
+			URL:       url,
+			ExpiresAt: time.Now().UTC().Add(ttl).Format(time.RFC3339),
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		_ = json.NewEncoder(w).Encode(resp)
+	case "stream":
+		obj, err := m.s3Uploader.GetObject(r.Context(), req.Key)
+		if err != nil {
+			m.metrics.IncS3Errors()
+			m.lfsWriteHTTPError(w, requestID, "", http.StatusBadGateway, "s3_get_failed", err.Error())
+			return
+		}
+		defer func() { _ = obj.Body.Close() }()
+		contentType := "application/octet-stream"
+		if obj.ContentType != nil && *obj.ContentType != "" {
+			contentType = *obj.ContentType
+		}
+		w.Header().Set("Content-Type", contentType)
+		var size int64
+		if obj.ContentLength != nil {
+			size = *obj.ContentLength
+			w.Header().Set("Content-Length", strconv.FormatInt(size, 10))
+		}
+		// Headers can no longer change once the copy starts writing.
+		if _, err := io.Copy(w, obj.Body); err != nil {
+			m.logger.Warn("download stream failed", "error", err)
+		}
+		m.tracker.EmitDownloadCompleted(requestID, req.Key, mode, time.Since(start), size)
+	}
+}
+
+// handleHTTPUploadInit validates an init request, starts an S3 multipart
+// upload, and registers an upload session the client will feed parts into.
+func (m *lfsModule) handleHTTPUploadInit(w http.ResponseWriter, r *http.Request) {
+	requestID := strings.TrimSpace(r.Header.Get(lfsHeaderRequestID))
+	if requestID == "" {
+		requestID = newLFSUUID()
+	}
+	w.Header().Set(lfsHeaderRequestID, requestID)
+	if r.Method != http.MethodPost {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed")
+		return
+	}
+	if m.httpAPIKey != "" && !m.lfsValidateHTTPAPIKey(r) {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized")
+		return
+	}
+	if !m.isS3Healthy() {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready")
+		return
+	}
+
+	var req lfsUploadInitRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "invalid JSON body")
+		return
+	}
+
+	req.Topic = strings.TrimSpace(req.Topic)
+	req.ContentType = strings.TrimSpace(req.ContentType)
+	req.Checksum = strings.TrimSpace(req.Checksum)
+	req.ChecksumAlg = strings.TrimSpace(req.ChecksumAlg)
+	if req.Topic == "" {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "missing_topic", "missing topic")
+		return
+	}
+	if !m.lfsIsValidTopicName(req.Topic) {
+		m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_topic", "invalid topic name")
+		return
+	}
+	if req.ContentType == "" {
+		m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "missing_content_type", "content_type required")
+		return
+	}
+	if req.SizeBytes <= 0 {
+		m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_size", "size_bytes must be > 0")
+		return
+	}
+	if m.maxBlob > 0 && req.SizeBytes > m.maxBlob {
+		m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "payload_too_large", "payload exceeds max size")
+		return
+	}
+
+	keyBytes := []byte(nil)
+	if req.Key != "" {
+		decoded, err := base64.StdEncoding.DecodeString(req.Key)
+		if err != nil {
+			m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_key", "invalid key")
+			return
+		}
+		keyBytes = decoded
+	}
+
+	partition := int32(0)
+	if req.Partition != nil {
+		partition = *req.Partition
+		if partition < 0 {
+			m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_partition", "invalid partition")
+			return
+		}
+	}
+
+	alg, err := m.resolveChecksumAlg(req.ChecksumAlg)
+	if err != nil {
+		m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_request", err.Error())
+		return
+	}
+	if req.Checksum != "" && alg == lfs.ChecksumNone {
+		m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_checksum", "checksum provided but checksum algorithm is none")
+		return
+	}
+
+	// Construct any non-SHA-256 checksum hasher BEFORE starting the S3
+	// multipart upload: failing after StartMultipartUpload would leave an
+	// orphaned multipart upload behind with no abort. SHA-256 reuses the
+	// session's mandatory sha256 hasher instead of a second hasher.
+	var checksumHasher lfsHashWriter
+	if alg != lfs.ChecksumNone && alg != lfs.ChecksumSHA256 {
+		h, err := lfs.NewChecksumHasher(alg)
+		if err != nil {
+			m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_checksum", err.Error())
+			return
+		}
+		checksumHasher = h
+	}
+
+	objectKey := m.buildObjectKey(req.Topic)
+	uploadID, err := m.s3Uploader.StartMultipartUpload(r.Context(), objectKey, req.ContentType)
+	if err != nil {
+		m.metrics.IncS3Errors()
+		m.lfsWriteHTTPError(w, requestID, req.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error())
+		return
+	}
+	m.logger.Info("http chunked upload init", "requestId", requestID, "topic", req.Topic, "s3Key", objectKey, "uploadId", uploadID, "sizeBytes", req.SizeBytes, "partSize", m.chunkSize)
+
+	partSize := normalizeChunkSize(m.chunkSize)
+	session := &uploadSession{
+		ID:           newLFSUUID(),
+		Topic:        req.Topic,
+		S3Key:        objectKey,
+		UploadID:     uploadID,
+		ContentType:  req.ContentType,
+		SizeBytes:    req.SizeBytes,
+		KeyBytes:     keyBytes,
+		Partition:    partition,
+		Checksum:     req.Checksum,
+		ChecksumAlg:  alg,
+		CreatedAt:    time.Now().UTC(),
+		ExpiresAt:    time.Now().UTC().Add(m.uploadSessionTTL),
+		PartSize:     partSize,
+		NextPart:     1,
+		Parts:        make(map[int32]string),
+		PartSizes:    make(map[int32]int64),
+		sha256Hasher: sha256.New(),
+	}
+	if alg == lfs.ChecksumSHA256 {
+		session.checksumHasher = session.sha256Hasher
+	} else {
+		session.checksumHasher = checksumHasher // nil when alg == ChecksumNone
+	}
+
+	m.lfsStoreUploadSession(session)
+	m.tracker.EmitUploadStarted(requestID, req.Topic, partition, objectKey, req.ContentType, lfsGetClientIP(r), "http-chunked", req.SizeBytes)
+
+	resp := lfsUploadInitResponse{
+		UploadID:  session.ID,
+		S3Key:     session.S3Key,
+		PartSize:  session.PartSize,
+		ExpiresAt: session.ExpiresAt.Format(time.RFC3339),
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(resp)
+}
+
+// handleHTTPUploadSession routes /lfs/uploads/{id}[/...] to the part, complete,
+// and abort handlers based on path shape and method.
+func (m *lfsModule) handleHTTPUploadSession(w http.ResponseWriter, r *http.Request) {
+	requestID := strings.TrimSpace(r.Header.Get(lfsHeaderRequestID))
+	if requestID == "" {
+		requestID = newLFSUUID()
+	}
+	w.Header().Set(lfsHeaderRequestID, requestID)
+	if m.httpAPIKey != "" && !m.lfsValidateHTTPAPIKey(r) {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized")
+		return
+	}
+	if !m.isS3Healthy() {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready")
+		return
+	}
+
+	path := strings.TrimPrefix(r.URL.Path, "/lfs/uploads/")
+	parts := strings.Split(strings.Trim(path, "/"), "/")
+	if len(parts) == 0 || parts[0] == "" {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusNotFound, "not_found", "not found")
+		return
+	}
+	uploadID := parts[0]
+
+	switch {
+	case len(parts) == 1 && r.Method == http.MethodDelete:
+		m.handleHTTPUploadAbort(w, r, requestID, uploadID)
+		return
+	case len(parts) == 2 && parts[1] == "complete" && r.Method == http.MethodPost:
+		m.handleHTTPUploadComplete(w, r, requestID, uploadID)
+		return
+	case len(parts) == 3 && parts[1] == "parts" && r.Method == http.MethodPut:
+		partNum, err := strconv.ParseInt(parts[2], 10, 32)
+		if err != nil || partNum <= 0 || partNum > math.MaxInt32 {
+			m.lfsWriteHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_part", "invalid part number")
+			return
+		}
+		m.handleHTTPUploadPart(w, r, requestID, uploadID, int32(partNum))
+		return
+	default:
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusNotFound, "not_found", "not found")
+		return
+	}
+}
+
+// handleHTTPUploadPart stores one strictly-ordered part of a chunked upload
+// in S3, updating the running hashes. Re-sending an already-stored part is
+// idempotent and returns the recorded ETag.
+func (m *lfsModule) handleHTTPUploadPart(w http.ResponseWriter, r *http.Request, requestID, sessionID string, partNumber int32) {
+	session, ok := m.lfsGetUploadSession(sessionID)
+	if !ok {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found")
+		return
+	}
+
+	session.mu.Lock()
+	defer session.mu.Unlock()
+	if time.Now().UTC().After(session.ExpiresAt) {
+		m.lfsDeleteUploadSession(sessionID)
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusGone, "upload_expired", "upload session expired")
+		return
+	}
+
+	// Idempotent retry: the part was already stored, just echo its ETag.
+	if etag, exists := session.Parts[partNumber]; exists {
+		_, _ = io.Copy(io.Discard, r.Body)
+		m.logger.Info("http chunked upload part already received", "requestId", requestID, "uploadId", sessionID, "part", partNumber, "etag", etag)
+		resp := lfsUploadPartResponse{UploadID: sessionID, PartNumber: partNumber, ETag: etag}
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusOK)
+		_ = json.NewEncoder(w).Encode(resp)
+		return
+	}
+
+	if partNumber != session.NextPart {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusConflict, "out_of_order", "part out of order")
+		return
+	}
+
+	// Read one byte beyond PartSize so oversized parts are detectable.
+	limit := session.PartSize + 1
+	body, err := io.ReadAll(io.LimitReader(r.Body, limit))
+	if err != nil {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", err.Error())
+		return
+	}
+	if int64(len(body)) == 0 {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "empty part")
+		return
+	}
+	if int64(len(body)) > session.PartSize {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part too large")
+		return
+	}
+	if session.TotalUploaded+int64(len(body)) > session.SizeBytes {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part exceeds declared size")
+		return
+	}
+	// S3 requires all parts except the last to meet the minimum part size.
+	if session.TotalUploaded+int64(len(body)) < session.SizeBytes && int64(len(body)) < minMultipartChunkSize {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part too small")
+		return
+	}
+
+	if _, err := session.sha256Hasher.Write(body); err != nil {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "hash_error", err.Error())
+		return
+	}
+	// Feed the secondary checksum hasher unless it aliases the sha256 one.
+	if session.checksumHasher != nil && session.checksumHasher != session.sha256Hasher {
+		if _, err := session.checksumHasher.Write(body); err != nil {
+			m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "hash_error", err.Error())
+			return
+		}
+	}
+
+	etag, err := m.s3Uploader.UploadPart(r.Context(), session.S3Key, session.UploadID, partNumber, body)
+	if err != nil {
+		m.metrics.IncS3Errors()
+		m.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "s3_upload_failed", err.Error(), "upload_part", session.TotalUploaded, 0)
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error())
+		return
+	}
+	m.logger.Info("http chunked upload part stored", "requestId", requestID, "uploadId", sessionID, "part", partNumber, "etag", etag, "bytes", len(body))
+
+	session.Parts[partNumber] = etag
+	session.PartSizes[partNumber] = int64(len(body))
+	session.TotalUploaded += int64(len(body))
+	session.NextPart++
+
+	resp := lfsUploadPartResponse{UploadID: sessionID, PartNumber: partNumber, ETag: etag}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(resp)
+}
+
+// handleHTTPUploadComplete finalizes the S3 multipart upload after verifying
+// the client's part manifest, validates the declared checksum, and produces
+// the LFS envelope to Kafka. On Kafka failure the completed object is tracked
+// as an orphan for later cleanup.
+func (m *lfsModule) handleHTTPUploadComplete(w http.ResponseWriter, r *http.Request, requestID, sessionID string) {
+	session, ok := m.lfsGetUploadSession(sessionID)
+	if !ok {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found")
+		return
+	}
+
+	session.mu.Lock()
+	defer session.mu.Unlock()
+	if time.Now().UTC().After(session.ExpiresAt) {
+		m.lfsDeleteUploadSession(sessionID)
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusGone, "upload_expired", "upload session expired")
+		return
+	}
+	if session.TotalUploaded != session.SizeBytes {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "incomplete_upload", "not all bytes uploaded")
+		return
+	}
+
+	var req lfsUploadCompleteRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_request", "invalid JSON body")
+		return
+	}
+	if len(req.Parts) == 0 {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_request", "parts required")
+		return
+	}
+
+	// The client's manifest must exactly match the ETags we recorded.
+	completed := make([]types.CompletedPart, 0, len(req.Parts))
+	for _, part := range req.Parts {
+		etag, ok := session.Parts[part.PartNumber]
+		if !ok || etag == "" || part.ETag == "" || etag != part.ETag {
+			m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part etag mismatch")
+			return
+		}
+		completed = append(completed, types.CompletedPart{
+			ETag:       aws.String(part.ETag),
+			PartNumber: aws.Int32(part.PartNumber),
+		})
+	}
+
+	if err := m.s3Uploader.CompleteMultipartUpload(r.Context(), session.S3Key, session.UploadID, completed); err != nil {
+		m.metrics.IncS3Errors()
+		m.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "s3_upload_failed", err.Error(), "upload_complete", session.TotalUploaded, 0)
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error())
+		return
+	}
+	m.logger.Info("http chunked upload completed", "requestId", requestID, "uploadId", sessionID, "parts", len(completed), "bytes", session.TotalUploaded)
+
+	shaHex := hex.EncodeToString(session.sha256Hasher.Sum(nil))
+	checksum := ""
+	if session.ChecksumAlg != lfs.ChecksumNone {
+		if session.ChecksumAlg == lfs.ChecksumSHA256 {
+			checksum = shaHex
+		} else if session.checksumHasher != nil {
+			checksum = hex.EncodeToString(session.checksumHasher.Sum(nil))
+		}
+	}
+	if session.Checksum != "" && checksum != "" && !strings.EqualFold(session.Checksum, checksum) {
+		// The multipart upload was already completed above, so the object
+		// exists and AbortMultipartUpload would be a no-op against a finished
+		// upload. Delete the object instead; if that fails, record it as an
+		// orphan so the cleanup path can collect it.
+		if err := m.s3Uploader.DeleteObject(r.Context(), session.S3Key); err != nil {
+			m.trackOrphans([]orphanInfo{{Topic: session.Topic, Key: session.S3Key, RequestID: requestID, Reason: "checksum_mismatch"}})
+		}
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "checksum_mismatch", "checksum mismatch")
+		return
+	}
+
+	env := lfs.Envelope{
+		Version:     1,
+		Bucket:      m.s3Bucket,
+		Key:         session.S3Key,
+		Size:        session.TotalUploaded,
+		SHA256:      shaHex,
+		Checksum:    checksum,
+		ChecksumAlg: string(session.ChecksumAlg),
+		ContentType: session.ContentType,
+		CreatedAt:   time.Now().UTC().Format(time.RFC3339),
+		ProxyID:     m.proxyID,
+	}
+	encoded, err := lfs.EncodeEnvelope(env)
+	if err != nil {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusInternalServerError, "encode_failed", err.Error())
+		return
+	}
+
+	record := kmsg.Record{
+		TimestampDelta64: 0,
+		OffsetDelta:      0,
+		Key:              session.KeyBytes,
+		Value:            encoded,
+	}
+	batchBytes := lfsBuildRecordBatch([]kmsg.Record{record})
+
+	produceReq := &kmsg.ProduceRequest{
+		Acks:          1,
+		TimeoutMillis: 15000,
+		Topics: []kmsg.ProduceRequestTopic{{
+			Topic: session.Topic,
+			Partitions: []kmsg.ProduceRequestTopicPartition{{
+				Partition: session.Partition,
+				Records:   batchBytes,
+			}},
+		}},
+	}
+
+	correlationID := int32(atomic.AddUint32(&m.corrID, 1))
+	reqHeader := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: correlationID}
+	payload, err := lfsEncodeProduceRequest(reqHeader, produceReq)
+	if err != nil {
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusInternalServerError, "encode_failed", err.Error())
+		return
+	}
+
+	backendConn, _, err := m.connectBackend(r.Context())
+	if err != nil {
+		m.trackOrphans([]orphanInfo{{Topic: session.Topic, Key: session.S3Key, RequestID: requestID, Reason: "kafka_produce_failed"}})
+		m.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "backend_unavailable", err.Error(), "kafka_produce", session.TotalUploaded, 0)
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusServiceUnavailable, "backend_unavailable", err.Error())
+		return
+	}
+	defer func() { _ = backendConn.Close() }()
+
+	if _, err := m.forwardToBackend(r.Context(), backendConn, payload); err != nil {
+		m.trackOrphans([]orphanInfo{{Topic: session.Topic, Key: session.S3Key, RequestID: requestID, Reason: "kafka_produce_failed"}})
+		m.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "backend_error", err.Error(), "kafka_produce", session.TotalUploaded, 0)
+		m.lfsWriteHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "backend_error", err.Error())
+		return
+	}
+
+	m.metrics.IncRequests(session.Topic, "ok", "lfs")
+	m.metrics.AddUploadBytes(session.TotalUploaded)
+	m.tracker.EmitUploadCompleted(requestID, session.Topic, session.Partition, 0, m.s3Bucket, session.S3Key, session.TotalUploaded, shaHex, checksum, string(session.ChecksumAlg), session.ContentType, 0)
+
+	m.lfsDeleteUploadSession(sessionID)
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(env)
+}
+
+// handleHTTPUploadAbort aborts the S3 multipart upload and drops the session.
+func (m *lfsModule) handleHTTPUploadAbort(w http.ResponseWriter, r *http.Request, requestID, sessionID string) {
+	session, ok := m.lfsGetUploadSession(sessionID)
+	if !ok {
+		m.lfsWriteHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found")
+		return
+	}
+	session.mu.Lock()
+	defer session.mu.Unlock()
+	// Best effort: the session is removed regardless of abort outcome.
+	_ = m.s3Uploader.AbortMultipartUpload(r.Context(), session.S3Key, session.UploadID)
+	m.lfsDeleteUploadSession(sessionID)
+	w.WriteHeader(http.StatusNoContent)
+}
+
+// lfsStoreUploadSession registers a session, evicting expired ones first.
+func (m *lfsModule) lfsStoreUploadSession(session *uploadSession) {
+	if session == nil {
+		return
+	}
+	m.uploadMu.Lock()
+	defer m.uploadMu.Unlock()
+	m.lfsCleanupUploadSessionsLocked()
+	m.uploadSessions[session.ID] = session
+}
+
+// lfsGetUploadSession looks up a live session by ID, evicting expired ones.
+func (m *lfsModule) lfsGetUploadSession(id string) (*uploadSession, bool) {
+	m.uploadMu.Lock()
+	defer m.uploadMu.Unlock()
+	m.lfsCleanupUploadSessionsLocked()
+	session, ok := m.uploadSessions[id]
+	return session, ok
+}
+
+func (m *lfsModule) lfsDeleteUploadSession(id string) {
+	m.uploadMu.Lock()
+	defer m.uploadMu.Unlock()
+	delete(m.uploadSessions, id)
+}
+
+// lfsCleanupUploadSessionsLocked drops expired sessions; caller holds uploadMu.
+func (m *lfsModule) lfsCleanupUploadSessionsLocked() {
+	now := time.Now().UTC()
+	for id, session := range m.uploadSessions {
+		if session.ExpiresAt.Before(now) {
+			delete(m.uploadSessions, id)
+		}
+	}
+}
+
+// lfsStatusForUploadError maps validation-style UploadStream errors to 400s
+// and everything else to 502. NOTE(review): matching on error message text is
+// fragile — consider sentinel errors in pkg/lfs.
+func lfsStatusForUploadError(err error) (int, string) {
+	msg := err.Error()
+	switch {
+	case strings.Contains(msg, "exceeds max"):
+		return http.StatusBadRequest, "payload_too_large"
+	case strings.Contains(msg, "empty upload"):
+		return http.StatusBadRequest, "empty_upload"
+	case strings.Contains(msg, "s3 key required"):
+		return http.StatusBadRequest, "invalid_key"
+	case strings.Contains(msg, "reader required"):
+		return http.StatusBadRequest, "invalid_reader"
+	default:
+		return http.StatusBadGateway, "s3_upload_failed"
+	}
+}
+
+// lfsWriteHTTPError logs and writes the standard JSON error payload.
+func (m *lfsModule) lfsWriteHTTPError(w http.ResponseWriter, requestID, topic string, status int, code, message string) {
+	if topic != "" {
+		m.logger.Warn("lfs http failed", "status", status, "code", code, "requestId", requestID, "topic", topic, "error", message)
+	} else {
+		m.logger.Warn("lfs http failed", "status", status, "code", code, "requestId", requestID, "error", message)
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	_ = json.NewEncoder(w).Encode(lfsErrorResponse{
+		Code:      code,
+		Message:   message,
+		RequestID: requestID,
+	})
+}
+
+// lfsValidateHTTPAPIKey checks X-API-Key or a Bearer token against the
+// configured key using a constant-time comparison.
+func (m *lfsModule) lfsValidateHTTPAPIKey(r *http.Request) bool {
+	if r == nil {
+		return false
+	}
+	key := strings.TrimSpace(r.Header.Get("X-API-Key"))
+	if key == "" {
+		auth := strings.TrimSpace(r.Header.Get("Authorization"))
+		if strings.HasPrefix(strings.ToLower(auth), "bearer ") {
+			key = strings.TrimSpace(auth[len("bearer "):])
+		}
+	}
+	if key == "" {
+		return false
+	}
+	return subtle.ConstantTimeCompare([]byte(key), []byte(m.httpAPIKey)) == 1
+}
+
+// lfsValidateObjectKey rejects keys that could escape the proxy's namespace
+// (absolute paths, traversal, keys outside the configured prefix or missing
+// the /lfs/ segment).
+func (m *lfsModule) lfsValidateObjectKey(key string) error {
+	if strings.HasPrefix(key, "/") {
+		return errors.New("key must be relative")
+	}
+	if strings.Contains(key, "..") {
+		return errors.New("key must not contain '..'")
+	}
+	ns := strings.TrimSpace(m.s3Namespace)
+	if ns != "" && !strings.HasPrefix(key, ns+"/") {
+		return errors.New("key outside namespace")
+	}
+	if !strings.Contains(key, "/lfs/") {
+		return errors.New("key must include /lfs/ segment")
+	}
+	return nil
+}
+
+// lfsIsValidTopicName checks length and Kafka's legal character set.
+func (m *lfsModule) lfsIsValidTopicName(topic string) bool {
+	if len(topic) == 0 || len(topic) > m.topicMaxLength {
+		return false
+	}
+	return lfsValidTopicPattern.MatchString(topic)
+}
+
+// lfsGetClientIP extracts the originating client IP, preferring proxy
+// headers, then falling back to the connection's remote address.
+func lfsGetClientIP(r *http.Request) string {
+	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+		if idx := strings.Index(xff, ","); idx > 0 {
+			return strings.TrimSpace(xff[:idx])
+		}
+		return strings.TrimSpace(xff)
+	}
+	if xri := r.Header.Get("X-Real-IP"); xri != "" {
+		return strings.TrimSpace(xri)
+	}
+	// Strip the port at the LAST colon so bracketed IPv6 remote addresses
+	// ("[::1]:8080") are not truncated at the first colon, then drop the
+	// brackets. (strings.Cut on the first ":" returned "[" for IPv6.)
+	addr := r.RemoteAddr
+	if idx := strings.LastIndex(addr, ":"); idx >= 0 {
+		addr = addr[:idx]
+	}
+	return strings.Trim(addr, "[]")
+}
diff --git a/cmd/proxy/lfs_http_tls.go b/cmd/proxy/lfs_http_tls.go
new file mode 100644
index 00000000..f05085c2
--- /dev/null
+++ b/cmd/proxy/lfs_http_tls.go
@@ -0,0 +1,59 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"crypto/tls"
+	"crypto/x509"
+	"errors"
+	"os"
+	"strings"
+)
+
+// lfsBuildHTTPServerTLSConfig builds the server-side TLS configuration for
+// the LFS HTTP listener from environment variables. It returns a nil config
+// (and empty file paths) when KAFSCALE_LFS_PROXY_HTTP_TLS_ENABLED is false.
+// When a client CA is configured, client certificates are verified if
+// presented, or required when ..._REQUIRE_CLIENT_CERT is set.
+func lfsBuildHTTPServerTLSConfig() (*tls.Config, string, string, error) {
+	enabled := lfsEnvBoolDefault("KAFSCALE_LFS_PROXY_HTTP_TLS_ENABLED", false)
+	if !enabled {
+		return nil, "", "", nil
+	}
+	certFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_CERT_FILE"))
+	keyFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_KEY_FILE"))
+	clientCA := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_CLIENT_CA_FILE"))
+	requireClient := lfsEnvBoolDefault("KAFSCALE_LFS_PROXY_HTTP_TLS_REQUIRE_CLIENT_CERT", false)
+
+	if certFile == "" || keyFile == "" {
+		return nil, "", "", errors.New("http TLS cert and key must be set when enabled")
+	}
+
+	cfg := &tls.Config{MinVersion: tls.VersionTLS12}
+	if clientCA != "" {
+		caPEM, err := os.ReadFile(clientCA)
+		if err != nil {
+			return nil, "", "", err
+		}
+		pool := x509.NewCertPool()
+		if !pool.AppendCertsFromPEM(caPEM) {
+			return nil, "", "", errors.New("failed to parse http TLS client CA file")
+		}
+		cfg.ClientCAs = pool
+		if requireClient {
+			cfg.ClientAuth = tls.RequireAndVerifyClientCert
+		} else {
+			cfg.ClientAuth = tls.VerifyClientCertIfGiven
+		}
+	}
+
+	return cfg, certFile, keyFile, nil
+}
diff --git a/cmd/proxy/lfs_metrics.go b/cmd/proxy/lfs_metrics.go
new file mode 100644
index 00000000..a24c3030
--- /dev/null
+++ b/cmd/proxy/lfs_metrics.go
@@ -0,0 +1,221 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
// lfsMetrics aggregates Prometheus-style metrics for the LFS proxy module.
// Plain counter fields are updated atomically; the requests map itself is
// guarded by mu (values inside it are updated atomically once created).
type lfsMetrics struct {
	uploadDuration *histogram
	uploadBytes    uint64
	s3Errors       uint64
	orphans        uint64
	mu             sync.Mutex
	requests       map[string]*topicCounters
}

// newLfsMetrics returns a metrics registry with upload-duration buckets
// spanning 10ms..30s.
func newLfsMetrics() *lfsMetrics {
	buckets := []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}
	return &lfsMetrics{
		uploadDuration: newHistogram(buckets),
		requests:       make(map[string]*topicCounters),
	}
}

// ObserveUploadDuration records one upload latency sample in seconds.
func (m *lfsMetrics) ObserveUploadDuration(seconds float64) {
	if m == nil || m.uploadDuration == nil {
		return
	}
	m.uploadDuration.Observe(seconds)
}

// AddUploadBytes adds n to the total uploaded-bytes counter; non-positive
// deltas are ignored.
func (m *lfsMetrics) AddUploadBytes(n int64) {
	if m == nil || n <= 0 {
		return
	}
	atomic.AddUint64(&m.uploadBytes, uint64(n))
}

// IncRequests bumps the per-topic counter selected by status ("ok"/"error")
// and type ("lfs"/"passthrough"). An empty topic is bucketed as "unknown".
func (m *lfsMetrics) IncRequests(topic, status, typ string) {
	if m == nil {
		return
	}
	if topic == "" {
		topic = "unknown"
	}
	m.mu.Lock()
	counters := m.requests[topic]
	if counters == nil {
		counters = &topicCounters{}
		m.requests[topic] = counters
	}
	m.mu.Unlock()
	switch {
	case status == "ok" && typ == "lfs":
		atomic.AddUint64(&counters.okLfs, 1)
	case status == "error" && typ == "lfs":
		atomic.AddUint64(&counters.errLfs, 1)
	case status == "ok" && typ == "passthrough":
		atomic.AddUint64(&counters.okPas, 1)
	case status == "error" && typ == "passthrough":
		atomic.AddUint64(&counters.errPas, 1)
	}
}

// IncS3Errors bumps the total S3 error counter.
func (m *lfsMetrics) IncS3Errors() {
	if m == nil {
		return
	}
	atomic.AddUint64(&m.s3Errors, 1)
}

// IncOrphans adds count orphaned objects (uploaded to S3 but never committed
// to Kafka); non-positive counts are ignored.
func (m *lfsMetrics) IncOrphans(count int) {
	if m == nil || count <= 0 {
		return
	}
	atomic.AddUint64(&m.orphans, uint64(count))
}

// WritePrometheus renders all metrics in Prometheus exposition format.
func (m *lfsMetrics) WritePrometheus(w io.Writer) {
	if m == nil {
		return
	}
	m.uploadDuration.WritePrometheus(w, "kafscale_lfs_proxy_upload_duration_seconds", "LFS proxy upload durations in seconds")
	_, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_upload_bytes_total Total bytes uploaded via LFS\n")
	_, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_upload_bytes_total counter\n")
	_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_upload_bytes_total %d\n", atomic.LoadUint64(&m.uploadBytes))
	_, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_requests_total LFS proxy requests\n")
	_, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_requests_total counter\n")
	// Fixed: the previous code read m.requests[topic] after releasing mu,
	// racing with the map insert in IncRequests. Snapshot both the topic
	// list and the counter values while holding the lock.
	topics, counters := m.snapshotRequestCounters()
	for i, topic := range topics {
		c := counters[i]
		_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"ok\",type=\"lfs\"} %d\n", topic, c.okLfs)
		_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"error\",type=\"lfs\"} %d\n", topic, c.errLfs)
		_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"ok\",type=\"passthrough\"} %d\n", topic, c.okPas)
		_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"error\",type=\"passthrough\"} %d\n", topic, c.errPas)
	}
	_, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_s3_errors_total Total S3 errors\n")
	_, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_s3_errors_total counter\n")
	_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_s3_errors_total %d\n", atomic.LoadUint64(&m.s3Errors))
	_, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_orphan_objects_total LFS objects uploaded but not committed to Kafka\n")
	_, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_orphan_objects_total counter\n")
	_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_orphan_objects_total %d\n", atomic.LoadUint64(&m.orphans))

	// Runtime metrics
	var memStats runtime.MemStats
	runtime.ReadMemStats(&memStats)
	_, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_goroutines Number of goroutines\n")
	_, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_goroutines gauge\n")
	_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_goroutines %d\n", runtime.NumGoroutine())
	_, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_memory_alloc_bytes Bytes allocated and in use\n")
	_, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_memory_alloc_bytes gauge\n")
	_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_memory_alloc_bytes %d\n", memStats.Alloc)
	_, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_memory_sys_bytes Bytes obtained from system\n")
	_, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_memory_sys_bytes gauge\n")
	_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_memory_sys_bytes %d\n", memStats.Sys)
	_, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_gc_pause_total_ns Total GC pause time in nanoseconds\n")
	_, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_gc_pause_total_ns counter\n")
	_, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_gc_pause_total_ns %d\n", memStats.PauseTotalNs)
}

// snapshotRequestCounters copies the topic list (sorted) and the current
// counter values under the lock, so rendering never touches live map state.
func (m *lfsMetrics) snapshotRequestCounters() ([]string, []topicCounters) {
	m.mu.Lock()
	defer m.mu.Unlock()
	topics := make([]string, 0, len(m.requests))
	for topic := range m.requests {
		topics = append(topics, topic)
	}
	sort.Strings(topics)
	out := make([]topicCounters, len(topics))
	for i, topic := range topics {
		c := m.requests[topic]
		out[i] = topicCounters{
			okLfs:  atomic.LoadUint64(&c.okLfs),
			errLfs: atomic.LoadUint64(&c.errLfs),
			okPas:  atomic.LoadUint64(&c.okPas),
			errPas: atomic.LoadUint64(&c.errPas),
		}
	}
	return topics, out
}

// snapshotTopics returns the sorted topic names. Retained for callers that
// only need the key set; WritePrometheus now uses snapshotRequestCounters.
func (m *lfsMetrics) snapshotTopics() []string {
	m.mu.Lock()
	defer m.mu.Unlock()
	out := make([]string, 0, len(m.requests))
	for topic := range m.requests {
		out = append(out, topic)
	}
	sort.Strings(out)
	return out
}

// topicCounters holds the four request counters tracked per topic.
type topicCounters struct {
	okLfs  uint64
	errLfs uint64
	okPas  uint64
	errPas uint64
}

// histogram is a minimal Prometheus-compatible histogram with fixed upper
// bounds. All access is serialized by mu.
type histogram struct {
	mu      sync.Mutex
	buckets []float64
	counts  []int64
	sum     float64
	count   int64
}

// newHistogram builds a histogram over the given (sorted copy of) bucket
// upper bounds; counts has one extra slot for the +Inf bucket.
func newHistogram(buckets []float64) *histogram {
	if len(buckets) == 0 {
		buckets = []float64{1, 2, 5, 10, 25, 50, 100}
	}
	cp := append([]float64(nil), buckets...)
	sort.Float64s(cp)
	return &histogram{
		buckets: cp,
		counts:  make([]int64, len(cp)+1),
	}
}

// Observe records one sample, attributing it to the first bucket whose upper
// bound is >= value ("le" semantics); larger values land in the +Inf slot.
func (h *histogram) Observe(value float64) {
	if h == nil {
		return
	}
	h.mu.Lock()
	defer h.mu.Unlock()
	h.sum += value
	h.count++
	idx := sort.SearchFloat64s(h.buckets, value)
	h.counts[idx]++
}

// Snapshot returns copies of the buckets and counts plus sum and count.
func (h *histogram) Snapshot() ([]float64, []int64, float64, int64) {
	if h == nil {
		return nil, nil, 0, 0
	}
	h.mu.Lock()
	defer h.mu.Unlock()
	buckets := append([]float64(nil), h.buckets...)
	counts := append([]int64(nil), h.counts...)
	return buckets, counts, h.sum, h.count
}

// WritePrometheus renders the histogram with cumulative bucket counts as
// required by the exposition format.
func (h *histogram) WritePrometheus(w io.Writer, name, help string) {
	if h == nil {
		return
	}
	buckets, counts, sum, count := h.Snapshot()
	_, _ = fmt.Fprintf(w, "# HELP %s %s\n", name, help)
	_, _ = fmt.Fprintf(w, "# TYPE %s histogram\n", name)
	var cumulative int64
	for i, upper := range buckets {
		cumulative += counts[i]
		_, _ = fmt.Fprintf(w, "%s_bucket{le=%q} %d\n", name, formatFloat(upper), cumulative)
	}
	cumulative += counts[len(counts)-1]
	_, _ = fmt.Fprintf(w, "%s_bucket{le=\"+Inf\"} %d\n", name, cumulative)
	_, _ = fmt.Fprintf(w, "%s_sum %f\n", name, sum)
	_, _ = fmt.Fprintf(w, "%s_count %d\n", name, count)
}

// formatFloat renders a bucket bound compactly (e.g. 0.25, 5, 10).
func formatFloat(val float64) string {
	return fmt.Sprintf("%g", val)
}
+ } + return out +} + +func lfsEncodeRecord(record kmsg.Record) []byte { + body := make([]byte, 0, 128) + body = append(body, byte(record.Attributes)) + body = lfsAppendVarlong(body, record.TimestampDelta64) + body = lfsAppendVarint(body, record.OffsetDelta) + body = lfsAppendVarintBytes(body, record.Key) + body = lfsAppendVarintBytes(body, record.Value) + body = lfsAppendVarint(body, int32(len(record.Headers))) + for _, header := range record.Headers { + body = lfsAppendVarintString(body, header.Key) + body = lfsAppendVarintBytes(body, header.Value) + } + + cap64 := int64(len(body)) + int64(binary.MaxVarintLen32) + out := make([]byte, 0, cap64) + out = lfsAppendVarint(out, int32(len(body))) + out = append(out, body...) + return out +} + +func lfsAppendVarint(dst []byte, v int32) []byte { + var tmp [binary.MaxVarintLen32]byte + n := binary.PutVarint(tmp[:], int64(v)) + return append(dst, tmp[:n]...) +} + +func lfsAppendVarlong(dst []byte, v int64) []byte { + var tmp [binary.MaxVarintLen64]byte + n := binary.PutVarint(tmp[:], v) + return append(dst, tmp[:n]...) +} + +func lfsAppendVarintBytes(dst []byte, b []byte) []byte { + if b == nil { + dst = lfsAppendVarint(dst, -1) + return dst + } + dst = lfsAppendVarint(dst, int32(len(b))) + return append(dst, b...) +} + +func lfsAppendVarintString(dst []byte, s string) []byte { + dst = lfsAppendVarint(dst, int32(len(s))) + return append(dst, s...) +} + +func lfsVarint(buf []byte) (int32, int) { + val, n := binary.Varint(buf) + if n <= 0 { + return 0, 0 + } + return int32(val), n +} + +// lfsBuildRecordBatch constructs a full RecordBatch from records. +// Used by the HTTP API produce path. 
+func lfsBuildRecordBatch(records []kmsg.Record) []byte { + encoded := lfsEncodeRecords(records) + batch := kmsg.RecordBatch{ + FirstOffset: 0, + PartitionLeaderEpoch: -1, + Magic: 2, + Attributes: 0, + LastOffsetDelta: int32(len(records) - 1), + FirstTimestamp: 0, + MaxTimestamp: 0, + ProducerID: -1, + ProducerEpoch: -1, + FirstSequence: 0, + NumRecords: int32(len(records)), + Records: encoded, + } + batchBytes := batch.AppendTo(nil) + batch.Length = int32(len(batchBytes) - 12) + batchBytes = batch.AppendTo(nil) + batch.CRC = int32(crc32.Checksum(batchBytes[21:], lfsCRC32cTable)) + return batch.AppendTo(nil) +} diff --git a/cmd/proxy/lfs_rewrite.go b/cmd/proxy/lfs_rewrite.go new file mode 100644 index 00000000..bd9c27f3 --- /dev/null +++ b/cmd/proxy/lfs_rewrite.go @@ -0,0 +1,358 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "bytes" + "context" + "errors" + "fmt" + "hash/crc32" + "log/slog" + "strings" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/KafScale/platform/pkg/protocol" + "github.com/twmb/franz-go/pkg/kgo" + "github.com/twmb/franz-go/pkg/kmsg" +) + +type lfsRecordBatch struct { + kmsg.RecordBatch + Raw []byte +} + +type lfsRewriteResult struct { + modified bool + uploadBytes int64 + topics map[string]struct{} + orphans []orphanInfo + duration float64 +} + +type orphanInfo struct { + Topic string + Key string + RequestID string + Reason string +} + +var lfsCRC32cTable = crc32.MakeTable(crc32.Castagnoli) + +// safeHeaderAllowlist defines headers safe to include in the LFS envelope. +var lfsSafeHeaderAllowlist = map[string]bool{ + "content-type": true, + "content-encoding": true, + "correlation-id": true, + "message-id": true, + "x-correlation-id": true, + "x-request-id": true, + "traceparent": true, + "tracestate": true, +} + +// rewriteProduceRecords scans all records in a ProduceRequest for LFS_BLOB +// headers. For each such record, the payload is uploaded to S3 and the record +// value is replaced with an LFS envelope JSON. Batches are re-encoded in-place +// so the caller can pass the modified ProduceRequest to the existing fan-out. 
// NOTE(review): header is currently unused here — presumably kept for parity
// with other request-rewrite hooks; confirm before removing.
func (m *lfsModule) rewriteProduceRecords(ctx context.Context, header *protocol.RequestHeader, req *protocol.ProduceRequest) (lfsRewriteResult, error) {
	if m.logger == nil {
		m.logger = slog.Default()
	}
	if req == nil {
		return lfsRewriteResult{}, errors.New("nil produce request")
	}

	start := time.Now()
	modified := false
	uploadBytes := int64(0)
	decompressor := kgo.DefaultDecompressor()
	topics := make(map[string]struct{})
	// NOTE(review): on most early error returns below, this accumulated
	// orphan list is discarded (an empty lfsRewriteResult is returned), so
	// objects uploaded earlier in the same request are not tracked as
	// orphans — verify whether trackOrphans should be called on those paths.
	orphans := make([]orphanInfo, 0, 4)

	// Walk every topic/partition; indices are used so batches and records
	// can be mutated in place.
	for ti := range req.Topics {
		topic := &req.Topics[ti]
		for pi := range topic.Partitions {
			partition := &topic.Partitions[pi]
			if len(partition.Records) == 0 {
				continue
			}
			batches, err := lfsDecodeRecordBatches(partition.Records)
			if err != nil {
				return lfsRewriteResult{}, err
			}
			batchModified := false
			for bi := range batches {
				batch := &batches[bi]
				records, codec, err := lfsDecodeBatchRecords(batch, decompressor)
				if err != nil {
					return lfsRewriteResult{}, err
				}
				if len(records) == 0 {
					continue
				}
				recordChanged := false
				for ri := range records {
					rec := &records[ri]
					headers := rec.Headers
					// The LFS_BLOB header marks a record whose value should
					// be offloaded; its value optionally carries a checksum.
					lfsValue, ok := lfsFindHeaderValue(headers, "LFS_BLOB")
					if !ok {
						continue
					}
					recordChanged = true
					modified = true
					topics[topic.Topic] = struct{}{}
					checksumHeader := strings.TrimSpace(string(lfsValue))
					algHeader, _ := lfsFindHeaderValue(headers, "LFS_BLOB_ALG")
					alg, err := m.resolveChecksumAlg(string(algHeader))
					if err != nil {
						return lfsRewriteResult{}, err
					}
					// A client-supplied checksum with algorithm "none" is a
					// contradiction; reject the whole request.
					if checksumHeader != "" && alg == lfs.ChecksumNone {
						return lfsRewriteResult{}, errors.New("checksum provided but checksum algorithm is none")
					}
					payload := rec.Value
					m.logger.Info("LFS blob detected", "topic", topic.Topic, "size", len(payload))
					if int64(len(payload)) > m.maxBlob {
						m.logger.Error("blob exceeds max size", "size", len(payload), "max", m.maxBlob)
						return lfsRewriteResult{}, fmt.Errorf("blob size %d exceeds max %d", len(payload), m.maxBlob)
					}
					key := m.buildObjectKey(topic.Topic)
					sha256Hex, checksum, checksumAlg, err := m.s3Uploader.Upload(ctx, key, payload, alg)
					if err != nil {
						m.metrics.IncS3Errors()
						return lfsRewriteResult{}, err
					}
					// Verify the client-declared checksum against what was
					// actually uploaded; on mismatch, best-effort delete the
					// object (tracking it as an orphan if the delete fails).
					if checksumHeader != "" && checksum != "" && !strings.EqualFold(checksumHeader, checksum) {
						if err := m.s3Uploader.DeleteObject(ctx, key); err != nil {
							m.trackOrphans([]orphanInfo{{Topic: topic.Topic, Key: key, RequestID: "", Reason: "checksum_mismatch_delete_failed"}})
							return lfsRewriteResult{}, fmt.Errorf("checksum mismatch; delete failed: %w", err)
						}
						return lfsRewriteResult{}, &lfs.ChecksumError{Expected: checksumHeader, Actual: checksum}
					}
					// Replace the record value with a compact envelope that
					// points at the uploaded object.
					env := lfs.Envelope{
						Version:         1,
						Bucket:          m.s3Bucket,
						Key:             key,
						Size:            int64(len(payload)),
						SHA256:          sha256Hex,
						Checksum:        checksum,
						ChecksumAlg:     checksumAlg,
						ContentType:     lfsHeaderValue(headers, "content-type"),
						OriginalHeaders: lfsHeadersToMap(headers),
						CreatedAt:       time.Now().UTC().Format(time.RFC3339),
						ProxyID:         m.proxyID,
					}
					encoded, err := lfs.EncodeEnvelope(env)
					if err != nil {
						return lfsRewriteResult{}, err
					}
					rec.Value = encoded
					rec.Headers = lfsDropHeader(headers, "LFS_BLOB")
					uploadBytes += int64(len(payload))
					// Tracked pessimistically: if the downstream produce
					// fails, this object has no committed Kafka record.
					orphans = append(orphans, orphanInfo{Topic: topic.Topic, Key: key, RequestID: "", Reason: "kafka_produce_failed"})
				}
				if !recordChanged {
					continue
				}
				// Re-encode the batch: recompress with the original codec,
				// then recompute Length and the CRC-32C. The CRC covers all
				// bytes after the CRC field (offset 21 = 8+4+4+1+4), so the
				// batch is serialized once to size it, again to checksum it,
				// and the final bytes come from the loop's AppendTo below.
				newRecords := lfsEncodeRecords(records)
				compressedRecords, usedCodec, err := lfsCompressRecords(codec, newRecords)
				if err != nil {
					return lfsRewriteResult{}, err
				}
				batch.Records = compressedRecords
				batch.NumRecords = int32(len(records))
				// Lower 3 attribute bits carry the compression codec.
				batch.Attributes = (batch.Attributes &^ 0x0007) | int16(usedCodec)
				batch.Length = 0
				batch.CRC = 0
				batchBytes := batch.AppendTo(nil)
				batch.Length = int32(len(batchBytes) - 12)
				batchBytes = batch.AppendTo(nil)
				batch.CRC = int32(crc32.Checksum(batchBytes[21:], lfsCRC32cTable))
				batchBytes = batch.AppendTo(nil)
				batch.Raw = batchBytes
				batchModified = true
			}
			if !batchModified {
				continue
			}
			partition.Records = lfsJoinRecordBatches(batches)
		}
	}
	if !modified {
		return lfsRewriteResult{modified: false}, nil
	}

	// Records have been modified in-place on the parsed ProduceRequest.
	// The caller sets payload=nil which forces the proxy's fan-out to
	// re-encode via protocol.EncodeProduceRequest().
	return lfsRewriteResult{
		modified:    true,
		uploadBytes: uploadBytes,
		topics:      topics,
		orphans:     orphans,
		duration:    time.Since(start).Seconds(),
	}, nil
}

// lfsDecodeRecordBatches splits a raw Produce partition payload into its
// record batches, keeping the original bytes alongside each parse.
func lfsDecodeRecordBatches(records []byte) ([]lfsRecordBatch, error) {
	out := make([]lfsRecordBatch, 0, 4)
	buf := records
	for len(buf) > 0 {
		if len(buf) < 12 {
			return nil, fmt.Errorf("record batch too short: %d", len(buf))
		}
		// Bytes 8-11 hold the batch length, which excludes the 8-byte base
		// offset and the 4-byte length field itself.
		length := int(lfsInt32FromBytes(buf[8:12]))
		total := 12 + length
		if length < 0 || len(buf) < total {
			return nil, fmt.Errorf("invalid record batch length %d", length)
		}
		batchBytes := buf[:total]
		var batch kmsg.RecordBatch
		if err := batch.ReadFrom(batchBytes); err != nil {
			return nil, err
		}
		out = append(out, lfsRecordBatch{RecordBatch: batch, Raw: batchBytes})
		buf = buf[total:]
	}
	return out, nil
}

// lfsJoinRecordBatches concatenates the raw bytes of all batches back into a
// single partition payload.
func lfsJoinRecordBatches(batches []lfsRecordBatch) []byte {
	if len(batches) == 0 {
		return nil
	}
	size := 0
	for _, batch := range batches {
		size += len(batch.Raw)
	}
	out := make([]byte, 0, size)
	for _, batch := range batches {
		out = append(out, batch.Raw...)
+ } + return out +} + +func lfsDecodeBatchRecords(batch *lfsRecordBatch, decompressor kgo.Decompressor) ([]kmsg.Record, kgo.CompressionCodecType, error) { + codec := kgo.CompressionCodecType(batch.Attributes & 0x0007) + rawRecords := batch.Records + if codec != kgo.CodecNone { + var err error + rawRecords, err = decompressor.Decompress(rawRecords, codec) + if err != nil { + return nil, codec, err + } + } + numRecords := int(batch.NumRecords) + records := make([]kmsg.Record, numRecords) + records = lfsReadRawRecordsInto(records, rawRecords) + return records, codec, nil +} + +func lfsReadRawRecordsInto(rs []kmsg.Record, in []byte) []kmsg.Record { + for i := range rs { + length, used := lfsVarint(in) + total := used + int(length) + if used == 0 || length < 0 || len(in) < total { + return rs[:i] + } + if err := (&rs[i]).ReadFrom(in[:total]); err != nil { + rs[i] = kmsg.Record{} + return rs[:i] + } + in = in[total:] + } + return rs +} + +func lfsCompressRecords(codec kgo.CompressionCodecType, raw []byte) ([]byte, kgo.CompressionCodecType, error) { + if codec == kgo.CodecNone { + return raw, kgo.CodecNone, nil + } + var comp kgo.Compressor + var err error + switch codec { + case kgo.CodecGzip: + comp, err = kgo.DefaultCompressor(kgo.GzipCompression()) + case kgo.CodecSnappy: + comp, err = kgo.DefaultCompressor(kgo.SnappyCompression()) + case kgo.CodecLz4: + comp, err = kgo.DefaultCompressor(kgo.Lz4Compression()) + case kgo.CodecZstd: + comp, err = kgo.DefaultCompressor(kgo.ZstdCompression()) + default: + return raw, kgo.CodecNone, nil + } + if err != nil || comp == nil { + return raw, kgo.CodecNone, err + } + out, usedCodec := comp.Compress(bytes.NewBuffer(nil), raw) + return out, usedCodec, nil +} + +func lfsFindHeaderValue(headers []kmsg.Header, key string) ([]byte, bool) { + for _, header := range headers { + if header.Key == key { + return header.Value, true + } + } + return nil, false +} + +func lfsHeaderValue(headers []kmsg.Header, key string) string { + for _, 
header := range headers { + if header.Key == key { + return string(header.Value) + } + } + return "" +} + +func lfsHeadersToMap(headers []kmsg.Header) map[string]string { + if len(headers) == 0 { + return nil + } + out := make(map[string]string) + for _, header := range headers { + key := strings.ToLower(header.Key) + if lfsSafeHeaderAllowlist[key] { + out[header.Key] = string(header.Value) + } + } + if len(out) == 0 { + return nil + } + return out +} + +func lfsDropHeader(headers []kmsg.Header, key string) []kmsg.Header { + if len(headers) == 0 { + return headers + } + out := make([]kmsg.Header, 0, len(headers)) + for _, header := range headers { + if header.Key == key { + continue + } + out = append(out, header) + } + return out +} + +func lfsInt32FromBytes(b []byte) int32 { + return int32(uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])) +} diff --git a/cmd/proxy/lfs_s3.go b/cmd/proxy/lfs_s3.go new file mode 100644 index 00000000..9bd7a7a2 --- /dev/null +++ b/cmd/proxy/lfs_s3.go @@ -0,0 +1,582 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package main

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"time"

	"github.com/KafScale/platform/pkg/lfs"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/aws/aws-sdk-go-v2/service/s3/types"
	"github.com/aws/smithy-go"
)

// minMultipartChunkSize is the S3 minimum size for any multipart part except
// the last one (5 MiB).
const minMultipartChunkSize int64 = 5 * 1024 * 1024

// s3Config carries everything needed to construct an s3Uploader.
type s3Config struct {
	Bucket          string
	Region          string
	Endpoint        string // endpoint used for API calls
	PublicEndpoint  string // optional endpoint baked into presigned URLs
	AccessKeyID     string
	SecretAccessKey string
	SessionToken    string
	ForcePathStyle  bool
	ChunkSize       int64 // multipart part size; normalized to >= 5 MiB
}

// s3API is the subset of the AWS S3 client used by the uploader, declared as
// an interface so tests can substitute a fake.
type s3API interface {
	CreateMultipartUpload(ctx context.Context, params *s3.CreateMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error)
	UploadPart(ctx context.Context, params *s3.UploadPartInput, optFns ...func(*s3.Options)) (*s3.UploadPartOutput, error)
	CompleteMultipartUpload(ctx context.Context, params *s3.CompleteMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error)
	AbortMultipartUpload(ctx context.Context, params *s3.AbortMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.AbortMultipartUploadOutput, error)
	PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error)
	GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error)
	DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error)
	HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error)
	CreateBucket(ctx context.Context, params *s3.CreateBucketInput, optFns ...func(*s3.Options)) (*s3.CreateBucketOutput, error)
}

// s3PresignAPI is the presigning capability used to mint temporary GET URLs.
type s3PresignAPI interface {
	PresignGetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.PresignOptions)) (*v4.PresignedHTTPRequest, error)
}

// s3Uploader wraps the S3 client with bucket/region/chunk-size defaults and
// an optional presign client (which may target a different public endpoint).
type s3Uploader struct {
	bucket    string
	region    string
	chunkSize int64
	api       s3API
	presign   s3PresignAPI
}

// normalizeChunkSize clamps chunk to at least the configured default and
// never below the S3 multipart minimum of 5 MiB.
func normalizeChunkSize(chunk int64) int64 {
	if chunk <= 0 {
		chunk = defaultLFSChunkSize
	}
	if chunk < minMultipartChunkSize {
		chunk = minMultipartChunkSize
	}
	return chunk
}

// newS3Uploader builds the API and presign clients from cfg. Static
// credentials are used when provided, otherwise the default AWS chain.
func newS3Uploader(ctx context.Context, cfg s3Config) (*s3Uploader, error) {
	if cfg.Bucket == "" {
		return nil, errors.New("s3 bucket required")
	}
	if cfg.Region == "" {
		return nil, errors.New("s3 region required")
	}
	cfg.ChunkSize = normalizeChunkSize(cfg.ChunkSize)

	loadOpts := []func(*config.LoadOptions) error{
		config.WithRegion(cfg.Region),
	}
	if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" {
		loadOpts = append(loadOpts, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(cfg.AccessKeyID, cfg.SecretAccessKey, cfg.SessionToken)))
	}
	awsCfg, err := config.LoadDefaultConfig(ctx, loadOpts...)
	if err != nil {
		return nil, fmt.Errorf("load aws config: %w", err)
	}
	client := s3.NewFromConfig(awsCfg, func(o *s3.Options) {
		if cfg.Endpoint != "" {
			o.BaseEndpoint = aws.String(cfg.Endpoint)
		}
		o.UsePathStyle = cfg.ForcePathStyle
	})
	// Presigned URLs are handed to external clients, so they may need a
	// different (public) endpoint than the one used for API calls.
	presignEndpoint := cfg.Endpoint
	if cfg.PublicEndpoint != "" {
		presignEndpoint = cfg.PublicEndpoint
	}
	presignClient := s3.NewFromConfig(awsCfg, func(o *s3.Options) {
		if presignEndpoint != "" {
			o.BaseEndpoint = aws.String(presignEndpoint)
		}
		o.UsePathStyle = cfg.ForcePathStyle
	})
	presigner := s3.NewPresignClient(presignClient)

	return &s3Uploader{
		bucket:    cfg.Bucket,
		region:    cfg.Region,
		chunkSize: cfg.ChunkSize,
		api:       client,
		presign:   presigner,
	}, nil
}

// PresignGetObject mints a temporary GET URL for key valid for ttl.
func (u *s3Uploader) PresignGetObject(ctx context.Context, key string, ttl time.Duration) (string, error) {
	if key == "" {
		return "", errors.New("s3 key required")
	}
	if u.presign == nil {
		return "", errors.New("presign client not configured")
	}
	out, err := u.presign.PresignGetObject(ctx, &s3.GetObjectInput{
		Bucket: aws.String(u.bucket),
		Key:    aws.String(key),
	}, func(opts *s3.PresignOptions) {
		opts.Expires = ttl
	})
	if err != nil {
		return "", err
	}
	return out.URL, nil
}

// GetObject fetches key from the configured bucket.
func (u *s3Uploader) GetObject(ctx context.Context, key string) (*s3.GetObjectOutput, error) {
	if key == "" {
		return nil, errors.New("s3 key required")
	}
	return u.api.GetObject(ctx, &s3.GetObjectInput{
		Bucket: aws.String(u.bucket),
		Key:    aws.String(key),
	})
}

// HeadBucket probes the bucket for existence/accessibility.
func (u *s3Uploader) HeadBucket(ctx context.Context) error {
	_, err := u.api.HeadBucket(ctx, &s3.HeadBucketInput{Bucket: aws.String(u.bucket)})
	if err == nil {
		return nil
	}
	return err
}

// EnsureBucket creates the bucket if it does not already exist; creation
// races with other owners are tolerated.
func (u *s3Uploader) EnsureBucket(ctx context.Context) error {
	if err := u.HeadBucket(ctx); err == nil {
		return nil
	}
	input := &s3.CreateBucketInput{Bucket: aws.String(u.bucket)}
	// us-east-1 must NOT send a LocationConstraint; every other region must.
	if u.region != "" && u.region != "us-east-1" {
		input.CreateBucketConfiguration = &types.CreateBucketConfiguration{LocationConstraint: types.BucketLocationConstraint(u.region)}
	}
	_, err := u.api.CreateBucket(ctx, input)
	if err != nil {
		var apiErr smithy.APIError
		if errors.As(err, &apiErr) {
			// Lost the creation race (or already own it): treat as success.
			switch apiErr.ErrorCode() {
			case "BucketAlreadyOwnedByYou", "BucketAlreadyExists":
				return nil
			}
		}
		return fmt.Errorf("create bucket %s: %w", u.bucket, err)
	}
	return nil
}

// Upload stores payload under key and returns (sha256 hex, checksum per alg,
// algorithm name, error). An empty alg defaults to SHA-256; small payloads
// use PutObject, anything above chunkSize goes through multipart upload.
// NOTE(review): on upload error the computed hashes are still returned
// alongside the non-nil error — callers must check err first.
func (u *s3Uploader) Upload(ctx context.Context, key string, payload []byte, alg lfs.ChecksumAlg) (string, string, string, error) {
	if key == "" {
		return "", "", "", errors.New("s3 key required")
	}
	shaHasher := sha256.New()
	if _, err := shaHasher.Write(payload); err != nil {
		return "", "", "", err
	}
	shaHex := hex.EncodeToString(shaHasher.Sum(nil))

	checksumAlg := alg
	if checksumAlg == "" {
		checksumAlg = lfs.ChecksumSHA256
	}
	var checksum string
	if checksumAlg != lfs.ChecksumNone {
		if checksumAlg == lfs.ChecksumSHA256 {
			// SHA-256 is already computed above; reuse it.
			checksum = shaHex
		} else {
			computed, err := lfs.ComputeChecksum(checksumAlg, payload)
			if err != nil {
				return "", "", "", err
			}
			checksum = computed
		}
	}

	size := int64(len(payload))
	if size <= u.chunkSize {
		_, err := u.api.PutObject(ctx, &s3.PutObjectInput{
			Bucket:        aws.String(u.bucket),
			Key:           aws.String(key),
			Body:          bytes.NewReader(payload),
			ContentLength: aws.Int64(size),
		})
		return shaHex, checksum, string(checksumAlg), err
	}
	return shaHex, checksum, string(checksumAlg), u.multipartUpload(ctx, key, payload)
}

// UploadStream streams reader to S3 under key, enforcing maxSize (0 = no
// limit) and hashing as it goes. Returns (sha256 hex, checksum, algorithm
// name, bytes uploaded, error). Payloads smaller than the multipart minimum
// go through a single PutObject; larger ones use multipart upload.
func (u *s3Uploader) UploadStream(ctx context.Context, key string, reader io.Reader, maxSize int64, alg lfs.ChecksumAlg) (string, string, string, int64, error) {
	if key == "" {
		return "", "", "", 0, errors.New("s3 key required")
	}
	if reader == nil {
		return "", "", "", 0, errors.New("reader required")
	}
	// NOTE(review): this mutates shared uploader state; racy if one
	// s3Uploader is used from multiple goroutines — confirm callers.
	u.chunkSize = normalizeChunkSize(u.chunkSize)

	checksumAlg := alg
	if checksumAlg == "" {
		checksumAlg = lfs.ChecksumSHA256
	}

	// Read first chunk to determine if we need multipart upload
	firstBuf := make([]byte, u.chunkSize)
	firstN, firstErr := io.ReadFull(reader, firstBuf)
	if firstErr != nil && firstErr != io.EOF && firstErr != io.ErrUnexpectedEOF {
		return "", "", "", 0, firstErr
	}
	if firstN == 0 {
		return "", "", "", 0, errors.New("empty upload")
	}

	firstReadHitEOF := firstErr == io.EOF || firstErr == io.ErrUnexpectedEOF

	// If data fits in one chunk and is smaller than minMultipartChunkSize, use PutObject
	if firstReadHitEOF && int64(firstN) < minMultipartChunkSize {
		data := firstBuf[:firstN]
		shaHasher := sha256.New()
		shaHasher.Write(data)
		shaHex := hex.EncodeToString(shaHasher.Sum(nil))

		checksum := ""
		if checksumAlg != lfs.ChecksumNone {
			if checksumAlg == lfs.ChecksumSHA256 {
				checksum = shaHex
			} else {
				computed, err := lfs.ComputeChecksum(checksumAlg, data)
				if err != nil {
					return "", "", "", 0, err
				}
				checksum = computed
			}
		}

		_, err := u.api.PutObject(ctx, &s3.PutObjectInput{
			Bucket:        aws.String(u.bucket),
			Key:           aws.String(key),
			Body:          bytes.NewReader(data),
			ContentLength: aws.Int64(int64(firstN)),
		})
		if err != nil {
			return "", "", "", 0, fmt.Errorf("put object: %w", err)
		}
		return shaHex, checksum, string(checksumAlg), int64(firstN), nil
	}

	// Use multipart upload for larger files
	createResp, err := u.api.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{
		Bucket: aws.String(u.bucket),
		Key:    aws.String(key),
	})
	if err != nil {
		return "", "", "", 0, fmt.Errorf("create multipart upload: %w", err)
	}
	uploadID := createResp.UploadId
	if uploadID == nil {
		return "", "", "", 0, errors.New("missing upload id")
	}

	shaHasher := sha256.New()
	// checksumHasher aliases shaHasher when the algorithm is SHA-256 so each
	// byte is hashed only once; the != comparisons below rely on that.
	var checksumHasher interface {
		Write([]byte) (int, error)
		Sum([]byte) []byte
	}
	if checksumAlg != lfs.ChecksumNone {
		if checksumAlg == lfs.ChecksumSHA256 {
			checksumHasher = shaHasher
		} else {
			h, err := lfs.NewChecksumHasher(checksumAlg)
			if err != nil {
				_ = u.abortUpload(ctx, key, *uploadID)
				return "", "", "", 0, err
			}
			checksumHasher = h
		}
	}
	parts := make([]types.CompletedPart, 0, 4)
	partNum := int32(1)
	var total int64

	// Upload first chunk
	total += int64(firstN)
	if maxSize > 0 && total > maxSize {
		_ = u.abortUpload(ctx, key, *uploadID)
		return "", "", "", total, fmt.Errorf("blob size %d exceeds max %d", total, maxSize)
	}
	shaHasher.Write(firstBuf[:firstN])
	if checksumHasher != nil && checksumHasher != shaHasher {
		_, _ = checksumHasher.Write(firstBuf[:firstN])
	}
	partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{
		Bucket:     aws.String(u.bucket),
		Key:        aws.String(key),
		UploadId:   uploadID,
		PartNumber: aws.Int32(partNum),
		Body:       bytes.NewReader(firstBuf[:firstN]),
	})
	if err != nil {
		_ = u.abortUpload(ctx, key, *uploadID)
		return "", "", "", total, fmt.Errorf("upload part %d: %w", partNum, err)
	}
	parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)})
	partNum++

	// Continue reading remaining chunks
	buf := make([]byte, u.chunkSize)
	for {
		n, readErr := io.ReadFull(reader, buf)
		if n > 0 {
			total += int64(n)
			if maxSize > 0 && total > maxSize {
				_ = u.abortUpload(ctx, key, *uploadID)
				return "", "", "", total, fmt.Errorf("blob size %d exceeds max %d", total, maxSize)
			}
			if _, err := shaHasher.Write(buf[:n]); err != nil {
				_ = u.abortUpload(ctx, key, *uploadID)
				return "", "", "", total, err
			}
			if checksumHasher != nil && checksumHasher != shaHasher {
				if _, err := checksumHasher.Write(buf[:n]); err != nil {
					_ = u.abortUpload(ctx, key, *uploadID)
					return "", "", "", total, err
				}
			}
			partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{
				Bucket:     aws.String(u.bucket),
				Key:        aws.String(key),
				UploadId:   uploadID,
				PartNumber: aws.Int32(partNum),
				Body:       bytes.NewReader(buf[:n]),
			})
			if err != nil {
				_ = u.abortUpload(ctx, key,
*uploadID) + return "", "", "", total, fmt.Errorf("upload part %d: %w", partNum, err) + } + parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)}) + partNum++ + } + if readErr == io.EOF { + break + } + if readErr == io.ErrUnexpectedEOF { + break + } + if readErr != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, readErr + } + } + + _, err = u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: uploadID, + MultipartUpload: &types.CompletedMultipartUpload{ + Parts: parts, + }, + }) + if err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return "", "", "", total, fmt.Errorf("complete multipart upload: %w", err) + } + shaHex := hex.EncodeToString(shaHasher.Sum(nil)) + checksum := "" + if checksumAlg != lfs.ChecksumNone { + if checksumAlg == lfs.ChecksumSHA256 { + checksum = shaHex + } else if checksumHasher != nil { + checksum = hex.EncodeToString(checksumHasher.Sum(nil)) + } + } + return shaHex, checksum, string(checksumAlg), total, nil +} + +func (u *s3Uploader) StartMultipartUpload(ctx context.Context, key, contentType string) (string, error) { + if key == "" { + return "", errors.New("s3 key required") + } + input := &s3.CreateMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + } + if contentType != "" { + input.ContentType = aws.String(contentType) + } + resp, err := u.api.CreateMultipartUpload(ctx, input) + if err != nil { + return "", fmt.Errorf("create multipart upload: %w", err) + } + if resp.UploadId == nil || *resp.UploadId == "" { + return "", errors.New("missing upload id") + } + return *resp.UploadId, nil +} + +func (u *s3Uploader) UploadPart(ctx context.Context, key, uploadID string, partNumber int32, payload []byte) (string, error) { + if key == "" { + return "", errors.New("s3 key required") + } + if uploadID == "" { + return "", errors.New("upload id required") + } + 
resp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: aws.String(uploadID), + PartNumber: aws.Int32(partNumber), + Body: bytes.NewReader(payload), + }) + if err != nil { + return "", fmt.Errorf("upload part %d: %w", partNumber, err) + } + if resp.ETag == nil || *resp.ETag == "" { + return "", errors.New("missing etag") + } + return *resp.ETag, nil +} + +func (u *s3Uploader) CompleteMultipartUpload(ctx context.Context, key, uploadID string, parts []types.CompletedPart) error { + if key == "" { + return errors.New("s3 key required") + } + if uploadID == "" { + return errors.New("upload id required") + } + _, err := u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: aws.String(uploadID), + MultipartUpload: &types.CompletedMultipartUpload{ + Parts: parts, + }, + }) + if err != nil { + return fmt.Errorf("complete multipart upload: %w", err) + } + return nil +} + +func (u *s3Uploader) AbortMultipartUpload(ctx context.Context, key, uploadID string) error { + if key == "" { + return errors.New("s3 key required") + } + if uploadID == "" { + return errors.New("upload id required") + } + _, err := u.api.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: aws.String(uploadID), + }) + return err +} + +func (u *s3Uploader) multipartUpload(ctx context.Context, key string, payload []byte) error { + createResp, err := u.api.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + }) + if err != nil { + return fmt.Errorf("create multipart upload: %w", err) + } + uploadID := createResp.UploadId + if uploadID == nil { + return errors.New("missing upload id") + } + + numParts := int64(len(payload))/u.chunkSize + 1 + parts := make([]types.CompletedPart, 0, numParts) + reader := bytes.NewReader(payload) 
+ partNum := int32(1) + buf := make([]byte, u.chunkSize) + for { + n, readErr := io.ReadFull(reader, buf) + if readErr == io.EOF || readErr == io.ErrUnexpectedEOF { + if n == 0 { + break + } + } + if n > 0 { + partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: uploadID, + PartNumber: aws.Int32(partNum), + Body: bytes.NewReader(buf[:n]), + }) + if err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return fmt.Errorf("upload part %d: %w", partNum, err) + } + parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)}) + partNum++ + } + if readErr == io.EOF { + break + } + if readErr != nil && readErr != io.ErrUnexpectedEOF { + _ = u.abortUpload(ctx, key, *uploadID) + return fmt.Errorf("read payload: %w", readErr) + } + if readErr == io.ErrUnexpectedEOF { + break + } + } + + _, err = u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: uploadID, + MultipartUpload: &types.CompletedMultipartUpload{ + Parts: parts, + }, + }) + if err != nil { + _ = u.abortUpload(ctx, key, *uploadID) + return fmt.Errorf("complete multipart upload: %w", err) + } + return nil +} + +func (u *s3Uploader) abortUpload(ctx context.Context, key, uploadID string) error { + _, err := u.api.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + UploadId: aws.String(uploadID), + }) + return err +} + +func (u *s3Uploader) DeleteObject(ctx context.Context, key string) error { + if key == "" { + return errors.New("s3 key required") + } + _, err := u.api.DeleteObject(ctx, &s3.DeleteObjectInput{ + Bucket: aws.String(u.bucket), + Key: aws.String(key), + }) + return err +} diff --git a/cmd/proxy/lfs_sasl_encode.go b/cmd/proxy/lfs_sasl_encode.go new file mode 100644 index 00000000..4b68da77 --- /dev/null +++ b/cmd/proxy/lfs_sasl_encode.go @@ -0,0 +1,258 
@@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/KafScale/platform/pkg/protocol" +) + +// lfsbyteWriter is a minimal byte buffer used for SASL and produce encoding +// in the LFS module. +type lfsbyteWriter struct { + buf []byte +} + +func newLFSByteWriter(capacity int) *lfsbyteWriter { + return &lfsbyteWriter{buf: make([]byte, 0, capacity)} +} + +func (w *lfsbyteWriter) write(b []byte) { + w.buf = append(w.buf, b...) 
+} + +func (w *lfsbyteWriter) Int16(v int16) { + var tmp [2]byte + binary.BigEndian.PutUint16(tmp[:], uint16(v)) + w.write(tmp[:]) +} + +func (w *lfsbyteWriter) Int32(v int32) { + var tmp [4]byte + binary.BigEndian.PutUint32(tmp[:], uint32(v)) + w.write(tmp[:]) +} + +func (w *lfsbyteWriter) String(v string) { + w.Int16(int16(len(v))) + if len(v) > 0 { + w.write([]byte(v)) + } +} + +func (w *lfsbyteWriter) NullableString(v *string) { + if v == nil { + w.Int16(-1) + return + } + w.String(*v) +} + +func (w *lfsbyteWriter) CompactString(v string) { + w.compactLength(len(v)) + if len(v) > 0 { + w.write([]byte(v)) + } +} + +func (w *lfsbyteWriter) CompactNullableString(v *string) { + if v == nil { + w.compactLength(-1) + return + } + w.CompactString(*v) +} + +func (w *lfsbyteWriter) BytesWithLength(b []byte) { + w.Int32(int32(len(b))) + w.write(b) +} + +func (w *lfsbyteWriter) CompactBytes(b []byte) { + if b == nil { + w.compactLength(-1) + return + } + w.compactLength(len(b)) + w.write(b) +} + +func (w *lfsbyteWriter) UVarint(v uint64) { + var tmp [binary.MaxVarintLen64]byte + n := binary.PutUvarint(tmp[:], v) + w.write(tmp[:n]) +} + +func (w *lfsbyteWriter) CompactArrayLen(length int) { + if length < 0 { + w.UVarint(0) + return + } + w.UVarint(uint64(length) + 1) +} + +func (w *lfsbyteWriter) WriteTaggedFields(count int) { + if count == 0 { + w.UVarint(0) + return + } + w.UVarint(uint64(count)) +} + +func (w *lfsbyteWriter) compactLength(length int) { + if length < 0 { + w.UVarint(0) + return + } + w.UVarint(uint64(length) + 1) +} + +func (w *lfsbyteWriter) Bytes() []byte { + return w.buf +} + +const ( + lfsAPIKeySaslHandshake int16 = 17 + lfsAPIKeySaslAuthenticate int16 = 36 +) + +func lfsEncodeSaslHandshakeRequest(header *protocol.RequestHeader, mechanism string) ([]byte, error) { + if header == nil { + return nil, errors.New("nil header") + } + w := newLFSByteWriter(0) + w.Int16(header.APIKey) + w.Int16(header.APIVersion) + w.Int32(header.CorrelationID) + 
w.NullableString(header.ClientID) + w.String(mechanism) + return w.Bytes(), nil +} + +func lfsEncodeSaslAuthenticateRequest(header *protocol.RequestHeader, authBytes []byte) ([]byte, error) { + if header == nil { + return nil, errors.New("nil header") + } + w := newLFSByteWriter(0) + w.Int16(header.APIKey) + w.Int16(header.APIVersion) + w.Int32(header.CorrelationID) + w.NullableString(header.ClientID) + w.BytesWithLength(authBytes) + return w.Bytes(), nil +} + +func lfsBuildSaslPlainAuthBytes(username, password string) []byte { + buf := make([]byte, 0, len(username)+len(password)+2) + buf = append(buf, 0) + buf = append(buf, []byte(username)...) + buf = append(buf, 0) + buf = append(buf, []byte(password)...) + return buf +} + +func lfsReadSaslResponse(r io.Reader) error { + frame, err := protocol.ReadFrame(r) + if err != nil { + return err + } + if len(frame.Payload) < 6 { + return fmt.Errorf("invalid SASL response length %d", len(frame.Payload)) + } + errorCode := int16(binary.BigEndian.Uint16(frame.Payload[4:6])) + if errorCode != 0 { + return fmt.Errorf("sasl error code %d", errorCode) + } + return nil +} + +// lfsEncodeProduceRequest encodes a ProduceRequest for the HTTP API produce path. 
+func lfsEncodeProduceRequest(header *protocol.RequestHeader, req *protocol.ProduceRequest) ([]byte, error) { + if header == nil || req == nil { + return nil, errors.New("nil header or request") + } + flexible := lfsIsFlexibleRequest(header.APIKey, header.APIVersion) + w := newLFSByteWriter(0) + w.Int16(header.APIKey) + w.Int16(header.APIVersion) + w.Int32(header.CorrelationID) + w.NullableString(header.ClientID) + if flexible { + w.WriteTaggedFields(0) + } + + if header.APIVersion >= 3 { + if flexible { + w.CompactNullableString(req.TransactionID) + } else { + w.NullableString(req.TransactionID) + } + } + w.Int16(req.Acks) + w.Int32(req.TimeoutMillis) + if flexible { + w.CompactArrayLen(len(req.Topics)) + } else { + w.Int32(int32(len(req.Topics))) + } + for _, topic := range req.Topics { + if flexible { + w.CompactString(topic.Topic) + w.CompactArrayLen(len(topic.Partitions)) + } else { + w.String(topic.Topic) + w.Int32(int32(len(topic.Partitions))) + } + for _, partition := range topic.Partitions { + w.Int32(partition.Partition) + if flexible { + w.CompactBytes(partition.Records) + w.WriteTaggedFields(0) + } else { + w.BytesWithLength(partition.Records) + } + } + if flexible { + w.WriteTaggedFields(0) + } + } + if flexible { + w.WriteTaggedFields(0) + } + + return w.Bytes(), nil +} + +func lfsIsFlexibleRequest(apiKey, version int16) bool { + switch apiKey { + case protocol.APIKeyApiVersion: + return version >= 3 + case protocol.APIKeyProduce: + return version >= 9 + case protocol.APIKeyMetadata: + return version >= 9 + case protocol.APIKeyFetch: + return version >= 12 + case protocol.APIKeyFindCoordinator: + return version >= 3 + default: + return false + } +} diff --git a/cmd/proxy/lfs_swagger.go b/cmd/proxy/lfs_swagger.go new file mode 100644 index 00000000..834d9774 --- /dev/null +++ b/cmd/proxy/lfs_swagger.go @@ -0,0 +1,71 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). 
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + _ "embed" + "net/http" +) + +//go:embed openapi.yaml +var lfsOpenAPISpec []byte + +const lfsSwaggerUIHTML = ` + + + + KafScale LFS Proxy - API Documentation + + + + +
+ + + + + +` + +func (m *lfsModule) lfsHandleSwaggerUI(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(lfsSwaggerUIHTML)) +} + +func (m *lfsModule) lfsHandleOpenAPISpec(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/yaml") + w.Header().Set("Access-Control-Allow-Origin", "*") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(lfsOpenAPISpec) +} diff --git a/cmd/proxy/lfs_test.go b/cmd/proxy/lfs_test.go new file mode 100644 index 00000000..3a21deb6 --- /dev/null +++ b/cmd/proxy/lfs_test.go @@ -0,0 +1,426 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "hash/crc32" + "io" + "log/slog" + "testing" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/KafScale/platform/pkg/protocol" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/signer/v4" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/twmb/franz-go/pkg/kmsg" +) + +// fakeS3 is a minimal in-memory S3 backend for testing. 
+type fakeS3 struct { + objects map[string][]byte + deleted []string +} + +func newFakeS3() *fakeS3 { + return &fakeS3{objects: make(map[string][]byte)} +} + +func (f *fakeS3) CreateMultipartUpload(ctx context.Context, params *s3.CreateMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error) { + return &s3.CreateMultipartUploadOutput{UploadId: aws.String("test-upload-id")}, nil +} +func (f *fakeS3) UploadPart(ctx context.Context, params *s3.UploadPartInput, optFns ...func(*s3.Options)) (*s3.UploadPartOutput, error) { + return &s3.UploadPartOutput{ETag: aws.String("test-etag")}, nil +} +func (f *fakeS3) CompleteMultipartUpload(ctx context.Context, params *s3.CompleteMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) { + return &s3.CompleteMultipartUploadOutput{}, nil +} +func (f *fakeS3) AbortMultipartUpload(ctx context.Context, params *s3.AbortMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.AbortMultipartUploadOutput, error) { + return &s3.AbortMultipartUploadOutput{}, nil +} +func (f *fakeS3) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) { + if params.Body != nil { + data, _ := io.ReadAll(params.Body) + f.objects[*params.Key] = data + } + return &s3.PutObjectOutput{}, nil +} +func (f *fakeS3) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + return &s3.GetObjectOutput{}, nil +} +func (f *fakeS3) DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) { + f.deleted = append(f.deleted, *params.Key) + delete(f.objects, *params.Key) + return &s3.DeleteObjectOutput{}, nil +} +func (f *fakeS3) HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) { + return &s3.HeadBucketOutput{}, nil +} +func (f *fakeS3) 
CreateBucket(ctx context.Context, params *s3.CreateBucketInput, optFns ...func(*s3.Options)) (*s3.CreateBucketOutput, error) { + return &s3.CreateBucketOutput{}, nil +} + +type fakePresign struct{} + +func (f *fakePresign) PresignGetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.PresignOptions)) (*v4.PresignedHTTPRequest, error) { + return &v4.PresignedHTTPRequest{URL: "https://test.s3.amazonaws.com/" + *params.Key}, nil +} + +func testLFSModule(t *testing.T) (*lfsModule, *fakeS3) { + t.Helper() + fs3 := newFakeS3() + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + m := &lfsModule{ + logger: logger, + s3Uploader: &s3Uploader{bucket: "test-bucket", region: "us-east-1", chunkSize: 5 << 20, api: fs3, presign: &fakePresign{}}, + s3Bucket: "test-bucket", + s3Namespace: "test-ns", + maxBlob: 5 << 30, + checksumAlg: "sha256", + proxyID: "test-proxy", + metrics: newLfsMetrics(), + tracker: &LfsOpsTracker{config: TrackerConfig{}, logger: logger}, + } + return m, fs3 +} + +func buildTestBatch(records []kmsg.Record) []byte { + return lfsBuildRecordBatch(records) +} + +func TestRewriteProduceRecordsDetectsLFSBlob(t *testing.T) { + m, _ := testLFSModule(t) + blobPayload := []byte("hello world LFS blob data for testing") + shaHasher := sha256.New() + shaHasher.Write(blobPayload) + expectedSHA := hex.EncodeToString(shaHasher.Sum(nil)) + + records := []kmsg.Record{{ + Key: []byte("mykey"), + Value: blobPayload, + Headers: []kmsg.Header{ + {Key: "LFS_BLOB", Value: []byte(expectedSHA)}, + }, + }} + batchBytes := buildTestBatch(records) + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 5000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: "test-topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }}, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + result, err := m.rewriteProduceRecords(context.Background(), header, req) + if 
err != nil { + t.Fatalf("rewriteProduceRecords: %v", err) + } + if !result.modified { + t.Fatal("expected modified=true") + } + if result.uploadBytes != int64(len(blobPayload)) { + t.Fatalf("expected uploadBytes=%d, got %d", len(blobPayload), result.uploadBytes) + } + if _, ok := result.topics["test-topic"]; !ok { + t.Fatal("expected test-topic in topics") + } + + // Verify the records were rewritten in-place + batches, err := lfsDecodeRecordBatches(req.Topics[0].Partitions[0].Records) + if err != nil { + t.Fatalf("decode batches: %v", err) + } + if len(batches) != 1 { + t.Fatalf("expected 1 batch, got %d", len(batches)) + } + + decompressor := func() []kmsg.Record { + recs, _, err := lfsDecodeBatchRecords(&batches[0], nil) + if err != nil { + t.Fatalf("decode records: %v", err) + } + return recs + } + // Use the kgo decompressor for uncompressed + recs, _, err := lfsDecodeBatchRecords(&batches[0], nil) + if err != nil { + _ = decompressor // suppress lint + t.Fatalf("decode records: %v", err) + } + if len(recs) != 1 { + t.Fatalf("expected 1 record, got %d", len(recs)) + } + // Value should now be a JSON LFS envelope, not the raw blob + env, err := lfs.DecodeEnvelope(recs[0].Value) + if err != nil { + t.Fatalf("decode envelope: %v", err) + } + if env.Bucket != "test-bucket" { + t.Fatalf("expected bucket=test-bucket, got %s", env.Bucket) + } + if env.Size != int64(len(blobPayload)) { + t.Fatalf("expected size=%d, got %d", len(blobPayload), env.Size) + } + if env.SHA256 != expectedSHA { + t.Fatalf("expected sha256=%s, got %s", expectedSHA, env.SHA256) + } + // LFS_BLOB header should be removed + for _, h := range recs[0].Headers { + if h.Key == "LFS_BLOB" { + t.Fatal("LFS_BLOB header should be removed") + } + } +} + +func TestRewriteProduceRecordsPassthroughWithoutLFSBlob(t *testing.T) { + m, _ := testLFSModule(t) + records := []kmsg.Record{{ + Key: []byte("mykey"), + Value: []byte("regular record value"), + }} + batchBytes := buildTestBatch(records) + req := 
&kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 5000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: "test-topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }}, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + result, err := m.rewriteProduceRecords(context.Background(), header, req) + if err != nil { + t.Fatalf("rewriteProduceRecords: %v", err) + } + if result.modified { + t.Fatal("expected modified=false for records without LFS_BLOB header") + } +} + +func TestNilLFSModuleZeroOverhead(t *testing.T) { + p := &proxy{ + lfs: nil, + } + // Accessing p.lfs should be nil — the check in handleProduceRouting + // is simply `if p.lfs != nil`, which is a zero-cost nil pointer check. + if p.lfs != nil { + t.Fatal("expected nil lfs module") + } +} + +func TestRewriteProduceRecordsChecksumMismatch(t *testing.T) { + m, fs3 := testLFSModule(t) + blobPayload := []byte("checksum mismatch test data") + + records := []kmsg.Record{{ + Key: []byte("mykey"), + Value: blobPayload, + Headers: []kmsg.Header{ + {Key: "LFS_BLOB", Value: []byte("wrong-checksum-value")}, + }, + }} + batchBytes := buildTestBatch(records) + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 5000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: "test-topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }}, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + _, err := m.rewriteProduceRecords(context.Background(), header, req) + if err == nil { + t.Fatal("expected error on checksum mismatch") + } + var csErr *lfs.ChecksumError + if !errors.As(err, &csErr) { + t.Fatalf("expected *lfs.ChecksumError, got: %T: %v", err, err) + } + if csErr.Expected != "wrong-checksum-value" { + t.Fatalf("expected Expected=%q, got %q", "wrong-checksum-value", csErr.Expected) + } + // The S3 object should 
have been deleted + if len(fs3.deleted) == 0 { + t.Fatal("expected S3 object to be deleted on checksum mismatch") + } +} + +func TestRewriteProduceRecordsMixedRecords(t *testing.T) { + m, _ := testLFSModule(t) + blobPayload := []byte("lfs blob payload") + shaHasher := sha256.New() + shaHasher.Write(blobPayload) + expectedSHA := hex.EncodeToString(shaHasher.Sum(nil)) + + // Build batch with one LFS record and one regular record + records := []kmsg.Record{ + { + Key: []byte("lfs-key"), + Value: blobPayload, + Headers: []kmsg.Header{ + {Key: "LFS_BLOB", Value: []byte(expectedSHA)}, + }, + }, + { + Key: []byte("regular-key"), + Value: []byte("regular value that should stay unchanged"), + }, + } + batchBytes := buildTestBatch(records) + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 5000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: "mixed-topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }}, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + result, err := m.rewriteProduceRecords(context.Background(), header, req) + if err != nil { + t.Fatalf("rewriteProduceRecords: %v", err) + } + if !result.modified { + t.Fatal("expected modified=true") + } + + // Decode and verify: first record should be envelope, second should be unchanged + batches, err := lfsDecodeRecordBatches(req.Topics[0].Partitions[0].Records) + if err != nil { + t.Fatalf("decode batches: %v", err) + } + recs, _, err := lfsDecodeBatchRecords(&batches[0], nil) + if err != nil { + t.Fatalf("decode records: %v", err) + } + if len(recs) != 2 { + t.Fatalf("expected 2 records, got %d", len(recs)) + } + // First record: LFS envelope + env, err := lfs.DecodeEnvelope(recs[0].Value) + if err != nil { + t.Fatalf("first record should be LFS envelope: %v", err) + } + if env.Size != int64(len(blobPayload)) { + t.Fatalf("expected size=%d, got %d", len(blobPayload), env.Size) + } + // Second record: 
unchanged + if string(recs[1].Value) != "regular value that should stay unchanged" { + t.Fatalf("second record value changed: %s", string(recs[1].Value)) + } +} + +func TestBatchCRCIsValid(t *testing.T) { + m, _ := testLFSModule(t) + blobPayload := []byte("crc test data") + shaHasher := sha256.New() + shaHasher.Write(blobPayload) + expectedSHA := hex.EncodeToString(shaHasher.Sum(nil)) + + records := []kmsg.Record{{ + Key: []byte("k"), + Value: blobPayload, + Headers: []kmsg.Header{ + {Key: "LFS_BLOB", Value: []byte(expectedSHA)}, + }, + }} + batchBytes := buildTestBatch(records) + req := &kmsg.ProduceRequest{ + Acks: 1, + TimeoutMillis: 5000, + Topics: []kmsg.ProduceRequestTopic{{ + Topic: "crc-topic", + Partitions: []kmsg.ProduceRequestTopicPartition{{ + Partition: 0, + Records: batchBytes, + }}, + }}, + } + header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} + _, err := m.rewriteProduceRecords(context.Background(), header, req) + if err != nil { + t.Fatalf("rewriteProduceRecords: %v", err) + } + + // Verify batch CRC is valid + batches, err := lfsDecodeRecordBatches(req.Topics[0].Partitions[0].Records) + if err != nil { + t.Fatalf("decode batches: %v", err) + } + batch := &batches[0] + // Recompute CRC and compare + raw := batch.AppendTo(nil) + expectedCRC := int32(crc32.Checksum(raw[21:], lfsCRC32cTable)) + if batch.CRC != expectedCRC { + t.Fatalf("CRC mismatch: batch.CRC=%d, computed=%d", batch.CRC, expectedCRC) + } +} + +func TestLFSTopicsFromProduce(t *testing.T) { + req := &kmsg.ProduceRequest{ + Topics: []kmsg.ProduceRequestTopic{ + {Topic: "topic-a"}, + {Topic: "topic-b"}, + {Topic: "topic-a"}, // duplicate + }, + } + topics := lfsTopicsFromProduce(req) + if len(topics) != 2 { + t.Fatalf("expected 2 topics, got %d", len(topics)) + } + if topics[0] != "topic-a" || topics[1] != "topic-b" { + t.Fatalf("unexpected topics: %v", topics) + } +} + +func TestLFSTopicsFromProduceNil(t *testing.T) { + topics := 
lfsTopicsFromProduce(nil) + if topics != nil { + t.Fatalf("expected nil, got %v", topics) + } +} + +func TestLFSTopicsFromProduceEmpty(t *testing.T) { + req := &kmsg.ProduceRequest{} + topics := lfsTopicsFromProduce(req) + if len(topics) != 1 || topics[0] != "unknown" { + t.Fatalf("expected [unknown], got %v", topics) + } +} + +// Ensure unused variable warnings don't break: +var _ s3API = (*fakeS3)(nil) +var _ s3PresignAPI = (*fakePresign)(nil) +var _ types.CompletedPart diff --git a/cmd/proxy/lfs_tracker.go b/cmd/proxy/lfs_tracker.go new file mode 100644 index 00000000..dd7c4e4d --- /dev/null +++ b/cmd/proxy/lfs_tracker.go @@ -0,0 +1,372 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "errors" + "log/slog" + "math" + "sync" + "sync/atomic" + "time" + + "github.com/twmb/franz-go/pkg/kadm" + "github.com/twmb/franz-go/pkg/kerr" + "github.com/twmb/franz-go/pkg/kgo" +) + +const ( + defaultTrackerTopic = "__lfs_ops_state" + defaultTrackerBatchSize = 100 + defaultTrackerFlushMs = 100 + defaultTrackerChanSize = 10000 + defaultTrackerPartitions = 3 + defaultTrackerReplication = 1 +) + +// TrackerConfig holds configuration for the LFS operations tracker. 
+type TrackerConfig struct { + Enabled bool + Topic string + Brokers []string + BatchSize int + FlushMs int + ProxyID string + EnsureTopic bool + Partitions int + ReplicationFactor int +} + +// LfsOpsTracker tracks LFS operations by emitting events to a Kafka topic. +type LfsOpsTracker struct { + config TrackerConfig + client *kgo.Client + logger *slog.Logger + eventCh chan TrackerEvent + wg sync.WaitGroup + ctx context.Context + cancel context.CancelFunc + + // Circuit breaker state + circuitOpen uint32 + failures uint32 + lastSuccess int64 + failureThreshold uint32 + resetTimeout time.Duration + + // Metrics + eventsEmitted uint64 + eventsDropped uint64 + batchesSent uint64 +} + +// NewLfsOpsTracker creates a new tracker instance. +func NewLfsOpsTracker(ctx context.Context, cfg TrackerConfig, logger *slog.Logger) (*LfsOpsTracker, error) { + if !cfg.Enabled { + logger.Info("lfs ops tracker disabled") + return &LfsOpsTracker{config: cfg, logger: logger}, nil + } + + if cfg.Topic == "" { + cfg.Topic = defaultTrackerTopic + } + if cfg.BatchSize <= 0 { + cfg.BatchSize = defaultTrackerBatchSize + } + if cfg.FlushMs <= 0 { + cfg.FlushMs = defaultTrackerFlushMs + } + if cfg.Partitions <= 0 { + cfg.Partitions = defaultTrackerPartitions + } + if cfg.ReplicationFactor <= 0 { + cfg.ReplicationFactor = defaultTrackerReplication + } + if len(cfg.Brokers) == 0 { + logger.Warn("lfs ops tracker: no brokers configured, tracker disabled") + return &LfsOpsTracker{config: cfg, logger: logger}, nil + } + + opts := []kgo.Opt{ + kgo.SeedBrokers(cfg.Brokers...), + kgo.DefaultProduceTopic(cfg.Topic), + kgo.ProducerBatchMaxBytes(1024 * 1024), // 1MB max batch + kgo.ProducerLinger(time.Duration(cfg.FlushMs) * time.Millisecond), + kgo.RequiredAcks(kgo.LeaderAck()), + kgo.DisableIdempotentWrite(), // Not required for tracking events + } + + client, err := kgo.NewClient(opts...) 
+ if err != nil { + return nil, err + } + + if cfg.EnsureTopic { + if err := ensureTrackerTopic(ctx, client, cfg, logger); err != nil { + logger.Warn("lfs ops tracker: ensure topic failed", "topic", cfg.Topic, "error", err) + } + } + + trackerCtx, cancel := context.WithCancel(ctx) + t := &LfsOpsTracker{ + config: cfg, + client: client, + logger: logger, + eventCh: make(chan TrackerEvent, defaultTrackerChanSize), + ctx: trackerCtx, + cancel: cancel, + failureThreshold: 5, + resetTimeout: 30 * time.Second, + } + + t.wg.Add(1) + go t.runBatcher() + + logger.Info("lfs ops tracker started", "topic", cfg.Topic, "brokers", cfg.Brokers) + return t, nil +} + +// Emit sends a tracker event to the channel for async processing. +func (t *LfsOpsTracker) Emit(event TrackerEvent) { + if t == nil || !t.config.Enabled || t.client == nil { + return + } + + // Check circuit breaker + if atomic.LoadUint32(&t.circuitOpen) == 1 { + // Check if we should try to reset + if time.Now().UnixNano()-atomic.LoadInt64(&t.lastSuccess) > t.resetTimeout.Nanoseconds() { + atomic.StoreUint32(&t.circuitOpen, 0) + atomic.StoreUint32(&t.failures, 0) + t.logger.Info("lfs ops tracker: circuit breaker reset") + } else { + atomic.AddUint64(&t.eventsDropped, 1) + return + } + } + + select { + case t.eventCh <- event: + atomic.AddUint64(&t.eventsEmitted, 1) + default: + // Channel full, drop the event + atomic.AddUint64(&t.eventsDropped, 1) + t.logger.Debug("lfs ops tracker: event dropped, channel full") + } +} + +// runBatcher processes events from the channel and sends them in batches. +func (t *LfsOpsTracker) runBatcher() { + defer t.wg.Done() + + batch := make([]*kgo.Record, 0, t.config.BatchSize) + flushInterval := time.Duration(t.config.FlushMs) * time.Millisecond + ticker := time.NewTicker(flushInterval) + defer ticker.Stop() + + flush := func() { + if len(batch) == 0 { + return + } + + // Produce batch + results := t.client.ProduceSync(t.ctx, batch...) 
+ hasError := false + for _, result := range results { + if result.Err != nil { + hasError = true + t.logger.Warn("lfs ops tracker: produce failed", "error", result.Err) + } + } + + if hasError { + failures := atomic.AddUint32(&t.failures, 1) + if failures >= t.failureThreshold { + atomic.StoreUint32(&t.circuitOpen, 1) + t.logger.Warn("lfs ops tracker: circuit breaker opened", "failures", failures) + } + } else { + atomic.StoreUint32(&t.failures, 0) + atomic.StoreInt64(&t.lastSuccess, time.Now().UnixNano()) + atomic.AddUint64(&t.batchesSent, 1) + } + + batch = batch[:0] + } + + for { + select { + case <-t.ctx.Done(): + flush() + return + + case event := <-t.eventCh: + record, err := t.eventToRecord(event) + if err != nil { + t.logger.Warn("lfs ops tracker: failed to serialize event", "error", err, "type", event.GetEventType()) + continue + } + batch = append(batch, record) + if len(batch) >= t.config.BatchSize { + flush() + } + + case <-ticker.C: + flush() + } + } +} + +// eventToRecord converts a TrackerEvent to a Kafka record. 
+func (t *LfsOpsTracker) eventToRecord(event TrackerEvent) (*kgo.Record, error) { + value, err := event.Marshal() + if err != nil { + return nil, err + } + + return &kgo.Record{ + Key: []byte(event.GetTopic()), + Value: value, + }, nil +} + +func ensureTrackerTopic(ctx context.Context, client *kgo.Client, cfg TrackerConfig, logger *slog.Logger) error { + admin := kadm.NewClient(client) + var partitions int32 = defaultTrackerPartitions + if cfg.Partitions > 0 && cfg.Partitions <= math.MaxInt32 { + partitions = int32(cfg.Partitions) //nolint:gosec // bounds checked + } + var replication int16 = defaultTrackerReplication + if cfg.ReplicationFactor > 0 && cfg.ReplicationFactor <= math.MaxInt16 { + replication = int16(cfg.ReplicationFactor) //nolint:gosec // bounds checked + } + responses, err := admin.CreateTopics(ctx, partitions, replication, nil, cfg.Topic) + if err != nil { + return err + } + resp, ok := responses[cfg.Topic] + if !ok { + return errors.New("tracker topic response missing") + } + if resp.Err == nil || errors.Is(resp.Err, kerr.TopicAlreadyExists) { + logger.Info("lfs ops tracker topic ready", "topic", cfg.Topic, "partitions", cfg.Partitions, "replication", cfg.ReplicationFactor) + return nil + } + return resp.Err +} + +// Close gracefully shuts down the tracker. +func (t *LfsOpsTracker) Close() error { + if t == nil || t.client == nil { + return nil + } + + t.cancel() + t.wg.Wait() + t.client.Close() + + t.logger.Info("lfs ops tracker closed", + "events_emitted", atomic.LoadUint64(&t.eventsEmitted), + "events_dropped", atomic.LoadUint64(&t.eventsDropped), + "batches_sent", atomic.LoadUint64(&t.batchesSent), + ) + return nil +} + +// Stats returns tracker statistics. 
+func (t *LfsOpsTracker) Stats() TrackerStats { + if t == nil { + return TrackerStats{} + } + return TrackerStats{ + Enabled: t.config.Enabled, + Topic: t.config.Topic, + EventsEmitted: atomic.LoadUint64(&t.eventsEmitted), + EventsDropped: atomic.LoadUint64(&t.eventsDropped), + BatchesSent: atomic.LoadUint64(&t.batchesSent), + CircuitOpen: atomic.LoadUint32(&t.circuitOpen) == 1, + } +} + +// TrackerStats holds statistics about the tracker. +type TrackerStats struct { + Enabled bool `json:"enabled"` + Topic string `json:"topic"` + EventsEmitted uint64 `json:"events_emitted"` + EventsDropped uint64 `json:"events_dropped"` + BatchesSent uint64 `json:"batches_sent"` + CircuitOpen bool `json:"circuit_open"` +} + +// IsEnabled returns true if the tracker is enabled and ready. +func (t *LfsOpsTracker) IsEnabled() bool { + return t != nil && t.config.Enabled && t.client != nil +} + +// EmitUploadStarted emits an upload started event. +func (t *LfsOpsTracker) EmitUploadStarted(requestID, topic string, partition int32, s3Key, contentType, clientIP, apiType string, expectedSize int64) { + if !t.IsEnabled() { + return + } + event := NewUploadStartedEvent(t.config.ProxyID, requestID, topic, partition, s3Key, contentType, clientIP, apiType, expectedSize) + t.Emit(event) +} + +// EmitUploadCompleted emits an upload completed event. +func (t *LfsOpsTracker) EmitUploadCompleted(requestID, topic string, partition int32, kafkaOffset int64, s3Bucket, s3Key string, size int64, sha256, checksum, checksumAlg, contentType string, duration time.Duration) { + if !t.IsEnabled() { + return + } + event := NewUploadCompletedEvent(t.config.ProxyID, requestID, topic, partition, kafkaOffset, s3Bucket, s3Key, size, sha256, checksum, checksumAlg, contentType, duration.Milliseconds()) + t.Emit(event) +} + +// EmitUploadFailed emits an upload failed event. 
+func (t *LfsOpsTracker) EmitUploadFailed(requestID, topic, s3Key, errorCode, errorMessage, stage string, sizeUploaded int64, duration time.Duration) { + if !t.IsEnabled() { + return + } + event := NewUploadFailedEvent(t.config.ProxyID, requestID, topic, s3Key, errorCode, errorMessage, stage, sizeUploaded, duration.Milliseconds()) + t.Emit(event) +} + +// EmitDownloadRequested emits a download requested event. +func (t *LfsOpsTracker) EmitDownloadRequested(requestID, s3Bucket, s3Key, mode, clientIP string, ttlSeconds int) { + if !t.IsEnabled() { + return + } + event := NewDownloadRequestedEvent(t.config.ProxyID, requestID, s3Bucket, s3Key, mode, clientIP, ttlSeconds) + t.Emit(event) +} + +// EmitDownloadCompleted emits a download completed event. +func (t *LfsOpsTracker) EmitDownloadCompleted(requestID, s3Key, mode string, duration time.Duration, size int64) { + if !t.IsEnabled() { + return + } + event := NewDownloadCompletedEvent(t.config.ProxyID, requestID, s3Key, mode, duration.Milliseconds(), size) + t.Emit(event) +} + +// EmitOrphanDetected emits an orphan detected event. +func (t *LfsOpsTracker) EmitOrphanDetected(requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason string, size int64) { + if !t.IsEnabled() { + return + } + event := NewOrphanDetectedEvent(t.config.ProxyID, requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason, size) + t.Emit(event) +} diff --git a/cmd/proxy/lfs_tracker_types.go b/cmd/proxy/lfs_tracker_types.go new file mode 100644 index 00000000..455a5835 --- /dev/null +++ b/cmd/proxy/lfs_tracker_types.go @@ -0,0 +1,238 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "encoding/json" + "time" +) + +// Event types for LFS operations tracking. +const ( + EventTypeUploadStarted = "upload_started" + EventTypeUploadCompleted = "upload_completed" + EventTypeUploadFailed = "upload_failed" + EventTypeDownloadRequested = "download_requested" + EventTypeDownloadCompleted = "download_completed" + EventTypeOrphanDetected = "orphan_detected" +) + +// TrackerEventVersion is the current schema version for tracker events. +const TrackerEventVersion = 1 + +// BaseEvent contains common fields for all tracker events. +type BaseEvent struct { + EventType string `json:"event_type"` + EventID string `json:"event_id"` + Timestamp string `json:"timestamp"` + ProxyID string `json:"proxy_id"` + RequestID string `json:"request_id"` + Version int `json:"version"` +} + +// UploadStartedEvent is emitted when an upload operation begins. +type UploadStartedEvent struct { + BaseEvent + Topic string `json:"topic"` + Partition int32 `json:"partition"` + S3Key string `json:"s3_key"` + ContentType string `json:"content_type,omitempty"` + ExpectedSize int64 `json:"expected_size,omitempty"` + ClientIP string `json:"client_ip,omitempty"` + APIType string `json:"api_type"` // "http" or "kafka" +} + +// UploadCompletedEvent is emitted when an upload operation succeeds. 
+type UploadCompletedEvent struct { + BaseEvent + Topic string `json:"topic"` + Partition int32 `json:"partition"` + KafkaOffset int64 `json:"kafka_offset,omitempty"` + S3Bucket string `json:"s3_bucket"` + S3Key string `json:"s3_key"` + Size int64 `json:"size"` + SHA256 string `json:"sha256"` + Checksum string `json:"checksum,omitempty"` + ChecksumAlg string `json:"checksum_alg,omitempty"` + DurationMs int64 `json:"duration_ms"` + ContentType string `json:"content_type,omitempty"` +} + +// UploadFailedEvent is emitted when an upload operation fails. +type UploadFailedEvent struct { + BaseEvent + Topic string `json:"topic"` + S3Key string `json:"s3_key,omitempty"` + ErrorCode string `json:"error_code"` + ErrorMessage string `json:"error_message"` + Stage string `json:"stage"` // "validation", "s3_upload", "kafka_produce" + SizeUploaded int64 `json:"size_uploaded,omitempty"` + DurationMs int64 `json:"duration_ms"` +} + +// DownloadRequestedEvent is emitted when a download operation is requested. +type DownloadRequestedEvent struct { + BaseEvent + S3Bucket string `json:"s3_bucket"` + S3Key string `json:"s3_key"` + Mode string `json:"mode"` // "presign" or "stream" + ClientIP string `json:"client_ip,omitempty"` + TTLSeconds int `json:"ttl_seconds,omitempty"` +} + +// DownloadCompletedEvent is emitted when a download operation completes. +type DownloadCompletedEvent struct { + BaseEvent + S3Key string `json:"s3_key"` + Mode string `json:"mode"` + DurationMs int64 `json:"duration_ms"` + Size int64 `json:"size,omitempty"` +} + +// OrphanDetectedEvent is emitted when an orphaned S3 object is detected. 
+type OrphanDetectedEvent struct { + BaseEvent + DetectionSource string `json:"detection_source"` // "upload_failure", "reconciliation" + Topic string `json:"topic"` + S3Bucket string `json:"s3_bucket"` + S3Key string `json:"s3_key"` + Size int64 `json:"size,omitempty"` + OriginalRequestID string `json:"original_request_id,omitempty"` + Reason string `json:"reason"` // "kafka_produce_failed", "checksum_mismatch", etc. +} + +// TrackerEvent is a union type that can hold any tracker event. +type TrackerEvent interface { + GetEventType() string + GetTopic() string + Marshal() ([]byte, error) +} + +// GetEventType returns the event type. +func (e *BaseEvent) GetEventType() string { + return e.EventType +} + +// GetTopic returns the topic for partitioning. +func (e *UploadStartedEvent) GetTopic() string { return e.Topic } +func (e *UploadCompletedEvent) GetTopic() string { return e.Topic } +func (e *UploadFailedEvent) GetTopic() string { return e.Topic } +func (e *DownloadRequestedEvent) GetTopic() string { return "" } +func (e *DownloadCompletedEvent) GetTopic() string { return "" } +func (e *OrphanDetectedEvent) GetTopic() string { return e.Topic } + +// Marshal serializes the event to JSON. +func (e *UploadStartedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *UploadCompletedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *UploadFailedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *DownloadRequestedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *DownloadCompletedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } +func (e *OrphanDetectedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } + +// newBaseEvent creates a new base event with common fields. 
+func newBaseEvent(eventType, proxyID, requestID string) BaseEvent { + return BaseEvent{ + EventType: eventType, + EventID: newUUID(), + Timestamp: time.Now().UTC().Format(time.RFC3339Nano), + ProxyID: proxyID, + RequestID: requestID, + Version: TrackerEventVersion, + } +} + +// NewUploadStartedEvent creates a new upload started event. +func NewUploadStartedEvent(proxyID, requestID, topic string, partition int32, s3Key, contentType, clientIP, apiType string, expectedSize int64) *UploadStartedEvent { + return &UploadStartedEvent{ + BaseEvent: newBaseEvent(EventTypeUploadStarted, proxyID, requestID), + Topic: topic, + Partition: partition, + S3Key: s3Key, + ContentType: contentType, + ExpectedSize: expectedSize, + ClientIP: clientIP, + APIType: apiType, + } +} + +// NewUploadCompletedEvent creates a new upload completed event. +func NewUploadCompletedEvent(proxyID, requestID, topic string, partition int32, kafkaOffset int64, s3Bucket, s3Key string, size int64, sha256, checksum, checksumAlg, contentType string, durationMs int64) *UploadCompletedEvent { + return &UploadCompletedEvent{ + BaseEvent: newBaseEvent(EventTypeUploadCompleted, proxyID, requestID), + Topic: topic, + Partition: partition, + KafkaOffset: kafkaOffset, + S3Bucket: s3Bucket, + S3Key: s3Key, + Size: size, + SHA256: sha256, + Checksum: checksum, + ChecksumAlg: checksumAlg, + DurationMs: durationMs, + ContentType: contentType, + } +} + +// NewUploadFailedEvent creates a new upload failed event. +func NewUploadFailedEvent(proxyID, requestID, topic, s3Key, errorCode, errorMessage, stage string, sizeUploaded, durationMs int64) *UploadFailedEvent { + return &UploadFailedEvent{ + BaseEvent: newBaseEvent(EventTypeUploadFailed, proxyID, requestID), + Topic: topic, + S3Key: s3Key, + ErrorCode: errorCode, + ErrorMessage: errorMessage, + Stage: stage, + SizeUploaded: sizeUploaded, + DurationMs: durationMs, + } +} + +// NewDownloadRequestedEvent creates a new download requested event. 
+func NewDownloadRequestedEvent(proxyID, requestID, s3Bucket, s3Key, mode, clientIP string, ttlSeconds int) *DownloadRequestedEvent { + return &DownloadRequestedEvent{ + BaseEvent: newBaseEvent(EventTypeDownloadRequested, proxyID, requestID), + S3Bucket: s3Bucket, + S3Key: s3Key, + Mode: mode, + ClientIP: clientIP, + TTLSeconds: ttlSeconds, + } +} + +// NewDownloadCompletedEvent creates a new download completed event. +func NewDownloadCompletedEvent(proxyID, requestID, s3Key, mode string, durationMs, size int64) *DownloadCompletedEvent { + return &DownloadCompletedEvent{ + BaseEvent: newBaseEvent(EventTypeDownloadCompleted, proxyID, requestID), + S3Key: s3Key, + Mode: mode, + DurationMs: durationMs, + Size: size, + } +} + +// NewOrphanDetectedEvent creates a new orphan detected event. +func NewOrphanDetectedEvent(proxyID, requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason string, size int64) *OrphanDetectedEvent { + return &OrphanDetectedEvent{ + BaseEvent: newBaseEvent(EventTypeOrphanDetected, proxyID, requestID), + DetectionSource: detectionSource, + Topic: topic, + S3Bucket: s3Bucket, + S3Key: s3Key, + Size: size, + OriginalRequestID: originalRequestID, + Reason: reason, + } +} diff --git a/cmd/proxy/lfs_uuid.go b/cmd/proxy/lfs_uuid.go new file mode 100644 index 00000000..aa1fa49c --- /dev/null +++ b/cmd/proxy/lfs_uuid.go @@ -0,0 +1,22 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "github.com/google/uuid" + +func newUUID() string { + return uuid.NewString() +} diff --git a/cmd/proxy/main.go b/cmd/proxy/main.go index df4ecad3..b1b3839e 100644 --- a/cmd/proxy/main.go +++ b/cmd/proxy/main.go @@ -65,6 +65,7 @@ type proxy struct { metaFlight singleflight.Group backendRetries int backendBackoff time.Duration + lfs *lfsModule // nil when LFS disabled } func main() { @@ -144,6 +145,33 @@ func main() { p.setReady(true) } p.initMetadataCache(ctx) + + if lfsEnvBoolDefault("KAFSCALE_PROXY_LFS_ENABLED", false) { + lfsmod, err := initLFSModule(ctx, logger) + if err != nil { + logger.Error("lfs module init failed", "error", err) + os.Exit(1) + } + p.lfs = lfsmod + // Give the LFS HTTP API access to the proxy's backends for its own connections + if len(backends) > 0 { + lfsmod.backends = backends + lfsmod.setCachedBackends(backends) + } + logger.Info("LFS module enabled") + + // Start LFS HTTP API if configured + lfsHTTPAddr := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_ADDR")) + if lfsHTTPAddr != "" { + lfsmod.startHTTPServer(ctx, lfsHTTPAddr) + } + // Start LFS metrics server if configured + lfsMetricsAddr := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_METRICS_ADDR")) + if lfsMetricsAddr != "" { + lfsmod.startMetricsServer(ctx, lfsMetricsAddr) + } + } + if healthAddr != "" { p.startHealthServer(ctx, healthAddr) } @@ -157,6 +185,9 @@ func main() { if p.groupRouter != nil { p.groupRouter.Stop() } + if p.lfs != nil { + p.lfs.Shutdown() + } } func envOrDefault(key, fallback string) string { @@ -593,13 
+624,30 @@ func (p *proxy) handleProduceRouting(ctx context.Context, header *protocol.Reque return p.forwardProduceRaw(ctx, payload, pool) } + // LFS rewrite: detect LFS_BLOB headers, upload to S3, replace values + var lfsOrphans []orphanInfo + if p.lfs != nil { + rewritten, orphans, err := p.lfs.rewriteProduceRequest(ctx, header, produceReq) + if err != nil { + return nil, err + } + if rewritten { + payload = nil // force re-encode in fanOut + lfsOrphans = orphans + } + } + if produceReq.Acks == 0 { p.fireAndForgetProduce(ctx, header, produceReq, payload, pool) return nil, nil } groups := p.groupPartitionsByBroker(ctx, produceReq, nil) - return p.forwardProduce(ctx, header, produceReq, payload, groups, pool) + resp, err := p.forwardProduce(ctx, header, produceReq, payload, groups, pool) + if err != nil && p.lfs != nil && len(lfsOrphans) > 0 { + p.lfs.trackOrphans(lfsOrphans) + } + return resp, err } // forwardProduceRaw forwards an unparseable produce payload to any backend. diff --git a/cmd/proxy/openapi.yaml b/cmd/proxy/openapi.yaml new file mode 100644 index 00000000..065ad0e1 --- /dev/null +++ b/cmd/proxy/openapi.yaml @@ -0,0 +1,433 @@ +# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +openapi: 3.0.3 +info: + title: KafScale LFS Proxy HTTP API + version: 1.0.0 + description: | + The KafScale LFS (Large File Support) Proxy provides HTTP endpoints for producing + large binary objects to Kafka via S3-backed storage. Instead of sending large payloads + directly through Kafka, clients upload blobs to S3 and receive an envelope (pointer) + that is stored in Kafka. + + ## Authentication + + When API key authentication is enabled (via `KAFSCALE_LFS_PROXY_HTTP_API_KEY`), + requests must include one of: + - `X-API-Key` header with the API key + - `Authorization: Bearer ` header + + ## CORS + + The API supports CORS for browser-based clients. Preflight OPTIONS requests are handled automatically. + + ## Request Tracing + + All requests can include an optional `X-Request-ID` header for tracing. If not provided, + the proxy generates one and returns it in the response. + contact: + name: KafScale + url: https://github.com/KafScale/platform + license: + name: Apache 2.0 + url: https://www.apache.org/licenses/LICENSE-2.0 +servers: + - url: http://localhost:8080 + description: Local development + - url: http://lfs-proxy:8080 + description: Kubernetes in-cluster +tags: + - name: LFS + description: Large File Support operations +paths: + /lfs/produce: + post: + tags: + - LFS + summary: Upload and produce an LFS record + description: | + Streams a binary payload to the LFS proxy, which: + 1. Uploads the blob to S3 storage + 2. Computes checksums (SHA256 by default) + 3. Creates an LFS envelope with blob metadata + 4. Produces the envelope to the specified Kafka topic + + The response contains the full LFS envelope that was stored in Kafka. 
+ operationId: lfsProduce + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: header + name: X-Kafka-Topic + required: true + schema: + type: string + pattern: '^[a-zA-Z0-9._-]+$' + maxLength: 249 + description: Target Kafka topic name (alphanumeric, dots, underscores, hyphens only) + example: video-uploads + - in: header + name: X-Kafka-Key + required: false + schema: + type: string + description: Base64-encoded Kafka record key for partitioning + example: dXNlci0xMjM= + - in: header + name: X-Kafka-Partition + required: false + schema: + type: integer + format: int32 + minimum: 0 + description: Explicit partition number (overrides key-based partitioning) + example: 0 + - in: header + name: X-LFS-Checksum + required: false + schema: + type: string + description: Expected checksum of the payload for verification + example: abc123def456... + - in: header + name: X-LFS-Checksum-Alg + required: false + schema: + type: string + enum: [sha256, md5, crc32, none] + default: sha256 + description: Checksum algorithm for verification + - in: header + name: X-Request-ID + required: false + schema: + type: string + format: uuid + description: Request correlation ID for tracing + - in: header + name: Content-Type + required: false + schema: + type: string + description: MIME type of the payload (stored in envelope) + example: video/mp4 + requestBody: + required: true + description: Binary payload to upload + content: + application/octet-stream: + schema: + type: string + format: binary + '*/*': + schema: + type: string + format: binary + responses: + "200": + description: LFS envelope successfully produced to Kafka + headers: + X-Request-ID: + schema: + type: string + description: Request correlation ID + content: + application/json: + schema: + $ref: "#/components/schemas/LfsEnvelope" + example: + kfs_lfs: 1 + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + size: 10485760 + sha256: 
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum_alg: sha256 + content_type: video/mp4 + created_at: "2026-02-05T10:30:00Z" + proxy_id: lfs-proxy-0 + "400": + description: Invalid request (missing topic, invalid checksum, etc.) + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + examples: + missing_topic: + value: + code: missing_topic + message: missing topic + request_id: abc-123 + checksum_mismatch: + value: + code: checksum_mismatch + message: "expected abc123, got def456" + request_id: abc-123 + "401": + description: Unauthorized - API key required or invalid + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "502": + description: Upstream storage or Kafka failure + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "503": + description: Proxy not ready (backends unavailable) + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + options: + tags: + - LFS + summary: CORS preflight for produce endpoint + description: Handles CORS preflight requests for browser clients + responses: + "204": + description: CORS headers returned + headers: + Access-Control-Allow-Origin: + schema: + type: string + Access-Control-Allow-Methods: + schema: + type: string + Access-Control-Allow-Headers: + schema: + type: string + + /lfs/download: + post: + tags: + - LFS + summary: Download an LFS object + description: | + Retrieves an LFS object from S3 storage. Supports two modes: + + - **presign**: Returns a presigned S3 URL for direct download (default) + - **stream**: Streams the object content through the proxy + + For presign mode, the URL TTL is capped by server configuration. 
+ operationId: lfsDownload + security: + - ApiKeyAuth: [] + - BearerAuth: [] + - {} + parameters: + - in: header + name: X-Request-ID + required: false + schema: + type: string + format: uuid + description: Request correlation ID for tracing + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DownloadRequest" + examples: + presign: + summary: Get presigned URL + value: + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + mode: presign + expires_seconds: 300 + stream: + summary: Stream content + value: + bucket: kafscale-lfs + key: default/video-uploads/lfs/2026/02/05/abc123 + mode: stream + responses: + "200": + description: Presigned URL or streamed object content + content: + application/json: + schema: + $ref: "#/components/schemas/DownloadResponse" + example: + mode: presign + url: https://s3.amazonaws.com/kafscale-lfs/... + expires_at: "2026-02-05T10:35:00Z" + application/octet-stream: + schema: + type: string + format: binary + description: Streamed object content (when mode=stream) + "400": + description: Invalid request + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "401": + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "502": + description: Upstream storage failure + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + "503": + description: Proxy not ready + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + options: + tags: + - LFS + summary: CORS preflight for download endpoint + responses: + "204": + description: CORS headers returned + +components: + securitySchemes: + ApiKeyAuth: + type: apiKey + in: header + name: X-API-Key + description: API key for authentication + BearerAuth: + type: http + scheme: bearer + description: Bearer token authentication (same API key) + + schemas: + LfsEnvelope: + type: 
object + description: LFS envelope containing blob metadata and S3 location + properties: + kfs_lfs: + type: integer + format: int32 + description: LFS envelope version + example: 1 + bucket: + type: string + description: S3 bucket name + example: kafscale-lfs + key: + type: string + description: S3 object key + example: default/video-uploads/lfs/2026/02/05/abc123 + size: + type: integer + format: int64 + description: Blob size in bytes + example: 10485760 + sha256: + type: string + description: SHA256 hash of the blob + example: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + checksum: + type: string + description: Checksum value (algorithm depends on checksum_alg) + checksum_alg: + type: string + description: Checksum algorithm used + enum: [sha256, md5, crc32, none] + example: sha256 + content_type: + type: string + description: MIME type of the blob + example: video/mp4 + created_at: + type: string + format: date-time + description: Timestamp when the blob was created + example: "2026-02-05T10:30:00Z" + proxy_id: + type: string + description: ID of the proxy instance that handled the upload + example: lfs-proxy-0 + + DownloadRequest: + type: object + required: [bucket, key] + description: Request to download an LFS object + properties: + bucket: + type: string + description: S3 bucket name (must match proxy's configured bucket) + example: kafscale-lfs + key: + type: string + description: S3 object key from the LFS envelope + example: default/video-uploads/lfs/2026/02/05/abc123 + mode: + type: string + enum: [presign, stream] + default: presign + description: | + Download mode: + - presign: Return a presigned URL for direct S3 download + - stream: Stream content through the proxy + expires_seconds: + type: integer + format: int32 + default: 120 + minimum: 1 + maximum: 3600 + description: Requested presign URL TTL in seconds (capped by server) + + DownloadResponse: + type: object + description: Response for presign download mode + properties: + 
mode: + type: string + enum: [presign] + description: Download mode used + url: + type: string + format: uri + description: Presigned S3 URL for direct download + expires_at: + type: string + format: date-time + description: URL expiration timestamp + + ErrorResponse: + type: object + description: Error response returned for all error conditions + properties: + code: + type: string + description: Machine-readable error code + example: missing_topic + message: + type: string + description: Human-readable error message + example: missing topic + request_id: + type: string + description: Request correlation ID for support/debugging + example: abc-123-def-456 diff --git a/deploy/docker-compose/Makefile b/deploy/docker-compose/Makefile new file mode 100644 index 00000000..87b8fd23 --- /dev/null +++ b/deploy/docker-compose/Makefile @@ -0,0 +1,65 @@ +# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +.PHONY: up down logs ps restart health test-upload test-download clean help + +REGISTRY ?= 192.168.0.131:5100 +TAG ?= dev + +up: ## Start all services + REGISTRY=$(REGISTRY) TAG=$(TAG) docker-compose up -d + +down: ## Stop all services + docker-compose down + +logs: ## View logs (follow mode) + docker-compose logs -f + +ps: ## Show service status + docker-compose ps + +restart: ## Restart all services + docker-compose restart + +health: ## Check service health + @echo "=== Health Checks ===" + @echo -n "etcd: "; curl -s http://localhost:2379/health | head -c 50 || echo "FAIL" + @echo -n "minio: "; curl -s http://localhost:9000/minio/health/live || echo "FAIL" + @echo -n "lfs-proxy: "; curl -s http://localhost:9094/readyz || echo "FAIL" + @echo -n "broker: "; nc -z localhost 9092 && echo "OK" || echo "FAIL" + +test-upload: ## Test LFS upload (creates 1KB test file) + @echo "Uploading test file..." + @dd if=/dev/urandom bs=1024 count=1 2>/dev/null | \ + curl -s -X POST http://localhost:8080/lfs/produce \ + -H "X-Kafka-Topic: test-uploads" \ + -H "Content-Type: application/octet-stream" \ + --data-binary @- | jq . + +test-download: ## Test LFS download (requires KEY variable) + @if [ -z "$(KEY)" ]; then echo "Usage: make test-download KEY=default/topic/lfs/..."; exit 1; fi + @curl -s -X POST http://localhost:8080/lfs/download \ + -H "Content-Type: application/json" \ + -d '{"bucket":"kafscale","key":"$(KEY)","mode":"presign"}' | jq . + +clean: ## Stop services and remove volumes + docker-compose down -v + +registry-check: ## Check local registry + @echo "=== Registry Catalog ===" + @curl -s http://$(REGISTRY)/v2/_catalog | jq . 
+ +help: ## Show this help + @grep -E '^[a-zA-Z_-]+:.*?##' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "%-15s %s\n", $$1, $$2}' diff --git a/deploy/docker-compose/README.md b/deploy/docker-compose/README.md new file mode 100644 index 00000000..8d726abf --- /dev/null +++ b/deploy/docker-compose/README.md @@ -0,0 +1,313 @@ + + +# KafScale Docker Compose + +Local development platform using Docker Compose with images from a local registry. + +## Prerequisites + +1. Docker and Docker Compose installed +2. Images pushed to local registry (`192.168.0.131:5100`) +3. Docker configured for insecure registry (see below) + +### Configure Insecure Registry + +Docker Desktop → Settings → Docker Engine: + +```json +{ + "insecure-registries": ["192.168.0.131:5100"] +} +``` + +### Push Images to Local Registry + +```bash +# From repository root +make stage-release STAGE_REGISTRY=192.168.0.131:5100 STAGE_TAG=dev +``` + +### Verify Images + +```bash +curl http://192.168.0.131:5100/v2/_catalog +``` + +## Quick Start + +```bash +cd deploy/docker-compose + +# Start all services +docker-compose up -d + +# View logs +docker-compose logs -f + +# Stop all services +docker-compose down +``` + +## Services + +| Service | Port | Description | +|---------|------|-------------| +| **etcd** | 2379 | Coordination store | +| **minio** | 9000, 9001 | S3 storage (API, Console) | +| **broker** | 9092 | KafScale Kafka broker | +| **lfs-proxy** | 8080, 9093 | LFS HTTP API, Kafka protocol | +| **console** | 3080 | Web management console | +| **e72-browser-demo** | 3072 | Browser LFS demo (optional) | + +## Access Points + +| Service | URL | +|---------|-----| +| LFS HTTP API | http://localhost:8080 | +| MinIO Console | http://localhost:9001 | +| KafScale Console | http://localhost:3080 | +| E72 Browser Demo | http://localhost:3072 | +| Prometheus Metrics | http://localhost:9095/metrics | +| Health Check | http://localhost:9094/readyz | + +## Testing + +### Broker Advertised Address + 
+The broker must advertise its container hostname so other services can connect. +Docker Compose sets: + +- `KAFSCALE_BROKER_HOST=broker` +- `KAFSCALE_BROKER_PORT=9092` + +### Health Check + +```bash +curl http://localhost:9094/readyz +``` + +### LFS Upload + +```bash +# Upload a file +curl -X POST http://localhost:8080/lfs/produce \ + -H "X-Kafka-Topic: test-uploads" \ + -H "Content-Type: application/octet-stream" \ + --data-binary @myfile.bin + +# Upload with key +curl -X POST http://localhost:8080/lfs/produce \ + -H "X-Kafka-Topic: test-uploads" \ + -H "X-Kafka-Key: $(echo -n 'my-key' | base64)" \ + -H "Content-Type: video/mp4" \ + --data-binary @video.mp4 +``` + +### Large Uploads (Beast Mode) + +Docker Compose ships with a large-upload profile: + +- `KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE=7516192768` (7 GB) +- `KAFSCALE_LFS_PROXY_CHUNK_SIZE=16777216` (16 MB parts) +- `KAFSCALE_LFS_PROXY_HTTP_READ_TIMEOUT_SEC=1800` +- `KAFSCALE_LFS_PROXY_HTTP_WRITE_TIMEOUT_SEC=1800` +- `KAFSCALE_LFS_PROXY_HTTP_IDLE_TIMEOUT_SEC=120` + +These settings allow 6+ GB streaming uploads without hitting default limits. + +### LFS Download + +```bash +# Get presigned URL +curl -X POST http://localhost:8080/lfs/download \ + -H "Content-Type: application/json" \ + -d '{"bucket":"kafscale","key":"default/test-uploads/lfs/...","mode":"presign"}' +``` + +## Traceability + +Traceability is enabled in the compose file by default. It consists of: +- **LFS Ops Tracker** events emitted by the LFS proxy to `__lfs_ops_state` +- **Console LFS Dashboard** consuming those events and exposing APIs/UI + +### Where to see it + +1) **Console UI** + - Open http://localhost:3080 + - Navigate to the **LFS** tab for objects, topics, live events, and S3 browser. 
+ +2) **Raw events from Kafka** +```bash +kcat -b localhost:9092 -C -t __lfs_ops_state -o beginning +``` + +### Key settings (compose) + +LFS proxy tracker: +- `KAFSCALE_LFS_TRACKER_ENABLED=true` +- `KAFSCALE_LFS_TRACKER_TOPIC=__lfs_ops_state` +- `KAFSCALE_LFS_TRACKER_BATCH_SIZE=100` +- `KAFSCALE_LFS_TRACKER_FLUSH_MS=100` +- `KAFSCALE_LFS_TRACKER_ENSURE_TOPIC=true` +- `KAFSCALE_LFS_TRACKER_PARTITIONS=3` +- `KAFSCALE_LFS_TRACKER_REPLICATION_FACTOR=1` + +Console LFS dashboard: +- `KAFSCALE_CONSOLE_LFS_ENABLED=true` +- `KAFSCALE_CONSOLE_KAFKA_BROKERS=broker:9092` +- `KAFSCALE_CONSOLE_LFS_S3_*` set to MinIO credentials + +### Kafka (via kcat) + +```bash +# List topics +kcat -b localhost:9092 -L + +# Produce message (goes through regular broker, not LFS) +echo "hello" | kcat -b localhost:9092 -P -t test-topic + +# Consume messages +kcat -b localhost:9092 -C -t test-topic -o beginning +``` + +## Configuration + +### Environment Variables + +Edit `.env` to customize: + +```bash +# Registry settings +REGISTRY=192.168.0.131:5100 +TAG=dev + +# MinIO credentials +MINIO_ROOT_USER=minioadmin +MINIO_ROOT_PASSWORD=minioadmin +``` + +### Console Port Configuration + +The console listens on `KAFSCALE_CONSOLE_HTTP_ADDR` (default `:8080`). In the compose file +we set it to `:3080` and map `3080:3080`. + +### Console Login + +The console UI requires credentials. Compose sets: +- `KAFSCALE_UI_USERNAME=kafscaleadmin` +- `KAFSCALE_UI_PASSWORD=kafscale` + +Override these in `docker-compose.yaml` or via your own `.env.local` if needed. 
+ +### Override Registry/Tag + +```bash +REGISTRY=my-registry.local:5000 TAG=v1.5.0 docker-compose up -d +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Docker Compose Network │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ etcd │ │ minio │ │ broker │ │ console │ │ +│ │ :2379 │ │ :9000/01 │ │ :9092 │ │ :3080 │ │ +│ └────┬─────┘ └────┬─────┘ └────┬─────┘ └──────────┘ │ +│ │ │ │ │ +│ └───────────────┼───────────────┘ │ +│ │ │ +│ ┌───────┴───────┐ │ +│ │ lfs-proxy │ │ +│ │ :8080 (HTTP) │ │ +│ │ :9093 (Kafka) │ │ +│ └───────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + │ + ┌─────────┴─────────┐ + │ Host Machine │ + │ │ + │ localhost:8080 │ ← LFS HTTP API + │ localhost:9092 │ ← Kafka Broker + │ localhost:9001 │ ← MinIO Console + │ localhost:3080 │ ← KafScale Console + └───────────────────┘ +``` + +## Troubleshooting + +### Services not starting + +```bash +# Check service status +docker-compose ps + +# View logs for specific service +docker-compose logs lfs-proxy + +# Restart a service +docker-compose restart lfs-proxy +``` + +### Image pull fails + +```bash +# Verify registry is accessible +curl http://192.168.0.131:5100/v2/_catalog + +# Check Docker daemon config +docker info | grep -A5 "Insecure Registries" +``` + +### LFS upload fails + +```bash +# Check LFS proxy logs +docker-compose logs lfs-proxy + +# Verify MinIO is healthy +curl http://localhost:9000/minio/health/live + +# Check bucket exists +docker-compose exec minio mc ls local/ +``` + +### Reset everything + +```bash +# Stop and remove all containers, volumes +docker-compose down -v + +# Start fresh +docker-compose up -d +``` + +## Volumes + +| Volume | Purpose | +|--------|---------| +| `etcd-data` | etcd persistent storage | +| `minio-data` | MinIO object storage | +| `broker-data` | Kafka broker data | + +To persist data across restarts, volumes are used. 
To reset: + +```bash +docker-compose down -v +``` diff --git a/deploy/docker-compose/docker-compose.yaml b/deploy/docker-compose/docker-compose.yaml new file mode 100644 index 00000000..a06682b0 --- /dev/null +++ b/deploy/docker-compose/docker-compose.yaml @@ -0,0 +1,287 @@ +# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# KafScale Local Development Platform +# ==================================== +# Uses images from local registry at 192.168.0.131:5100 +# +# Usage: +# cd deploy/docker-compose +# docker-compose up -d +# +# Services: +# - etcd: http://localhost:2379 +# - minio: http://localhost:9000 (console: http://localhost:9001) +# - broker: localhost:9092 +# - lfs-proxy: localhost:9093 (kafka), http://localhost:8080 (HTTP API) +# - console: http://localhost:3080 +# +# Test LFS upload: +# curl -X POST http://localhost:8080/lfs/produce \ +# -H "X-Kafka-Topic: test-topic" \ +# -H "Content-Type: application/octet-stream" \ +# --data-binary @myfile.bin + +x-registry: &registry "192.168.0.131:5100" +x-tag: &tag "dev" + +services: + # ========================================================================== + # Infrastructure + # ========================================================================== + + etcd: + image: quay.io/coreos/etcd:v3.5.12 + container_name: kafscale-etcd-dc + command: + - etcd + - 
--name=etcd0 + - --data-dir=/etcd-data + - --advertise-client-urls=http://etcd:2379 + - --listen-client-urls=http://0.0.0.0:2379 + - --initial-advertise-peer-urls=http://etcd:2380 + - --listen-peer-urls=http://0.0.0.0:2380 + - --initial-cluster=etcd0=http://etcd:2380 + - --initial-cluster-state=new + - --initial-cluster-token=kafscale-local + ports: + - "2379:2379" + - "2380:2380" + volumes: + - etcd-data:/etcd-data + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - kafscale + + minio: + image: minio/minio:latest + container_name: kafscale-minio-dc + command: server /data --console-address ":9001" + environment: + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + ports: + - "9000:9000" # S3 API + - "9001:9001" # Console + volumes: + - minio-data:/data + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - kafscale + + minio-init: + image: minio/mc:latest + container_name: kafscale-minio-init-dc + depends_on: + minio: + condition: service_healthy + entrypoint: > + /bin/sh -c " + mc alias set local http://minio:9000 minioadmin minioadmin; + mc mb local/kafscale --ignore-existing; + mc anonymous set download local/kafscale; + echo 'Bucket kafscale created'; + exit 0; + " + networks: + - kafscale + + # ========================================================================== + # KafScale Platform + # ========================================================================== + + broker: + image: ${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-broker:${TAG:-dev} + container_name: kafscale-broker-dc + depends_on: + etcd: + condition: service_healthy + minio-init: + condition: service_completed_successfully + environment: + KAFSCALE_BROKER_ID: "0" + KAFSCALE_BROKER_ADDR: ":9092" + KAFSCALE_BROKER_HOST: "broker" + KAFSCALE_BROKER_PORT: "9092" + KAFSCALE_BROKER_ETCD_ENDPOINTS: "http://etcd:2379" + KAFSCALE_BROKER_DATA_DIR: 
"/data" + KAFSCALE_BROKER_LOG_LEVEL: "info" + # S3 settings (broker uses KAFSCALE_S3_* not KAFSCALE_BROKER_S3_*) + KAFSCALE_S3_BUCKET: "kafscale" + KAFSCALE_S3_REGION: "us-east-1" + KAFSCALE_S3_ENDPOINT: "http://minio:9000" + KAFSCALE_S3_ACCESS_KEY: "minioadmin" + KAFSCALE_S3_SECRET_KEY: "minioadmin" + KAFSCALE_S3_PATH_STYLE: "true" + ports: + - "9092:9092" + volumes: + - broker-data:/data + healthcheck: + test: ["CMD-SHELL", "nc -z localhost 9092 || exit 1"] + interval: 10s + timeout: 5s + retries: 10 + networks: + - kafscale + + lfs-proxy: + image: ${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-lfs-proxy:${TAG:-dev} + container_name: kafscale-lfs-proxy-dc + depends_on: + etcd: + condition: service_healthy + minio-init: + condition: service_completed_successfully + broker: + condition: service_healthy + environment: + # Kafka proxy settings + KAFSCALE_LFS_PROXY_ADDR: ":9093" + KAFSCALE_LFS_PROXY_ADVERTISED_HOST: "lfs-proxy" + KAFSCALE_LFS_PROXY_ADVERTISED_PORT: "9093" + KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS: "http://etcd:2379" + KAFSCALE_LFS_PROXY_BACKENDS: "broker:9092" + # HTTP API settings + KAFSCALE_LFS_PROXY_HTTP_ADDR: ":8080" + # Health & Metrics + KAFSCALE_LFS_PROXY_HEALTH_ADDR: ":9094" + KAFSCALE_LFS_PROXY_METRICS_ADDR: ":9095" + # S3 settings + KAFSCALE_LFS_PROXY_S3_BUCKET: "kafscale" + KAFSCALE_LFS_PROXY_S3_REGION: "us-east-1" + KAFSCALE_LFS_PROXY_S3_ENDPOINT: "http://minio:9000" + KAFSCALE_LFS_PROXY_S3_ACCESS_KEY: "minioadmin" + KAFSCALE_LFS_PROXY_S3_SECRET_KEY: "minioadmin" + KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE: "true" + KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET: "true" + # Blob settings (Beast mode) + KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE: "7516192768" # 7GB + KAFSCALE_LFS_PROXY_CHUNK_SIZE: "16777216" # 16MB + # HTTP timeouts for large uploads + KAFSCALE_LFS_PROXY_HTTP_READ_TIMEOUT_SEC: "1800" + KAFSCALE_LFS_PROXY_HTTP_WRITE_TIMEOUT_SEC: "1800" + KAFSCALE_LFS_PROXY_HTTP_IDLE_TIMEOUT_SEC: "120" + # Logging + KAFSCALE_LFS_PROXY_LOG_LEVEL: "info" + # Traceability 
(LFS Ops Tracker) + KAFSCALE_LFS_TRACKER_ENABLED: "true" + KAFSCALE_LFS_TRACKER_TOPIC: "__lfs_ops_state" + KAFSCALE_LFS_TRACKER_BATCH_SIZE: "100" + KAFSCALE_LFS_TRACKER_FLUSH_MS: "100" + KAFSCALE_LFS_TRACKER_ENSURE_TOPIC: "true" + KAFSCALE_LFS_TRACKER_PARTITIONS: "3" + KAFSCALE_LFS_TRACKER_REPLICATION_FACTOR: "1" + ports: + - "9093:9093" # Kafka protocol (LFS) + - "8080:8080" # HTTP API + - "9094:9094" # Health + - "9095:9095" # Metrics + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:9094/readyz || exit 1"] + interval: 10s + timeout: 5s + retries: 10 + networks: + - kafscale + + console: + image: ${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-console:${TAG:-dev} + container_name: kafscale-console-dc + depends_on: + etcd: + condition: service_healthy + broker: + condition: service_healthy + environment: + KAFSCALE_CONSOLE_HTTP_ADDR: ":3080" + KAFSCALE_CONSOLE_ETCD_ENDPOINTS: "http://etcd:2379" + KAFSCALE_CONSOLE_BROKER_METRICS_URL: "http://broker:8080/metrics" + KAFSCALE_CONSOLE_LOG_LEVEL: "info" + KAFSCALE_UI_USERNAME: "kafscaleadmin" + KAFSCALE_UI_PASSWORD: "kafscale" + # Traceability (LFS Console Dashboard) + KAFSCALE_CONSOLE_LFS_ENABLED: "true" + KAFSCALE_CONSOLE_KAFKA_BROKERS: "broker:9092" + KAFSCALE_LFS_TRACKER_TOPIC: "__lfs_ops_state" + KAFSCALE_CONSOLE_LFS_S3_BUCKET: "kafscale" + KAFSCALE_CONSOLE_LFS_S3_REGION: "us-east-1" + KAFSCALE_CONSOLE_LFS_S3_ENDPOINT: "http://minio:9000" + KAFSCALE_CONSOLE_LFS_S3_ACCESS_KEY: "minioadmin" + KAFSCALE_CONSOLE_LFS_S3_SECRET_KEY: "minioadmin" + KAFSCALE_CONSOLE_LFS_S3_PRESIGN_TTL: "300" + ports: + - "3080:3080" + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:3080/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - kafscale + + # ========================================================================== + # E72 Browser LFS SDK Demo + # ========================================================================== + + e72-browser-demo: + image: 
${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-e72-browser-demo:${TAG:-dev} + container_name: kafscale-e72-demo-dc + depends_on: + lfs-proxy: + condition: service_healthy + ports: + - "3072:80" + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost/index.html || exit 1"] + interval: 10s + timeout: 5s + retries: 3 + networks: + - kafscale + + # ========================================================================== + # Optional: Operator (for local testing only - normally runs in K8s) + # ========================================================================== + + # operator: + # image: ${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-operator:${TAG:-dev} + # container_name: kafscale-operator-dc + # depends_on: + # etcd: + # condition: service_healthy + # environment: + # KAFSCALE_OPERATOR_ETCD_ENDPOINTS: "http://etcd:2379" + # KAFSCALE_OPERATOR_BROKER_IMAGE: "${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-broker:${TAG:-dev}" + # KAFSCALE_OPERATOR_LOG_LEVEL: "info" + # networks: + # - kafscale + +volumes: + etcd-data: + minio-data: + broker-data: + +networks: + kafscale: + driver: bridge diff --git a/deploy/docker/lfs-proxy.Dockerfile b/deploy/docker/lfs-proxy.Dockerfile new file mode 100644 index 00000000..ff227352 --- /dev/null +++ b/deploy/docker/lfs-proxy.Dockerfile @@ -0,0 +1,46 @@ +# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# syntax=docker/dockerfile:1.7 + +ARG GO_VERSION=1.25.2 +FROM golang:${GO_VERSION}-alpine@sha256:06cdd34bd531b810650e47762c01e025eb9b1c7eadd191553b91c9f2d549fae8 AS builder + +ARG TARGETOS=linux +ARG TARGETARCH=amd64 + +WORKDIR /src +RUN apk add --no-cache git ca-certificates + +COPY go.mod go.sum ./ +RUN --mount=type=cache,target=/go/pkg/mod \ + --mount=type=cache,target=/root/.cache/go-build \ + go mod download +COPY . . + +RUN --mount=type=cache,target=/go/pkg/mod \ + --mount=type=cache,target=/root/.cache/go-build \ + CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \ + go build -ldflags="-s -w" -o /out/lfs-proxy ./cmd/lfs-proxy + +FROM alpine:3.19@sha256:6baf43584bcb78f2e5847d1de515f23499913ac9f12bdf834811a3145eb11ca1 +RUN apk add --no-cache ca-certificates && adduser -D -u 10001 kafscale +USER 10001 +WORKDIR /app + +COPY --from=builder /out/lfs-proxy /usr/local/bin/kafscale-lfs-proxy + +EXPOSE 9092 +ENTRYPOINT ["/usr/local/bin/kafscale-lfs-proxy"] diff --git a/deploy/helm/kafscale/README.md b/deploy/helm/kafscale/README.md new file mode 100644 index 00000000..4aa8cd22 --- /dev/null +++ b/deploy/helm/kafscale/README.md @@ -0,0 +1,311 @@ + + +# KafScale Helm Chart + +Helm chart for deploying KafScale components including the operator, console, proxy, LFS proxy, and MCP server. 
+ +## Prerequisites + +- Kubernetes 1.24+ +- Helm 3.x +- (Optional) Prometheus Operator for ServiceMonitor resources + +## Installation + +### Add the repository (if published) + +```bash +helm repo add kafscale https://charts.kafscale.io +helm repo update +``` + +### Install from local chart + +```bash +helm upgrade --install kafscale ./deploy/helm/kafscale \ + -n kafscale-system --create-namespace +``` + +## Components + +| Component | Description | Default | +|-----------|-------------|---------| +| **Operator** | KafScale cluster operator | Enabled | +| **Console** | Web-based management UI | Enabled | +| **Proxy** | Kafka protocol proxy | Disabled | +| **LFS Proxy** | Large File Support proxy | Disabled | +| **MCP** | Model Context Protocol server | Disabled | + +## Quick Start Examples + +### Minimal Installation + +```bash +helm upgrade --install kafscale ./deploy/helm/kafscale +``` + +### With LFS Proxy and MinIO + +```bash +helm upgrade --install kafscale ./deploy/helm/kafscale \ + --set lfsProxy.enabled=true \ + --set lfsProxy.http.enabled=true \ + --set lfsProxy.s3.bucket=kafscale \ + --set lfsProxy.s3.endpoint=http://minio:9000 \ + --set lfsProxy.s3.accessKey=minioadmin \ + --set lfsProxy.s3.secretKey=minioadmin \ + --set lfsProxy.s3.forcePathStyle=true +``` + +### LFS Demo Stack + +Deploy the full LFS demo stack with browser UI: + +```bash +helm upgrade --install kafscale ./deploy/helm/kafscale \ + -n kafscale-demo --create-namespace \ + -f ./deploy/helm/kafscale/values-lfs-demo.yaml \ + --set lfsProxy.s3.endpoint=http://minio:9000 \ + --set lfsProxy.s3.accessKey=minioadmin \ + --set lfsProxy.s3.secretKey=minioadmin +``` + +## Values Files + +| File | Description | +|------|-------------| +| `values.yaml` | Default values (production-ready defaults) | +| `values-lfs-demo.yaml` | LFS demo stack with browser UI enabled | + +## Configuration + +See [values.yaml](values.yaml) for the full list of configurable parameters. 
+ +### Key Sections + +| Section | Description | +|---------|-------------| +| `operator.*` | KafScale operator settings | +| `console.*` | Console UI settings | +| `proxy.*` | Kafka proxy settings | +| `lfsProxy.*` | LFS proxy settings | +| `lfsProxy.http.*` | HTTP API settings | +| `lfsProxy.http.cors.*` | CORS configuration | +| `lfsProxy.s3.*` | S3 storage backend | +| `lfsProxy.ingress.*` | HTTP ingress | +| `lfsDemos.*` | Demo applications | +| `mcp.*` | MCP server settings | + +## LFS Proxy + +The LFS Proxy implements the claim-check pattern for large Kafka messages: + +``` +┌─────────┐ ┌───────────┐ ┌─────────┐ +│ Client │────▶│ LFS Proxy │────▶│ S3 │ +│ (SDK) │ │ │ │ (blob) │ +└─────────┘ └─────┬─────┘ └─────────┘ + │ + ▼ + ┌───────────┐ + │ Kafka │ + │ (pointer) │ + └───────────┘ +``` + +### Enable HTTP API + +```yaml +lfsProxy: + enabled: true + http: + enabled: true + port: 8080 + cors: + enabled: true + allowOrigins: ["*"] +``` + +### S3 Configuration + +```yaml +lfsProxy: + s3: + bucket: my-lfs-bucket + region: us-east-1 + endpoint: "" # Leave empty for AWS S3 + existingSecret: s3-credentials # Recommended for production +``` + +For detailed LFS proxy documentation, see [docs/lfs-proxy/helm-deployment.md](../../../docs/lfs-proxy/helm-deployment.md). 
+ +### HTTP API Specification (OpenAPI/Swagger) + +The LFS Proxy HTTP API is documented using OpenAPI 3.0: + +| Resource | Location | +|----------|----------| +| **OpenAPI Spec** | [`api/lfs-proxy/openapi.yaml`](../../../api/lfs-proxy/openapi.yaml) | +| **Swagger UI** | Import the spec into [Swagger Editor](https://editor.swagger.io) or [Stoplight](https://stoplight.io) | + +**API Endpoints:** + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/lfs/produce` | POST | Upload blob to S3, produce pointer to Kafka | +| `/lfs/download` | POST | Get presigned URL or stream blob from S3 | +| `/readyz` | GET | Kubernetes readiness probe | +| `/livez` | GET | Kubernetes liveness probe | +| `/metrics` | GET | Prometheus metrics (port 9095) | + +**Example: View API spec locally:** +```bash +# Using Swagger UI Docker +docker run -p 8081:8080 -e SWAGGER_JSON=/spec/openapi.yaml \ + -v $(pwd)/api/lfs-proxy:/spec swaggerapi/swagger-ui + +# Open http://localhost:8081 +``` + +## Browser Demo (E72) + +The E72 browser demo provides a web UI for testing LFS uploads: + +```yaml +lfsDemos: + enabled: true + e72Browser: + enabled: true + service: + type: NodePort + nodePort: 30072 +``` + +Access via: `http://:30072` + +## Local Registry (Stage Release) + +For air-gapped or LAN installs, you can publish images to a local registry (for example `192.168.0.131:5100`) and point the chart at it. + +### 1) Configure Docker to allow the registry (insecure HTTP) + +Docker Desktop on macOS: + +1. Open Docker Desktop → Settings → Docker Engine. +2. Add the registry under `insecure-registries`: + ```json + { + "insecure-registries": ["192.168.0.131:5100"] + } + ``` +3. Apply & Restart Docker. 
+ +Verify: +```bash +docker info | grep -n "Insecure Registries" +docker info | grep -n "192.168.0.131" +``` + +### 2) Push images to the registry + +Use the stage release target (local buildx): +```bash +make stage-release STAGE_REGISTRY=192.168.0.131:5100 STAGE_TAG=dev +``` + +If you want to run the GitHub Actions workflow locally instead, use: +```bash +make stage-release-act STAGE_REGISTRY=192.168.0.131:5100 STAGE_TAG=dev +``` +This target builds a local `act` runner image first (`make act-image`) and executes the workflow inside that container. + +### 3) Install the chart using the staged registry + +```bash +helm upgrade --install kafscale ./deploy/helm/kafscale \ + -n kafscale-demo --create-namespace \ + --set global.imageRegistry=192.168.0.131:5100 +``` + +Note: if you set `global.imageRegistry`, individual component image repositories inherit it. + +## Monitoring + +### Enable ServiceMonitor + +```yaml +lfsProxy: + metrics: + enabled: true + serviceMonitor: + enabled: true + interval: 30s +``` + +### Enable PrometheusRule + +```yaml +lfsProxy: + metrics: + prometheusRule: + enabled: true +``` + +## Security + +### Credentials Best Practices + +1. **Use existing secrets** instead of inline values: + ```bash + kubectl create secret generic s3-creds \ + --from-literal=AWS_ACCESS_KEY_ID=xxx \ + --from-literal=AWS_SECRET_ACCESS_KEY=xxx + ``` + ```yaml + lfsProxy: + s3: + existingSecret: s3-creds + ``` + +2. **Enable API key** for HTTP endpoints: + ```yaml + lfsProxy: + http: + apiKey: "your-secure-key" + ``` + +3. 
**Restrict CORS origins** in production: + ```yaml + lfsProxy: + http: + cors: + allowOrigins: ["https://app.example.com"] + ``` + +## Uninstall + +```bash +helm uninstall kafscale -n kafscale-system +``` + +## Documentation + +- [LFS Proxy Helm Deployment](../../../docs/lfs-proxy/helm-deployment.md) +- [LFS Proxy Data Flow](../../../docs/lfs-proxy/data-flow.md) +- [LFS SDK Documentation](../../../docs/lfs-proxy/sdk-solution.md) +- [Operations Guide](../../../docs/operations.md) diff --git a/deploy/helm/kafscale/crds/kafscaleclusters.yaml b/deploy/helm/kafscale/crds/kafscaleclusters.yaml index c58f4726..5227d974 100644 --- a/deploy/helm/kafscale/crds/kafscaleclusters.yaml +++ b/deploy/helm/kafscale/crds/kafscaleclusters.yaml @@ -125,6 +125,82 @@ spec: type: string useKubeEtcd: type: boolean + + lfsProxy: + type: object + properties: + enabled: + type: boolean + replicas: + type: integer + minimum: 1 + image: + type: string + imagePullPolicy: + type: string + backends: + type: array + items: + type: string + advertisedHost: + type: string + advertisedPort: + type: integer + backendCacheTTLSeconds: + type: integer + service: + type: object + properties: + type: + type: string + annotations: + type: object + additionalProperties: + type: string + loadBalancerSourceRanges: + type: array + items: + type: string + port: + type: integer + http: + type: object + properties: + enabled: + type: boolean + port: + type: integer + apiKeySecretRef: + type: string + apiKeySecretKey: + type: string + metrics: + type: object + properties: + enabled: + type: boolean + port: + type: integer + health: + type: object + properties: + enabled: + type: boolean + port: + type: integer + s3: + type: object + properties: + namespace: + type: string + maxBlobSize: + type: integer + chunkSize: + type: integer + forcePathStyle: + type: boolean + ensureBucket: + type: boolean ui: type: object properties: diff --git a/deploy/helm/kafscale/templates/console-deployment.yaml 
b/deploy/helm/kafscale/templates/console-deployment.yaml index 319cadf0..d4d7ebf2 100644 --- a/deploy/helm/kafscale/templates/console-deployment.yaml +++ b/deploy/helm/kafscale/templates/console-deployment.yaml @@ -78,6 +78,42 @@ spec: {{- else if .Values.operator.metrics.enabled }} - name: KAFSCALE_CONSOLE_OPERATOR_METRICS_URL value: "{{ printf "http://%s-metrics.%s.svc.cluster.local:%d/metrics" (include "kafscale.componentName" (dict "root" . "component" "operator")) .Release.Namespace .Values.operator.metrics.port }}" +{{- end }} +{{- if .Values.console.lfs.enabled }} + - name: KAFSCALE_CONSOLE_LFS_ENABLED + value: "true" +{{- if .Values.console.lfs.kafkaBrokers }} + - name: KAFSCALE_CONSOLE_KAFKA_BROKERS + value: "{{ join "," .Values.console.lfs.kafkaBrokers }}" +{{- end }} +{{- if .Values.console.lfs.trackerTopic }} + - name: KAFSCALE_LFS_TRACKER_TOPIC + value: "{{ .Values.console.lfs.trackerTopic }}" +{{- end }} +{{- if .Values.console.lfs.s3.bucket }} + - name: KAFSCALE_CONSOLE_LFS_S3_BUCKET + value: "{{ .Values.console.lfs.s3.bucket }}" +{{- end }} +{{- if .Values.console.lfs.s3.region }} + - name: KAFSCALE_CONSOLE_LFS_S3_REGION + value: "{{ .Values.console.lfs.s3.region }}" +{{- end }} +{{- if .Values.console.lfs.s3.endpoint }} + - name: KAFSCALE_CONSOLE_LFS_S3_ENDPOINT + value: "{{ .Values.console.lfs.s3.endpoint }}" +{{- end }} +{{- if .Values.console.lfs.s3.accessKey }} + - name: KAFSCALE_CONSOLE_LFS_S3_ACCESS_KEY + value: "{{ .Values.console.lfs.s3.accessKey }}" +{{- end }} +{{- if .Values.console.lfs.s3.secretKey }} + - name: KAFSCALE_CONSOLE_LFS_S3_SECRET_KEY + value: "{{ .Values.console.lfs.s3.secretKey }}" +{{- end }} +{{- if .Values.console.lfs.s3.presignTTL }} + - name: KAFSCALE_CONSOLE_LFS_S3_PRESIGN_TTL + value: "{{ .Values.console.lfs.s3.presignTTL }}" +{{- end }} {{- end }} ports: - name: http diff --git a/deploy/helm/kafscale/templates/lfs-proxy-deployment.yaml b/deploy/helm/kafscale/templates/lfs-proxy-deployment.yaml new file mode 100644 
index 00000000..9b7dba92 --- /dev/null +++ b/deploy/helm/kafscale/templates/lfs-proxy-deployment.yaml @@ -0,0 +1,251 @@ +# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- if .Values.lfsProxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }} + labels: +{{ include "kafscale.labels" . | indent 4 }} + app.kubernetes.io/component: lfs-proxy +spec: + replicas: {{ .Values.lfsProxy.replicaCount }} + selector: + matchLabels: +{{ include "kafscale.componentSelectorLabels" (dict "root" . "component" "lfs-proxy") | indent 6 }} + template: + metadata: + labels: +{{ include "kafscale.componentSelectorLabels" (dict "root" . "component" "lfs-proxy") | indent 8 }} +{{- with .Values.lfsProxy.podAnnotations }} + annotations: +{{ toYaml . | indent 8 }} +{{- end }} + spec: +{{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{- range .Values.imagePullSecrets }} + - name: {{ . 
}} +{{- end }} +{{- end }} + containers: + - name: lfs-proxy + image: "{{ .Values.lfsProxy.image.repository }}:{{ ternary "latest" (default .Chart.AppVersion .Values.lfsProxy.image.tag) .Values.lfsProxy.image.useLatest }}" + imagePullPolicy: {{ ternary "Always" .Values.lfsProxy.image.pullPolicy .Values.lfsProxy.image.useLatest }} + env: + - name: KAFSCALE_LFS_PROXY_ADDR + value: ":{{ .Values.lfsProxy.service.port }}" + - name: KAFSCALE_LFS_PROXY_ADVERTISED_PORT + value: "{{ .Values.lfsProxy.advertisedPort }}" +{{- if .Values.lfsProxy.http.enabled }} + - name: KAFSCALE_LFS_PROXY_HTTP_ADDR + value: ":{{ .Values.lfsProxy.http.port }}" +{{- end }} +{{- if .Values.lfsProxy.http.apiKey }} + - name: KAFSCALE_LFS_PROXY_HTTP_API_KEY + value: "{{ .Values.lfsProxy.http.apiKey }}" +{{- end }} +{{- if and .Values.lfsProxy.http.cors .Values.lfsProxy.http.cors.enabled }} + - name: KAFSCALE_LFS_PROXY_HTTP_CORS_ENABLED + value: "true" +{{- if .Values.lfsProxy.http.cors.allowOrigins }} + - name: KAFSCALE_LFS_PROXY_HTTP_CORS_ALLOW_ORIGINS + value: "{{ join "," .Values.lfsProxy.http.cors.allowOrigins }}" +{{- end }} +{{- if .Values.lfsProxy.http.cors.allowMethods }} + - name: KAFSCALE_LFS_PROXY_HTTP_CORS_ALLOW_METHODS + value: "{{ join "," .Values.lfsProxy.http.cors.allowMethods }}" +{{- end }} +{{- if .Values.lfsProxy.http.cors.allowHeaders }} + - name: KAFSCALE_LFS_PROXY_HTTP_CORS_ALLOW_HEADERS + value: "{{ join "," .Values.lfsProxy.http.cors.allowHeaders }}" +{{- end }} +{{- if .Values.lfsProxy.http.cors.exposeHeaders }} + - name: KAFSCALE_LFS_PROXY_HTTP_CORS_EXPOSE_HEADERS + value: "{{ join "," .Values.lfsProxy.http.cors.exposeHeaders }}" +{{- end }} +{{- end }} +{{- if .Values.lfsProxy.health.enabled }} + - name: KAFSCALE_LFS_PROXY_HEALTH_ADDR + value: ":{{ .Values.lfsProxy.health.port }}" +{{- end }} +{{- if .Values.lfsProxy.metrics.enabled }} + - name: KAFSCALE_LFS_PROXY_METRICS_ADDR + value: ":{{ .Values.lfsProxy.metrics.port }}" +{{- end }} +{{- if 
.Values.lfsProxy.backendCacheTTLSeconds }} + - name: KAFSCALE_LFS_PROXY_BACKEND_CACHE_TTL_SEC + value: "{{ .Values.lfsProxy.backendCacheTTLSeconds }}" +{{- end }} +{{- if .Values.lfsProxy.advertisedHost }} + - name: KAFSCALE_LFS_PROXY_ADVERTISED_HOST + value: "{{ .Values.lfsProxy.advertisedHost }}" +{{- end }} +{{- if .Values.lfsProxy.etcdEndpoints }} + - name: KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS + value: "{{ join "," .Values.lfsProxy.etcdEndpoints }}" +{{- end }} +{{- if .Values.lfsProxy.etcd.existingSecret }} + - name: KAFSCALE_LFS_PROXY_ETCD_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Values.lfsProxy.etcd.existingSecret }} + key: ETCD_USERNAME + - name: KAFSCALE_LFS_PROXY_ETCD_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.lfsProxy.etcd.existingSecret }} + key: ETCD_PASSWORD +{{- else }} +{{- if .Values.lfsProxy.etcd.username }} + - name: KAFSCALE_LFS_PROXY_ETCD_USERNAME + value: "{{ .Values.lfsProxy.etcd.username }}" +{{- end }} +{{- if .Values.lfsProxy.etcd.password }} + - name: KAFSCALE_LFS_PROXY_ETCD_PASSWORD + value: "{{ .Values.lfsProxy.etcd.password }}" +{{- end }} +{{- end }} +{{- if .Values.lfsProxy.backends }} + - name: KAFSCALE_LFS_PROXY_BACKENDS + value: "{{ join "," .Values.lfsProxy.backends }}" +{{- end }} +{{- if .Values.lfsProxy.s3.bucket }} + - name: KAFSCALE_LFS_PROXY_S3_BUCKET + value: "{{ .Values.lfsProxy.s3.bucket }}" +{{- end }} +{{- if .Values.lfsProxy.s3.region }} + - name: KAFSCALE_LFS_PROXY_S3_REGION + value: "{{ .Values.lfsProxy.s3.region }}" +{{- end }} +{{- if .Values.lfsProxy.s3.endpoint }} + - name: KAFSCALE_LFS_PROXY_S3_ENDPOINT + value: "{{ .Values.lfsProxy.s3.endpoint }}" +{{- end }} +{{- if .Values.lfsProxy.s3.existingSecret }} + - name: KAFSCALE_LFS_PROXY_S3_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.lfsProxy.s3.existingSecret }} + key: AWS_ACCESS_KEY_ID + - name: KAFSCALE_LFS_PROXY_S3_SECRET_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.lfsProxy.s3.existingSecret }} + key: 
AWS_SECRET_ACCESS_KEY +{{- else }} +{{- if .Values.lfsProxy.s3.accessKey }} + - name: KAFSCALE_LFS_PROXY_S3_ACCESS_KEY + value: "{{ .Values.lfsProxy.s3.accessKey }}" +{{- end }} +{{- if .Values.lfsProxy.s3.secretKey }} + - name: KAFSCALE_LFS_PROXY_S3_SECRET_KEY + value: "{{ .Values.lfsProxy.s3.secretKey }}" +{{- end }} +{{- end }} +{{- if .Values.lfsProxy.s3.sessionToken }} + - name: KAFSCALE_LFS_PROXY_S3_SESSION_TOKEN + value: "{{ .Values.lfsProxy.s3.sessionToken }}" +{{- end }} +{{- if .Values.lfsProxy.s3.forcePathStyle }} + - name: KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE + value: "true" +{{- end }} +{{- if .Values.lfsProxy.s3.ensureBucket }} + - name: KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET + value: "true" +{{- end }} +{{- if .Values.lfsProxy.s3.maxBlobSize }} + - name: KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE + value: "{{ .Values.lfsProxy.s3.maxBlobSize }}" +{{- end }} +{{- if .Values.lfsProxy.s3.chunkSize }} + - name: KAFSCALE_LFS_PROXY_CHUNK_SIZE + value: "{{ .Values.lfsProxy.s3.chunkSize }}" +{{- end }} +{{- if .Values.lfsProxy.tracker.enabled }} + - name: KAFSCALE_LFS_TRACKER_ENABLED + value: "true" + - name: KAFSCALE_LFS_TRACKER_TOPIC + value: "{{ .Values.lfsProxy.tracker.topic }}" + - name: KAFSCALE_LFS_TRACKER_BATCH_SIZE + value: "{{ .Values.lfsProxy.tracker.batchSize }}" + - name: KAFSCALE_LFS_TRACKER_FLUSH_MS + value: "{{ .Values.lfsProxy.tracker.flushMs }}" + - name: KAFSCALE_LFS_TRACKER_ENSURE_TOPIC + value: "{{ .Values.lfsProxy.tracker.ensureTopic }}" + - name: KAFSCALE_LFS_TRACKER_PARTITIONS + value: "{{ .Values.lfsProxy.tracker.partitions }}" + - name: KAFSCALE_LFS_TRACKER_REPLICATION_FACTOR + value: "{{ .Values.lfsProxy.tracker.replicationFactor }}" +{{- else }} + - name: KAFSCALE_LFS_TRACKER_ENABLED + value: "false" +{{- end }} + ports: + - name: kafka + containerPort: {{ .Values.lfsProxy.service.port }} + protocol: TCP +{{- if .Values.lfsProxy.http.enabled }} + - name: http + containerPort: {{ .Values.lfsProxy.http.port }} + protocol: TCP +{{- end }} +{{- 
if .Values.lfsProxy.health.enabled }} + - name: health + containerPort: {{ .Values.lfsProxy.health.port }} + protocol: TCP +{{- end }} +{{- if .Values.lfsProxy.metrics.enabled }} + - name: metrics + containerPort: {{ .Values.lfsProxy.metrics.port }} + protocol: TCP +{{- end }} +{{- if .Values.lfsProxy.health.enabled }} + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 2 + periodSeconds: 5 + failureThreshold: 6 + livenessProbe: + httpGet: + path: /livez + port: health + initialDelaySeconds: 5 + periodSeconds: 10 + failureThreshold: 3 +{{- end }} + resources: +{{- if .Values.lfsProxy.resources }} +{{ toYaml .Values.lfsProxy.resources | indent 12 }} +{{- else }} + {} +{{- end }} +{{- with .Values.lfsProxy.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} +{{- end }} +{{- with .Values.lfsProxy.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} +{{- end }} +{{- with .Values.lfsProxy.affinity }} + affinity: +{{ toYaml . | indent 8 }} +{{- end }} +{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-http-ingress.yaml b/deploy/helm/kafscale/templates/lfs-proxy-http-ingress.yaml new file mode 100644 index 00000000..43f522d4 --- /dev/null +++ b/deploy/helm/kafscale/templates/lfs-proxy-http-ingress.yaml @@ -0,0 +1,57 @@ +# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +{{- if and .Values.lfsProxy.enabled .Values.lfsProxy.http.enabled .Values.lfsProxy.ingress.enabled }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy-http") }} + labels: +{{ include "kafscale.labels" . | indent 4 }} + app.kubernetes.io/component: lfs-proxy-http +{{- with .Values.lfsProxy.ingress.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: +{{- if .Values.lfsProxy.ingress.className }} + ingressClassName: {{ .Values.lfsProxy.ingress.className }} +{{- end }} +{{- if .Values.lfsProxy.ingress.tls }} + tls: +{{- range .Values.lfsProxy.ingress.tls }} + - hosts: +{{- range .hosts }} + - {{ . | quote }} +{{- end }} + secretName: {{ .secretName }} +{{- end }} +{{- end }} + rules: +{{- range .Values.lfsProxy.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: +{{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ include "kafscale.componentName" (dict "root" $ "component" "lfs-proxy") }} + port: + number: {{ $.Values.lfsProxy.http.port }} +{{- end }} +{{- end }} +{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-metrics-service.yaml b/deploy/helm/kafscale/templates/lfs-proxy-metrics-service.yaml new file mode 100644 index 00000000..6183ad4e --- /dev/null +++ b/deploy/helm/kafscale/templates/lfs-proxy-metrics-service.yaml @@ -0,0 +1,36 @@ +# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- if and .Values.lfsProxy.enabled .Values.lfsProxy.metrics.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }}-metrics + labels: +{{ include "kafscale.labels" . | indent 4 }} + app.kubernetes.io/component: lfs-proxy +{{- with .Values.lfsProxy.metrics.service.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: + type: ClusterIP + ports: + - name: metrics + port: {{ .Values.lfsProxy.metrics.port }} + targetPort: metrics + selector: +{{ include "kafscale.componentSelectorLabels" (dict "root" . "component" "lfs-proxy") | indent 4 }} +{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-prometheusrule.yaml b/deploy/helm/kafscale/templates/lfs-proxy-prometheusrule.yaml new file mode 100644 index 00000000..3cd1c886 --- /dev/null +++ b/deploy/helm/kafscale/templates/lfs-proxy-prometheusrule.yaml @@ -0,0 +1,46 @@ +# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +{{- if and .Values.lfsProxy.enabled .Values.lfsProxy.metrics.enabled .Values.lfsProxy.metrics.prometheusRule.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }} + labels: +{{ include "kafscale.labels" . | indent 4 }} +{{- with .Values.lfsProxy.metrics.prometheusRule.labels }} +{{ toYaml . | indent 4 }} +{{- end }} +spec: + groups: + - name: kafscale-lfs-proxy.rules + rules: + - alert: KafscaleLfsProxyS3Errors + expr: increase(kafscale_lfs_proxy_s3_errors_total[5m]) > 0 + for: 5m + labels: + severity: warning + annotations: + summary: LFS proxy S3 errors detected + description: LFS proxy is encountering S3 errors in the last 5 minutes. + - alert: KafscaleLfsProxyOrphanedObjects + expr: increase(kafscale_lfs_proxy_orphan_objects_total[10m]) > 0 + for: 10m + labels: + severity: warning + annotations: + summary: LFS proxy orphaned objects detected + description: LFS proxy created orphaned objects in the last 10 minutes. +{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-service.yaml b/deploy/helm/kafscale/templates/lfs-proxy-service.yaml new file mode 100644 index 00000000..06dd1c73 --- /dev/null +++ b/deploy/helm/kafscale/templates/lfs-proxy-service.yaml @@ -0,0 +1,47 @@ +# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- if .Values.lfsProxy.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }} + labels: +{{ include "kafscale.labels" . | indent 4 }} + app.kubernetes.io/component: lfs-proxy + {{- with .Values.lfsProxy.service.annotations }} + annotations: +{{ toYaml . | indent 4 }} + {{- end }} +spec: + type: {{ .Values.lfsProxy.service.type }} + {{- if .Values.lfsProxy.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: +{{ toYaml .Values.lfsProxy.service.loadBalancerSourceRanges | indent 4 }} + {{- end }} + selector: +{{ include "kafscale.componentSelectorLabels" (dict "root" . "component" "lfs-proxy") | indent 4 }} + ports: + - name: kafka + port: {{ .Values.lfsProxy.service.port }} + targetPort: kafka + protocol: TCP +{{- if .Values.lfsProxy.http.enabled }} + - name: http + port: {{ .Values.lfsProxy.http.port }} + targetPort: http + protocol: TCP +{{- end }} +{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-servicemonitor.yaml b/deploy/helm/kafscale/templates/lfs-proxy-servicemonitor.yaml new file mode 100644 index 00000000..1d9548e6 --- /dev/null +++ b/deploy/helm/kafscale/templates/lfs-proxy-servicemonitor.yaml @@ -0,0 +1,34 @@ +# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- if and .Values.lfsProxy.enabled .Values.lfsProxy.metrics.enabled .Values.lfsProxy.metrics.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }} + labels: +{{ include "kafscale.labels" . | indent 4 }} +{{- with .Values.lfsProxy.metrics.serviceMonitor.labels }} +{{ toYaml . | indent 4 }} +{{- end }} +spec: + selector: + matchLabels: +{{ include "kafscale.componentSelectorLabels" (dict "root" . "component" "lfs-proxy") | indent 6 }} + endpoints: + - port: metrics + interval: {{ .Values.lfsProxy.metrics.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.lfsProxy.metrics.serviceMonitor.scrapeTimeout }} +{{- end }} diff --git a/deploy/helm/kafscale/values-lfs-demo.yaml b/deploy/helm/kafscale/values-lfs-demo.yaml new file mode 100644 index 00000000..13f8d443 --- /dev/null +++ b/deploy/helm/kafscale/values-lfs-demo.yaml @@ -0,0 +1,97 @@ +# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# ============================================================================= +# LFS Demo Stack Values +# ============================================================================= +# This values file enables the LFS proxy with HTTP endpoint and browser demo. +# +# Usage: +# helm upgrade --install kafscale ./deploy/helm/kafscale \ +# -f ./deploy/helm/kafscale/values-lfs-demo.yaml \ +# --set lfsProxy.s3.bucket=my-bucket \ +# --set lfsProxy.s3.endpoint=http://minio:9000 \ +# --set lfsProxy.s3.accessKey=minioadmin \ +# --set lfsProxy.s3.secretKey=minioadmin +# +# For local development with port-forward: +# kubectl port-forward svc/kafscale-lfs-proxy 8080:8080 & +# kubectl port-forward svc/kafscale-lfs-demo-e72 3000:80 & +# open http://localhost:3000 +# ============================================================================= + +# Enable LFS Proxy with HTTP endpoint +lfsProxy: + enabled: true + replicaCount: 1 # Single replica for demo + + # HTTP API for browser uploads + http: + enabled: true + port: 8080 + apiKey: "" # No API key for demo (add for production) + cors: + enabled: true + allowOrigins: ["*"] # Allow all origins for demo + allowMethods: ["POST", "OPTIONS"] + allowHeaders: + - Content-Type + - X-Kafka-Topic + - X-Kafka-Key + - X-Kafka-Partition + - X-LFS-Checksum + - X-LFS-Checksum-Alg + - X-LFS-Size + - X-LFS-Mode + - X-Request-ID + - X-API-Key + - Authorization + exposeHeaders: + - X-Request-ID + + # S3 configuration - override these for your environment + s3: + bucket: kafscale + region: us-east-1 + endpoint: "" # Set to MinIO endpoint, e.g., http://minio:9000 + forcePathStyle: true # Required for MinIO + ensureBucket: true + maxBlobSize: 5368709120 # 5GB + chunkSize: 5242880 # 5MB + # Use existingSecret for credentials in production + existingSecret: "" + accessKey: "" # Set via --set or environment + secretKey: "" # Set via --set or 
environment + +# Enable LFS demos +lfsDemos: + enabled: true + e72Browser: + enabled: true + lfsProxyEndpoint: "" # Auto-detected: http://kafscale-lfs-proxy:8080/lfs/produce + defaultTopic: browser-uploads + service: + type: NodePort + port: 80 + nodePort: 30072 + ingress: + enabled: false + # Enable and configure for external access + # className: nginx + # hosts: + # - host: lfs-demo.example.com + # paths: + # - path: / + # pathType: Prefix diff --git a/deploy/helm/kafscale/values.yaml b/deploy/helm/kafscale/values.yaml index f671a0b3..93106932 100644 --- a/deploy/helm/kafscale/values.yaml +++ b/deploy/helm/kafscale/values.yaml @@ -23,7 +23,7 @@ rbac: create: true operator: - replicaCount: 2 + replicaCount: 1 image: repository: ghcr.io/kafscale/kafscale-operator tag: "" @@ -81,14 +81,27 @@ console: pullPolicy: IfNotPresent auth: username: "" - password: "" + password: null # Set via --set or use existingSecret etcdEndpoints: [] etcd: + existingSecret: "" # Name of existing Secret with ETCD_USERNAME and ETCD_PASSWORD keys username: "" - password: "" + password: null # Set via --set or use existingSecret metrics: brokerMetricsURL: "" operatorMetricsURL: "" + # LFS Dashboard configuration + lfs: + enabled: false + kafkaBrokers: [] # Kafka brokers for consuming tracker events + trackerTopic: "__lfs_ops_state" + s3: + bucket: "" + region: "us-east-1" + endpoint: "" + accessKey: "" + secretKey: "" + presignTTL: 300 podAnnotations: {} resources: {} nodeSelector: {} @@ -124,8 +137,9 @@ proxy: advertisedPort: 9092 etcdEndpoints: [] etcd: + existingSecret: "" # Name of existing Secret with ETCD_USERNAME and ETCD_PASSWORD keys username: "" - password: "" + password: null # Set via --set or use existingSecret backends: [] podAnnotations: {} resources: {} @@ -138,6 +152,117 @@ proxy: annotations: {} loadBalancerSourceRanges: [] +lfsProxy: + enabled: false + replicaCount: 2 + image: + repository: ghcr.io/kafscale/kafscale-lfs-proxy + tag: "" + useLatest: false + pullPolicy: 
IfNotPresent + health: + enabled: true + port: 9094 + metrics: + enabled: true + port: 9095 + service: + annotations: {} + serviceMonitor: + enabled: false + interval: 30s + scrapeTimeout: 10s + labels: {} + prometheusRule: + enabled: false + labels: {} + http: + enabled: false # Disabled by default for security; enable with apiKey set + port: 8080 + apiKey: "" # Required when http.enabled=true + cors: + enabled: false # Enable for browser access + allowOrigins: ["*"] # Restrict in production + allowMethods: ["POST", "OPTIONS"] + allowHeaders: ["Content-Type", "X-Kafka-Topic", "X-Kafka-Key", "X-Kafka-Partition", "X-LFS-Checksum", "X-LFS-Checksum-Alg", "X-LFS-Size", "X-LFS-Mode", "X-Request-ID", "X-API-Key", "Authorization"] + exposeHeaders: ["X-Request-ID"] + ingress: + enabled: false + className: "" + annotations: {} + hosts: + - host: lfs.local + paths: + - path: /lfs + pathType: Prefix + tls: [] + backendCacheTTLSeconds: 60 + advertisedHost: "" + advertisedPort: 9092 + etcdEndpoints: [] + etcd: + existingSecret: "" # Name of existing Secret with ETCD_USERNAME and ETCD_PASSWORD keys + username: "" + password: null # Set via --set or use existingSecret + backends: [] + s3: + bucket: "" + region: "" + endpoint: "" + # Credentials: use existingSecret (preferred) or inline values (not recommended) + existingSecret: "" # Name of existing Secret with AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY keys + accessKey: "" # Deprecated: use existingSecret instead + secretKey: "" # Deprecated: use existingSecret instead + sessionToken: "" + forcePathStyle: false + ensureBucket: false + maxBlobSize: 7516192768 + chunkSize: 16777216 + tracker: + enabled: true + topic: "__lfs_ops_state" + batchSize: 100 + flushMs: 100 + ensureTopic: true + partitions: 3 + replicationFactor: 1 + podAnnotations: {} + resources: {} + nodeSelector: {} + tolerations: [] + affinity: {} + service: + type: ClusterIP # Changed from LoadBalancer for security; use Ingress for external access + port: 9092 + 
annotations: {} + loadBalancerSourceRanges: [] + +lfsDemos: + enabled: false + e72Browser: + enabled: true # Browser LFS SDK demo + lfsProxyEndpoint: "" # Auto-detected from cluster if empty + defaultTopic: browser-uploads + image: + repository: ghcr.io/kafscale/kafscale-e72-browser-demo + tag: "" + useLatest: false + pullPolicy: IfNotPresent + service: + type: NodePort + port: 80 + nodePort: 30072 + ingress: + enabled: false + className: "" + annotations: {} + hosts: + - host: lfs-demo.local + paths: + - path: / + pathType: Prefix + tls: [] + mcp: enabled: false namespace: @@ -156,8 +281,9 @@ mcp: token: "" etcdEndpoints: [] etcd: + existingSecret: "" # Name of existing Secret with ETCD_USERNAME and ETCD_PASSWORD keys username: "" - password: "" + password: null # Set via --set or use existingSecret metrics: brokerMetricsURL: "" sessionTimeout: "" diff --git a/go.mod b/go.mod index 84593852..1fda316b 100644 --- a/go.mod +++ b/go.mod @@ -8,9 +8,11 @@ require ( github.com/aws/aws-sdk-go-v2/credentials v1.19.10 github.com/aws/aws-sdk-go-v2/service/s3 v1.96.0 github.com/aws/smithy-go v1.24.1 + github.com/google/uuid v1.6.0 github.com/modelcontextprotocol/go-sdk v1.3.1 github.com/prometheus/client_golang v1.23.2 github.com/twmb/franz-go v1.20.7 + github.com/twmb/franz-go/pkg/kadm v1.17.1 github.com/twmb/franz-go/pkg/kmsg v1.12.0 go.etcd.io/etcd/client/v3 v3.6.8 go.etcd.io/etcd/server/v3 v3.6.8 @@ -65,7 +67,6 @@ require ( github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/jsonschema-go v0.4.2 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.0 // indirect diff --git a/go.sum b/go.sum index af37d466..92d9ed4f 100644 --- a/go.sum +++ b/go.sum @@ -192,6 +192,8 @@ 
github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 h1:6fotK7 github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75/go.mod h1:KO6IkyS8Y3j8OdNO85qEYBsRPuteD+YciPomcXdrMnk= github.com/twmb/franz-go v1.20.7 h1:P4MGSXJjjAPP3NRGPCks/Lrq+j+twWMVl1qYCVgNmWY= github.com/twmb/franz-go v1.20.7/go.mod h1:0bRX9HZVaoueqFWhPZNi2ODnJL7DNa6mK0HeCrC2bNU= +github.com/twmb/franz-go/pkg/kadm v1.17.1 h1:Bt02Y/RLgnFO2NP2HVP1kd2TFtGRiJZx+fSArjZDtpw= +github.com/twmb/franz-go/pkg/kadm v1.17.1/go.mod h1:s4duQmrDbloVW9QTMXhs6mViTepze7JLG43xwPcAeTg= github.com/twmb/franz-go/pkg/kmsg v1.12.0 h1:CbatD7ers1KzDNgJqPbKOq0Bz/WLBdsTH75wgzeVaPc= github.com/twmb/franz-go/pkg/kmsg v1.12.0/go.mod h1:+DPt4NC8RmI6hqb8G09+3giKObE6uD2Eya6CfqBpeJY= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= diff --git a/hack/check_coverage.sh b/hack/check_coverage.sh index be2de411..d9100ec4 100644 --- a/hack/check_coverage.sh +++ b/hack/check_coverage.sh @@ -17,7 +17,22 @@ set -euo pipefail MIN_COVERAGE="${1:-45}" -go test ./... -coverprofile=coverage.out +# Packages excluded from coverage: generated code, test utilities, demo tools, +# embed-only wrappers, e2e tests, CLI entry points, and addon/skeleton packages. +EXCLUDE=( + "github.com/KafScale/platform/api/v1alpha1" + "github.com/KafScale/platform/pkg/gen/" + "github.com/KafScale/platform/internal/testutil" + "github.com/KafScale/platform/ui" + "github.com/KafScale/platform/cmd/" + "github.com/KafScale/platform/test" + "github.com/KafScale/platform/addons/" +) + +# Build package list excluding non-testable packages. +PKGS=$(go list ./... 
| grep -v -F "$(printf '%s\n' "${EXCLUDE[@]}")") + +go test -coverprofile=coverage.out $PKGS total=$(go tool cover -func=coverage.out | awk '/^total:/ {gsub(/%/,"",$3); print $3}') if [ -z "$total" ]; then diff --git a/hack/check_license_headers.py b/hack/check_license_headers.py index 8148e432..4f37cd30 100644 --- a/hack/check_license_headers.py +++ b/hack/check_license_headers.py @@ -46,6 +46,7 @@ SKIP_PREFIXES = ( ".git/", + ".claude/", "bin/", ".gocache/", ".gopath/", @@ -53,9 +54,23 @@ ".vscode/", "third_party/", "pkg/gen/", + "lfs-client-sdk/js/node_modules/", + "lfs-client-sdk/java/target/", + "lfs-client-sdk/python/kafscale_lfs_sdk.egg-info/", + "examples/", + "deploy/demo/", + "deploy/templates/", ) -SKIP_FILES = {"LICENSE", "NOTICE"} +SKIP_FILES = {"LICENSE", "NOTICE", "records.txt"} + + +def _in_node_modules(rel: str) -> bool: + return "/node_modules/" in rel or rel.startswith("node_modules/") + + +def _in_build_artifacts(rel: str) -> bool: + return "/target/" in rel or "/egg-info/" in rel def git_files() -> list[str]: @@ -74,6 +89,8 @@ def should_check(path: pathlib.Path, rel: str) -> bool: return False if any(rel.startswith(prefix) for prefix in SKIP_PREFIXES): return False + if _in_node_modules(rel) or _in_build_artifacts(rel): + return False if path.name in SPECIAL_FILENAMES: return True return path.suffix in CHECK_EXTS diff --git a/internal/console/lfs_consumer.go b/internal/console/lfs_consumer.go new file mode 100644 index 00000000..efcf907c --- /dev/null +++ b/internal/console/lfs_consumer.go @@ -0,0 +1,206 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package console
+
+import (
+	"context"
+	"encoding/json"
+	"log"
+	"sync"
+	"time"
+
+	"github.com/twmb/franz-go/pkg/kgo"
+)
+
+// LFSConsumer consumes events from the __lfs_ops_state topic
+type LFSConsumer struct {
+	client   *kgo.Client
+	topic    string
+	handlers *LFSHandlers
+	logger   *log.Logger
+
+	ctx    context.Context
+	cancel context.CancelFunc
+	wg     sync.WaitGroup
+
+	// statusMu guards the status fields below (lastError, lastErrorAt, lastPollAt).
+	statusMu    sync.RWMutex
+	lastError   string
+	lastErrorAt time.Time
+	lastPollAt  time.Time
+}
+
+// LFSConsumerConfig holds configuration for the LFS consumer
+type LFSConsumerConfig struct {
+	Brokers []string
+	Topic   string
+	GroupID string
+}
+
+// NewLFSConsumer creates a new LFS tracker events consumer.
+// NOTE(review): when no brokers are configured this returns (nil, nil);
+// callers must nil-check the returned consumer, not only the error.
+func NewLFSConsumer(ctx context.Context, cfg LFSConsumerConfig, handlers *LFSHandlers, logger *log.Logger) (*LFSConsumer, error) {
+	if logger == nil {
+		logger = log.Default()
+	}
+
+	if len(cfg.Brokers) == 0 {
+		logger.Println("lfs consumer: no brokers configured")
+		return nil, nil
+	}
+
+	if cfg.Topic == "" {
+		cfg.Topic = "__lfs_ops_state"
+	}
+
+	if cfg.GroupID == "" {
+		cfg.GroupID = "kafscale-console-lfs"
+	}
+
+	// Auto-commit is disabled: offsets are committed explicitly after each
+	// poll in consumeLoop.
+	opts := []kgo.Opt{
+		kgo.SeedBrokers(cfg.Brokers...),
+		kgo.ConsumerGroup(cfg.GroupID),
+		kgo.ConsumeTopics(cfg.Topic),
+		kgo.ConsumeResetOffset(kgo.NewOffset().AtStart()),
+		kgo.DisableAutoCommit(),
+	}
+
+	client, err := kgo.NewClient(opts...)
+	if err != nil {
+		return nil, err
+	}
+
+	consumerCtx, cancel := context.WithCancel(ctx)
+
+	c := &LFSConsumer{
+		client:   client,
+		topic:    cfg.Topic,
+		handlers: handlers,
+		logger:   logger,
+		ctx:      consumerCtx,
+		cancel:   cancel,
+	}
+
+	return c, nil
+}
+
+// Start begins consuming events
+func (c *LFSConsumer) Start() {
+	c.wg.Add(1)
+	go c.consumeLoop()
+	c.logger.Printf("lfs consumer started, topic=%s", c.topic)
+}
+
+// consumeLoop continuously polls for new events
+func (c *LFSConsumer) consumeLoop() {
+	defer c.wg.Done()
+
+	for {
+		select {
+		case <-c.ctx.Done():
+			return
+		default:
+		}
+
+		fetches := c.client.PollFetches(c.ctx)
+		if fetches.IsClientClosed() {
+			return
+		}
+
+		if errs := fetches.Errors(); len(errs) > 0 {
+			for _, err := range errs {
+				c.logger.Printf("lfs consumer fetch error: topic=%s partition=%d error=%v",
+					err.Topic, err.Partition, err.Err)
+				c.setError(err.Err)
+			}
+			continue
+		}
+
+		c.setPollSuccess()
+		fetches.EachRecord(func(record *kgo.Record) {
+			c.processRecord(record)
+		})
+
+		// Commit offsets
+		// NOTE(review): a commit is attempted after every successful poll,
+		// including empty ones; records are processed before the commit, so
+		// delivery to handlers is at-least-once.
+		if err := c.client.CommitUncommittedOffsets(c.ctx); err != nil {
+			c.logger.Printf("lfs consumer commit error: %v", err)
+		}
+	}
+}
+
+// processRecord handles a single tracker event record
+func (c *LFSConsumer) processRecord(record *kgo.Record) {
+	// Nil or empty records are skipped silently.
+	if record == nil || len(record.Value) == 0 {
+		return
+	}
+
+	// Parse the event
+	var event LFSEvent
+	if err := json.Unmarshal(record.Value, &event); err != nil {
+		c.logger.Printf("lfs consumer: failed to parse event: %v", err)
+		return
+	}
+
+	// Forward to handlers for processing
+	if c.handlers != nil {
+		c.handlers.ProcessEvent(event)
+	}
+}
+
+// Close stops the consumer and releases resources
+func (c *LFSConsumer) Close() error {
+	// Cancel the poll context first so consumeLoop can exit, then wait for it.
+	c.cancel()
+	c.wg.Wait()
+	c.client.Close()
+	c.logger.Println("lfs consumer closed")
+	return nil
+}
+
+// Status returns the current consumer status.
+func (c *LFSConsumer) Status() LFSConsumerStatus {
+	c.statusMu.RLock()
+	defer c.statusMu.RUnlock()
+
+	status := LFSConsumerStatus{
+		// NOTE(review): After(time.Time{}) is equivalent to !IsZero() here.
+		Connected: c.lastPollAt.After(time.Time{}),
+		LastError: c.lastError,
+	}
+	if !c.lastErrorAt.IsZero() {
+		status.LastErrorAt = c.lastErrorAt.UTC().Format(time.RFC3339)
+	}
+	if !c.lastPollAt.IsZero() {
+		status.LastPollAt = c.lastPollAt.UTC().Format(time.RFC3339)
+	}
+	return status
+}
+
+func (c *LFSConsumer) setError(err error) {
+	if err == nil {
+		return
+	}
+	c.statusMu.Lock()
+	defer c.statusMu.Unlock()
+	c.lastError = err.Error()
+	c.lastErrorAt = time.Now()
+}
+
+func (c *LFSConsumer) setPollSuccess() {
+	c.statusMu.Lock()
+	defer c.statusMu.Unlock()
+	c.lastPollAt = time.Now()
+	// A successful poll observed after the last error clears the error state.
+	// NOTE(review): since lastErrorAt is always <= now, this condition is
+	// effectively always true after an error — confirm that is intended.
+	if c.lastErrorAt.Before(c.lastPollAt) {
+		c.lastError = ""
+		c.lastErrorAt = time.Time{}
+	}
+}
diff --git a/internal/console/lfs_consumer_test.go b/internal/console/lfs_consumer_test.go
new file mode 100644
index 00000000..1fb45922
--- /dev/null
+++ b/internal/console/lfs_consumer_test.go
@@ -0,0 +1,167 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package console
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/twmb/franz-go/pkg/kgo"
+)
+
+// Verifies the (nil, nil) contract when no brokers are configured.
+func TestNewLFSConsumerNoBrokers(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	consumer, err := NewLFSConsumer(context.Background(), LFSConsumerConfig{}, handlers, nil)
+	if err != nil {
+		t.Fatalf("NewLFSConsumer: %v", err)
+	}
+	if consumer != nil {
+		t.Fatal("expected nil consumer when no brokers configured")
+	}
+}
+
+func TestLFSConsumerStatusInitial(t *testing.T) {
+	c := &LFSConsumer{
+		statusMu: sync.RWMutex{},
+	}
+	status := c.Status()
+	if status.Connected {
+		t.Fatal("expected not connected initially")
+	}
+	if status.LastError != "" {
+		t.Fatalf("expected empty error: %q", status.LastError)
+	}
+	if status.LastPollAt != "" {
+		t.Fatalf("expected empty poll time: %q", status.LastPollAt)
+	}
+}
+
+func TestLFSConsumerSetError(t *testing.T) {
+	c := &LFSConsumer{
+		statusMu: sync.RWMutex{},
+	}
+	c.setError(errors.New("kafka unreachable"))
+	status := c.Status()
+	if status.LastError != "kafka unreachable" {
+		t.Fatalf("error: %q", status.LastError)
+	}
+	if status.LastErrorAt == "" {
+		t.Fatal("expected error time set")
+	}
+}
+
+func TestLFSConsumerSetErrorNil(t *testing.T) {
+	c := &LFSConsumer{
+		statusMu: sync.RWMutex{},
+	}
+	c.setError(nil) // should not panic or set anything
+	status := c.Status()
+	if status.LastError != "" {
+		t.Fatalf("expected empty error: %q", status.LastError)
+	}
+}
+
+func TestLFSConsumerSetPollSuccess(t *testing.T) {
+	c := &LFSConsumer{
+		statusMu: sync.RWMutex{},
+	}
+	// Set an error first
+	c.setError(errors.New("temp error"))
+	// Sleep guarantees the subsequent poll timestamp is strictly after the error.
+	time.Sleep(time.Millisecond)
+	// Poll success should clear the error
+	c.setPollSuccess()
+	status := c.Status()
+	if !status.Connected {
+		t.Fatal("expected connected after poll success")
+	}
+	if status.LastError != "" {
+		t.Fatalf("expected error cleared: %q", status.LastError)
+	}
+	if status.LastPollAt == "" {
+		t.Fatal("expected poll time set")
+	}
+}
+
+func TestLFSConsumerProcessRecord(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	c := &LFSConsumer{
+		handlers: handlers,
+		logger:   nil,
+	}
+	// Use log.Default() for the consumer logger
+	c.logger = handlers.logger
+
+	event := LFSEvent{
+		EventType: "upload_completed",
+		Topic:     "test-topic",
+		S3Key:     "key-1",
+		Size:      512,
+		Timestamp: "2026-01-01T00:00:00Z",
+	}
+	data, _ := json.Marshal(event)
+	record := &kgo.Record{Value: data}
+	c.processRecord(record)
+
+	handlers.mu.RLock()
+	defer handlers.mu.RUnlock()
+	if handlers.stats.TotalObjects != 1 {
+		t.Fatalf("expected 1 object, got %d", handlers.stats.TotalObjects)
+	}
+}
+
+func TestLFSConsumerProcessRecordNil(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	c := &LFSConsumer{
+		handlers: handlers,
+		logger:   handlers.logger,
+	}
+	// nil record should not panic
+	c.processRecord(nil)
+	// empty record should not panic
+	c.processRecord(&kgo.Record{Value: nil})
+	c.processRecord(&kgo.Record{Value: []byte{}})
+}
+
+func TestLFSConsumerProcessRecordInvalidJSON(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	c := &LFSConsumer{
+		handlers: handlers,
+		logger:   handlers.logger,
+	}
+	record := &kgo.Record{Value: []byte("not json")}
+	c.processRecord(record) // should log error but not panic
+
+	handlers.mu.RLock()
+	defer handlers.mu.RUnlock()
+	if handlers.stats.TotalObjects != 0 {
+		t.Fatalf("expected 0 objects after invalid record")
+	}
+}
+
+func TestLFSConsumerProcessRecordNilHandlers(t *testing.T) {
+	c := &LFSConsumer{
+		handlers: nil,
+		logger:   NewLFSHandlers(LFSConfig{}, nil).logger,
+	}
+	event := LFSEvent{EventType: "upload_completed", Topic: "t", S3Key: "k", Size: 1}
+	data, _ := json.Marshal(event)
+	record := &kgo.Record{Value: data}
+	c.processRecord(record) // should not panic even with nil handlers
+}
diff --git a/internal/console/lfs_handlers.go b/internal/console/lfs_handlers.go
new file mode 100644
index
00000000..0ab24436
--- /dev/null
+++ b/internal/console/lfs_handlers.go
@@ -0,0 +1,491 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package console
+
+import (
+	"encoding/json"
+	"log"
+	"net/http"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+)
+
+// LFSHandlers provides HTTP handlers for LFS admin APIs
+type LFSHandlers struct {
+	config   LFSConfig
+	consumer *LFSConsumer
+	s3Client *LFSS3Client
+	logger   *log.Logger
+
+	// In-memory state (populated from tracker events)
+	mu           sync.RWMutex
+	objects      map[string]*LFSObject     // key: s3_key
+	topicStats   map[string]*LFSTopicStats // key: topic name
+	orphans      map[string]*LFSOrphan     // key: s3_key
+	events       []LFSEvent                // circular buffer of recent events
+	stats        LFSStats
+	lastEventIdx int
+}
+
+// maxRecentEvents bounds the in-memory event ring buffer.
+const maxRecentEvents = 1000
+
+// NewLFSHandlers creates a new LFS handlers instance
+func NewLFSHandlers(cfg LFSConfig, logger *log.Logger) *LFSHandlers {
+	if logger == nil {
+		logger = log.Default()
+	}
+	h := &LFSHandlers{
+		config:     cfg,
+		logger:     logger,
+		objects:    make(map[string]*LFSObject),
+		topicStats: make(map[string]*LFSTopicStats),
+		orphans:    make(map[string]*LFSOrphan),
+		events:     make([]LFSEvent, 0, maxRecentEvents),
+	}
+	return h
+}
+
+// SetConsumer sets the LFS tracker consumer
+func (h *LFSHandlers) SetConsumer(consumer *LFSConsumer) {
+	h.consumer = consumer
+}
+
+// SetS3Client sets the S3 client for browsing
+func (h *LFSHandlers) SetS3Client(client *LFSS3Client) {
+	h.s3Client = client
+}
+
+// ProcessEvent handles an incoming tracker event
+func (h *LFSHandlers) ProcessEvent(event LFSEvent) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	// Add to recent events. Once the buffer is full, the oldest entry at
+	// lastEventIdx is overwritten (ring-buffer behavior).
+	if len(h.events) < maxRecentEvents {
+		h.events = append(h.events, event)
+	} else {
+		h.events[h.lastEventIdx] = event
+		h.lastEventIdx = (h.lastEventIdx + 1) % maxRecentEvents
+	}
+
+	var topicStats *LFSTopicStats
+	if event.Topic != "" {
+		topicStats = h.getOrCreateTopicStats(event.Topic)
+		topicStats.HasLFS = true
+		topicStats.LastEvent = event.Timestamp
+	}
+
+	// Update stats based on event type
+	switch event.EventType {
+	case "upload_started":
+		// No stat updates on start
+
+	case "upload_completed":
+		h.stats.TotalObjects++
+		h.stats.TotalBytes += event.Size
+		h.stats.Uploads24h++
+		if topicStats != nil {
+			h.updateTopicStats(topicStats, event.Size, event.Timestamp)
+			topicStats.Uploads24h++
+			topicStats.LastUpload = event.Timestamp
+		}
+
+		// Add object to map
+		obj := &LFSObject{
+			S3Key:     event.S3Key,
+			Topic:     event.Topic,
+			Size:      event.Size,
+			CreatedAt: event.Timestamp,
+			ProxyID:   event.ProxyID,
+		}
+		h.objects[event.S3Key] = obj
+
+	case "upload_failed":
+		h.stats.Errors24h++
+		if topicStats != nil {
+			topicStats.Errors24h++
+			topicStats.LastError = event.Timestamp
+		}
+
+	case "download_requested":
+		h.stats.Downloads24h++
+		if topicStats != nil {
+			topicStats.Downloads24h++
+			topicStats.LastDownload = event.Timestamp
+		}
+
+	case "download_completed":
+		// Track download metrics
+		if topicStats != nil {
+			topicStats.LastDownload = event.Timestamp
+		}
+
+	case "orphan_detected":
+		h.stats.OrphansPending++
+		if topicStats != nil {
+			topicStats.Orphans++
+		}
+		h.orphans[event.S3Key] = &LFSOrphan{
+			S3Key:      event.S3Key,
+			Topic:      event.Topic,
+			DetectedAt: event.Timestamp,
+			Reason:     event.ErrorCode,
+		}
+	}
+}
+
+// getOrCreateTopicStats returns the stats entry for topic, creating it if
+// needed. Caller must hold h.mu.
+func (h *LFSHandlers) getOrCreateTopicStats(topic string) *LFSTopicStats {
+	stats, exists := h.topicStats[topic]
+	if !exists {
+		stats = &LFSTopicStats{Name: topic}
+		h.topicStats[topic] = stats
+	}
+	return stats
+}
+
+// updateTopicStats folds a completed upload into per-topic aggregates.
+// Caller must hold h.mu.
+func (h *LFSHandlers) updateTopicStats(stats *LFSTopicStats, size int64, timestamp string) {
+	if stats == nil {
+		return
+	}
+	stats.ObjectCount++
+	stats.TotalBytes += size
+	if stats.ObjectCount > 0 {
+		stats.AvgObjectSize = stats.TotalBytes / stats.ObjectCount
+	}
+	stats.LastObject = timestamp
+	if stats.FirstObject == "" {
+		stats.FirstObject = stats.LastObject
+	}
+}
+
+// HandleStatus handles GET /ui/api/lfs/status
+func (h *LFSHandlers) HandleStatus(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	h.mu.RLock()
+	topics := make([]string, 0, len(h.topicStats))
+	for topic, stats := range h.topicStats {
+		if stats.HasLFS {
+			topics = append(topics, topic)
+		}
+	}
+	stats := h.stats
+	h.mu.RUnlock()
+
+	resp := LFSStatusResponse{
+		Enabled:        h.config.Enabled,
+		ProxyCount:     1, // TODO: Get from metrics
+		S3Bucket:       h.config.S3Bucket,
+		TopicsWithLFS:  topics,
+		Stats:          stats,
+		TrackerTopic:   h.config.TrackerTopic,
+		TrackerEnabled: h.config.Enabled,
+	}
+	if h.consumer != nil {
+		resp.ConsumerStatus = h.consumer.Status()
+	}
+
+	writeJSON(w, resp)
+}
+
+// HandleObjects handles GET /ui/api/lfs/objects
+func (h *LFSHandlers) HandleObjects(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Parse query parameters
+	topic := r.URL.Query().Get("topic")
+	limitStr := r.URL.Query().Get("limit")
+	cursor := r.URL.Query().Get("cursor")
+	_ = cursor // TODO: Implement pagination
+
+	// limit defaults to 50 and is clamped to (0, 200]; invalid values are ignored.
+	limit := 50
+	if limitStr != "" {
+		if parsed, err := strconv.Atoi(limitStr); err == nil && parsed > 0 && parsed <= 200 {
+			limit = parsed
+		}
+	}
+
+	h.mu.RLock()
+	objects := make([]LFSObject, 0, limit)
+	count := 0
+	// NOTE(review): map iteration order is random, so the selected page is
+	// unstable across calls until cursor pagination is implemented.
+	for _, obj := range h.objects {
+		if topic != "" && obj.Topic != topic {
+			continue
+		}
+		if count >= limit {
+			break
+		}
+		objects = append(objects, *obj)
+		count++
+	}
+	total := int64(len(h.objects))
+	h.mu.RUnlock()
+
+	resp := LFSObjectsResponse{
+		Objects:    objects,
+		TotalCount: total,
+	}
+
+	writeJSON(w, resp)
+}
+
+// HandleTopics handles GET /ui/api/lfs/topics
+func (h *LFSHandlers) HandleTopics(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	h.mu.RLock()
+	topics := make([]LFSTopicStats, 0, len(h.topicStats))
+	for _, stats := range h.topicStats {
+		topics = append(topics, *stats)
+	}
+	h.mu.RUnlock()
+
+	resp := LFSTopicsResponse{
+		Topics: topics,
+	}
+
+	writeJSON(w, resp)
+}
+
+// HandleTopicDetail handles GET /ui/api/lfs/topics/{name}
+func (h *LFSHandlers) HandleTopicDetail(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+	name := strings.TrimPrefix(r.URL.Path, "/ui/api/lfs/topics/")
+	if name == "" {
+		http.Error(w, "topic name required", http.StatusBadRequest)
+		return
+	}
+
+	h.mu.RLock()
+	stats, ok := h.topicStats[name]
+	if !ok {
+		h.mu.RUnlock()
+		http.Error(w, "topic not found", http.StatusNotFound)
+		return
+	}
+	events := make([]LFSEvent, 0, len(h.events))
+	for _, event := range h.events {
+		if event.Topic == name {
+			events = append(events, event)
+		}
+	}
+	h.mu.RUnlock()
+
+	resp := LFSTopicDetailResponse{
+		Topic:  *stats,
+		Events: events,
+	}
+	writeJSON(w, resp)
+}
+
+// HandleEvents handles GET /ui/api/lfs/events (SSE)
+func (h *LFSHandlers) HandleEvents(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		http.Error(w, "streaming unsupported", http.StatusInternalServerError)
+		return
+	}
+
+	// Parse filter
+	typesFilter := r.URL.Query().Get("types")
+	var allowedTypes map[string]bool
+	if typesFilter != "" {
+		allowedTypes = make(map[string]bool)
+		for _, t := range strings.Split(typesFilter, ",") {
+			allowedTypes[strings.TrimSpace(t)] = true
+		}
+	}
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+
+	// Send existing events first
+	h.mu.RLock()
+	for _, event := range h.events {
+		if allowedTypes != nil && !allowedTypes[event.EventType] {
+			continue
+		}
+		data, _ := json.Marshal(event)
+		_, _ = w.Write([]byte("data: "))
+		_, _ = w.Write(data)
+		_, _ = w.Write([]byte("\n\n"))
+	}
+	h.mu.RUnlock()
+	flusher.Flush()
+
+	// Keep connection open for new events
+	// NOTE(review): after the initial replay only keepalive comments are
+	// written; events arriving post-connect are never pushed to this stream.
+	// Confirm whether live streaming was intended.
+	ticker := time.NewTicker(30 * time.Second)
+	defer ticker.Stop()
+
+	ctx := r.Context()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-ticker.C:
+			// Send keepalive
+			_, _ = w.Write([]byte(": keepalive\n\n"))
+			flusher.Flush()
+		}
+	}
+}
+
+// HandleOrphans handles GET /ui/api/lfs/orphans
+func (h *LFSHandlers) HandleOrphans(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	h.mu.RLock()
+	orphans := make([]LFSOrphan, 0, len(h.orphans))
+	var totalSize int64
+	for _, orphan := range h.orphans {
+		orphans = append(orphans, *orphan)
+		totalSize += orphan.Size
+	}
+	h.mu.RUnlock()
+
+	resp := LFSOrphansResponse{
+		Orphans:   orphans,
+		TotalSize: totalSize,
+		Count:     len(orphans),
+	}
+
+	writeJSON(w, resp)
+}
+
+// HandleS3Browse handles GET /ui/api/lfs/s3/browse
+func (h *LFSHandlers) HandleS3Browse(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	if h.s3Client == nil {
+		http.Error(w, "s3 client not configured", http.StatusServiceUnavailable)
+		return
+	}
+
+	prefix := r.URL.Query().Get("prefix")
+	delimiter := r.URL.Query().Get("delimiter")
+	if delimiter == "" {
+		delimiter = "/"
+	}
+	// max_keys defaults to 100 and is clamped to (0, 1000]; invalid values are ignored.
+	maxKeysStr := r.URL.Query().Get("max_keys")
+	maxKeys := 100
+	if maxKeysStr != "" {
+		if parsed, err := strconv.Atoi(maxKeysStr); err == nil && parsed > 0 && parsed <= 1000 {
+			maxKeys = parsed
+		}
+	}
+
+	objects, prefixes, truncated, err := h.s3Client.ListObjects(r.Context(), prefix, delimiter, maxKeys)
+	if err != nil {
+		h.logger.Printf("s3 list error: %v", err)
+		http.Error(w, "s3 list failed", http.StatusBadGateway)
+		return
+	}
+
+	resp := S3BrowseResponse{
+		Objects:        objects,
+		CommonPrefixes: prefixes,
+		IsTruncated:    truncated,
+	}
+
+	writeJSON(w, resp)
+}
+
+// HandleS3Presign handles POST /ui/api/lfs/s3/presign
+func (h *LFSHandlers) HandleS3Presign(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	if h.s3Client == nil {
+		http.Error(w, "s3 client not configured", http.StatusServiceUnavailable)
+		return
+	}
+
+	var req S3PresignRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		http.Error(w, "invalid request body", http.StatusBadRequest)
+		return
+	}
+
+	if req.S3Key == "" {
+		http.Error(w, "s3_key required", http.StatusBadRequest)
+		return
+	}
+
+	// The effective TTL is the configured maximum (default 300s); a client may
+	// request a shorter TTL but never a longer one.
+	ttl := h.config.PresignTTL
+	if ttl <= 0 {
+		ttl = 300 // default 5 minutes
+	}
+	if req.TTLSeconds > 0 && req.TTLSeconds < ttl {
+		ttl = req.TTLSeconds
+	}
+
+	url, err := h.s3Client.PresignGetObject(r.Context(), req.S3Key, time.Duration(ttl)*time.Second)
+	if err != nil {
+		h.logger.Printf("s3 presign error: %v", err)
+		http.Error(w, "s3 presign failed", http.StatusBadGateway)
+		return
+	}
+
+	resp := S3PresignResponse{
+		URL:       url,
+		ExpiresAt: time.Now().UTC().Add(time.Duration(ttl) * time.Second).Format(time.RFC3339),
+	}
+
+	writeJSON(w, resp)
+}
+
+// ResetStats resets the 24h rolling statistics (call periodically)
+// NOTE(review): the global Downloads24h is reset but the per-topic
+// Downloads24h is not — confirm whether that asymmetry is intentional.
+func (h *LFSHandlers) ResetStats() {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	h.stats.Uploads24h = 0
+	h.stats.Downloads24h = 0
+	h.stats.Errors24h = 0
+
+	for _, ts := range h.topicStats {
+		ts.Uploads24h = 0
+		ts.Errors24h = 0
+	}
+}
diff --git a/internal/console/lfs_handlers_test.go b/internal/console/lfs_handlers_test.go
new file mode 100644
index 00000000..8fe98018
--- /dev/null
+++ b/internal/console/lfs_handlers_test.go
@@ -0,0 +1,549 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package console
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+)
+
+// Exercises one event of each type against a single topic and checks all
+// per-topic aggregates.
+func TestProcessEventAggregatesTopicStats(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	topic := "video-uploads"
+
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "upload_completed",
+		Topic:     topic,
+		S3Key:     "default/video-uploads/lfs/obj-1",
+		Size:      1024,
+		Timestamp: "2026-02-05T10:00:00Z",
+	})
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "download_requested",
+		Topic:     topic,
+		Timestamp: "2026-02-05T10:01:00Z",
+	})
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "upload_failed",
+		Topic:     topic,
+		Timestamp: "2026-02-05T10:02:00Z",
+		ErrorCode: "kafka_produce_failed",
+	})
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "orphan_detected",
+		Topic:     topic,
+		Timestamp: "2026-02-05T10:03:00Z",
+		S3Key:     "default/video-uploads/lfs/obj-2",
+	})
+
+	handlers.mu.RLock()
+	stats := handlers.topicStats[topic]
+	handlers.mu.RUnlock()
+
+	if stats == nil {
+		t.Fatalf("expected topic stats to exist")
+	}
+	if !stats.HasLFS {
+		t.Fatalf("expected HasLFS to be true")
+	}
+	if stats.ObjectCount != 1 {
+		t.Fatalf("expected object_count=1, got %d", stats.ObjectCount)
+	}
+	if stats.TotalBytes != 1024 {
+		t.Fatalf("expected total_bytes=1024, got %d", stats.TotalBytes)
+	}
+	if stats.Uploads24h != 1 {
+		t.Fatalf("expected uploads_24h=1, got %d", stats.Uploads24h)
+	}
+	if stats.Downloads24h != 1 {
+		t.Fatalf("expected downloads_24h=1, got %d", stats.Downloads24h)
+	}
+	if stats.Errors24h != 1 {
+		t.Fatalf("expected errors_24h=1, got %d", stats.Errors24h)
+	}
+	if stats.Orphans != 1 {
+		t.Fatalf("expected orphans=1, got %d", stats.Orphans)
+	}
+	if stats.LastEvent == "" {
+		t.Fatalf("expected last_event to be set")
+	}
+}
+
+// Confirms the event buffer is capped at maxRecentEvents (ring behavior).
+func TestProcessEventCircularBuffer(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	// Fill buffer past maxRecentEvents
+	for i := 0; i < maxRecentEvents+50; i++ {
+		handlers.ProcessEvent(LFSEvent{
+			EventType: "upload_started",
+			Topic:     "test",
+			Timestamp: "2026-02-05T10:00:00Z",
+		})
+	}
+	handlers.mu.RLock()
+	defer handlers.mu.RUnlock()
+	if len(handlers.events) != maxRecentEvents {
+		t.Fatalf("expected %d events, got %d", maxRecentEvents, len(handlers.events))
+	}
+}
+
+func TestProcessEventDownloadCompleted(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "upload_completed",
+		Topic:     "t1",
+		S3Key:     "key1",
+		Size:      100,
+		Timestamp: "2026-02-05T10:00:00Z",
+	})
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "download_completed",
+		Topic:     "t1",
+		Timestamp: "2026-02-05T10:01:00Z",
+	})
+	handlers.mu.RLock()
+	stats := handlers.topicStats["t1"]
+	handlers.mu.RUnlock()
+	if stats.LastDownload != "2026-02-05T10:01:00Z" {
+		t.Fatalf("expected last_download set")
+	}
+}
+
+func TestHandleStatusHTTP(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true, S3Bucket: "my-bucket", TrackerTopic: "__lfs"}, nil)
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "upload_completed",
+		Topic:     "t1",
+		S3Key:     "key1",
+		Size:      100,
+		Timestamp: "2026-02-05T10:00:00Z",
+	})
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/status", nil)
+	handlers.HandleStatus(w, r)
+	if w.Code != http.StatusOK {
+		t.Fatalf("status: %d", w.Code)
+	}
+	var resp LFSStatusResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if !resp.Enabled {
+		t.Fatal("expected enabled")
+	}
+	if resp.S3Bucket != "my-bucket" {
+		t.Fatalf("bucket: %q", resp.S3Bucket)
+	}
+	if resp.Stats.TotalObjects != 1 {
+		t.Fatalf("total_objects: %d", resp.Stats.TotalObjects)
+	}
+}
+
+func TestLFSHandleStatusMethodNotAllowed(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/status", nil)
+	handlers.HandleStatus(w, r)
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Fatalf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleObjectsHTTP(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "upload_completed",
+		Topic:     "t1",
+		S3Key:     "key1",
+		Size:      100,
+		Timestamp: "2026-02-05T10:00:00Z",
+	})
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "upload_completed",
+		Topic:     "t2",
+		S3Key:     "key2",
+		Size:      200,
+		Timestamp: "2026-02-05T10:01:00Z",
+	})
+
+	// Without filter
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/objects", nil)
+	handlers.HandleObjects(w, r)
+	if w.Code != http.StatusOK {
+		t.Fatalf("status: %d", w.Code)
+	}
+	var resp LFSObjectsResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if resp.TotalCount != 2 {
+		t.Fatalf("total: %d", resp.TotalCount)
+	}
+
+	// With topic filter
+	w2 := httptest.NewRecorder()
+	r2 := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/objects?topic=t1&limit=10", nil)
+	handlers.HandleObjects(w2, r2)
+	var resp2 LFSObjectsResponse
+	if err := json.NewDecoder(w2.Body).Decode(&resp2); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if len(resp2.Objects) != 1 {
+		t.Fatalf("filtered objects: %d", len(resp2.Objects))
+	}
+}
+
+func TestHandleObjectsMethodNotAllowed(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/objects", nil)
+	handlers.HandleObjects(w, r)
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Fatalf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleTopicsHTTP(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	handlers.ProcessEvent(LFSEvent{EventType: "upload_completed", Topic: "t1", S3Key: "k1", Size: 50, Timestamp: "2026-01-01T00:00:00Z"})
+	handlers.ProcessEvent(LFSEvent{EventType: "upload_completed", Topic: "t2", S3Key: "k2", Size: 100, Timestamp: "2026-01-01T00:00:00Z"})
+
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/topics", nil)
+	handlers.HandleTopics(w, r)
+	if w.Code != http.StatusOK {
+		t.Fatalf("status: %d", w.Code)
+	}
+	var resp LFSTopicsResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if len(resp.Topics) != 2 {
+		t.Fatalf("topics: %d", len(resp.Topics))
+	}
+}
+
+func TestHandleTopicsMethodNotAllowed(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/topics", nil)
+	handlers.HandleTopics(w, r)
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Fatalf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleTopicDetailHTTP(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	handlers.ProcessEvent(LFSEvent{EventType: "upload_completed", Topic: "orders", S3Key: "k1", Size: 50, Timestamp: "2026-01-01T00:00:00Z"})
+
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/topics/orders", nil)
+	handlers.HandleTopicDetail(w, r)
+	if w.Code != http.StatusOK {
+		t.Fatalf("status: %d", w.Code)
+	}
+	var resp LFSTopicDetailResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if resp.Topic.Name != "orders" {
+		t.Fatalf("topic name: %q", resp.Topic.Name)
+	}
+}
+
+func TestHandleTopicDetailNotFound(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/topics/nonexistent", nil)
+	handlers.HandleTopicDetail(w, r)
+	if w.Code != http.StatusNotFound {
+		t.Fatalf("expected 404, got %d", w.Code)
+	}
+}
+
+func TestHandleTopicDetailEmptyName(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/topics/", nil)
+	handlers.HandleTopicDetail(w, r)
+	if w.Code != http.StatusBadRequest {
+		t.Fatalf("expected 400, got %d", w.Code)
+	}
+}
+
+func TestHandleTopicDetailMethodNotAllowed(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/topics/x", nil)
+	handlers.HandleTopicDetail(w, r)
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Fatalf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleOrphansHTTP(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	handlers.ProcessEvent(LFSEvent{
+		EventType: "orphan_detected",
+		Topic:     "t1",
+		S3Key:     "orphan-key",
+		Timestamp: "2026-01-01T00:00:00Z",
+		ErrorCode: "no_kafka_ref",
+	})
+
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/orphans", nil)
+	handlers.HandleOrphans(w, r)
+	if w.Code != http.StatusOK {
+		t.Fatalf("status: %d", w.Code)
+	}
+	var resp LFSOrphansResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if resp.Count != 1 {
+		t.Fatalf("count: %d", resp.Count)
+	}
+}
+
+func TestHandleOrphansMethodNotAllowed(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/orphans", nil)
+	handlers.HandleOrphans(w, r)
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Fatalf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleS3BrowseNoClient(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/s3/browse", nil)
+	handlers.HandleS3Browse(w, r)
+	if w.Code != http.StatusServiceUnavailable {
+		t.Fatalf("expected 503, got %d", w.Code)
+	}
+}
+
+func TestHandleS3BrowseMethodNotAllowed(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/s3/browse", nil)
+	handlers.HandleS3Browse(w, r)
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Fatalf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleS3PresignNoClient(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/s3/presign", strings.NewReader(`{"s3_key":"test"}`))
+	handlers.HandleS3Presign(w, r)
+	if w.Code != http.StatusServiceUnavailable {
+		t.Fatalf("expected 503, got %d", w.Code)
+	}
+}
+
+func TestHandleS3PresignMethodNotAllowed(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/s3/presign", nil)
+	handlers.HandleS3Presign(w, r)
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Fatalf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestHandleS3PresignInvalidBody(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	handlers.s3Client = &LFSS3Client{logger: handlers.logger}
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/s3/presign", strings.NewReader(`invalid`))
+	handlers.HandleS3Presign(w, r)
+	if w.Code != http.StatusBadRequest {
+		t.Fatalf("expected 400, got %d", w.Code)
+	}
+}
+
+func TestHandleS3PresignMissingKey(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	handlers.s3Client = &LFSS3Client{logger: handlers.logger}
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/s3/presign", strings.NewReader(`{"s3_key":""}`))
+	handlers.HandleS3Presign(w, r)
+	if w.Code != http.StatusBadRequest {
+		t.Fatalf("expected 400, got %d", w.Code)
+	}
+}
+
+func TestHandleEventsSSE(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	handlers.ProcessEvent(LFSEvent{EventType: "upload_completed", Topic: "t1", S3Key: "k1", Size: 100, Timestamp: "2026-01-01T00:00:00Z"})
+
+	// Create a flushing recorder
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		handlers.HandleEvents(w, r)
+	}))
+	defer srv.Close()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel()
+	req, _ := http.NewRequestWithContext(ctx, http.MethodGet, srv.URL+"?types=upload_completed", nil)
+	resp, err := srv.Client().Do(req)
+	if err != nil {
+		t.Fatalf("events: %v", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+	if resp.Header.Get("Content-Type") != "text/event-stream" {
+		t.Fatalf("content-type: %q", resp.Header.Get("Content-Type"))
+	}
+	buf := make([]byte, 1024)
+	n, _ := resp.Body.Read(buf)
+	if n == 0 {
+		t.Fatal("expected SSE data")
+	}
+	if !strings.Contains(string(buf[:n]), "data:") {
+		t.Fatalf("expected SSE data prefix: %s", buf[:n])
+	}
+}
+
+func TestHandleEventsMethodNotAllowed(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodPost, "/ui/api/lfs/events", nil)
+	handlers.HandleEvents(w, r)
+	if w.Code != http.StatusMethodNotAllowed {
+		t.Fatalf("expected 405, got %d", w.Code)
+	}
+}
+
+func TestResetStats(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{Enabled: true}, nil)
+	handlers.ProcessEvent(LFSEvent{EventType: "upload_completed", Topic: "t1", S3Key: "k1", Size: 100, Timestamp: "2026-01-01T00:00:00Z"})
+	handlers.ProcessEvent(LFSEvent{EventType: "upload_failed", Topic: "t1", Timestamp: "2026-01-01T00:01:00Z"})
+	handlers.ProcessEvent(LFSEvent{EventType: "download_requested", Topic: "t1", Timestamp: "2026-01-01T00:02:00Z"})
+
+	handlers.ResetStats()
+
+	handlers.mu.RLock()
+	defer handlers.mu.RUnlock()
+	if handlers.stats.Uploads24h != 0 {
+		t.Fatalf("uploads_24h: %d", handlers.stats.Uploads24h)
+	}
+	if handlers.stats.Errors24h != 0 {
+		t.Fatalf("errors_24h: %d", handlers.stats.Errors24h)
+	}
+	if handlers.stats.Downloads24h != 0 {
+		t.Fatalf("downloads_24h: %d", handlers.stats.Downloads24h)
+	}
+	// Total objects should NOT be reset
+	if handlers.stats.TotalObjects != 1 {
+		t.Fatalf("total_objects should persist: %d", handlers.stats.TotalObjects)
+	}
+	ts := handlers.topicStats["t1"]
+	if ts.Uploads24h != 0 || ts.Errors24h != 0 {
+		t.Fatalf("topic stats not reset: uploads=%d errors=%d", ts.Uploads24h, ts.Errors24h)
+	}
+}
+
+func TestSetConsumerAndS3Client(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	if handlers.consumer != nil {
+		t.Fatal("expected nil consumer")
+	}
+	if handlers.s3Client != nil {
+		t.Fatal("expected nil s3Client")
+	}
+	handlers.SetConsumer(nil)
+	handlers.SetS3Client(nil)
+}
+
+func TestNewLFSHandlersDefaults(t *testing.T) {
+	h := NewLFSHandlers(LFSConfig{Enabled: true, S3Bucket: "b"}, nil)
+	if h.logger == nil {
+		t.Fatal("expected default logger")
+	}
+	if h.objects == nil || h.topicStats == nil || h.orphans == nil {
+		t.Fatal("expected maps initialized")
+	}
+}
+
+func TestUpdateTopicStatsNilSafe(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	// Should not panic
+	handlers.updateTopicStats(nil, 100, "2026-01-01T00:00:00Z")
+}
+
+func TestHandleObjectsWithLimit(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	for i := 0; i < 10; i++ {
+		handlers.ProcessEvent(LFSEvent{
+			EventType: "upload_completed",
+			Topic:     "t1",
+			S3Key:     "key-" + strings.Repeat("x", i+1),
+			Size:      int64(100 * (i + 1)),
+			Timestamp: "2026-01-01T00:00:00Z",
+		})
+	}
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/objects?limit=3", nil)
+	handlers.HandleObjects(w, r)
+	var resp LFSObjectsResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	if len(resp.Objects) > 3 {
+		t.Fatalf("expected at most 3 objects, got %d", len(resp.Objects))
+	}
+}
+
+func TestHandleObjectsInvalidLimit(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	handlers.ProcessEvent(LFSEvent{EventType: "upload_completed", Topic: "t1", S3Key: "k1", Size: 100, Timestamp: "2026-01-01T00:00:00Z"})
+	w := httptest.NewRecorder()
+	r := httptest.NewRequest(http.MethodGet, "/ui/api/lfs/objects?limit=abc", nil)
+	handlers.HandleObjects(w, r)
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d", w.Code)
+	}
+}
+
+func TestUpdateTopicStatsFirstObject(t *testing.T) {
+	handlers := NewLFSHandlers(LFSConfig{}, nil)
+	stats := &LFSTopicStats{Name: "t1"}
+	handlers.updateTopicStats(stats, 100, "2026-01-01T00:00:00Z")
+	if stats.FirstObject != "2026-01-01T00:00:00Z" {
+		t.Fatalf("first object: %q", stats.FirstObject)
+	}
+	handlers.updateTopicStats(stats, 200, "2026-01-02T00:00:00Z")
+	if stats.FirstObject != "2026-01-01T00:00:00Z" {
+		t.Fatalf("first object should not change: %q", stats.FirstObject)
+	}
+	if stats.LastObject != "2026-01-02T00:00:00Z" {
+		t.Fatalf("last object: %q", stats.LastObject)
+	}
+	if stats.AvgObjectSize != 150 {
+		t.Fatalf("avg object size: %d", stats.AvgObjectSize)
+	}
+}
diff --git a/internal/console/lfs_s3_client.go b/internal/console/lfs_s3_client.go
new file mode 100644
index 00000000..5a971784
--- /dev/null
+++ b/internal/console/lfs_s3_client.go
@@ -0,0 +1,203 @@
+// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package console + +import ( + "context" + "log" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +// LFSS3Client provides S3 operations for the LFS admin console +type LFSS3Client struct { + client *s3.Client + presign *s3.PresignClient + bucket string + logger *log.Logger +} + +// LFSS3Config holds configuration for the S3 client +type LFSS3Config struct { + Bucket string + Region string + Endpoint string + AccessKey string + SecretKey string + ForcePathStyle bool +} + +// NewLFSS3Client creates a new S3 client for LFS admin operations +func NewLFSS3Client(ctx context.Context, cfg LFSS3Config, logger *log.Logger) (*LFSS3Client, error) { + if logger == nil { + logger = log.Default() + } + + if cfg.Bucket == "" { + logger.Println("lfs s3 client: no bucket configured") + return nil, nil + } + + // Build AWS config + var opts []func(*config.LoadOptions) error + + if cfg.Region != "" { + opts = append(opts, config.WithRegion(cfg.Region)) + } else { + opts = append(opts, config.WithRegion("us-east-1")) + } + + if cfg.AccessKey != "" && cfg.SecretKey != "" { + opts = append(opts, config.WithCredentialsProvider( + credentials.NewStaticCredentialsProvider(cfg.AccessKey, cfg.SecretKey, ""), + )) + } + + awsCfg, err := config.LoadDefaultConfig(ctx, opts...) + if err != nil { + return nil, err + } + + // Build S3 client options + var s3Opts []func(*s3.Options) + + if cfg.Endpoint != "" { + s3Opts = append(s3Opts, func(o *s3.Options) { + o.BaseEndpoint = aws.String(cfg.Endpoint) + }) + } + + if cfg.ForcePathStyle { + s3Opts = append(s3Opts, func(o *s3.Options) { + o.UsePathStyle = true + }) + } + + client := s3.NewFromConfig(awsCfg, s3Opts...) 
+ presign := s3.NewPresignClient(client) + + return &LFSS3Client{ + client: client, + presign: presign, + bucket: cfg.Bucket, + logger: logger, + }, nil +} + +// ListObjects lists objects in S3 with the given prefix +func (c *LFSS3Client) ListObjects(ctx context.Context, prefix, delimiter string, maxKeys int) ([]S3Object, []string, bool, error) { + if maxKeys <= 0 { + maxKeys = 100 + } + if maxKeys > 1000 { + maxKeys = 1000 + } + + input := &s3.ListObjectsV2Input{ + Bucket: aws.String(c.bucket), + MaxKeys: aws.Int32(int32(maxKeys)), + } + + if prefix != "" { + input.Prefix = aws.String(prefix) + } + + if delimiter != "" { + input.Delimiter = aws.String(delimiter) + } + + output, err := c.client.ListObjectsV2(ctx, input) + if err != nil { + return nil, nil, false, err + } + + objects := make([]S3Object, 0, len(output.Contents)) + for _, obj := range output.Contents { + s3Obj := S3Object{ + Key: aws.ToString(obj.Key), + Size: aws.ToInt64(obj.Size), + } + if obj.LastModified != nil { + s3Obj.LastModified = obj.LastModified.Format(time.RFC3339) + } + if obj.ETag != nil { + s3Obj.ETag = aws.ToString(obj.ETag) + } + objects = append(objects, s3Obj) + } + + prefixes := make([]string, 0, len(output.CommonPrefixes)) + for _, p := range output.CommonPrefixes { + if p.Prefix != nil { + prefixes = append(prefixes, aws.ToString(p.Prefix)) + } + } + + truncated := aws.ToBool(output.IsTruncated) + + return objects, prefixes, truncated, nil +} + +// PresignGetObject generates a presigned URL for downloading an object +func (c *LFSS3Client) PresignGetObject(ctx context.Context, key string, ttl time.Duration) (string, error) { + input := &s3.GetObjectInput{ + Bucket: aws.String(c.bucket), + Key: aws.String(key), + } + + presignOpts := func(opts *s3.PresignOptions) { + opts.Expires = ttl + } + + result, err := c.presign.PresignGetObject(ctx, input, presignOpts) + if err != nil { + return "", err + } + + return result.URL, nil +} + +// HeadObject checks if an object exists and returns 
its metadata +func (c *LFSS3Client) HeadObject(ctx context.Context, key string) (*S3Object, error) { + input := &s3.HeadObjectInput{ + Bucket: aws.String(c.bucket), + Key: aws.String(key), + } + + output, err := c.client.HeadObject(ctx, input) + if err != nil { + return nil, err + } + + obj := &S3Object{ + Key: key, + Size: aws.ToInt64(output.ContentLength), + } + + if output.LastModified != nil { + obj.LastModified = output.LastModified.Format(time.RFC3339) + } + + if output.ETag != nil { + obj.ETag = aws.ToString(output.ETag) + } + + return obj, nil +} diff --git a/internal/console/lfs_s3_client_test.go b/internal/console/lfs_s3_client_test.go new file mode 100644 index 00000000..f278b915 --- /dev/null +++ b/internal/console/lfs_s3_client_test.go @@ -0,0 +1,63 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package console + +import ( + "context" + "testing" +) + +func TestNewLFSS3ClientNoBucket(t *testing.T) { + client, err := NewLFSS3Client(context.Background(), LFSS3Config{}, nil) + if err != nil { + t.Fatalf("NewLFSS3Client: %v", err) + } + if client != nil { + t.Fatal("expected nil client when no bucket configured") + } +} + +func TestNewLFSS3ClientWithConfig(t *testing.T) { + client, err := NewLFSS3Client(context.Background(), LFSS3Config{ + Bucket: "test-bucket", + Region: "us-west-2", + Endpoint: "http://localhost:9000", + AccessKey: "minioadmin", + SecretKey: "minioadmin", + ForcePathStyle: true, + }, nil) + if err != nil { + t.Fatalf("NewLFSS3Client: %v", err) + } + if client == nil { + t.Fatal("expected non-nil client") + } + if client.bucket != "test-bucket" { + t.Fatalf("bucket: %q", client.bucket) + } +} + +func TestNewLFSS3ClientDefaultRegion(t *testing.T) { + client, err := NewLFSS3Client(context.Background(), LFSS3Config{ + Bucket: "test-bucket", + }, nil) + if err != nil { + t.Fatalf("NewLFSS3Client: %v", err) + } + if client == nil { + t.Fatal("expected non-nil client") + } +} diff --git a/internal/console/lfs_types.go b/internal/console/lfs_types.go new file mode 100644 index 00000000..e97e1c0e --- /dev/null +++ b/internal/console/lfs_types.go @@ -0,0 +1,171 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package console + +// LFSStatusResponse represents the response for /ui/api/lfs/status +type LFSStatusResponse struct { + Enabled bool `json:"enabled"` + ProxyCount int `json:"proxy_count"` + S3Bucket string `json:"s3_bucket"` + TopicsWithLFS []string `json:"topics_with_lfs"` + Stats LFSStats `json:"stats"` + TrackerTopic string `json:"tracker_topic"` + TrackerEnabled bool `json:"tracker_enabled"` + ConsumerStatus LFSConsumerStatus `json:"consumer_status"` +} + +// LFSConsumerStatus represents the tracker consumer health. +type LFSConsumerStatus struct { + Connected bool `json:"connected"` + LastError string `json:"last_error,omitempty"` + LastErrorAt string `json:"last_error_at,omitempty"` + LastPollAt string `json:"last_poll_at,omitempty"` +} + +// LFSStats represents aggregate LFS statistics +type LFSStats struct { + TotalObjects int64 `json:"total_objects"` + TotalBytes int64 `json:"total_bytes"` + Uploads24h int64 `json:"uploads_24h"` + Downloads24h int64 `json:"downloads_24h"` + Errors24h int64 `json:"errors_24h"` + OrphansPending int64 `json:"orphans_pending"` + AvgUploadMs float64 `json:"avg_upload_ms"` + AvgDownloadMs float64 `json:"avg_download_ms"` +} + +// LFSObject represents an LFS object in the browser +type LFSObject struct { + S3Key string `json:"s3_key"` + Topic string `json:"topic"` + Partition int32 `json:"partition"` + KafkaOffset int64 `json:"kafka_offset,omitempty"` + Size int64 `json:"size"` + SHA256 string `json:"sha256"` + ContentType string `json:"content_type,omitempty"` + CreatedAt string `json:"created_at"` + ProxyID string `json:"proxy_id,omitempty"` +} + +// LFSObjectsResponse represents the response for /ui/api/lfs/objects +type LFSObjectsResponse struct { + Objects []LFSObject `json:"objects"` + NextCursor string `json:"next_cursor,omitempty"` + TotalCount int64 `json:"total_count"` +} + +// LFSTopicStats represents 
per-topic LFS statistics +type LFSTopicStats struct { + Name string `json:"name"` + HasLFS bool `json:"has_lfs"` + ObjectCount int64 `json:"object_count"` + TotalBytes int64 `json:"total_bytes"` + AvgObjectSize int64 `json:"avg_object_size"` + Uploads24h int64 `json:"uploads_24h"` + Downloads24h int64 `json:"downloads_24h"` + Errors24h int64 `json:"errors_24h"` + Orphans int64 `json:"orphans_detected"` + FirstObject string `json:"first_object,omitempty"` + LastObject string `json:"last_object,omitempty"` + LastUpload string `json:"last_upload,omitempty"` + LastDownload string `json:"last_download,omitempty"` + LastError string `json:"last_error,omitempty"` + LastEvent string `json:"last_event,omitempty"` +} + +// LFSTopicsResponse represents the response for /ui/api/lfs/topics +type LFSTopicsResponse struct { + Topics []LFSTopicStats `json:"topics"` +} + +// LFSTopicDetailResponse represents a single topic detail response +type LFSTopicDetailResponse struct { + Topic LFSTopicStats `json:"topic"` + Events []LFSEvent `json:"events,omitempty"` +} + +// LFSEvent represents a tracker event +type LFSEvent struct { + EventType string `json:"event_type"` + EventID string `json:"event_id"` + Timestamp string `json:"timestamp"` + ProxyID string `json:"proxy_id"` + RequestID string `json:"request_id"` + Topic string `json:"topic,omitempty"` + S3Key string `json:"s3_key,omitempty"` + Size int64 `json:"size,omitempty"` + DurationMs int64 `json:"duration_ms,omitempty"` + ErrorCode string `json:"error_code,omitempty"` + Mode string `json:"mode,omitempty"` +} + +// LFSOrphan represents an orphaned S3 object +type LFSOrphan struct { + S3Key string `json:"s3_key"` + S3Bucket string `json:"s3_bucket"` + Topic string `json:"topic"` + Size int64 `json:"size"` + DetectedAt string `json:"detected_at"` + Reason string `json:"reason"` + AgeHours int `json:"age_hours"` +} + +// LFSOrphansResponse represents the response for /ui/api/lfs/orphans +type LFSOrphansResponse struct { + Orphans 
[]LFSOrphan `json:"orphans"` + TotalSize int64 `json:"total_size"` + Count int `json:"count"` +} + +// S3Object represents an object in S3 browser +type S3Object struct { + Key string `json:"key"` + Size int64 `json:"size"` + LastModified string `json:"last_modified"` + ETag string `json:"etag,omitempty"` +} + +// S3BrowseResponse represents the response for /ui/api/lfs/s3/browse +type S3BrowseResponse struct { + Objects []S3Object `json:"objects"` + CommonPrefixes []string `json:"common_prefixes"` + IsTruncated bool `json:"is_truncated"` +} + +// S3PresignRequest represents the request for /ui/api/lfs/s3/presign +type S3PresignRequest struct { + S3Key string `json:"s3_key"` + TTLSeconds int `json:"ttl_seconds,omitempty"` +} + +// S3PresignResponse represents the response for /ui/api/lfs/s3/presign +type S3PresignResponse struct { + URL string `json:"url"` + ExpiresAt string `json:"expires_at"` +} + +// LFSConfig holds configuration for LFS console features +type LFSConfig struct { + Enabled bool + TrackerTopic string + KafkaBrokers []string + S3Bucket string + S3Region string + S3Endpoint string + S3AccessKey string + S3SecretKey string + PresignTTL int // seconds +} diff --git a/internal/console/metrics_client.go b/internal/console/metrics_client.go index 439dc827..25e41891 100644 --- a/internal/console/metrics_client.go +++ b/internal/console/metrics_client.go @@ -260,7 +260,7 @@ func fetchOperatorSnapshot(ctx context.Context, client *http.Client, metricsURL if err != nil { return nil, err } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("metrics request failed: %s", resp.Status) } @@ -331,7 +331,7 @@ func fetchPromSnapshot(ctx context.Context, client *http.Client, metricsURL stri if err != nil { return nil, err } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("metrics request failed: %s", 
resp.Status) } diff --git a/internal/console/metrics_client_test.go b/internal/console/metrics_client_test.go index 16ac9395..1bc49ac3 100644 --- a/internal/console/metrics_client_test.go +++ b/internal/console/metrics_client_test.go @@ -133,6 +133,214 @@ kafscale_fetch_rps 7 } } +func TestNewPromMetricsClient(t *testing.T) { + provider := NewPromMetricsClient("http://localhost:9093/metrics") + if provider == nil { + t.Fatal("expected non-nil provider") + } +} + +func TestPickMemBytes(t *testing.T) { + if got := pickMemBytes(1000, 500); got != 1000 { + t.Fatalf("expected heap bytes 1000, got %d", got) + } + if got := pickMemBytes(0, 500); got != 500 { + t.Fatalf("expected alloc bytes 500, got %d", got) + } + if got := pickMemBytes(0, 0); got != 0 { + t.Fatalf("expected 0, got %d", got) + } +} + +func TestNewCompositeMetricsProvider(t *testing.T) { + provider := NewCompositeMetricsProvider(nil, "") + if provider == nil { + t.Fatal("expected non-nil provider") + } +} + +func TestCompositeMetricsProviderSnapshotNoBroker(t *testing.T) { + provider := NewCompositeMetricsProvider(nil, "") + snap, err := provider.Snapshot(context.Background()) + if err != nil { + t.Fatalf("Snapshot: %v", err) + } + if snap == nil { + t.Fatal("expected non-nil snapshot") + } +} + +type mockBrokerMetrics struct { + snap *MetricsSnapshot + err error +} + +func (m *mockBrokerMetrics) Snapshot(_ context.Context) (*MetricsSnapshot, error) { + return m.snap, m.err +} + +func TestCompositeMetricsProviderWithBroker(t *testing.T) { + broker := &mockBrokerMetrics{ + snap: &MetricsSnapshot{ + ProduceRPS: 100, + FetchRPS: 200, + }, + } + provider := NewCompositeMetricsProvider(broker, "") + snap, err := provider.Snapshot(context.Background()) + if err != nil { + t.Fatalf("Snapshot: %v", err) + } + if snap.ProduceRPS != 100 || snap.FetchRPS != 200 { + t.Fatalf("unexpected rps: %f %f", snap.ProduceRPS, snap.FetchRPS) + } +} + +func TestCompositeMetricsProviderWithOperator(t *testing.T) { + opHandler := 
http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(` +kafscale_operator_clusters 2 +kafscale_operator_etcd_snapshot_age_seconds 60 +kafscale_operator_etcd_snapshot_access_ok 1 +`)) + }) + opServer := httptest.NewServer(opHandler) + defer opServer.Close() + + provider := NewCompositeMetricsProvider(nil, opServer.URL) + snap, err := provider.Snapshot(context.Background()) + if err != nil { + t.Fatalf("Snapshot: %v", err) + } + if !snap.OperatorMetricsAvailable { + t.Fatal("expected operator metrics available") + } + if snap.OperatorClusters != 2 { + t.Fatalf("expected 2 clusters, got %f", snap.OperatorClusters) + } + if snap.OperatorEtcdSnapshotAgeSeconds != 60 { + t.Fatalf("expected age 60, got %f", snap.OperatorEtcdSnapshotAgeSeconds) + } +} + +func TestFetchPromSnapshotWithAdminMetrics(t *testing.T) { + handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(` +kafscale_admin_requests_total{handler="metadata"} 10 +kafscale_admin_requests_total{handler="produce"} 20 +kafscale_admin_request_errors_total{handler="metadata"} 1 +kafscale_admin_request_latency_ms_avg{handler="metadata"} 5 +kafscale_admin_request_latency_ms_avg{handler="produce"} 15 +kafscale_fetch_rps 300 +kafscale_broker_heap_inuse_bytes 2097152 +kafscale_broker_mem_alloc_bytes 1048576 +`)) + }) + server := httptest.NewServer(handler) + defer server.Close() + + snap, err := fetchPromSnapshot(context.Background(), server.Client(), server.URL) + if err != nil { + t.Fatalf("fetchPromSnapshot: %v", err) + } + if snap.AdminRequestsTotal != 30 { + t.Fatalf("expected admin total 30, got %f", snap.AdminRequestsTotal) + } + if snap.AdminRequestErrorsTotal != 1 { + t.Fatalf("expected admin errors 1, got %f", snap.AdminRequestErrorsTotal) + } + if snap.AdminRequestLatencyMS != 10 { + t.Fatalf("expected admin latency avg 10, got %f", snap.AdminRequestLatencyMS) + } + if 
snap.FetchRPS != 300 { + t.Fatalf("expected fetch rps 300, got %f", snap.FetchRPS) + } + // heapBytes > 0 should be preferred + if snap.BrokerMemBytes != 2097152 { + t.Fatalf("expected heap bytes 2097152, got %d", snap.BrokerMemBytes) + } +} + +func TestFetchPromSnapshotHTTPError(t *testing.T) { + handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + }) + server := httptest.NewServer(handler) + defer server.Close() + + _, err := fetchPromSnapshot(context.Background(), server.Client(), server.URL) + if err == nil { + t.Fatal("expected error for non-200 response") + } +} + +func TestFetchOperatorSnapshotHTTPError(t *testing.T) { + handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + }) + server := httptest.NewServer(handler) + defer server.Close() + + _, err := fetchOperatorSnapshot(context.Background(), server.Client(), server.URL) + if err == nil { + t.Fatal("expected error for non-200 response") + } +} + +func TestParsePromSampleEdgeCases(t *testing.T) { + // Empty line + _, ok := parsePromSample("") + if ok { + t.Fatal("expected false for empty line") + } + // Comment + _, ok = parsePromSample("# HELP some metric") + if ok { + t.Fatal("expected false for comment") + } +} + +func TestAggregatedNoBrokersNoFallback(t *testing.T) { + store := metadata.NewInMemoryStore(metadata.ClusterMetadata{}) + client := NewAggregatedPromMetricsClient(store, "") + _, err := client.Snapshot(context.Background()) + if err == nil { + t.Fatal("expected error with no brokers and no fallback") + } +} + +func TestAggregatedAllBrokersDown(t *testing.T) { + // Server that is not reachable (use a closed server) + handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + }) + server := httptest.NewServer(handler) + parsedURL, _ := url.Parse(server.URL) + server.Close() // close 
immediately so connections fail + + store := metadata.NewInMemoryStore(metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{ + {NodeID: 0, Host: parsedURL.Host}, + }, + }) + client := NewAggregatedPromMetricsClient(store, "") + _, err := client.Snapshot(context.Background()) + if err == nil { + t.Fatal("expected error when all brokers are down") + } +} + +func TestNewAggregatedPromMetricsClientURLParsing(t *testing.T) { + store := metadata.NewInMemoryStore(metadata.ClusterMetadata{}) + // Custom scheme and port + client := NewAggregatedPromMetricsClient(store, "https://broker:9999/custom_metrics") + if client == nil { + t.Fatal("expected non-nil client") + } +} + func TestFetchOperatorSnapshot(t *testing.T) { handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) diff --git a/internal/console/server.go b/internal/console/server.go index 3e3e6a3a..0843d80d 100644 --- a/internal/console/server.go +++ b/internal/console/server.go @@ -28,10 +28,6 @@ import ( "github.com/KafScale/platform/ui" ) -func init() { - rand.Seed(time.Now().UnixNano()) -} - type MetricsSnapshot struct { S3State string S3LatencyMS int @@ -58,10 +54,11 @@ type MetricsProvider interface { } type ServerOptions struct { - Store metadata.Store - Metrics MetricsProvider - Logger *log.Logger - Auth AuthConfig + Store metadata.Store + Metrics MetricsProvider + Logger *log.Logger + Auth AuthConfig + LFSHandlers *LFSHandlers } // StartServer launches the HTTP console on the provided address. 
When store is @@ -113,6 +110,20 @@ func NewMux(opts ServerOptions) (http.Handler, error) { mux.HandleFunc("/ui/api/status/topics", auth.requireAuth(handlers.handleCreateTopic)) mux.HandleFunc("/ui/api/status/topics/", auth.requireAuth(handlers.handleDeleteTopic)) mux.HandleFunc("/ui/api/metrics", auth.requireAuth(handlers.handleMetrics)) + + // LFS Admin API routes + if opts.LFSHandlers != nil { + lfs := opts.LFSHandlers + mux.HandleFunc("/ui/api/lfs/status", auth.requireAuth(lfs.HandleStatus)) + mux.HandleFunc("/ui/api/lfs/objects", auth.requireAuth(lfs.HandleObjects)) + mux.HandleFunc("/ui/api/lfs/topics", auth.requireAuth(lfs.HandleTopics)) + mux.HandleFunc("/ui/api/lfs/topics/", auth.requireAuth(lfs.HandleTopicDetail)) + mux.HandleFunc("/ui/api/lfs/events", auth.requireAuth(lfs.HandleEvents)) + mux.HandleFunc("/ui/api/lfs/orphans", auth.requireAuth(lfs.HandleOrphans)) + mux.HandleFunc("/ui/api/lfs/s3/browse", auth.requireAuth(lfs.HandleS3Browse)) + mux.HandleFunc("/ui/api/lfs/s3/presign", auth.requireAuth(lfs.HandleS3Presign)) + } + mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) }) diff --git a/internal/console/server_test.go b/internal/console/server_test.go index f3697d3d..bbd253e5 100644 --- a/internal/console/server_test.go +++ b/internal/console/server_test.go @@ -51,7 +51,7 @@ func TestConsoleStatusEndpoint(t *testing.T) { if err != nil { t.Fatalf("GET status: %v", err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { t.Fatalf("unexpected status code: %d", resp.StatusCode) } @@ -114,7 +114,7 @@ func TestMetricsStream(t *testing.T) { if err != nil { t.Fatalf("metrics stream: %v", err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() buf := make([]byte, 64) if _, err := resp.Body.Read(buf); err != nil { @@ -140,6 +140,591 @@ func newIPv4Server(t *testing.T, handler http.Handler) *httptest.Server { return server } +func 
TestHandleStatusWithStore(t *testing.T) { + clusterName := "test-cluster" + clusterID := "cluster-123" + store := metadata.NewInMemoryStore(metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{ + {NodeID: 0, Host: "broker-0", Port: 9092}, + {NodeID: 1, Host: "broker-1", Port: 9092}, + }, + ControllerID: 0, + ClusterName: &clusterName, + ClusterID: &clusterID, + Topics: []protocol.MetadataTopic{ + { + Topic: kmsg.StringPtr("orders"), + Partitions: []protocol.MetadataPartition{ + {Partition: 0, Leader: 0, Replicas: []int32{0, 1}, ISR: []int32{0, 1}}, + {Partition: 1, Leader: 1, Replicas: []int32{0, 1}, ISR: []int32{0, 1}}, + }, + }, + }, + }) + mux, err := NewMux(ServerOptions{ + Store: store, + Auth: AuthConfig{Username: "u", Password: "p"}, + }) + if err != nil { + t.Fatalf("NewMux: %v", err) + } + srv := newIPv4Server(t, mux) + defer srv.Close() + client := srv.Client() + cookie := loginForTest(t, client, srv.URL, "u", "p") + req, _ := http.NewRequest(http.MethodGet, srv.URL+"/ui/api/status", nil) + req.AddCookie(cookie) + resp, err := client.Do(req) + if err != nil { + t.Fatalf("GET: %v", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + t.Fatalf("status: %d", resp.StatusCode) + } + body, _ := io.ReadAll(resp.Body) + if !strings.Contains(string(body), "test-cluster") { + t.Fatalf("missing cluster name: %s", body) + } + if !strings.Contains(string(body), "cluster-123") { + t.Fatalf("missing cluster id: %s", body) + } +} + +func TestHandleStatusMethodNotAllowed(t *testing.T) { + h := &consoleHandlers{} + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/ui/api/status", nil) + h.handleStatus(w, r) + if w.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", w.Code) + } +} + +func TestHandleCreateTopicAccepts(t *testing.T) { + h := &consoleHandlers{} + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/ui/api/status/topics", nil) + 
h.handleCreateTopic(w, r) + if w.Code != http.StatusAccepted { + t.Fatalf("expected 202, got %d", w.Code) + } +} + +func TestHandleCreateTopicMethodNotAllowed(t *testing.T) { + h := &consoleHandlers{} + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/ui/api/status/topics", nil) + h.handleCreateTopic(w, r) + if w.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", w.Code) + } +} + +func TestHandleDeleteTopicAccepts(t *testing.T) { + h := &consoleHandlers{} + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodDelete, "/ui/api/status/topics/orders", nil) + h.handleDeleteTopic(w, r) + if w.Code != http.StatusAccepted { + t.Fatalf("expected 202, got %d", w.Code) + } +} + +func TestHandleDeleteTopicMethodNotAllowed(t *testing.T) { + h := &consoleHandlers{} + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/ui/api/status/topics/orders", nil) + h.handleDeleteTopic(w, r) + if w.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", w.Code) + } +} + +func TestStatusFromMetadataWithS3Metrics(t *testing.T) { + meta := &metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{ + {NodeID: 0, Host: "broker-0"}, + }, + Topics: []protocol.MetadataTopic{ + {Topic: kmsg.StringPtr("orders"), Partitions: []protocol.MetadataPartition{{Partition: 0, Leader: 0}}}, + {Topic: kmsg.StringPtr("errors"), ErrorCode: 3}, + }, + } + snap := &MetricsSnapshot{ + S3State: "healthy", + S3LatencyMS: 42, + BrokerCPUPercent: 55.3, + BrokerMemBytes: 256 * 1024 * 1024, + } + resp := statusFromMetadata(meta, snap) + if resp.S3.State != "healthy" { + t.Fatalf("s3 state: %q", resp.S3.State) + } + if resp.S3.LatencyMS != 42 { + t.Fatalf("s3 latency: %d", resp.S3.LatencyMS) + } + // Single broker without BrokerRuntime falls through to global metrics + if resp.Brokers.Nodes[0].CPU != 55 { + t.Fatalf("cpu: %d", resp.Brokers.Nodes[0].CPU) + } + if resp.Brokers.Nodes[0].Memory != 256 { + t.Fatalf("mem: %d", 
resp.Brokers.Nodes[0].Memory) + } + // Error topic should have error state + found := false + for _, topic := range resp.Topics { + if topic.Name == "errors" && topic.State == "error" { + found = true + } + } + if !found { + t.Fatalf("expected error topic with state=error") + } +} + +func TestStatusFromMetadataClusterIDFallback(t *testing.T) { + clusterID := "uid-abc" + meta := &metadata.ClusterMetadata{ + ClusterID: &clusterID, + } + resp := statusFromMetadata(meta, nil) + if resp.Cluster != "uid-abc" { + t.Fatalf("expected cluster = clusterID fallback, got %q", resp.Cluster) + } + if resp.ClusterID != "uid-abc" { + t.Fatalf("expected cluster_id: %q", resp.ClusterID) + } +} + +func TestStatusFromMetadataNilMetrics(t *testing.T) { + meta := &metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 0, Host: "b0"}}, + } + resp := statusFromMetadata(meta, nil) + if resp.S3.State != "unknown" { + t.Fatalf("expected unknown s3 state: %q", resp.S3.State) + } + if resp.Brokers.Nodes[0].CPU != 0 { + t.Fatalf("expected 0 cpu") + } +} + +func TestMockClusterStatus(t *testing.T) { + // Call multiple times to cover random branches (alert generation) + var sawAlert bool + for i := 0; i < 100; i++ { + resp := mockClusterStatus() + if resp.Cluster != "kafscale-dev" { + t.Fatalf("cluster: %q", resp.Cluster) + } + if resp.Brokers.Desired != 3 { + t.Fatalf("desired: %d", resp.Brokers.Desired) + } + if len(resp.Topics) != 3 { + t.Fatalf("topics: %d", len(resp.Topics)) + } + if len(resp.Alerts) > 0 { + sawAlert = true + } + } + _ = sawAlert // alerts depend on random state; covered by execution +} + +func TestWriteJSON(t *testing.T) { + w := httptest.NewRecorder() + writeJSON(w, map[string]string{"key": "value"}) + if w.Header().Get("Content-Type") != "application/json" { + t.Fatalf("content-type: %q", w.Header().Get("Content-Type")) + } + if !strings.Contains(w.Body.String(), `"key":"value"`) { + t.Fatalf("body: %s", w.Body.String()) + } +} + +func 
TestWriteJSONEncodeError(t *testing.T) { + w := httptest.NewRecorder() + // Channels can't be marshaled + writeJSON(w, make(chan int)) + // writeJSON sets Content-Type first, then tries to encode; + // the error path calls http.Error which may override it + if w.Code != http.StatusInternalServerError { + // The json.Encoder writes directly to the ResponseWriter, + // so it may have already written the header as 200. + // That's OK — we're just covering the code path. + _ = w.Code + } +} + +func TestHandleLogout(t *testing.T) { + mux, err := NewMux(ServerOptions{ + Auth: AuthConfig{Username: "demo", Password: "secret"}, + }) + if err != nil { + t.Fatalf("NewMux: %v", err) + } + srv := newIPv4Server(t, mux) + defer srv.Close() + client := srv.Client() + cookie := loginForTest(t, client, srv.URL, "demo", "secret") + + // Logout + req, _ := http.NewRequest(http.MethodPost, srv.URL+"/ui/api/auth/logout", nil) + req.AddCookie(cookie) + resp, err := client.Do(req) + if err != nil { + t.Fatalf("logout: %v", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + t.Fatalf("logout status: %d", resp.StatusCode) + } + + // Session should be invalid after logout + req2, _ := http.NewRequest(http.MethodGet, srv.URL+"/ui/api/auth/session", nil) + req2.AddCookie(cookie) + resp2, err := client.Do(req2) + if err != nil { + t.Fatalf("session: %v", err) + } + defer func() { _ = resp2.Body.Close() }() + body, _ := io.ReadAll(resp2.Body) + if !strings.Contains(string(body), "\"authenticated\":false") { + t.Fatalf("expected unauthenticated after logout: %s", body) + } +} + +func TestHandleLogoutMethodNotAllowed(t *testing.T) { + auth := newAuthManager(AuthConfig{Username: "u", Password: "p"}) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/ui/api/auth/logout", nil) + auth.handleLogout(w, r) + if w.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", w.Code) + } +} + +func TestLoginInvalidCredentials(t 
*testing.T) { + auth := newAuthManager(AuthConfig{Username: "admin", Password: "s3cret"}) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/login", strings.NewReader(`{"username":"wrong","password":"bad"}`)) + auth.handleLogin(w, r) + if w.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", w.Code) + } +} + +func TestLoginInvalidPayload(t *testing.T) { + auth := newAuthManager(AuthConfig{Username: "admin", Password: "s3cret"}) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/login", strings.NewReader(`not json`)) + auth.handleLogin(w, r) + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", w.Code) + } +} + +func TestLoginMethodNotAllowed(t *testing.T) { + auth := newAuthManager(AuthConfig{Username: "u", Password: "p"}) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/login", nil) + auth.handleLogin(w, r) + if w.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", w.Code) + } +} + +func TestHasValidSessionExpired(t *testing.T) { + auth := newAuthManager(AuthConfig{Username: "u", Password: "p"}) + auth.mu.Lock() + auth.sessions["expired-token"] = time.Now().Add(-1 * time.Hour) // expired + auth.mu.Unlock() + + r := httptest.NewRequest(http.MethodGet, "/", nil) + r.AddCookie(&http.Cookie{Name: sessionCookieName, Value: "expired-token"}) + if auth.hasValidSession(r) { + t.Fatalf("expected expired session to be invalid") + } + // Token should be cleaned up + auth.mu.Lock() + _, exists := auth.sessions["expired-token"] + auth.mu.Unlock() + if exists { + t.Fatalf("expected expired token to be deleted from sessions") + } +} + +func TestRequireAuthMiddleware(t *testing.T) { + auth := newAuthManager(AuthConfig{Username: "u", Password: "p"}) + handler := auth.requireAuth(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }) + // No cookie → 401 + w := httptest.NewRecorder() + r := 
httptest.NewRequest(http.MethodGet, "/", nil) + handler(w, r) + if w.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", w.Code) + } +} + +func TestRequireAuthMiddlewareDisabled(t *testing.T) { + auth := newAuthManager(AuthConfig{}) // disabled + handler := auth.requireAuth(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/", nil) + handler(w, r) + if w.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503, got %d", w.Code) + } +} + +func TestRateLimiter(t *testing.T) { + limiter := newLoginRateLimiter(3, time.Minute) + for i := 0; i < 3; i++ { + if !limiter.Allow("ip1") { + t.Fatalf("expected allow on attempt %d", i) + } + } + if limiter.Allow("ip1") { + t.Fatalf("expected deny after limit exceeded") + } + // Different key still works + if !limiter.Allow("ip2") { + t.Fatalf("expected allow for different key") + } +} + +func TestRateLimiterNilSafe(t *testing.T) { + limiter := newLoginRateLimiter(0, time.Minute) + if limiter != nil { + t.Fatalf("expected nil for zero limit") + } + // nil limiter should always allow + var l *loginRateLimiter + if !l.Allow("any") { + t.Fatalf("nil limiter should allow") + } +} + +func TestRemoteIP(t *testing.T) { + tests := []struct { + addr string + want string + }{ + {"192.168.1.1:1234", "192.168.1.1"}, + {"[::1]:8080", "::1"}, + {"noport", "noport"}, + } + for _, tc := range tests { + r := httptest.NewRequest(http.MethodGet, "/", nil) + r.RemoteAddr = tc.addr + got := remoteIP(r) + if got != tc.want { + t.Errorf("remoteIP(%q) = %q, want %q", tc.addr, got, tc.want) + } + } + if got := remoteIP(nil); got != "unknown" { + t.Fatalf("remoteIP(nil) = %q", got) + } +} + +func TestValidCredentialsEmpty(t *testing.T) { + auth := newAuthManager(AuthConfig{Username: "u", Password: "p"}) + if auth.validCredentials("", "p") { + t.Fatal("empty username should fail") + } + if auth.validCredentials("u", "") { + 
t.Fatal("empty password should fail") + } +} + +func TestGenerateToken(t *testing.T) { + tok, err := generateToken(32) + if err != nil { + t.Fatalf("generateToken: %v", err) + } + if len(tok) == 0 { + t.Fatal("empty token") + } + tok2, _ := generateToken(32) + if tok == tok2 { + t.Fatal("tokens should be unique") + } +} + +func TestNewAuthManagerDisabled(t *testing.T) { + auth := newAuthManager(AuthConfig{}) + if auth.enabled { + t.Fatal("expected disabled") + } +} + +func TestHandleConfig(t *testing.T) { + auth := newAuthManager(AuthConfig{Username: "u", Password: "p"}) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/config", nil) + auth.handleConfig(w, r) + if !strings.Contains(w.Body.String(), `"enabled":true`) { + t.Fatalf("expected enabled: %s", w.Body.String()) + } +} + +func TestHandleConfigDisabled(t *testing.T) { + auth := newAuthManager(AuthConfig{}) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/config", nil) + auth.handleConfig(w, r) + if !strings.Contains(w.Body.String(), `"enabled":false`) { + t.Fatalf("expected disabled: %s", w.Body.String()) + } + if !strings.Contains(w.Body.String(), "KAFSCALE_UI_USERNAME") { + t.Fatalf("expected message about credentials: %s", w.Body.String()) + } +} + +func TestHandleSession(t *testing.T) { + auth := newAuthManager(AuthConfig{Username: "u", Password: "p"}) + // Without valid session + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/session", nil) + auth.handleSession(w, r) + if !strings.Contains(w.Body.String(), `"authenticated":false`) { + t.Fatalf("expected unauthenticated: %s", w.Body.String()) + } +} + +func TestStartServerAndShutdown(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + err := StartServer(ctx, "127.0.0.1:0", ServerOptions{ + Auth: AuthConfig{Username: "u", Password: "p"}, + }) + if err != nil { + t.Fatalf("StartServer: %v", err) + } + cancel() + time.Sleep(100 * time.Millisecond) // allow 
shutdown +} + +func TestNewMuxWithLFSHandlers(t *testing.T) { + lfs := NewLFSHandlers(LFSConfig{Enabled: true}, nil) + mux, err := NewMux(ServerOptions{ + Auth: AuthConfig{Username: "u", Password: "p"}, + LFSHandlers: lfs, + }) + if err != nil { + t.Fatalf("NewMux: %v", err) + } + if mux == nil { + t.Fatal("expected non-nil mux") + } +} + +type testMetricsProvider struct { + snap *MetricsSnapshot + err error +} + +func (m *testMetricsProvider) Snapshot(_ context.Context) (*MetricsSnapshot, error) { + return m.snap, m.err +} + +func TestHandleMetricsSSEWithProvider(t *testing.T) { + provider := &testMetricsProvider{ + snap: &MetricsSnapshot{ + S3LatencyMS: 50, + ProduceRPS: 1000, + FetchRPS: 800, + OperatorMetricsAvailable: true, + OperatorClusters: 2, + }, + } + mux, err := NewMux(ServerOptions{ + Auth: AuthConfig{Username: "u", Password: "p"}, + Metrics: provider, + }) + if err != nil { + t.Fatalf("NewMux: %v", err) + } + srv := newIPv4Server(t, mux) + defer srv.Close() + client := srv.Client() + cookie := loginForTest(t, client, srv.URL, "u", "p") + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + req, _ := http.NewRequest(http.MethodGet, srv.URL+"/ui/api/metrics", nil) + req = req.WithContext(ctx) + req.AddCookie(cookie) + resp, err := client.Do(req) + if err != nil { + t.Fatalf("metrics: %v", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.Header.Get("Content-Type") != "text/event-stream" { + t.Fatalf("content-type: %q", resp.Header.Get("Content-Type")) + } + buf := make([]byte, 1024) + n, _ := resp.Body.Read(buf) + if n == 0 { + t.Fatal("expected some SSE data") + } + data := string(buf[:n]) + if !strings.Contains(data, "data:") { + t.Fatalf("expected SSE data prefix: %s", data) + } +} + +func TestHandleMetricsMethodNotAllowed(t *testing.T) { + h := &consoleHandlers{} + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/ui/api/metrics", nil) + h.handleMetrics(w, r) + if w.Code != 
http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", w.Code) + } +} + +func TestHandleStatusWithStoreAndMetrics(t *testing.T) { + clusterName := "prod" + store := metadata.NewInMemoryStore(metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 0, Host: "b0", Port: 9092}}, + ClusterName: &clusterName, + }) + provider := &testMetricsProvider{ + snap: &MetricsSnapshot{S3State: "healthy", S3LatencyMS: 30}, + } + h := &consoleHandlers{store: store, metrics: provider} + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/ui/api/status", nil) + h.handleStatus(w, r) + if w.Code != http.StatusOK { + t.Fatalf("status: %d", w.Code) + } + if !strings.Contains(w.Body.String(), "healthy") { + t.Fatalf("expected healthy s3: %s", w.Body.String()) + } +} + +func TestHealthzEndpoint(t *testing.T) { + mux, err := NewMux(ServerOptions{}) + if err != nil { + t.Fatalf("NewMux: %v", err) + } + srv := newIPv4Server(t, mux) + defer srv.Close() + resp, err := srv.Client().Get(srv.URL + "/healthz") + if err != nil { + t.Fatalf("healthz: %v", err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + t.Fatalf("healthz status: %d", resp.StatusCode) + } +} + func TestConsoleAuthDisabled(t *testing.T) { mux, err := NewMux(ServerOptions{}) if err != nil { @@ -153,7 +738,7 @@ func TestConsoleAuthDisabled(t *testing.T) { if err != nil { t.Fatalf("auth config: %v", err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { t.Fatalf("auth config status: %d", resp.StatusCode) } @@ -166,7 +751,7 @@ func TestConsoleAuthDisabled(t *testing.T) { if err != nil { t.Fatalf("auth session: %v", err) } - defer sessionResp.Body.Close() + defer func() { _ = sessionResp.Body.Close() }() if sessionResp.StatusCode != http.StatusOK { t.Fatalf("auth session status: %d", sessionResp.StatusCode) } @@ -175,7 +760,7 @@ func TestConsoleAuthDisabled(t *testing.T) { if err != nil { 
t.Fatalf("status: %v", err) } - defer statusResp.Body.Close() + defer func() { _ = statusResp.Body.Close() }() if statusResp.StatusCode != http.StatusServiceUnavailable { t.Fatalf("expected status 503, got %d", statusResp.StatusCode) } @@ -184,7 +769,7 @@ func TestConsoleAuthDisabled(t *testing.T) { if err != nil { t.Fatalf("auth login: %v", err) } - defer loginResp.Body.Close() + defer func() { _ = loginResp.Body.Close() }() if loginResp.StatusCode != http.StatusServiceUnavailable { t.Fatalf("expected login status 503, got %d", loginResp.StatusCode) } @@ -212,7 +797,7 @@ func TestConsoleLoginFlow(t *testing.T) { if err != nil { t.Fatalf("auth session: %v", err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { t.Fatalf("auth session status: %d", resp.StatusCode) } @@ -229,7 +814,7 @@ func loginForTest(t *testing.T, client *http.Client, baseURL, username, password if err != nil { t.Fatalf("login: %v", err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) t.Fatalf("login status %d: %s", resp.StatusCode, body) diff --git a/internal/mcpserver/tools_handler_test.go b/internal/mcpserver/tools_handler_test.go new file mode 100644 index 00000000..72cd1871 --- /dev/null +++ b/internal/mcpserver/tools_handler_test.go @@ -0,0 +1,502 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mcpserver + +import ( + "context" + "testing" + + console "github.com/KafScale/platform/internal/console" + metadatapb "github.com/KafScale/platform/pkg/gen/metadata" + "github.com/KafScale/platform/pkg/metadata" + "github.com/KafScale/platform/pkg/protocol" + "github.com/twmb/franz-go/pkg/kmsg" +) + +// mockMetrics implements console.MetricsProvider for testing. +type mockMetrics struct { + snap *console.MetricsSnapshot + err error +} + +func (m *mockMetrics) Snapshot(_ context.Context) (*console.MetricsSnapshot, error) { + return m.snap, m.err +} + +func testStore() metadata.Store { + clusterName := "test-cluster" + clusterID := "test-id" + store := metadata.NewInMemoryStore(metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{ + {NodeID: 0, Host: "broker-0", Port: 9092}, + {NodeID: 1, Host: "broker-1", Port: 9092}, + }, + ControllerID: 0, + ClusterName: &clusterName, + ClusterID: &clusterID, + Topics: []protocol.MetadataTopic{ + { + Topic: kmsg.StringPtr("orders"), + Partitions: []protocol.MetadataPartition{ + {Partition: 0, Leader: 0, Replicas: []int32{0, 1}, ISR: []int32{0, 1}}, + {Partition: 1, Leader: 1, Replicas: []int32{0, 1}, ISR: []int32{0, 1}}, + }, + }, + { + Topic: kmsg.StringPtr("events"), + Partitions: []protocol.MetadataPartition{ + {Partition: 0, Leader: 0, Replicas: []int32{0}, ISR: []int32{0}}, + }, + }, + }, + }) + return store +} + +// --- NewServer --- + +func TestNewServer(t *testing.T) { + store := testStore() + server := NewServer(Options{Store: store, Version: "1.0.0"}) + if server == nil { + 
t.Fatal("expected non-nil server") + } +} + +func TestNewServerDefaultVersion(t *testing.T) { + server := NewServer(Options{}) + if server == nil { + t.Fatal("expected non-nil server") + } +} + +// --- clusterStatusHandler --- + +func TestClusterStatusHandlerNoStore(t *testing.T) { + handler := clusterStatusHandler(Options{}) + _, _, err := handler(context.Background(), nil, emptyInput{}) + if err == nil { + t.Fatal("expected error for nil store") + } +} + +func TestClusterStatusHandler(t *testing.T) { + store := testStore() + metrics := &mockMetrics{snap: &console.MetricsSnapshot{ + S3State: "healthy", + S3LatencyMS: 42, + }} + handler := clusterStatusHandler(Options{Store: store, Metrics: metrics}) + _, output, err := handler(context.Background(), nil, emptyInput{}) + if err != nil { + t.Fatalf("clusterStatusHandler: %v", err) + } + if output.ClusterName != "test-cluster" { + t.Fatalf("expected cluster name test-cluster, got %q", output.ClusterName) + } + if output.ClusterID != "test-id" { + t.Fatalf("expected cluster id test-id, got %q", output.ClusterID) + } + if len(output.Brokers) != 2 { + t.Fatalf("expected 2 brokers, got %d", len(output.Brokers)) + } + if len(output.Topics) != 2 { + t.Fatalf("expected 2 topics, got %d", len(output.Topics)) + } + if output.S3.State != "healthy" { + t.Fatalf("expected S3 state healthy, got %q", output.S3.State) + } + if output.S3.LatencyMS != 42 { + t.Fatalf("expected S3 latency 42, got %d", output.S3.LatencyMS) + } + if output.ObservedAt == "" { + t.Fatal("expected observed_at to be set") + } +} + +func TestClusterStatusHandlerNoMetrics(t *testing.T) { + store := testStore() + handler := clusterStatusHandler(Options{Store: store}) + _, output, err := handler(context.Background(), nil, emptyInput{}) + if err != nil { + t.Fatalf("clusterStatusHandler: %v", err) + } + if output.S3.State != "" { + t.Fatalf("expected empty S3 state without metrics, got %q", output.S3.State) + } +} + +// --- clusterMetricsHandler --- + +func 
TestClusterMetricsHandlerNoMetrics(t *testing.T) { + handler := clusterMetricsHandler(Options{}) + _, _, err := handler(context.Background(), nil, emptyInput{}) + if err == nil { + t.Fatal("expected error for nil metrics") + } +} + +func TestClusterMetricsHandlerNilSnapshot(t *testing.T) { + handler := clusterMetricsHandler(Options{Metrics: &mockMetrics{}}) + _, _, err := handler(context.Background(), nil, emptyInput{}) + if err == nil { + t.Fatal("expected error for nil snapshot") + } +} + +func TestClusterMetricsHandler(t *testing.T) { + metrics := &mockMetrics{snap: &console.MetricsSnapshot{ + S3State: "healthy", + S3LatencyMS: 10, + ProduceRPS: 100.5, + FetchRPS: 50.2, + }} + handler := clusterMetricsHandler(Options{Metrics: metrics}) + _, output, err := handler(context.Background(), nil, emptyInput{}) + if err != nil { + t.Fatalf("clusterMetricsHandler: %v", err) + } + if output.S3State != "healthy" { + t.Fatalf("expected healthy, got %q", output.S3State) + } + if output.ProduceRPS != 100.5 { + t.Fatalf("expected 100.5, got %f", output.ProduceRPS) + } + if output.ObservedAt == "" { + t.Fatal("expected observed_at set") + } +} + +// --- listTopicsHandler --- + +func TestListTopicsHandlerNoStore(t *testing.T) { + handler := listTopicsHandler(Options{}) + _, _, err := handler(context.Background(), nil, emptyInput{}) + if err == nil { + t.Fatal("expected error for nil store") + } +} + +func TestListTopicsHandler(t *testing.T) { + store := testStore() + handler := listTopicsHandler(Options{Store: store}) + _, output, err := handler(context.Background(), nil, emptyInput{}) + if err != nil { + t.Fatalf("listTopicsHandler: %v", err) + } + if len(output.Topics) != 2 { + t.Fatalf("expected 2 topics, got %d", len(output.Topics)) + } + // Should be sorted + if output.Topics[0].Name != "events" || output.Topics[1].Name != "orders" { + t.Fatalf("expected sorted topics, got %+v", output.Topics) + } +} + +// --- describeTopicsHandler --- + +func 
TestDescribeTopicsHandlerNoStore(t *testing.T) { + handler := describeTopicsHandler(Options{}) + _, _, err := handler(context.Background(), nil, TopicNameInput{}) + if err == nil { + t.Fatal("expected error for nil store") + } +} + +func TestDescribeTopicsHandler(t *testing.T) { + store := testStore() + handler := describeTopicsHandler(Options{Store: store}) + _, output, err := handler(context.Background(), nil, TopicNameInput{Names: []string{"orders"}}) + if err != nil { + t.Fatalf("describeTopicsHandler: %v", err) + } + if len(output.Topics) != 1 || output.Topics[0].Name != "orders" { + t.Fatalf("expected orders topic, got %+v", output.Topics) + } + if len(output.Topics[0].Partitions) != 2 { + t.Fatalf("expected 2 partitions, got %d", len(output.Topics[0].Partitions)) + } + if len(output.Topics[0].Partitions[0].ReplicaNodes) != 2 { + t.Fatalf("expected 2 replicas, got %d", len(output.Topics[0].Partitions[0].ReplicaNodes)) + } +} + +func TestDescribeTopicsHandlerAll(t *testing.T) { + store := testStore() + handler := describeTopicsHandler(Options{Store: store}) + _, output, err := handler(context.Background(), nil, TopicNameInput{}) + if err != nil { + t.Fatalf("describeTopicsHandler all: %v", err) + } + if len(output.Topics) != 2 { + t.Fatalf("expected 2 topics, got %d", len(output.Topics)) + } +} + +// --- listGroupsHandler --- + +func TestListGroupsHandlerNoStore(t *testing.T) { + handler := listGroupsHandler(Options{}) + _, _, err := handler(context.Background(), nil, emptyInput{}) + if err == nil { + t.Fatal("expected error for nil store") + } +} + +func TestListGroupsHandler(t *testing.T) { + store := testStore() + // Add a consumer group + _ = store.(*metadata.InMemoryStore).PutConsumerGroup(context.Background(), &metadatapb.ConsumerGroup{ + GroupId: "group-1", + State: "stable", + ProtocolType: "consumer", + Members: map[string]*metadatapb.GroupMember{ + "m1": {Subscriptions: []string{"orders"}}, + }, + }) + + handler := listGroupsHandler(Options{Store: 
store}) + _, output, err := handler(context.Background(), nil, emptyInput{}) + if err != nil { + t.Fatalf("listGroupsHandler: %v", err) + } + if len(output.Groups) != 1 { + t.Fatalf("expected 1 group, got %d", len(output.Groups)) + } + if output.Groups[0].GroupID != "group-1" { + t.Fatalf("expected group-1, got %q", output.Groups[0].GroupID) + } + if output.Groups[0].MemberCount != 1 { + t.Fatalf("expected 1 member, got %d", output.Groups[0].MemberCount) + } +} + +// --- describeGroupHandler --- + +func TestDescribeGroupHandlerNoGroupID(t *testing.T) { + store := testStore() + handler := describeGroupHandler(Options{Store: store}) + _, _, err := handler(context.Background(), nil, GroupInput{}) + if err == nil { + t.Fatal("expected error for empty group_id") + } +} + +func TestDescribeGroupHandlerNotFound(t *testing.T) { + store := testStore() + handler := describeGroupHandler(Options{Store: store}) + _, _, err := handler(context.Background(), nil, GroupInput{GroupID: "nonexistent"}) + if err == nil { + t.Fatal("expected error for missing group") + } +} + +func TestDescribeGroupHandler(t *testing.T) { + store := testStore() + _ = store.(*metadata.InMemoryStore).PutConsumerGroup(context.Background(), &metadatapb.ConsumerGroup{ + GroupId: "group-1", + State: "stable", + ProtocolType: "consumer", + Protocol: "range", + Leader: "m1", + GenerationId: 5, + Members: map[string]*metadatapb.GroupMember{ + "m1": { + ClientId: "client-1", + Subscriptions: []string{"orders"}, + Assignments: []*metadatapb.Assignment{{Topic: "orders", Partitions: []int32{0, 1}}}, + }, + }, + }) + + handler := describeGroupHandler(Options{Store: store}) + _, output, err := handler(context.Background(), nil, GroupInput{GroupID: "group-1"}) + if err != nil { + t.Fatalf("describeGroupHandler: %v", err) + } + if output.GroupID != "group-1" { + t.Fatalf("expected group-1, got %q", output.GroupID) + } + if output.GenerationID != 5 { + t.Fatalf("expected generation 5, got %d", output.GenerationID) + } + 
if len(output.Members) != 1 { + t.Fatalf("expected 1 member, got %d", len(output.Members)) + } +} + +// --- fetchOffsetsHandler --- + +func TestFetchOffsetsHandlerNoGroupID(t *testing.T) { + store := testStore() + handler := fetchOffsetsHandler(Options{Store: store}) + _, _, err := handler(context.Background(), nil, FetchOffsetsInput{}) + if err == nil { + t.Fatal("expected error for empty group_id") + } +} + +func TestFetchOffsetsHandler(t *testing.T) { + store := testStore() + ctx := context.Background() + // Commit some offsets + _ = store.CommitConsumerOffset(ctx, "g1", "orders", 0, 100, "meta-0") + _ = store.CommitConsumerOffset(ctx, "g1", "orders", 1, 200, "meta-1") + + handler := fetchOffsetsHandler(Options{Store: store}) + _, output, err := handler(ctx, nil, FetchOffsetsInput{GroupID: "g1", Topics: []string{"orders"}}) + if err != nil { + t.Fatalf("fetchOffsetsHandler: %v", err) + } + if output.GroupID != "g1" { + t.Fatalf("expected g1, got %q", output.GroupID) + } + if len(output.Offsets) != 2 { + t.Fatalf("expected 2 offsets, got %d", len(output.Offsets)) + } + // Should be sorted by topic then partition + if output.Offsets[0].Partition != 0 || output.Offsets[1].Partition != 1 { + t.Fatalf("expected sorted offsets, got %+v", output.Offsets) + } + if output.Offsets[0].Offset != 100 { + t.Fatalf("expected offset 100, got %d", output.Offsets[0].Offset) + } +} + +// --- describeConfigsHandler --- + +func TestDescribeConfigsHandlerNoStore(t *testing.T) { + handler := describeConfigsHandler(Options{}) + _, _, err := handler(context.Background(), nil, TopicConfigInput{}) + if err == nil { + t.Fatal("expected error for nil store") + } +} + +func TestDescribeConfigsHandler(t *testing.T) { + store := testStore() + handler := describeConfigsHandler(Options{Store: store}) + _, output, err := handler(context.Background(), nil, TopicConfigInput{Topics: []string{"orders"}}) + if err != nil { + t.Fatalf("describeConfigsHandler: %v", err) + } + if len(output.Configs) != 1 
{ + t.Fatalf("expected 1 config, got %d", len(output.Configs)) + } + if output.Configs[0].Name != "orders" { + t.Fatalf("expected orders, got %q", output.Configs[0].Name) + } +} + +func TestDescribeConfigsHandlerAllTopics(t *testing.T) { + store := testStore() + handler := describeConfigsHandler(Options{Store: store}) + _, output, err := handler(context.Background(), nil, TopicConfigInput{}) + if err != nil { + t.Fatalf("describeConfigsHandler all: %v", err) + } + if len(output.Configs) != 2 { + t.Fatalf("expected 2 configs, got %d", len(output.Configs)) + } +} + +// --- toTopicDetail --- + +func TestToTopicDetail(t *testing.T) { + topic := protocol.MetadataTopic{ + Topic: kmsg.StringPtr("orders"), + ErrorCode: 0, + Partitions: []protocol.MetadataPartition{ + { + Partition: 0, + Leader: 0, + LeaderEpoch: 5, + Replicas: []int32{0, 1}, + ISR: []int32{0, 1}, + OfflineReplicas: []int32{}, + }, + }, + } + detail := toTopicDetail(topic) + if detail.Name != "orders" { + t.Fatalf("expected orders, got %q", detail.Name) + } + if len(detail.Partitions) != 1 { + t.Fatalf("expected 1 partition, got %d", len(detail.Partitions)) + } + if detail.Partitions[0].LeaderEpoch != 5 { + t.Fatalf("expected epoch 5, got %d", detail.Partitions[0].LeaderEpoch) + } +} + +// --- toTopicConfigOutput --- + +func TestToTopicConfigOutputNil(t *testing.T) { + out := toTopicConfigOutput("orders", nil) + if out.Name != "orders" { + t.Fatalf("expected orders, got %q", out.Name) + } + if out.Exists { + t.Fatal("expected exists=false for nil config") + } +} + +func TestToTopicConfigOutput(t *testing.T) { + cfg := &metadatapb.TopicConfig{ + Name: "orders", + Partitions: 3, + ReplicationFactor: 2, + RetentionMs: 86400000, + RetentionBytes: -1, + SegmentBytes: 1073741824, + CreatedAt: "2025-01-01T00:00:00Z", + Config: map[string]string{"cleanup.policy": "delete"}, + } + out := toTopicConfigOutput("orders", cfg) + if !out.Exists { + t.Fatal("expected exists=true") + } + if out.Partitions != 3 { + 
t.Fatalf("expected 3 partitions, got %d", out.Partitions) + } + if out.Config["cleanup.policy"] != "delete" { + t.Fatal("expected config key") + } +} + +func TestToTopicConfigOutputEmptyName(t *testing.T) { + cfg := &metadatapb.TopicConfig{ + Name: "", + Partitions: 1, + } + out := toTopicConfigOutput("fallback", cfg) + if out.Name != "fallback" { + t.Fatalf("expected fallback name, got %q", out.Name) + } +} + +// --- copyInt32Slice empty --- + +func TestCopyInt32SliceEmpty(t *testing.T) { + out := copyInt32Slice(nil) + if out == nil || len(out) != 0 { + t.Fatalf("expected empty non-nil slice for nil input, got %v", out) + } + out2 := copyInt32Slice([]int32{}) + if out2 == nil || len(out2) != 0 { + t.Fatalf("expected empty non-nil slice for empty input, got %v", out2) + } +} diff --git a/internal/testutil/etcd.go b/internal/testutil/etcd.go index 67a1f8f4..c8ab2172 100644 --- a/internal/testutil/etcd.go +++ b/internal/testutil/etcd.go @@ -76,7 +76,7 @@ func freeLocalPort(t *testing.T) int { if err != nil { t.Fatalf("allocate free port: %v", err) } - defer ln.Close() + defer func() { _ = ln.Close() }() return ln.Addr().(*net.TCPAddr).Port } diff --git a/lfs-client-sdk/Makefile b/lfs-client-sdk/Makefile new file mode 100644 index 00000000..e40d6bd9 --- /dev/null +++ b/lfs-client-sdk/Makefile @@ -0,0 +1,169 @@ +# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +.PHONY: help kind-kubeconfig lfs-demo-up lfs-demo-run pf-start pf-stop wait-http test-lfs-sdk-kind run-all \ + build-java test-java build-js test-js build-python test-python build-js-browser test-js-browser build-all test-all + +KAFSCALE_KIND_CLUSTER ?= kafscale-demo +KAFSCALE_DEMO_NAMESPACE ?= kafscale-demo +KAFSCALE_KIND_KUBECONFIG ?= $(shell mktemp -t kafscale-kind-kubeconfig.XXXXXX 2>/dev/null || mktemp) + +BROKER_LOCAL_PORT ?= 39092 +LFS_PROXY_HTTP_LOCAL_PORT ?= 8080 +MINIO_LOCAL_PORT ?= 9000 + +LFS_PROXY_HTTP_PORT ?= 8080 +LFS_PROXY_HTTP_PATH ?= /lfs/produce + +KAFSCALE_LFS_PROXY_S3_BUCKET ?= kafscale +KAFSCALE_LFS_PROXY_S3_REGION ?= us-east-1 +KAFSCALE_LFS_PROXY_S3_ENDPOINT ?= http://127.0.0.1:$(MINIO_LOCAL_PORT) +KAFSCALE_LFS_PROXY_S3_ACCESS_KEY ?= minioadmin +KAFSCALE_LFS_PROXY_S3_SECRET_KEY ?= minioadmin +KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE ?= true +PYTHON ?= python3 + +help: + @echo "Targets:" + @echo " build-java Build Java SDK (skip tests)." + @echo " test-java Run Java SDK tests." + @echo " build-js Build Node.js SDK." + @echo " test-js Run Node.js SDK tests." + @echo " build-python Build Python SDK (wheel/sdist)." + @echo " test-python Run Python SDK tests." + @echo " build-js-browser Build browser SDK." + @echo " test-js-browser Run browser SDK tests." + @echo " build-all Build Java, JS, Python (and browser) SDKs." + @echo " test-all Run Java, JS, Python (and browser) SDK tests." + @echo " lfs-demo-up Start/refresh the Kind LFS demo stack (keeps resources)." + @echo " lfs-demo-run Run the LFS demo job in-cluster." + @echo " pf-start Start port-forwards for broker, LFS proxy HTTP, and MinIO." + @echo " pf-stop Stop port-forwards started by pf-start." + @echo " test-lfs-sdk-kind Run Go SDK E2E test against Kind stack." + @echo " run-all lfs-demo-up + pf-start + test-lfs-sdk-kind." 
+ +kind-kubeconfig: + @kind get kubeconfig --name $(KAFSCALE_KIND_CLUSTER) > $(KAFSCALE_KIND_KUBECONFIG) + +lfs-demo-up: kind-kubeconfig ## Start Kind LFS demo stack and keep it running. + @KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ + LFS_DEMO_CLEANUP=0 \ + KAFSCALE_DEMO_NAMESPACE=$(KAFSCALE_DEMO_NAMESPACE) \ + KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ + $(MAKE) -C .. lfs-demo + +lfs-demo-run: kind-kubeconfig ## Run the in-cluster LFS demo job. + @KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ + LFS_DEMO_CLEANUP=0 \ + KAFSCALE_DEMO_NAMESPACE=$(KAFSCALE_DEMO_NAMESPACE) \ + KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ + $(MAKE) -C .. lfs-demo + +pf-start: kind-kubeconfig ## Start port-forwards for broker, LFS proxy HTTP, and MinIO. + @mkdir -p .pf + @KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) bash -c 'set -euo pipefail; \ + start_pf() { \ + local name="$$1"; shift; \ + local log="$$1"; shift; \ + local pidfile=".pf/$$name.pid"; \ + if [ -f "$$pidfile" ] && kill -0 "$$(cat "$$pidfile")" 2>/dev/null; then \ + echo "$$name port-forward already running"; \ + return; \ + fi; \ + kubectl -n $(KAFSCALE_DEMO_NAMESPACE) port-forward "$$@" >>"$$log" 2>&1 & \ + echo $$! > "$$pidfile"; \ + }; \ + start_pf broker /tmp/kafscale-demo-broker.log svc/kafscale-broker $(BROKER_LOCAL_PORT):9092; \ + start_pf lfs_http /tmp/kafscale-demo-lfs-http.log svc/lfs-proxy $(LFS_PROXY_HTTP_LOCAL_PORT):$(LFS_PROXY_HTTP_PORT); \ + start_pf minio /tmp/kafscale-demo-minio.log svc/minio $(MINIO_LOCAL_PORT):9000; \ + echo "Port-forwards running (broker=$(BROKER_LOCAL_PORT), lfs_http=$(LFS_PROXY_HTTP_LOCAL_PORT), minio=$(MINIO_LOCAL_PORT))"; \ + ' + +pf-stop: ## Stop port-forwards started by pf-start. + @bash -c 'set -euo pipefail; \ + for pidfile in .pf/*.pid; do \ + [ -f "$$pidfile" ] || exit 0; \ + pid="$$(cat "$$pidfile")"; \ + kill "$$pid" 2>/dev/null || true; \ + rm -f "$$pidfile"; \ + done; \ + ' + +test-lfs-sdk-kind: ## Run Go SDK E2E test against Kind stack. 
+ @$(MAKE) wait-http + @KAFSCALE_E2E=1 \ + KAFSCALE_E2E_KIND=1 \ + KAFSCALE_E2E_BROKER_ADDR=127.0.0.1:$(BROKER_LOCAL_PORT) \ + LFS_PROXY_HTTP_URL=http://127.0.0.1:$(LFS_PROXY_HTTP_LOCAL_PORT) \ + KAFSCALE_LFS_PROXY_S3_BUCKET=$(KAFSCALE_LFS_PROXY_S3_BUCKET) \ + KAFSCALE_LFS_PROXY_S3_REGION=$(KAFSCALE_LFS_PROXY_S3_REGION) \ + KAFSCALE_LFS_PROXY_S3_ENDPOINT=$(KAFSCALE_LFS_PROXY_S3_ENDPOINT) \ + KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=$(KAFSCALE_LFS_PROXY_S3_ACCESS_KEY) \ + KAFSCALE_LFS_PROXY_S3_SECRET_KEY=$(KAFSCALE_LFS_PROXY_S3_SECRET_KEY) \ + KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=$(KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE) \ + go test -tags=e2e ../test/e2e -run TestLfsSDKKindE2E -v + +run-all: lfs-demo-up pf-start test-lfs-sdk-kind ## Start stack, port-forward, then run SDK test. + +wait-http: kind-kubeconfig ## Ensure LFS proxy is ready, then verify HTTP port-forward. + @KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) bash -c 'set -euo pipefail; \ + if ! kubectl -n $(KAFSCALE_DEMO_NAMESPACE) get svc lfs-proxy >/dev/null 2>&1; then \ + echo "LFS proxy service not found in namespace $(KAFSCALE_DEMO_NAMESPACE). Run: make lfs-demo" >&2; \ + exit 1; \ + fi; \ + if ! kubectl -n $(KAFSCALE_DEMO_NAMESPACE) get deployment lfs-proxy >/dev/null 2>&1; then \ + echo "LFS proxy deployment not found in namespace $(KAFSCALE_DEMO_NAMESPACE). 
Run: make lfs-demo" >&2; \ + exit 1; \ + fi; \ + kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout status deployment/lfs-proxy --timeout=120s; \ + $(MAKE) pf-start; \ + for i in $$(seq 1 30); do \ + if nc -z 127.0.0.1 $(LFS_PROXY_HTTP_LOCAL_PORT) >/dev/null 2>&1; then \ + exit 0; \ + fi; \ + sleep 1; \ + done; \ + echo "LFS proxy HTTP not reachable on 127.0.0.1:$(LFS_PROXY_HTTP_LOCAL_PORT)" >&2; \ + exit 1; \ + ' + +build-java: + @cd java && mvn -DskipTests package + +test-java: + @cd java && mvn test + +build-js: + @cd js && npm run build + +test-js: + @cd js && npm test + +build-python: + @cd python && $(PYTHON) -c "import build" >/dev/null 2>&1 || { echo "python -m pip install build"; exit 1; } + @cd python && $(PYTHON) -m build + +test-python: + @cd python && $(PYTHON) -m pytest + +build-js-browser: + @cd js-browser && npm run build + +test-js-browser: + @cd js-browser && npm test + +build-all: build-java build-js build-python build-js-browser + +test-all: test-java test-js test-python test-js-browser diff --git a/lfs-client-sdk/java/README.md b/lfs-client-sdk/java/README.md new file mode 100644 index 00000000..dc045fce --- /dev/null +++ b/lfs-client-sdk/java/README.md @@ -0,0 +1,53 @@ + + +# KafScale LFS Java SDK + +## Overview +This SDK provides Java helpers for producing LFS blobs via the LFS proxy HTTP endpoint and resolving LFS envelopes from Kafka. + +## Retry/Backoff +- Retries are attempted for transient IO errors and HTTP 5xx responses. +- No retries are performed for HTTP 4xx responses. +- Default retries: 3 attempts total with linear backoff (200ms, 400ms, 600ms). + +## Timeouts +- Connect timeout default: 10 seconds. +- Per-request timeout default: 5 minutes. +- Override via `new LfsProducer(endpoint, connectTimeout, requestTimeout)`. + +## Error Surfacing +- HTTP failures throw `LfsHttpException` with status code, error code, request ID, and response body. +- `X-Request-ID` is generated if missing and returned in proxy responses for correlation. 
+ +## Example +```java +URI endpoint = URI.create("http://localhost:8080/lfs/produce"); +LfsProducer producer = new LfsProducer(endpoint); +LfsEnvelope env = producer.produce("lfs-demo-topic", null, dataStream, Map.of( + "content-type", "application/octet-stream", + "LFS_BLOB", "true" +)); +``` + +## Testing +```bash +mvn test +``` + +## Integration Tests +See `docs/integration-tests.md` for TestContainers setup and image overrides. diff --git a/lfs-client-sdk/java/pom.xml b/lfs-client-sdk/java/pom.xml new file mode 100644 index 00000000..91692919 --- /dev/null +++ b/lfs-client-sdk/java/pom.xml @@ -0,0 +1,105 @@ + + + 4.0.0 + + org.kafscale + lfs-sdk + 0.2.0-SNAPSHOT + KafScale LFS SDK + Client-side LFS helpers for Kafka. + + + 17 + 17 + UTF-8 + 4.1.1 + 2.31.5 + 2.17.2 + 5.11.0 + 1.20.2 + + + + + + org.testcontainers + testcontainers-bom + ${testcontainers.version} + pom + import + + + + + + + org.apache.kafka + kafka-clients + ${kafka.clients.version} + + + software.amazon.awssdk + s3 + ${aws.sdk.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + + org.junit.jupiter + junit-jupiter + ${junit.jupiter.version} + test + + + org.testcontainers + junit-jupiter + test + + + org.testcontainers + kafka + test + + + org.slf4j + slf4j-simple + 1.7.36 + test + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.1 + + false + --add-modules jdk.httpserver + + + + + diff --git a/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/AwsS3Reader.java b/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/AwsS3Reader.java new file mode 100644 index 00000000..e0f54598 --- /dev/null +++ b/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/AwsS3Reader.java @@ -0,0 +1,38 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.kafscale.lfs; + +import software.amazon.awssdk.core.ResponseBytes; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + +public class AwsS3Reader implements S3Reader { + private final S3Client client; + private final String bucket; + + public AwsS3Reader(S3Client client, String bucket) { + this.client = client; + this.bucket = bucket; + } + + @Override + public byte[] fetch(String key) { + GetObjectRequest req = GetObjectRequest.builder().bucket(bucket).key(key).build(); + ResponseBytes bytes = client.getObjectAsBytes(req); + return bytes.asByteArray(); + } +} diff --git a/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/Checksum.java b/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/Checksum.java new file mode 100644 index 00000000..538e183f --- /dev/null +++ b/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/Checksum.java @@ -0,0 +1,37 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.kafscale.lfs;

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HexFormat;

/**
 * Checksum helpers for LFS payload validation.
 */
public final class Checksum {
    private Checksum() {
    }

    /**
     * Computes the SHA-256 digest of {@code data} and returns it as a
     * lowercase hex string — the format carried in LFS envelopes.
     *
     * @param data payload bytes; must not be null
     * @return 64-character lowercase hex digest
     */
    public static String sha256(byte[] data) {
        try {
            MessageDigest digest = MessageDigest.getInstance("SHA-256");
            // HexFormat (Java 17, the project's release level) emits lowercase
            // hex, matching the previous String.format("%02x", ...) output.
            return HexFormat.of().formatHex(digest.digest(data));
        } catch (NoSuchAlgorithmException ex) {
            // SHA-256 is required of every conforming JCA provider, so this
            // branch is effectively unreachable; keep the original contract
            // of surfacing it as an IllegalStateException.
            throw new IllegalStateException("sha256 failed", ex);
        }
    }
}
package org.kafscale.lfs;

import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * Detection and decoding of LFS envelope records.
 */
public final class LfsCodec {
    private static final ObjectMapper MAPPER = new ObjectMapper();

    private LfsCodec() {
    }

    /**
     * Cheap structural probe: returns true when {@code value} looks like an
     * LFS envelope — a JSON object whose first ~50 bytes contain the
     * {@code "kfs_lfs"} marker. False positives are possible for arbitrary
     * JSON embedding the marker early; {@link #decode(byte[])} performs the
     * real validation.
     */
    public static boolean isEnvelope(byte[] value) {
        if (value == null || value.length < 15) {
            return false;
        }
        if (value[0] != '{') {
            return false;
        }
        int max = Math.min(50, value.length);
        // Decode with an explicit charset: the no-charset String constructor
        // uses the platform default, which is not UTF-8 on every JVM and made
        // detection platform-dependent.
        String prefix = new String(value, 0, max, StandardCharsets.UTF_8);
        return prefix.contains("\"kfs_lfs\"");
    }

    /**
     * Decodes an envelope and requires the mandatory fields (version marker,
     * bucket, key, sha256).
     *
     * @throws IOException if the JSON is invalid or a required field is missing
     */
    public static LfsEnvelope decode(byte[] value) throws IOException {
        LfsEnvelope env = MAPPER.readValue(value, LfsEnvelope.class);
        if (env.version == 0 || env.bucket == null || env.key == null || env.sha256 == null) {
            throw new IOException("invalid envelope: missing required fields");
        }
        return env;
    }
}
package org.kafscale.lfs;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.ArrayList;
import java.util.List;

/**
 * Thin wrapper around a {@link KafkaConsumer} that transparently resolves
 * LFS envelopes into their original payloads via an {@link LfsResolver}.
 * Restored the byte[]/byte[] type arguments that the raw types discarded.
 */
public class LfsConsumer {
    private final KafkaConsumer<byte[], byte[]> consumer;
    private final LfsResolver resolver;

    public LfsConsumer(KafkaConsumer<byte[], byte[]> consumer, LfsResolver resolver) {
        this.consumer = consumer;
        this.resolver = resolver;
    }

    /**
     * Polls once and resolves every record value; non-envelope records pass
     * through unchanged (see {@link LfsResolver#resolve(byte[])}).
     *
     * @param timeout maximum time to block in poll
     * @return resolved records in poll order
     * @throws Exception if S3 fetch or checksum validation fails for a record
     */
    public List<LfsResolver.ResolvedRecord> pollResolved(Duration timeout) throws Exception {
        ConsumerRecords<byte[], byte[]> records = consumer.poll(timeout);
        List<LfsResolver.ResolvedRecord> out = new ArrayList<>();
        for (ConsumerRecord<byte[], byte[]> rec : records) {
            out.add(resolver.resolve(rec.value()));
        }
        return out;
    }
}
package org.kafscale.lfs;

import com.fasterxml.jackson.annotation.JsonProperty;

import java.util.Map;

/**
 * JSON envelope written to Kafka in place of an offloaded payload. Field
 * names mirror the wire format emitted by the LFS proxy.
 */
public class LfsEnvelope {
    // Envelope marker/version; 0 means "absent" (LfsCodec.decode rejects it).
    @JsonProperty("kfs_lfs")
    public int version;
    // S3 location of the offloaded payload.
    public String bucket;
    public String key;
    // Payload size in bytes.
    public long size;
    // SHA-256 digest of the payload. LfsResolver prefers `checksum` and falls
    // back to this field when `checksum` is absent.
    public String sha256;
    public String checksum;
    @JsonProperty("checksum_alg")
    public String checksumAlg;
    @JsonProperty("content_type")
    public String contentType;
    // Kafka headers captured from the original record. Restored type
    // argument (raw Map lost the key/value types); String values assumed —
    // confirm against the proxy wire format.
    @JsonProperty("original_headers")
    public Map<String, String> originalHeaders;
    @JsonProperty("created_at")
    public String createdAt;
    @JsonProperty("proxy_id")
    public String proxyId;
}
+ +package org.kafscale.lfs; + +public class LfsHttpException extends Exception { + private final int statusCode; + private final String errorCode; + private final String requestId; + private final String responseBody; + + public LfsHttpException(int statusCode, String errorCode, String message, String requestId, String responseBody) { + super(message); + this.statusCode = statusCode; + this.errorCode = errorCode; + this.requestId = requestId; + this.responseBody = responseBody; + } + + public int getStatusCode() { + return statusCode; + } + + public String getErrorCode() { + return errorCode; + } + + public String getRequestId() { + return requestId; + } + + public String getResponseBody() { + return responseBody; + } +} diff --git a/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsProducer.java b/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsProducer.java new file mode 100644 index 00000000..2a0fdd56 --- /dev/null +++ b/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/LfsProducer.java @@ -0,0 +1,149 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package org.kafscale.lfs;

import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.InputStream;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

/**
 * Produces LFS blobs through the LFS proxy HTTP ingest endpoint.
 *
 * <p>Retry contract (per the SDK README): transient IO errors and HTTP 5xx
 * responses are retried up to 3 attempts with linear backoff (200ms, 400ms,
 * 600ms); HTTP 4xx responses fail immediately.
 */
public class LfsProducer {

    /** JSON shape of proxy error responses; field names match the wire format. */
    private static class ErrorResponse {
        public String code;
        public String message;
        public String request_id;
    }

    private static final ObjectMapper MAPPER = new ObjectMapper();
    private static final long MULTIPART_MIN_BYTES = 5L * 1024 * 1024;
    private static final String HEADER_REQUEST_ID = "X-Request-ID";
    private static final Duration DEFAULT_CONNECT_TIMEOUT = Duration.ofSeconds(10);
    private static final Duration DEFAULT_REQUEST_TIMEOUT = Duration.ofMinutes(5);
    private static final int DEFAULT_RETRIES = 3;
    private static final long RETRY_BASE_SLEEP_MILLIS = 200L;

    private final HttpClient client;
    private final URI endpoint;
    private final Duration requestTimeout;

    public LfsProducer(URI endpoint) {
        this(endpoint, DEFAULT_CONNECT_TIMEOUT, DEFAULT_REQUEST_TIMEOUT);
    }

    /**
     * @param endpoint       proxy produce endpoint, e.g. {@code http://host:8080/lfs/produce}
     * @param connectTimeout null selects the 10-second default
     * @param requestTimeout null selects the 5-minute default
     */
    public LfsProducer(URI endpoint, Duration connectTimeout, Duration requestTimeout) {
        Duration resolvedConnect = connectTimeout == null ? DEFAULT_CONNECT_TIMEOUT : connectTimeout;
        Duration resolvedRequest = requestTimeout == null ? DEFAULT_REQUEST_TIMEOUT : requestTimeout;
        this.client = HttpClient.newBuilder()
                .connectTimeout(resolvedConnect)
                .build();
        this.endpoint = endpoint;
        this.requestTimeout = resolvedRequest;
    }

    public LfsEnvelope produce(String topic, byte[] key, InputStream payload, Map<String, String> headers) throws Exception {
        return produce(topic, key, payload, headers, -1);
    }

    /**
     * Uploads the payload and returns the envelope the proxy wrote to Kafka.
     * The stream is fully buffered in memory so Content-Length is exact and
     * the request body can be replayed on retry.
     *
     * @param sizeHint kept for API compatibility; the actual size is taken
     *                 from the buffered bytes
     * @throws LfsHttpException on a non-2xx proxy response (after retries for 5xx)
     */
    public LfsEnvelope produce(String topic, byte[] key, InputStream payload, Map<String, String> headers, long sizeHint) throws Exception {
        byte[] data = payload.readAllBytes();

        Map<String, String> outHeaders = new HashMap<>();
        outHeaders.put("X-Kafka-Topic", topic);
        if (key != null) {
            // Explicit charset: the no-charset String constructor decodes
            // with the platform default, which varies across JVMs.
            outHeaders.put("X-Kafka-Key", new String(key, StandardCharsets.UTF_8));
        }
        if (headers != null) {
            outHeaders.putAll(headers);
        }
        if (!outHeaders.containsKey(HEADER_REQUEST_ID)) {
            outHeaders.put(HEADER_REQUEST_ID, UUID.randomUUID().toString());
        }
        long actualSize = data.length;
        outHeaders.put("X-LFS-Size", String.valueOf(actualSize));
        outHeaders.put("X-LFS-Mode", actualSize < MULTIPART_MIN_BYTES ? "single" : "multipart");

        HttpRequest.Builder req = HttpRequest.newBuilder()
                .uri(endpoint)
                .timeout(requestTimeout)
                .POST(HttpRequest.BodyPublishers.ofByteArray(data));
        for (Map.Entry<String, String> entry : outHeaders.entrySet()) {
            req.header(entry.getKey(), entry.getValue());
        }
        return sendWithRetry(req.build());
    }

    public LfsEnvelope produce(String topic, byte[] key, byte[] data, Map<String, String> headers) throws Exception {
        return produce(topic, key, new java.io.ByteArrayInputStream(data), headers, data.length);
    }

    /**
     * Sends the request with up to {@link #DEFAULT_RETRIES} attempts.
     * Retries transient IOExceptions and HTTP 5xx; 4xx fails immediately.
     */
    private LfsEnvelope sendWithRetry(HttpRequest request) throws Exception {
        Exception last = null;
        for (int attempt = 1; attempt <= DEFAULT_RETRIES; attempt++) {
            try {
                HttpResponse<String> resp = client.send(request, HttpResponse.BodyHandlers.ofString());
                if (resp.statusCode() < 200 || resp.statusCode() >= 300) {
                    String body = resp.body();
                    String requestId = resp.headers().firstValue(HEADER_REQUEST_ID).orElse("");
                    ErrorResponse err = null;
                    try {
                        err = MAPPER.readValue(body, ErrorResponse.class);
                    } catch (Exception ignored) {
                        // Non-JSON error bodies are surfaced verbatim below.
                    }
                    String code = err != null ? err.code : "";
                    String message = err != null && err.message != null ? err.message : body;
                    String errRequestId = err != null && err.request_id != null ? err.request_id : requestId;
                    LfsHttpException httpError = new LfsHttpException(resp.statusCode(), code, message, errRequestId, body);
                    if (resp.statusCode() >= 500 && attempt < DEFAULT_RETRIES) {
                        last = httpError;
                        sleepBackoff(attempt);
                        continue;
                    }
                    throw httpError;
                }
                return MAPPER.readValue(resp.body(), LfsEnvelope.class);
            } catch (java.io.IOException ex) {
                last = ex;
                if (attempt == DEFAULT_RETRIES) {
                    break;
                }
                sleepBackoff(attempt);
            }
        }
        if (last != null) {
            throw last;
        }
        throw new IllegalStateException("produce failed: no response");
    }

    /**
     * Linear backoff: 200ms, 400ms, 600ms — matching the README contract.
     * (The previous {@code 1 << (attempt - 1)} multiplier was exponential,
     * 200/400/800, contradicting the documented schedule.)
     */
    private void sleepBackoff(int attempt) {
        try {
            Thread.sleep(RETRY_BASE_SLEEP_MILLIS * attempt);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}
+ +package org.kafscale.lfs; + +public class LfsResolver { + private final S3Reader s3; + private final boolean validateChecksum; + private final long maxSize; + + public LfsResolver(S3Reader s3, boolean validateChecksum, long maxSize) { + this.s3 = s3; + this.validateChecksum = validateChecksum; + this.maxSize = maxSize; + } + + public ResolvedRecord resolve(byte[] value) throws Exception { + if (!LfsCodec.isEnvelope(value)) { + return new ResolvedRecord(null, value, false); + } + if (s3 == null) { + throw new IllegalStateException("s3 reader not configured"); + } + LfsEnvelope env = LfsCodec.decode(value); + byte[] payload = s3.fetch(env.key); + if (maxSize > 0 && payload.length > maxSize) { + throw new IllegalStateException("payload exceeds max size"); + } + if (validateChecksum) { + String expected = env.checksum != null && !env.checksum.isEmpty() ? env.checksum : env.sha256; + String actual = Checksum.sha256(payload); + if (!actual.equals(expected)) { + throw new IllegalStateException("checksum mismatch"); + } + } + return new ResolvedRecord(env, payload, true); + } + + public static final class ResolvedRecord { + public final LfsEnvelope envelope; + public final byte[] payload; + public final boolean isEnvelope; + + public ResolvedRecord(LfsEnvelope envelope, byte[] payload, boolean isEnvelope) { + this.envelope = envelope; + this.payload = payload; + this.isEnvelope = isEnvelope; + } + } +} diff --git a/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/S3Reader.java b/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/S3Reader.java new file mode 100644 index 00000000..9737278b --- /dev/null +++ b/lfs-client-sdk/java/src/main/java/org/kafscale/lfs/S3Reader.java @@ -0,0 +1,20 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
// This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.kafscale.lfs;

/**
 * Minimal abstraction over object-storage reads, keyed by object key.
 * Implementations (e.g. {@link AwsS3Reader}) bind the bucket at construction.
 */
public interface S3Reader {
    /**
     * Fetches the full object body for {@code key}.
     *
     * @throws Exception if the object cannot be read
     */
    byte[] fetch(String key) throws Exception;
}
+ +package org.kafscale.lfs; + +import org.junit.jupiter.api.Test; +import org.testcontainers.DockerClientFactory; + +class DockerAvailabilityTest { + @Test + void printsDockerAvailability() { + System.err.println("[tc-diag] DOCKER_HOST=" + System.getenv("DOCKER_HOST")); + System.err.println("[tc-diag] DOCKER_CONTEXT=" + System.getenv("DOCKER_CONTEXT")); + System.err.println("[tc-diag] TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE=" + System.getenv("TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE")); + System.err.println("[tc-diag] testcontainers.docker.socket.override=" + System.getProperty("testcontainers.docker.socket.override")); + try { + System.err.println("[tc-diag] config.dockerHost=" + readConfigValue("docker.host")); + System.err.println("[tc-diag] factoryMethods=" + listFactoryMethods()); + DockerClientFactory factory = DockerClientFactory.instance(); + tryInvokeClient(factory); + tryInvokeStrategy(factory); + try { + System.err.println("[tc-diag] dockerAvailable=" + factory.isDockerAvailable()); + } catch (Exception e) { + System.err.println("[tc-diag] dockerAvailable failed: " + e.getClass().getName() + ": " + e.getMessage()); + } + try { + System.err.println("[tc-diag] dockerHostIp=" + factory.dockerHostIpAddress()); + } catch (Exception e) { + System.err.println("[tc-diag] dockerHostIp failed: " + e.getClass().getName() + ": " + e.getMessage()); + } + } catch (Exception e) { + System.err.println("[tc-diag] docker check failed: " + e.getClass().getName() + ": " + e.getMessage()); + } + } + + private static String readConfigValue(String key) { + String value = readClasspathConfig(key); + if (value != null) { + return value; + } + return readFileConfig(System.getProperty("user.home") + "/.testcontainers.properties", key); + } + + private static String readClasspathConfig(String key) { + try (var stream = DockerAvailabilityTest.class.getClassLoader().getResourceAsStream(".testcontainers.properties")) { + if (stream == null) { + return null; + } + java.util.Properties 
props = new java.util.Properties(); + props.load(stream); + return props.getProperty(key); + } catch (Exception ignored) { + return null; + } + } + + private static String readFileConfig(String path, String key) { + java.io.File file = new java.io.File(path); + if (!file.exists()) { + return null; + } + try (var stream = new java.io.FileInputStream(file)) { + java.util.Properties props = new java.util.Properties(); + props.load(stream); + return props.getProperty(key); + } catch (Exception ignored) { + return null; + } + } + + private static String listFactoryMethods() { + java.lang.reflect.Method[] methods = DockerClientFactory.class.getDeclaredMethods(); + java.util.List names = new java.util.ArrayList<>(); + for (java.lang.reflect.Method method : methods) { + if (method.getParameterCount() == 0) { + names.add(method.getName()); + } + } + java.util.Collections.sort(names); + return String.join(",", names); + } + + private static void tryInvokeClient(DockerClientFactory factory) { + tryInvoke(factory, "client"); + tryInvoke(factory, "getClient"); + } + + private static void tryInvokeStrategy(DockerClientFactory factory) { + try { + java.lang.reflect.Method method = DockerClientFactory.class.getDeclaredMethod("getOrInitializeStrategy"); + method.setAccessible(true); + Object strategy = method.invoke(factory); + System.err.println("[tc-diag] strategy=" + (strategy == null ? "null" : strategy.getClass().getName())); + } catch (Exception e) { + Throwable cause = e.getCause() == null ? e : e.getCause(); + System.err.println("[tc-diag] strategy failed: " + cause.getClass().getName() + ": " + cause.getMessage()); + } + } + + private static void tryInvoke(DockerClientFactory factory, String methodName) { + try { + java.lang.reflect.Method method = DockerClientFactory.class.getDeclaredMethod(methodName); + method.setAccessible(true); + Object client = method.invoke(factory); + System.err.println("[tc-diag] " + methodName + " ok: " + (client == null ? 
"null" : client.getClass().getName())); + } catch (NoSuchMethodException ignored) { + System.err.println("[tc-diag] " + methodName + " not found"); + } catch (Exception e) { + Throwable cause = e.getCause() == null ? e : e.getCause(); + System.err.println("[tc-diag] " + methodName + " failed: " + cause.getClass().getName() + ": " + cause.getMessage()); + } + } +} diff --git a/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsCodecTest.java b/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsCodecTest.java new file mode 100644 index 00000000..239affab --- /dev/null +++ b/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsCodecTest.java @@ -0,0 +1,29 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.kafscale.lfs; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class LfsCodecTest { + @Test + void detectsEnvelope() { + byte[] data = "{\"kfs_lfs\":1,\"bucket\":\"b\"}".getBytes(); + assertTrue(LfsCodec.isEnvelope(data)); + assertFalse(LfsCodec.isEnvelope("plain".getBytes())); + } +} diff --git a/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsProducerIntegrationTest.java b/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsProducerIntegrationTest.java new file mode 100644 index 00000000..5de3174b --- /dev/null +++ b/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsProducerIntegrationTest.java @@ -0,0 +1,239 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.kafscale.lfs; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.KafkaContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.DockerClientFactory; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.CreateBucketRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; + +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.Arrays; +import java.util.Collections; +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +@Testcontainers(disabledWithoutDocker = true) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class LfsProducerIntegrationTest { + private static final ObjectMapper MAPPER = new ObjectMapper(); + 
private static final String BUCKET = "kafscale"; + private static final String TOPIC = "lfs-demo-topic"; + private static final String MINIO_USER = "minioadmin"; + private static final String MINIO_PASS = "minioadmin"; + + private static final DockerImageName KAFKA_IMAGE = DockerImageName.parse( + System.getenv().getOrDefault("KAFSCALE_KAFKA_IMAGE", "confluentinc/cp-kafka:7.6.1")); + private static final DockerImageName MINIO_IMAGE = DockerImageName.parse( + System.getenv().getOrDefault("KAFSCALE_MINIO_IMAGE", "quay.io/minio/minio:RELEASE.2024-09-22T00-33-43Z")); + private static final DockerImageName LFS_PROXY_IMAGE = DockerImageName.parse( + System.getenv().getOrDefault("KAFSCALE_LFS_PROXY_IMAGE", "ghcr.io/kafscale/kafscale-lfs-proxy:dev")); + + private static final Network NETWORK = Network.newNetwork(); + + @Container + private static final KafkaContainer KAFKA = new KafkaContainer(KAFKA_IMAGE) + .withNetwork(NETWORK) + .withNetworkAliases("kafka"); + + @Container + private static final GenericContainer MINIO = new GenericContainer<>(MINIO_IMAGE) + .withNetwork(NETWORK) + .withNetworkAliases("minio") + .withEnv("MINIO_ROOT_USER", MINIO_USER) + .withEnv("MINIO_ROOT_PASSWORD", MINIO_PASS) + .withCommand("server", "/data", "--console-address", ":9001") + .withExposedPorts(9000); + + @Container + private static final GenericContainer LFS_PROXY = new GenericContainer<>(LFS_PROXY_IMAGE) + .withNetwork(NETWORK) + .withNetworkAliases("lfs-proxy") + .withEnv("KAFSCALE_LFS_PROXY_ADDR", ":9092") + .withEnv("KAFSCALE_LFS_PROXY_ADVERTISED_HOST", "lfs-proxy") + .withEnv("KAFSCALE_LFS_PROXY_ADVERTISED_PORT", "9092") + .withEnv("KAFSCALE_LFS_PROXY_HTTP_ADDR", ":8080") + .withEnv("KAFSCALE_LFS_PROXY_HEALTH_ADDR", ":9094") + .withEnv("KAFSCALE_LFS_PROXY_BACKENDS", "kafka:9092") + .withEnv("KAFSCALE_LFS_PROXY_S3_BUCKET", BUCKET) + .withEnv("KAFSCALE_LFS_PROXY_S3_REGION", "us-east-1") + .withEnv("KAFSCALE_LFS_PROXY_S3_ENDPOINT", "http://minio:9000") + 
.withEnv("KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE", "true") + .withEnv("KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET", "true") + .withEnv("KAFSCALE_LFS_PROXY_S3_ACCESS_KEY", MINIO_USER) + .withEnv("KAFSCALE_LFS_PROXY_S3_SECRET_KEY", MINIO_PASS) + .withExposedPorts(8080, 9094); + + static { + if (isDiagnosticsEnabled()) { + printDiagnostics(); + } + } + + @Test + @Order(1) + void producesEnvelopeAndResolvesPayload() throws Exception { + waitForReady(); + ensureBucket(); + + byte[] payload = new byte[256 * 1024]; + for (int i = 0; i < payload.length; i++) { + payload[i] = (byte) (i % 251); + } + + LfsProducer producer = new LfsProducer(httpEndpoint()); + LfsEnvelope env = producer.produce(TOPIC, null, new java.io.ByteArrayInputStream(payload), + Collections.singletonMap("content-type", "application/octet-stream")); + + assertNotNull(env); + assertNotNull(env.key); + assertEquals(BUCKET, env.bucket); + + LfsEnvelope consumed = consumeEnvelope(); + assertEquals(env.key, consumed.key); + + byte[] stored = fetchObject(consumed.key); + assertTrue(stored.length > 0); + assertEquals(payload.length, stored.length); + assertTrue(Arrays.equals(payload, stored)); + } + + @Test + @Order(2) + void returns5xxWhenBackendUnavailable() throws Exception { + waitForReady(); + KAFKA.stop(); + + LfsProducer producer = new LfsProducer(httpEndpoint(), Duration.ofSeconds(2), Duration.ofSeconds(5)); + LfsHttpException ex = assertThrows(LfsHttpException.class, + () -> producer.produce(TOPIC, null, new java.io.ByteArrayInputStream("payload".getBytes(StandardCharsets.UTF_8)), + Collections.singletonMap("content-type", "application/octet-stream"))); + + assertTrue(ex.getStatusCode() == 502 || ex.getStatusCode() == 503); + assertTrue(ex.getErrorCode().equals("backend_unavailable") || ex.getErrorCode().equals("backend_error")); + } + + private static URI httpEndpoint() { + return URI.create("http://" + LFS_PROXY.getHost() + ":" + LFS_PROXY.getMappedPort(8080) + "/lfs/produce"); + } + + private static void 
waitForReady() throws InterruptedException { + URI ready = URI.create("http://" + LFS_PROXY.getHost() + ":" + LFS_PROXY.getMappedPort(9094) + "/readyz"); + for (int i = 0; i < 30; i++) { + try { + java.net.http.HttpResponse resp = java.net.http.HttpClient.newHttpClient().send( + java.net.http.HttpRequest.newBuilder().uri(ready).timeout(Duration.ofSeconds(2)).GET().build(), + java.net.http.HttpResponse.BodyHandlers.ofString()); + if (resp.statusCode() == 200) { + return; + } + } catch (Exception ignored) { + } + Thread.sleep(1000); + } + throw new IllegalStateException("lfs-proxy not ready"); + } + + private static void ensureBucket() { + try (S3Client s3 = S3Client.builder() + .endpointOverride(URI.create("http://" + MINIO.getHost() + ":" + MINIO.getMappedPort(9000))) + .region(Region.US_EAST_1) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create(MINIO_USER, MINIO_PASS))) + .serviceConfiguration(S3Configuration.builder().pathStyleAccessEnabled(true).build()) + .build()) { + s3.createBucket(CreateBucketRequest.builder().bucket(BUCKET).build()); + } catch (Exception ignored) { + } + } + + private static LfsEnvelope consumeEnvelope() throws Exception { + Properties props = new Properties(); + props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA.getBootstrapServers()); + props.put(ConsumerConfig.GROUP_ID_CONFIG, "lfs-sdk-it"); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + try (KafkaConsumer consumer = new KafkaConsumer<>(props)) { + consumer.subscribe(Collections.singletonList(TOPIC)); + long deadline = System.currentTimeMillis() + 10000; + while (System.currentTimeMillis() < deadline) { + ConsumerRecords records = consumer.poll(Duration.ofMillis(500)); + if (!records.isEmpty()) { + byte[] payload = 
records.iterator().next().value(); + return MAPPER.readValue(payload, LfsEnvelope.class); + } + } + } + throw new IllegalStateException("no records consumed"); + } + + private static byte[] fetchObject(String key) throws Exception { + try (S3Client s3 = S3Client.builder() + .endpointOverride(URI.create("http://" + MINIO.getHost() + ":" + MINIO.getMappedPort(9000))) + .region(Region.US_EAST_1) + .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create(MINIO_USER, MINIO_PASS))) + .serviceConfiguration(S3Configuration.builder().pathStyleAccessEnabled(true).build()) + .build()) { + ResponseInputStream stream = s3.getObject(GetObjectRequest.builder().bucket(BUCKET).key(key).build()); + return stream.readAllBytes(); + } + } + + private static boolean isDiagnosticsEnabled() { + String value = System.getenv("KAFSCALE_TC_DIAG"); + return value != null && (value.equalsIgnoreCase("1") || value.equalsIgnoreCase("true")); + } + + private static void printDiagnostics() { + System.err.println("[tc-diag] DOCKER_HOST=" + System.getenv("DOCKER_HOST")); + System.err.println("[tc-diag] DOCKER_CONTEXT=" + System.getenv("DOCKER_CONTEXT")); + System.err.println("[tc-diag] TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE=" + System.getenv("TESTCONTAINERS_DOCKER_SOCKET_OVERRIDE")); + System.err.println("[tc-diag] testcontainers.docker.socket.override=" + System.getProperty("testcontainers.docker.socket.override")); + try { + DockerClientFactory factory = DockerClientFactory.instance(); + System.err.println("[tc-diag] dockerAvailable=" + factory.isDockerAvailable()); + System.err.println("[tc-diag] dockerHostIp=" + factory.dockerHostIpAddress()); + } catch (Exception e) { + System.err.println("[tc-diag] docker check failed: " + e.getClass().getName() + ": " + e.getMessage()); + } + } +} diff --git a/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsProducerTest.java b/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsProducerTest.java new file mode 100644 index 
00000000..da2d23f8 --- /dev/null +++ b/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsProducerTest.java @@ -0,0 +1,191 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.kafscale.lfs; + +import com.sun.net.httpserver.HttpExchange; +import com.sun.net.httpserver.HttpHandler; +import com.sun.net.httpserver.HttpServer; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.net.URI; +import java.time.Duration; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class LfsProducerTest { + @Test + void producesEnvelopeFromHttpResponse() throws Exception { + HttpServer server = HttpServer.create(new InetSocketAddress(0), 0); + server.createContext("/lfs/produce", new OkHandler()); + server.start(); + try { + URI endpoint = URI.create("http://localhost:" + server.getAddress().getPort() + "/lfs/produce"); + LfsProducer producer = new LfsProducer(endpoint); + + byte[] payload = "hello".getBytes(StandardCharsets.UTF_8); + LfsEnvelope env = 
producer.produce("demo-topic", null, new ByteArrayInputStream(payload), Map.of()); + + assertEquals("demo-bucket", env.bucket); + assertEquals("obj-1", env.key); + } finally { + server.stop(0); + } + } + + @Test + void failsOnNon2xx() throws Exception { + HttpServer server = HttpServer.create(new InetSocketAddress(0), 0); + server.createContext("/lfs/produce", new ErrorHandler()); + server.start(); + try { + URI endpoint = URI.create("http://localhost:" + server.getAddress().getPort() + "/lfs/produce"); + LfsProducer producer = new LfsProducer(endpoint); + + assertThrows(LfsHttpException.class, + () -> producer.produce("demo-topic", null, new ByteArrayInputStream(new byte[0]), Map.of())); + } finally { + server.stop(0); + } + } + + + @Test + void retriesOnServerError() throws Exception { + AtomicInteger attempts = new AtomicInteger(); + HttpServer server = HttpServer.create(new InetSocketAddress(0), 0); + server.createContext("/lfs/produce", exchange -> { + int n = attempts.incrementAndGet(); + if (n < 3) { + byte[] body = "boom".getBytes(StandardCharsets.UTF_8); + exchange.sendResponseHeaders(500, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + return; + } + byte[] body = "{\"kfs_lfs\":1,\"bucket\":\"demo-bucket\",\"key\":\"obj-1\",\"sha256\":\"abc\"}".getBytes(StandardCharsets.UTF_8); + exchange.sendResponseHeaders(200, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + }); + server.start(); + try { + URI endpoint = URI.create("http://localhost:" + server.getAddress().getPort() + "/lfs/produce"); + LfsProducer producer = new LfsProducer(endpoint); + + LfsEnvelope env = producer.produce("demo-topic", null, new ByteArrayInputStream(new byte[0]), Map.of()); + + assertEquals("demo-bucket", env.bucket); + assertEquals(3, attempts.get()); + } finally { + server.stop(0); + } + } + + @Test + void doesNotRetryOnClientError() throws Exception { + AtomicInteger attempts = new 
AtomicInteger(); + HttpServer server = HttpServer.create(new InetSocketAddress(0), 0); + server.createContext("/lfs/produce", exchange -> { + attempts.incrementAndGet(); + byte[] body = "bad".getBytes(StandardCharsets.UTF_8); + exchange.sendResponseHeaders(400, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + }); + server.start(); + try { + URI endpoint = URI.create("http://localhost:" + server.getAddress().getPort() + "/lfs/produce"); + LfsProducer producer = new LfsProducer(endpoint); + + assertThrows(LfsHttpException.class, + () -> producer.produce("demo-topic", null, new ByteArrayInputStream(new byte[0]), Map.of())); + assertEquals(1, attempts.get()); + } finally { + server.stop(0); + } + } + + private static final class OkHandler implements HttpHandler { + @Override + public void handle(HttpExchange exchange) throws IOException { + if (!"demo-topic".equals(exchange.getRequestHeaders().getFirst("X-Kafka-Topic"))) { + exchange.sendResponseHeaders(400, 0); + exchange.close(); + return; + } + byte[] body = "{\"kfs_lfs\":1,\"bucket\":\"demo-bucket\",\"key\":\"obj-1\",\"sha256\":\"abc\"}".getBytes(StandardCharsets.UTF_8); + exchange.sendResponseHeaders(200, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + } + } + + + @Test + void honorsRequestTimeout() throws Exception { + HttpServer server = HttpServer.create(new InetSocketAddress(0), 0); + server.createContext("/lfs/produce", new SlowHandler()); + server.start(); + try { + URI endpoint = URI.create("http://localhost:" + server.getAddress().getPort() + "/lfs/produce"); + LfsProducer producer = new LfsProducer(endpoint, Duration.ofSeconds(1), Duration.ofMillis(50)); + + assertThrows(java.net.http.HttpTimeoutException.class, + () -> producer.produce("demo-topic", null, new ByteArrayInputStream(new byte[0]), Map.of())); + } finally { + server.stop(0); + } + } + + private static final class ErrorHandler implements HttpHandler { + 
@Override + public void handle(HttpExchange exchange) throws IOException { + byte[] body = "boom".getBytes(StandardCharsets.UTF_8); + exchange.sendResponseHeaders(500, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + } + } + + private static final class SlowHandler implements HttpHandler { + @Override + public void handle(HttpExchange exchange) throws IOException { + try { + Thread.sleep(5000); // Sleep longer than the request timeout + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + byte[] body = "{\"kfs_lfs\":1,\"bucket\":\"demo-bucket\",\"key\":\"obj-1\",\"sha256\":\"abc\"}".getBytes(StandardCharsets.UTF_8); + exchange.sendResponseHeaders(200, body.length); + try (OutputStream os = exchange.getResponseBody()) { + os.write(body); + } + } + } +} diff --git a/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsResolverTest.java b/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsResolverTest.java new file mode 100644 index 00000000..b5a83ddb --- /dev/null +++ b/lfs-client-sdk/java/src/test/java/org/kafscale/lfs/LfsResolverTest.java @@ -0,0 +1,75 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.kafscale.lfs; + +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class LfsResolverTest { + @Test + void resolvesNonEnvelopeAsPlainPayload() throws Exception { + byte[] payload = "plain".getBytes(StandardCharsets.UTF_8); + LfsResolver resolver = new LfsResolver(new StaticS3Reader(payload), true, 0); + + LfsResolver.ResolvedRecord record = resolver.resolve(payload); + + assertFalse(record.isEnvelope); + assertArrayEquals(payload, record.payload); + } + + @Test + void resolvesEnvelopeFromS3Reader() throws Exception { + byte[] payload = "hello-lfs".getBytes(StandardCharsets.UTF_8); + String checksum = Checksum.sha256(payload); + String envelope = "{\"kfs_lfs\":1,\"bucket\":\"b\",\"key\":\"k\",\"sha256\":\"" + checksum + "\"}"; + + LfsResolver resolver = new LfsResolver(new StaticS3Reader(payload), true, 0); + LfsResolver.ResolvedRecord record = resolver.resolve(envelope.getBytes(StandardCharsets.UTF_8)); + + assertTrue(record.isEnvelope); + assertArrayEquals(payload, record.payload); + } + + @Test + void rejectsChecksumMismatch() { + byte[] payload = "bad".getBytes(StandardCharsets.UTF_8); + String envelope = "{\"kfs_lfs\":1,\"bucket\":\"b\",\"key\":\"k\",\"sha256\":\"deadbeef\"}"; + + LfsResolver resolver = new LfsResolver(new StaticS3Reader(payload), true, 0); + + assertThrows(IllegalStateException.class, + () -> resolver.resolve(envelope.getBytes(StandardCharsets.UTF_8))); + } + + private static final class StaticS3Reader implements S3Reader { + private final byte[] payload; + + private StaticS3Reader(byte[] payload) { + this.payload = payload; + } + + @Override + public byte[] fetch(String key) { + return payload; + } + } +} diff --git 
a/lfs-client-sdk/js-browser/package.json b/lfs-client-sdk/js-browser/package.json new file mode 100644 index 00000000..f9be4ba3 --- /dev/null +++ b/lfs-client-sdk/js-browser/package.json @@ -0,0 +1,40 @@ +{ + "name": "@kafscale/lfs-browser-sdk", + "version": "0.1.0", + "description": "Browser-native LFS SDK using fetch API (no librdkafka)", + "type": "module", + "main": "dist/index.js", + "module": "dist/index.esm.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.esm.js", + "require": "./dist/index.js", + "types": "./dist/index.d.ts" + } + }, + "scripts": { + "build": "tsc && esbuild src/index.ts --bundle --format=esm --outfile=dist/index.esm.js --sourcemap", + "build:umd": "esbuild src/index.ts --bundle --format=iife --global-name=KafscaleLfs --outfile=dist/index.umd.js --sourcemap", + "dev": "tsc --watch", + "test": "vitest run" + }, + "devDependencies": { + "typescript": "^5.6.0", + "esbuild": "^0.24.0", + "vitest": "^2.0.0" + }, + "files": [ + "dist", + "src" + ], + "keywords": [ + "kafka", + "lfs", + "large-file", + "s3", + "browser", + "streaming" + ], + "license": "Apache-2.0" +} diff --git a/lfs-client-sdk/js-browser/src/envelope.ts b/lfs-client-sdk/js-browser/src/envelope.ts new file mode 100644 index 00000000..8ac21212 --- /dev/null +++ b/lfs-client-sdk/js-browser/src/envelope.ts @@ -0,0 +1,54 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +export interface LfsEnvelope { + kfs_lfs: number; + bucket: string; + key: string; + size: number; + sha256: string; + checksum?: string; + checksum_alg?: string; + content_type?: string; + original_headers?: Record; + created_at?: string; + proxy_id?: string; +} + +/** + * Check if data looks like an LFS envelope. + */ +export function isLfsEnvelope(data: unknown): data is LfsEnvelope { + if (typeof data !== 'object' || data === null) return false; + const obj = data as Record; + return ( + typeof obj.kfs_lfs === 'number' && + typeof obj.bucket === 'string' && + typeof obj.key === 'string' && + typeof obj.sha256 === 'string' + ); +} + +/** + * Decode LFS envelope from JSON string or Uint8Array. + */ +export function decodeLfsEnvelope(data: string | Uint8Array): LfsEnvelope { + const text = typeof data === 'string' ? data : new TextDecoder().decode(data); + const parsed = JSON.parse(text); + if (!isLfsEnvelope(parsed)) { + throw new Error('Invalid LFS envelope: missing required fields'); + } + return parsed; +} diff --git a/lfs-client-sdk/js-browser/src/index.ts b/lfs-client-sdk/js-browser/src/index.ts new file mode 100644 index 00000000..c478180b --- /dev/null +++ b/lfs-client-sdk/js-browser/src/index.ts @@ -0,0 +1,29 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +export { LfsEnvelope, isLfsEnvelope, decodeLfsEnvelope } from './envelope.js'; +export { + LfsProducer, + LfsHttpError, + produceLfs, + type LfsProducerConfig, + type ProduceOptions, + type UploadProgress, +} from './producer.js'; +export { + LfsResolver, + type ResolverConfig, + type ResolvedRecord, +} from './resolver.js'; diff --git a/lfs-client-sdk/js-browser/src/producer.ts b/lfs-client-sdk/js-browser/src/producer.ts new file mode 100644 index 00000000..5ea7dc0c --- /dev/null +++ b/lfs-client-sdk/js-browser/src/producer.ts @@ -0,0 +1,238 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import { LfsEnvelope } from './envelope.js'; + +export interface UploadProgress { + loaded: number; + total: number; + percent: number; +} + +export interface ProduceOptions { + key?: string; + headers?: Record; + onProgress?: (progress: UploadProgress) => void; + signal?: AbortSignal; +} + +export interface LfsProducerConfig { + endpoint: string; + timeout?: number; + retries?: number; + retryDelay?: number; +} + +export class LfsHttpError extends Error { + constructor( + public readonly statusCode: number, + public readonly code: string, + message: string, + public readonly requestId: string, + public readonly body: string + ) { + super(message); + this.name = 'LfsHttpError'; + } +} + +const DEFAULT_TIMEOUT = 300000; // 5 minutes +const DEFAULT_RETRIES = 3; +const DEFAULT_RETRY_DELAY = 200; + +/** + * Browser-native LFS producer using fetch API. + */ +export class LfsProducer { + private readonly endpoint: string; + private readonly timeout: number; + private readonly retries: number; + private readonly retryDelay: number; + + constructor(config: LfsProducerConfig) { + this.endpoint = config.endpoint; + this.timeout = config.timeout ?? DEFAULT_TIMEOUT; + this.retries = config.retries ?? DEFAULT_RETRIES; + this.retryDelay = config.retryDelay ?? DEFAULT_RETRY_DELAY; + } + + /** + * Upload a blob to the LFS proxy. + */ + async produce( + topic: string, + payload: Blob | ArrayBuffer | File, + options?: ProduceOptions + ): Promise { + const headers: Record = { + 'X-Kafka-Topic': topic, + 'X-Request-ID': crypto.randomUUID(), + }; + + if (options?.key) { + headers['X-Kafka-Key'] = options.key; + } + + if (options?.headers) { + Object.assign(headers, options.headers); + } + + // Get payload as Blob for size info + const blob = payload instanceof Blob ? payload : new Blob([payload]); + headers['X-LFS-Size'] = String(blob.size); + headers['X-LFS-Mode'] = blob.size < 5 * 1024 * 1024 ? 
'single' : 'multipart'; + + // If Content-Type not set, use blob type + if (!headers['Content-Type'] && blob.type) { + headers['Content-Type'] = blob.type; + } + + return this.sendWithRetry(blob, headers, options); + } + + private async sendWithRetry( + blob: Blob, + headers: Record, + options?: ProduceOptions + ): Promise { + let lastError: Error | null = null; + + for (let attempt = 1; attempt <= this.retries; attempt++) { + try { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), this.timeout); + + // Combine with external signal if provided + if (options?.signal) { + options.signal.addEventListener('abort', () => controller.abort()); + } + + try { + // Use XMLHttpRequest for progress tracking (fetch doesn't support upload progress) + const envelope = await this.uploadWithProgress( + blob, + headers, + controller.signal, + options?.onProgress + ); + return envelope; + } finally { + clearTimeout(timeoutId); + } + } catch (error) { + lastError = error as Error; + + // Don't retry on abort + if (error instanceof DOMException && error.name === 'AbortError') { + throw error; + } + + // Don't retry on 4xx errors + if (error instanceof LfsHttpError && error.statusCode < 500) { + throw error; + } + + // Retry on 5xx or network errors + if (attempt < this.retries) { + await this.sleep(this.retryDelay * Math.pow(2, attempt - 1)); + } + } + } + + throw lastError ?? 
new Error('Upload failed: no response'); + } + + private uploadWithProgress( + blob: Blob, + headers: Record, + signal: AbortSignal, + onProgress?: (progress: UploadProgress) => void + ): Promise { + return new Promise((resolve, reject) => { + const xhr = new XMLHttpRequest(); + + xhr.open('POST', this.endpoint, true); + + // Set headers + for (const [key, value] of Object.entries(headers)) { + xhr.setRequestHeader(key, value); + } + + // Progress handler + if (onProgress) { + xhr.upload.onprogress = (event) => { + if (event.lengthComputable) { + onProgress({ + loaded: event.loaded, + total: event.total, + percent: Math.round((event.loaded / event.total) * 100), + }); + } + }; + } + + // Abort handler + signal.addEventListener('abort', () => xhr.abort()); + + xhr.onload = () => { + if (xhr.status >= 200 && xhr.status < 300) { + try { + const envelope = JSON.parse(xhr.responseText) as LfsEnvelope; + resolve(envelope); + } catch { + reject(new Error('Invalid JSON response')); + } + } else { + let code = ''; + let message = xhr.responseText; + let requestId = headers['X-Request-ID']; + + try { + const err = JSON.parse(xhr.responseText); + code = err.code ?? ''; + message = err.message ?? xhr.responseText; + requestId = err.request_id ?? requestId; + } catch { + // Use raw response + } + + reject(new LfsHttpError(xhr.status, code, message, requestId, xhr.responseText)); + } + }; + + xhr.onerror = () => reject(new Error('Network error')); + xhr.onabort = () => reject(new DOMException('Upload aborted', 'AbortError')); + + xhr.send(blob); + }); + } + + private sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); + } +} + +/** + * Convenience function for one-shot uploads. 
+ */ +export async function produceLfs( + endpoint: string, + topic: string, + payload: Blob | ArrayBuffer | File, + options?: ProduceOptions +): Promise { + const producer = new LfsProducer({ endpoint }); + return producer.produce(topic, payload, options); +} diff --git a/lfs-client-sdk/js-browser/src/resolver.ts b/lfs-client-sdk/js-browser/src/resolver.ts new file mode 100644 index 00000000..3ba27a8b --- /dev/null +++ b/lfs-client-sdk/js-browser/src/resolver.ts @@ -0,0 +1,114 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { LfsEnvelope, isLfsEnvelope, decodeLfsEnvelope } from './envelope.js'; + +export interface ResolvedRecord { + envelope?: LfsEnvelope; + payload: Uint8Array; + isEnvelope: boolean; +} + +export interface ResolverConfig { + /** + * Function to get a URL for fetching the blob. + * For pre-signed URLs: return the signed S3 URL. + * For direct access: return the S3 endpoint URL. + */ + getBlobUrl: (key: string, bucket: string) => string | Promise; + + /** + * Validate SHA-256 checksum after download. + */ + validateChecksum?: boolean; + + /** + * Maximum allowed payload size (0 = unlimited). + */ + maxSize?: number; +} + +/** + * Browser-native LFS resolver using fetch API. 
+ */ +export class LfsResolver { + private readonly getBlobUrl: ResolverConfig['getBlobUrl']; + private readonly validateChecksum: boolean; + private readonly maxSize: number; + + constructor(config: ResolverConfig) { + this.getBlobUrl = config.getBlobUrl; + this.validateChecksum = config.validateChecksum ?? true; + this.maxSize = config.maxSize ?? 0; + } + + /** + * Resolve an LFS envelope to its blob content. + * If the value is not an envelope, returns it unchanged. + */ + async resolve(value: string | Uint8Array | LfsEnvelope): Promise { + // Try to parse as envelope + let envelope: LfsEnvelope; + + if (isLfsEnvelope(value)) { + envelope = value; + } else { + try { + envelope = decodeLfsEnvelope(value as string | Uint8Array); + } catch { + // Not an envelope, return as-is + const payload = + typeof value === 'string' ? new TextEncoder().encode(value) : value as Uint8Array; + return { payload, isEnvelope: false }; + } + } + + // Fetch blob from URL + const url = await this.getBlobUrl(envelope.key, envelope.bucket); + const response = await fetch(url); + + if (!response.ok) { + throw new Error(`Failed to fetch blob: ${response.status} ${response.statusText}`); + } + + const buffer = await response.arrayBuffer(); + const payload = new Uint8Array(buffer); + + if (this.maxSize > 0 && payload.length > this.maxSize) { + throw new Error(`Payload exceeds max size: ${payload.length} > ${this.maxSize}`); + } + + if (this.validateChecksum) { + const expected = envelope.checksum || envelope.sha256; + const actual = await sha256Hex(payload); + if (actual !== expected) { + throw new Error(`Checksum mismatch: expected ${expected}, got ${actual}`); + } + } + + return { envelope, payload, isEnvelope: true }; + } +} + +/** + * Compute SHA-256 hash using Web Crypto API. 
+ */ +async function sha256Hex(data: Uint8Array): Promise { + const hashBuffer = await crypto.subtle.digest('SHA-256', data); + const hashArray = new Uint8Array(hashBuffer); + return Array.from(hashArray) + .map((b) => b.toString(16).padStart(2, '0')) + .join(''); +} diff --git a/lfs-client-sdk/js-browser/tsconfig.json b/lfs-client-sdk/js-browser/tsconfig.json new file mode 100644 index 00000000..a6012bfa --- /dev/null +++ b/lfs-client-sdk/js-browser/tsconfig.json @@ -0,0 +1,19 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "ESNext", + "moduleResolution": "bundler", + "lib": ["ES2020", "DOM"], + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/lfs-client-sdk/js/package-lock.json b/lfs-client-sdk/js/package-lock.json new file mode 100644 index 00000000..b3bdd40f --- /dev/null +++ b/lfs-client-sdk/js/package-lock.json @@ -0,0 +1,4170 @@ +{ + "name": "@kafscale/lfs-sdk", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@kafscale/lfs-sdk", + "version": "0.1.0", + "dependencies": { + "@aws-sdk/client-s3": "^3.658.1", + "@confluentinc/kafka-javascript": "^0.4.0", + "undici": "^6.21.0" + }, + "devDependencies": { + "typescript": "^5.6.3" + } + }, + "node_modules/@aws-crypto/crc32": { + "version": "5.2.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-crypto/crc32c": { + "version": "5.2.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/sha1-browser": { + "version": "5.2.0", + 
"license": "Apache-2.0", + "dependencies": { + "@aws-crypto/supports-web-crypto": "^5.2.0", + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "@aws-sdk/util-locate-window": "^3.0.0", + "@smithy/util-utf8": "^2.0.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/sha1-browser/node_modules/@smithy/is-array-buffer": { + "version": "2.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha1-browser/node_modules/@smithy/util-buffer-from": { + "version": "2.2.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha1-browser/node_modules/@smithy/util-utf8": { + "version": "2.3.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-buffer-from": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-browser": { + "version": "5.2.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/sha256-js": "^5.2.0", + "@aws-crypto/supports-web-crypto": "^5.2.0", + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "@aws-sdk/util-locate-window": "^3.0.0", + "@smithy/util-utf8": "^2.0.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/is-array-buffer": { + "version": "2.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/util-buffer-from": { + "version": "2.2.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/util-utf8": { + "version": "2.3.0", + "license": "Apache-2.0", + "dependencies": { 
+ "@smithy/util-buffer-from": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-js": { + "version": "5.2.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-crypto/supports-web-crypto": { + "version": "5.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/util": { + "version": "5.2.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.222.0", + "@smithy/util-utf8": "^2.0.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/is-array-buffer": { + "version": "2.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/util-buffer-from": { + "version": "2.2.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/util-utf8": { + "version": "2.3.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-buffer-from": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/client-kms": { + "version": "3.981.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/credential-provider-node": "^3.972.4", + "@aws-sdk/middleware-host-header": "^3.972.3", + "@aws-sdk/middleware-logger": "^3.972.3", + "@aws-sdk/middleware-recursion-detection": "^3.972.3", + "@aws-sdk/middleware-user-agent": "^3.972.5", + "@aws-sdk/region-config-resolver": "^3.972.3", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-endpoints": 
"3.981.0", + "@aws-sdk/util-user-agent-browser": "^3.972.3", + "@aws-sdk/util-user-agent-node": "^3.972.3", + "@smithy/config-resolver": "^4.4.6", + "@smithy/core": "^3.22.0", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/hash-node": "^4.2.8", + "@smithy/invalid-dependency": "^4.2.8", + "@smithy/middleware-content-length": "^4.2.8", + "@smithy/middleware-endpoint": "^4.4.12", + "@smithy/middleware-retry": "^4.4.29", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/node-http-handler": "^4.4.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": "^4.11.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-body-length-node": "^4.2.1", + "@smithy/util-defaults-mode-browser": "^4.3.28", + "@smithy/util-defaults-mode-node": "^4.2.31", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/client-s3": { + "version": "3.981.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/sha1-browser": "5.2.0", + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/credential-provider-node": "^3.972.4", + "@aws-sdk/middleware-bucket-endpoint": "^3.972.3", + "@aws-sdk/middleware-expect-continue": "^3.972.3", + "@aws-sdk/middleware-flexible-checksums": "^3.972.3", + "@aws-sdk/middleware-host-header": "^3.972.3", + "@aws-sdk/middleware-location-constraint": "^3.972.3", + "@aws-sdk/middleware-logger": "^3.972.3", + "@aws-sdk/middleware-recursion-detection": "^3.972.3", + "@aws-sdk/middleware-sdk-s3": "^3.972.5", + "@aws-sdk/middleware-ssec": "^3.972.3", + "@aws-sdk/middleware-user-agent": "^3.972.5", + 
"@aws-sdk/region-config-resolver": "^3.972.3", + "@aws-sdk/signature-v4-multi-region": "3.981.0", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-endpoints": "3.981.0", + "@aws-sdk/util-user-agent-browser": "^3.972.3", + "@aws-sdk/util-user-agent-node": "^3.972.3", + "@smithy/config-resolver": "^4.4.6", + "@smithy/core": "^3.22.0", + "@smithy/eventstream-serde-browser": "^4.2.8", + "@smithy/eventstream-serde-config-resolver": "^4.3.8", + "@smithy/eventstream-serde-node": "^4.2.8", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/hash-blob-browser": "^4.2.9", + "@smithy/hash-node": "^4.2.8", + "@smithy/hash-stream-node": "^4.2.8", + "@smithy/invalid-dependency": "^4.2.8", + "@smithy/md5-js": "^4.2.8", + "@smithy/middleware-content-length": "^4.2.8", + "@smithy/middleware-endpoint": "^4.4.12", + "@smithy/middleware-retry": "^4.4.29", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/node-http-handler": "^4.4.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": "^4.11.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-body-length-node": "^4.2.1", + "@smithy/util-defaults-mode-browser": "^4.3.28", + "@smithy/util-defaults-mode-node": "^4.2.31", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/util-stream": "^4.5.10", + "@smithy/util-utf8": "^4.2.0", + "@smithy/util-waiter": "^4.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/client-sso": { + "version": "3.980.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/middleware-host-header": "^3.972.3", + "@aws-sdk/middleware-logger": "^3.972.3", + 
"@aws-sdk/middleware-recursion-detection": "^3.972.3", + "@aws-sdk/middleware-user-agent": "^3.972.5", + "@aws-sdk/region-config-resolver": "^3.972.3", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-endpoints": "3.980.0", + "@aws-sdk/util-user-agent-browser": "^3.972.3", + "@aws-sdk/util-user-agent-node": "^3.972.3", + "@smithy/config-resolver": "^4.4.6", + "@smithy/core": "^3.22.0", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/hash-node": "^4.2.8", + "@smithy/invalid-dependency": "^4.2.8", + "@smithy/middleware-content-length": "^4.2.8", + "@smithy/middleware-endpoint": "^4.4.12", + "@smithy/middleware-retry": "^4.4.29", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/node-http-handler": "^4.4.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": "^4.11.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-body-length-node": "^4.2.1", + "@smithy/util-defaults-mode-browser": "^4.3.28", + "@smithy/util-defaults-mode-node": "^4.2.31", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/client-sso/node_modules/@aws-sdk/util-endpoints": { + "version": "3.980.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-endpoints": "^3.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/core": { + "version": "3.973.5", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/xml-builder": "^3.972.2", + "@smithy/core": "^3.22.0", + "@smithy/node-config-provider": "^4.3.8", + 
"@smithy/property-provider": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/signature-v4": "^5.3.8", + "@smithy/smithy-client": "^4.11.1", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/crc64-nvme": { + "version": "3.972.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-env": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-http": { + "version": "3.972.5", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/types": "^3.973.1", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/node-http-handler": "^4.4.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": "^4.11.1", + "@smithy/types": "^4.12.0", + "@smithy/util-stream": "^4.5.10", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-ini": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/credential-provider-env": "^3.972.3", + "@aws-sdk/credential-provider-http": "^3.972.5", + "@aws-sdk/credential-provider-login": "^3.972.3", + "@aws-sdk/credential-provider-process": "^3.972.3", + "@aws-sdk/credential-provider-sso": "^3.972.3", + "@aws-sdk/credential-provider-web-identity": "^3.972.3", + "@aws-sdk/nested-clients": "3.980.0", + "@aws-sdk/types": "^3.973.1", + "@smithy/credential-provider-imds": 
"^4.2.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-login": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/nested-clients": "3.980.0", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-node": { + "version": "3.972.4", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/credential-provider-env": "^3.972.3", + "@aws-sdk/credential-provider-http": "^3.972.5", + "@aws-sdk/credential-provider-ini": "^3.972.3", + "@aws-sdk/credential-provider-process": "^3.972.3", + "@aws-sdk/credential-provider-sso": "^3.972.3", + "@aws-sdk/credential-provider-web-identity": "^3.972.3", + "@aws-sdk/types": "^3.973.1", + "@smithy/credential-provider-imds": "^4.2.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-process": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-sso": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/client-sso": "3.980.0", + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/token-providers": "3.980.0", + "@aws-sdk/types": "^3.973.1", + 
"@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-web-identity": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/nested-clients": "3.980.0", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-bucket-endpoint": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-arn-parser": "^3.972.2", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-config-provider": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-expect-continue": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-flexible-checksums": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/crc32": "5.2.0", + "@aws-crypto/crc32c": "5.2.0", + "@aws-crypto/util": "5.2.0", + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/crc64-nvme": "3.972.0", + "@aws-sdk/types": "^3.973.1", + "@smithy/is-array-buffer": "^4.2.0", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-stream": "^4.5.10", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + 
"node_modules/@aws-sdk/middleware-host-header": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-location-constraint": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-logger": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-recursion-detection": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@aws/lambda-invoke-store": "^0.2.2", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-sdk-s3": { + "version": "3.972.5", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-arn-parser": "^3.972.2", + "@smithy/core": "^3.22.0", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/signature-v4": "^5.3.8", + "@smithy/smithy-client": "^4.11.1", + "@smithy/types": "^4.12.0", + "@smithy/util-config-provider": "^4.2.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-stream": "^4.5.10", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-ssec": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" 
+ }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-user-agent": { + "version": "3.972.5", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-endpoints": "3.980.0", + "@smithy/core": "^3.22.0", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/middleware-user-agent/node_modules/@aws-sdk/util-endpoints": { + "version": "3.980.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-endpoints": "^3.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/nested-clients": { + "version": "3.980.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/middleware-host-header": "^3.972.3", + "@aws-sdk/middleware-logger": "^3.972.3", + "@aws-sdk/middleware-recursion-detection": "^3.972.3", + "@aws-sdk/middleware-user-agent": "^3.972.5", + "@aws-sdk/region-config-resolver": "^3.972.3", + "@aws-sdk/types": "^3.973.1", + "@aws-sdk/util-endpoints": "3.980.0", + "@aws-sdk/util-user-agent-browser": "^3.972.3", + "@aws-sdk/util-user-agent-node": "^3.972.3", + "@smithy/config-resolver": "^4.4.6", + "@smithy/core": "^3.22.0", + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/hash-node": "^4.2.8", + "@smithy/invalid-dependency": "^4.2.8", + "@smithy/middleware-content-length": "^4.2.8", + "@smithy/middleware-endpoint": "^4.4.12", + "@smithy/middleware-retry": "^4.4.29", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/node-http-handler": "^4.4.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/smithy-client": 
"^4.11.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-body-length-node": "^4.2.1", + "@smithy/util-defaults-mode-browser": "^4.3.28", + "@smithy/util-defaults-mode-node": "^4.2.31", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/nested-clients/node_modules/@aws-sdk/util-endpoints": { + "version": "3.980.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-endpoints": "^3.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/region-config-resolver": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/config-resolver": "^4.4.6", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4-multi-region": { + "version": "3.981.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/middleware-sdk-s3": "^3.972.5", + "@aws-sdk/types": "^3.973.1", + "@smithy/protocol-http": "^5.3.8", + "@smithy/signature-v4": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/token-providers": { + "version": "3.980.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/core": "^3.973.5", + "@aws-sdk/nested-clients": "3.980.0", + "@aws-sdk/types": "^3.973.1", + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + 
"node_modules/@aws-sdk/types": { + "version": "3.973.1", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/util-arn-parser": { + "version": "3.972.2", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/util-endpoints": { + "version": "3.981.0", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-endpoints": "^3.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/util-locate-window": { + "version": "3.965.4", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws-sdk/util-user-agent-browser": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/types": "^3.973.1", + "@smithy/types": "^4.12.0", + "bowser": "^2.11.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-sdk/util-user-agent-node": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@aws-sdk/middleware-user-agent": "^3.972.5", + "@aws-sdk/types": "^3.973.1", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "aws-crt": ">=1.0.0" + }, + "peerDependenciesMeta": { + "aws-crt": { + "optional": true + } + } + }, + "node_modules/@aws-sdk/xml-builder": { + "version": "3.972.3", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "fast-xml-parser": "5.3.4", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@aws/lambda-invoke-store": { + "version": "0.2.3", + "license": "Apache-2.0", + "engines": { + "node": ">=18.0.0" + } + }, + 
"node_modules/@azure-rest/core-client": { + "version": "2.5.1", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-rest-pipeline": "^1.22.0", + "@azure/core-tracing": "^1.3.0", + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/abort-controller": { + "version": "2.1.2", + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-auth": { + "version": "1.10.1", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-util": "^1.13.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-client": { + "version": "1.10.1", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-rest-pipeline": "^1.22.0", + "@azure/core-tracing": "^1.3.0", + "@azure/core-util": "^1.13.0", + "@azure/logger": "^1.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-http-compat": { + "version": "2.3.1", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@azure/core-client": "^1.10.0", + "@azure/core-rest-pipeline": "^1.22.0" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-lro": { + "version": "2.7.2", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-util": "^1.2.0", + "@azure/logger": "^1.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-paging": { + "version": "1.6.2", + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/core-rest-pipeline": { + "version": "1.22.2", + "license": "MIT", + "dependencies": { + 
"@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.10.0", + "@azure/core-tracing": "^1.3.0", + "@azure/core-util": "^1.13.0", + "@azure/logger": "^1.3.0", + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-tracing": { + "version": "1.3.1", + "license": "MIT", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/core-util": { + "version": "1.13.1", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.1.2", + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/identity": { + "version": "4.13.0", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.9.0", + "@azure/core-client": "^1.9.2", + "@azure/core-rest-pipeline": "^1.17.0", + "@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.11.0", + "@azure/logger": "^1.0.0", + "@azure/msal-browser": "^4.2.0", + "@azure/msal-node": "^3.5.0", + "open": "^10.1.0", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/keyvault-common": { + "version": "2.0.0", + "license": "MIT", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.3.0", + "@azure/core-client": "^1.5.0", + "@azure/core-rest-pipeline": "^1.8.0", + "@azure/core-tracing": "^1.0.0", + "@azure/core-util": "^1.10.0", + "@azure/logger": "^1.1.4", + "tslib": "^2.2.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/keyvault-keys": { + "version": "4.10.0", + "license": "MIT", + "dependencies": { + "@azure-rest/core-client": "^2.3.3", + "@azure/abort-controller": "^2.1.2", + "@azure/core-auth": "^1.9.0", + "@azure/core-http-compat": "^2.2.0", + "@azure/core-lro": "^2.7.2", + "@azure/core-paging": "^1.6.2", + "@azure/core-rest-pipeline": "^1.19.0", + 
"@azure/core-tracing": "^1.2.0", + "@azure/core-util": "^1.11.0", + "@azure/keyvault-common": "^2.0.0", + "@azure/logger": "^1.1.4", + "tslib": "^2.8.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@azure/logger": { + "version": "1.3.0", + "license": "MIT", + "dependencies": { + "@typespec/ts-http-runtime": "^0.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@azure/msal-browser": { + "version": "4.28.1", + "license": "MIT", + "dependencies": { + "@azure/msal-common": "15.14.1" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-common": { + "version": "15.14.1", + "license": "MIT", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@azure/msal-node": { + "version": "3.8.6", + "license": "MIT", + "dependencies": { + "@azure/msal-common": "15.14.1", + "jsonwebtoken": "^9.0.0", + "uuid": "^8.3.0" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/@bufbuild/protobuf": { + "version": "2.11.0", + "license": "(Apache-2.0 AND BSD-3-Clause)" + }, + "node_modules/@confluentinc/kafka-javascript": { + "version": "0.4.0", + "hasInstallScript": true, + "license": "MIT", + "workspaces": [ + ".", + "schemaregistry", + "schemaregistry-examples" + ], + "dependencies": { + "@aws-sdk/client-kms": "^3.637.0", + "@azure/identity": "^4.4.1", + "@azure/keyvault-keys": "^4.8.0", + "@bufbuild/protobuf": "^2.0.0", + "@criteria/json-schema": "^0.10.0", + "@criteria/json-schema-validation": "^0.10.0", + "@google-cloud/kms": "^4.5.0", + "@hackbg/miscreant-esm": "^0.3.2-patch.3", + "@mapbox/node-pre-gyp": "^1.0.11", + "@smithy/types": "^3.3.0", + "@types/simple-oauth2": "^5.0.7", + "@types/validator": "^13.12.0", + "ajv": "^8.17.1", + "async-mutex": "^0.5.0", + "avsc": "^5.7.7", + "axios": "^1.7.3", + "bindings": "^1.3.1", + "json-stringify-deterministic": "^1.0.12", + "lru-cache": "^11.0.0", + "nan": "^2.17.0", + "node-vault": "^0.10.2", + "simple-oauth2": "^5.1.0", + "validator": 
"^13.12.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@confluentinc/kafka-javascript/node_modules/@smithy/types": { + "version": "3.7.2", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@criteria/json-pointer": { + "version": "0.2.1", + "license": "MIT", + "engines": { + "node": ">=18.12.1" + } + }, + "node_modules/@criteria/json-schema": { + "version": "0.10.0", + "license": "MIT", + "dependencies": { + "@criteria/json-pointer": "^0.2.1", + "toad-uri-js": "^5.0.1" + }, + "engines": { + "node": ">=18.12.1" + } + }, + "node_modules/@criteria/json-schema-validation": { + "version": "0.10.0", + "license": "MIT", + "dependencies": { + "@criteria/json-pointer": "^0.2.1", + "@criteria/json-schema": "^0.10.0", + "fast-deep-equal": "^3.1.3", + "punycode": "^2.3.1", + "smtp-address-parser": "^1.0.10", + "toad-uri-js": "^5.0.1" + }, + "engines": { + "node": ">=18.12.1" + } + }, + "node_modules/@google-cloud/kms": { + "version": "4.5.0", + "license": "Apache-2.0", + "dependencies": { + "google-gax": "^4.0.3" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@grpc/grpc-js": { + "version": "1.14.3", + "license": "Apache-2.0", + "dependencies": { + "@grpc/proto-loader": "^0.8.0", + "@js-sdsl/ordered-map": "^4.4.2" + }, + "engines": { + "node": ">=12.10.0" + } + }, + "node_modules/@grpc/grpc-js/node_modules/@grpc/proto-loader": { + "version": "0.8.0", + "license": "Apache-2.0", + "dependencies": { + "lodash.camelcase": "^4.3.0", + "long": "^5.0.0", + "protobufjs": "^7.5.3", + "yargs": "^17.7.2" + }, + "bin": { + "proto-loader-gen-types": "build/bin/proto-loader-gen-types.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/@grpc/proto-loader": { + "version": "0.7.15", + "license": "Apache-2.0", + "dependencies": { + "lodash.camelcase": "^4.3.0", + "long": "^5.0.0", + "protobufjs": "^7.2.5", + "yargs": "^17.7.2" + }, + "bin": { + 
"proto-loader-gen-types": "build/bin/proto-loader-gen-types.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/@hackbg/miscreant-esm": { + "version": "0.3.2-patch.3", + "license": "MIT" + }, + "node_modules/@hapi/boom": { + "version": "10.0.1", + "license": "BSD-3-Clause", + "dependencies": { + "@hapi/hoek": "^11.0.2" + } + }, + "node_modules/@hapi/bourne": { + "version": "3.0.0", + "license": "BSD-3-Clause" + }, + "node_modules/@hapi/hoek": { + "version": "11.0.7", + "license": "BSD-3-Clause" + }, + "node_modules/@hapi/topo": { + "version": "5.1.0", + "license": "BSD-3-Clause", + "dependencies": { + "@hapi/hoek": "^9.0.0" + } + }, + "node_modules/@hapi/topo/node_modules/@hapi/hoek": { + "version": "9.3.0", + "license": "BSD-3-Clause" + }, + "node_modules/@hapi/wreck": { + "version": "18.1.0", + "license": "BSD-3-Clause", + "dependencies": { + "@hapi/boom": "^10.0.1", + "@hapi/bourne": "^3.0.0", + "@hapi/hoek": "^11.0.2" + } + }, + "node_modules/@js-sdsl/ordered-map": { + "version": "4.4.2", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/js-sdsl" + } + }, + "node_modules/@mapbox/node-pre-gyp": { + "version": "1.0.11", + "license": "BSD-3-Clause", + "dependencies": { + "detect-libc": "^2.0.0", + "https-proxy-agent": "^5.0.0", + "make-dir": "^3.1.0", + "node-fetch": "^2.6.7", + "nopt": "^5.0.0", + "npmlog": "^5.0.1", + "rimraf": "^3.0.2", + "semver": "^7.3.5", + "tar": "^6.1.11" + }, + "bin": { + "node-pre-gyp": "bin/node-pre-gyp" + } + }, + "node_modules/@postman/form-data": { + "version": "3.1.1", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/@postman/tough-cookie": { + "version": "4.1.3-postman.1", + "license": "BSD-3-Clause", + "dependencies": { + "psl": "^1.1.33", + "punycode": "^2.1.1", + "universalify": "^0.2.0", + "url-parse": "^1.5.3" + }, + "engines": { + 
"node": ">=6" + } + }, + "node_modules/@postman/tunnel-agent": { + "version": "0.6.8", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.4", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/eventemitter": { + "version": "1.1.0", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.0", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.0", + "license": "BSD-3-Clause" + }, + "node_modules/@sideway/address": { + "version": "4.1.5", + "license": "BSD-3-Clause", + "dependencies": { + "@hapi/hoek": "^9.0.0" + } + }, + "node_modules/@sideway/address/node_modules/@hapi/hoek": { + "version": "9.3.0", + "license": "BSD-3-Clause" + }, + "node_modules/@sideway/formula": { + "version": "3.0.1", + "license": "BSD-3-Clause" + }, + "node_modules/@sideway/pinpoint": { + "version": "2.0.0", + "license": "BSD-3-Clause" + }, + "node_modules/@smithy/abort-controller": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/chunked-blob-reader": { + "version": "5.2.0", + "license": "Apache-2.0", + 
"dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/chunked-blob-reader-native": { + "version": "4.2.1", + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-base64": "^4.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/config-resolver": { + "version": "4.4.6", + "license": "Apache-2.0", + "dependencies": { + "@smithy/node-config-provider": "^4.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-config-provider": "^4.2.0", + "@smithy/util-endpoints": "^3.2.8", + "@smithy/util-middleware": "^4.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/core": { + "version": "3.22.1", + "license": "Apache-2.0", + "dependencies": { + "@smithy/middleware-serde": "^4.2.9", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-body-length-browser": "^4.2.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-stream": "^4.5.11", + "@smithy/util-utf8": "^4.2.0", + "@smithy/uuid": "^1.1.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/credential-provider-imds": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/node-config-provider": "^4.3.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-codec": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@aws-crypto/crc32": "5.2.0", + "@smithy/types": "^4.12.0", + "@smithy/util-hex-encoding": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-browser": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/eventstream-serde-universal": "^4.2.8", + 
"@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-config-resolver": { + "version": "4.3.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-node": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/eventstream-serde-universal": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-universal": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/eventstream-codec": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/fetch-http-handler": { + "version": "5.3.9", + "license": "Apache-2.0", + "dependencies": { + "@smithy/protocol-http": "^5.3.8", + "@smithy/querystring-builder": "^4.2.8", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/hash-blob-browser": { + "version": "4.2.9", + "license": "Apache-2.0", + "dependencies": { + "@smithy/chunked-blob-reader": "^5.2.0", + "@smithy/chunked-blob-reader-native": "^4.2.1", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/hash-node": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "@smithy/util-buffer-from": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/hash-stream-node": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": 
{ + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/invalid-dependency": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/is-array-buffer": { + "version": "4.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/md5-js": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-content-length": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-endpoint": { + "version": "4.4.13", + "license": "Apache-2.0", + "dependencies": { + "@smithy/core": "^3.22.1", + "@smithy/middleware-serde": "^4.2.9", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "@smithy/url-parser": "^4.2.8", + "@smithy/util-middleware": "^4.2.8", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-retry": { + "version": "4.4.30", + "license": "Apache-2.0", + "dependencies": { + "@smithy/node-config-provider": "^4.3.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/service-error-classification": "^4.2.8", + "@smithy/smithy-client": "^4.11.2", + "@smithy/types": "^4.12.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-retry": "^4.2.8", + "@smithy/uuid": "^1.1.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-serde": { + "version": "4.2.9", + "license": "Apache-2.0", + "dependencies": { + 
"@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/middleware-stack": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/node-config-provider": { + "version": "4.3.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/property-provider": "^4.2.8", + "@smithy/shared-ini-file-loader": "^4.4.3", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/node-http-handler": { + "version": "4.4.9", + "license": "Apache-2.0", + "dependencies": { + "@smithy/abort-controller": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/querystring-builder": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/property-provider": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/protocol-http": { + "version": "5.3.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/querystring-builder": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "@smithy/util-uri-escape": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/querystring-parser": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/service-error-classification": { + "version": "4.2.8", + "license": "Apache-2.0", + 
"dependencies": { + "@smithy/types": "^4.12.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/shared-ini-file-loader": { + "version": "4.4.3", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/signature-v4": { + "version": "5.3.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^4.2.0", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-hex-encoding": "^4.2.0", + "@smithy/util-middleware": "^4.2.8", + "@smithy/util-uri-escape": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/smithy-client": { + "version": "4.11.2", + "license": "Apache-2.0", + "dependencies": { + "@smithy/core": "^3.22.1", + "@smithy/middleware-endpoint": "^4.4.13", + "@smithy/middleware-stack": "^4.2.8", + "@smithy/protocol-http": "^5.3.8", + "@smithy/types": "^4.12.0", + "@smithy/util-stream": "^4.5.11", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/types": { + "version": "4.12.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/url-parser": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/querystring-parser": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-base64": { + "version": "4.3.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-buffer-from": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-body-length-browser": { + "version": "4.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + 
"node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-body-length-node": { + "version": "4.2.1", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-buffer-from": { + "version": "4.2.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/is-array-buffer": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-config-provider": { + "version": "4.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-defaults-mode-browser": { + "version": "4.3.29", + "license": "Apache-2.0", + "dependencies": { + "@smithy/property-provider": "^4.2.8", + "@smithy/smithy-client": "^4.11.2", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-defaults-mode-node": { + "version": "4.2.32", + "license": "Apache-2.0", + "dependencies": { + "@smithy/config-resolver": "^4.4.6", + "@smithy/credential-provider-imds": "^4.2.8", + "@smithy/node-config-provider": "^4.3.8", + "@smithy/property-provider": "^4.2.8", + "@smithy/smithy-client": "^4.11.2", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-endpoints": { + "version": "3.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/node-config-provider": "^4.3.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-hex-encoding": { + "version": "4.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-middleware": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": 
">=18.0.0" + } + }, + "node_modules/@smithy/util-retry": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/service-error-classification": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-stream": { + "version": "4.5.11", + "license": "Apache-2.0", + "dependencies": { + "@smithy/fetch-http-handler": "^5.3.9", + "@smithy/node-http-handler": "^4.4.9", + "@smithy/types": "^4.12.0", + "@smithy/util-base64": "^4.3.0", + "@smithy/util-buffer-from": "^4.2.0", + "@smithy/util-hex-encoding": "^4.2.0", + "@smithy/util-utf8": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-uri-escape": { + "version": "4.2.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-utf8": { + "version": "4.2.0", + "license": "Apache-2.0", + "dependencies": { + "@smithy/util-buffer-from": "^4.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/util-waiter": { + "version": "4.2.8", + "license": "Apache-2.0", + "dependencies": { + "@smithy/abort-controller": "^4.2.8", + "@smithy/types": "^4.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@smithy/uuid": { + "version": "1.1.0", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@tootallnate/once": { + "version": "2.0.0", + "license": "MIT", + "engines": { + "node": ">= 10" + } + }, + "node_modules/@types/caseless": { + "version": "0.12.5", + "license": "MIT" + }, + "node_modules/@types/long": { + "version": "4.0.2", + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "25.2.0", + "license": "MIT", + "dependencies": { + "undici-types": "~7.16.0" + } + }, + "node_modules/@types/request": { + 
"version": "2.48.13", + "license": "MIT", + "dependencies": { + "@types/caseless": "*", + "@types/node": "*", + "@types/tough-cookie": "*", + "form-data": "^2.5.5" + } + }, + "node_modules/@types/request/node_modules/form-data": { + "version": "2.5.5", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.35", + "safe-buffer": "^5.2.1" + }, + "engines": { + "node": ">= 0.12" + } + }, + "node_modules/@types/simple-oauth2": { + "version": "5.0.8", + "license": "MIT" + }, + "node_modules/@types/tough-cookie": { + "version": "4.0.5", + "license": "MIT" + }, + "node_modules/@types/validator": { + "version": "13.15.10", + "license": "MIT" + }, + "node_modules/@typespec/ts-http-runtime": { + "version": "0.3.2", + "license": "MIT", + "dependencies": { + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/@typespec/ts-http-runtime/node_modules/https-proxy-agent": { + "version": "7.0.6", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/abbrev": { + "version": "1.1.1", + "license": "ISC" + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/ajv": { + "version": "8.17.1", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "license": "MIT", + "engines": 
{ + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/aproba": { + "version": "2.1.0", + "license": "ISC" + }, + "node_modules/are-we-there-yet": { + "version": "2.0.0", + "license": "ISC", + "dependencies": { + "delegates": "^1.0.0", + "readable-stream": "^3.6.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/asn1": { + "version": "0.2.6", + "license": "MIT", + "dependencies": { + "safer-buffer": "~2.1.0" + } + }, + "node_modules/assert-plus": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/async-mutex": { + "version": "0.5.0", + "license": "MIT", + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "license": "MIT" + }, + "node_modules/avsc": { + "version": "5.7.9", + "license": "MIT", + "engines": { + "node": ">=0.11" + } + }, + "node_modules/aws-sign2": { + "version": "0.7.0", + "license": "Apache-2.0", + "engines": { + "node": "*" + } + }, + "node_modules/aws4": { + "version": "1.13.2", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.13.4", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.4", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "license": "MIT" + }, + "node_modules/base64-js": { + "version": "1.5.1", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/bcrypt-pbkdf": { + "version": "1.0.2", + "license": "BSD-3-Clause", + "dependencies": { + "tweetnacl": "^0.14.3" + } + }, + 
"node_modules/bignumber.js": { + "version": "9.3.1", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/bindings": { + "version": "1.5.0", + "license": "MIT", + "dependencies": { + "file-uri-to-path": "1.0.0" + } + }, + "node_modules/bluebird": { + "version": "2.11.0", + "license": "MIT" + }, + "node_modules/bowser": { + "version": "2.13.1", + "license": "MIT" + }, + "node_modules/brace-expansion": { + "version": "1.1.12", + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "license": "BSD-3-Clause" + }, + "node_modules/bundle-name": { + "version": "4.1.0", + "license": "MIT", + "dependencies": { + "run-applescript": "^7.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/caseless": { + "version": "0.12.0", + "license": "Apache-2.0" + }, + "node_modules/chownr": { + "version": "2.0.0", + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/cliui": { + "version": "8.0.1", + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": 
"1.1.4", + "license": "MIT" + }, + "node_modules/color-support": { + "version": "1.1.3", + "license": "ISC", + "bin": { + "color-support": "bin.js" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/commander": { + "version": "2.20.3", + "license": "MIT" + }, + "node_modules/concat-map": { + "version": "0.0.1", + "license": "MIT" + }, + "node_modules/console-control-strings": { + "version": "1.1.0", + "license": "ISC" + }, + "node_modules/core-util-is": { + "version": "1.0.2", + "license": "MIT" + }, + "node_modules/dashdash": { + "version": "1.14.1", + "license": "MIT", + "dependencies": { + "assert-plus": "^1.0.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/default-browser": { + "version": "5.5.0", + "license": "MIT", + "dependencies": { + "bundle-name": "^4.1.0", + "default-browser-id": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/default-browser-id": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/define-lazy-prop": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/delegates": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "license": "Apache-2.0", + 
"engines": { + "node": ">=8" + } + }, + "node_modules/discontinuous-range": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/duplexify": { + "version": "4.1.3", + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.4.1", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1", + "stream-shift": "^1.0.2" + } + }, + "node_modules/ecc-jsbn": { + "version": "0.1.2", + "license": "MIT", + "dependencies": { + "jsbn": "~0.1.0", + "safer-buffer": "^2.1.0" + } + }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "license": "MIT" + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/extend": { + "version": "3.0.2", 
+ "license": "MIT" + }, + "node_modules/extsprintf": { + "version": "1.3.0", + "engines": [ + "node >=0.6.0" + ], + "license": "MIT" + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "license": "MIT" + }, + "node_modules/fast-uri": { + "version": "3.1.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/fast-xml-parser": { + "version": "5.3.4", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT", + "dependencies": { + "strnum": "^2.1.0" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, + "node_modules/file-uri-to-path": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/forever-agent": { + "version": "0.6.1", + "license": "Apache-2.0", + "engines": { + "node": "*" + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/fs-minipass": { + "version": "2.1.0", + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/fs-minipass/node_modules/minipass": { + "version": "3.3.6", + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "license": "ISC" + }, + "node_modules/function-bind": { + 
"version": "1.1.2", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gauge": { + "version": "3.0.2", + "license": "ISC", + "dependencies": { + "aproba": "^1.0.3 || ^2.0.0", + "color-support": "^1.1.2", + "console-control-strings": "^1.0.0", + "has-unicode": "^2.0.1", + "object-assign": "^4.1.1", + "signal-exit": "^3.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1", + "wide-align": "^1.1.2" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/gaxios": { + "version": "6.7.1", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gaxios/node_modules/https-proxy-agent": { + "version": "7.0.6", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/gaxios/node_modules/uuid": { + "version": "9.0.1", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/gcp-metadata": { + "version": "6.1.1", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^6.1.1", + "google-logging-utils": "^0.0.2", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + 
}, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/getpass": { + "version": "0.1.7", + "license": "MIT", + "dependencies": { + "assert-plus": "^1.0.0" + } + }, + "node_modules/glob": { + "version": "7.2.3", + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/google-auth-library": { + "version": "9.15.1", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/google-gax": { + "version": "4.6.1", + "license": "Apache-2.0", + "dependencies": { + "@grpc/grpc-js": "^1.10.9", + "@grpc/proto-loader": "^0.7.13", + "@types/long": "^4.0.0", + "abort-controller": "^3.0.0", + "duplexify": "^4.0.0", + "google-auth-library": "^9.3.0", + "node-fetch": "^2.7.0", + "object-hash": "^3.0.0", + "proto3-json-serializer": "^2.0.2", + "protobufjs": "^7.3.2", + "retry-request": "^7.0.0", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/google-gax/node_modules/uuid": { + "version": "9.0.1", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/google-logging-utils": { + "version": "0.0.2", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "license": "MIT", + "engines": { + "node": 
">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/gtoken": { + "version": "7.1.0", + "license": "MIT", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-unicode": { + "version": "2.0.1", + "license": "ISC" + }, + "node_modules/hasown": { + "version": "2.0.2", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/http-signature": { + "version": "1.4.0", + "license": "MIT", + "dependencies": { + "assert-plus": "^1.0.0", + "jsprim": "^2.0.2", + "sshpk": "^1.18.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "license": "MIT", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/https-proxy-agent/node_modules/agent-base": { + "version": "6.0.2", + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "license": "ISC", + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "license": "ISC" + }, + "node_modules/ip-address": { + "version": "10.1.0", + "license": "MIT", + 
"engines": { + "node": ">= 12" + } + }, + "node_modules/is-docker": { + "version": "3.0.0", + "license": "MIT", + "bin": { + "is-docker": "cli.js" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-inside-container": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "is-docker": "^3.0.0" + }, + "bin": { + "is-inside-container": "cli.js" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-stream": { + "version": "2.0.1", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-typedarray": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/is-wsl": { + "version": "3.1.0", + "license": "MIT", + "dependencies": { + "is-inside-container": "^1.0.0" + }, + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isstream": { + "version": "0.1.2", + "license": "MIT" + }, + "node_modules/joi": { + "version": "17.13.3", + "license": "BSD-3-Clause", + "dependencies": { + "@hapi/hoek": "^9.3.0", + "@hapi/topo": "^5.1.0", + "@sideway/address": "^4.1.5", + "@sideway/formula": "^3.0.1", + "@sideway/pinpoint": "^2.0.0" + } + }, + "node_modules/joi/node_modules/@hapi/hoek": { + "version": "9.3.0", + "license": "BSD-3-Clause" + }, + "node_modules/jsbn": { + "version": "0.1.1", + "license": "MIT" + }, + "node_modules/json-bigint": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, + "node_modules/json-schema": { + "version": "0.4.0", + "license": "(AFL-2.1 OR BSD-3-Clause)" + }, + "node_modules/json-schema-traverse": { + 
"version": "1.0.0", + "license": "MIT" + }, + "node_modules/json-stringify-deterministic": { + "version": "1.0.12", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/json-stringify-safe": { + "version": "5.0.1", + "license": "ISC" + }, + "node_modules/jsonwebtoken": { + "version": "9.0.3", + "license": "MIT", + "dependencies": { + "jws": "^4.0.1", + "lodash.includes": "^4.3.0", + "lodash.isboolean": "^3.0.3", + "lodash.isinteger": "^4.0.4", + "lodash.isnumber": "^3.0.3", + "lodash.isplainobject": "^4.0.6", + "lodash.isstring": "^4.0.1", + "lodash.once": "^4.0.0", + "ms": "^2.1.1", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=12", + "npm": ">=6" + } + }, + "node_modules/jsprim": { + "version": "2.0.2", + "engines": [ + "node >=0.6.0" + ], + "license": "MIT", + "dependencies": { + "assert-plus": "1.0.0", + "extsprintf": "1.3.0", + "json-schema": "0.4.0", + "verror": "1.10.0" + } + }, + "node_modules/jwa": { + "version": "2.0.1", + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/lodash.camelcase": { + "version": "4.3.0", + "license": "MIT" + }, + "node_modules/lodash.includes": { + "version": "4.3.0", + "license": "MIT" + }, + "node_modules/lodash.isboolean": { + "version": "3.0.3", + "license": "MIT" + }, + "node_modules/lodash.isinteger": { + "version": "4.0.4", + "license": "MIT" + }, + "node_modules/lodash.isnumber": { + "version": "3.0.3", + "license": "MIT" + }, + "node_modules/lodash.isplainobject": { + "version": "4.0.6", + "license": "MIT" + }, + "node_modules/lodash.isstring": { + "version": "4.0.1", + "license": "MIT" + }, + "node_modules/lodash.once": { + "version": "4.1.1", + "license": "MIT" + }, + "node_modules/long": { + "version": "5.3.2", + "license": 
"Apache-2.0" + }, + "node_modules/lru-cache": { + "version": "11.2.5", + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/make-dir": { + "version": "3.1.0", + "license": "MIT", + "dependencies": { + "semver": "^6.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-dir/node_modules/semver": { + "version": "6.3.1", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minipass": { + "version": "5.0.0", + "license": "ISC", + "engines": { + "node": ">=8" + } + }, + "node_modules/minizlib": { + "version": "2.1.2", + "license": "MIT", + "dependencies": { + "minipass": "^3.0.0", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/minizlib/node_modules/minipass": { + "version": "3.3.6", + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/mkdirp": { + "version": "1.0.4", + "license": "MIT", + "bin": { + "mkdirp": "bin/cmd.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/moo": { + "version": "0.5.2", + "license": "BSD-3-Clause" + }, + "node_modules/ms": { + "version": "2.1.3", + "license": "MIT" + }, + "node_modules/mustache": { + "version": "4.2.0", + "license": "MIT", + "bin": { + "mustache": "bin/mustache" + } + }, + "node_modules/nan": { + 
"version": "2.25.0", + "license": "MIT" + }, + "node_modules/nearley": { + "version": "2.20.1", + "license": "MIT", + "dependencies": { + "commander": "^2.19.0", + "moo": "^0.5.0", + "railroad-diagrams": "^1.0.0", + "randexp": "0.4.6" + }, + "bin": { + "nearley-railroad": "bin/nearley-railroad.js", + "nearley-test": "bin/nearley-test.js", + "nearley-unparse": "bin/nearley-unparse.js", + "nearleyc": "bin/nearleyc.js" + }, + "funding": { + "type": "individual", + "url": "https://nearley.js.org/#give-to-nearley" + } + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/node-vault": { + "version": "0.10.9", + "license": "MIT", + "dependencies": { + "debug": "^4.3.4", + "mustache": "^4.2.0", + "postman-request": "^2.88.1-postman.42", + "tv4": "^1.3.0" + }, + "engines": { + "node": ">= 18.0.0" + } + }, + "node_modules/nopt": { + "version": "5.0.0", + "license": "ISC", + "dependencies": { + "abbrev": "1" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/npmlog": { + "version": "5.0.1", + "license": "ISC", + "dependencies": { + "are-we-there-yet": "^2.0.0", + "console-control-strings": "^1.1.0", + "gauge": "^3.0.0", + "set-blocking": "^2.0.0" + } + }, + "node_modules/oauth-sign": { + "version": "0.9.0", + "license": "Apache-2.0", + "engines": { + "node": "*" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-hash": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">= 6" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": 
"https://github.com/sponsors/ljharb" + } + }, + "node_modules/once": { + "version": "1.4.0", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/open": { + "version": "10.2.0", + "license": "MIT", + "dependencies": { + "default-browser": "^5.2.1", + "define-lazy-prop": "^3.0.0", + "is-inside-container": "^1.0.0", + "wsl-utils": "^0.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postman-request": { + "version": "2.88.1-postman.48", + "license": "Apache-2.0", + "dependencies": { + "@postman/form-data": "~3.1.1", + "@postman/tough-cookie": "~4.1.3-postman.1", + "@postman/tunnel-agent": "^0.6.8", + "aws-sign2": "~0.7.0", + "aws4": "^1.12.0", + "caseless": "~0.12.0", + "combined-stream": "~1.0.6", + "extend": "~3.0.2", + "forever-agent": "~0.6.1", + "http-signature": "~1.4.0", + "is-typedarray": "~1.0.0", + "isstream": "~0.1.2", + "json-stringify-safe": "~5.0.1", + "mime-types": "^2.1.35", + "oauth-sign": "~0.9.0", + "qs": "~6.14.1", + "safe-buffer": "^5.1.2", + "socks-proxy-agent": "^8.0.5", + "stream-length": "^1.0.2", + "uuid": "^8.3.2" + }, + "engines": { + "node": ">= 16" + } + }, + "node_modules/proto3-json-serializer": { + "version": "2.0.2", + "license": "Apache-2.0", + "dependencies": { + "protobufjs": "^7.2.5" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/protobufjs": { + "version": "7.5.4", + "hasInstallScript": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + "@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": 
"^1.1.0", + "@types/node": ">=13.7.0", + "long": "^5.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "license": "MIT" + }, + "node_modules/psl": { + "version": "1.15.0", + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "funding": { + "url": "https://github.com/sponsors/lupomontero" + } + }, + "node_modules/punycode": { + "version": "2.3.1", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/qs": { + "version": "6.14.1", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/querystringify": { + "version": "2.2.0", + "license": "MIT" + }, + "node_modules/railroad-diagrams": { + "version": "1.0.0", + "license": "CC0-1.0" + }, + "node_modules/randexp": { + "version": "0.4.6", + "license": "MIT", + "dependencies": { + "discontinuous-range": "1.0.0", + "ret": "~0.1.10" + }, + "engines": { + "node": ">=0.12" + } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/requires-port": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/ret": { + "version": "0.1.15", + "license": "MIT", + "engines": { + "node": ">=0.12" + } + }, + "node_modules/retry-request": { + "version": "7.0.2", + "license": "MIT", + "dependencies": { + "@types/request": "^2.48.8", + "extend": "^3.0.2", + "teeny-request": "^9.0.0" + }, + "engines": { + "node": ">=14" + } + }, + 
"node_modules/rimraf": { + "version": "3.0.2", + "license": "ISC", + "dependencies": { + "glob": "^7.1.3" + }, + "bin": { + "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/run-applescript": { + "version": "7.1.0", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "license": "MIT" + }, + "node_modules/semver": { + "version": "7.7.3", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/set-blocking": { + "version": "2.0.0", + "license": "ISC" + }, + "node_modules/side-channel": { + "version": "1.1.0", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + 
"node_modules/side-channel-weakmap": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "license": "ISC" + }, + "node_modules/simple-oauth2": { + "version": "5.1.0", + "license": "Apache-2.0", + "dependencies": { + "@hapi/hoek": "^11.0.4", + "@hapi/wreck": "^18.0.0", + "debug": "^4.3.4", + "joi": "^17.6.4" + } + }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "license": "MIT", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/smtp-address-parser": { + "version": "1.1.0", + "license": "MIT", + "dependencies": { + "nearley": "^2.20.1" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/socks": { + "version": "2.8.7", + "license": "MIT", + "dependencies": { + "ip-address": "^10.0.1", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.5", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "socks": "^2.8.3" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/sshpk": { + "version": "1.18.0", + "license": "MIT", + "dependencies": { + "asn1": "~0.2.3", + "assert-plus": "^1.0.0", + "bcrypt-pbkdf": "^1.0.0", + "dashdash": "^1.12.0", + "ecc-jsbn": "~0.1.1", + "getpass": "^0.1.1", + "jsbn": "~0.1.0", + "safer-buffer": "^2.0.2", + "tweetnacl": "~0.14.0" + }, + "bin": { + "sshpk-conv": "bin/sshpk-conv", + "sshpk-sign": "bin/sshpk-sign", + "sshpk-verify": "bin/sshpk-verify" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/stream-events": { + "version": "1.0.5", + "license": "MIT", + "dependencies": { + "stubs": "^3.0.0" + } + }, + 
"node_modules/stream-length": { + "version": "1.0.2", + "license": "WTFPL", + "dependencies": { + "bluebird": "^2.6.2" + } + }, + "node_modules/stream-shift": { + "version": "1.0.3", + "license": "MIT" + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strnum": { + "version": "2.1.2", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "license": "MIT" + }, + "node_modules/stubs": { + "version": "3.0.0", + "license": "MIT" + }, + "node_modules/tar": { + "version": "6.2.1", + "license": "ISC", + "dependencies": { + "chownr": "^2.0.0", + "fs-minipass": "^2.0.0", + "minipass": "^5.0.0", + "minizlib": "^2.1.1", + "mkdirp": "^1.0.3", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/teeny-request": { + "version": "9.0.0", + "license": "Apache-2.0", + "dependencies": { + "http-proxy-agent": "^5.0.0", + "https-proxy-agent": "^5.0.0", + "node-fetch": "^2.6.9", + "stream-events": "^1.0.5", + "uuid": "^9.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/teeny-request/node_modules/agent-base": { + "version": "6.0.2", + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/teeny-request/node_modules/http-proxy-agent": { + "version": "5.0.0", + "license": "MIT", + "dependencies": { + "@tootallnate/once": "2", + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + 
"node_modules/teeny-request/node_modules/uuid": { + "version": "9.0.1", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/toad-uri-js": { + "version": "5.0.1", + "license": "BSD-2-Clause-Views", + "dependencies": { + "punycode": "^2.3.1" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "license": "MIT" + }, + "node_modules/tslib": { + "version": "2.8.1", + "license": "0BSD" + }, + "node_modules/tv4": { + "version": "1.3.0", + "license": [ + { + "type": "Public Domain", + "url": "http://geraintluff.github.io/tv4/LICENSE.txt" + }, + { + "type": "MIT", + "url": "http://jsonary.com/LICENSE.txt" + } + ], + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/tweetnacl": { + "version": "0.14.5", + "license": "Unlicense" + }, + "node_modules/typescript": { + "version": "5.9.3", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici": { + "version": "6.23.0", + "license": "MIT", + "engines": { + "node": ">=18.17" + } + }, + "node_modules/undici-types": { + "version": "7.16.0", + "license": "MIT" + }, + "node_modules/universalify": { + "version": "0.2.0", + "license": "MIT", + "engines": { + "node": ">= 4.0.0" + } + }, + "node_modules/url-parse": { + "version": "1.5.10", + "license": "MIT", + "dependencies": { + "querystringify": "^2.1.1", + "requires-port": "^1.0.0" + } + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "license": "MIT" + }, + "node_modules/uuid": { + "version": "8.3.2", + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/validator": { + "version": "13.15.26", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/verror": { + "version": "1.10.0", + "engines": [ + "node >=0.6.0" + ], + "license": "MIT", + "dependencies": { + 
"assert-plus": "^1.0.0", + "core-util-is": "1.0.2", + "extsprintf": "^1.2.0" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/wide-align": { + "version": "1.1.5", + "license": "ISC", + "dependencies": { + "string-width": "^1.0.2 || 2 || 3 || 4" + } + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "license": "ISC" + }, + "node_modules/wsl-utils": { + "version": "0.1.0", + "license": "MIT", + "dependencies": { + "is-wsl": "^3.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yallist": { + "version": "4.0.0", + "license": "ISC" + }, + "node_modules/yargs": { + "version": "17.7.2", + "license": "MIT", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "license": "ISC", + "engines": { + "node": ">=12" + } + } + } +} diff --git a/lfs-client-sdk/js/package.json b/lfs-client-sdk/js/package.json new file mode 100644 index 00000000..1d5e18f2 --- /dev/null +++ b/lfs-client-sdk/js/package.json @@ -0,0 +1,20 @@ +{ + "name": "@kafscale/lfs-sdk", + "version": "0.1.0", + "description": "Client-side LFS helpers for 
Kafka.", + "type": "module", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "npm run build && node --test dist/__tests__/envelope.test.js" + }, + "dependencies": { + "@confluentinc/kafka-javascript": "^0.4.0", + "undici": "^6.21.0", + "@aws-sdk/client-s3": "^3.658.1" + }, + "devDependencies": { + "typescript": "^5.6.3" + } +} diff --git a/lfs-client-sdk/js/src/__tests__/envelope.test.ts b/lfs-client-sdk/js/src/__tests__/envelope.test.ts new file mode 100644 index 00000000..2d4d3962 --- /dev/null +++ b/lfs-client-sdk/js/src/__tests__/envelope.test.ts @@ -0,0 +1,8 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { isLfsEnvelope } from '../envelope.js'; + +test('isLfsEnvelope detects marker', () => { + assert.equal(isLfsEnvelope(new TextEncoder().encode('{"kfs_lfs":1}')), true); + assert.equal(isLfsEnvelope(new TextEncoder().encode('plain')), false); +}); diff --git a/lfs-client-sdk/js/src/envelope.ts b/lfs-client-sdk/js/src/envelope.ts new file mode 100644 index 00000000..c6767c94 --- /dev/null +++ b/lfs-client-sdk/js/src/envelope.ts @@ -0,0 +1,28 @@ +export interface LfsEnvelope { + kfs_lfs: number; + bucket: string; + key: string; + size: number; + sha256: string; + checksum?: string; + checksum_alg?: string; + content_type?: string; + original_headers?: Record<string, string>; + created_at?: string; + proxy_id?: string; +} + +export function isLfsEnvelope(value: Uint8Array | null | undefined): boolean { + if (!value || value.length < 15) return false; + if (value[0] !== 123) return false; + const prefix = new TextDecoder().decode(value.slice(0, Math.min(50, value.length))); + return prefix.includes('"kfs_lfs"'); +} + +export function decodeEnvelope(value: Uint8Array): LfsEnvelope { + const env = JSON.parse(new TextDecoder().decode(value)) as LfsEnvelope; + if (!env.kfs_lfs || !env.bucket || !env.key || !env.sha256) { + throw new Error('invalid envelope: missing required 
fields'); + } + return env; +} diff --git a/lfs-client-sdk/js/src/index.ts b/lfs-client-sdk/js/src/index.ts new file mode 100644 index 00000000..bd371e0c --- /dev/null +++ b/lfs-client-sdk/js/src/index.ts @@ -0,0 +1,3 @@ +export * from './envelope.js'; +export * from './resolver.js'; +export * from './producer.js'; diff --git a/lfs-client-sdk/js/src/producer.ts b/lfs-client-sdk/js/src/producer.ts new file mode 100644 index 00000000..5243ba44 --- /dev/null +++ b/lfs-client-sdk/js/src/producer.ts @@ -0,0 +1,31 @@ +import { request } from 'undici'; +import { LfsEnvelope } from './envelope.js'; + +export interface ProduceOptions { + topic: string; + key?: Uint8Array; + headers?: Record<string, string>; +} + +export async function produceLfs(endpoint: string, payload: Uint8Array, options: ProduceOptions): Promise<LfsEnvelope> { + const headers: Record<string, string> = { + 'X-Kafka-Topic': options.topic, + }; + if (options.key) { + headers['X-Kafka-Key'] = Buffer.from(options.key).toString('utf8'); + } + if (options.headers) { + Object.assign(headers, options.headers); + } + + const res = await request(endpoint, { + method: 'POST', + headers, + body: payload, + }); + const body = await res.body.text(); + if (res.statusCode < 200 || res.statusCode >= 300) { + throw new Error(`produce failed: ${res.statusCode} ${body}`); + } + return JSON.parse(body) as LfsEnvelope; +} diff --git a/lfs-client-sdk/js/src/resolver.ts b/lfs-client-sdk/js/src/resolver.ts new file mode 100644 index 00000000..11f160cd --- /dev/null +++ b/lfs-client-sdk/js/src/resolver.ts @@ -0,0 +1,44 @@ +import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3'; +import { decodeEnvelope, isLfsEnvelope, LfsEnvelope } from './envelope.js'; + +export interface ResolvedRecord { + envelope?: LfsEnvelope; + payload: Uint8Array; + isEnvelope: boolean; +} + +export interface ResolverOptions { + validateChecksum?: boolean; + maxSize?: number; +} + +export class LfsResolver { + private readonly s3: S3Client; + private readonly bucket: string; + private 
readonly validateChecksum: boolean; + private readonly maxSize: number; + + constructor(s3: S3Client, bucket: string, options?: ResolverOptions) { + this.s3 = s3; + this.bucket = bucket; + this.validateChecksum = options?.validateChecksum ?? true; + this.maxSize = options?.maxSize ?? 0; + } + + async resolve(value: Uint8Array): Promise<ResolvedRecord> { + if (!isLfsEnvelope(value)) { + return { payload: value, isEnvelope: false }; + } + const env = decodeEnvelope(value); + const obj = await this.s3.send(new GetObjectCommand({ Bucket: this.bucket, Key: env.key })); + const body = await obj.Body?.transformToByteArray(); + const payload = body ?? new Uint8Array(); + if (this.maxSize > 0 && payload.length > this.maxSize) { + throw new Error('payload exceeds max size'); + } + if (this.validateChecksum) { + // checksum validation placeholder (sha256) + } + return { envelope: env, payload, isEnvelope: true }; + } +} diff --git a/lfs-client-sdk/js/tsconfig.json b/lfs-client-sdk/js/tsconfig.json new file mode 100644 index 00000000..ec2fd5f8 --- /dev/null +++ b/lfs-client-sdk/js/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "Bundler", + "outDir": "dist", + "declaration": true, + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true + }, + "include": ["src/**/*.ts"] +} diff --git a/lfs-client-sdk/python/README.md b/lfs-client-sdk/python/README.md new file mode 100644 index 00000000..22776818 --- /dev/null +++ b/lfs-client-sdk/python/README.md @@ -0,0 +1,20 @@ + + +# KafScale LFS SDK (Python) + +Client-side LFS helpers for Kafka. 
diff --git a/lfs-client-sdk/python/lfs_sdk/__init__.py b/lfs-client-sdk/python/lfs_sdk/__init__.py new file mode 100644 index 00000000..1a6cb313 --- /dev/null +++ b/lfs-client-sdk/python/lfs_sdk/__init__.py @@ -0,0 +1,14 @@ +from .envelope import LfsEnvelope, decode_envelope, is_lfs_envelope +from .resolver import LfsResolver, ResolvedRecord +from .producer import produce_lfs, LfsProducer, LfsHttpException + +__all__ = [ + "LfsEnvelope", + "decode_envelope", + "is_lfs_envelope", + "LfsResolver", + "ResolvedRecord", + "produce_lfs", + "LfsProducer", + "LfsHttpException", +] diff --git a/lfs-client-sdk/python/lfs_sdk/envelope.py b/lfs-client-sdk/python/lfs_sdk/envelope.py new file mode 100644 index 00000000..e6271007 --- /dev/null +++ b/lfs-client-sdk/python/lfs_sdk/envelope.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from dataclasses import dataclass +import json +from typing import Any, Dict + + +@dataclass +class LfsEnvelope: + kfs_lfs: int + bucket: str + key: str + size: int + sha256: str + checksum: str | None = None + checksum_alg: str | None = None + content_type: str | None = None + original_headers: Dict[str, str] | None = None + created_at: str | None = None + proxy_id: str | None = None + + +def is_lfs_envelope(value: bytes | None) -> bool: + if not value or len(value) < 15: + return False + if value[:1] != b"{": + return False + prefix = value[:50].decode("utf-8", errors="ignore") + return "\"kfs_lfs\"" in prefix + + +def decode_envelope(value: bytes) -> LfsEnvelope: + payload = json.loads(value.decode("utf-8")) + if not payload.get("kfs_lfs") or not payload.get("bucket") or not payload.get("key") or not payload.get("sha256"): + raise ValueError("invalid envelope: missing required fields") + return LfsEnvelope(**payload) diff --git a/lfs-client-sdk/python/lfs_sdk/producer.py b/lfs-client-sdk/python/lfs_sdk/producer.py new file mode 100644 index 00000000..782e1e43 --- /dev/null +++ b/lfs-client-sdk/python/lfs_sdk/producer.py @@ -0,0 +1,169 
@@ +from __future__ import annotations + +import time +import uuid +from dataclasses import dataclass +from pathlib import Path +from typing import BinaryIO, Dict, Optional, Union + +import requests + + +MULTIPART_MIN_BYTES = 5 * 1024 * 1024 # 5MB +DEFAULT_CONNECT_TIMEOUT = 10.0 +DEFAULT_REQUEST_TIMEOUT = 300.0 +DEFAULT_RETRIES = 3 +RETRY_BASE_SLEEP_SECONDS = 0.2 + + +@dataclass +class LfsHttpException(Exception): + """Exception raised for LFS HTTP errors with structured error info.""" + status_code: int + code: str + message: str + request_id: str + body: str + + def __str__(self) -> str: + return f"LfsHttpException({self.status_code}, code={self.code}, message={self.message}, request_id={self.request_id})" + + +class LfsProducer: + """LFS producer with retry/backoff support for producing large blobs.""" + + def __init__( + self, + endpoint: str, + connect_timeout: float = DEFAULT_CONNECT_TIMEOUT, + request_timeout: float = DEFAULT_REQUEST_TIMEOUT, + retries: int = DEFAULT_RETRIES, + ) -> None: + self.endpoint = endpoint + self.connect_timeout = connect_timeout + self.request_timeout = request_timeout + self.retries = retries + self._session = requests.Session() + + def produce( + self, + topic: str, + payload: Union[bytes, BinaryIO, Path], + key: Optional[bytes] = None, + headers: Optional[Dict[str, str]] = None, + ) -> dict: + """ + Produce a blob to the LFS proxy. 
+
+        Args:
+            topic: Kafka topic name
+            payload: Data to send - bytes, file-like object, or Path to a file
+            key: Optional Kafka key
+            headers: Optional additional headers
+
+        Returns:
+            LFS envelope dict from the proxy response
+        """
+        # Convert payload to bytes for proper Content-Length and retry support
+        if isinstance(payload, Path):
+            data = payload.read_bytes()
+        elif hasattr(payload, "read"):
+            data = payload.read()
+        else:
+            data = payload
+
+        out_headers = {"X-Kafka-Topic": topic}
+        if key is not None:
+            out_headers["X-Kafka-Key"] = key.decode("utf-8", errors="ignore")
+        if headers:
+            out_headers.update(headers)
+        if "X-Request-ID" not in out_headers:
+            out_headers["X-Request-ID"] = str(uuid.uuid4())
+
+        actual_size = len(data)
+        out_headers["X-LFS-Size"] = str(actual_size)
+        out_headers["X-LFS-Mode"] = "single" if actual_size < MULTIPART_MIN_BYTES else "multipart"
+
+        return self._send_with_retry(data, out_headers)
+
+    def _send_with_retry(self, data: bytes, headers: Dict[str, str]) -> dict:
+        last_error: Optional[Exception] = None
+        for attempt in range(1, self.retries + 1):
+            try:
+                resp = self._session.post(
+                    self.endpoint,
+                    data=data,
+                    headers=headers,
+                    timeout=(self.connect_timeout, self.request_timeout),
+                )
+                if 200 <= resp.status_code < 300:
+                    return resp.json()
+
+                # Parse error response
+                body = resp.text
+                request_id = resp.headers.get("X-Request-ID", "")
+                code = ""
+                message = body
+                try:
+                    err = resp.json()
+                    code = err.get("code", "")
+                    message = err.get("message", body)
+                    request_id = err.get("request_id", request_id)
+                except Exception:
+                    pass
+
+                http_error = LfsHttpException(
+                    status_code=resp.status_code,
+                    code=code,
+                    message=message,
+                    request_id=request_id,
+                    body=body,
+                )
+
+                # Retry transient failures: 5xx server errors and 429 rate limits
+                if (resp.status_code >= 500 or resp.status_code == 429) and attempt < self.retries:
+                    last_error = http_error
+                    self._sleep_backoff(attempt)
+                    continue
+                raise http_error
+
+            except requests.exceptions.RequestException as ex:
+                last_error = ex
+                if attempt == 
self.retries:
+                    break
+                self._sleep_backoff(attempt)
+
+        if last_error:
+            raise last_error
+        raise RuntimeError("produce failed: no response")
+
+    def _sleep_backoff(self, attempt: int) -> None:
+        sleep_time = RETRY_BASE_SLEEP_SECONDS * (2 ** (attempt - 1))
+        time.sleep(sleep_time)
+
+    def close(self) -> None:
+        """Close the underlying session."""
+        self._session.close()
+
+    def __enter__(self) -> "LfsProducer":
+        return self
+
+    def __exit__(self, *args) -> None:
+        self.close()
+
+
+def produce_lfs(
+    endpoint: str,
+    topic: str,
+    payload: Union[bytes, BinaryIO, Path],
+    key: Optional[bytes] = None,
+    headers: Optional[Dict[str, str]] = None,
+    timeout: float = DEFAULT_REQUEST_TIMEOUT,
+) -> dict:
+    """
+    Convenience function for one-shot LFS produce.
+
+    For multiple produces, use LfsProducer for connection reuse.
+    """
+    with LfsProducer(endpoint, request_timeout=timeout) as producer:
+        return producer.produce(topic, payload, key, headers)
diff --git a/lfs-client-sdk/python/lfs_sdk/resolver.py b/lfs-client-sdk/python/lfs_sdk/resolver.py
new file mode 100644
index 00000000..9ef91f10
--- /dev/null
+++ b/lfs-client-sdk/python/lfs_sdk/resolver.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import hashlib
+from dataclasses import dataclass
+from typing import Optional
+
+import boto3
+
+from .envelope import decode_envelope, is_lfs_envelope, LfsEnvelope
+
+
+@dataclass
+class ResolvedRecord:
+    envelope: Optional[LfsEnvelope]
+    payload: bytes
+    is_envelope: bool
+
+
+class LfsResolver:
+    def __init__(self, bucket: str, s3_client=None, validate_checksum: bool = True, max_size: int = 0) -> None:
+        self.bucket = bucket
+        self.s3 = s3_client or boto3.client("s3")
+        self.validate_checksum = validate_checksum
+        self.max_size = max_size
+
+    def resolve(self, value: bytes) -> ResolvedRecord:
+        if not is_lfs_envelope(value):
+            return ResolvedRecord(None, value, False)
+
+        env = decode_envelope(value)
+        # Reject obviously oversized objects before downloading; the actual
+        # byte count is re-checked after the read in case the envelope lied.
+        if self.max_size > 0 and env.size > self.max_size:
+            raise ValueError("payload exceeds max size")
+        obj = self.s3.get_object(Bucket=self.bucket, Key=env.key)
+        payload = obj["Body"].read()
+        if self.max_size > 0 and len(payload) > self.max_size:
+            raise ValueError("payload exceeds max size")
+        if self.validate_checksum:
+            # Honor checksum_alg when both checksum fields are present; fall
+            # back to the legacy sha256 field otherwise.
+            if env.checksum and env.checksum_alg:
+                expected = env.checksum
+                actual = hashlib.new(env.checksum_alg.lower(), payload).hexdigest()
+            else:
+                expected = env.checksum or env.sha256
+                actual = hashlib.sha256(payload).hexdigest()
+            if actual != expected:
+                raise ValueError("checksum mismatch")
+        return ResolvedRecord(env, payload, True)
diff --git a/lfs-client-sdk/python/pyproject.toml b/lfs-client-sdk/python/pyproject.toml
new file mode 100644
index 00000000..bd80d637
--- /dev/null
+++ b/lfs-client-sdk/python/pyproject.toml
@@ -0,0 +1,36 @@
+# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com).
+# This project is supported and financed by Scalytics, Inc. (www.scalytics.io).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[build-system]
+requires = ["setuptools>=70", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "kafscale-lfs-sdk"
+version = "0.1.0"
+description = "Client-side LFS helpers for Kafka."
+readme = "README.md" +requires-python = ">=3.10" +license = {text = "Apache-2.0"} +dependencies = [ + "confluent-kafka>=2.5.0", + "boto3>=1.34.0", + "requests>=2.32.0" +] + +[project.optional-dependencies] + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/lfs-client-sdk/python/tests/test_envelope.py b/lfs-client-sdk/python/tests/test_envelope.py new file mode 100644 index 00000000..c10c3e00 --- /dev/null +++ b/lfs-client-sdk/python/tests/test_envelope.py @@ -0,0 +1,6 @@ +from lfs_sdk.envelope import is_lfs_envelope + + +def test_is_lfs_envelope(): + assert is_lfs_envelope(b'{"kfs_lfs":1,"bucket":"b"}') + assert not is_lfs_envelope(b'plain') diff --git a/pkg/acl/acl_test.go b/pkg/acl/acl_test.go index 70693c0f..fe1ead4a 100644 --- a/pkg/acl/acl_test.go +++ b/pkg/acl/acl_test.go @@ -88,3 +88,165 @@ func TestAuthorizerWildcardName(t *testing.T) { t.Fatalf("expected prefix wildcard match") } } + +func TestAuthorizerEnabled(t *testing.T) { + auth := NewAuthorizer(Config{Enabled: true}) + if !auth.Enabled() { + t.Fatal("expected Enabled() = true") + } + auth2 := NewAuthorizer(Config{Enabled: false}) + if auth2.Enabled() { + t.Fatal("expected Enabled() = false") + } +} + +func TestAuthorizerNilReceiver(t *testing.T) { + var auth *Authorizer + if auth.Enabled() { + t.Fatal("nil Authorizer should not be enabled") + } + if !auth.Allows("user", ActionFetch, ResourceTopic, "orders") { + t.Fatal("nil Authorizer should allow all") + } +} + +func TestAuthorizerDisabledAllows(t *testing.T) { + auth := NewAuthorizer(Config{ + Enabled: false, + DefaultPolicy: "deny", + }) + if !auth.Allows("any", ActionAdmin, ResourceCluster, "test") { + t.Fatal("disabled authorizer should allow all") + } +} + +func TestAuthorizerAnonymousPrincipal(t *testing.T) { + auth := NewAuthorizer(Config{ + Enabled: true, + DefaultPolicy: "deny", + Principals: []PrincipalRules{ + { + Name: "anonymous", + Allow: []Rule{{Action: ActionFetch, Resource: ResourceTopic, Name: "public"}}, + }, + }, + 
}) + // Empty principal maps to "anonymous" + if !auth.Allows("", ActionFetch, ResourceTopic, "public") { + t.Fatal("empty principal should map to anonymous") + } + if !auth.Allows(" ", ActionFetch, ResourceTopic, "public") { + t.Fatal("whitespace principal should map to anonymous") + } +} + +func TestAuthorizerEmptyPrincipalInConfig(t *testing.T) { + auth := NewAuthorizer(Config{ + Enabled: true, + DefaultPolicy: "deny", + Principals: []PrincipalRules{ + {Name: "", Allow: []Rule{{Action: ActionAny, Resource: ResourceAny}}}, + }, + }) + // Empty name principal should be skipped + if auth.Allows("unknown", ActionFetch, ResourceTopic, "orders") { + t.Fatal("empty principal name should be skipped in config") + } +} + +func TestAuthorizerWildcardActionAndResource(t *testing.T) { + auth := NewAuthorizer(Config{ + Enabled: true, + DefaultPolicy: "deny", + Principals: []PrincipalRules{ + { + Name: "superuser", + Allow: []Rule{{Action: ActionAny, Resource: ResourceAny, Name: "*"}}, + }, + }, + }) + if !auth.Allows("superuser", ActionProduce, ResourceTopic, "anything") { + t.Fatal("wildcard action+resource+name should allow all") + } + if !auth.Allows("superuser", ActionAdmin, ResourceCluster, "cluster-1") { + t.Fatal("wildcard should allow admin on cluster") + } +} + +func TestAuthorizerCaseInsensitivePolicy(t *testing.T) { + auth := NewAuthorizer(Config{ + Enabled: true, + DefaultPolicy: "ALLOW", + }) + if !auth.Allows("unknown", ActionFetch, ResourceTopic, "orders") { + t.Fatal("ALLOW (uppercase) should work") + } +} + +func TestNameMatchesExact(t *testing.T) { + if !nameMatches("orders", "orders") { + t.Fatal("exact match should succeed") + } + if nameMatches("orders", "other") { + t.Fatal("different name should not match") + } +} + +func TestNameMatchesEmptyAndWildcard(t *testing.T) { + if !nameMatches("", "anything") { + t.Fatal("empty ruleName should match anything") + } + if !nameMatches("*", "anything") { + t.Fatal("* ruleName should match anything") + } + if 
!nameMatches(" ", "anything") { + t.Fatal("whitespace ruleName should match anything") + } +} + +func TestActionMatches(t *testing.T) { + if !actionMatches("", ActionFetch) { + t.Fatal("empty rule action should match any action") + } + if !actionMatches(ActionAny, ActionFetch) { + t.Fatal("* action should match any action") + } + if !actionMatches(ActionFetch, ActionFetch) { + t.Fatal("same action should match") + } + if actionMatches(ActionFetch, ActionProduce) { + t.Fatal("different actions should not match") + } +} + +func TestResourceMatches(t *testing.T) { + if !resourceMatches("", ResourceTopic) { + t.Fatal("empty rule resource should match any resource") + } + if !resourceMatches(ResourceAny, ResourceTopic) { + t.Fatal("* resource should match any resource") + } + if !resourceMatches(ResourceTopic, ResourceTopic) { + t.Fatal("same resource should match") + } + if resourceMatches(ResourceTopic, ResourceGroup) { + t.Fatal("different resources should not match") + } +} + +func TestAuthorizerFallbackToDefault(t *testing.T) { + auth := NewAuthorizer(Config{ + Enabled: true, + DefaultPolicy: "allow", + Principals: []PrincipalRules{ + { + Name: "client-a", + Allow: []Rule{{Action: ActionFetch, Resource: ResourceTopic, Name: "orders"}}, + }, + }, + }) + // Action that doesn't match any rule falls back to default + if !auth.Allows("client-a", ActionAdmin, ResourceCluster, "cluster") { + t.Fatal("unmatched action should fall back to default (allow)") + } +} diff --git a/pkg/broker/conn_context_test.go b/pkg/broker/conn_context_test.go new file mode 100644 index 00000000..6a83f63a --- /dev/null +++ b/pkg/broker/conn_context_test.go @@ -0,0 +1,69 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package broker + +import ( + "context" + "testing" +) + +func TestContextWithConnInfo(t *testing.T) { + ctx := context.Background() + info := &ConnContext{Principal: "user-1", RemoteAddr: "1.2.3.4:5678"} + + ctx2 := ContextWithConnInfo(ctx, info) + got := ConnInfoFromContext(ctx2) + if got == nil || got.Principal != "user-1" || got.RemoteAddr != "1.2.3.4:5678" { + t.Fatalf("expected conn info, got %+v", got) + } +} + +func TestContextWithConnInfoNil(t *testing.T) { + ctx := context.Background() + ctx2 := ContextWithConnInfo(ctx, nil) + got := ConnInfoFromContext(ctx2) + if got != nil { + t.Fatalf("expected nil for nil input, got %+v", got) + } +} + +func TestConnInfoFromContextNilContext(t *testing.T) { + got := ConnInfoFromContext(nil) + if got != nil { + t.Fatalf("expected nil for nil context, got %+v", got) + } +} + +func TestConnInfoFromContextMissing(t *testing.T) { + got := ConnInfoFromContext(context.Background()) + if got != nil { + t.Fatalf("expected nil for empty context, got %+v", got) + } +} + +func TestConnContextProxyAddr(t *testing.T) { + ctx := context.Background() + info := &ConnContext{ + Principal: "admin", + RemoteAddr: "10.0.0.1:1234", + ProxyAddr: "10.0.0.100:443", + } + ctx2 := ContextWithConnInfo(ctx, info) + got := ConnInfoFromContext(ctx2) + if got.ProxyAddr != "10.0.0.100:443" { + t.Fatalf("expected proxy addr, got %q", got.ProxyAddr) + } +} diff --git a/pkg/broker/proxyproto_test.go b/pkg/broker/proxyproto_test.go index 96f8ec4f..c5a355f4 100644 --- a/pkg/broker/proxyproto_test.go +++ b/pkg/broker/proxyproto_test.go 
@@ -16,7 +16,9 @@ package broker import ( + "bufio" "bytes" + "encoding/binary" "io" "net" "testing" @@ -24,8 +26,8 @@ import ( func TestProxyProtocolV1Unknown(t *testing.T) { conn, peer := net.Pipe() - defer conn.Close() - defer peer.Close() + defer func() { _ = conn.Close() }() + defer func() { _ = peer.Close() }() payload := []byte("PROXY UNKNOWN\r\nping") go func() { @@ -50,8 +52,8 @@ func TestProxyProtocolV1Unknown(t *testing.T) { func TestProxyProtocolV2Local(t *testing.T) { conn, peer := net.Pipe() - defer conn.Close() - defer peer.Close() + defer func() { _ = conn.Close() }() + defer func() { _ = peer.Close() }() header := append([]byte{}, proxyV2Signature...) header = append(header, 0x20) // v2 + LOCAL @@ -78,3 +80,202 @@ func TestProxyProtocolV2Local(t *testing.T) { t.Fatalf("unexpected payload %q", string(buf)) } } + +func TestProxyProtocolV1Full(t *testing.T) { + conn, peer := net.Pipe() + defer func() { _ = conn.Close() }() + defer func() { _ = peer.Close() }() + + header := "PROXY TCP4 192.168.1.1 192.168.1.2 12345 80\r\ndata" + go func() { + _, _ = peer.Write([]byte(header)) + }() + + wrapped, info, err := ReadProxyProtocol(conn) + if err != nil { + t.Fatalf("ReadProxyProtocol: %v", err) + } + if info == nil || info.Local { + t.Fatalf("expected non-local proxy info, got %+v", info) + } + if info.SourceIP != "192.168.1.1" || info.DestIP != "192.168.1.2" { + t.Fatalf("unexpected IPs: %+v", info) + } + if info.SourcePort != 12345 || info.DestPort != 80 { + t.Fatalf("unexpected ports: %+v", info) + } + buf := make([]byte, 4) + if _, err := io.ReadFull(wrapped, buf); err != nil { + t.Fatalf("read payload: %v", err) + } + if string(buf) != "data" { + t.Fatalf("unexpected trailing data: %q", buf) + } +} + +func TestProxyProtocolV2IPv4(t *testing.T) { + conn, peer := net.Pipe() + defer func() { _ = conn.Close() }() + defer func() { _ = peer.Close() }() + + header := make([]byte, 16) + copy(header[:12], proxyV2Signature) + header[12] = 0x21 // v2 + PROXY + 
header[13] = 0x11 // AF_INET + STREAM + // IPv4 addresses: src=10.0.0.1, dst=10.0.0.2, src_port=1234, dst_port=5678 + payload := make([]byte, 12) + copy(payload[0:4], net.ParseIP("10.0.0.1").To4()) + copy(payload[4:8], net.ParseIP("10.0.0.2").To4()) + binary.BigEndian.PutUint16(payload[8:10], 1234) + binary.BigEndian.PutUint16(payload[10:12], 5678) + binary.BigEndian.PutUint16(header[14:16], uint16(len(payload))) + fullHeader := append(header, payload...) + fullHeader = append(fullHeader, []byte("rest")...) + + go func() { + _, _ = peer.Write(fullHeader) + }() + + wrapped, info, err := ReadProxyProtocol(conn) + if err != nil { + t.Fatalf("ReadProxyProtocol: %v", err) + } + if info == nil || info.Local { + t.Fatalf("expected non-local proxy info, got %+v", info) + } + if info.SourceIP != "10.0.0.1" || info.DestIP != "10.0.0.2" { + t.Fatalf("unexpected IPs: src=%s dst=%s", info.SourceIP, info.DestIP) + } + if info.SourcePort != 1234 || info.DestPort != 5678 { + t.Fatalf("unexpected ports: src=%d dst=%d", info.SourcePort, info.DestPort) + } + buf := make([]byte, 4) + if _, err := io.ReadFull(wrapped, buf); err != nil { + t.Fatalf("read trailing: %v", err) + } + if string(buf) != "rest" { + t.Fatalf("unexpected trailing: %q", buf) + } +} + +func TestProxyProtocolV2IPv6(t *testing.T) { + conn, peer := net.Pipe() + defer func() { _ = conn.Close() }() + defer func() { _ = peer.Close() }() + + header := make([]byte, 16) + copy(header[:12], proxyV2Signature) + header[12] = 0x21 // v2 + PROXY + header[13] = 0x22 // routes to parseProxyV2Inet6 (low nibble 0x2) + // IPv6 addresses: src=::1, dst=::2, src_port=8080, dst_port=9090 + payload := make([]byte, 36) + srcIP := net.ParseIP("::1") + dstIP := net.ParseIP("::2") + copy(payload[0:16], srcIP.To16()) + copy(payload[16:32], dstIP.To16()) + binary.BigEndian.PutUint16(payload[32:34], 8080) + binary.BigEndian.PutUint16(payload[34:36], 9090) + binary.BigEndian.PutUint16(header[14:16], uint16(len(payload))) + fullData := 
append(header, payload...) + + go func() { + _, _ = peer.Write(fullData) + }() + + _, info, err := ReadProxyProtocol(conn) + if err != nil { + t.Fatalf("ReadProxyProtocol: %v", err) + } + if info == nil || info.Local { + t.Fatalf("expected non-local proxy info, got %+v", info) + } + if info.SourcePort != 8080 || info.DestPort != 9090 { + t.Fatalf("unexpected ports: src=%d dst=%d", info.SourcePort, info.DestPort) + } +} + +func TestProxyProtocolNone(t *testing.T) { + conn, peer := net.Pipe() + defer func() { _ = conn.Close() }() + defer func() { _ = peer.Close() }() + + go func() { + _, _ = peer.Write([]byte("not a proxy header")) + }() + + wrapped, info, err := ReadProxyProtocol(conn) + if err != nil { + t.Fatalf("ReadProxyProtocol: %v", err) + } + if info != nil { + t.Fatalf("expected nil info for non-proxy data, got %+v", info) + } + buf := make([]byte, 3) + if _, err := io.ReadFull(wrapped, buf); err != nil { + t.Fatalf("read data: %v", err) + } + if string(buf) != "not" { + t.Fatalf("unexpected data: %q", buf) + } +} + +func TestAtoiOrZero(t *testing.T) { + tests := []struct { + input string + want int + }{ + {"0", 0}, + {"123", 123}, + {"80", 80}, + {"", 0}, + {"abc", 0}, + {"12x3", 0}, + } + for _, tc := range tests { + if got := atoiOrZero(tc.input); got != tc.want { + t.Errorf("atoiOrZero(%q) = %d, want %d", tc.input, got, tc.want) + } + } +} + +func TestWrapConnWithReaderNil(t *testing.T) { + conn, _ := net.Pipe() + defer func() { _ = conn.Close() }() + wrapped := wrapConnWithReader(conn, nil) + if wrapped != conn { + t.Fatal("expected original conn when reader is nil") + } +} + +func TestParseProxyV2InetTooShort(t *testing.T) { + _, err := parseProxyV2Inet([]byte{1, 2, 3}) + if err == nil { + t.Fatal("expected error for short payload") + } +} + +func TestParseProxyV2Inet6TooShort(t *testing.T) { + _, err := parseProxyV2Inet6(make([]byte, 10)) + if err == nil { + t.Fatal("expected error for short payload") + } +} + +func TestReadProxyV1LineTooLong(t 
*testing.T) { + data := bytes.Repeat([]byte("x"), 300) + br := bufio.NewReader(bytes.NewReader(data)) + _, err := readProxyV1Line(br, 256) + if err == nil { + t.Fatal("expected error for line too long") + } +} + +func TestParseProxyV1Malformed(t *testing.T) { + // Too few fields + data := "PROXY TCP4 1.2.3.4\r\n" + br := bufio.NewReader(bytes.NewBufferString(data)) + _, err := parseProxyV1(br) + if err == nil { + t.Fatal("expected error for malformed v1 header") + } +} diff --git a/pkg/broker/s3_health_test.go b/pkg/broker/s3_health_test.go index 60d14169..f46b83f8 100644 --- a/pkg/broker/s3_health_test.go +++ b/pkg/broker/s3_health_test.go @@ -57,3 +57,60 @@ func TestS3HealthStateTransitions(t *testing.T) { t.Fatalf("expected healthy after recovery got %s", got) } } + +func TestS3HealthSnapshot(t *testing.T) { + monitor := NewS3HealthMonitor(S3HealthConfig{ + Window: time.Minute, + LatencyWarn: 100 * time.Millisecond, + LatencyCrit: time.Second, + ErrorWarn: 0.3, + ErrorCrit: 0.7, + MaxSamples: 64, + }) + + monitor.RecordOperation("upload", 10*time.Millisecond, nil) + snap := monitor.Snapshot() + if snap.State != S3StateHealthy { + t.Fatalf("expected healthy state, got %s", snap.State) + } + if snap.Since.IsZero() { + t.Fatal("expected non-zero Since") + } + if snap.AvgLatency == 0 { + t.Fatal("expected non-zero avg latency") + } + if snap.ErrorRate != 0 { + t.Fatalf("expected 0 error rate, got %f", snap.ErrorRate) + } +} + +func TestS3HealthMonitorDefaults(t *testing.T) { + // All zero config → should use defaults + monitor := NewS3HealthMonitor(S3HealthConfig{}) + if monitor.State() != S3StateHealthy { + t.Fatalf("expected healthy initial state") + } + // Record a few operations to ensure it works with defaults + monitor.RecordOperation("upload", time.Millisecond, nil) + snap := monitor.Snapshot() + if snap.State != S3StateHealthy { + t.Fatalf("expected healthy after normal ops") + } +} + +func TestS3HealthTruncation(t *testing.T) { + monitor := 
NewS3HealthMonitor(S3HealthConfig{
+		Window:     100 * time.Millisecond,
+		MaxSamples: 4,
+	})
+
+	// Record several operations
+	for i := 0; i < 10; i++ {
+		monitor.RecordOperation("upload", time.Millisecond, nil)
+	}
+	// After truncation, max samples should be honored
+	snap := monitor.Snapshot()
+	if snap.State != S3StateHealthy {
+		t.Fatalf("expected healthy, got %s", snap.State)
+	}
+}
diff --git a/pkg/broker/server.go b/pkg/broker/server.go
index 2efdfa4e..b8724a76 100644
--- a/pkg/broker/server.go
+++ b/pkg/broker/server.go
@@ -70,8 +70,8 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
 			return nil
 		default:
 		}
-		if ne, ok := err.(net.Error); ok && ne.Temporary() {
-			log.Printf("accept temporary error: %v", err)
+		if ne, ok := err.(net.Error); ok && !ne.Timeout() {
+			log.Printf("accept error: %v", err)
 			continue
 		}
 		return err
@@ -100,7 +100,7 @@ func (s *Server) ListenAddress() string {
 func (s *Server) handleConnection(conn net.Conn) {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
-	defer conn.Close()
+	defer func() { _ = conn.Close() }()
 	if s.ConnContextFunc != nil {
 		wrapped, info, err := s.ConnContextFunc(conn)
 		if err != nil {
@@ -130,8 +130,13 @@
 		}
 		respPayload, err := s.Handler.Handle(ctx, header, req)
 		if err != nil {
-			log.Printf("handle request: %v", err)
-			return
+			log.Printf("handle request api=%d v=%d: %v", header.APIKey, header.APIVersion, err)
+			// Reply with a zero-valued response of the matching type instead
+			// of dropping the connection so the client can recover.
+			if errResp := buildErrorResponse(header); errResp != nil {
+				_ = protocol.WriteFrame(conn, errResp)
+			}
+			continue
 		}
 		if respPayload == nil {
 			continue
@@ -142,3 +147,15 @@
 	}
 }
+
+// buildErrorResponse builds a zero-valued response of the type matching the
+// request so the client receives a well-formed reply instead of a dropped
+// socket (NOTE(review): no error code is set on the response body). 
Returns nil if no suitable response can be constructed. +func buildErrorResponse(header *protocol.RequestHeader) []byte { + resp := kmsg.ResponseForKey(header.APIKey) + if resp == nil { + return nil + } + resp.SetVersion(header.APIVersion) + return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp) +} diff --git a/pkg/broker/server_test.go b/pkg/broker/server_test.go index 3e985f9f..f95f6406 100644 --- a/pkg/broker/server_test.go +++ b/pkg/broker/server_test.go @@ -99,7 +99,7 @@ func writeNullableString(buf *bytes.Buffer, s *string) { func TestServerHandleConnection_ApiVersions(t *testing.T) { serverConn, clientConn := net.Pipe() - defer clientConn.Close() + defer func() { _ = clientConn.Close() }() s := &Server{Handler: &testHandler{}} @@ -128,7 +128,7 @@ func TestServerHandleConnection_ApiVersions(t *testing.T) { t.Fatalf("expected correlation id 42 got %d", corr) } - clientConn.Close() + _ = clientConn.Close() select { case <-done: case <-time.After(time.Second): @@ -138,7 +138,7 @@ func TestServerHandleConnection_ApiVersions(t *testing.T) { func TestServerHandleConnection_Metadata(t *testing.T) { serverConn, clientConn := net.Pipe() - defer clientConn.Close() + defer func() { _ = clientConn.Close() }() s := &Server{Handler: &testHandler{}} @@ -166,7 +166,7 @@ func TestServerHandleConnection_Metadata(t *testing.T) { t.Fatalf("expected correlation id 5 got %d", corr) } - clientConn.Close() + _ = clientConn.Close() select { case <-done: case <-time.After(time.Second): @@ -204,3 +204,193 @@ func TestServerListenAndServe_Shutdown(t *testing.T) { t.Fatalf("server did not exit after cancel") } } + +func TestServerListenAndServeNoHandler(t *testing.T) { + s := &Server{Addr: "127.0.0.1:0"} + err := s.ListenAndServe(context.Background()) + if err == nil || err.Error() != "broker.Server requires a Handler" { + t.Fatalf("expected handler required error, got: %v", err) + } +} + +func TestServerWait(t *testing.T) { + s := &Server{Handler: &testHandler{}} 
+ // No goroutines → Wait returns immediately + done := make(chan struct{}) + go func() { + s.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("Wait should return immediately with no connections") + } +} + +func TestServerListenAddress(t *testing.T) { + s := &Server{Addr: "127.0.0.1:9999", Handler: &testHandler{}} + // Before listening, returns configured addr + if got := s.ListenAddress(); got != "127.0.0.1:9999" { + t.Fatalf("expected configured addr, got %q", got) + } +} + +func TestServerHandleConnection_ConnContext(t *testing.T) { + serverConn, clientConn := net.Pipe() + defer func() { _ = clientConn.Close() }() + + s := &Server{ + Handler: &testHandler{}, + ConnContextFunc: func(conn net.Conn) (net.Conn, *ConnContext, error) { + return conn, &ConnContext{Principal: "test-user", RemoteAddr: "1.2.3.4:5678"}, nil + }, + } + + done := make(chan struct{}) + go func() { + defer close(done) + s.handleConnection(serverConn) + }() + + if err := protocol.WriteFrame(clientConn, buildApiVersionsRequest()); err != nil { + t.Fatalf("WriteFrame: %v", err) + } + + resp, err := protocol.ReadFrame(clientConn) + if err != nil { + t.Fatalf("ReadFrame: %v", err) + } + + reader := bytes.NewReader(resp.Payload) + var corr int32 + if err := binary.Read(reader, binary.BigEndian, &corr); err != nil { + t.Fatalf("read correlation id: %v", err) + } + if corr != 42 { + t.Fatalf("expected correlation 42, got %d", corr) + } + + _ = clientConn.Close() + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("handleConnection did not exit") + } +} + +func TestServerHandleConnection_ConnContextError(t *testing.T) { + serverConn, clientConn := net.Pipe() + defer func() { _ = clientConn.Close() }() + + s := &Server{ + Handler: &testHandler{}, + ConnContextFunc: func(conn net.Conn) (net.Conn, *ConnContext, error) { + return nil, nil, errors.New("auth failed") + }, + } + + done := make(chan struct{}) + go func() { + defer close(done) + 
s.handleConnection(serverConn) + }() + + select { + case <-done: + // Connection should close immediately due to error + case <-time.After(time.Second): + t.Fatal("handleConnection should exit immediately on ConnContext error") + } +} + +func TestServerHandleConnection_BadFrame(t *testing.T) { + serverConn, clientConn := net.Pipe() + defer func() { _ = clientConn.Close() }() + + s := &Server{Handler: &testHandler{}} + + done := make(chan struct{}) + go func() { + defer close(done) + s.handleConnection(serverConn) + }() + + // Write invalid data (too short for a frame header) + _, _ = clientConn.Write([]byte{0, 0, 0, 2, 0xff, 0xff}) + _ = clientConn.Close() + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("handleConnection should exit on bad parse") + } +} + +type errorHandler struct{} + +func (h *errorHandler) Handle(ctx context.Context, header *protocol.RequestHeader, req kmsg.Request) ([]byte, error) { + return nil, errors.New("handler error") +} + +func TestServerHandleConnection_HandlerError(t *testing.T) { + serverConn, clientConn := net.Pipe() + defer func() { _ = clientConn.Close() }() + + s := &Server{Handler: &errorHandler{}} + + done := make(chan struct{}) + go func() { + defer close(done) + s.handleConnection(serverConn) + }() + + if err := protocol.WriteFrame(clientConn, buildApiVersionsRequest()); err != nil { + t.Fatalf("WriteFrame: %v", err) + } + + // Handler error should send an error response instead of closing the + // connection so the client can recover gracefully. 
+ frame, err := protocol.ReadFrame(clientConn) + if err != nil { + t.Fatalf("expected error response frame, got read error: %v", err) + } + if len(frame.Payload) == 0 { + t.Fatal("expected non-empty error response payload") + } +} + +type nilHandler struct{} + +func (h *nilHandler) Handle(ctx context.Context, header *protocol.RequestHeader, req kmsg.Request) ([]byte, error) { + return nil, nil +} + +func TestServerHandleConnection_NilResponse(t *testing.T) { + serverConn, clientConn := net.Pipe() + defer func() { _ = clientConn.Close() }() + + s := &Server{Handler: &nilHandler{}} + + done := make(chan struct{}) + go func() { + defer close(done) + s.handleConnection(serverConn) + }() + + if err := protocol.WriteFrame(clientConn, buildApiVersionsRequest()); err != nil { + t.Fatalf("WriteFrame: %v", err) + } + + // Handler returns nil → server continues loop, send another then close + if err := protocol.WriteFrame(clientConn, buildApiVersionsRequest()); err != nil { + t.Fatalf("WriteFrame 2: %v", err) + } + _ = clientConn.Close() + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("handleConnection should exit after client close") + } +} diff --git a/pkg/cache/segment_cache_test.go b/pkg/cache/segment_cache_test.go index 6f47e319..5e28ccd4 100644 --- a/pkg/cache/segment_cache_test.go +++ b/pkg/cache/segment_cache_test.go @@ -15,7 +15,10 @@ package cache -import "testing" +import ( + "sync" + "testing" +) func TestSegmentCacheEviction(t *testing.T) { cache := NewSegmentCache(10) @@ -36,3 +39,119 @@ func TestSegmentCacheEviction(t *testing.T) { t.Fatalf("new entry missing") } } + +func TestNewSegmentCacheZeroCapacity(t *testing.T) { + c := NewSegmentCache(0) + if c.capacity != 1 { + t.Fatalf("expected capacity 1 for zero input, got %d", c.capacity) + } + c2 := NewSegmentCache(-5) + if c2.capacity != 1 { + t.Fatalf("expected capacity 1 for negative input, got %d", c2.capacity) + } +} + +func TestGetSegmentCacheMiss(t *testing.T) { + c := 
NewSegmentCache(100) + data, ok := c.GetSegment("missing", 0, 0) + if ok { + t.Fatal("expected cache miss") + } + if data != nil { + t.Fatalf("expected nil data on miss, got %v", data) + } +} + +func TestSetSegmentUpdateExisting(t *testing.T) { + c := NewSegmentCache(100) + c.SetSegment("topic", 0, 0, []byte("old")) + c.SetSegment("topic", 0, 0, []byte("new-value")) + + data, ok := c.GetSegment("topic", 0, 0) + if !ok { + t.Fatal("expected cache hit after update") + } + if string(data) != "new-value" { + t.Fatalf("expected 'new-value', got '%s'", data) + } + if c.ll.Len() != 1 { + t.Fatalf("expected 1 entry after update, got %d", c.ll.Len()) + } +} + +func TestSetSegmentLargerThanCapacity(t *testing.T) { + c := NewSegmentCache(5) + c.SetSegment("topic", 0, 0, []byte("ab")) // 2 bytes, fits + c.SetSegment("topic", 0, 1, []byte("cde")) // 3 bytes, total=5 at capacity + + // This add exceeds capacity: entry 0 gets evicted to make room + c.SetSegment("topic", 0, 2, []byte("fghij")) // 5 bytes + if _, ok := c.GetSegment("topic", 0, 0); ok { + t.Fatal("expected entry 0 to be evicted") + } + if _, ok := c.GetSegment("topic", 0, 2); !ok { + t.Fatal("expected new entry to be present") + } +} + +func TestLRUOrdering(t *testing.T) { + c := NewSegmentCache(15) // room for exactly 3 x 5-byte entries + + c.SetSegment("t", 0, 0, []byte("aaaaa")) // 5 bytes + c.SetSegment("t", 0, 1, []byte("bbbbb")) // 5 bytes + c.SetSegment("t", 0, 2, []byte("ccccc")) // 5 bytes = 15 total, exactly at capacity + + // Access entry 0 to make it recently used + c.GetSegment("t", 0, 0) + + // Adding a new entry should evict entry 1 (least recently used), not entry 0 + c.SetSegment("t", 0, 3, []byte("ddddd")) + + if _, ok := c.GetSegment("t", 0, 1); ok { + t.Fatal("expected entry 1 to be evicted (LRU)") + } + if _, ok := c.GetSegment("t", 0, 0); !ok { + t.Fatal("expected entry 0 to still be present (was accessed recently)") + } + if _, ok := c.GetSegment("t", 0, 3); !ok { + t.Fatal("expected new entry 
3 to be present") + } +} + +func TestMultipleTopicsAndPartitions(t *testing.T) { + c := NewSegmentCache(1000) + c.SetSegment("orders", 0, 0, []byte("a")) + c.SetSegment("orders", 1, 0, []byte("b")) + c.SetSegment("events", 0, 0, []byte("c")) + + if d, ok := c.GetSegment("orders", 0, 0); !ok || string(d) != "a" { + t.Fatal("orders:0:0 mismatch") + } + if d, ok := c.GetSegment("orders", 1, 0); !ok || string(d) != "b" { + t.Fatal("orders:1:0 mismatch") + } + if d, ok := c.GetSegment("events", 0, 0); !ok || string(d) != "c" { + t.Fatal("events:0:0 mismatch") + } +} + +func TestConcurrentAccess(t *testing.T) { + c := NewSegmentCache(10000) + var wg sync.WaitGroup + for i := 0; i < 50; i++ { + wg.Add(1) + go func(n int32) { + defer wg.Done() + c.SetSegment("topic", n, 0, []byte("data")) + c.GetSegment("topic", n, 0) + }(int32(i)) + } + wg.Wait() +} + +func TestMakeKey(t *testing.T) { + key := makeKey("orders", 3, 100) + if key != "orders:3:100" { + t.Fatalf("unexpected key format: %s", key) + } +} diff --git a/pkg/gen/control/broker.pb.go b/pkg/gen/control/broker.pb.go index 123dbc94..0876f5d8 100644 --- a/pkg/gen/control/broker.pb.go +++ b/pkg/gen/control/broker.pb.go @@ -1,3 +1,18 @@ +// Copyright 2025 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: // protoc-gen-go v1.36.11 @@ -767,8 +782,8 @@ const file_control_broker_proto_rawDesc = "" + "\fTriggerFlush\x12%.kafscale.control.TriggerFlushRequest\x1a&.kafscale.control.TriggerFlushResponse\x12I\n" + "\rStreamMetrics\x12\x1f.kafscale.control.MetricsSample\x1a\x15.kafscale.control.Ack(\x012~\n" + "\x10AssignmentStream\x12j\n" + - "\x10WatchAssignments\x12(.kafscale.control.AssignmentWatchRequest\x1a*.kafscale.control.PartitionAssignmentEvent0\x01B\xad\x01\n" + - "\x14com.kafscale.controlB\vBrokerProtoP\x01Z'github.com/alo/kafscale/pkg/gen/control\xa2\x02\x03KCX\xaa\x02\x10Kafscale.Control\xca\x02\x10Kafscale\\Control\xe2\x02\x1cKafscale\\Control\\GPBMetadata\xea\x02\x11Kafscale::Controlb\x06proto3" + "\x10WatchAssignments\x12(.kafscale.control.AssignmentWatchRequest\x1a*.kafscale.control.PartitionAssignmentEvent0\x01B\xb2\x01\n" + + "\x14com.kafscale.controlB\vBrokerProtoP\x01Z,github.com/KafScale/platform/pkg/gen/control\xa2\x02\x03KCX\xaa\x02\x10Kafscale.Control\xca\x02\x10Kafscale\\Control\xe2\x02\x1cKafscale\\Control\\GPBMetadata\xea\x02\x11Kafscale::Controlb\x06proto3" var ( file_control_broker_proto_rawDescOnce sync.Once diff --git a/pkg/gen/control/broker_grpc.pb.go b/pkg/gen/control/broker_grpc.pb.go index b37507f6..dc1aafbc 100644 --- a/pkg/gen/control/broker_grpc.pb.go +++ b/pkg/gen/control/broker_grpc.pb.go @@ -1,6 +1,21 @@ +// Copyright 2025 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: -// - protoc-gen-go-grpc v1.6.0 +// - protoc-gen-go-grpc v1.6.1 // - protoc (unknown) // source: control/broker.proto diff --git a/pkg/gen/metadata/metadata.pb.go b/pkg/gen/metadata/metadata.pb.go index cf51269a..4cb134ff 100644 --- a/pkg/gen/metadata/metadata.pb.go +++ b/pkg/gen/metadata/metadata.pb.go @@ -1,7 +1,22 @@ +// Copyright 2025 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.36.1 -// protoc v5.29.3 +// protoc-gen-go v1.36.11 +// protoc (unknown) // source: metadata/metadata.proto package metadata @@ -11,6 +26,7 @@ import ( protoimpl "google.golang.org/protobuf/runtime/protoimpl" reflect "reflect" sync "sync" + unsafe "unsafe" ) const ( @@ -746,153 +762,96 @@ func (x *PartitionAssignment) GetAssignedAt() string { var File_metadata_metadata_proto protoreflect.FileDescriptor -var file_metadata_metadata_proto_rawDesc = []byte{ - 0x0a, 0x17, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x11, 0x6b, 0x61, 0x66, 0x73, 0x63, - 0x61, 0x6c, 0x65, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0xff, 0x02, 0x0a, - 0x0b, 0x54, 0x6f, 0x70, 0x69, 0x63, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x12, 0x0a, 0x04, - 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, - 0x12, 0x1e, 0x0a, 0x0a, 0x70, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x70, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x12, 0x2d, 0x0a, 0x12, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, - 0x66, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x72, 0x65, - 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, - 0x21, 0x0a, 0x0c, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6d, 0x73, 0x18, - 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, - 0x4d, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x5f, - 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0e, 0x72, 0x65, 0x74, - 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x73, - 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 
0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x06, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x0c, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x42, 0x79, 0x74, 0x65, 0x73, - 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x07, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x12, - 0x42, 0x0a, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x2a, 0x2e, 0x6b, 0x61, 0x66, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x54, 0x6f, 0x70, 0x69, 0x63, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, - 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x63, 0x6f, 0x6e, - 0x66, 0x69, 0x67, 0x1a, 0x39, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x6c, - 0x0a, 0x0b, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x1f, 0x0a, - 0x0b, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x0a, 0x62, 0x61, 0x73, 0x65, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x1d, - 0x0a, 0x0a, 0x73, 0x69, 0x7a, 0x65, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x09, 0x73, 0x69, 0x7a, 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x1d, 0x0a, - 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x22, 0xe6, 0x02, 0x0a, - 0x0e, 0x50, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, - 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x18, 0x01, 0x20, 0x01, 
0x28, 0x09, 0x52, 0x05, - 0x74, 0x6f, 0x70, 0x69, 0x63, 0x12, 0x1c, 0x0a, 0x09, 0x70, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, - 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x70, 0x61, 0x72, 0x74, 0x69, 0x74, - 0x69, 0x6f, 0x6e, 0x12, 0x23, 0x0a, 0x0d, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x5f, 0x62, 0x72, - 0x6f, 0x6b, 0x65, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x6c, 0x65, 0x61, 0x64, - 0x65, 0x72, 0x42, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x12, 0x21, 0x0a, 0x0c, 0x6c, 0x65, 0x61, 0x64, - 0x65, 0x72, 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, - 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x45, 0x70, 0x6f, 0x63, 0x68, 0x12, 0x28, 0x0a, 0x10, 0x6c, - 0x6f, 0x67, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0e, 0x6c, 0x6f, 0x67, 0x53, 0x74, 0x61, 0x72, 0x74, 0x4f, - 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x24, 0x0a, 0x0e, 0x6c, 0x6f, 0x67, 0x5f, 0x65, 0x6e, 0x64, - 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x6c, - 0x6f, 0x67, 0x45, 0x6e, 0x64, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x25, 0x0a, 0x0e, 0x68, - 0x69, 0x67, 0x68, 0x5f, 0x77, 0x61, 0x74, 0x65, 0x72, 0x6d, 0x61, 0x72, 0x6b, 0x18, 0x07, 0x20, - 0x01, 0x28, 0x03, 0x52, 0x0d, 0x68, 0x69, 0x67, 0x68, 0x57, 0x61, 0x74, 0x65, 0x72, 0x6d, 0x61, - 0x72, 0x6b, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x73, 0x65, 0x67, - 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x61, 0x63, 0x74, 0x69, - 0x76, 0x65, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x3a, 0x0a, 0x08, 0x73, 0x65, 0x67, - 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x09, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x6b, 0x61, - 0x66, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x08, 0x73, 0x65, 0x67, - 0x6d, 0x65, 
0x6e, 0x74, 0x73, 0x22, 0x42, 0x0a, 0x0a, 0x41, 0x73, 0x73, 0x69, 0x67, 0x6e, 0x6d, - 0x65, 0x6e, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x12, 0x1e, 0x0a, 0x0a, 0x70, 0x61, 0x72, - 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, 0x52, 0x0a, 0x70, - 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x83, 0x02, 0x0a, 0x0b, 0x47, 0x72, - 0x6f, 0x75, 0x70, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x1b, 0x0a, 0x09, 0x63, 0x6c, 0x69, - 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x63, 0x6c, - 0x69, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, - 0x5f, 0x68, 0x6f, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x63, 0x6c, 0x69, - 0x65, 0x6e, 0x74, 0x48, 0x6f, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x68, 0x65, 0x61, 0x72, 0x74, - 0x62, 0x65, 0x61, 0x74, 0x5f, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x68, - 0x65, 0x61, 0x72, 0x74, 0x62, 0x65, 0x61, 0x74, 0x41, 0x74, 0x12, 0x3f, 0x0a, 0x0b, 0x61, 0x73, - 0x73, 0x69, 0x67, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x1d, 0x2e, 0x6b, 0x61, 0x66, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x41, 0x73, 0x73, 0x69, 0x67, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x0b, - 0x61, 0x73, 0x73, 0x69, 0x67, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x24, 0x0a, 0x0d, 0x73, - 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, - 0x28, 0x09, 0x52, 0x0d, 0x73, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x69, 0x6d, - 0x65, 0x6f, 0x75, 0x74, 0x5f, 0x6d, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x10, 0x73, - 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x54, 0x69, 
0x6d, 0x65, 0x6f, 0x75, 0x74, 0x4d, 0x73, 0x22, - 0x95, 0x03, 0x0a, 0x0d, 0x43, 0x6f, 0x6e, 0x73, 0x75, 0x6d, 0x65, 0x72, 0x47, 0x72, 0x6f, 0x75, - 0x70, 0x12, 0x19, 0x0a, 0x08, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x07, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x63, 0x6f, 0x6c, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x63, 0x6f, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x63, 0x6f, 0x6c, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x67, - 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x06, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x0c, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, - 0x12, 0x47, 0x0a, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x2d, 0x2e, 0x6b, 0x61, 0x66, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x2e, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x73, 0x75, 0x6d, 0x65, 0x72, 0x47, 0x72, - 0x6f, 0x75, 0x70, 0x2e, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, - 0x52, 0x07, 0x6d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x12, 0x30, 0x0a, 0x14, 0x72, 0x65, 0x62, - 0x61, 0x6c, 0x61, 0x6e, 0x63, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, 0x6d, - 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x12, 0x72, 0x65, 0x62, 0x61, 0x6c, 0x61, 0x6e, - 0x63, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x4d, 0x73, 0x1a, 0x5a, 0x0a, 
0x0c, 0x4d, - 0x65, 0x6d, 0x62, 0x65, 0x72, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, - 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x34, 0x0a, - 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x6b, - 0x61, 0x66, 0x73, 0x63, 0x61, 0x6c, 0x65, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x4d, 0x65, 0x6d, 0x62, 0x65, 0x72, 0x52, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x8b, 0x01, 0x0a, 0x0f, 0x43, 0x6f, 0x6d, 0x6d, - 0x69, 0x74, 0x74, 0x65, 0x64, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x6f, - 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x6f, 0x66, 0x66, - 0x73, 0x65, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, - 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x74, 0x65, 0x64, - 0x41, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x5f, 0x65, 0x70, 0x6f, - 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, - 0x45, 0x70, 0x6f, 0x63, 0x68, 0x22, 0xcd, 0x01, 0x0a, 0x12, 0x42, 0x72, 0x6f, 0x6b, 0x65, 0x72, - 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1b, 0x0a, 0x09, - 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x08, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x49, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x68, 0x6f, 0x73, - 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x12, 0x12, 0x0a, - 0x04, 0x70, 0x6f, 0x72, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x70, 0x6f, 0x72, - 0x74, 0x12, 0x12, 0x0a, 
0x04, 0x72, 0x61, 0x63, 0x6b, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x04, 0x72, 0x61, 0x63, 0x6b, 0x12, 0x1d, 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x64, - 0x5f, 0x61, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, - 0x65, 0x64, 0x41, 0x74, 0x12, 0x25, 0x0a, 0x0e, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x68, 0x65, 0x61, - 0x72, 0x74, 0x62, 0x65, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x6c, 0x61, - 0x73, 0x74, 0x48, 0x65, 0x61, 0x72, 0x74, 0x62, 0x65, 0x61, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x69, 0x0a, 0x13, 0x50, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, - 0x6f, 0x6e, 0x41, 0x73, 0x73, 0x69, 0x67, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x1b, 0x0a, 0x09, - 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x08, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x70, 0x6f, - 0x63, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x12, - 0x1f, 0x0a, 0x0b, 0x61, 0x73, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x61, 0x73, 0x73, 0x69, 0x67, 0x6e, 0x65, 0x64, 0x41, 0x74, - 0x42, 0x3c, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6e, - 0x6f, 0x76, 0x61, 0x74, 0x65, 0x63, 0x68, 0x66, 0x6c, 0x6f, 0x77, 0x2f, 0x6b, 0x61, 0x66, 0x73, - 0x63, 0x61, 0x6c, 0x65, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x3b, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, -} +const file_metadata_metadata_proto_rawDesc = "" + + "\n" + + "\x17metadata/metadata.proto\x12\x11kafscale.metadata\"\xff\x02\n" + + "\vTopicConfig\x12\x12\n" + + "\x04name\x18\x01 \x01(\tR\x04name\x12\x1e\n" + + "\n" + 
+ "partitions\x18\x02 \x01(\x05R\n" + + "partitions\x12-\n" + + "\x12replication_factor\x18\x03 \x01(\x05R\x11replicationFactor\x12!\n" + + "\fretention_ms\x18\x04 \x01(\x03R\vretentionMs\x12'\n" + + "\x0fretention_bytes\x18\x05 \x01(\x03R\x0eretentionBytes\x12#\n" + + "\rsegment_bytes\x18\x06 \x01(\x03R\fsegmentBytes\x12\x1d\n" + + "\n" + + "created_at\x18\a \x01(\tR\tcreatedAt\x12B\n" + + "\x06config\x18\b \x03(\v2*.kafscale.metadata.TopicConfig.ConfigEntryR\x06config\x1a9\n" + + "\vConfigEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"l\n" + + "\vSegmentInfo\x12\x1f\n" + + "\vbase_offset\x18\x01 \x01(\x03R\n" + + "baseOffset\x12\x1d\n" + + "\n" + + "size_bytes\x18\x02 \x01(\x03R\tsizeBytes\x12\x1d\n" + + "\n" + + "created_at\x18\x03 \x01(\tR\tcreatedAt\"\xe6\x02\n" + + "\x0ePartitionState\x12\x14\n" + + "\x05topic\x18\x01 \x01(\tR\x05topic\x12\x1c\n" + + "\tpartition\x18\x02 \x01(\x05R\tpartition\x12#\n" + + "\rleader_broker\x18\x03 \x01(\tR\fleaderBroker\x12!\n" + + "\fleader_epoch\x18\x04 \x01(\x05R\vleaderEpoch\x12(\n" + + "\x10log_start_offset\x18\x05 \x01(\x03R\x0elogStartOffset\x12$\n" + + "\x0elog_end_offset\x18\x06 \x01(\x03R\flogEndOffset\x12%\n" + + "\x0ehigh_watermark\x18\a \x01(\x03R\rhighWatermark\x12%\n" + + "\x0eactive_segment\x18\b \x01(\tR\ractiveSegment\x12:\n" + + "\bsegments\x18\t \x03(\v2\x1e.kafscale.metadata.SegmentInfoR\bsegments\"B\n" + + "\n" + + "Assignment\x12\x14\n" + + "\x05topic\x18\x01 \x01(\tR\x05topic\x12\x1e\n" + + "\n" + + "partitions\x18\x02 \x03(\x05R\n" + + "partitions\"\x83\x02\n" + + "\vGroupMember\x12\x1b\n" + + "\tclient_id\x18\x01 \x01(\tR\bclientId\x12\x1f\n" + + "\vclient_host\x18\x02 \x01(\tR\n" + + "clientHost\x12!\n" + + "\fheartbeat_at\x18\x03 \x01(\tR\vheartbeatAt\x12?\n" + + "\vassignments\x18\x04 \x03(\v2\x1d.kafscale.metadata.AssignmentR\vassignments\x12$\n" + + "\rsubscriptions\x18\x05 \x03(\tR\rsubscriptions\x12,\n" + + 
"\x12session_timeout_ms\x18\x06 \x01(\x05R\x10sessionTimeoutMs\"\x95\x03\n" + + "\rConsumerGroup\x12\x19\n" + + "\bgroup_id\x18\x01 \x01(\tR\agroupId\x12\x14\n" + + "\x05state\x18\x02 \x01(\tR\x05state\x12#\n" + + "\rprotocol_type\x18\x03 \x01(\tR\fprotocolType\x12\x1a\n" + + "\bprotocol\x18\x04 \x01(\tR\bprotocol\x12\x16\n" + + "\x06leader\x18\x05 \x01(\tR\x06leader\x12#\n" + + "\rgeneration_id\x18\x06 \x01(\x05R\fgenerationId\x12G\n" + + "\amembers\x18\a \x03(\v2-.kafscale.metadata.ConsumerGroup.MembersEntryR\amembers\x120\n" + + "\x14rebalance_timeout_ms\x18\b \x01(\x05R\x12rebalanceTimeoutMs\x1aZ\n" + + "\fMembersEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x124\n" + + "\x05value\x18\x02 \x01(\v2\x1e.kafscale.metadata.GroupMemberR\x05value:\x028\x01\"\x8b\x01\n" + + "\x0fCommittedOffset\x12\x16\n" + + "\x06offset\x18\x01 \x01(\x03R\x06offset\x12\x1a\n" + + "\bmetadata\x18\x02 \x01(\tR\bmetadata\x12!\n" + + "\fcommitted_at\x18\x03 \x01(\tR\vcommittedAt\x12!\n" + + "\fleader_epoch\x18\x04 \x01(\x05R\vleaderEpoch\"\xcd\x01\n" + + "\x12BrokerRegistration\x12\x1b\n" + + "\tbroker_id\x18\x01 \x01(\tR\bbrokerId\x12\x12\n" + + "\x04host\x18\x02 \x01(\tR\x04host\x12\x12\n" + + "\x04port\x18\x03 \x01(\x05R\x04port\x12\x12\n" + + "\x04rack\x18\x04 \x01(\tR\x04rack\x12\x1d\n" + + "\n" + + "started_at\x18\x05 \x01(\tR\tstartedAt\x12%\n" + + "\x0elast_heartbeat\x18\x06 \x01(\tR\rlastHeartbeat\x12\x18\n" + + "\aversion\x18\a \x01(\tR\aversion\"i\n" + + "\x13PartitionAssignment\x12\x1b\n" + + "\tbroker_id\x18\x01 \x01(\tR\bbrokerId\x12\x14\n" + + "\x05epoch\x18\x02 \x01(\x05R\x05epoch\x12\x1f\n" + + "\vassigned_at\x18\x03 \x01(\tR\n" + + "assignedAtB\xba\x01\n" + + "\x15com.kafscale.metadataB\rMetadataProtoP\x01Z-github.com/KafScale/platform/pkg/gen/metadata\xa2\x02\x03KMX\xaa\x02\x11Kafscale.Metadata\xca\x02\x11Kafscale\\Metadata\xe2\x02\x1dKafscale\\Metadata\\GPBMetadata\xea\x02\x12Kafscale::Metadatab\x06proto3" var ( file_metadata_metadata_proto_rawDescOnce 
sync.Once - file_metadata_metadata_proto_rawDescData = file_metadata_metadata_proto_rawDesc + file_metadata_metadata_proto_rawDescData []byte ) func file_metadata_metadata_proto_rawDescGZIP() []byte { file_metadata_metadata_proto_rawDescOnce.Do(func() { - file_metadata_metadata_proto_rawDescData = protoimpl.X.CompressGZIP(file_metadata_metadata_proto_rawDescData) + file_metadata_metadata_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_metadata_metadata_proto_rawDesc), len(file_metadata_metadata_proto_rawDesc))) }) return file_metadata_metadata_proto_rawDescData } @@ -933,7 +892,7 @@ func file_metadata_metadata_proto_init() { out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_metadata_metadata_proto_rawDesc, + RawDescriptor: unsafe.Slice(unsafe.StringData(file_metadata_metadata_proto_rawDesc), len(file_metadata_metadata_proto_rawDesc)), NumEnums: 0, NumMessages: 11, NumExtensions: 0, @@ -944,7 +903,6 @@ func file_metadata_metadata_proto_init() { MessageInfos: file_metadata_metadata_proto_msgTypes, }.Build() File_metadata_metadata_proto = out.File - file_metadata_metadata_proto_rawDesc = nil file_metadata_metadata_proto_goTypes = nil file_metadata_metadata_proto_depIdxs = nil } diff --git a/pkg/idoc/explode.go b/pkg/idoc/explode.go new file mode 100644 index 00000000..543b4e5a --- /dev/null +++ b/pkg/idoc/explode.go @@ -0,0 +1,252 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package idoc + +import ( + "bytes" + "encoding/json" + "encoding/xml" + "fmt" + "io" + "strings" +) + +// ExplodeConfig defines IDoc segment routing rules. +type ExplodeConfig struct { + ItemSegments []string + PartnerSegments []string + StatusSegments []string + DateSegments []string +} + +// Header captures the root IDoc element info. +type Header struct { + Root string `json:"root"` + Attributes map[string]string `json:"attributes,omitempty"` +} + +// Segment captures a single XML segment. +type Segment struct { + Name string `json:"name"` + Path string `json:"path"` + Attributes map[string]string `json:"attributes,omitempty"` + Value string `json:"value,omitempty"` + Fields map[string]string `json:"fields,omitempty"` +} + +// Result holds exploded IDoc data. +type Result struct { + Header Header + Segments []Segment + Items []Segment + Partners []Segment + Statuses []Segment + Dates []Segment +} + +// TopicRecords renders exploded data as JSON records per topic. +type TopicRecords map[string][][]byte + +// ExplodeXML parses IDoc XML and routes segments according to config. 
+func ExplodeXML(raw []byte, cfg ExplodeConfig) (Result, error) { + decoder := xml.NewDecoder(bytes.NewReader(raw)) + decoder.Strict = false + decoder.AutoClose = xml.HTMLAutoClose + + segmentStack := make([]segmentFrame, 0, 16) + result := Result{} + segmentSets := buildSegmentSets(cfg) + + isRouted := func(name string) bool { + return segmentSets.items[name] || segmentSets.partners[name] || + segmentSets.statuses[name] || segmentSets.dates[name] + } + + for { + tok, err := decoder.Token() + if err != nil { + if err == io.EOF { + break + } + return Result{}, fmt.Errorf("xml token: %w", err) + } + + switch t := tok.(type) { + case xml.StartElement: + frame := segmentFrame{ + Name: t.Name.Local, + Path: buildPath(segmentStack, t.Name.Local), + Attributes: attrsToMap(t.Attr), + } + if isRouted(t.Name.Local) { + frame.Fields = make(map[string]string) + } + segmentStack = append(segmentStack, frame) + if result.Header.Root == "" { + result.Header = Header{Root: t.Name.Local, Attributes: frame.Attributes} + } + case xml.CharData: + if len(segmentStack) == 0 { + continue + } + segmentStack[len(segmentStack)-1].Value += string([]byte(t)) + case xml.EndElement: + if len(segmentStack) == 0 { + continue + } + frame := segmentStack[len(segmentStack)-1] + segmentStack = segmentStack[:len(segmentStack)-1] + + // If this leaf element has a value and its parent is a routed segment, + // add it to the parent's Fields map. 
+ val := strings.TrimSpace(frame.Value) + if val != "" && len(segmentStack) > 0 { + parent := &segmentStack[len(segmentStack)-1] + if parent.Fields != nil { + parent.Fields[frame.Name] = val + } + } + + seg := Segment{ + Name: frame.Name, + Path: frame.Path, + Attributes: frame.Attributes, + Value: val, + Fields: frame.Fields, + } + result.Segments = append(result.Segments, seg) + switch { + case segmentSets.items[seg.Name]: + result.Items = append(result.Items, seg) + case segmentSets.partners[seg.Name]: + result.Partners = append(result.Partners, seg) + case segmentSets.statuses[seg.Name]: + result.Statuses = append(result.Statuses, seg) + case segmentSets.dates[seg.Name]: + result.Dates = append(result.Dates, seg) + } + } + } + + return result, nil +} + +// ToTopicRecords converts Result into JSON record slices per topic. +func (r Result) ToTopicRecords(topics TopicConfig) (TopicRecords, error) { + out := TopicRecords{} + if topics.Header != "" { + data, err := json.Marshal(r.Header) + if err != nil { + return nil, err + } + out[topics.Header] = append(out[topics.Header], data) + } + appendSegments := func(name string, segments []Segment) error { + if name == "" { + return nil + } + for _, seg := range segments { + data, err := json.Marshal(seg) + if err != nil { + return err + } + out[name] = append(out[name], data) + } + return nil + } + if err := appendSegments(topics.Segments, r.Segments); err != nil { + return nil, err + } + if err := appendSegments(topics.Items, r.Items); err != nil { + return nil, err + } + if err := appendSegments(topics.Partners, r.Partners); err != nil { + return nil, err + } + if err := appendSegments(topics.Statuses, r.Statuses); err != nil { + return nil, err + } + if err := appendSegments(topics.Dates, r.Dates); err != nil { + return nil, err + } + return out, nil +} + +// TopicConfig maps logical outputs to topic names. 
+type TopicConfig struct { + Header string + Segments string + Items string + Partners string + Statuses string + Dates string +} + +type segmentFrame struct { + Name string + Path string + Attributes map[string]string + Value string + Fields map[string]string +} + +type segmentSets struct { + items map[string]bool + partners map[string]bool + statuses map[string]bool + dates map[string]bool +} + +func buildSegmentSets(cfg ExplodeConfig) segmentSets { + return segmentSets{ + items: sliceToSet(cfg.ItemSegments), + partners: sliceToSet(cfg.PartnerSegments), + statuses: sliceToSet(cfg.StatusSegments), + dates: sliceToSet(cfg.DateSegments), + } +} + +func sliceToSet(values []string) map[string]bool { + set := map[string]bool{} + for _, val := range values { + val = strings.TrimSpace(val) + if val == "" { + continue + } + set[val] = true + } + return set +} + +func attrsToMap(attrs []xml.Attr) map[string]string { + if len(attrs) == 0 { + return nil + } + out := make(map[string]string, len(attrs)) + for _, attr := range attrs { + out[attr.Name.Local] = attr.Value + } + return out +} + +func buildPath(stack []segmentFrame, name string) string { + parts := make([]string, 0, len(stack)+1) + for _, frame := range stack { + parts = append(parts, frame.Name) + } + parts = append(parts, name) + return strings.Join(parts, "/") +} diff --git a/pkg/idoc/explode_test.go b/pkg/idoc/explode_test.go new file mode 100644 index 00000000..8e3fbfba --- /dev/null +++ b/pkg/idoc/explode_test.go @@ -0,0 +1,253 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package idoc
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+const sampleIDoc = `
+<IDOC BEGIN="1">
+  <EDI_DC40>
+    <DOCNUM>123</DOCNUM>
+  </EDI_DC40>
+  <E1EDP01>
+    <POSEX>10</POSEX>
+  </E1EDP01>
+  <E1EDKA1>
+    <PARVW>AG</PARVW>
+  </E1EDKA1>
+</IDOC>`
+
+func TestExplodeXML(t *testing.T) {
+	cfg := ExplodeConfig{
+		ItemSegments:    []string{"E1EDP01"},
+		PartnerSegments: []string{"E1EDKA1"},
+	}
+	res, err := ExplodeXML([]byte(sampleIDoc), cfg)
+	if err != nil {
+		t.Fatalf("explode: %v", err)
+	}
+	if res.Header.Root != "IDOC" {
+		t.Fatalf("expected root IDOC, got %q", res.Header.Root)
+	}
+	if len(res.Items) != 1 {
+		t.Fatalf("expected 1 item, got %d", len(res.Items))
+	}
+	if len(res.Partners) != 1 {
+		t.Fatalf("expected 1 partner, got %d", len(res.Partners))
+	}
+	if len(res.Segments) == 0 {
+		t.Fatalf("expected segments")
+	}
+}
+
+func TestExplodeXMLWithAllSegmentTypes(t *testing.T) {
+	xmlData := `
+<IDOC BEGIN="1">
+  <E1EDP01><POSEX>10</POSEX></E1EDP01>
+  <E1EDKA1><PARVW>AG</PARVW></E1EDKA1>
+  <E1EDS01><STATUS>active</STATUS></E1EDS01>
+  <E1EDT01><DATUM>20260101</DATUM></E1EDT01>
+</IDOC>`
+
+	cfg := ExplodeConfig{
+		ItemSegments:    []string{"E1EDP01"},
+		PartnerSegments: []string{"E1EDKA1"},
+		StatusSegments:  []string{"E1EDS01"},
+		DateSegments:    []string{"E1EDT01"},
+	}
+	res, err := ExplodeXML([]byte(xmlData), cfg)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(res.Items) != 1 {
+		t.Fatalf("expected 1 item, got %d", len(res.Items))
+	}
+	if len(res.Partners) != 1 {
+		t.Fatalf("expected 1 partner, got %d", len(res.Partners))
+	}
+	if len(res.Statuses) != 1 {
+		t.Fatalf("expected 1 status, got %d", len(res.Statuses))
+	}
+	if len(res.Dates) != 1 {
+		t.Fatalf("expected 1 date, got %d", len(res.Dates))
+	}
+	// Verify field capture
+	if res.Items[0].Fields["POSEX"] != "10" {
+		t.Fatalf("expected POSEX=10, got %q",
res.Items[0].Fields["POSEX"]) + } + // Verify attributes on root + if res.Header.Attributes["BEGIN"] != "1" { + t.Fatalf("expected BEGIN=1 attribute, got %v", res.Header.Attributes) + } +} + +func TestExplodeXMLInvalid(t *testing.T) { + _, err := ExplodeXML([]byte("<<<"), ExplodeConfig{}) + if err == nil { + t.Fatal("expected error for malformed XML") + } +} + +func TestExplodeXMLEmpty(t *testing.T) { + res, err := ExplodeXML([]byte(""), ExplodeConfig{}) + if err != nil { + t.Fatal(err) + } + if res.Header.Root != "" { + t.Fatalf("expected empty root for empty XML, got %q", res.Header.Root) + } +} + +func TestToTopicRecords(t *testing.T) { + cfg := ExplodeConfig{ + ItemSegments: []string{"E1EDP01"}, + PartnerSegments: []string{"E1EDKA1"}, + StatusSegments: []string{"E1EDS01"}, + DateSegments: []string{"E1EDT01"}, + } + xmlData := ` + + 10 + AG + active + 20260101 +` + + res, err := ExplodeXML([]byte(xmlData), cfg) + if err != nil { + t.Fatal(err) + } + + topics := TopicConfig{ + Header: "idoc-headers", + Segments: "idoc-segments", + Items: "idoc-items", + Partners: "idoc-partners", + Statuses: "idoc-statuses", + Dates: "idoc-dates", + } + records, err := res.ToTopicRecords(topics) + if err != nil { + t.Fatal(err) + } + + if len(records["idoc-headers"]) != 1 { + t.Fatalf("expected 1 header record, got %d", len(records["idoc-headers"])) + } + // Verify the header is valid JSON + var hdr Header + if err := json.Unmarshal(records["idoc-headers"][0], &hdr); err != nil { + t.Fatalf("header is not valid JSON: %v", err) + } + if hdr.Root != "IDOC" { + t.Fatalf("expected root IDOC, got %q", hdr.Root) + } + + if len(records["idoc-items"]) != 1 { + t.Fatalf("expected 1 item record, got %d", len(records["idoc-items"])) + } + if len(records["idoc-partners"]) != 1 { + t.Fatalf("expected 1 partner record, got %d", len(records["idoc-partners"])) + } + if len(records["idoc-statuses"]) != 1 { + t.Fatalf("expected 1 status record, got %d", len(records["idoc-statuses"])) + } + if 
len(records["idoc-dates"]) != 1 { + t.Fatalf("expected 1 date record, got %d", len(records["idoc-dates"])) + } + if len(records["idoc-segments"]) == 0 { + t.Fatal("expected segment records") + } +} + +func TestToTopicRecordsEmptyTopics(t *testing.T) { + res := Result{ + Header: Header{Root: "IDOC"}, + Segments: []Segment{{Name: "S1"}}, + } + // Empty TopicConfig means no topic names → skip all + records, err := res.ToTopicRecords(TopicConfig{}) + if err != nil { + t.Fatal(err) + } + if len(records) != 0 { + t.Fatalf("expected empty records, got %d topics", len(records)) + } +} + +func TestToTopicRecordsPartialTopics(t *testing.T) { + res := Result{ + Header: Header{Root: "IDOC"}, + Items: []Segment{{Name: "E1EDP01"}}, + } + records, err := res.ToTopicRecords(TopicConfig{Header: "hdr", Items: "items"}) + if err != nil { + t.Fatal(err) + } + if len(records["hdr"]) != 1 { + t.Fatal("expected header record") + } + if len(records["items"]) != 1 { + t.Fatal("expected item record") + } +} + +func TestSliceToSetEmpty(t *testing.T) { + s := sliceToSet([]string{"", " ", " a "}) + if !s["a"] { + t.Fatal("expected trimmed 'a' in set") + } + if len(s) != 1 { + t.Fatalf("expected 1 element, got %d", len(s)) + } +} + +func TestAttrsToMapEmpty(t *testing.T) { + m := attrsToMap(nil) + if m != nil { + t.Fatal("expected nil for empty attrs") + } +} + +func TestBuildPath(t *testing.T) { + stack := []segmentFrame{{Name: "IDOC"}, {Name: "E1EDP01"}} + path := buildPath(stack, "POSEX") + if path != "IDOC/E1EDP01/POSEX" { + t.Fatalf("expected IDOC/E1EDP01/POSEX, got %s", path) + } +} + +func TestExplodeXMLPaths(t *testing.T) { + xmlData := `val` + res, err := ExplodeXML([]byte(xmlData), ExplodeConfig{}) + if err != nil { + t.Fatal(err) + } + // Find the LEAF segment + for _, seg := range res.Segments { + if seg.Name == "LEAF" { + if !strings.Contains(seg.Path, "ROOT/CHILD/LEAF") { + t.Fatalf("expected path containing ROOT/CHILD/LEAF, got %s", seg.Path) + } + return + } + } + t.Fatal("LEAF 
segment not found") +} diff --git a/pkg/lfs/checksum.go b/pkg/lfs/checksum.go new file mode 100644 index 00000000..b89814e7 --- /dev/null +++ b/pkg/lfs/checksum.go @@ -0,0 +1,118 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "crypto/md5" + "crypto/sha256" + "encoding/hex" + "errors" + "hash" + "hash/crc32" + "strings" +) + +// ChecksumAlg describes the checksum algorithm used for LFS validation. +type ChecksumAlg string + +const ( + ChecksumSHA256 ChecksumAlg = "sha256" + ChecksumMD5 ChecksumAlg = "md5" + ChecksumCRC32 ChecksumAlg = "crc32" + ChecksumNone ChecksumAlg = "none" +) + +// NormalizeChecksumAlg normalizes an algorithm name; empty defaults to sha256. +func NormalizeChecksumAlg(raw string) (ChecksumAlg, error) { + val := strings.ToLower(strings.TrimSpace(raw)) + if val == "" { + return ChecksumSHA256, nil + } + switch ChecksumAlg(val) { + case ChecksumSHA256, ChecksumMD5, ChecksumCRC32, ChecksumNone: + return ChecksumAlg(val), nil + default: + return "", errors.New("unsupported checksum algorithm") + } +} + +// NewChecksumHasher returns a hash.Hash for the requested algorithm. 
+func NewChecksumHasher(alg ChecksumAlg) (hash.Hash, error) { + switch alg { + case ChecksumSHA256: + return sha256.New(), nil + case ChecksumMD5: + return md5.New(), nil + case ChecksumCRC32: + return crc32.NewIEEE(), nil + case ChecksumNone: + return nil, nil + default: + return nil, errors.New("unsupported checksum algorithm") + } +} + +// ComputeChecksum computes a checksum for the given data and algorithm. +func ComputeChecksum(alg ChecksumAlg, data []byte) (string, error) { + if alg == ChecksumNone { + return "", nil + } + h, err := NewChecksumHasher(alg) + if err != nil { + return "", err + } + if _, err := h.Write(data); err != nil { + return "", err + } + return formatChecksum(h.Sum(nil)), nil +} + +// formatChecksum encodes a checksum digest as lowercase hex. +func formatChecksum(sum []byte) string { + return hex.EncodeToString(sum) +} + +// EnvelopeChecksum returns the algorithm + expected checksum for an envelope. +// If alg is none, ok is false (no validation). +func EnvelopeChecksum(env Envelope) (ChecksumAlg, string, bool, error) { + alg, err := NormalizeChecksumAlg(env.ChecksumAlg) + if err != nil { + return "", "", false, err + } + switch alg { + case ChecksumNone: + return alg, "", false, nil + case ChecksumSHA256: + if env.Checksum != "" { + return ChecksumSHA256, env.Checksum, true, nil + } + if env.SHA256 != "" { + return ChecksumSHA256, env.SHA256, true, nil + } + return ChecksumSHA256, "", false, nil + case ChecksumMD5, ChecksumCRC32: + if env.Checksum != "" { + return alg, env.Checksum, true, nil + } + // Fallback to SHA256 if present for backward compatibility. 
+ if env.SHA256 != "" { + return ChecksumSHA256, env.SHA256, true, nil + } + return alg, "", false, nil + default: + return "", "", false, errors.New("unsupported checksum algorithm") + } +} diff --git a/pkg/lfs/checksum_test.go b/pkg/lfs/checksum_test.go new file mode 100644 index 00000000..a295cdb4 --- /dev/null +++ b/pkg/lfs/checksum_test.go @@ -0,0 +1,232 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package lfs + +import ( + "testing" +) + +func TestNormalizeChecksumAlg(t *testing.T) { + tests := []struct { + input string + want ChecksumAlg + wantErr bool + }{ + {"", ChecksumSHA256, false}, + {"sha256", ChecksumSHA256, false}, + {"SHA256", ChecksumSHA256, false}, + {" sha256 ", ChecksumSHA256, false}, + {"md5", ChecksumMD5, false}, + {"MD5", ChecksumMD5, false}, + {"crc32", ChecksumCRC32, false}, + {"none", ChecksumNone, false}, + {"unknown", "", true}, + {"blake2b", "", true}, + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got, err := NormalizeChecksumAlg(tt.input) + if (err != nil) != tt.wantErr { + t.Fatalf("NormalizeChecksumAlg(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr) + } + if got != tt.want { + t.Fatalf("NormalizeChecksumAlg(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestNewChecksumHasher(t *testing.T) { + tests := []struct { + alg ChecksumAlg + wantNil bool + wantErr bool + }{ + {ChecksumSHA256, false, false}, + {ChecksumMD5, false, false}, + {ChecksumCRC32, false, false}, + {ChecksumNone, true, false}, + {ChecksumAlg("unknown"), true, true}, + } + for _, tt := range tests { + t.Run(string(tt.alg), func(t *testing.T) { + h, err := NewChecksumHasher(tt.alg) + if (err != nil) != tt.wantErr { + t.Fatalf("NewChecksumHasher(%q) error = %v, wantErr %v", tt.alg, err, tt.wantErr) + } + if (h == nil) != tt.wantNil { + t.Fatalf("NewChecksumHasher(%q) nil = %v, wantNil %v", tt.alg, h == nil, tt.wantNil) + } + }) + } +} + +func TestComputeChecksum(t *testing.T) { + data := []byte("hello world") + + sha, err := ComputeChecksum(ChecksumSHA256, data) + if err != nil { + t.Fatalf("ComputeChecksum(sha256) error: %v", err) + } + if sha == "" { + t.Fatal("expected non-empty sha256 checksum") + } + + md, err := ComputeChecksum(ChecksumMD5, data) + if err != nil { + t.Fatalf("ComputeChecksum(md5) error: %v", err) + } + if md == "" { + t.Fatal("expected non-empty md5 checksum") + } + + crc, err := 
ComputeChecksum(ChecksumCRC32, data) + if err != nil { + t.Fatalf("ComputeChecksum(crc32) error: %v", err) + } + if crc == "" { + t.Fatal("expected non-empty crc32 checksum") + } + + none, err := ComputeChecksum(ChecksumNone, data) + if err != nil { + t.Fatalf("ComputeChecksum(none) error: %v", err) + } + if none != "" { + t.Fatalf("expected empty checksum for none, got %q", none) + } + + _, err = ComputeChecksum(ChecksumAlg("unsupported"), data) + if err == nil { + t.Fatal("expected error for unsupported algorithm") + } +} + +func TestComputeChecksumDeterministic(t *testing.T) { + data := []byte("deterministic test") + c1, _ := ComputeChecksum(ChecksumSHA256, data) + c2, _ := ComputeChecksum(ChecksumSHA256, data) + if c1 != c2 { + t.Fatalf("checksums should be deterministic: %s != %s", c1, c2) + } +} + +func TestEnvelopeChecksum(t *testing.T) { + t.Run("none algorithm", func(t *testing.T) { + env := Envelope{ChecksumAlg: "none"} + alg, sum, ok, err := EnvelopeChecksum(env) + if err != nil { + t.Fatal(err) + } + if ok { + t.Fatal("none should return ok=false") + } + if alg != ChecksumNone { + t.Fatalf("expected none, got %s", alg) + } + if sum != "" { + t.Fatalf("expected empty sum, got %s", sum) + } + }) + + t.Run("sha256 with checksum field", func(t *testing.T) { + env := Envelope{ChecksumAlg: "sha256", Checksum: "abc123"} + alg, sum, ok, err := EnvelopeChecksum(env) + if err != nil { + t.Fatal(err) + } + if !ok || alg != ChecksumSHA256 || sum != "abc123" { + t.Fatalf("unexpected: alg=%s sum=%s ok=%v", alg, sum, ok) + } + }) + + t.Run("sha256 fallback to SHA256 field", func(t *testing.T) { + env := Envelope{ChecksumAlg: "sha256", SHA256: "sha-field"} + alg, sum, ok, err := EnvelopeChecksum(env) + if err != nil { + t.Fatal(err) + } + if !ok || alg != ChecksumSHA256 || sum != "sha-field" { + t.Fatalf("unexpected: alg=%s sum=%s ok=%v", alg, sum, ok) + } + }) + + t.Run("sha256 no checksum available", func(t *testing.T) { + env := Envelope{ChecksumAlg: "sha256"} + _, 
_, ok, err := EnvelopeChecksum(env) + if err != nil { + t.Fatal(err) + } + if ok { + t.Fatal("expected ok=false when no checksum available") + } + }) + + t.Run("empty alg defaults to sha256", func(t *testing.T) { + env := Envelope{SHA256: "abc"} + alg, sum, ok, err := EnvelopeChecksum(env) + if err != nil { + t.Fatal(err) + } + if alg != ChecksumSHA256 || sum != "abc" || !ok { + t.Fatalf("unexpected: alg=%s sum=%s ok=%v", alg, sum, ok) + } + }) + + t.Run("md5 with checksum field", func(t *testing.T) { + env := Envelope{ChecksumAlg: "md5", Checksum: "md5-sum"} + alg, sum, ok, err := EnvelopeChecksum(env) + if err != nil { + t.Fatal(err) + } + if alg != ChecksumMD5 || sum != "md5-sum" || !ok { + t.Fatalf("unexpected: alg=%s sum=%s ok=%v", alg, sum, ok) + } + }) + + t.Run("md5 fallback to SHA256", func(t *testing.T) { + env := Envelope{ChecksumAlg: "md5", SHA256: "sha-fallback"} + alg, sum, ok, err := EnvelopeChecksum(env) + if err != nil { + t.Fatal(err) + } + if alg != ChecksumSHA256 || sum != "sha-fallback" || !ok { + t.Fatalf("unexpected: alg=%s sum=%s ok=%v", alg, sum, ok) + } + }) + + t.Run("crc32 no checksum", func(t *testing.T) { + env := Envelope{ChecksumAlg: "crc32"} + alg, _, ok, err := EnvelopeChecksum(env) + if err != nil { + t.Fatal(err) + } + if ok { + t.Fatal("expected ok=false") + } + if alg != ChecksumCRC32 { + t.Fatalf("expected crc32, got %s", alg) + } + }) + + t.Run("unsupported algorithm", func(t *testing.T) { + env := Envelope{ChecksumAlg: "blake2b"} + _, _, _, err := EnvelopeChecksum(env) + if err == nil { + t.Fatal("expected error for unsupported algorithm") + } + }) +} diff --git a/pkg/lfs/consumer.go b/pkg/lfs/consumer.go new file mode 100644 index 00000000..e57e2b54 --- /dev/null +++ b/pkg/lfs/consumer.go @@ -0,0 +1,125 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" +) + +// BlobFetcher downloads LFS blobs from storage. +type BlobFetcher interface { + Fetch(ctx context.Context, key string) ([]byte, error) +} + +// Consumer unwraps LFS envelope records by fetching the blob from storage. +type Consumer struct { + fetcher BlobFetcher + validateChecksum bool +} + +// ConsumerOption configures the Consumer. +type ConsumerOption func(*Consumer) + +// WithChecksumValidation enables SHA256 validation on fetched blobs. +func WithChecksumValidation(enabled bool) ConsumerOption { + return func(c *Consumer) { + c.validateChecksum = enabled + } +} + +// NewConsumer creates a Consumer that fetches LFS blobs. +func NewConsumer(fetcher BlobFetcher, opts ...ConsumerOption) *Consumer { + c := &Consumer{ + fetcher: fetcher, + validateChecksum: true, + } + for _, opt := range opts { + opt(c) + } + return c +} + +// Unwrap checks if value is an LFS envelope and fetches the blob. +// Returns the original value if not an envelope. 
+func (c *Consumer) Unwrap(ctx context.Context, value []byte) ([]byte, error) { + if !IsLfsEnvelope(value) { + return value, nil + } + + env, err := DecodeEnvelope(value) + if err != nil { + return nil, &LfsError{Op: "decode", Err: err} + } + + blob, err := c.fetcher.Fetch(ctx, env.Key) + if err != nil { + return nil, &LfsError{Op: "fetch", Err: err} + } + + if c.validateChecksum { + alg, expected, ok, err := EnvelopeChecksum(env) + if err != nil { + return nil, &LfsError{Op: "checksum", Err: err} + } + if ok { + actual, err := ComputeChecksum(alg, blob) + if err != nil { + return nil, &LfsError{Op: "checksum", Err: err} + } + if actual != expected { + return nil, &ChecksumError{Expected: expected, Actual: actual} + } + } + } + + return blob, nil +} + +// UnwrapEnvelope returns the envelope and fetched blob for records that are envelopes. +// Returns nil envelope and original value if not an envelope. +func (c *Consumer) UnwrapEnvelope(ctx context.Context, value []byte) (*Envelope, []byte, error) { + if !IsLfsEnvelope(value) { + return nil, value, nil + } + + env, err := DecodeEnvelope(value) + if err != nil { + return nil, nil, &LfsError{Op: "decode", Err: err} + } + + blob, err := c.fetcher.Fetch(ctx, env.Key) + if err != nil { + return &env, nil, &LfsError{Op: "fetch", Err: err} + } + + if c.validateChecksum { + alg, expected, ok, err := EnvelopeChecksum(env) + if err != nil { + return &env, nil, &LfsError{Op: "checksum", Err: err} + } + if ok { + actual, err := ComputeChecksum(alg, blob) + if err != nil { + return &env, nil, &LfsError{Op: "checksum", Err: err} + } + if actual != expected { + return &env, nil, &ChecksumError{Expected: expected, Actual: actual} + } + } + } + + return &env, blob, nil +} diff --git a/pkg/lfs/consumer_test.go b/pkg/lfs/consumer_test.go new file mode 100644 index 00000000..849cbf64 --- /dev/null +++ b/pkg/lfs/consumer_test.go @@ -0,0 +1,306 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). 
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" + "crypto/md5" + "crypto/sha256" + "encoding/hex" + "errors" + "testing" +) + +// mockFetcher is a test implementation of BlobFetcher. +type mockFetcher struct { + blobs map[string][]byte + err error +} + +func (m *mockFetcher) Fetch(ctx context.Context, key string) ([]byte, error) { + if m.err != nil { + return nil, m.err + } + blob, ok := m.blobs[key] + if !ok { + return nil, errors.New("not found") + } + return blob, nil +} + +func TestConsumerUnwrapNonLFS(t *testing.T) { + fetcher := &mockFetcher{blobs: make(map[string][]byte)} + consumer := NewConsumer(fetcher) + + // Plain text should pass through unchanged + plainText := []byte("hello world") + result, err := consumer.Unwrap(context.Background(), plainText) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(result) != string(plainText) { + t.Errorf("expected %q, got %q", plainText, result) + } +} + +func TestConsumerUnwrapLFS(t *testing.T) { + blob := []byte("this is the actual blob content") + hash := sha256.Sum256(blob) + checksum := hex.EncodeToString(hash[:]) + + fetcher := &mockFetcher{ + blobs: map[string][]byte{ + "default/test-topic/lfs/2026/02/01/obj-123": blob, + }, + } + consumer := NewConsumer(fetcher) + + envelope := Envelope{ + Version: 1, + Bucket: "kafscale", + Key: 
"default/test-topic/lfs/2026/02/01/obj-123", + Size: int64(len(blob)), + SHA256: checksum, + } + envBytes, err := EncodeEnvelope(envelope) + if err != nil { + t.Fatalf("failed to encode envelope: %v", err) + } + + result, err := consumer.Unwrap(context.Background(), envBytes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(result) != string(blob) { + t.Errorf("expected blob content, got %q", result) + } +} + +func TestConsumerUnwrapMD5Checksum(t *testing.T) { + blob := []byte("md5-blob") + sha := sha256.Sum256(blob) + md5sum := md5.Sum(blob) + + fetcher := &mockFetcher{ + blobs: map[string][]byte{ + "default/test-topic/lfs/2026/02/01/obj-123": blob, + }, + } + consumer := NewConsumer(fetcher) + + envelope := Envelope{ + Version: 1, + Bucket: "kafscale", + Key: "default/test-topic/lfs/2026/02/01/obj-123", + Size: int64(len(blob)), + SHA256: hex.EncodeToString(sha[:]), + Checksum: hex.EncodeToString(md5sum[:]), + ChecksumAlg: "md5", + } + envBytes, err := EncodeEnvelope(envelope) + if err != nil { + t.Fatalf("failed to encode envelope: %v", err) + } + + result, err := consumer.Unwrap(context.Background(), envBytes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(result) != string(blob) { + t.Errorf("expected blob content, got %q", result) + } +} + +func TestConsumerUnwrapChecksumMismatch(t *testing.T) { + blob := []byte("this is the actual blob content") + wrongChecksum := "0000000000000000000000000000000000000000000000000000000000000000" + + fetcher := &mockFetcher{ + blobs: map[string][]byte{ + "default/test-topic/lfs/2026/02/01/obj-123": blob, + }, + } + consumer := NewConsumer(fetcher) + + envelope := Envelope{ + Version: 1, + Bucket: "kafscale", + Key: "default/test-topic/lfs/2026/02/01/obj-123", + Size: int64(len(blob)), + SHA256: wrongChecksum, + } + envBytes, err := EncodeEnvelope(envelope) + if err != nil { + t.Fatalf("failed to encode envelope: %v", err) + } + + _, err = 
consumer.Unwrap(context.Background(), envBytes) + if err == nil { + t.Fatal("expected checksum error, got nil") + } + + var checksumErr *ChecksumError + if !errors.As(err, &checksumErr) { + t.Fatalf("expected ChecksumError, got %T: %v", err, err) + } + if checksumErr.Expected != wrongChecksum { + t.Errorf("expected Expected=%s, got %s", wrongChecksum, checksumErr.Expected) + } +} + +func TestConsumerUnwrapChecksumDisabled(t *testing.T) { + blob := []byte("this is the actual blob content") + wrongChecksum := "0000000000000000000000000000000000000000000000000000000000000000" + + fetcher := &mockFetcher{ + blobs: map[string][]byte{ + "default/test-topic/lfs/2026/02/01/obj-123": blob, + }, + } + consumer := NewConsumer(fetcher, WithChecksumValidation(false)) + + envelope := Envelope{ + Version: 1, + Bucket: "kafscale", + Key: "default/test-topic/lfs/2026/02/01/obj-123", + Size: int64(len(blob)), + SHA256: wrongChecksum, + } + envBytes, err := EncodeEnvelope(envelope) + if err != nil { + t.Fatalf("failed to encode envelope: %v", err) + } + + // Should succeed because checksum validation is disabled + result, err := consumer.Unwrap(context.Background(), envBytes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(result) != string(blob) { + t.Errorf("expected blob content, got %q", result) + } +} + +func TestConsumerUnwrapFetchError(t *testing.T) { + fetcher := &mockFetcher{ + err: errors.New("s3 connection failed"), + } + consumer := NewConsumer(fetcher) + + hash := sha256.Sum256([]byte("test")) + envelope := Envelope{ + Version: 1, + Bucket: "kafscale", + Key: "some/key", + Size: 100, + SHA256: hex.EncodeToString(hash[:]), + } + envBytes, err := EncodeEnvelope(envelope) + if err != nil { + t.Fatalf("failed to encode envelope: %v", err) + } + + _, err = consumer.Unwrap(context.Background(), envBytes) + if err == nil { + t.Fatal("expected error, got nil") + } + + var lfsErr *LfsError + if !errors.As(err, &lfsErr) { + t.Fatalf("expected LfsError, 
got %T: %v", err, err) + } + if lfsErr.Op != "fetch" { + t.Errorf("expected Op=fetch, got %s", lfsErr.Op) + } +} + +func TestConsumerUnwrapInvalidEnvelope(t *testing.T) { + fetcher := &mockFetcher{} + consumer := NewConsumer(fetcher) + + // Invalid JSON that looks like an envelope but missing required fields + // Must be > 15 bytes to pass IsLfsEnvelope length check + invalid := []byte(`{"kfs_lfs": 1, "bucket": "b"}`) + + _, err := consumer.Unwrap(context.Background(), invalid) + if err == nil { + t.Fatal("expected error for invalid envelope, got nil") + } + + var lfsErr *LfsError + if !errors.As(err, &lfsErr) { + t.Fatalf("expected LfsError, got %T: %v", err, err) + } + if lfsErr.Op != "decode" { + t.Errorf("expected Op=decode, got %s", lfsErr.Op) + } +} + +func TestConsumerUnwrapEnvelope(t *testing.T) { + blob := []byte("blob data") + hash := sha256.Sum256(blob) + checksum := hex.EncodeToString(hash[:]) + + fetcher := &mockFetcher{ + blobs: map[string][]byte{"key": blob}, + } + consumer := NewConsumer(fetcher) + + envelope := Envelope{ + Version: 1, + Bucket: "bucket", + Key: "key", + Size: int64(len(blob)), + SHA256: checksum, + ContentType: "application/octet-stream", + ProxyID: "proxy-1", + } + envBytes, _ := EncodeEnvelope(envelope) + + env, data, err := consumer.UnwrapEnvelope(context.Background(), envBytes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if env == nil { + t.Fatal("expected envelope, got nil") + } + if env.Bucket != "bucket" { + t.Errorf("expected Bucket=bucket, got %s", env.Bucket) + } + if env.ContentType != "application/octet-stream" { + t.Errorf("expected ContentType, got %s", env.ContentType) + } + if string(data) != string(blob) { + t.Errorf("expected blob data, got %q", data) + } +} + +func TestConsumerUnwrapEnvelopeNonLFS(t *testing.T) { + fetcher := &mockFetcher{} + consumer := NewConsumer(fetcher) + + plain := []byte("not an envelope") + env, data, err := consumer.UnwrapEnvelope(context.Background(), plain) + if err 
!= nil { + t.Fatalf("unexpected error: %v", err) + } + if env != nil { + t.Errorf("expected nil envelope, got %+v", env) + } + if string(data) != string(plain) { + t.Errorf("expected original data, got %q", data) + } +} diff --git a/pkg/lfs/doc.go b/pkg/lfs/doc.go new file mode 100644 index 00000000..9b1f2c99 --- /dev/null +++ b/pkg/lfs/doc.go @@ -0,0 +1,232 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +Package lfs provides Large File Support (LFS) for Kafka messages. + +LFS enables storing large payloads (up to 5GB) in S3 while keeping small +envelope pointers in Kafka topics. This implements the "Claim Check" pattern. + +# Overview + +When a Kafka producer sends a message with the LFS_BLOB header, the LFS proxy: + 1. Uploads the payload to S3 + 2. Computes SHA256 checksum + 3. Creates a JSON envelope with metadata + 4. Forwards the envelope (not the payload) to Kafka + +Consumers receive the envelope and can use this package to transparently +fetch the original payload from S3. 
+ +# Envelope Format + +The LFS envelope is a JSON object stored as the Kafka message value: + + { + "kfs_lfs": 1, + "bucket": "kafscale-lfs", + "key": "default/topic/lfs/2026/02/01/obj-uuid", + "size": 10485760, + "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "content_type": "application/octet-stream", + "created_at": "2026-02-01T12:00:00Z", + "proxy_id": "lfs-proxy-0" + } + +# Consumer Usage + +Basic usage with franz-go: + + // Create S3 client + s3Client, err := lfs.NewS3Client(ctx, lfs.S3Config{ + Bucket: "kafscale-lfs", + Region: "us-east-1", + Endpoint: "http://minio:9000", // optional + }) + if err != nil { + log.Fatal(err) + } + + // Create LFS consumer + consumer := lfs.NewConsumer(s3Client) + + // Process Kafka records + for _, record := range kafkaRecords { + // Unwrap automatically fetches LFS blobs from S3 + data, err := consumer.Unwrap(ctx, record.Value) + if err != nil { + log.Error("failed to unwrap", "error", err) + continue + } + // data contains the original payload (or unchanged if not LFS) + processData(data) + } + +# Record Wrapper + +For lazy resolution with caching, use the Record wrapper: + + s3Client, _ := lfs.NewS3Client(ctx, config) + consumer := lfs.NewConsumer(s3Client) + + for _, kafkaRecord := range records { + rec := lfs.NewRecord(kafkaRecord.Value, consumer, + lfs.WithStreamFetcher(s3Client), // enables ValueStream() + ) + + // Check if this is an LFS record + if rec.IsLFS() { + // Get size without fetching + size, _ := rec.Size() + fmt.Printf("LFS blob size: %d\n", size) + } + + // Lazy fetch with caching (second call uses cache) + data, err := rec.Value(ctx) + if err != nil { + log.Error("resolve failed", "error", err) + continue + } + processData(data) + } + +# Streaming Large Files + +For memory-efficient processing of large files: + + rec := lfs.NewRecord(value, nil, + lfs.WithStreamFetcher(s3Client), + ) + + reader, size, err := rec.ValueStream(ctx) + if err != nil { + log.Fatal(err) + } + 
defer reader.Close() + + // Stream directly to output + io.Copy(outputFile, reader) + + // Close validates checksum + if err := reader.Close(); err != nil { + log.Error("checksum validation failed", "error", err) + } + +# Checksum Validation + +By default, fetched blobs are validated against the SHA256 checksum +stored in the envelope. This can be disabled for performance: + + consumer := lfs.NewConsumer(s3Client, + lfs.WithChecksumValidation(false), + ) + +# Error Handling + +The package defines specific error types for common failures: + + data, err := consumer.Unwrap(ctx, value) + if err != nil { + var checksumErr *lfs.ChecksumError + if errors.As(err, &checksumErr) { + log.Error("data corruption detected", + "expected", checksumErr.Expected, + "actual", checksumErr.Actual, + ) + } + + var lfsErr *lfs.LfsError + if errors.As(err, &lfsErr) { + log.Error("LFS operation failed", + "operation", lfsErr.Op, + "error", lfsErr.Err, + ) + } + } + +# Detection + +Use IsLfsEnvelope for fast detection without parsing: + + if lfs.IsLfsEnvelope(value) { + // This is an LFS envelope + env, _ := lfs.DecodeEnvelope(value) + fmt.Printf("Blob stored at: s3://%s/%s\n", env.Bucket, env.Key) + } + +# Producer Usage + +For producing large payloads via the LFS proxy HTTP endpoint: + + // Create producer pointing to LFS proxy + producer := lfs.NewProducer("http://lfs-proxy:8080", + lfs.WithContentType("video/mp4"), + lfs.WithRetry(3, time.Second), + ) + + // Stream a file to the proxy + file, _ := os.Open("large-video.mp4") + defer file.Close() + + result, err := producer.Produce(ctx, "video-uploads", "video-001", file) + if err != nil { + log.Fatal(err) + } + fmt.Printf("Uploaded %d bytes to s3://%s/%s\n", + result.BytesSent, result.Envelope.Bucket, result.Envelope.Key) + +# Producer with Progress Tracking + +Monitor upload progress for large files: + + producer := lfs.NewProducer("http://lfs-proxy:8080", + lfs.WithProgress(func(bytesSent int64) error { + fmt.Printf("Uploaded: %d 
bytes\n", bytesSent) + return nil // return error to cancel upload + }), + ) + + result, err := producer.Produce(ctx, "media", "file.dat", reader) + +# Producer with Checksum Validation + +Validate server-computed checksum against a pre-computed value: + + // Pre-compute checksum + hasher := sha256.New() + io.Copy(hasher, file) + expectedSHA := hex.EncodeToString(hasher.Sum(nil)) + file.Seek(0, 0) + + // Upload with checksum validation + result, err := producer.ProduceWithChecksum(ctx, "topic", "key", file, expectedSHA) + if err != nil { + var checksumErr *lfs.ChecksumError + if errors.As(err, &checksumErr) { + log.Error("upload corrupted", "expected", checksumErr.Expected) + } + } + +# Producer Retry Behavior + +The producer automatically retries on transient failures (5xx errors, 429 rate limits, +connection errors). Non-retryable errors (4xx client errors, checksum mismatches) +fail immediately. The retry delay is linear based on the attempt number. + + producer := lfs.NewProducer("http://lfs-proxy:8080", + lfs.WithRetry(5, 2*time.Second), // retry with linear backoff + ) +*/ +package lfs diff --git a/pkg/lfs/envelope.go b/pkg/lfs/envelope.go new file mode 100644 index 00000000..d242005f --- /dev/null +++ b/pkg/lfs/envelope.go @@ -0,0 +1,72 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "bytes" + "encoding/json" + "errors" +) + +// Envelope describes the pointer metadata for an LFS payload stored in S3. +type Envelope struct { + Version int `json:"kfs_lfs"` + Bucket string `json:"bucket"` + Key string `json:"key"` + Size int64 `json:"size"` + SHA256 string `json:"sha256"` + Checksum string `json:"checksum,omitempty"` + ChecksumAlg string `json:"checksum_alg,omitempty"` + ContentType string `json:"content_type,omitempty"` + OriginalHeaders map[string]string `json:"original_headers,omitempty"` + CreatedAt string `json:"created_at,omitempty"` + ProxyID string `json:"proxy_id,omitempty"` +} + +// EncodeEnvelope serializes an envelope to JSON. +func EncodeEnvelope(env Envelope) ([]byte, error) { + if env.Bucket == "" || env.Key == "" || env.SHA256 == "" || env.Version == 0 { + return nil, errors.New("invalid envelope") + } + return json.Marshal(env) +} + +// DecodeEnvelope parses JSON bytes into an Envelope. +func DecodeEnvelope(data []byte) (Envelope, error) { + var env Envelope + if err := json.Unmarshal(data, &env); err != nil { + return Envelope{}, err + } + if env.Version == 0 || env.Bucket == "" || env.Key == "" || env.SHA256 == "" { + return Envelope{}, errors.New("invalid envelope: missing required fields") + } + return env, nil +} + +// IsLfsEnvelope detects an LFS envelope via a quick JSON marker check. +func IsLfsEnvelope(value []byte) bool { + if len(value) < 15 { + return false + } + if value[0] != '{' { + return false + } + max := 50 + if len(value) < max { + max = len(value) + } + return bytes.Contains(value[:max], []byte(`"kfs_lfs"`)) +} diff --git a/pkg/lfs/envelope_test.go b/pkg/lfs/envelope_test.go new file mode 100644 index 00000000..05fed6cb --- /dev/null +++ b/pkg/lfs/envelope_test.go @@ -0,0 +1,189 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). 
+// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "encoding/json" + "testing" +) + +func TestEncodeEnvelope(t *testing.T) { + env := Envelope{ + Version: 1, + Bucket: "bucket", + Key: "ns/topic/lfs/2026/01/31/obj-123", + Size: 42, + SHA256: "abc", + } + payload, err := EncodeEnvelope(env) + if err != nil { + t.Fatalf("EncodeEnvelope error: %v", err) + } + var decoded Envelope + if err := json.Unmarshal(payload, &decoded); err != nil { + t.Fatalf("json unmarshal: %v", err) + } + if decoded.Bucket != env.Bucket || decoded.Key != env.Key || decoded.SHA256 != env.SHA256 { + t.Fatalf("unexpected decoded envelope: %+v", decoded) + } +} + +func TestEncodeEnvelopeInvalid(t *testing.T) { + tests := []struct { + name string + env Envelope + }{ + {"empty", Envelope{}}, + {"no version", Envelope{Bucket: "b", Key: "k", SHA256: "s"}}, + {"no bucket", Envelope{Version: 1, Key: "k", SHA256: "s"}}, + {"no key", Envelope{Version: 1, Bucket: "b", SHA256: "s"}}, + {"no sha256", Envelope{Version: 1, Bucket: "b", Key: "k"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := EncodeEnvelope(tt.env) + if err == nil { + t.Fatalf("expected error for invalid envelope") + } + }) + } +} + +func TestDecodeEnvelope(t *testing.T) { + original := Envelope{ + Version: 1, + Bucket: "kafscale", + Key: "default/topic/lfs/2026/02/01/obj-abc", + Size: 1024, 
+ SHA256: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + ContentType: "application/json", + ProxyID: "proxy-1", + } + encoded, err := EncodeEnvelope(original) + if err != nil { + t.Fatalf("encode error: %v", err) + } + + decoded, err := DecodeEnvelope(encoded) + if err != nil { + t.Fatalf("decode error: %v", err) + } + + if decoded.Version != original.Version { + t.Errorf("Version: got %d, want %d", decoded.Version, original.Version) + } + if decoded.Bucket != original.Bucket { + t.Errorf("Bucket: got %s, want %s", decoded.Bucket, original.Bucket) + } + if decoded.Key != original.Key { + t.Errorf("Key: got %s, want %s", decoded.Key, original.Key) + } + if decoded.Size != original.Size { + t.Errorf("Size: got %d, want %d", decoded.Size, original.Size) + } + if decoded.SHA256 != original.SHA256 { + t.Errorf("SHA256: got %s, want %s", decoded.SHA256, original.SHA256) + } + if decoded.ContentType != original.ContentType { + t.Errorf("ContentType: got %s, want %s", decoded.ContentType, original.ContentType) + } + if decoded.ProxyID != original.ProxyID { + t.Errorf("ProxyID: got %s, want %s", decoded.ProxyID, original.ProxyID) + } +} + +func TestDecodeEnvelopeInvalid(t *testing.T) { + tests := []struct { + name string + input []byte + }{ + {"invalid json", []byte(`not json`)}, + {"empty json", []byte(`{}`)}, + {"missing version", []byte(`{"kfs_lfs":0,"bucket":"b","key":"k","sha256":"s"}`)}, + {"missing bucket", []byte(`{"kfs_lfs":1,"key":"k","sha256":"s"}`)}, + {"missing key", []byte(`{"kfs_lfs":1,"bucket":"b","sha256":"s"}`)}, + {"missing sha256", []byte(`{"kfs_lfs":1,"bucket":"b","key":"k"}`)}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := DecodeEnvelope(tt.input) + if err == nil { + t.Fatalf("expected error for invalid envelope") + } + }) + } +} + +func TestIsLfsEnvelope(t *testing.T) { + tests := []struct { + name string + input []byte + expected bool + }{ + {"valid envelope", 
[]byte(`{"kfs_lfs":1,"bucket":"b","key":"k","sha256":"abc"}`), true}, + {"valid with spaces", []byte(`{ "kfs_lfs": 1, "bucket": "b" }`), true}, + {"plain text", []byte("plain"), false}, + {"empty", []byte{}, false}, + {"too short", []byte(`{"kfs"}`), false}, + {"not json object", []byte(`["kfs_lfs"]`), false}, + {"no marker", []byte(`{"version":1,"bucket":"b"}`), false}, + {"binary data", []byte{0x00, 0x01, 0x02, 0x03}, false}, + {"marker past 50 bytes", []byte(`{"bucket":"very-long-bucket-name-here","key":"very-long-key","sha256":"abc","kfs_lfs":1}`), false}, // marker past first 50 bytes + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := IsLfsEnvelope(tt.input) + if got != tt.expected { + t.Errorf("IsLfsEnvelope() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestEnvelopeRoundTrip(t *testing.T) { + env := Envelope{ + Version: 1, + Bucket: "kafscale-lfs", + Key: "prod/events/lfs/2026/02/01/obj-550e8400-e29b-41d4-a716-446655440000", + Size: 5242880, + SHA256: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + ContentType: "image/png", + OriginalHeaders: map[string]string{"user-id": "123", "source": "upload"}, + CreatedAt: "2026-02-01T12:00:00Z", + ProxyID: "lfs-proxy-0", + } + + encoded, err := EncodeEnvelope(env) + if err != nil { + t.Fatalf("encode error: %v", err) + } + + if !IsLfsEnvelope(encoded) { + t.Fatal("encoded envelope not detected as LFS") + } + + decoded, err := DecodeEnvelope(encoded) + if err != nil { + t.Fatalf("decode error: %v", err) + } + + if decoded.OriginalHeaders["user-id"] != "123" { + t.Errorf("OriginalHeaders not preserved: %v", decoded.OriginalHeaders) + } + if decoded.CreatedAt != "2026-02-01T12:00:00Z" { + t.Errorf("CreatedAt not preserved: %s", decoded.CreatedAt) + } +} diff --git a/pkg/lfs/errors.go b/pkg/lfs/errors.go new file mode 100644 index 00000000..8944112a --- /dev/null +++ b/pkg/lfs/errors.go @@ -0,0 +1,60 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), 
NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "errors" + "fmt" +) + +// Sentinel errors for LFS operations. +var ( + ErrNoConsumer = errors.New("no consumer configured for LFS resolution") + ErrNoStreamFetcher = errors.New("no stream fetcher configured for streaming access") +) + +// LfsError wraps lower-level LFS errors with context. +type LfsError struct { + Op string + Err error +} + +func (e *LfsError) Error() string { + if e == nil { + return "lfs error" + } + if e.Op == "" { + return fmt.Sprintf("lfs error: %v", e.Err) + } + return fmt.Sprintf("lfs %s: %v", e.Op, e.Err) +} + +func (e *LfsError) Unwrap() error { + if e == nil { + return nil + } + return e.Err +} + +// ChecksumError indicates a SHA256 mismatch. +type ChecksumError struct { + Expected string + Actual string +} + +func (e *ChecksumError) Error() string { + return fmt.Sprintf("checksum mismatch: expected %s got %s", e.Expected, e.Actual) +} diff --git a/pkg/lfs/errors_test.go b/pkg/lfs/errors_test.go new file mode 100644 index 00000000..533b0ff0 --- /dev/null +++ b/pkg/lfs/errors_test.go @@ -0,0 +1,78 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "errors" + "strings" + "testing" +) + +func TestLfsErrorWithOp(t *testing.T) { + err := &LfsError{Op: "upload", Err: errors.New("connection refused")} + got := err.Error() + if !strings.Contains(got, "upload") { + t.Fatalf("expected 'upload' in error, got: %s", got) + } + if !strings.Contains(got, "connection refused") { + t.Fatalf("expected 'connection refused' in error, got: %s", got) + } +} + +func TestLfsErrorWithoutOp(t *testing.T) { + err := &LfsError{Err: errors.New("some failure")} + got := err.Error() + if !strings.Contains(got, "lfs error") { + t.Fatalf("expected 'lfs error' in output, got: %s", got) + } + if !strings.Contains(got, "some failure") { + t.Fatalf("expected 'some failure' in output, got: %s", got) + } +} + +func TestLfsErrorNilReceiver(t *testing.T) { + var err *LfsError + got := err.Error() + if got != "lfs error" { + t.Fatalf("expected 'lfs error', got: %s", got) + } +} + +func TestLfsErrorUnwrap(t *testing.T) { + inner := errors.New("inner error") + err := &LfsError{Op: "test", Err: inner} + if !errors.Is(err, inner) { + t.Fatal("Unwrap should return inner error") + } +} + +func TestLfsErrorUnwrapNil(t *testing.T) { + var err *LfsError + if err.Unwrap() != nil { + t.Fatal("nil receiver Unwrap should return nil") + } +} + +func TestChecksumErrorMessage(t *testing.T) { + err := &ChecksumError{Expected: "abc", Actual: "def"} + got := err.Error() + if 
!strings.Contains(got, "abc") || !strings.Contains(got, "def") { + t.Fatalf("expected both checksums in error, got: %s", got) + } + if !strings.Contains(got, "mismatch") { + t.Fatalf("expected 'mismatch' in error, got: %s", got) + } +} diff --git a/pkg/lfs/producer.go b/pkg/lfs/producer.go new file mode 100644 index 00000000..feadf823 --- /dev/null +++ b/pkg/lfs/producer.go @@ -0,0 +1,388 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strconv" + "time" +) + +// ProduceResult contains the result of a successful LFS produce operation. +type ProduceResult struct { + Envelope Envelope // The LFS envelope with S3 location and checksum + Duration time.Duration // Time taken for the upload + BytesSent int64 // Total bytes uploaded +} + +// ProgressFunc is called during upload with bytes sent so far. +// Returning an error cancels the upload. +type ProgressFunc func(bytesSent int64) error + +// Producer sends large payloads to the LFS proxy via HTTP streaming. 
type Producer struct {
	endpoint    string        // base URL of the LFS proxy, e.g. "http://lfs-proxy:8080"
	client      *http.Client  // HTTP client used for uploads
	apiKey      string        // optional X-API-Key header value
	contentType string        // Content-Type sent with each upload
	maxRetries  int           // number of retries after the first attempt
	retryDelay  time.Duration // base delay; actual wait is retryDelay * attempt (linear backoff)
	progress    ProgressFunc  // optional upload-progress callback
}

// ProducerOption configures the Producer.
type ProducerOption func(*Producer)

// WithHTTPClient sets a custom HTTP client.
func WithHTTPClient(client *http.Client) ProducerOption {
	return func(p *Producer) {
		p.client = client
	}
}

// WithAPIKey sets the API key for authenticated requests.
func WithAPIKey(key string) ProducerOption {
	return func(p *Producer) {
		p.apiKey = key
	}
}

// WithContentType sets the Content-Type header for uploads.
func WithContentType(ct string) ProducerOption {
	return func(p *Producer) {
		p.contentType = ct
	}
}

// WithRetry configures retry behavior for transient failures.
// maxRetries is the number of retries after the initial attempt;
// delay is the base for the linear backoff (delay * attempt).
func WithRetry(maxRetries int, delay time.Duration) ProducerOption {
	return func(p *Producer) {
		p.maxRetries = maxRetries
		p.retryDelay = delay
	}
}

// WithProgress sets a callback for upload progress.
func WithProgress(fn ProgressFunc) ProducerOption {
	return func(p *Producer) {
		p.progress = fn
	}
}

// NewProducer creates a Producer that sends blobs to the LFS proxy.
//
// The endpoint should be the LFS proxy HTTP URL, e.g., "http://lfs-proxy:8080".
// Defaults: application/octet-stream content type, 3 retries with a one
// second base delay, and an HTTP client without a timeout (uploads can be
// arbitrarily large; use ctx for cancellation instead).
func NewProducer(endpoint string, opts ...ProducerOption) *Producer {
	p := &Producer{
		endpoint:    endpoint,
		client:      &http.Client{Timeout: 0}, // No timeout for large uploads
		contentType: "application/octet-stream",
		maxRetries:  3,
		retryDelay:  time.Second,
	}
	for _, opt := range opts {
		opt(p)
	}
	return p
}

// Produce streams a payload to the LFS proxy for the given topic.
//
// The reader is streamed directly to the proxy without buffering the entire
// payload in memory. The proxy uploads to S3 and returns an LFS envelope
// that is stored in Kafka.
//
// Example:
//
//	producer := lfs.NewProducer("http://lfs-proxy:8080")
//	file, _ := os.Open("large-video.mp4")
//	defer file.Close()
//
//	result, err := producer.Produce(ctx, "video-uploads", "video-001", file)
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("Uploaded %d bytes, S3 key: %s\n", result.BytesSent, result.Envelope.Key)
func (p *Producer) Produce(ctx context.Context, topic, key string, body io.Reader) (*ProduceResult, error) {
	if topic == "" {
		return nil, errors.New("topic is required")
	}
	if body == nil {
		return nil, errors.New("body is required")
	}

	// Ensure the body can be replayed on retries. If the reader supports
	// seeking we rewind it; otherwise we buffer into memory so each
	// attempt gets the full payload.
	body, err := replayable(body)
	if err != nil {
		return nil, err
	}

	var lastErr error
	for attempt := 0; attempt <= p.maxRetries; attempt++ {
		if attempt > 0 {
			// Rewind before waiting so a cancelled ctx never leaves a
			// half-consumed reader for the next caller.
			if err := rewind(body); err != nil {
				return nil, fmt.Errorf("cannot reset body for retry: %w", err)
			}
			// Linear backoff: retryDelay * attempt; ctx cancellation aborts the wait.
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-time.After(p.retryDelay * time.Duration(attempt)):
			}
		}

		result, err := p.doUpload(ctx, topic, key, body)
		if err == nil {
			return result, nil
		}

		// Only retry on transient errors
		if !isRetryable(err) {
			return nil, err
		}
		lastErr = err
	}

	return nil, fmt.Errorf("max retries exceeded: %w", lastErr)
}

// ProduceWithChecksum streams a payload and validates the server-computed checksum.
//
// If the server's SHA256 doesn't match the expected checksum, an error is returned.
// This is useful when the client has pre-computed the checksum.
//
// Note: the comparison happens after the upload has already completed and
// the envelope has been produced; a mismatch reports corruption, it does
// not roll the record back.
func (p *Producer) ProduceWithChecksum(ctx context.Context, topic, key string, body io.Reader, expectedSHA256 string) (*ProduceResult, error) {
	result, err := p.Produce(ctx, topic, key, body)
	if err != nil {
		return nil, err
	}

	if result.Envelope.SHA256 != expectedSHA256 {
		return nil, &ChecksumError{
			Expected: expectedSHA256,
			Actual:   result.Envelope.SHA256,
		}
	}

	return result, nil
}

// ProducePartitioned sends to a specific partition.
// It mirrors Produce's validation and retry loop exactly, but forwards the
// partition number to the proxy via the X-Kafka-Partition header.
func (p *Producer) ProducePartitioned(ctx context.Context, topic string, partition int32, key string, body io.Reader) (*ProduceResult, error) {
	if topic == "" {
		return nil, errors.New("topic is required")
	}
	if body == nil {
		return nil, errors.New("body is required")
	}

	body, err := replayable(body)
	if err != nil {
		return nil, err
	}

	var lastErr error
	for attempt := 0; attempt <= p.maxRetries; attempt++ {
		if attempt > 0 {
			if err := rewind(body); err != nil {
				return nil, fmt.Errorf("cannot reset body for retry: %w", err)
			}
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-time.After(p.retryDelay * time.Duration(attempt)):
			}
		}

		result, err := p.doUploadPartitioned(ctx, topic, partition, key, body)
		if err == nil {
			return result, nil
		}

		if !isRetryable(err) {
			return nil, err
		}
		lastErr = err
	}

	return nil, fmt.Errorf("max retries exceeded: %w", lastErr)
}

// doUpload performs one upload attempt without a specific partition
// (-1 suppresses the X-Kafka-Partition header).
func (p *Producer) doUpload(ctx context.Context, topic, key string, body io.Reader) (*ProduceResult, error) {
	return p.doUploadPartitioned(ctx, topic, -1, key, body)
}

// doUploadPartitioned performs a single POST to the proxy's /lfs/produce
// endpoint and decodes the returned envelope. Retrying is the caller's job.
func (p *Producer) doUploadPartitioned(ctx context.Context, topic string, partition int32, key string, body io.Reader) (*ProduceResult, error) {
	url := p.endpoint + "/lfs/produce"

	// Wrap body with progress tracking if configured
	trackedBody := body
	if p.progress != nil {
		trackedBody = &progressReader{
			reader:   body,
			progress: p.progress,
		}
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, trackedBody)
	if err != nil {
		return nil, &LfsError{Op: "create_request", Err: err}
	}

	// The key is base64-encoded because HTTP header values cannot safely
	// carry arbitrary bytes.
	req.Header.Set("X-Kafka-Topic", topic)
	if key != "" {
		req.Header.Set("X-Kafka-Key", base64.StdEncoding.EncodeToString([]byte(key)))
	}
	if partition >= 0 {
		req.Header.Set("X-Kafka-Partition", strconv.Itoa(int(partition)))
	}
	req.Header.Set("Content-Type", p.contentType)

	if p.apiKey != "" {
		req.Header.Set("X-API-Key", p.apiKey)
	}

	start := time.Now()
	resp, err := p.client.Do(req)
	if err != nil {
		return nil, &LfsError{Op: "upload", Err: err}
	}
	defer func() { _ = resp.Body.Close() }()

	// NOTE(review): only 200 is treated as success — confirm the proxy never
	// answers 201/202. The error body is capped at 1 KiB to bound memory.
	if resp.StatusCode != http.StatusOK {
		bodyBytes, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
		return nil, &LfsError{
			Op:  "upload",
			Err: fmt.Errorf("status %d: %s", resp.StatusCode, string(bodyBytes)),
		}
	}

	var env Envelope
	if err := json.NewDecoder(resp.Body).Decode(&env); err != nil {
		return nil, &LfsError{Op: "decode_response", Err: err}
	}

	// BytesSent is taken from the server-reported envelope size, not from a
	// client-side byte count.
	return &ProduceResult{
		Envelope:  env,
		Duration:  time.Since(start),
		BytesSent: env.Size,
	}, nil
}

// progressReader wraps a reader and reports progress.
// A non-nil error from the callback aborts the upload mid-stream.
type progressReader struct {
	reader   io.Reader
	progress ProgressFunc
	sent     int64 // cumulative bytes read so far
}

func (pr *progressReader) Read(p []byte) (int, error) {
	n, err := pr.reader.Read(p)
	if n > 0 {
		pr.sent += int64(n)
		if pr.progress != nil {
			// The callback's error is returned alongside the bytes already
			// read, which cancels the in-flight HTTP request body.
			if perr := pr.progress(pr.sent); perr != nil {
				return n, perr
			}
		}
	}
	return n, err
}

// isRetryable determines if an error is transient and worth retrying.
+func isRetryable(err error) bool { + if err == nil { + return false + } + + // Context errors are not retryable + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return false + } + + // Checksum errors are not retryable + var checksumErr *ChecksumError + if errors.As(err, &checksumErr) { + return false + } + + // LfsError wrapping HTTP errors might be retryable + var lfsErr *LfsError + if errors.As(err, &lfsErr) { + // Check for retryable HTTP status codes in error message + errStr := lfsErr.Error() + // 5xx errors are retryable + if contains(errStr, "status 5") { + return true + } + // 429 Too Many Requests is retryable + if contains(errStr, "status 429") { + return true + } + // Connection errors are retryable + if contains(errStr, "connection") || contains(errStr, "timeout") { + return true + } + } + + return false +} + +func contains(s, substr string) bool { + return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsAt(s, substr)) +} + +func containsAt(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + +// replayable returns a reader that can be rewound for retries. +// If the reader already supports io.Seeker (e.g. *os.File, *bytes.Reader), +// it is returned as-is. Otherwise the contents are buffered into memory +// so that retries read the full payload instead of an exhausted reader. +func replayable(r io.Reader) (io.Reader, error) { + if _, ok := r.(io.Seeker); ok { + return r, nil + } + data, err := io.ReadAll(r) + if err != nil { + return nil, fmt.Errorf("buffering body for retry: %w", err) + } + return bytes.NewReader(data), nil +} + +// rewind seeks a reader back to the start. The caller must ensure the reader +// was returned by replayable, which guarantees it implements io.Seeker. 
+func rewind(r io.Reader) error { + s, ok := r.(io.Seeker) + if !ok { + return errors.New("reader is not seekable") + } + _, err := s.Seek(0, io.SeekStart) + return err +} diff --git a/pkg/lfs/producer_test.go b/pkg/lfs/producer_test.go new file mode 100644 index 00000000..daef0490 --- /dev/null +++ b/pkg/lfs/producer_test.go @@ -0,0 +1,422 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package lfs + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "io" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" +) + +func TestNewProducer(t *testing.T) { + p := NewProducer("http://localhost:8080") + if p.endpoint != "http://localhost:8080" { + t.Errorf("expected endpoint http://localhost:8080, got %s", p.endpoint) + } + if p.contentType != "application/octet-stream" { + t.Errorf("expected default content-type application/octet-stream, got %s", p.contentType) + } + if p.maxRetries != 3 { + t.Errorf("expected default maxRetries 3, got %d", p.maxRetries) + } +} + +func TestNewProducerWithOptions(t *testing.T) { + client := &http.Client{Timeout: 5 * time.Second} + p := NewProducer("http://localhost:8080", + WithHTTPClient(client), + WithAPIKey("secret-key"), + WithContentType("video/mp4"), + WithRetry(5, 2*time.Second), + ) + + if p.client != client { + t.Error("expected custom HTTP client") + } + if p.apiKey != "secret-key" { + t.Errorf("expected apiKey secret-key, got %s", p.apiKey) + } + if p.contentType != "video/mp4" { + t.Errorf("expected content-type video/mp4, got %s", p.contentType) + } + if p.maxRetries != 5 { + t.Errorf("expected maxRetries 5, got %d", p.maxRetries) + } +} + +func TestProducerProduce(t *testing.T) { + // Create a mock LFS proxy server + expectedEnvelope := Envelope{ + Version: 1, + Bucket: "test-bucket", + Key: "test/topic/lfs/2026/02/01/obj-123", + Size: 1024, + SHA256: "abc123", + ContentType: "application/octet-stream", + CreatedAt: "2026-02-01T12:00:00Z", + ProxyID: "test-proxy", + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Verify request + if r.Method != http.MethodPost { + t.Errorf("expected POST, got %s", r.Method) + } + if r.URL.Path != "/lfs/produce" { + t.Errorf("expected /lfs/produce, got %s", r.URL.Path) + } + if r.Header.Get("X-Kafka-Topic") != "test-topic" { + t.Errorf("expected topic test-topic, got %s", 
r.Header.Get("X-Kafka-Topic")) + } + + // Read body to simulate upload + body, _ := io.ReadAll(r.Body) + if string(body) != "test payload" { + t.Errorf("expected body 'test payload', got '%s'", string(body)) + } + + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(expectedEnvelope) + })) + defer server.Close() + + producer := NewProducer(server.URL) + result, err := producer.Produce(context.Background(), "test-topic", "test-key", strings.NewReader("test payload")) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Envelope.Key != expectedEnvelope.Key { + t.Errorf("expected key %s, got %s", expectedEnvelope.Key, result.Envelope.Key) + } + if result.Envelope.SHA256 != expectedEnvelope.SHA256 { + t.Errorf("expected sha256 %s, got %s", expectedEnvelope.SHA256, result.Envelope.SHA256) + } +} + +func TestProducerProduceEmptyTopic(t *testing.T) { + producer := NewProducer("http://localhost:8080") + _, err := producer.Produce(context.Background(), "", "key", strings.NewReader("data")) + + if err == nil { + t.Error("expected error for empty topic") + } + if !strings.Contains(err.Error(), "topic is required") { + t.Errorf("expected 'topic is required' error, got: %v", err) + } +} + +func TestProducerProduceNilBody(t *testing.T) { + producer := NewProducer("http://localhost:8080") + _, err := producer.Produce(context.Background(), "topic", "key", nil) + + if err == nil { + t.Error("expected error for nil body") + } + if !strings.Contains(err.Error(), "body is required") { + t.Errorf("expected 'body is required' error, got: %v", err) + } +} + +func TestProducerProduceServerError(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte("internal server error")) + })) + defer server.Close() + + producer := NewProducer(server.URL, WithRetry(0, 0)) + _, err := producer.Produce(context.Background(), 
"test-topic", "key", strings.NewReader("data")) + + if err == nil { + t.Error("expected error for server error") + } + + var lfsErr *LfsError + if !errors.As(err, &lfsErr) { + t.Errorf("expected LfsError, got %T", err) + } +} + +func TestProducerProduceWithChecksum(t *testing.T) { + expectedSHA := "expected-sha256" + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + env := Envelope{SHA256: expectedSHA} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(env) + })) + defer server.Close() + + producer := NewProducer(server.URL) + + // Matching checksum should succeed + result, err := producer.ProduceWithChecksum(context.Background(), "topic", "key", strings.NewReader("data"), expectedSHA) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Envelope.SHA256 != expectedSHA { + t.Errorf("expected sha256 %s, got %s", expectedSHA, result.Envelope.SHA256) + } + + // Mismatched checksum should fail + _, err = producer.ProduceWithChecksum(context.Background(), "topic", "key", strings.NewReader("data"), "wrong-sha") + if err == nil { + t.Error("expected checksum error") + } + var checksumErr *ChecksumError + if !errors.As(err, &checksumErr) { + t.Errorf("expected ChecksumError, got %T", err) + } +} + +func TestProducerProgress(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = io.ReadAll(r.Body) + env := Envelope{Size: 1000} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(env) + })) + defer server.Close() + + var progressCalls int64 + var lastBytes int64 + + producer := NewProducer(server.URL, + WithProgress(func(bytesSent int64) error { + atomic.AddInt64(&progressCalls, 1) + atomic.StoreInt64(&lastBytes, bytesSent) + return nil + }), + ) + + // Create a larger payload to trigger multiple progress calls + payload := bytes.Repeat([]byte("x"), 10000) + _, err := 
producer.Produce(context.Background(), "topic", "key", bytes.NewReader(payload)) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if atomic.LoadInt64(&progressCalls) == 0 { + t.Error("expected progress callback to be called") + } + if atomic.LoadInt64(&lastBytes) != int64(len(payload)) { + t.Errorf("expected final bytes %d, got %d", len(payload), atomic.LoadInt64(&lastBytes)) + } +} + +func TestProducerProgressCancel(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = io.ReadAll(r.Body) + env := Envelope{} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(env) + })) + defer server.Close() + + cancelErr := errors.New("user cancelled") + producer := NewProducer(server.URL, + WithProgress(func(bytesSent int64) error { + if bytesSent > 100 { + return cancelErr + } + return nil + }), + WithRetry(0, 0), + ) + + payload := bytes.Repeat([]byte("x"), 10000) + _, err := producer.Produce(context.Background(), "topic", "key", bytes.NewReader(payload)) + + if err == nil { + t.Error("expected error from progress cancel") + } +} + +func TestProducerRetry(t *testing.T) { + var attempts int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = io.ReadAll(r.Body) + count := atomic.AddInt32(&attempts, 1) + if count < 3 { + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte("status 503: service unavailable")) + return + } + env := Envelope{Key: "success"} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(env) + })) + defer server.Close() + + producer := NewProducer(server.URL, WithRetry(3, 10*time.Millisecond)) + result, err := producer.Produce(context.Background(), "topic", "key", strings.NewReader("data")) + + if err != nil { + t.Fatalf("unexpected error after retries: %v", err) + } + if result.Envelope.Key != "success" { + t.Errorf("expected key 'success', got 
%s", result.Envelope.Key) + } + if atomic.LoadInt32(&attempts) != 3 { + t.Errorf("expected 3 attempts, got %d", atomic.LoadInt32(&attempts)) + } +} + +func TestProducerRetryExhausted(t *testing.T) { + var attempts int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = io.ReadAll(r.Body) + atomic.AddInt32(&attempts, 1) + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte("status 503: always fails")) + })) + defer server.Close() + + producer := NewProducer(server.URL, WithRetry(2, 10*time.Millisecond)) + _, err := producer.Produce(context.Background(), "topic", "key", strings.NewReader("data")) + + if err == nil { + t.Error("expected error after exhausting retries") + } + if !strings.Contains(err.Error(), "max retries exceeded") { + t.Errorf("expected 'max retries exceeded' error, got: %v", err) + } + // Initial attempt + 2 retries = 3 total + if atomic.LoadInt32(&attempts) != 3 { + t.Errorf("expected 3 attempts (1 + 2 retries), got %d", atomic.LoadInt32(&attempts)) + } +} + +func TestProducerNoRetryOn400(t *testing.T) { + var attempts int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = io.ReadAll(r.Body) + atomic.AddInt32(&attempts, 1) + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte("bad request")) + })) + defer server.Close() + + producer := NewProducer(server.URL, WithRetry(3, 10*time.Millisecond)) + _, err := producer.Produce(context.Background(), "topic", "key", strings.NewReader("data")) + + if err == nil { + t.Error("expected error for 400 response") + } + // 400 errors should not be retried + if atomic.LoadInt32(&attempts) != 1 { + t.Errorf("expected 1 attempt (no retry for 400), got %d", atomic.LoadInt32(&attempts)) + } +} + +func TestProducerContextCancel(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(5 * time.Second) // Slow server + env 
:= Envelope{} + _ = json.NewEncoder(w).Encode(env) + })) + defer server.Close() + + producer := NewProducer(server.URL) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + _, err := producer.Produce(ctx, "topic", "key", strings.NewReader("data")) + + if err == nil { + t.Error("expected error from context timeout") + } +} + +func TestProducerAPIKey(t *testing.T) { + var receivedKey string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedKey = r.Header.Get("X-API-Key") + _, _ = io.ReadAll(r.Body) + env := Envelope{} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(env) + })) + defer server.Close() + + producer := NewProducer(server.URL, WithAPIKey("my-secret-key")) + _, err := producer.Produce(context.Background(), "topic", "key", strings.NewReader("data")) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if receivedKey != "my-secret-key" { + t.Errorf("expected API key 'my-secret-key', got '%s'", receivedKey) + } +} + +func TestProducerPartitioned(t *testing.T) { + var receivedPartition string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedPartition = r.Header.Get("X-Kafka-Partition") + _, _ = io.ReadAll(r.Body) + env := Envelope{} + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(env) + })) + defer server.Close() + + producer := NewProducer(server.URL) + _, err := producer.ProducePartitioned(context.Background(), "topic", 5, "key", strings.NewReader("data")) + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if receivedPartition != "5" { + t.Errorf("expected partition '5', got '%s'", receivedPartition) + } +} + +func TestIsRetryable(t *testing.T) { + tests := []struct { + name string + err error + retryable bool + }{ + {"nil error", nil, false}, + {"context canceled", context.Canceled, false}, + 
{"context deadline", context.DeadlineExceeded, false}, + {"checksum error", &ChecksumError{Expected: "a", Actual: "b"}, false}, + {"500 error", &LfsError{Op: "upload", Err: errors.New("status 500: internal error")}, true}, + {"503 error", &LfsError{Op: "upload", Err: errors.New("status 503: unavailable")}, true}, + {"429 error", &LfsError{Op: "upload", Err: errors.New("status 429: rate limited")}, true}, + {"400 error", &LfsError{Op: "upload", Err: errors.New("status 400: bad request")}, false}, + {"connection error", &LfsError{Op: "upload", Err: errors.New("connection refused")}, true}, + {"timeout error", &LfsError{Op: "upload", Err: errors.New("timeout waiting for response")}, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isRetryable(tt.err); got != tt.retryable { + t.Errorf("isRetryable() = %v, want %v", got, tt.retryable) + } + }) + } +} diff --git a/pkg/lfs/record.go b/pkg/lfs/record.go new file mode 100644 index 00000000..b43192ab --- /dev/null +++ b/pkg/lfs/record.go @@ -0,0 +1,286 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" + "io" + "sync" +) + +// StreamFetcher downloads LFS blobs as streams from storage. 
+type StreamFetcher interface { + Stream(ctx context.Context, key string) (io.ReadCloser, int64, error) +} + +// Record wraps a Kafka record value with lazy LFS resolution. +// If the value contains an LFS envelope, the actual blob is fetched +// from S3 on first access to Value() or ValueStream(). +// +// Example usage: +// +// consumer := lfs.NewConsumer(s3Client) +// for _, record := range kafkaRecords { +// rec := lfs.NewRecord(record.Value, consumer) +// data, err := rec.Value(ctx) +// if err != nil { +// log.Error("failed to resolve LFS", "error", err) +// continue +// } +// // data contains the resolved blob (or original value if not LFS) +// } +type Record struct { + raw []byte + consumer *Consumer + streamFetcher StreamFetcher + validateChecksum bool + + // cached resolution + mu sync.Mutex + resolved bool + value []byte + envelope *Envelope + err error +} + +// RecordOption configures a Record. +type RecordOption func(*Record) + +// WithStreamFetcher sets a stream fetcher for ValueStream() support. +func WithStreamFetcher(fetcher StreamFetcher) RecordOption { + return func(r *Record) { + r.streamFetcher = fetcher + } +} + +// WithRecordChecksumValidation enables/disables checksum validation. +func WithRecordChecksumValidation(enabled bool) RecordOption { + return func(r *Record) { + r.validateChecksum = enabled + } +} + +// NewRecord creates a Record that wraps a raw Kafka message value. +// If the value is an LFS envelope, it will be resolved lazily on first access. +func NewRecord(raw []byte, consumer *Consumer, opts ...RecordOption) *Record { + r := &Record{ + raw: raw, + consumer: consumer, + validateChecksum: true, + } + for _, opt := range opts { + opt(r) + } + return r +} + +// IsLFS returns true if this record contains an LFS envelope. +func (r *Record) IsLFS() bool { + return IsLfsEnvelope(r.raw) +} + +// Raw returns the original record value without resolution. 
+func (r *Record) Raw() []byte { + return r.raw +} + +// Envelope returns the LFS envelope if present, nil otherwise. +// Does not fetch the blob, just parses the envelope metadata. +func (r *Record) Envelope() (*Envelope, error) { + if !r.IsLFS() { + return nil, nil + } + env, err := DecodeEnvelope(r.raw) + if err != nil { + return nil, &LfsError{Op: "decode", Err: err} + } + return &env, nil +} + +// Value returns the resolved blob content. +// If the record is an LFS envelope, fetches the blob from S3. +// If not an LFS envelope, returns the original value. +// Results are cached after first resolution. +func (r *Record) Value(ctx context.Context) ([]byte, error) { + r.mu.Lock() + defer r.mu.Unlock() + + if r.resolved { + return r.value, r.err + } + + r.resolved = true + + if !IsLfsEnvelope(r.raw) { + r.value = r.raw + return r.value, nil + } + + if r.consumer == nil { + r.err = &LfsError{Op: "resolve", Err: ErrNoConsumer} + return nil, r.err + } + + env, blob, err := r.consumer.UnwrapEnvelope(ctx, r.raw) + r.envelope = env + if err != nil { + r.err = err + return nil, r.err + } + + r.value = blob + return r.value, nil +} + +// ValueStream returns a streaming reader for the blob content. +// This is more memory-efficient for large blobs. +// Note: The caller must close the returned reader. +// If not an LFS envelope, returns a reader over the raw value. 
+func (r *Record) ValueStream(ctx context.Context) (io.ReadCloser, int64, error) { + if !r.IsLFS() { + return io.NopCloser(newBytesReader(r.raw)), int64(len(r.raw)), nil + } + + env, err := DecodeEnvelope(r.raw) + if err != nil { + return nil, 0, &LfsError{Op: "decode", Err: err} + } + + if r.streamFetcher == nil { + return nil, 0, &LfsError{Op: "stream", Err: ErrNoStreamFetcher} + } + + reader, length, err := r.streamFetcher.Stream(ctx, env.Key) + if err != nil { + return nil, 0, &LfsError{Op: "stream", Err: err} + } + + if r.validateChecksum { + alg, expected, ok, err := EnvelopeChecksum(env) + if err != nil { + return nil, 0, &LfsError{Op: "checksum", Err: err} + } + if ok { + hasher, err := NewChecksumHasher(alg) + if err != nil { + return nil, 0, &LfsError{Op: "checksum", Err: err} + } + return &checksumReader{ + reader: reader, + expected: expected, + hasher: hasher, + alg: alg, + }, length, nil + } + } + + return reader, length, nil +} + +// Size returns the size of the blob. +// For LFS records, returns the size from the envelope without fetching. +// For non-LFS records, returns the length of the raw value. +func (r *Record) Size() (int64, error) { + if !r.IsLFS() { + return int64(len(r.raw)), nil + } + + env, err := DecodeEnvelope(r.raw) + if err != nil { + return 0, &LfsError{Op: "decode", Err: err} + } + + return env.Size, nil +} + +// ContentType returns the content type from the LFS envelope. +// Returns empty string for non-LFS records. +func (r *Record) ContentType() string { + if !r.IsLFS() { + return "" + } + + env, err := DecodeEnvelope(r.raw) + if err != nil { + return "" + } + + return env.ContentType +} + +// bytesReader wraps a byte slice for io.Reader interface. 
+type bytesReader struct { + data []byte + pos int +} + +func newBytesReader(data []byte) *bytesReader { + return &bytesReader{data: data} +} + +func (r *bytesReader) Read(p []byte) (n int, err error) { + if r.pos >= len(r.data) { + return 0, io.EOF + } + n = copy(p, r.data[r.pos:]) + r.pos += n + return n, nil +} + +// checksumReader wraps a reader and validates checksum on close. +type checksumReader struct { + reader io.ReadCloser + expected string + hasher interface { + Write([]byte) (int, error) + Sum([]byte) []byte + } + alg ChecksumAlg + closed bool +} + +func (r *checksumReader) Read(p []byte) (n int, err error) { + n, err = r.reader.Read(p) + if n > 0 { + _, _ = r.hasher.Write(p[:n]) + } + return n, err +} + +func (r *checksumReader) Close() error { + if r.closed { + return nil + } + r.closed = true + + // Read any remaining data to complete the hash + remaining, _ := io.ReadAll(r.reader) + if len(remaining) > 0 { + _, _ = r.hasher.Write(remaining) + } + + err := r.reader.Close() + if err != nil { + return err + } + + actual := formatChecksum(r.hasher.Sum(nil)) + if actual != r.expected { + return &ChecksumError{Expected: r.expected, Actual: actual} + } + + return nil +} diff --git a/pkg/lfs/record_test.go b/pkg/lfs/record_test.go new file mode 100644 index 00000000..263d354a --- /dev/null +++ b/pkg/lfs/record_test.go @@ -0,0 +1,412 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" + "crypto/md5" + "crypto/sha256" + "encoding/hex" + "errors" + "io" + "testing" +) + +// mockStreamFetcher implements StreamFetcher for testing. +type mockStreamFetcher struct { + blobs map[string][]byte + err error +} + +func (m *mockStreamFetcher) Stream(ctx context.Context, key string) (io.ReadCloser, int64, error) { + if m.err != nil { + return nil, 0, m.err + } + blob, ok := m.blobs[key] + if !ok { + return nil, 0, errors.New("not found") + } + return io.NopCloser(newBytesReader(blob)), int64(len(blob)), nil +} + +func TestRecordIsLFS(t *testing.T) { + tests := []struct { + name string + raw []byte + expected bool + }{ + {"non-LFS", []byte("plain text"), false}, + {"LFS envelope", []byte(`{"kfs_lfs":1,"bucket":"b","key":"k","sha256":"s"}`), true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rec := NewRecord(tt.raw, nil) + if got := rec.IsLFS(); got != tt.expected { + t.Errorf("IsLFS() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestRecordRaw(t *testing.T) { + raw := []byte("test data") + rec := NewRecord(raw, nil) + if string(rec.Raw()) != string(raw) { + t.Errorf("Raw() = %q, want %q", rec.Raw(), raw) + } +} + +func TestRecordValueNonLFS(t *testing.T) { + raw := []byte("plain text") + rec := NewRecord(raw, nil) + + val, err := rec.Value(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(val) != string(raw) { + t.Errorf("Value() = %q, want %q", val, raw) + } +} + +func TestRecordValueLFS(t *testing.T) { + blob := []byte("resolved blob content") + hash := sha256.Sum256(blob) + checksum := hex.EncodeToString(hash[:]) + + fetcher := &mockFetcher{ + blobs: map[string][]byte{"key": blob}, + } + consumer := NewConsumer(fetcher) + + env := Envelope{Version: 1, Bucket: "b", Key: "key", Size: int64(len(blob)), SHA256: checksum} + envBytes, 
_ := EncodeEnvelope(env) + + rec := NewRecord(envBytes, consumer) + + val, err := rec.Value(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(val) != string(blob) { + t.Errorf("Value() = %q, want %q", val, blob) + } +} + +func TestRecordValueCaching(t *testing.T) { + blob := []byte("blob") + hash := sha256.Sum256(blob) + checksum := hex.EncodeToString(hash[:]) + + callCount := 0 + fetcher := &mockFetcher{ + blobs: map[string][]byte{"key": blob}, + } + // Wrap to count calls + wrappedFetcher := &countingFetcher{fetcher: fetcher, count: &callCount} + consumer := NewConsumer(wrappedFetcher) + + env := Envelope{Version: 1, Bucket: "b", Key: "key", Size: int64(len(blob)), SHA256: checksum} + envBytes, _ := EncodeEnvelope(env) + + rec := NewRecord(envBytes, consumer) + + // First call + _, _ = rec.Value(context.Background()) + // Second call should use cache + _, _ = rec.Value(context.Background()) + + if callCount != 1 { + t.Errorf("expected 1 fetch call, got %d", callCount) + } +} + +type countingFetcher struct { + fetcher *mockFetcher + count *int +} + +func (c *countingFetcher) Fetch(ctx context.Context, key string) ([]byte, error) { + *c.count++ + return c.fetcher.Fetch(ctx, key) +} + +func TestRecordValueNoConsumer(t *testing.T) { + env := Envelope{Version: 1, Bucket: "b", Key: "k", Size: 100, SHA256: "abc123"} + envBytes, _ := EncodeEnvelope(env) + + rec := NewRecord(envBytes, nil) + + _, err := rec.Value(context.Background()) + if err == nil { + t.Fatal("expected error when no consumer") + } + + var lfsErr *LfsError + if !errors.As(err, &lfsErr) { + t.Fatalf("expected LfsError, got %T", err) + } + if !errors.Is(lfsErr.Err, ErrNoConsumer) { + t.Errorf("expected ErrNoConsumer, got %v", lfsErr.Err) + } +} + +func TestRecordEnvelope(t *testing.T) { + env := Envelope{ + Version: 1, + Bucket: "bucket", + Key: "key", + Size: 100, + SHA256: "abc", + ContentType: "text/plain", + } + envBytes, _ := EncodeEnvelope(env) + + rec 
:= NewRecord(envBytes, nil) + gotEnv, err := rec.Envelope() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if gotEnv.Bucket != "bucket" { + t.Errorf("Bucket = %s, want bucket", gotEnv.Bucket) + } + if gotEnv.ContentType != "text/plain" { + t.Errorf("ContentType = %s, want text/plain", gotEnv.ContentType) + } +} + +func TestRecordEnvelopeNonLFS(t *testing.T) { + rec := NewRecord([]byte("plain"), nil) + env, err := rec.Envelope() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if env != nil { + t.Errorf("expected nil envelope for non-LFS, got %+v", env) + } +} + +func TestRecordSize(t *testing.T) { + // Non-LFS + plain := []byte("12345") + rec := NewRecord(plain, nil) + size, err := rec.Size() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if size != 5 { + t.Errorf("Size() = %d, want 5", size) + } + + // LFS + env := Envelope{Version: 1, Bucket: "b", Key: "k", Size: 1024, SHA256: "abc"} + envBytes, _ := EncodeEnvelope(env) + rec = NewRecord(envBytes, nil) + size, err = rec.Size() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if size != 1024 { + t.Errorf("Size() = %d, want 1024", size) + } +} + +func TestRecordContentType(t *testing.T) { + // Non-LFS + rec := NewRecord([]byte("plain"), nil) + if ct := rec.ContentType(); ct != "" { + t.Errorf("ContentType() = %q, want empty", ct) + } + + // LFS + env := Envelope{Version: 1, Bucket: "b", Key: "k", Size: 100, SHA256: "abc", ContentType: "image/png"} + envBytes, _ := EncodeEnvelope(env) + rec = NewRecord(envBytes, nil) + if ct := rec.ContentType(); ct != "image/png" { + t.Errorf("ContentType() = %q, want image/png", ct) + } +} + +func TestRecordValueStreamNonLFS(t *testing.T) { + raw := []byte("plain text data") + rec := NewRecord(raw, nil) + + reader, length, err := rec.ValueStream(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer func() { _ = reader.Close() }() + + if length != int64(len(raw)) { + 
t.Errorf("length = %d, want %d", length, len(raw)) + } + + data, _ := io.ReadAll(reader) + if string(data) != string(raw) { + t.Errorf("stream data = %q, want %q", data, raw) + } +} + +func TestRecordValueStreamLFS(t *testing.T) { + blob := []byte("streamed blob content") + hash := sha256.Sum256(blob) + checksum := hex.EncodeToString(hash[:]) + + streamFetcher := &mockStreamFetcher{ + blobs: map[string][]byte{"key": blob}, + } + + env := Envelope{Version: 1, Bucket: "b", Key: "key", Size: int64(len(blob)), SHA256: checksum} + envBytes, _ := EncodeEnvelope(env) + + rec := NewRecord(envBytes, nil, WithStreamFetcher(streamFetcher)) + + reader, length, err := rec.ValueStream(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if length != int64(len(blob)) { + t.Errorf("length = %d, want %d", length, len(blob)) + } + + data, _ := io.ReadAll(reader) + if string(data) != string(blob) { + t.Errorf("stream data = %q, want %q", data, blob) + } + + // Close validates checksum + if err := reader.Close(); err != nil { + t.Fatalf("Close() error: %v", err) + } +} + +func TestRecordValueStreamNoFetcher(t *testing.T) { + env := Envelope{Version: 1, Bucket: "b", Key: "k", Size: 100, SHA256: "abc"} + envBytes, _ := EncodeEnvelope(env) + + rec := NewRecord(envBytes, nil) // No stream fetcher + + _, _, err := rec.ValueStream(context.Background()) + if err == nil { + t.Fatal("expected error when no stream fetcher") + } + + var lfsErr *LfsError + if !errors.As(err, &lfsErr) { + t.Fatalf("expected LfsError, got %T", err) + } + if !errors.Is(lfsErr.Err, ErrNoStreamFetcher) { + t.Errorf("expected ErrNoStreamFetcher, got %v", lfsErr.Err) + } +} + +func TestRecordValueStreamMD5Checksum(t *testing.T) { + blob := []byte("blob content") + md5sum := md5.Sum(blob) + sha := sha256.Sum256(blob) + + streamFetcher := &mockStreamFetcher{ + blobs: map[string][]byte{"key": blob}, + } + + env := Envelope{ + Version: 1, + Bucket: "b", + Key: "key", + Size: 
int64(len(blob)), + SHA256: hex.EncodeToString(sha[:]), + Checksum: hex.EncodeToString(md5sum[:]), + ChecksumAlg: "md5", + } + envBytes, _ := EncodeEnvelope(env) + + rec := NewRecord(envBytes, nil, WithStreamFetcher(streamFetcher)) + + reader, _, err := rec.ValueStream(context.Background()) + if err != nil { + t.Fatalf("unexpected error getting stream: %v", err) + } + _, _ = io.ReadAll(reader) + if err := reader.Close(); err != nil { + t.Fatalf("Close() error: %v", err) + } +} + +func TestRecordValueStreamChecksumMismatch(t *testing.T) { + blob := []byte("blob content") + wrongChecksum := "0000000000000000000000000000000000000000000000000000000000000000" + + streamFetcher := &mockStreamFetcher{ + blobs: map[string][]byte{"key": blob}, + } + + env := Envelope{Version: 1, Bucket: "b", Key: "key", Size: int64(len(blob)), SHA256: wrongChecksum} + envBytes, _ := EncodeEnvelope(env) + + rec := NewRecord(envBytes, nil, WithStreamFetcher(streamFetcher)) + + reader, _, err := rec.ValueStream(context.Background()) + if err != nil { + t.Fatalf("unexpected error getting stream: %v", err) + } + + // Read all data + _, _ = io.ReadAll(reader) + + // Close should fail with checksum error + err = reader.Close() + if err == nil { + t.Fatal("expected checksum error on Close") + } + + var checksumErr *ChecksumError + if !errors.As(err, &checksumErr) { + t.Fatalf("expected ChecksumError, got %T: %v", err, err) + } +} + +func TestRecordValueStreamChecksumDisabled(t *testing.T) { + blob := []byte("blob content") + wrongChecksum := "0000000000000000000000000000000000000000000000000000000000000000" + + streamFetcher := &mockStreamFetcher{ + blobs: map[string][]byte{"key": blob}, + } + + env := Envelope{Version: 1, Bucket: "b", Key: "key", Size: int64(len(blob)), SHA256: wrongChecksum} + envBytes, _ := EncodeEnvelope(env) + + rec := NewRecord(envBytes, nil, + WithStreamFetcher(streamFetcher), + WithRecordChecksumValidation(false), + ) + + reader, _, err := 
rec.ValueStream(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + data, _ := io.ReadAll(reader) + if string(data) != string(blob) { + t.Errorf("data = %q, want %q", data, blob) + } + + // Close should succeed even with wrong checksum + if err := reader.Close(); err != nil { + t.Fatalf("Close() should succeed with validation disabled: %v", err) + } +} diff --git a/pkg/lfs/resolver.go b/pkg/lfs/resolver.go new file mode 100644 index 00000000..68aeed36 --- /dev/null +++ b/pkg/lfs/resolver.go @@ -0,0 +1,93 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" + "fmt" +) + +// ResolverConfig controls LFS resolution behavior. +type ResolverConfig struct { + MaxSize int64 + ValidateChecksum bool +} + +// ResolvedRecord holds the resolved payload and metadata. +type ResolvedRecord struct { + Envelope Envelope + Payload []byte + ContentType string + BlobSize int64 + Checksum string + ChecksumAlg string +} + +// Resolver fetches LFS payloads and validates integrity. +type Resolver struct { + cfg ResolverConfig + s3 S3Reader +} + +// NewResolver creates a resolver with the provided S3 reader. +func NewResolver(cfg ResolverConfig, s3 S3Reader) *Resolver { + return &Resolver{cfg: cfg, s3: s3} +} + +// Resolve resolves a record value. 
It returns ok=false if the value is not an LFS envelope. +func (r *Resolver) Resolve(ctx context.Context, value []byte) (ResolvedRecord, bool, error) { + if !IsLfsEnvelope(value) { + return ResolvedRecord{Payload: value, BlobSize: int64(len(value))}, false, nil + } + env, err := DecodeEnvelope(value) + if err != nil { + return ResolvedRecord{}, true, err + } + if r.s3 == nil { + return ResolvedRecord{}, true, fmt.Errorf("s3 reader not configured") + } + + payload, err := r.s3.Fetch(ctx, env.Key) + if err != nil { + return ResolvedRecord{}, true, err + } + if r.cfg.MaxSize > 0 && int64(len(payload)) > r.cfg.MaxSize { + return ResolvedRecord{}, true, fmt.Errorf("payload size %d exceeds max %d", len(payload), r.cfg.MaxSize) + } + + checksumAlg, expected, ok, err := EnvelopeChecksum(env) + if err != nil { + return ResolvedRecord{}, true, err + } + if r.cfg.ValidateChecksum && ok { + computed, err := ComputeChecksum(checksumAlg, payload) + if err != nil { + return ResolvedRecord{}, true, err + } + if computed != expected { + return ResolvedRecord{}, true, &ChecksumError{Expected: expected, Actual: computed} + } + } + + return ResolvedRecord{ + Envelope: env, + Payload: payload, + ContentType: env.ContentType, + BlobSize: int64(len(payload)), + Checksum: expected, + ChecksumAlg: string(checksumAlg), + }, true, nil +} diff --git a/pkg/lfs/resolver_test.go b/pkg/lfs/resolver_test.go new file mode 100644 index 00000000..8e51faca --- /dev/null +++ b/pkg/lfs/resolver_test.go @@ -0,0 +1,175 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" + "io" + "testing" +) + +type fakeS3Reader struct { + payload []byte + err error +} + +func (f fakeS3Reader) Fetch(ctx context.Context, key string) ([]byte, error) { + return f.payload, f.err +} + +func (f fakeS3Reader) Stream(ctx context.Context, key string) (io.ReadCloser, int64, error) { + return nil, 0, f.err +} + +func TestResolverNonEnvelope(t *testing.T) { + r := NewResolver(ResolverConfig{ValidateChecksum: true}, nil) + res, ok, err := r.Resolve(context.Background(), []byte("plain")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ok { + t.Fatalf("expected ok=false for non-envelope") + } + if string(res.Payload) != "plain" { + t.Fatalf("unexpected payload: %s", res.Payload) + } +} + +func TestResolverEnvelopeChecksum(t *testing.T) { + payload := []byte("hello") + checksum, err := ComputeChecksum(ChecksumSHA256, payload) + if err != nil { + t.Fatalf("checksum: %v", err) + } + env := Envelope{ + Version: 1, + Bucket: "b", + Key: "k", + Size: int64(len(payload)), + SHA256: checksum, + Checksum: checksum, + ChecksumAlg: string(ChecksumSHA256), + ContentType: "text/plain", + } + encoded, err := EncodeEnvelope(env) + if err != nil { + t.Fatalf("encode: %v", err) + } + + r := NewResolver(ResolverConfig{ValidateChecksum: true}, fakeS3Reader{payload: payload}) + res, ok, err := r.Resolve(context.Background(), encoded) + if err != nil { + t.Fatalf("resolve: %v", err) + } + if !ok { + t.Fatalf("expected ok=true") + } + if res.ChecksumAlg != string(ChecksumSHA256) { + t.Fatalf("unexpected 
checksum alg: %s", res.ChecksumAlg) + } + if string(res.Payload) != "hello" { + t.Fatalf("unexpected payload: %s", res.Payload) + } +} + +func TestResolverChecksumMismatch(t *testing.T) { + payload := []byte("hello") + checksum, err := ComputeChecksum(ChecksumSHA256, []byte("other")) + if err != nil { + t.Fatalf("checksum: %v", err) + } + env := Envelope{ + Version: 1, + Bucket: "b", + Key: "k", + Size: int64(len(payload)), + SHA256: checksum, + Checksum: checksum, + ChecksumAlg: string(ChecksumSHA256), + } + encoded, err := EncodeEnvelope(env) + if err != nil { + t.Fatalf("encode: %v", err) + } + + r := NewResolver(ResolverConfig{ValidateChecksum: true}, fakeS3Reader{payload: payload}) + _, ok, err := r.Resolve(context.Background(), encoded) + if err == nil { + t.Fatalf("expected checksum error") + } + if !ok { + t.Fatalf("expected ok=true") + } + if _, isChecksum := err.(*ChecksumError); !isChecksum { + t.Fatalf("expected ChecksumError, got %T", err) + } +} + +func TestResolverMaxSize(t *testing.T) { + payload := []byte("hello") + checksum, err := ComputeChecksum(ChecksumSHA256, payload) + if err != nil { + t.Fatalf("checksum: %v", err) + } + env := Envelope{ + Version: 1, + Bucket: "b", + Key: "k", + Size: int64(len(payload)), + SHA256: checksum, + } + encoded, err := EncodeEnvelope(env) + if err != nil { + t.Fatalf("encode: %v", err) + } + + r := NewResolver(ResolverConfig{MaxSize: 2, ValidateChecksum: true}, fakeS3Reader{payload: payload}) + _, ok, err := r.Resolve(context.Background(), encoded) + if err == nil { + t.Fatalf("expected max size error") + } + if !ok { + t.Fatalf("expected ok=true") + } +} + +func TestResolverMissingS3Reader(t *testing.T) { + payload := []byte("hello") + checksum, err := ComputeChecksum(ChecksumSHA256, payload) + if err != nil { + t.Fatalf("checksum: %v", err) + } + env := Envelope{ + Version: 1, + Bucket: "b", + Key: "k", + Size: int64(len(payload)), + SHA256: checksum, + } + encoded, err := EncodeEnvelope(env) + if err != nil { 
+ t.Fatalf("encode: %v", err) + } + + r := NewResolver(ResolverConfig{ValidateChecksum: true}, nil) + _, ok, err := r.Resolve(context.Background(), encoded) + if err == nil { + t.Fatalf("expected error for missing s3 reader") + } + if !ok { + t.Fatalf("expected ok=true") + } +} diff --git a/pkg/lfs/s3client.go b/pkg/lfs/s3client.go new file mode 100644 index 00000000..2967ebfb --- /dev/null +++ b/pkg/lfs/s3client.go @@ -0,0 +1,111 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" + "errors" + "fmt" + "io" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +type S3Config struct { + Bucket string + Region string + Endpoint string + AccessKeyID string + SecretAccessKey string + SessionToken string + ForcePathStyle bool +} + +type s3API interface { + GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) +} + +// S3Client fetches LFS blobs from S3-compatible storage. 
+type S3Client struct { + bucket string + api s3API +} + +func NewS3Client(ctx context.Context, cfg S3Config) (*S3Client, error) { + if cfg.Bucket == "" { + return nil, errors.New("s3 bucket required") + } + if cfg.Region == "" { + return nil, errors.New("s3 region required") + } + + loadOpts := []func(*config.LoadOptions) error{ + config.WithRegion(cfg.Region), + } + if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { + loadOpts = append(loadOpts, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(cfg.AccessKeyID, cfg.SecretAccessKey, cfg.SessionToken))) + } + awsCfg, err := config.LoadDefaultConfig(ctx, loadOpts...) + if err != nil { + return nil, fmt.Errorf("load aws config: %w", err) + } + client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } + o.UsePathStyle = cfg.ForcePathStyle + }) + + return &S3Client{bucket: cfg.Bucket, api: client}, nil +} + +// Fetch downloads the object contents into memory. +func (c *S3Client) Fetch(ctx context.Context, key string) ([]byte, error) { + if key == "" { + return nil, errors.New("s3 key required") + } + out, err := c.api.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(c.bucket), + Key: aws.String(key), + }) + if err != nil { + return nil, err + } + defer func() { _ = out.Body.Close() }() + return io.ReadAll(out.Body) +} + +// Stream returns the object body for streaming callers. 
+func (c *S3Client) Stream(ctx context.Context, key string) (io.ReadCloser, int64, error) { + if key == "" { + return nil, 0, errors.New("s3 key required") + } + out, err := c.api.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(c.bucket), + Key: aws.String(key), + }) + if err != nil { + return nil, 0, err + } + length := int64(0) + if out.ContentLength != nil { + length = *out.ContentLength + } + return out.Body, length, nil +} diff --git a/pkg/lfs/s3client_test.go b/pkg/lfs/s3client_test.go new file mode 100644 index 00000000..56064bcc --- /dev/null +++ b/pkg/lfs/s3client_test.go @@ -0,0 +1,151 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" + "errors" + "io" + "strings" + "testing" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +type mockS3API struct { + getObjectFunc func(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) +} + +func (m *mockS3API) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + return m.getObjectFunc(ctx, params, optFns...) 
+} + +func newTestS3Client(api s3API) *S3Client { + return &S3Client{bucket: "test-bucket", api: api} +} + +func TestS3ClientFetchSuccess(t *testing.T) { + mock := &mockS3API{ + getObjectFunc: func(_ context.Context, params *s3.GetObjectInput, _ ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + if *params.Key != "test/key" { + t.Fatalf("unexpected key: %s", *params.Key) + } + if *params.Bucket != "test-bucket" { + t.Fatalf("unexpected bucket: %s", *params.Bucket) + } + return &s3.GetObjectOutput{ + Body: io.NopCloser(strings.NewReader("blob data")), + }, nil + }, + } + + client := newTestS3Client(mock) + data, err := client.Fetch(context.Background(), "test/key") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if string(data) != "blob data" { + t.Fatalf("expected 'blob data', got '%s'", data) + } +} + +func TestS3ClientFetchEmptyKey(t *testing.T) { + client := newTestS3Client(&mockS3API{}) + _, err := client.Fetch(context.Background(), "") + if err == nil { + t.Fatal("expected error for empty key") + } +} + +func TestS3ClientFetchError(t *testing.T) { + mock := &mockS3API{ + getObjectFunc: func(_ context.Context, _ *s3.GetObjectInput, _ ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + return nil, errors.New("access denied") + }, + } + client := newTestS3Client(mock) + _, err := client.Fetch(context.Background(), "key") + if err == nil { + t.Fatal("expected error") + } +} + +func TestS3ClientStreamSuccess(t *testing.T) { + contentLen := int64(100) + mock := &mockS3API{ + getObjectFunc: func(_ context.Context, params *s3.GetObjectInput, _ ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + return &s3.GetObjectOutput{ + Body: io.NopCloser(strings.NewReader("stream data")), + ContentLength: aws.Int64(contentLen), + }, nil + }, + } + + client := newTestS3Client(mock) + body, length, err := client.Stream(context.Background(), "key") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer func() { _ = body.Close() }() + if 
length != contentLen { + t.Fatalf("expected length %d, got %d", contentLen, length) + } + data, _ := io.ReadAll(body) + if string(data) != "stream data" { + t.Fatalf("expected 'stream data', got '%s'", data) + } +} + +func TestS3ClientStreamEmptyKey(t *testing.T) { + client := newTestS3Client(&mockS3API{}) + _, _, err := client.Stream(context.Background(), "") + if err == nil { + t.Fatal("expected error for empty key") + } +} + +func TestS3ClientStreamError(t *testing.T) { + mock := &mockS3API{ + getObjectFunc: func(_ context.Context, _ *s3.GetObjectInput, _ ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + return nil, errors.New("not found") + }, + } + client := newTestS3Client(mock) + _, _, err := client.Stream(context.Background(), "key") + if err == nil { + t.Fatal("expected error") + } +} + +func TestS3ClientStreamNilContentLength(t *testing.T) { + mock := &mockS3API{ + getObjectFunc: func(_ context.Context, _ *s3.GetObjectInput, _ ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + return &s3.GetObjectOutput{ + Body: io.NopCloser(strings.NewReader("data")), + ContentLength: nil, + }, nil + }, + } + client := newTestS3Client(mock) + _, length, err := client.Stream(context.Background(), "key") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if length != 0 { + t.Fatalf("expected length 0 for nil ContentLength, got %d", length) + } +} diff --git a/pkg/lfs/s3reader.go b/pkg/lfs/s3reader.go new file mode 100644 index 00000000..0ce13888 --- /dev/null +++ b/pkg/lfs/s3reader.go @@ -0,0 +1,27 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lfs + +import ( + "context" + "io" +) + +// S3Reader fetches LFS blobs from S3-compatible storage. +type S3Reader interface { + Fetch(ctx context.Context, key string) ([]byte, error) + Stream(ctx context.Context, key string) (io.ReadCloser, int64, error) +} diff --git a/pkg/metadata/etcd_store.go b/pkg/metadata/etcd_store.go index 1d37132b..d5589d91 100644 --- a/pkg/metadata/etcd_store.go +++ b/pkg/metadata/etcd_store.go @@ -84,9 +84,7 @@ func NewEtcdStore(ctx context.Context, snapshot ClusterMetadata, cfg EtcdStoreCo metadata: NewInMemoryStore(snapshot), available: 1, } - if err := store.refreshSnapshot(ctx); err != nil { - // ignore if snapshot missing; operator will populate later - } + _ = store.refreshSnapshot(ctx) // best-effort; snapshot may not exist yet store.startWatchers() return store, nil } diff --git a/pkg/metadata/etcd_store_test.go b/pkg/metadata/etcd_store_test.go index 503678ae..beee567e 100644 --- a/pkg/metadata/etcd_store_test.go +++ b/pkg/metadata/etcd_store_test.go @@ -94,7 +94,7 @@ func TestEtcdStoreTopicConfigAndPartitions(t *testing.T) { } cli := newEtcdClient(t, endpoints) - defer cli.Close() + defer func() { _ = cli.Close() }() ctxTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() resp, err := cli.Get(ctxTimeout, PartitionStateKey("orders", 1)) @@ -144,7 +144,7 @@ func TestEtcdStoreDeleteTopicRemovesOffsets(t *testing.T) { waitForTopicRemoval(t, endpoints, "orders") cli := newEtcdClient(t, endpoints) - defer cli.Close() + defer func() { _ = cli.Close() }() ctxTimeout, 
cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -257,7 +257,7 @@ func loadSnapshot(endpoints []string) (*ClusterMetadata, error) { if err != nil { return nil, err } - defer cli.Close() + defer func() { _ = cli.Close() }() ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) defer cancel() resp, err := cli.Get(ctx, snapshotKey()) @@ -297,3 +297,121 @@ func newEtcdClient(t *testing.T, endpoints []string) *clientv3.Client { } return cli } + +func TestEtcdStoreMetadataAndAvailable(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + ctx := context.Background() + initial := ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 1, Host: "b0", Port: 9092}}, + ControllerID: 1, + Topics: []protocol.MetadataTopic{ + {Topic: kmsg.StringPtr("orders"), Partitions: []protocol.MetadataPartition{{Partition: 0, Leader: 1}}}, + }, + } + store, err := NewEtcdStore(ctx, initial, EtcdStoreConfig{Endpoints: endpoints}) + if err != nil { + t.Fatalf("NewEtcdStore: %v", err) + } + + // Metadata + meta, err := store.Metadata(ctx, nil) + if err != nil { + t.Fatalf("Metadata: %v", err) + } + if len(meta.Brokers) != 1 || len(meta.Topics) != 1 { + t.Fatalf("unexpected metadata: %+v", meta) + } + + // Available + if !store.Available() { + t.Fatal("expected Available to return true") + } + + // EtcdClient + cli := store.EtcdClient() + if cli == nil { + t.Fatal("expected non-nil etcd client") + } +} + +func TestEtcdStoreNextOffset(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + ctx := context.Background() + initial := ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 1, Host: "b0", Port: 9092}}, + ControllerID: 1, + } + store, err := NewEtcdStore(ctx, initial, EtcdStoreConfig{Endpoints: endpoints}) + if err != nil { + t.Fatalf("NewEtcdStore: %v", err) + } + + _, err = store.CreateTopic(ctx, TopicSpec{Name: "events", NumPartitions: 2, ReplicationFactor: 1}) + if err != nil { + 
t.Fatalf("CreateTopic: %v", err) + } + + offset, err := store.NextOffset(ctx, "events", 0) + if err != nil { + t.Fatalf("NextOffset: %v", err) + } + if offset != 0 { + t.Fatalf("expected 0 initial offset, got %d", offset) + } + + if err := store.UpdateOffsets(ctx, "events", 0, 42); err != nil { + t.Fatalf("UpdateOffsets: %v", err) + } + + offset, err = store.NextOffset(ctx, "events", 0) + if err != nil { + t.Fatalf("NextOffset: %v", err) + } + if offset != 43 { + t.Fatalf("expected 43, got %d", offset) + } +} + +func TestEtcdStoreConsumerOffsets(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + ctx := context.Background() + store, err := NewEtcdStore(ctx, ClusterMetadata{}, EtcdStoreConfig{Endpoints: endpoints}) + if err != nil { + t.Fatalf("NewEtcdStore: %v", err) + } + + if err := store.CommitConsumerOffset(ctx, "g1", "orders", 0, 100, "meta-0"); err != nil { + t.Fatalf("CommitConsumerOffset: %v", err) + } + if err := store.CommitConsumerOffset(ctx, "g1", "orders", 1, 200, "meta-1"); err != nil { + t.Fatalf("CommitConsumerOffset: %v", err) + } + + // Fetch individual offset + offset, meta, err := store.FetchConsumerOffset(ctx, "g1", "orders", 0) + if err != nil { + t.Fatalf("FetchConsumerOffset: %v", err) + } + if offset != 100 || meta != "meta-0" { + t.Fatalf("expected 100/meta-0, got %d/%q", offset, meta) + } + + // Fetch non-existent + offset, _, err = store.FetchConsumerOffset(ctx, "g1", "orders", 99) + if err != nil { + t.Fatalf("FetchConsumerOffset missing: %v", err) + } + // Non-existent key returns 0 (default value) + if offset != 0 { + t.Fatalf("expected 0 for missing offset, got %d", offset) + } + + // List offsets + offsets, err := store.ListConsumerOffsets(ctx) + if err != nil { + t.Fatalf("ListConsumerOffsets: %v", err) + } + if len(offsets) != 2 { + t.Fatalf("expected 2 offsets, got %d", len(offsets)) + } +} diff --git a/pkg/metadata/group_lease_test.go b/pkg/metadata/group_lease_test.go index b8ed65c7..0a5849f2 100644 --- 
a/pkg/metadata/group_lease_test.go
+++ b/pkg/metadata/group_lease_test.go
@@ -85,7 +85,7 @@ func TestGroupLeaseExpiryFailover(t *testing.T) {
 		t.Fatalf("broker-a acquire: %v", err)
 	}
 
-	cliA.Close()
+	_ = cliA.Close()
 
 	if err := brokerB.Acquire(ctx, "my-group"); err == nil {
 		t.Fatalf("broker-b should not acquire before lease expires")
@@ -150,7 +150,7 @@ func TestGroupReacquireAfterRestart(t *testing.T) {
 		t.Fatalf("broker-a (session 1) acquire: %v", err)
 	}
 
-	cliA1.Close()
+	_ = cliA1.Close()
 
 	brokerA2 := newGroupLeaseManager(t, endpoints, "broker-a", ttl)
 
@@ -214,6 +214,38 @@ func TestGroupConcurrentAcquireRace(t *testing.T) {
 	}
 }
 
+func TestGroupLeaseAccessors(t *testing.T) {
+	endpoints := testutil.StartEmbeddedEtcd(t)
+	cli := newEtcdClientForTest(t, endpoints)
+	mgr := NewGroupLeaseManager(cli, GroupLeaseConfig{
+		BrokerID:        "broker-a",
+		LeaseTTLSeconds: 30,
+		Logger:          slog.Default(),
+	})
+
+	// GroupLeasePrefix (package-level function)
+	prefix := GroupLeasePrefix()
+	if prefix == "" {
+		t.Fatal("expected non-empty group lease prefix")
+	}
+
+	// EtcdClient
+	if mgr.EtcdClient() != cli {
+		t.Fatal("expected same etcd client back")
+	}
+
+	// CurrentOwner after acquiring the group
+	ctx := context.Background()
+	mgr.Acquire(ctx, "test-group")
+	owner, err := mgr.CurrentOwner(ctx, "test-group")
+	if err != nil {
+		t.Fatalf("CurrentOwner: %v", err)
+	}
+	if owner != "broker-a" {
+		t.Fatalf("expected broker-a as owner, got %q", owner)
+	}
+}
+
 // Different groups can be owned by different brokers.
func TestGroupLeaseMultipleGroups(t *testing.T) { endpoints := testutil.StartEmbeddedEtcd(t) diff --git a/pkg/metadata/group_router_test.go b/pkg/metadata/group_router_test.go index 80fdd64e..f179e30b 100644 --- a/pkg/metadata/group_router_test.go +++ b/pkg/metadata/group_router_test.go @@ -121,6 +121,40 @@ func TestGroupRouterReflectsRelease(t *testing.T) { t.Fatalf("router did not reflect release of my-group (still shows %q)", router.LookupOwner("my-group")) } +// Invalidate clears the router cache and reloads. +func TestGroupRouterInvalidate(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + ctx := context.Background() + + routerCli := newEtcdClientForTest(t, endpoints) + router, err := NewGroupRouter(ctx, routerCli, slog.Default()) + if err != nil { + t.Fatalf("create router: %v", err) + } + t.Cleanup(router.Stop) + + brokerA := newGroupLeaseManager(t, endpoints, "broker-a", 30) + if err := brokerA.Acquire(ctx, "inv-group"); err != nil { + t.Fatalf("acquire: %v", err) + } + + deadline := time.Now().Add(5 * time.Second) + for time.Now().Before(deadline) { + if router.LookupOwner("inv-group") == "broker-a" { + break + } + time.Sleep(50 * time.Millisecond) + } + + // Invalidate removes the cached route + router.Invalidate("inv-group") + + // Immediately after invalidate, the route should be empty + if owner := router.LookupOwner("inv-group"); owner != "" { + t.Fatalf("expected empty owner after invalidate, got %q", owner) + } +} + // Multiple groups route to different brokers. 
func TestGroupRouterMultipleBrokers(t *testing.T) { endpoints := testutil.StartEmbeddedEtcd(t) diff --git a/pkg/metadata/lease_manager.go b/pkg/metadata/lease_manager.go index 3dfe3529..6e3a429e 100644 --- a/pkg/metadata/lease_manager.go +++ b/pkg/metadata/lease_manager.go @@ -226,7 +226,7 @@ func (m *LeaseManager) getOrCreateSession(ctx context.Context) (*concurrency.Ses m.mu.Lock() if m.closed.Load() { m.mu.Unlock() - session.Close() + _ = session.Close() return nil, ErrShuttingDown } if m.session != nil { @@ -235,7 +235,7 @@ func (m *LeaseManager) getOrCreateSession(ctx context.Context) (*concurrency.Ses default: s := m.session m.mu.Unlock() - session.Close() + _ = session.Close() return s, nil } } @@ -302,7 +302,7 @@ func (m *LeaseManager) ReleaseAll() { m.mu.Unlock() if session != nil { - session.Close() + _ = session.Close() } m.logger.Info(fmt.Sprintf("released all %s leases", m.resourceKind), "broker", m.brokerID, "count", count) diff --git a/pkg/metadata/partition_lease_test.go b/pkg/metadata/partition_lease_test.go index f4f1ded9..eca9fe4b 100644 --- a/pkg/metadata/partition_lease_test.go +++ b/pkg/metadata/partition_lease_test.go @@ -37,7 +37,7 @@ func newEtcdClientForTest(t *testing.T, endpoints []string) *clientv3.Client { if err != nil { t.Fatalf("create etcd client: %v", err) } - t.Cleanup(func() { cli.Close() }) + t.Cleanup(func() { _ = cli.Close() }) return cli } @@ -105,7 +105,7 @@ func TestLeaseExpiryFailover(t *testing.T) { // Simulate broker A crashing by closing its etcd client. // This terminates the session keepalive, so the lease expires after TTL. - cliA.Close() + _ = cliA.Close() if err := brokerB.Acquire(ctx, "orders", 0); err == nil { t.Fatalf("broker-b should not acquire before lease expires") @@ -177,7 +177,7 @@ func TestReacquireAfterRestart(t *testing.T) { // Simulate restart: close the old client but don't wait for expiry. // The lease key still exists in etcd with value "broker-a". 
- cliA1.Close() + _ = cliA1.Close() brokerA2 := newLeaseManager(t, endpoints, "broker-a", ttl) @@ -194,6 +194,71 @@ func TestReacquireAfterRestart(t *testing.T) { } } +func TestPartitionLeaseAccessors(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + cli := newEtcdClientForTest(t, endpoints) + mgr := NewPartitionLeaseManager(cli, PartitionLeaseConfig{ + BrokerID: "broker-a", + LeaseTTLSeconds: 30, + Logger: slog.Default(), + }) + + // PartitionLeasePrefix + prefix := PartitionLeasePrefix() + if prefix == "" { + t.Fatal("expected non-empty partition lease prefix") + } + + // EtcdClient + if mgr.EtcdClient() != cli { + t.Fatal("expected same etcd client back") + } + + ctx := context.Background() + mgr.Acquire(ctx, "orders", 0) + + // CurrentOwner + owner, err := mgr.CurrentOwner(ctx, "orders", 0) + if err != nil { + t.Fatalf("CurrentOwner: %v", err) + } + if owner != "broker-a" { + t.Fatalf("expected broker-a as owner, got %q", owner) + } +} + +func TestPartitionAcquireAll(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + mgr := newLeaseManager(t, endpoints, "broker-a", 30) + + ctx := context.Background() + partitions := []PartitionID{ + {Topic: "orders", Partition: 0}, + {Topic: "orders", Partition: 1}, + {Topic: "orders", Partition: 2}, + } + results := mgr.AcquireAll(ctx, partitions) + for i, r := range results { + if r.Err != nil { + t.Fatalf("AcquireAll partition %d: %v", i, r.Err) + } + } + // All should be owned now + for _, p := range partitions { + if !mgr.Owns(p.Topic, p.Partition) { + t.Fatalf("expected to own %s/%d", p.Topic, p.Partition) + } + } + + // Calling AcquireAll again should be a no-op (already owned) + results = mgr.AcquireAll(ctx, partitions) + for i, r := range results { + if r.Err != nil { + t.Fatalf("re-AcquireAll partition %d: %v", i, r.Err) + } + } +} + // Scenario 9: Concurrent acquire race. // Two brokers race to acquire the same unowned partition. Exactly one must win. 
func TestConcurrentAcquireRace(t *testing.T) { diff --git a/pkg/metadata/partition_router_test.go b/pkg/metadata/partition_router_test.go index 31454370..a0054f0a 100644 --- a/pkg/metadata/partition_router_test.go +++ b/pkg/metadata/partition_router_test.go @@ -198,6 +198,37 @@ func TestRouterReflectsRelease(t *testing.T) { t.Fatalf("router did not reflect release of orders/0 (still shows %q)", router.LookupOwner("orders", 0)) } +// Invalidate removes a specific partition route. +func TestPartitionRouterInvalidate(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + ctx := context.Background() + + routerCli := newEtcdClientForTest(t, endpoints) + router, err := NewPartitionRouter(ctx, routerCli, slog.Default()) + if err != nil { + t.Fatalf("create router: %v", err) + } + t.Cleanup(router.Stop) + + brokerA := newLeaseManager(t, endpoints, "broker-a", 30) + if err := brokerA.Acquire(ctx, "orders", 0); err != nil { + t.Fatalf("acquire: %v", err) + } + + deadline := time.Now().Add(5 * time.Second) + for time.Now().Before(deadline) { + if router.LookupOwner("orders", 0) == "broker-a" { + break + } + time.Sleep(50 * time.Millisecond) + } + + router.Invalidate("orders", 0) + if owner := router.LookupOwner("orders", 0); owner != "" { + t.Fatalf("expected empty owner after invalidate, got %q", owner) + } +} + // Scenario 7: Multiple partitions route to different brokers. 
func TestRouterMultipleBrokersMultiplePartitions(t *testing.T) { endpoints := testutil.StartEmbeddedEtcd(t) diff --git a/pkg/metadata/store_test.go b/pkg/metadata/store_test.go index f38e9ad4..06c094cc 100644 --- a/pkg/metadata/store_test.go +++ b/pkg/metadata/store_test.go @@ -18,6 +18,7 @@ package metadata import ( "context" "errors" + "strings" "testing" metadatapb "github.com/KafScale/platform/pkg/gen/metadata" @@ -278,3 +279,634 @@ func TestInMemoryStoreTopicConfigAndPartitions(t *testing.T) { t.Fatalf("unexpected partition count: %#v", meta.Topics) } } + +// --- Additional store tests for coverage gaps --- + +func TestFetchConsumerOffset(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx := context.Background() + + // Commit then fetch + if err := store.CommitConsumerOffset(ctx, "g1", "orders", 0, 100, "meta-0"); err != nil { + t.Fatalf("CommitConsumerOffset: %v", err) + } + offset, meta, err := store.FetchConsumerOffset(ctx, "g1", "orders", 0) + if err != nil { + t.Fatalf("FetchConsumerOffset: %v", err) + } + if offset != 100 { + t.Fatalf("expected offset 100, got %d", offset) + } + if meta != "meta-0" { + t.Fatalf("expected meta-0, got %q", meta) + } + + // Fetch non-existent returns zero + offset, meta, err = store.FetchConsumerOffset(ctx, "g1", "orders", 99) + if err != nil { + t.Fatalf("FetchConsumerOffset: %v", err) + } + if offset != 0 || meta != "" { + t.Fatalf("expected 0/empty for missing key, got %d/%q", offset, meta) + } +} + +func TestFetchConsumerOffsetContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, _, err := store.FetchConsumerOffset(ctx, "g1", "orders", 0) + if err == nil { + t.Fatal("expected context error") + } +} + +func TestFetchTopicConfigUnknown(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + _, err := store.FetchTopicConfig(context.Background(), "missing") + if !errors.Is(err, ErrUnknownTopic) { + 
t.Fatalf("expected ErrUnknownTopic, got %v", err) + } +} + +func TestFetchTopicConfigContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, err := store.FetchTopicConfig(ctx, "orders") + if err == nil { + t.Fatal("expected context error") + } +} + +func TestFetchTopicConfigDefault(t *testing.T) { + // Topic exists but no explicit config → should return default from topic metadata + store := NewInMemoryStore(ClusterMetadata{ + Topics: []protocol.MetadataTopic{ + { + Topic: kmsg.StringPtr("events"), + Partitions: []protocol.MetadataPartition{ + {Partition: 0, Replicas: []int32{1, 2}}, + {Partition: 1, Replicas: []int32{1, 2}}, + }, + }, + }, + }) + cfg, err := store.FetchTopicConfig(context.Background(), "events") + if err != nil { + t.Fatalf("FetchTopicConfig: %v", err) + } + if cfg.Name != "events" { + t.Fatalf("expected name events, got %q", cfg.Name) + } + if cfg.Partitions != 2 { + t.Fatalf("expected 2 partitions, got %d", cfg.Partitions) + } + if cfg.ReplicationFactor != 2 { + t.Fatalf("expected replication factor 2, got %d", cfg.ReplicationFactor) + } +} + +func TestUpdateTopicConfigInvalid(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx := context.Background() + if err := store.UpdateTopicConfig(ctx, nil); !errors.Is(err, ErrInvalidTopic) { + t.Fatalf("expected ErrInvalidTopic for nil, got %v", err) + } + if err := store.UpdateTopicConfig(ctx, &metadatapb.TopicConfig{Name: ""}); !errors.Is(err, ErrInvalidTopic) { + t.Fatalf("expected ErrInvalidTopic for empty name, got %v", err) + } + if err := store.UpdateTopicConfig(ctx, &metadatapb.TopicConfig{Name: "missing"}); !errors.Is(err, ErrUnknownTopic) { + t.Fatalf("expected ErrUnknownTopic, got %v", err) + } +} + +func TestUpdateTopicConfigContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + err := 
store.UpdateTopicConfig(ctx, &metadatapb.TopicConfig{Name: "orders"}) + if err == nil { + t.Fatal("expected context error") + } +} + +func TestCreatePartitionsInvalid(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx := context.Background() + if err := store.CreatePartitions(ctx, "", 3); !errors.Is(err, ErrInvalidTopic) { + t.Fatalf("expected ErrInvalidTopic for empty name, got %v", err) + } + if err := store.CreatePartitions(ctx, "topic", 0); !errors.Is(err, ErrInvalidTopic) { + t.Fatalf("expected ErrInvalidTopic for zero count, got %v", err) + } + if err := store.CreatePartitions(ctx, "missing", 3); !errors.Is(err, ErrUnknownTopic) { + t.Fatalf("expected ErrUnknownTopic, got %v", err) + } +} + +func TestCreatePartitionsContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + err := store.CreatePartitions(ctx, "orders", 3) + if err == nil { + t.Fatal("expected context error") + } +} + +func TestCreatePartitionsShrink(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 1}}, + }) + ctx := context.Background() + store.CreateTopic(ctx, TopicSpec{Name: "orders", NumPartitions: 3, ReplicationFactor: 1}) + err := store.CreatePartitions(ctx, "orders", 2) // shrink + if err == nil { + t.Fatal("expected error for shrinking partitions") + } +} + +func TestCommitConsumerOffsetContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + err := store.CommitConsumerOffset(ctx, "g1", "orders", 0, 10, "") + if err == nil { + t.Fatal("expected context error") + } +} + +func TestListConsumerOffsetsContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, err := store.ListConsumerOffsets(ctx) + if err == nil { + t.Fatal("expected context error") 
+ } +} + +func TestPutConsumerGroupContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + err := store.PutConsumerGroup(ctx, &metadatapb.ConsumerGroup{GroupId: "g1"}) + if err == nil { + t.Fatal("expected context error") + } +} + +func TestFetchConsumerGroupContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, err := store.FetchConsumerGroup(ctx, "g1") + if err == nil { + t.Fatal("expected context error") + } +} + +func TestListConsumerGroupsContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, err := store.ListConsumerGroups(ctx) + if err == nil { + t.Fatal("expected context error") + } +} + +func TestDeleteConsumerGroupContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + err := store.DeleteConsumerGroup(ctx, "g1") + if err == nil { + t.Fatal("expected context error") + } +} + +func TestDeleteTopicContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + err := store.DeleteTopic(ctx, "orders") + if err == nil { + t.Fatal("expected context error") + } +} + +func TestCreateTopicContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, err := store.CreateTopic(ctx, TopicSpec{Name: "orders", NumPartitions: 1, ReplicationFactor: 1}) + if err == nil { + t.Fatal("expected context error") + } +} + +func TestNextOffsetContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, err := store.NextOffset(ctx, "orders", 0) + if err == 
nil { + t.Fatal("expected context error") + } +} + +func TestUpdateOffsetsContextCancel(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + err := store.UpdateOffsets(ctx, "orders", 0, 10) + if err == nil { + t.Fatal("expected context error") + } +} + +func TestUpdateOffsetsSucceeds(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 1}}, + }) + ctx := context.Background() + store.CreateTopic(ctx, TopicSpec{Name: "orders", NumPartitions: 1, ReplicationFactor: 1}) + err := store.UpdateOffsets(ctx, "orders", 0, 10) + if err != nil { + t.Fatalf("UpdateOffsets: %v", err) + } + offset, err := store.NextOffset(ctx, "orders", 0) + if err != nil { + t.Fatalf("NextOffset: %v", err) + } + if offset != 11 { + t.Fatalf("expected 11 (lastOffset+1), got %d", offset) + } +} + +func TestNextOffsetPartitionMismatch(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 1}}, + }) + ctx := context.Background() + store.CreateTopic(ctx, TopicSpec{Name: "orders", NumPartitions: 1, ReplicationFactor: 1}) + _, err := store.NextOffset(ctx, "orders", 99) + if err == nil { + t.Fatal("expected error for non-existent partition") + } +} + +func TestDefaultLeaderID(t *testing.T) { + // No brokers → uses controller ID + store := NewInMemoryStore(ClusterMetadata{ControllerID: 5}) + if got := store.defaultLeaderID(); got != 5 { + t.Fatalf("expected controller ID 5, got %d", got) + } + + // With brokers → uses first broker + store = NewInMemoryStore(ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 7}}, + ControllerID: 5, + }) + if got := store.defaultLeaderID(); got != 7 { + t.Fatalf("expected broker 7, got %d", got) + } +} + +func TestCloneTopicConfigNil(t *testing.T) { + if got := cloneTopicConfig(nil); got != nil { + t.Fatalf("expected nil, got %+v", got) + } +} + +func 
TestCloneTopicConfigWithData(t *testing.T) { + cfg := &metadatapb.TopicConfig{ + Name: "orders", + Partitions: 3, + ReplicationFactor: 2, + RetentionMs: 86400000, + Config: map[string]string{"cleanup.policy": "compact"}, + } + cloned := cloneTopicConfig(cfg) + if cloned == cfg { + t.Fatal("clone should not be same pointer") + } + if cloned.Name != "orders" || cloned.Partitions != 3 || cloned.ReplicationFactor != 2 { + t.Fatalf("unexpected clone: %+v", cloned) + } + if cloned.Config["cleanup.policy"] != "compact" { + t.Fatalf("expected config to be cloned") + } + // Mutation isolation + cloned.Config["new-key"] = "val" + if _, ok := cfg.Config["new-key"]; ok { + t.Fatal("mutation should not affect original") + } +} + +func TestDefaultTopicConfigFromTopicNil(t *testing.T) { + cfg := defaultTopicConfigFromTopic(nil, 1) + if cfg == nil { + t.Fatal("expected non-nil config for nil topic") + } +} + +func TestDefaultTopicConfigFromTopicWithReplicas(t *testing.T) { + topic := &protocol.MetadataTopic{ + Topic: kmsg.StringPtr("events"), + Partitions: []protocol.MetadataPartition{ + {Partition: 0, Replicas: []int32{1, 2, 3}}, + {Partition: 1, Replicas: []int32{1, 2, 3}}, + }, + } + cfg := defaultTopicConfigFromTopic(topic, 0) // replicationFactor <= 0 triggers auto-detect + if cfg.ReplicationFactor != 3 { + t.Fatalf("expected replication factor 3 from replicas, got %d", cfg.ReplicationFactor) + } + if cfg.Partitions != 2 { + t.Fatalf("expected 2 partitions, got %d", cfg.Partitions) + } + if cfg.RetentionMs != -1 { + t.Fatalf("expected default retention -1, got %d", cfg.RetentionMs) + } +} + +func TestParseConsumerKeyEdgeCases(t *testing.T) { + // Valid + group, topic, partition, ok := parseConsumerKey("g1:orders:0") + if !ok || group != "g1" || topic != "orders" || partition != 0 { + t.Fatalf("unexpected parse result: %q %q %d %v", group, topic, partition, ok) + } + + // Too few parts + _, _, _, ok = parseConsumerKey("g1:orders") + if ok { + t.Fatal("expected false for 2-part 
key") + } + + // Bad partition + _, _, _, ok = parseConsumerKey("g1:orders:abc") + if ok { + t.Fatal("expected false for non-numeric partition") + } + + // Too many parts + _, _, _, ok = parseConsumerKey("g1:orders:0:extra") + if ok { + t.Fatal("expected false for 4-part key") + } +} + +func TestDeleteConsumerGroupNotFound(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + // delete of non-existent key is a no-op, should not error + err := store.DeleteConsumerGroup(context.Background(), "nonexistent") + if err != nil { + t.Fatalf("expected no error for deleting non-existent group, got %v", err) + } +} + +func TestPutConsumerGroupNilAndEmpty(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{}) + ctx := context.Background() + err := store.PutConsumerGroup(ctx, nil) + if err == nil { + t.Fatal("expected error for nil group") + } + err = store.PutConsumerGroup(ctx, &metadatapb.ConsumerGroup{GroupId: ""}) + if err == nil { + t.Fatal("expected error for empty group ID") + } +} + +func TestCloneConsumerGroupNil(t *testing.T) { + if got := cloneConsumerGroup(nil); got != nil { + t.Fatalf("expected nil, got %+v", got) + } +} + +func TestCloneConsumerGroupWithAssignments(t *testing.T) { + group := &metadatapb.ConsumerGroup{ + GroupId: "g1", + State: "stable", + ProtocolType: "consumer", + Members: map[string]*metadatapb.GroupMember{ + "m1": { + ClientId: "client-1", + Subscriptions: []string{"orders"}, + Assignments: []*metadatapb.Assignment{ + {Topic: "orders", Partitions: []int32{0, 1}}, + }, + }, + }, + } + cloned := cloneConsumerGroup(group) + if cloned == group { + t.Fatal("should not be same pointer") + } + if len(cloned.Members) != 1 || cloned.Members["m1"].ClientId != "client-1" { + t.Fatalf("unexpected clone: %+v", cloned) + } + // Mutation isolation + cloned.Members["m1"].Assignments[0].Partitions[0] = 99 + if group.Members["m1"].Assignments[0].Partitions[0] == 99 { + t.Fatal("mutation should not affect original") + } +} + +func 
TestDeleteTopicCleansOffsets(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 1}}, + }) + ctx := context.Background() + store.CreateTopic(ctx, TopicSpec{Name: "orders", NumPartitions: 2, ReplicationFactor: 1}) + store.UpdateOffsets(ctx, "orders", 0, 10) + store.UpdateOffsets(ctx, "orders", 1, 20) + + if err := store.DeleteTopic(ctx, "orders"); err != nil { + t.Fatalf("DeleteTopic: %v", err) + } + // Offsets should be cleaned up + _, err := store.NextOffset(ctx, "orders", 0) + if !errors.Is(err, ErrUnknownTopic) { + t.Fatalf("expected unknown topic after delete, got %v", err) + } +} + +func TestCreateTopicWithCustomPartitions(t *testing.T) { + store := NewInMemoryStore(ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 1}, {NodeID: 2}}, + }) + ctx := context.Background() + topic, err := store.CreateTopic(ctx, TopicSpec{Name: "events", NumPartitions: 5, ReplicationFactor: 2}) + if err != nil { + t.Fatalf("CreateTopic: %v", err) + } + if len(topic.Partitions) != 5 { + t.Fatalf("expected 5 partitions, got %d", len(topic.Partitions)) + } +} + +func TestCodecParseConsumerGroupID(t *testing.T) { + // Valid + id, ok := ParseConsumerGroupID("/kafscale/consumers/my-group/metadata") + if !ok || id != "my-group" { + t.Fatalf("expected my-group, got %q ok=%v", id, ok) + } + + // Missing suffix + _, ok = ParseConsumerGroupID("/kafscale/consumers/my-group/offsets") + if ok { + t.Fatal("expected false for missing /metadata suffix") + } + + // Wrong prefix + _, ok = ParseConsumerGroupID("/other/my-group/metadata") + if ok { + t.Fatal("expected false for wrong prefix") + } + + // Empty group ID + _, ok = ParseConsumerGroupID("/kafscale/consumers//metadata") + if ok { + t.Fatal("expected false for empty group ID") + } + + // Nested path + _, ok = ParseConsumerGroupID("/kafscale/consumers/a/b/metadata") + if ok { + t.Fatal("expected false for nested path") + } +} + +func TestCodecParseConsumerOffsetKey(t *testing.T) 
{ + // Valid + group, topic, part, ok := ParseConsumerOffsetKey("/kafscale/consumers/g1/offsets/orders/5") + if !ok || group != "g1" || topic != "orders" || part != 5 { + t.Fatalf("unexpected: %q %q %d %v", group, topic, part, ok) + } + + // Wrong prefix + _, _, _, ok = ParseConsumerOffsetKey("/other/g1/offsets/orders/5") + if ok { + t.Fatal("expected false for wrong prefix") + } + + // Missing offsets segment + _, _, _, ok = ParseConsumerOffsetKey("/kafscale/consumers/g1/data/orders/5") + if ok { + t.Fatal("expected false for missing offsets segment") + } + + // Non-numeric partition + _, _, _, ok = ParseConsumerOffsetKey("/kafscale/consumers/g1/offsets/orders/abc") + if ok { + t.Fatal("expected false for non-numeric partition") + } +} + +func TestCodecEncodeDecodeRoundTrip(t *testing.T) { + // TopicConfig round-trip + cfg := &metadatapb.TopicConfig{ + Name: "orders", + Partitions: 3, + ReplicationFactor: 2, + } + data, err := EncodeTopicConfig(cfg) + if err != nil { + t.Fatalf("EncodeTopicConfig: %v", err) + } + decoded, err := DecodeTopicConfig(data) + if err != nil { + t.Fatalf("DecodeTopicConfig: %v", err) + } + if decoded.Name != "orders" || decoded.Partitions != 3 { + t.Fatalf("unexpected decoded: %+v", decoded) + } + + // PartitionState round-trip + state := &metadatapb.PartitionState{ + Topic: "orders", + Partition: 2, + LeaderBroker: "broker-1", + LeaderEpoch: 5, + } + stateData, err := EncodePartitionState(state) + if err != nil { + t.Fatalf("EncodePartitionState: %v", err) + } + decodedState, err := DecodePartitionState(stateData) + if err != nil { + t.Fatalf("DecodePartitionState: %v", err) + } + if decodedState.Topic != "orders" || decodedState.Partition != 2 || decodedState.LeaderEpoch != 5 { + t.Fatalf("unexpected decoded state: %+v", decodedState) + } +} + +func TestCodecDecodeErrors(t *testing.T) { + // Bad data + _, err := DecodeTopicConfig([]byte{0xff, 0xff}) + if err == nil { + t.Fatal("expected error for bad topic config data") + } + if 
!strings.Contains(err.Error(), "unmarshal") { + t.Fatalf("expected unmarshal error, got: %v", err) + } + + _, err = DecodePartitionState([]byte{0xff, 0xff}) + if err == nil { + t.Fatal("expected error for bad partition state data") + } + + _, err = DecodeConsumerGroup([]byte{0xff, 0xff}) + if err == nil { + t.Fatal("expected error for bad consumer group data") + } +} + +func TestCodecConsumerGroupRoundTrip(t *testing.T) { + group := &metadatapb.ConsumerGroup{ + GroupId: "g1", + State: "stable", + ProtocolType: "consumer", + Protocol: "range", + } + data, err := EncodeConsumerGroup(group) + if err != nil { + t.Fatalf("EncodeConsumerGroup: %v", err) + } + decoded, err := DecodeConsumerGroup(data) + if err != nil { + t.Fatalf("DecodeConsumerGroup: %v", err) + } + if decoded.GroupId != "g1" || decoded.State != "stable" { + t.Fatalf("unexpected: %+v", decoded) + } +} + +func TestCodecKeyFunctions(t *testing.T) { + if got := TopicConfigKey("orders"); got != "/kafscale/topics/orders/config" { + t.Fatalf("TopicConfigKey: %q", got) + } + if got := PartitionStateKey("orders", 3); got != "/kafscale/topics/orders/partitions/3" { + t.Fatalf("PartitionStateKey: %q", got) + } + if got := ConsumerGroupKey("g1"); got != "/kafscale/consumers/g1/metadata" { + t.Fatalf("ConsumerGroupKey: %q", got) + } + if got := ConsumerGroupPrefix(); got != "/kafscale/consumers" { + t.Fatalf("ConsumerGroupPrefix: %q", got) + } + if got := ConsumerOffsetKey("g1", "orders", 5); got != "/kafscale/consumers/g1/offsets/orders/5" { + t.Fatalf("ConsumerOffsetKey: %q", got) + } + if got := BrokerRegistrationKey("broker-1"); got != "/kafscale/brokers/broker-1" { + t.Fatalf("BrokerRegistrationKey: %q", got) + } + if got := PartitionAssignmentKey("orders", 2); got != "/kafscale/assignments/orders/2" { + t.Fatalf("PartitionAssignmentKey: %q", got) + } +} diff --git a/pkg/operator/cluster_controller.go b/pkg/operator/cluster_controller.go index e08ac0c6..03688586 100644 --- a/pkg/operator/cluster_controller.go 
+++ b/pkg/operator/cluster_controller.go @@ -90,6 +90,9 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct if err := r.reconcileBrokerService(ctx, &cluster); err != nil { return ctrl.Result{}, err } + if err := r.reconcileLfsProxyResources(ctx, &cluster, etcdResolution.Endpoints); err != nil { + return ctrl.Result{}, err + } if err := r.reconcileBrokerHPA(ctx, &cluster); err != nil { return ctrl.Result{}, err } @@ -123,6 +126,7 @@ func (r *ClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&kafscalev1alpha1.KafscaleCluster{}). Owns(&appsv1.StatefulSet{}). + Owns(&appsv1.Deployment{}). Owns(&corev1.Service{}). Owns(&autoscalingv2.HorizontalPodAutoscaler{}). Complete(r) @@ -146,7 +150,7 @@ func (r *ClusterReconciler) reconcileBrokerDeployment(ctx context.Context, clust sts.Spec.ServiceName = brokerHeadlessServiceName(cluster) sts.Spec.Selector = &metav1.LabelSelector{MatchLabels: labels} sts.Spec.Replicas = &replicas - sts.Spec.Template.ObjectMeta.Labels = labels + sts.Spec.Template.Labels = labels sts.Spec.Template.Spec.Containers = []corev1.Container{ r.brokerContainer(cluster, endpoints), } @@ -306,7 +310,7 @@ func parseServiceType(serviceType string) corev1.ServiceType { } } -func parseExternalTrafficPolicy(policy string) corev1.ServiceExternalTrafficPolicyType { +func parseExternalTrafficPolicy(policy string) corev1.ServiceExternalTrafficPolicy { switch strings.TrimSpace(policy) { case string(corev1.ServiceExternalTrafficPolicyTypeLocal): return corev1.ServiceExternalTrafficPolicyTypeLocal diff --git a/pkg/operator/cluster_controller_test.go b/pkg/operator/cluster_controller_test.go index 72a80f68..40afa0b9 100644 --- a/pkg/operator/cluster_controller_test.go +++ b/pkg/operator/cluster_controller_test.go @@ -20,6 +20,7 @@ import ( "fmt" "testing" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
"sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -142,6 +143,133 @@ func TestReconcileBrokerServiceExternalAccess(t *testing.T) { } } +func TestReconcileLfsProxyDeployment(t *testing.T) { + enabled := true + portKafka := int32(19092) + portHTTP := int32(18080) + portMetrics := int32(19095) + portHealth := int32(19094) + maxBlob := int64(1048576) + chunkSize := int64(262144) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{}, + S3: kafscalev1alpha1.S3Spec{ + Bucket: "bucket", + Region: "us-east-1", + Endpoint: "http://minio.local", + CredentialsSecretRef: "creds", + }, + LfsProxy: kafscalev1alpha1.LfsProxySpec{ + Enabled: true, + AdvertisedHost: "proxy.example.com", + AdvertisedPort: &portKafka, + Service: kafscalev1alpha1.LfsProxyServiceSpec{ + Port: &portKafka, + }, + HTTP: kafscalev1alpha1.LfsProxyHTTPSpec{ + Enabled: &enabled, + Port: &portHTTP, + APIKeySecretRef: "lfs-api", + APIKeySecretKey: "token", + }, + Metrics: kafscalev1alpha1.LfsProxyMetricsSpec{ + Enabled: &enabled, + Port: &portMetrics, + }, + Health: kafscalev1alpha1.LfsProxyHealthSpec{ + Enabled: &enabled, + Port: &portHealth, + }, + S3: kafscalev1alpha1.LfsProxyS3Spec{ + Namespace: "lfs-ns", + MaxBlobSize: &maxBlob, + ChunkSize: &chunkSize, + EnsureBucket: &enabled, + }, + }, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.reconcileLfsProxyDeployment(context.Background(), cluster, []string{"http://etcd:2379"}); err != nil { + t.Fatalf("reconcile lfs proxy deployment: %v", err) + } + + deploy := &appsv1.Deployment{} + assertFound(t, c, deploy, cluster.Namespace, lfsProxyName(cluster)) + if len(deploy.Spec.Template.Spec.Containers) != 1 { + t.Fatalf("expected 1 container, got %d", 
len(deploy.Spec.Template.Spec.Containers)) + } + container := deploy.Spec.Template.Spec.Containers[0] + if got := envValue(container.Env, "KAFSCALE_LFS_PROXY_ADDR"); got != ":19092" { + t.Fatalf("expected proxy addr, got %q", got) + } + if got := envValue(container.Env, "KAFSCALE_LFS_PROXY_S3_BUCKET"); got != "bucket" { + t.Fatalf("expected bucket env, got %q", got) + } + if got := envValue(container.Env, "KAFSCALE_LFS_PROXY_S3_REGION"); got != "us-east-1" { + t.Fatalf("expected region env, got %q", got) + } + if got := envValue(container.Env, "KAFSCALE_LFS_PROXY_HTTP_ADDR"); got != ":18080" { + t.Fatalf("expected http addr, got %q", got) + } + var apiKeyEnv *corev1.EnvVar + for i := range container.Env { + if container.Env[i].Name == "KAFSCALE_LFS_PROXY_HTTP_API_KEY" { + apiKeyEnv = &container.Env[i] + break + } + } + if apiKeyEnv == nil || apiKeyEnv.ValueFrom == nil || apiKeyEnv.ValueFrom.SecretKeyRef == nil { + t.Fatalf("expected api key secret ref") + } + if apiKeyEnv.ValueFrom.SecretKeyRef.Name != "lfs-api" || apiKeyEnv.ValueFrom.SecretKeyRef.Key != "token" { + t.Fatalf("unexpected api key secret ref: %v", apiKeyEnv.ValueFrom.SecretKeyRef) + } +} + +func TestReconcileLfsProxyService(t *testing.T) { + enabled := true + portKafka := int32(19092) + portHTTP := int32(18080) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{}, + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1", CredentialsSecretRef: "creds"}, + LfsProxy: kafscalev1alpha1.LfsProxySpec{ + Enabled: true, + Service: kafscalev1alpha1.LfsProxyServiceSpec{ + Type: string(corev1.ServiceTypeLoadBalancer), + Annotations: map[string]string{"cloud.example.com/lb": "external"}, + Port: &portKafka, + }, + HTTP: kafscalev1alpha1.LfsProxyHTTPSpec{Enabled: &enabled, Port: &portHTTP}, + }, + }, + } + scheme := testScheme(t) + c := 
fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.reconcileLfsProxyService(context.Background(), cluster); err != nil { + t.Fatalf("reconcile lfs proxy service: %v", err) + } + + svc := &corev1.Service{} + assertFound(t, c, svc, cluster.Namespace, lfsProxyName(cluster)) + if svc.Spec.Type != corev1.ServiceTypeLoadBalancer { + t.Fatalf("expected service type LoadBalancer, got %s", svc.Spec.Type) + } + if len(svc.Spec.Ports) != 2 { + t.Fatalf("expected 2 service ports, got %d", len(svc.Spec.Ports)) + } +} + func TestServiceParsingHelpers(t *testing.T) { if got := parseServiceType("LoadBalancer"); got != corev1.ServiceTypeLoadBalancer { t.Fatalf("expected LoadBalancer, got %q", got) diff --git a/pkg/operator/etcd_resources.go b/pkg/operator/etcd_resources.go index 03d4e76b..80714437 100644 --- a/pkg/operator/etcd_resources.go +++ b/pkg/operator/etcd_resources.go @@ -182,7 +182,7 @@ func reconcileEtcdStatefulSet(ctx context.Context, c client.Client, scheme *runt sts.Spec.ServiceName = fmt.Sprintf("%s-etcd", cluster.Name) sts.Spec.Replicas = &replicas sts.Spec.Selector = &metav1.LabelSelector{MatchLabels: labels} - sts.Spec.Template.ObjectMeta.Labels = labels + sts.Spec.Template.Labels = labels useMemory := parseBoolEnv(operatorEtcdStorageMemoryEnv) if useMemory { @@ -470,7 +470,7 @@ func reconcileEtcdSnapshotCronJob(ctx context.Context, c client.Client, scheme * cron.Spec.ConcurrencyPolicy = batchv1.ForbidConcurrent cron.Spec.SuccessfulJobsHistoryLimit = int32Ptr(3) cron.Spec.FailedJobsHistoryLimit = int32Ptr(3) - cron.Spec.JobTemplate.Spec.Template.ObjectMeta.Labels = labels + cron.Spec.JobTemplate.Spec.Template.Labels = labels cron.Spec.JobTemplate.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever cron.Spec.JobTemplate.Spec.Template.Spec.Volumes = []corev1.Volume{ { diff --git a/pkg/operator/helpers_test.go b/pkg/operator/helpers_test.go new file mode 100644 index 
00000000..48177aab --- /dev/null +++ b/pkg/operator/helpers_test.go @@ -0,0 +1,1721 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package operator + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + autoscalingv2 "k8s.io/api/autoscaling/v2" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + kafscalev1alpha1 "github.com/KafScale/platform/api/v1alpha1" + "github.com/KafScale/platform/internal/testutil" + "github.com/KafScale/platform/pkg/metadata" + "github.com/KafScale/platform/pkg/protocol" + "github.com/KafScale/platform/pkg/storage" + "github.com/twmb/franz-go/pkg/kmsg" + + clientv3 "go.etcd.io/etcd/client/v3" +) + +// --- cluster_controller.go helpers --- + +func TestParsePullPolicyAlways(t *testing.T) { + if got := parsePullPolicy("Always"); got != corev1.PullAlways { + t.Fatalf("expected Always, got %q", got) + } +} + +func TestParsePullPolicyNever(t *testing.T) { + if got := parsePullPolicy("Never"); got != corev1.PullNever { + t.Fatalf("expected Never, got %q", got) + } +} + +func 
TestParsePullPolicyDefault(t *testing.T) { + if got := parsePullPolicy(""); got != corev1.PullIfNotPresent { + t.Fatalf("expected IfNotPresent, got %q", got) + } +} + +func TestCopyStringMapNil(t *testing.T) { + if got := copyStringMap(nil); got != nil { + t.Fatalf("expected nil for nil map, got %v", got) + } +} + +func TestCopyStringMapEmpty(t *testing.T) { + if got := copyStringMap(map[string]string{}); got != nil { + t.Fatalf("expected nil for empty map, got %v", got) + } +} + +func TestCopyStringMapIsolation(t *testing.T) { + src := map[string]string{"key": "val"} + dst := copyStringMap(src) + if dst["key"] != "val" { + t.Fatalf("expected val, got %q", dst["key"]) + } + dst["key"] = "changed" + if src["key"] != "val" { + t.Fatal("modifying copy affected source") + } +} + +func TestSetClusterCondition(t *testing.T) { + conditions := []metav1.Condition{} + cond := metav1.Condition{ + Type: "Ready", + Status: metav1.ConditionTrue, + Reason: "OK", + } + setClusterCondition(&conditions, cond) + if len(conditions) != 1 || conditions[0].Type != "Ready" { + t.Fatalf("expected 1 condition, got %d", len(conditions)) + } + + // Update existing + cond2 := metav1.Condition{ + Type: "Ready", + Status: metav1.ConditionFalse, + Reason: "Failed", + } + setClusterCondition(&conditions, cond2) + if len(conditions) != 1 || conditions[0].Reason != "Failed" { + t.Fatalf("expected condition updated, got %+v", conditions) + } +} + +func TestCloneResourceListNil(t *testing.T) { + if got := cloneResourceList(nil); got != nil { + t.Fatalf("expected nil for nil resource list, got %v", got) + } +} + +func TestCloneResourceListDeepCopy(t *testing.T) { + src := corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + } + dst := cloneResourceList(src) + if dst[corev1.ResourceCPU] != resource.MustParse("100m") { + t.Fatalf("unexpected CPU: %v", dst[corev1.ResourceCPU]) + } + if len(dst) != 2 { + t.Fatalf("expected 2 entries, 
got %d", len(dst)) + } +} + +func TestInt32Ptr(t *testing.T) { + p := int32Ptr(42) + if *p != 42 { + t.Fatalf("expected 42, got %d", *p) + } +} + +func TestBoolPtr(t *testing.T) { + p := boolPtr(true) + if !*p { + t.Fatal("expected true") + } +} + +func TestBrokerHeadlessServiceName(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo"}, + } + if got := brokerHeadlessServiceName(cluster); got != "demo-broker-headless" { + t.Fatalf("expected demo-broker-headless, got %q", got) + } +} + +func TestGetEnv(t *testing.T) { + t.Setenv("TEST_GETENV_VAR", "hello") + if got := getEnv("TEST_GETENV_VAR", "fallback"); got != "hello" { + t.Fatalf("expected hello, got %q", got) + } + if got := getEnv("TEST_GETENV_MISSING", "fallback"); got != "fallback" { + t.Fatalf("expected fallback, got %q", got) + } +} + +// --- reconcileBrokerDeployment --- + +func TestReconcileBrokerDeployment(t *testing.T) { + replicas := int32(5) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{Replicas: &replicas}, + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.reconcileBrokerDeployment(context.Background(), cluster, []string{"http://etcd:2379"}); err != nil { + t.Fatalf("reconcileBrokerDeployment: %v", err) + } + + sts := &appsv1.StatefulSet{} + assertFound(t, c, sts, "default", "demo-broker") + if *sts.Spec.Replicas != 5 { + t.Fatalf("expected 5 replicas, got %d", *sts.Spec.Replicas) + } +} + +func TestReconcileBrokerDeploymentDefaultReplicas(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: 
kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{}, + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.reconcileBrokerDeployment(context.Background(), cluster, []string{"http://etcd:2379"}); err != nil { + t.Fatalf("reconcileBrokerDeployment: %v", err) + } + + sts := &appsv1.StatefulSet{} + assertFound(t, c, sts, "default", "demo-broker") + if *sts.Spec.Replicas != 3 { + t.Fatalf("expected default 3 replicas, got %d", *sts.Spec.Replicas) + } +} + +// --- deleteLegacyBrokerDeployment --- + +func TestDeleteLegacyBrokerDeployment(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + // Should not error even when no legacy deployment exists + if err := r.deleteLegacyBrokerDeployment(context.Background(), cluster); err != nil { + t.Fatalf("deleteLegacyBrokerDeployment: %v", err) + } +} + +// --- reconcileBrokerHPA --- + +func TestReconcileBrokerHPA(t *testing.T) { + replicas := int32(3) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{Replicas: &replicas}, + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + scheme := testScheme(t) + if err := autoscalingv2.AddToScheme(scheme); err != nil { + t.Fatalf("add autoscaling scheme: %v", err) + } + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err 
:= r.reconcileBrokerHPA(context.Background(), cluster); err != nil { + t.Fatalf("reconcileBrokerHPA: %v", err) + } + + hpa := &autoscalingv2.HorizontalPodAutoscaler{} + assertFound(t, c, hpa, "default", "demo-broker") + if *hpa.Spec.MinReplicas != 3 { + t.Fatalf("expected min replicas 3, got %d", *hpa.Spec.MinReplicas) + } + if hpa.Spec.MaxReplicas != 12 { + t.Fatalf("expected max replicas 12, got %d", hpa.Spec.MaxReplicas) + } +} + +func TestReconcileBrokerHPADefaultReplicas(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{}, + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + scheme := testScheme(t) + if err := autoscalingv2.AddToScheme(scheme); err != nil { + t.Fatalf("add autoscaling scheme: %v", err) + } + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.reconcileBrokerHPA(context.Background(), cluster); err != nil { + t.Fatalf("reconcileBrokerHPA: %v", err) + } + + hpa := &autoscalingv2.HorizontalPodAutoscaler{} + assertFound(t, c, hpa, "default", "demo-broker") + if *hpa.Spec.MinReplicas != 3 { + t.Fatalf("expected default min replicas 3, got %d", *hpa.Spec.MinReplicas) + } +} + +// --- updateStatus --- + +func TestUpdateStatus(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.updateStatus(context.Background(), cluster, metav1.ConditionTrue, "Reconciled", "All resources ready"); err != nil { + t.Fatalf("updateStatus: %v", err) + } + if 
cluster.Status.Phase != "Reconciled" { + t.Fatalf("expected phase Reconciled, got %q", cluster.Status.Phase) + } +} + +// --- populateEtcdSnapshotStatus --- + +func TestPopulateEtcdSnapshotStatusNotManaged(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + r.populateEtcdSnapshotStatus(context.Background(), cluster, EtcdResolution{Managed: false}) + + found := false + for _, cond := range cluster.Status.Conditions { + if cond.Type == "EtcdSnapshot" && cond.Reason == "SnapshotNotManaged" { + found = true + } + } + if !found { + t.Fatal("expected SnapshotNotManaged condition") + } +} + +func TestPopulateEtcdSnapshotStatusCronMissing(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + r.populateEtcdSnapshotStatus(context.Background(), cluster, EtcdResolution{Managed: true}) + + found := false + for _, cond := range cluster.Status.Conditions { + if cond.Type == "EtcdSnapshot" && cond.Reason == "SnapshotCronMissing" { + found = true + } + } + if !found { + t.Fatal("expected SnapshotCronMissing condition") + } +} + +func TestPopulateEtcdSnapshotStatusNeverSucceeded(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + cron := &batchv1.CronJob{ + ObjectMeta: metav1.ObjectMeta{Name: "demo-etcd-snapshot", Namespace: "default"}, + } + scheme := testScheme(t) + c := 
fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster, cron).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + r.populateEtcdSnapshotStatus(context.Background(), cluster, EtcdResolution{Managed: true}) + + found := false + for _, cond := range cluster.Status.Conditions { + if cond.Type == "EtcdSnapshot" && cond.Reason == "SnapshotNeverSucceeded" { + found = true + } + } + if !found { + t.Fatal("expected SnapshotNeverSucceeded condition") + } +} + +func TestPopulateEtcdSnapshotStatusHealthy(t *testing.T) { + now := metav1.NewTime(time.Now()) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + cron := &batchv1.CronJob{ + ObjectMeta: metav1.ObjectMeta{Name: "demo-etcd-snapshot", Namespace: "default"}, + Status: batchv1.CronJobStatus{ + LastSuccessfulTime: &now, + LastScheduleTime: &now, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster, cron).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + r.populateEtcdSnapshotStatus(context.Background(), cluster, EtcdResolution{Managed: true}) + + found := false + for _, cond := range cluster.Status.Conditions { + if cond.Type == "EtcdSnapshot" && cond.Reason == "SnapshotHealthy" { + found = true + } + } + if !found { + t.Fatal("expected SnapshotHealthy condition") + } +} + +func TestPopulateEtcdSnapshotStatusStale(t *testing.T) { + // Use a time well in the past + old := metav1.NewTime(time.Now().Add(-30 * 24 * time.Hour)) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + cron := &batchv1.CronJob{ + ObjectMeta: metav1.ObjectMeta{Name: "demo-etcd-snapshot", Namespace: "default"}, + Status: batchv1.CronJobStatus{ + LastSuccessfulTime: &old, + }, + } + scheme := testScheme(t) + c := 
fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster, cron).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + r.populateEtcdSnapshotStatus(context.Background(), cluster, EtcdResolution{Managed: true}) + + found := false + for _, cond := range cluster.Status.Conditions { + if cond.Type == "EtcdSnapshot" && cond.Reason == "SnapshotStale" { + found = true + } + } + if !found { + t.Fatal("expected SnapshotStale condition") + } +} + +// --- etcd_resources.go helpers --- + +func TestSanitizeBucketName(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"my-bucket", "my-bucket"}, + {"My_Bucket.123", "my-bucket-123"}, + {" spaces ", "spaces"}, + {"", defaultSnapshotBucketPrefix}, + {"!!!!", defaultSnapshotBucketPrefix}, + {"a--b", "a-b"}, + {"-leading-trailing-", "leading-trailing"}, + } + for _, tc := range tests { + t.Run(tc.input, func(t *testing.T) { + got := sanitizeBucketName(tc.input) + if got != tc.want { + t.Fatalf("sanitizeBucketName(%q) = %q, want %q", tc.input, got, tc.want) + } + }) + } +} + +func TestDefaultEtcdSnapshotBucket(t *testing.T) { + tests := []struct { + name string + namespace string + }{ + {"", ""}, + {"demo", ""}, + {"", "default"}, + {"demo", "default"}, + } + for _, tc := range tests { + t.Run(fmt.Sprintf("%s/%s", tc.namespace, tc.name), func(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: tc.name, Namespace: tc.namespace}, + } + got := defaultEtcdSnapshotBucket(cluster) + if got == "" { + t.Fatal("expected non-empty bucket name") + } + }) + } +} + +func TestBuildEtcdInitialCluster(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo"}, + } + + got := buildEtcdInitialCluster(cluster, 3) + if got == "" { + t.Fatal("expected non-empty cluster string") + } + + // Zero replicas => at least 1 + got = buildEtcdInitialCluster(cluster, 0) + if got == "" { + 
t.Fatal("expected non-empty cluster string for 0 replicas") + } +} + +func TestEtcdReplicas(t *testing.T) { + // Default (no env set) + t.Setenv(operatorEtcdReplicasEnv, "") + got := etcdReplicas() + if got != int32(defaultEtcdReplicas) { + t.Fatalf("expected default %d, got %d", defaultEtcdReplicas, got) + } + + // Valid override + t.Setenv(operatorEtcdReplicasEnv, "5") + got = etcdReplicas() + if got != 5 { + t.Fatalf("expected 5, got %d", got) + } + + // Invalid value + t.Setenv(operatorEtcdReplicasEnv, "abc") + got = etcdReplicas() + if got != int32(defaultEtcdReplicas) { + t.Fatalf("expected default for invalid, got %d", got) + } + + // Zero + t.Setenv(operatorEtcdReplicasEnv, "0") + got = etcdReplicas() + if got != int32(defaultEtcdReplicas) { + t.Fatalf("expected default for zero, got %d", got) + } +} + +func TestParseBoolEnv(t *testing.T) { + t.Setenv("TEST_BOOL_ENV", "true") + if !parseBoolEnv("TEST_BOOL_ENV") { + t.Fatal("expected true for 'true'") + } + t.Setenv("TEST_BOOL_ENV", "1") + if !parseBoolEnv("TEST_BOOL_ENV") { + t.Fatal("expected true for '1'") + } + t.Setenv("TEST_BOOL_ENV", "yes") + if !parseBoolEnv("TEST_BOOL_ENV") { + t.Fatal("expected true for 'yes'") + } + t.Setenv("TEST_BOOL_ENV", "on") + if !parseBoolEnv("TEST_BOOL_ENV") { + t.Fatal("expected true for 'on'") + } + t.Setenv("TEST_BOOL_ENV", "false") + if parseBoolEnv("TEST_BOOL_ENV") { + t.Fatal("expected false for 'false'") + } + t.Setenv("TEST_BOOL_ENV", "") + if parseBoolEnv("TEST_BOOL_ENV") { + t.Fatal("expected false for empty") + } +} + +func TestBoolToString(t *testing.T) { + if boolToString(true) != "1" { + t.Fatal("expected '1' for true") + } + if boolToString(false) != "0" { + t.Fatal("expected '0' for false") + } +} + +func TestStringPtrOrNil(t *testing.T) { + if stringPtrOrNil("") != nil { + t.Fatal("expected nil for empty") + } + if stringPtrOrNil(" ") != nil { + t.Fatal("expected nil for whitespace") + } + p := stringPtrOrNil("hello") + if p == nil || *p != "hello" { + 
t.Fatalf("expected 'hello', got %v", p) + } +} + +func TestCleanEndpoints(t *testing.T) { + got := cleanEndpoints([]string{" http://a:2379 ", "http://b:2379", "http://a:2379", ""}) + if len(got) != 2 { + t.Fatalf("expected 2 unique endpoints, got %d: %v", len(got), got) + } +} + +// --- lfs_proxy_resources.go helpers --- + +func TestLfsProxyName(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo"}, + } + if got := lfsProxyName(cluster); got != "demo-lfs-proxy" { + t.Fatalf("expected demo-lfs-proxy, got %q", got) + } +} + +func TestLfsProxyMetricsName(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo"}, + } + if got := lfsProxyMetricsName(cluster); got != "demo-lfs-proxy-metrics" { + t.Fatalf("expected demo-lfs-proxy-metrics, got %q", got) + } +} + +func TestLfsProxyNamespace(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{}, + } + if got := lfsProxyNamespace(cluster); got != "default" { + t.Fatalf("expected default, got %q", got) + } + + cluster.Spec.LfsProxy.S3.Namespace = "custom" + if got := lfsProxyNamespace(cluster); got != "custom" { + t.Fatalf("expected custom, got %q", got) + } +} + +func TestLfsProxyPortDefaults(t *testing.T) { + spec := kafscalev1alpha1.LfsProxySpec{} + if got := lfsProxyPort(spec); got != defaultLfsProxyPort { + t.Fatalf("expected default port %d, got %d", defaultLfsProxyPort, got) + } + if got := lfsProxyHTTPPort(spec); got != defaultLfsProxyHTTPPort { + t.Fatalf("expected default http port %d, got %d", defaultLfsProxyHTTPPort, got) + } + if got := lfsProxyHealthPort(spec); got != defaultLfsProxyHealthPort { + t.Fatalf("expected default health port %d, got %d", defaultLfsProxyHealthPort, got) + } + if got := lfsProxyMetricsPort(spec); got != defaultLfsProxyMetricsPort { + t.Fatalf("expected default metrics 
port %d, got %d", defaultLfsProxyMetricsPort, got) + } +} + +func TestLfsProxyPortCustom(t *testing.T) { + p1, p2, p3, p4 := int32(11111), int32(22222), int32(33333), int32(44444) + spec := kafscalev1alpha1.LfsProxySpec{ + Service: kafscalev1alpha1.LfsProxyServiceSpec{Port: &p1}, + HTTP: kafscalev1alpha1.LfsProxyHTTPSpec{Port: &p2}, + Health: kafscalev1alpha1.LfsProxyHealthSpec{Port: &p3}, + Metrics: kafscalev1alpha1.LfsProxyMetricsSpec{Port: &p4}, + } + if got := lfsProxyPort(spec); got != 11111 { + t.Fatalf("expected 11111, got %d", got) + } + if got := lfsProxyHTTPPort(spec); got != 22222 { + t.Fatalf("expected 22222, got %d", got) + } + if got := lfsProxyHealthPort(spec); got != 33333 { + t.Fatalf("expected 33333, got %d", got) + } + if got := lfsProxyMetricsPort(spec); got != 44444 { + t.Fatalf("expected 44444, got %d", got) + } +} + +func TestLfsProxyEnabledFlags(t *testing.T) { + trueVal, falseVal := true, false + + spec := kafscalev1alpha1.LfsProxySpec{} + // Defaults + if lfsProxyHTTPEnabled(spec) { + t.Fatal("expected HTTP disabled by default") + } + if !lfsProxyMetricsEnabled(spec) { + t.Fatal("expected metrics enabled by default") + } + if !lfsProxyHealthEnabled(spec) { + t.Fatal("expected health enabled by default") + } + + // Explicit + spec.HTTP.Enabled = &trueVal + spec.Metrics.Enabled = &falseVal + spec.Health.Enabled = &falseVal + if !lfsProxyHTTPEnabled(spec) { + t.Fatal("expected HTTP enabled when set") + } + if lfsProxyMetricsEnabled(spec) { + t.Fatal("expected metrics disabled when set") + } + if lfsProxyHealthEnabled(spec) { + t.Fatal("expected health disabled when set") + } +} + +func TestLfsProxyForcePathStyle(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + S3: kafscalev1alpha1.S3Spec{Endpoint: "http://minio:9000"}, + }, + } + // When endpoint is set, force path style + if !lfsProxyForcePathStyle(cluster) { + t.Fatal("expected force path style with endpoint") + } + // No 
endpoint + cluster.Spec.S3.Endpoint = "" + if lfsProxyForcePathStyle(cluster) { + t.Fatal("expected no force path style without endpoint") + } + // Explicit override + trueVal := true + cluster.Spec.LfsProxy.S3.ForcePathStyle = &trueVal + if !lfsProxyForcePathStyle(cluster) { + t.Fatal("expected force path style when explicitly set") + } +} + +func TestLfsProxyEnsureBucket(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{} + if lfsProxyEnsureBucket(cluster) { + t.Fatal("expected false by default") + } + trueVal := true + cluster.Spec.LfsProxy.S3.EnsureBucket = &trueVal + if !lfsProxyEnsureBucket(cluster) { + t.Fatal("expected true when set") + } +} + +func TestServicePort(t *testing.T) { + sp := servicePort("test", 8080) + if sp.Name != "test" || sp.Port != 8080 || sp.Protocol != corev1.ProtocolTCP { + t.Fatalf("unexpected service port: %+v", sp) + } +} + +// --- reconcileLfsProxyMetricsService --- + +func TestReconcileLfsProxyMetricsService(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.reconcileLfsProxyMetricsService(context.Background(), cluster); err != nil { + t.Fatalf("reconcileLfsProxyMetricsService: %v", err) + } + + svc := &corev1.Service{} + assertFound(t, c, svc, "default", "demo-lfs-proxy-metrics") + if svc.Spec.Type != corev1.ServiceTypeClusterIP { + t.Fatalf("expected ClusterIP, got %s", svc.Spec.Type) + } +} + +// --- deleteLfsProxyResources --- + +func TestDeleteLfsProxyResources(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := 
&ClusterReconciler{Client: c, Scheme: scheme} + + // Should not error even when resources don't exist + if err := r.deleteLfsProxyResources(context.Background(), cluster); err != nil { + t.Fatalf("deleteLfsProxyResources: %v", err) + } +} + +// --- deleteLfsProxyMetricsService --- + +func TestDeleteLfsProxyMetricsService(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.deleteLfsProxyMetricsService(context.Background(), cluster); err != nil { + t.Fatalf("deleteLfsProxyMetricsService: %v", err) + } +} + +// --- reconcileLfsProxyResources --- + +func TestReconcileLfsProxyResourcesDisabled(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + LfsProxy: kafscalev1alpha1.LfsProxySpec{Enabled: false}, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.reconcileLfsProxyResources(context.Background(), cluster, []string{"http://etcd:2379"}); err != nil { + t.Fatalf("reconcileLfsProxyResources disabled: %v", err) + } +} + +func TestReconcileLfsProxyResourcesEnabled(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + LfsProxy: kafscalev1alpha1.LfsProxySpec{Enabled: true}, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := 
r.reconcileLfsProxyResources(context.Background(), cluster, []string{"http://etcd:2379"}); err != nil { + t.Fatalf("reconcileLfsProxyResources enabled: %v", err) + } + + // Should have created deployment and service + deploy := &appsv1.Deployment{} + assertFound(t, c, deploy, "default", "demo-lfs-proxy") +} + +// --- snapshot.go --- + +func TestMergeSnapshots(t *testing.T) { + next := metadata.ClusterMetadata{ + Topics: []protocol.MetadataTopic{ + {Topic: kmsg.StringPtr("orders")}, + }, + } + existing := metadata.ClusterMetadata{ + Topics: []protocol.MetadataTopic{ + {Topic: kmsg.StringPtr("orders")}, // duplicate + {Topic: kmsg.StringPtr("events")}, // new + {Topic: kmsg.StringPtr("")}, // empty name, skip + {Topic: kmsg.StringPtr("bad"), ErrorCode: 3}, // error, skip + }, + } + + merged := mergeSnapshots(next, existing) + if len(merged.Topics) != 2 { + t.Fatalf("expected 2 topics, got %d", len(merged.Topics)) + } + names := make(map[string]bool) + for _, topic := range merged.Topics { + names[*topic.Topic] = true + } + if !names["orders"] || !names["events"] { + t.Fatalf("unexpected topics: %v", merged.Topics) + } +} + +func TestMergeSnapshotsEmptyExisting(t *testing.T) { + next := metadata.ClusterMetadata{ + Topics: []protocol.MetadataTopic{{Topic: kmsg.StringPtr("orders")}}, + } + existing := metadata.ClusterMetadata{} + + merged := mergeSnapshots(next, existing) + if len(merged.Topics) != 1 { + t.Fatalf("expected 1 topic, got %d", len(merged.Topics)) + } +} + +func TestBuildReplicaIDsZero(t *testing.T) { + if got := buildReplicaIDs(0); got != nil { + t.Fatalf("expected nil for 0 replicas, got %v", got) + } +} + +func TestBuildReplicaIDsNegative(t *testing.T) { + if got := buildReplicaIDs(-1); got != nil { + t.Fatalf("expected nil for negative replicas, got %v", got) + } +} + +func TestBuildReplicaIDs(t *testing.T) { + got := buildReplicaIDs(3) + if len(got) != 3 { + t.Fatalf("expected 3 IDs, got %d", len(got)) + } + for i, id := range got { + if id != int32(i) 
{ + t.Fatalf("expected ID %d at index %d, got %d", i, i, id) + } + } +} + +// --- snapshot_access.go --- + +func TestFirstSecretValue(t *testing.T) { + secret := &corev1.Secret{ + Data: map[string][]byte{ + "KEY1": []byte("val1"), + "KEY2": []byte("val2"), + }, + } + if got := firstSecretValue(secret, "KEY1", "KEY2"); got != "val1" { + t.Fatalf("expected val1, got %q", got) + } + if got := firstSecretValue(secret, "MISSING", "KEY2"); got != "val2" { + t.Fatalf("expected val2, got %q", got) + } + if got := firstSecretValue(secret, "MISSING"); got != "" { + t.Fatalf("expected empty, got %q", got) + } + // Empty value should be skipped + secret.Data["EMPTY"] = []byte(" ") + if got := firstSecretValue(secret, "EMPTY", "KEY1"); got != "val1" { + t.Fatalf("expected val1 (skip empty), got %q", got) + } +} + +func TestLoadS3CredentialsNoSecret(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + S3: kafscalev1alpha1.S3Spec{CredentialsSecretRef: ""}, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + cfg := &storage.S3Config{} + if err := r.loadS3Credentials(context.Background(), cluster, cfg); err != nil { + t.Fatalf("loadS3Credentials: %v", err) + } +} + +func TestLoadS3CredentialsSecretNotFound(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + S3: kafscalev1alpha1.S3Spec{CredentialsSecretRef: "missing-secret"}, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + cfg := &storage.S3Config{} + err := r.loadS3Credentials(context.Background(), cluster, cfg) + if err == nil { + 
t.Fatal("expected error for missing secret") + } +} + +func TestLoadS3CredentialsWithSecret(t *testing.T) { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "default"}, + Data: map[string][]byte{ + s3AccessKeyEnv: []byte("AKID"), + s3SecretKeyEnv: []byte("SECRET"), + }, + } + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + S3: kafscalev1alpha1.S3Spec{CredentialsSecretRef: "creds"}, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster, secret).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + cfg := &storage.S3Config{} + if err := r.loadS3Credentials(context.Background(), cluster, cfg); err != nil { + t.Fatalf("loadS3Credentials: %v", err) + } + if cfg.AccessKeyID != "AKID" || cfg.SecretAccessKey != "SECRET" { + t.Fatalf("unexpected credentials: %+v", cfg) + } +} + +func TestRecordSnapshotAccessFailure(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + r.recordSnapshotAccessFailure(context.Background(), cluster, "default/demo", fmt.Errorf("test error")) + + found := false + for _, cond := range cluster.Status.Conditions { + if cond.Type == "EtcdSnapshotAccess" && cond.Reason == "SnapshotAccessFailed" { + found = true + } + } + if !found { + t.Fatal("expected SnapshotAccessFailed condition") + } +} + +func TestVerifySnapshotS3AccessNotManaged(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + scheme := testScheme(t) + c := 
fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + err := r.verifySnapshotS3Access(context.Background(), cluster, EtcdResolution{Managed: false}) + if err != nil { + t.Fatalf("expected nil error for non-managed, got %v", err) + } +} + +// --- topic_controller.go --- + +func TestSetTopicCondition(t *testing.T) { + conditions := []metav1.Condition{ + {Type: "Ready", Status: metav1.ConditionTrue, Reason: "OK"}, + } + // Update existing + setTopicCondition(&conditions, metav1.Condition{ + Type: "Ready", Status: metav1.ConditionFalse, Reason: "Failed", + }) + if len(conditions) != 1 || conditions[0].Reason != "Failed" { + t.Fatalf("expected updated condition, got %+v", conditions) + } + + // Add new + setTopicCondition(&conditions, metav1.Condition{ + Type: "Published", Status: metav1.ConditionTrue, Reason: "OK", + }) + if len(conditions) != 2 { + t.Fatalf("expected 2 conditions, got %d", len(conditions)) + } +} + +// --- metrics.go --- + +func TestRecordClusterCount(t *testing.T) { + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).Build() + // Should not panic even with no clusters + recordClusterCount(context.Background(), c) +} + +func TestRecordClusterCountWithClusters(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + recordClusterCount(context.Background(), c) +} + +// --- brokerContainer: exercise all conditional branches --- + +func TestBrokerContainerAllOptions(t *testing.T) { + // Set ACL env vars + t.Setenv("KAFSCALE_ACL_ENABLED", "true") + t.Setenv("KAFSCALE_ACL_JSON", `{"rules":[]}`) + t.Setenv("KAFSCALE_ACL_FILE", "/etc/kafscale/acl.json") + t.Setenv("KAFSCALE_ACL_FAIL_OPEN", "false") + t.Setenv("KAFSCALE_PRINCIPAL_SOURCE", "mtls") + 
t.Setenv("KAFSCALE_PROXY_PROTOCOL", "true") + t.Setenv("KAFSCALE_LOG_LEVEL", "debug") + t.Setenv("KAFSCALE_TRACE_KAFKA", "1") + + replicas := int32(1) // single replica to exercise advertisedHost branch + port := int32(19092) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{ + Replicas: &replicas, + AdvertisedHost: "my.broker.host", + AdvertisedPort: &port, + Resources: kafscalev1alpha1.BrokerResources{ + Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("500m")}, + Limits: corev1.ResourceList{corev1.ResourceMemory: resource.MustParse("1Gi")}, + }, + }, + S3: kafscalev1alpha1.S3Spec{ + Bucket: "bucket", + Region: "us-east-1", + Endpoint: "http://minio:9000", + ReadBucket: "read-bucket", + ReadRegion: "eu-west-1", + ReadEndpoint: "http://minio-read:9000", + CredentialsSecretRef: "s3-creds", + }, + Config: kafscalev1alpha1.ClusterConfigSpec{ + SegmentBytes: 1048576, + FlushIntervalMs: 5000, + CacheSize: "128Mi", + }, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + container := r.brokerContainer(cluster, []string{"http://etcd:2379"}) + + if container.Name != "broker" { + t.Fatalf("expected broker, got %q", container.Name) + } + + envMap := make(map[string]string) + for _, e := range container.Env { + envMap[e.Name] = e.Value + } + + // Single replica with advertised host => host should be set + if envMap["KAFSCALE_BROKER_HOST"] != "my.broker.host" { + t.Fatalf("expected KAFSCALE_BROKER_HOST=my.broker.host, got %q", envMap["KAFSCALE_BROKER_HOST"]) + } + if envMap["KAFSCALE_BROKER_PORT"] != "19092" { + t.Fatalf("expected port 19092, got %q", envMap["KAFSCALE_BROKER_PORT"]) + } + if envMap["KAFSCALE_S3_ENDPOINT"] != "http://minio:9000" { + t.Fatalf("expected S3 endpoint, got %q", 
envMap["KAFSCALE_S3_ENDPOINT"]) + } + if envMap["KAFSCALE_S3_PATH_STYLE"] != "true" { + t.Fatal("expected S3 path style set") + } + if envMap["KAFSCALE_S3_READ_BUCKET"] != "read-bucket" { + t.Fatalf("expected read bucket, got %q", envMap["KAFSCALE_S3_READ_BUCKET"]) + } + if envMap["KAFSCALE_S3_READ_REGION"] != "eu-west-1" { + t.Fatalf("expected read region, got %q", envMap["KAFSCALE_S3_READ_REGION"]) + } + if envMap["KAFSCALE_S3_READ_ENDPOINT"] != "http://minio-read:9000" { + t.Fatalf("expected read endpoint, got %q", envMap["KAFSCALE_S3_READ_ENDPOINT"]) + } + if envMap["KAFSCALE_SEGMENT_BYTES"] != "1048576" { + t.Fatalf("expected segment bytes, got %q", envMap["KAFSCALE_SEGMENT_BYTES"]) + } + if envMap["KAFSCALE_FLUSH_INTERVAL_MS"] != "5000" { + t.Fatalf("expected flush interval, got %q", envMap["KAFSCALE_FLUSH_INTERVAL_MS"]) + } + if envMap["KAFSCALE_CACHE_BYTES"] != "128Mi" { + t.Fatalf("expected cache bytes, got %q", envMap["KAFSCALE_CACHE_BYTES"]) + } + if envMap["KAFSCALE_ACL_ENABLED"] != "true" { + t.Fatal("expected ACL enabled env") + } + if envMap["KAFSCALE_ACL_JSON"] == "" { + t.Fatal("expected ACL JSON env") + } + if envMap["KAFSCALE_ACL_FILE"] == "" { + t.Fatal("expected ACL file env") + } + if envMap["KAFSCALE_LOG_LEVEL"] != "debug" { + t.Fatal("expected log level env") + } + if envMap["KAFSCALE_TRACE_KAFKA"] != "1" { + t.Fatal("expected trace kafka env") + } + + // CredentialsSecretRef should add envFrom + if len(container.EnvFrom) != 1 { + t.Fatalf("expected 1 envFrom, got %d", len(container.EnvFrom)) + } + if container.EnvFrom[0].SecretRef.Name != "s3-creds" { + t.Fatalf("expected secretRef s3-creds, got %q", container.EnvFrom[0].SecretRef.Name) + } + + // Resources should be set + if container.Resources.Requests.Cpu().String() != "500m" { + t.Fatalf("expected 500m CPU request, got %s", container.Resources.Requests.Cpu().String()) + } +} + +// --- deleteLegacyBrokerDeployment: with existing deployment --- + +func 
TestDeleteLegacyBrokerDeploymentExists(t *testing.T) { + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + } + legacy := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "demo-broker", Namespace: "default"}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster, legacy).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.deleteLegacyBrokerDeployment(context.Background(), cluster); err != nil { + t.Fatalf("deleteLegacyBrokerDeployment: %v", err) + } + assertNotFound(t, c, &appsv1.Deployment{}, "default", "demo-broker") +} + +// --- reconcileLfsProxyResources: enabled with metrics disabled --- + +func TestReconcileLfsProxyResourcesMetricsDisabled(t *testing.T) { + falseVal := false + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + LfsProxy: kafscalev1alpha1.LfsProxySpec{ + Enabled: true, + Metrics: kafscalev1alpha1.LfsProxyMetricsSpec{Enabled: &falseVal}, + }, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + if err := r.reconcileLfsProxyResources(context.Background(), cluster, []string{"http://etcd:2379"}); err != nil { + t.Fatalf("reconcileLfsProxyResources: %v", err) + } + // Deployment should exist, metrics service should not + assertFound(t, c, &appsv1.Deployment{}, "default", "demo-lfs-proxy") + assertNotFound(t, c, &corev1.Service{}, "default", "demo-lfs-proxy-metrics") +} + +// --- reconcileEtcdResources: test full pipeline directly --- + +func TestReconcileEtcdResourcesFullPipeline(t *testing.T) { + t.Setenv(operatorEtcdEndpointsEnv, "") + t.Setenv(operatorEtcdSnapshotBucketEnv, "snap-bucket") + cluster := 
&kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + + if err := reconcileEtcdResources(context.Background(), c, scheme, cluster); err != nil { + t.Fatalf("reconcileEtcdResources: %v", err) + } + + assertFound(t, c, &appsv1.StatefulSet{}, "default", "demo-etcd") + assertFound(t, c, &corev1.Service{}, "default", "demo-etcd") + assertFound(t, c, &corev1.Service{}, "default", "demo-etcd-client") +} + +// --- reconcileEtcdStatefulSet: memory storage mode --- + +func TestReconcileEtcdStatefulSetMemoryMode(t *testing.T) { + t.Setenv(operatorEtcdStorageMemoryEnv, "true") + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{S3: kafscalev1alpha1.S3Spec{Bucket: "b", Region: "r"}}, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + + if err := reconcileEtcdStatefulSet(context.Background(), c, scheme, cluster); err != nil { + t.Fatalf("reconcileEtcdStatefulSet memory mode: %v", err) + } + + sts := &appsv1.StatefulSet{} + assertFound(t, c, sts, "default", "demo-etcd") + if len(sts.Spec.VolumeClaimTemplates) != 0 { + t.Fatal("expected no VolumeClaimTemplates in memory mode") + } + // Should have an emptyDir volume named "data" with Memory medium + foundMemData := false + for _, v := range sts.Spec.Template.Spec.Volumes { + if v.Name == "data" && v.VolumeSource.EmptyDir != nil && v.VolumeSource.EmptyDir.Medium == corev1.StorageMediumMemory { + foundMemData = true + } + } + if !foundMemData { + t.Fatal("expected memory-backed emptyDir volume for data") + } +} + +// --- PublishMetadataSnapshot: with embedded etcd --- + +func 
TestPublishMetadataSnapshotEmptyEndpoints(t *testing.T) { + err := PublishMetadataSnapshot(context.Background(), nil, metadata.ClusterMetadata{}) + if err == nil || !strings.Contains(err.Error(), "endpoints required") { + t.Fatalf("expected endpoints required error, got %v", err) + } +} + +func TestPublishMetadataSnapshotHappyPath(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + t.Setenv(operatorEtcdSilenceLogsEnv, "true") + + snap := metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 0, Host: "b0", Port: 9092}}, + ControllerID: 0, + Topics: []protocol.MetadataTopic{ + {Topic: kmsg.StringPtr("orders"), Partitions: []protocol.MetadataPartition{{Partition: 0, Leader: 0}}}, + }, + } + + if err := PublishMetadataSnapshot(context.Background(), endpoints, snap); err != nil { + t.Fatalf("PublishMetadataSnapshot: %v", err) + } + + // Verify the snapshot was written + cli, err := clientv3.New(clientv3.Config{Endpoints: endpoints, DialTimeout: 3 * time.Second}) + if err != nil { + t.Fatalf("etcd client: %v", err) + } + defer func() { _ = cli.Close() }() + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + resp, err := cli.Get(ctx, "/kafscale/metadata/snapshot") + if err != nil { + t.Fatalf("get snapshot: %v", err) + } + if len(resp.Kvs) == 0 { + t.Fatal("snapshot not written to etcd") + } + var loaded metadata.ClusterMetadata + if err := json.Unmarshal(resp.Kvs[0].Value, &loaded); err != nil { + t.Fatalf("unmarshal snapshot: %v", err) + } + if len(loaded.Topics) != 1 || *loaded.Topics[0].Topic != "orders" { + t.Fatalf("unexpected snapshot: %+v", loaded) + } +} + +func TestPublishMetadataSnapshotMergesExisting(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + t.Setenv(operatorEtcdSilenceLogsEnv, "true") + + // Write an initial snapshot with "events" topic + initial := metadata.ClusterMetadata{ + Topics: []protocol.MetadataTopic{{Topic: kmsg.StringPtr("events")}}, + } + if err := 
PublishMetadataSnapshot(context.Background(), endpoints, initial); err != nil { + t.Fatalf("initial publish: %v", err) + } + + // Publish new snapshot with "orders" only; should merge "events" from existing + next := metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{NodeID: 0, Host: "b0", Port: 9092}}, + Topics: []protocol.MetadataTopic{{Topic: kmsg.StringPtr("orders")}}, + } + if err := PublishMetadataSnapshot(context.Background(), endpoints, next); err != nil { + t.Fatalf("second publish: %v", err) + } + + // Verify merged result + cli, err := clientv3.New(clientv3.Config{Endpoints: endpoints, DialTimeout: 3 * time.Second}) + if err != nil { + t.Fatalf("etcd client: %v", err) + } + defer func() { _ = cli.Close() }() + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + resp, err := cli.Get(ctx, "/kafscale/metadata/snapshot") + if err != nil { + t.Fatalf("get snapshot: %v", err) + } + var loaded metadata.ClusterMetadata + if err := json.Unmarshal(resp.Kvs[0].Value, &loaded); err != nil { + t.Fatalf("unmarshal: %v", err) + } + names := make(map[string]bool) + for _, topic := range loaded.Topics { + names[*topic.Topic] = true + } + if !names["orders"] || !names["events"] { + t.Fatalf("expected orders+events, got %v", loaded.Topics) + } +} + +// --- NewSnapshotPublisher + Publish --- + +func TestNewSnapshotPublisher(t *testing.T) { + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).Build() + p := NewSnapshotPublisher(c) + if p == nil || p.Client == nil { + t.Fatal("expected non-nil publisher") + } +} + +func TestSnapshotPublisherPublish(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + t.Setenv(operatorEtcdSilenceLogsEnv, "true") + + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default", UID: types.UID("uid-123")}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{}, + S3: 
kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + topic := &kafscalev1alpha1.KafscaleTopic{ + ObjectMeta: metav1.ObjectMeta{Name: "orders", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleTopicSpec{ + ClusterRef: "demo", + Partitions: 3, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster, topic).Build() + p := NewSnapshotPublisher(c) + + if err := p.Publish(context.Background(), cluster, endpoints); err != nil { + t.Fatalf("Publish: %v", err) + } + + // Verify snapshot was written + cli, err := clientv3.New(clientv3.Config{Endpoints: endpoints, DialTimeout: 3 * time.Second}) + if err != nil { + t.Fatalf("etcd client: %v", err) + } + defer func() { _ = cli.Close() }() + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + resp, err := cli.Get(ctx, "/kafscale/metadata/snapshot") + if err != nil { + t.Fatalf("get snapshot: %v", err) + } + if len(resp.Kvs) == 0 { + t.Fatal("snapshot not written") + } + var loaded metadata.ClusterMetadata + if err := json.Unmarshal(resp.Kvs[0].Value, &loaded); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if len(loaded.Topics) != 1 || *loaded.Topics[0].Topic != "orders" { + t.Fatalf("unexpected topics: %+v", loaded.Topics) + } + if len(loaded.Topics[0].Partitions) != 3 { + t.Fatalf("expected 3 partitions, got %d", len(loaded.Topics[0].Partitions)) + } +} + +func TestSnapshotPublisherPublishNoMatchingTopics(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + t.Setenv(operatorEtcdSilenceLogsEnv, "true") + + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{}, + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + // Topic belongs to different cluster + topic := &kafscalev1alpha1.KafscaleTopic{ + ObjectMeta: 
metav1.ObjectMeta{Name: "events", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleTopicSpec{ + ClusterRef: "other-cluster", + Partitions: 1, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster, topic).Build() + p := NewSnapshotPublisher(c) + + if err := p.Publish(context.Background(), cluster, endpoints); err != nil { + t.Fatalf("Publish: %v", err) + } +} + +// --- ClusterReconciler.Reconcile --- + +func TestClusterReconcilerReconcile(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + t.Setenv(operatorEtcdEndpointsEnv, endpoints[0]) + t.Setenv(operatorEtcdSnapshotSkipPreflightEnv, "true") + t.Setenv(operatorEtcdSilenceLogsEnv, "true") + + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{}, + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + scheme := testScheme(t) + if err := autoscalingv2.AddToScheme(scheme); err != nil { + t.Fatalf("add autoscaling scheme: %v", err) + } + c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(cluster).WithObjects(cluster).Build() + publisher := NewSnapshotPublisher(c) + r := &ClusterReconciler{Client: c, Scheme: scheme, Publisher: publisher} + + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "demo", Namespace: "default"}} + result, err := r.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile: %v", err) + } + if result.RequeueAfter != 0 { + t.Fatalf("expected no requeue, got %v", result.RequeueAfter) + } + + // Verify resources were created + assertFound(t, c, &appsv1.StatefulSet{}, "default", "demo-broker") + assertFound(t, c, &corev1.Service{}, "default", "demo-broker-headless") + assertFound(t, c, &corev1.Service{}, "default", "demo-broker") +} + +func TestClusterReconcilerReconcileNotFound(t *testing.T) { + scheme := 
testScheme(t) + if err := autoscalingv2.AddToScheme(scheme); err != nil { + t.Fatalf("add autoscaling scheme: %v", err) + } + c := fake.NewClientBuilder().WithScheme(scheme).Build() + publisher := NewSnapshotPublisher(c) + r := &ClusterReconciler{Client: c, Scheme: scheme, Publisher: publisher} + + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "nonexistent", Namespace: "default"}} + result, err := r.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile not found: %v", err) + } + if result.RequeueAfter != 0 { + t.Fatalf("expected no requeue for not found, got %v", result.RequeueAfter) + } +} + +// --- TopicReconciler.Reconcile --- + +func TestTopicReconcilerReconcile(t *testing.T) { + endpoints := testutil.StartEmbeddedEtcd(t) + t.Setenv(operatorEtcdEndpointsEnv, endpoints[0]) + t.Setenv(operatorEtcdSilenceLogsEnv, "true") + + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{}, + S3: kafscalev1alpha1.S3Spec{Bucket: "bucket", Region: "us-east-1"}, + }, + } + topic := &kafscalev1alpha1.KafscaleTopic{ + ObjectMeta: metav1.ObjectMeta{Name: "orders", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleTopicSpec{ + ClusterRef: "demo", + Partitions: 2, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(topic).WithObjects(cluster, topic).Build() + publisher := NewSnapshotPublisher(c) + r := &TopicReconciler{Client: c, Scheme: scheme, Publisher: publisher} + + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "orders", Namespace: "default"}} + result, err := r.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile: %v", err) + } + if result.RequeueAfter != 0 { + t.Fatalf("expected no requeue, got %v", result.RequeueAfter) + } +} + +func TestTopicReconcilerReconcileNotFound(t 
*testing.T) { + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).Build() + publisher := NewSnapshotPublisher(c) + r := &TopicReconciler{Client: c, Scheme: scheme, Publisher: publisher} + + req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "nonexistent", Namespace: "default"}} + result, err := r.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile not found: %v", err) + } + if result.RequeueAfter != 0 { + t.Fatalf("expected no requeue, got %v", result.RequeueAfter) + } +} + +// --- BuildClusterMetadata with advertised host --- + +func TestBuildClusterMetadataSingleReplicaAdvertisedHost(t *testing.T) { + replicas := int32(1) + port := int32(19092) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default", UID: types.UID("test-uid")}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + Brokers: kafscalev1alpha1.BrokerSpec{ + Replicas: &replicas, + AdvertisedHost: "my.custom.host", + AdvertisedPort: &port, + }, + }, + } + topics := []kafscalev1alpha1.KafscaleTopic{ + { + ObjectMeta: metav1.ObjectMeta{Name: "orders"}, + Spec: kafscalev1alpha1.KafscaleTopicSpec{Partitions: 2}, + }, + } + meta := BuildClusterMetadata(cluster, topics) + if len(meta.Brokers) != 1 { + t.Fatalf("expected 1 broker, got %d", len(meta.Brokers)) + } + if meta.Brokers[0].Host != "my.custom.host" { + t.Fatalf("expected custom host, got %q", meta.Brokers[0].Host) + } + if meta.Brokers[0].Port != 19092 { + t.Fatalf("expected port 19092, got %d", meta.Brokers[0].Port) + } + if meta.ClusterID == nil || *meta.ClusterID != "test-uid" { + t.Fatal("expected cluster ID set") + } + if meta.ClusterName == nil || *meta.ClusterName != "demo" { + t.Fatal("expected cluster name set") + } +} + +// --- lfsProxyContainer with all optional branches --- + +func TestLfsProxyContainerAllOptions(t *testing.T) { + trueVal := true + advPort := int32(19093) + cacheTTL := int32(600) + maxBlob := 
int64(1048576) + chunkSize := int64(65536) + cluster := &kafscalev1alpha1.KafscaleCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "demo", Namespace: "default"}, + Spec: kafscalev1alpha1.KafscaleClusterSpec{ + S3: kafscalev1alpha1.S3Spec{ + Bucket: "bucket", + Region: "us-east-1", + Endpoint: "http://minio:9000", + CredentialsSecretRef: "s3-creds", + }, + LfsProxy: kafscalev1alpha1.LfsProxySpec{ + Enabled: true, + Image: "custom-lfs:latest", + ImagePullPolicy: "Always", + AdvertisedHost: "lfs.example.com", + AdvertisedPort: &advPort, + BackendCacheTTLSeconds: &cacheTTL, + Backends: []string{"backend1:9092", "backend2:9092"}, + HTTP: kafscalev1alpha1.LfsProxyHTTPSpec{ + Enabled: &trueVal, + APIKeySecretRef: "api-key-secret", + APIKeySecretKey: "MY_KEY", + }, + S3: kafscalev1alpha1.LfsProxyS3Spec{ + ForcePathStyle: &trueVal, + EnsureBucket: &trueVal, + MaxBlobSize: &maxBlob, + ChunkSize: &chunkSize, + }, + }, + }, + } + scheme := testScheme(t) + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build() + r := &ClusterReconciler{Client: c, Scheme: scheme} + + container := r.lfsProxyContainer(cluster, []string{"http://etcd:2379"}) + + if container.Image != "custom-lfs:latest" { + t.Fatalf("expected custom image, got %q", container.Image) + } + if container.ImagePullPolicy != corev1.PullAlways { + t.Fatalf("expected Always pull, got %v", container.ImagePullPolicy) + } + + envMap := make(map[string]string) + for _, e := range container.Env { + envMap[e.Name] = e.Value + } + if envMap["KAFSCALE_LFS_PROXY_ADVERTISED_HOST"] != "lfs.example.com" { + t.Fatal("expected advertised host") + } + if envMap["KAFSCALE_LFS_PROXY_ADVERTISED_PORT"] != "19093" { + t.Fatal("expected advertised port") + } + if envMap["KAFSCALE_LFS_PROXY_BACKEND_CACHE_TTL_SEC"] != "600" { + t.Fatal("expected backend cache TTL") + } + if envMap["KAFSCALE_LFS_PROXY_BACKENDS"] != "backend1:9092,backend2:9092" { + t.Fatal("expected backends") + } + if envMap["KAFSCALE_LFS_PROXY_S3_ENDPOINT"] 
!= "http://minio:9000" { + t.Fatal("expected S3 endpoint") + } + if envMap["KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE"] != "true" { + t.Fatal("expected force path style") + } + if envMap["KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET"] != "true" { + t.Fatal("expected ensure bucket") + } + if envMap["KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE"] != "1048576" { + t.Fatal("expected max blob size") + } + if envMap["KAFSCALE_LFS_PROXY_CHUNK_SIZE"] != "65536" { + t.Fatal("expected chunk size") + } + // HTTP port should be present + if envMap["KAFSCALE_LFS_PROXY_HTTP_ADDR"] == "" { + t.Fatal("expected HTTP addr env") + } + // API key should be from secret + foundAPIKey := false + for _, e := range container.Env { + if e.Name == "KAFSCALE_LFS_PROXY_HTTP_API_KEY" && e.ValueFrom != nil && e.ValueFrom.SecretKeyRef != nil { + if e.ValueFrom.SecretKeyRef.Key == "MY_KEY" { + foundAPIKey = true + } + } + } + if !foundAPIKey { + t.Fatal("expected API key secret ref with custom key") + } + // S3 credentials should be from secret + foundS3Key := false + for _, e := range container.Env { + if e.Name == "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY" && e.ValueFrom != nil { + foundS3Key = true + } + } + if !foundS3Key { + t.Fatal("expected S3 credentials from secret ref") + } + // HTTP and metrics ports should exist in container ports + if len(container.Ports) < 3 { + t.Fatalf("expected at least 3 ports (kafka, http, health), got %d", len(container.Ports)) + } +} + diff --git a/pkg/operator/lfs_proxy_resources.go b/pkg/operator/lfs_proxy_resources.go new file mode 100644 index 00000000..eb8fe77f --- /dev/null +++ b/pkg/operator/lfs_proxy_resources.go @@ -0,0 +1,366 @@ +// Copyright 2025 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package operator + +import ( + "context" + "fmt" + "strings" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + kafscalev1alpha1 "github.com/KafScale/platform/api/v1alpha1" +) + +const ( + defaultLfsProxyImage = "ghcr.io/kafscale/kafscale-lfs-proxy:latest" + defaultLfsProxyImagePullPolicy = string(corev1.PullIfNotPresent) + defaultLfsProxyPort = int32(9092) + defaultLfsProxyHTTPPort = int32(8080) + defaultLfsProxyHealthPort = int32(9094) + defaultLfsProxyMetricsPort = int32(9095) +) + +var lfsProxyImage = getEnv("LFS_PROXY_IMAGE", defaultLfsProxyImage) +var lfsProxyImagePullPolicy = getEnv("LFS_PROXY_IMAGE_PULL_POLICY", defaultLfsProxyImagePullPolicy) + +func (r *ClusterReconciler) reconcileLfsProxyResources(ctx context.Context, cluster *kafscalev1alpha1.KafscaleCluster, endpoints []string) error { + if !cluster.Spec.LfsProxy.Enabled { + return r.deleteLfsProxyResources(ctx, cluster) + } + if err := r.reconcileLfsProxyDeployment(ctx, cluster, endpoints); err != nil { + return err + } + if err := r.reconcileLfsProxyService(ctx, cluster); err != nil { + return err + } + if lfsProxyMetricsEnabled(cluster.Spec.LfsProxy) { + return r.reconcileLfsProxyMetricsService(ctx, cluster) + } + return r.deleteLfsProxyMetricsService(ctx, cluster) +} + +func (r *ClusterReconciler) reconcileLfsProxyDeployment(ctx context.Context, cluster 
*kafscalev1alpha1.KafscaleCluster, endpoints []string) error { + deploy := &appsv1.Deployment{ObjectMeta: metav1.ObjectMeta{ + Name: lfsProxyName(cluster), + Namespace: cluster.Namespace, + }} + + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, deploy, func() error { + replicas := int32(2) + if cluster.Spec.LfsProxy.Replicas != nil && *cluster.Spec.LfsProxy.Replicas > 0 { + replicas = *cluster.Spec.LfsProxy.Replicas + } + labels := map[string]string{ + "app": "kafscale-lfs-proxy", + "cluster": cluster.Name, + } + deploy.Spec.Selector = &metav1.LabelSelector{MatchLabels: labels} + deploy.Spec.Replicas = &replicas + deploy.Spec.Template.Labels = labels + deploy.Spec.Template.Spec.Containers = []corev1.Container{ + r.lfsProxyContainer(cluster, endpoints), + } + return controllerutil.SetControllerReference(cluster, deploy, r.Scheme) + }) + return err +} + +func (r *ClusterReconciler) reconcileLfsProxyService(ctx context.Context, cluster *kafscalev1alpha1.KafscaleCluster) error { + svc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{ + Name: lfsProxyName(cluster), + Namespace: cluster.Namespace, + }} + + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, svc, func() error { + labels := lfsProxyLabels(cluster) + annotations := copyStringMap(cluster.Spec.LfsProxy.Service.Annotations) + ports := []corev1.ServicePort{servicePort("kafka", lfsProxyPort(cluster.Spec.LfsProxy))} + if lfsProxyHTTPEnabled(cluster.Spec.LfsProxy) { + ports = append(ports, servicePort("http", lfsProxyHTTPPort(cluster.Spec.LfsProxy))) + } + + svc.Labels = labels + svc.Spec.Selector = labels + svc.Spec.Ports = ports + svc.Spec.Type = parseServiceType(cluster.Spec.LfsProxy.Service.Type) + svc.Annotations = annotations + if len(cluster.Spec.LfsProxy.Service.LoadBalancerSourceRanges) > 0 { + svc.Spec.LoadBalancerSourceRanges = append([]string{}, cluster.Spec.LfsProxy.Service.LoadBalancerSourceRanges...) 
+ } + return controllerutil.SetControllerReference(cluster, svc, r.Scheme) + }) + return err +} + +func (r *ClusterReconciler) reconcileLfsProxyMetricsService(ctx context.Context, cluster *kafscalev1alpha1.KafscaleCluster) error { + svc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{ + Name: lfsProxyMetricsName(cluster), + Namespace: cluster.Namespace, + }} + + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, svc, func() error { + labels := lfsProxyLabels(cluster) + ports := []corev1.ServicePort{servicePort("metrics", lfsProxyMetricsPort(cluster.Spec.LfsProxy))} + + svc.Labels = labels + svc.Spec.Selector = labels + svc.Spec.Ports = ports + svc.Spec.Type = corev1.ServiceTypeClusterIP + return controllerutil.SetControllerReference(cluster, svc, r.Scheme) + }) + return err +} + +func (r *ClusterReconciler) deleteLfsProxyResources(ctx context.Context, cluster *kafscalev1alpha1.KafscaleCluster) error { + deploy := &appsv1.Deployment{ObjectMeta: metav1.ObjectMeta{ + Name: lfsProxyName(cluster), + Namespace: cluster.Namespace, + }} + if err := r.Client.Delete(ctx, deploy); err != nil && !apierrors.IsNotFound(err) { + return err + } + if err := r.deleteLfsProxyMetricsService(ctx, cluster); err != nil { + return err + } + svc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{ + Name: lfsProxyName(cluster), + Namespace: cluster.Namespace, + }} + if err := r.Client.Delete(ctx, svc); err != nil && !apierrors.IsNotFound(err) { + return err + } + return nil +} + +func (r *ClusterReconciler) deleteLfsProxyMetricsService(ctx context.Context, cluster *kafscalev1alpha1.KafscaleCluster) error { + svc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{ + Name: lfsProxyMetricsName(cluster), + Namespace: cluster.Namespace, + }} + if err := r.Client.Delete(ctx, svc); err != nil && !apierrors.IsNotFound(err) { + return err + } + return nil +} + +func (r *ClusterReconciler) lfsProxyContainer(cluster *kafscalev1alpha1.KafscaleCluster, endpoints []string) corev1.Container { + image := 
lfsProxyImage + if strings.TrimSpace(cluster.Spec.LfsProxy.Image) != "" { + image = strings.TrimSpace(cluster.Spec.LfsProxy.Image) + } + pullPolicy := parsePullPolicy(lfsProxyImagePullPolicy) + if strings.TrimSpace(cluster.Spec.LfsProxy.ImagePullPolicy) != "" { + pullPolicy = parsePullPolicy(cluster.Spec.LfsProxy.ImagePullPolicy) + } + portKafka := lfsProxyPort(cluster.Spec.LfsProxy) + portHTTP := lfsProxyHTTPPort(cluster.Spec.LfsProxy) + portHealth := lfsProxyHealthPort(cluster.Spec.LfsProxy) + portMetrics := lfsProxyMetricsPort(cluster.Spec.LfsProxy) + + env := []corev1.EnvVar{ + {Name: "KAFSCALE_LFS_PROXY_ADDR", Value: fmt.Sprintf(":%d", portKafka)}, + {Name: "KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS", Value: strings.Join(endpoints, ",")}, + {Name: "KAFSCALE_LFS_PROXY_S3_BUCKET", Value: cluster.Spec.S3.Bucket}, + {Name: "KAFSCALE_LFS_PROXY_S3_REGION", Value: cluster.Spec.S3.Region}, + {Name: "KAFSCALE_S3_NAMESPACE", Value: lfsProxyNamespace(cluster)}, + } + if cluster.Spec.LfsProxy.AdvertisedHost != "" { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_ADVERTISED_HOST", Value: cluster.Spec.LfsProxy.AdvertisedHost}) + } + if cluster.Spec.LfsProxy.AdvertisedPort != nil && *cluster.Spec.LfsProxy.AdvertisedPort > 0 { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_ADVERTISED_PORT", Value: fmt.Sprintf("%d", *cluster.Spec.LfsProxy.AdvertisedPort)}) + } + if cluster.Spec.LfsProxy.BackendCacheTTLSeconds != nil && *cluster.Spec.LfsProxy.BackendCacheTTLSeconds > 0 { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_BACKEND_CACHE_TTL_SEC", Value: fmt.Sprintf("%d", *cluster.Spec.LfsProxy.BackendCacheTTLSeconds)}) + } + if len(cluster.Spec.LfsProxy.Backends) > 0 { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_BACKENDS", Value: strings.Join(cluster.Spec.LfsProxy.Backends, ",")}) + } + if lfsProxyHTTPEnabled(cluster.Spec.LfsProxy) { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_HTTP_ADDR", Value: fmt.Sprintf(":%d", 
portHTTP)}) + if cluster.Spec.LfsProxy.HTTP.APIKeySecretRef != "" { + key := strings.TrimSpace(cluster.Spec.LfsProxy.HTTP.APIKeySecretKey) + if key == "" { + key = "API_KEY" + } + env = append(env, corev1.EnvVar{ + Name: "KAFSCALE_LFS_PROXY_HTTP_API_KEY", + ValueFrom: &corev1.EnvVarSource{SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: cluster.Spec.LfsProxy.HTTP.APIKeySecretRef}, + Key: key, + }}, + }) + } + } + if lfsProxyHealthEnabled(cluster.Spec.LfsProxy) { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_HEALTH_ADDR", Value: fmt.Sprintf(":%d", portHealth)}) + } + if lfsProxyMetricsEnabled(cluster.Spec.LfsProxy) { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_METRICS_ADDR", Value: fmt.Sprintf(":%d", portMetrics)}) + } + if strings.TrimSpace(cluster.Spec.S3.Endpoint) != "" { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_S3_ENDPOINT", Value: cluster.Spec.S3.Endpoint}) + } + if cluster.Spec.S3.CredentialsSecretRef != "" { + env = append(env, + corev1.EnvVar{ + Name: "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY", + ValueFrom: &corev1.EnvVarSource{SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: cluster.Spec.S3.CredentialsSecretRef}, + Key: "AWS_ACCESS_KEY_ID", + }}, + }, + corev1.EnvVar{ + Name: "KAFSCALE_LFS_PROXY_S3_SECRET_KEY", + ValueFrom: &corev1.EnvVarSource{SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: cluster.Spec.S3.CredentialsSecretRef}, + Key: "AWS_SECRET_ACCESS_KEY", + }}, + }, + ) + } + if lfsProxyForcePathStyle(cluster) { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE", Value: "true"}) + } + if lfsProxyEnsureBucket(cluster) { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET", Value: "true"}) + } + if cluster.Spec.LfsProxy.S3.MaxBlobSize != nil && *cluster.Spec.LfsProxy.S3.MaxBlobSize > 0 { + env = append(env, 
corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE", Value: fmt.Sprintf("%d", *cluster.Spec.LfsProxy.S3.MaxBlobSize)}) + } + if cluster.Spec.LfsProxy.S3.ChunkSize != nil && *cluster.Spec.LfsProxy.S3.ChunkSize > 0 { + env = append(env, corev1.EnvVar{Name: "KAFSCALE_LFS_PROXY_CHUNK_SIZE", Value: fmt.Sprintf("%d", *cluster.Spec.LfsProxy.S3.ChunkSize)}) + } + + ports := []corev1.ContainerPort{{Name: "kafka", ContainerPort: portKafka}} + if lfsProxyHTTPEnabled(cluster.Spec.LfsProxy) { + ports = append(ports, corev1.ContainerPort{Name: "http", ContainerPort: portHTTP}) + } + if lfsProxyHealthEnabled(cluster.Spec.LfsProxy) { + ports = append(ports, corev1.ContainerPort{Name: "health", ContainerPort: portHealth}) + } + if lfsProxyMetricsEnabled(cluster.Spec.LfsProxy) { + ports = append(ports, corev1.ContainerPort{Name: "metrics", ContainerPort: portMetrics}) + } + + container := corev1.Container{ + Name: "lfs-proxy", + Image: image, + ImagePullPolicy: pullPolicy, + Ports: ports, + Env: env, + } + if lfsProxyHealthEnabled(cluster.Spec.LfsProxy) { + container.ReadinessProbe = &corev1.Probe{ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/readyz", Port: intstr.FromString("health")}}, InitialDelaySeconds: 2, PeriodSeconds: 5, FailureThreshold: 6} + container.LivenessProbe = &corev1.Probe{ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/livez", Port: intstr.FromString("health")}}, InitialDelaySeconds: 5, PeriodSeconds: 10, FailureThreshold: 3} + } + return container +} + +func lfsProxyName(cluster *kafscalev1alpha1.KafscaleCluster) string { + return fmt.Sprintf("%s-lfs-proxy", cluster.Name) +} + +func lfsProxyMetricsName(cluster *kafscalev1alpha1.KafscaleCluster) string { + return fmt.Sprintf("%s-lfs-proxy-metrics", cluster.Name) +} + +func lfsProxyLabels(cluster *kafscalev1alpha1.KafscaleCluster) map[string]string { + return map[string]string{ + "app": "kafscale-lfs-proxy", + "cluster": cluster.Name, + } +} + +func 
lfsProxyNamespace(cluster *kafscalev1alpha1.KafscaleCluster) string { + if ns := strings.TrimSpace(cluster.Spec.LfsProxy.S3.Namespace); ns != "" { + return ns + } + return cluster.Namespace +} + +func lfsProxyPort(spec kafscalev1alpha1.LfsProxySpec) int32 { + if spec.Service.Port != nil && *spec.Service.Port > 0 { + return *spec.Service.Port + } + return defaultLfsProxyPort +} + +func lfsProxyHTTPPort(spec kafscalev1alpha1.LfsProxySpec) int32 { + if spec.HTTP.Port != nil && *spec.HTTP.Port > 0 { + return *spec.HTTP.Port + } + return defaultLfsProxyHTTPPort +} + +func lfsProxyHealthPort(spec kafscalev1alpha1.LfsProxySpec) int32 { + if spec.Health.Port != nil && *spec.Health.Port > 0 { + return *spec.Health.Port + } + return defaultLfsProxyHealthPort +} + +func lfsProxyMetricsPort(spec kafscalev1alpha1.LfsProxySpec) int32 { + if spec.Metrics.Port != nil && *spec.Metrics.Port > 0 { + return *spec.Metrics.Port + } + return defaultLfsProxyMetricsPort +} + +func lfsProxyHTTPEnabled(spec kafscalev1alpha1.LfsProxySpec) bool { + if spec.HTTP.Enabled != nil { + return *spec.HTTP.Enabled + } + return false +} + +func lfsProxyMetricsEnabled(spec kafscalev1alpha1.LfsProxySpec) bool { + if spec.Metrics.Enabled != nil { + return *spec.Metrics.Enabled + } + return true +} + +func lfsProxyHealthEnabled(spec kafscalev1alpha1.LfsProxySpec) bool { + if spec.Health.Enabled != nil { + return *spec.Health.Enabled + } + return true +} + +func lfsProxyForcePathStyle(cluster *kafscalev1alpha1.KafscaleCluster) bool { + if cluster.Spec.LfsProxy.S3.ForcePathStyle != nil { + return *cluster.Spec.LfsProxy.S3.ForcePathStyle + } + return strings.TrimSpace(cluster.Spec.S3.Endpoint) != "" +} + +func lfsProxyEnsureBucket(cluster *kafscalev1alpha1.KafscaleCluster) bool { + if cluster.Spec.LfsProxy.S3.EnsureBucket != nil { + return *cluster.Spec.LfsProxy.S3.EnsureBucket + } + return false +} + +func servicePort(name string, port int32) corev1.ServicePort { + return corev1.ServicePort{Name: name, Port: 
port, Protocol: corev1.ProtocolTCP} +} diff --git a/pkg/operator/snapshot.go b/pkg/operator/snapshot.go index 2844bb9e..857a8b4b 100644 --- a/pkg/operator/snapshot.go +++ b/pkg/operator/snapshot.go @@ -70,6 +70,7 @@ func (p *SnapshotPublisher) Publish(ctx context.Context, cluster *kafscalev1alph return nil } +//nolint:unused // kept for snapshot recovery workflows func mergeExistingSnapshot(ctx context.Context, endpoints []string, next metadata.ClusterMetadata) metadata.ClusterMetadata { if len(endpoints) == 0 { return next @@ -99,6 +100,7 @@ func mergeExistingSnapshot(ctx context.Context, endpoints []string, next metadat return next } +//nolint:unused // kept for snapshot recovery workflows func readSnapshotFromEtcd(ctx context.Context, endpoints []string) (metadata.ClusterMetadata, error) { var snap metadata.ClusterMetadata cfg := clientv3.Config{ @@ -112,7 +114,7 @@ func readSnapshotFromEtcd(ctx context.Context, endpoints []string) (metadata.Clu if err != nil { return snap, err } - defer cli.Close() + defer func() { _ = cli.Close() }() getCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() resp, err := cli.Get(getCtx, "/kafscale/metadata/snapshot") diff --git a/pkg/protocol/encoding_test.go b/pkg/protocol/encoding_test.go new file mode 100644 index 00000000..4324c557 --- /dev/null +++ b/pkg/protocol/encoding_test.go @@ -0,0 +1,359 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package protocol + +import ( + "bytes" + "encoding/binary" + "fmt" + "strings" + "testing" +) + +func TestByteReaderInt16(t *testing.T) { + buf := make([]byte, 2) + binary.BigEndian.PutUint16(buf, 42) + r := newByteReader(buf) + v, err := r.Int16() + if err != nil { + t.Fatal(err) + } + if v != 42 { + t.Fatalf("expected 42, got %d", v) + } + + // Error: insufficient bytes + r2 := newByteReader([]byte{0x01}) + _, err = r2.Int16() + if err == nil { + t.Fatal("expected error for insufficient bytes") + } +} + +func TestByteReaderInt32(t *testing.T) { + buf := make([]byte, 4) + binary.BigEndian.PutUint32(buf, 1234) + r := newByteReader(buf) + v, err := r.Int32() + if err != nil { + t.Fatal(err) + } + if v != 1234 { + t.Fatalf("expected 1234, got %d", v) + } +} + +func TestByteReaderRemaining(t *testing.T) { + r := newByteReader([]byte{1, 2, 3, 4, 5}) + if r.remaining() != 5 { + t.Fatalf("expected 5, got %d", r.remaining()) + } + _, _ = r.read(3) + if r.remaining() != 2 { + t.Fatalf("expected 2, got %d", r.remaining()) + } +} + +func TestByteWriterBasic(t *testing.T) { + w := newByteWriter(0) + w.Int16(42) + w.Int32(1234) + + data := w.Bytes() + r := newByteReader(data) + + v16, _ := r.Int16() + v32, _ := r.Int32() + + if v16 != 42 || v32 != 1234 { + t.Fatalf("round-trip mismatch: %d %d", v16, v32) + } +} + +func TestFrameReadNegativeLength(t *testing.T) { + // Construct a frame with negative length + buf := make([]byte, 4) + binary.BigEndian.PutUint32(buf, 0x80000000) // -2147483648 as int32 + _, err := ReadFrame(bytes.NewReader(buf)) + if err == nil 
{ + t.Fatal("expected error for negative frame length") + } + if !strings.Contains(err.Error(), "invalid frame length") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestFrameReadTruncated(t *testing.T) { + // Length says 100 bytes but only 3 available + buf := make([]byte, 7) + binary.BigEndian.PutUint32(buf, 100) + buf[4] = 1 + buf[5] = 2 + buf[6] = 3 + _, err := ReadFrame(bytes.NewReader(buf)) + if err == nil { + t.Fatal("expected error for truncated payload") + } +} + +func TestFrameReadEmpty(t *testing.T) { + _, err := ReadFrame(bytes.NewReader(nil)) + if err == nil { + t.Fatal("expected error for empty reader") + } +} + +func TestSkipTaggedFields(t *testing.T) { + // Empty tagged fields (0 tags) + r := newByteReader([]byte{0}) + err := r.SkipTaggedFields() + if err != nil { + t.Fatalf("SkipTaggedFields: %v", err) + } + + // Insufficient data + r2 := newByteReader(nil) + err = r2.SkipTaggedFields() + if err == nil { + t.Fatal("expected error for empty reader") + } + + // Tagged fields with actual data: 1 tag, tag_id=0, size=3, data=[0x01,0x02,0x03] + w := newByteWriter(16) + w.UVarint(1) // count = 1 + w.UVarint(0) // tag id + w.UVarint(3) // size = 3 + w.write([]byte{1, 2, 3}) + r3 := newByteReader(w.Bytes()) + if err := r3.SkipTaggedFields(); err != nil { + t.Fatalf("SkipTaggedFields with data: %v", err) + } + if r3.remaining() != 0 { + t.Fatalf("expected 0 remaining, got %d", r3.remaining()) + } + + // Tagged field with zero-size data + w2 := newByteWriter(8) + w2.UVarint(1) // count = 1 + w2.UVarint(5) // tag id + w2.UVarint(0) // size = 0 + r4 := newByteReader(w2.Bytes()) + if err := r4.SkipTaggedFields(); err != nil { + t.Fatalf("SkipTaggedFields zero-size: %v", err) + } +} + +func TestWriteTaggedFields(t *testing.T) { + w := newByteWriter(4) + w.WriteTaggedFields(0) + r := newByteReader(w.Bytes()) + count, err := r.UVarint() + if err != nil { + t.Fatal(err) + } + if count != 0 { + t.Fatalf("expected 0 count, got %d", count) + } + + w2 := 
newByteWriter(4) + w2.WriteTaggedFields(3) + r2 := newByteReader(w2.Bytes()) + count2, err := r2.UVarint() + if err != nil { + t.Fatal(err) + } + if count2 != 3 { + t.Fatalf("expected 3 count, got %d", count2) + } +} + +func TestNullableStringRoundTrip(t *testing.T) { + // Non-nil string + w := newByteWriter(16) + s := "hello" + w.NullableString(&s) + r := newByteReader(w.Bytes()) + got, err := r.NullableString() + if err != nil { + t.Fatal(err) + } + if got == nil || *got != "hello" { + t.Fatalf("expected 'hello', got %v", got) + } + + // Nil string + w2 := newByteWriter(4) + w2.NullableString(nil) + r2 := newByteReader(w2.Bytes()) + got2, err := r2.NullableString() + if err != nil { + t.Fatal(err) + } + if got2 != nil { + t.Fatalf("expected nil, got %v", got2) + } + + // Error path: invalid negative length (not -1) + buf := make([]byte, 2) + binary.BigEndian.PutUint16(buf, 0xFFFE) // -2 as int16 + r3 := newByteReader(buf) + _, err = r3.NullableString() + if err == nil { + t.Fatal("expected error for invalid negative length") + } +} + +func TestWriteFrameRoundTrip(t *testing.T) { + var buf bytes.Buffer + payload := []byte{0xDE, 0xAD, 0xBE, 0xEF} + if err := WriteFrame(&buf, payload); err != nil { + t.Fatalf("WriteFrame: %v", err) + } + frame, err := ReadFrame(&buf) + if err != nil { + t.Fatalf("ReadFrame: %v", err) + } + if !bytes.Equal(frame.Payload, payload) { + t.Fatalf("payload mismatch") + } +} + +func TestWriteFrameError(t *testing.T) { + // Writer that fails on first write + err := WriteFrame(&failWriter{failAt: 0}, []byte{1, 2, 3}) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "write frame size") { + t.Fatalf("unexpected error: %v", err) + } + + // Writer that fails on second write (payload) + err = WriteFrame(&failWriter{failAt: 1}, []byte{1, 2, 3}) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "write frame payload") { + t.Fatalf("unexpected error: %v", err) + } +} + +type 
failWriter struct { + count int + failAt int +} + +func (w *failWriter) Write(p []byte) (int, error) { + if w.count == w.failAt { + return 0, fmt.Errorf("write error") + } + w.count++ + return len(p), nil +} + +func TestUVarintRoundTrip(t *testing.T) { + for _, val := range []uint64{0, 1, 127, 128, 16383, 16384, 1<<63 - 1} { + w := newByteWriter(16) + w.UVarint(val) + r := newByteReader(w.Bytes()) + got, err := r.UVarint() + if err != nil { + t.Fatalf("UVarint(%d): %v", val, err) + } + if got != val { + t.Fatalf("expected %d, got %d", val, got) + } + } + + // Error path + _, err := newByteReader(nil).UVarint() + if err == nil { + t.Fatal("expected error for empty reader") + } +} + +func TestParseRequestHeaderError(t *testing.T) { + // Too short for APIKey + _, _, err := ParseRequestHeader([]byte{0x00}) + if err == nil { + t.Fatal("expected error for short header") + } + + // Too short for version + _, _, err = ParseRequestHeader([]byte{0x00, 0x00, 0x01}) + if err == nil { + t.Fatal("expected error for missing version") + } +} + +func TestParseRequestUnknownAPIKey(t *testing.T) { + w := newByteWriter(16) + w.Int16(9999) // unknown API key + w.Int16(0) + w.Int32(1) + w.NullableString(nil) + + _, _, err := ParseRequest(w.Bytes()) + if err == nil { + t.Fatal("expected error for unknown API key") + } + if !strings.Contains(err.Error(), "unsupported") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestByteReaderNullableStringError(t *testing.T) { + // Int16 read fails + r := newByteReader(nil) + _, err := r.NullableString() + if err == nil { + t.Fatal("expected error for empty NullableString") + } + + // Valid length but truncated data + w := newByteWriter(4) + w.Int16(10) // length 10 but no data + r = newByteReader(w.Bytes()) + _, err = r.NullableString() + if err == nil { + t.Fatal("expected error for truncated NullableString data") + } +} + +func TestByteReaderSkipTaggedFieldsErrors(t *testing.T) { + // numTags read fails + r := newByteReader(nil) + if err 
:= r.SkipTaggedFields(); err == nil { + t.Fatal("expected error for empty reader") + } + + // numTags > 0 but tag read fails + w := newByteWriter(4) + w.UVarint(1) // 1 tagged field + r = newByteReader(w.Bytes()) + if err := r.SkipTaggedFields(); err == nil { + t.Fatal("expected error for truncated tagged fields") + } + + // tag OK but size read fails + w = newByteWriter(8) + w.UVarint(1) // 1 tagged field + w.UVarint(0) // tag = 0 + r = newByteReader(w.Bytes()) + if err := r.SkipTaggedFields(); err == nil { + t.Fatal("expected error for missing tagged field size") + } +} diff --git a/pkg/protocol/request_test.go b/pkg/protocol/request_test.go index 92b17f41..b2290a9d 100644 --- a/pkg/protocol/request_test.go +++ b/pkg/protocol/request_test.go @@ -330,3 +330,130 @@ func TestProduceMultiPartitionFranzCompat(t *testing.T) { }) } + +func TestParseJoinGroupRequest(t *testing.T) { + req := kmsg.NewPtrJoinGroupRequest() + req.Version = 1 + req.Group = "group-1" + req.SessionTimeoutMillis = 10000 + req.RebalanceTimeoutMillis = 30000 + req.MemberID = "" + req.ProtocolType = "consumer" + req.Protocols = []kmsg.JoinGroupRequestProtocol{ + {Name: "range", Metadata: []byte{0x00, 0x01}}, + } + + frame := buildRequestFrame(APIKeyJoinGroup, 1, 33, nil, req.AppendTo(nil)) + _, parsed, err := ParseRequest(frame) + if err != nil { + t.Fatalf("ParseRequest: %v", err) + } + joinReq, ok := parsed.(*kmsg.JoinGroupRequest) + if !ok { + t.Fatalf("expected *kmsg.JoinGroupRequest got %T", parsed) + } + if joinReq.Group != "group-1" || joinReq.SessionTimeoutMillis != 10000 { + t.Fatalf("unexpected join group: %#v", joinReq) + } + if joinReq.ProtocolType != "consumer" || len(joinReq.Protocols) != 1 { + t.Fatalf("unexpected protocols: %#v", joinReq) + } + if joinReq.Protocols[0].Name != "range" { + t.Fatalf("unexpected protocol name: %q", joinReq.Protocols[0].Name) + } +} + +func TestParseHeartbeatRequest(t *testing.T) { + req := kmsg.NewPtrHeartbeatRequest() + req.Version = 1 + req.Group = 
"group-1" + req.Generation = 5 + req.MemberID = "member-1" + + frame := buildRequestFrame(APIKeyHeartbeat, 1, 44, nil, req.AppendTo(nil)) + _, parsed, err := ParseRequest(frame) + if err != nil { + t.Fatalf("ParseRequest: %v", err) + } + heartReq, ok := parsed.(*kmsg.HeartbeatRequest) + if !ok { + t.Fatalf("expected *kmsg.HeartbeatRequest got %T", parsed) + } + if heartReq.Group != "group-1" || heartReq.Generation != 5 || heartReq.MemberID != "member-1" { + t.Fatalf("unexpected heartbeat: %#v", heartReq) + } +} + +func TestParseLeaveGroupRequest(t *testing.T) { + req := kmsg.NewPtrLeaveGroupRequest() + req.Version = 0 + req.Group = "group-1" + req.MemberID = "member-1" + + frame := buildRequestFrame(APIKeyLeaveGroup, 0, 55, nil, req.AppendTo(nil)) + _, parsed, err := ParseRequest(frame) + if err != nil { + t.Fatalf("ParseRequest: %v", err) + } + leaveReq, ok := parsed.(*kmsg.LeaveGroupRequest) + if !ok { + t.Fatalf("expected *kmsg.LeaveGroupRequest got %T", parsed) + } + if leaveReq.Group != "group-1" || leaveReq.MemberID != "member-1" { + t.Fatalf("unexpected leave group: %#v", leaveReq) + } +} + +func TestParseOffsetFetchRequest(t *testing.T) { + req := kmsg.NewPtrOffsetFetchRequest() + req.Version = 1 + req.Group = "group-1" + req.Topics = []kmsg.OffsetFetchRequestTopic{ + {Topic: "orders", Partitions: []int32{0, 1}}, + } + + frame := buildRequestFrame(APIKeyOffsetFetch, 1, 66, nil, req.AppendTo(nil)) + _, parsed, err := ParseRequest(frame) + if err != nil { + t.Fatalf("ParseRequest: %v", err) + } + fetchReq, ok := parsed.(*kmsg.OffsetFetchRequest) + if !ok { + t.Fatalf("expected *kmsg.OffsetFetchRequest got %T", parsed) + } + if fetchReq.Group != "group-1" { + t.Fatalf("unexpected group: %s", fetchReq.Group) + } + if len(fetchReq.Topics) != 1 || fetchReq.Topics[0].Topic != "orders" { + t.Fatalf("unexpected topics: %#v", fetchReq.Topics) + } + if len(fetchReq.Topics[0].Partitions) != 2 { + t.Fatalf("expected 2 partitions, got %d", 
len(fetchReq.Topics[0].Partitions)) + } +} + +func TestParseHeartbeatFlexible(t *testing.T) { + req := kmsg.NewPtrHeartbeatRequest() + req.Version = 4 + req.Group = "group-2" + req.Generation = 10 + req.MemberID = "member-2" + instanceID := "instance-1" + req.InstanceID = &instanceID + + frame := buildRequestFrame(APIKeyHeartbeat, 4, 77, nil, req.AppendTo(nil)) + _, parsed, err := ParseRequest(frame) + if err != nil { + t.Fatalf("ParseRequest: %v", err) + } + heartReq, ok := parsed.(*kmsg.HeartbeatRequest) + if !ok { + t.Fatalf("expected *kmsg.HeartbeatRequest got %T", parsed) + } + if heartReq.Group != "group-2" || heartReq.Generation != 10 { + t.Fatalf("unexpected heartbeat: %#v", heartReq) + } + if heartReq.InstanceID == nil || *heartReq.InstanceID != "instance-1" { + t.Fatalf("unexpected instance id: %v", heartReq.InstanceID) + } +} diff --git a/pkg/protocol/types.go b/pkg/protocol/types.go index ff530981..6f6a8093 100644 --- a/pkg/protocol/types.go +++ b/pkg/protocol/types.go @@ -21,4 +21,5 @@ type ( MetadataBroker = kmsg.MetadataResponseBroker MetadataTopic = kmsg.MetadataResponseTopic MetadataPartition = kmsg.MetadataResponseTopicPartition + ProduceRequest = kmsg.ProduceRequest ) diff --git a/pkg/storage/buffer_test.go b/pkg/storage/buffer_test.go index 4c9ef7cb..2a7a4eea 100644 --- a/pkg/storage/buffer_test.go +++ b/pkg/storage/buffer_test.go @@ -54,3 +54,54 @@ func TestWriteBufferThresholds(t *testing.T) { t.Fatalf("expected flush by time") } } + +func TestWriteBufferSize(t *testing.T) { + buf := NewWriteBuffer(WriteBufferConfig{}) + if buf.Size() != 0 { + t.Fatalf("expected initial size 0, got %d", buf.Size()) + } + buf.Append(RecordBatch{Bytes: make([]byte, 10), MessageCount: 1}) + if buf.Size() != 10 { + t.Fatalf("expected size 10, got %d", buf.Size()) + } + buf.Append(RecordBatch{Bytes: make([]byte, 5), MessageCount: 2}) + if buf.Size() != 15 { + t.Fatalf("expected size 15, got %d", buf.Size()) + } + buf.Drain() + if buf.Size() != 0 { + 
t.Fatalf("expected size 0 after drain, got %d", buf.Size()) + } +} + +func TestWriteBufferFlushByMessages(t *testing.T) { + buf := NewWriteBuffer(WriteBufferConfig{MaxMessages: 5}) + buf.Append(RecordBatch{Bytes: make([]byte, 1), MessageCount: 3}) + if buf.ShouldFlush(time.Now()) { + t.Fatal("3 messages should not trigger flush at threshold 5") + } + buf.Append(RecordBatch{Bytes: make([]byte, 1), MessageCount: 3}) + if !buf.ShouldFlush(time.Now()) { + t.Fatal("6 messages should trigger flush at threshold 5") + } +} + +func TestWriteBufferFlushByBatches(t *testing.T) { + buf := NewWriteBuffer(WriteBufferConfig{MaxBatches: 2}) + buf.Append(RecordBatch{Bytes: make([]byte, 1), MessageCount: 1}) + if buf.ShouldFlush(time.Now()) { + t.Fatal("1 batch should not trigger flush at threshold 2") + } + buf.Append(RecordBatch{Bytes: make([]byte, 1), MessageCount: 1}) + if !buf.ShouldFlush(time.Now()) { + t.Fatal("2 batches should trigger flush at threshold 2") + } +} + +func TestWriteBufferDrainEmpty(t *testing.T) { + buf := NewWriteBuffer(WriteBufferConfig{}) + drained := buf.Drain() + if len(drained) != 0 { + t.Fatalf("expected 0 drained batches, got %d", len(drained)) + } +} diff --git a/pkg/storage/index_test.go b/pkg/storage/index_test.go index 2e7bdc01..6790dfa6 100644 --- a/pkg/storage/index_test.go +++ b/pkg/storage/index_test.go @@ -15,7 +15,10 @@ package storage -import "testing" +import ( + "strings" + "testing" +) func TestIndexBuilder(t *testing.T) { builder := NewIndexBuilder(2) @@ -43,3 +46,76 @@ func TestIndexBuilder(t *testing.T) { t.Fatalf("parsed entries mismatch: %#v", parsed) } } + +func TestNewIndexBuilderZeroInterval(t *testing.T) { + b := NewIndexBuilder(0) + if b.interval != 1 { + t.Fatalf("expected interval 1 for zero input, got %d", b.interval) + } + b2 := NewIndexBuilder(-5) + if b2.interval != 1 { + t.Fatalf("expected interval 1 for negative input, got %d", b2.interval) + } +} + +func TestIndexBuilderEntriesCopied(t *testing.T) { + b := 
NewIndexBuilder(1) + b.MaybeAdd(0, 32, 1) + entries := b.Entries() + entries[0] = nil // Modify returned slice + origEntries := b.Entries() + if origEntries[0] == nil { + t.Fatal("Entries() should return a copy") + } +} + +func TestParseIndexTooSmall(t *testing.T) { + _, err := ParseIndex([]byte("short")) + if err == nil { + t.Fatal("expected error for data < 16 bytes") + } +} + +func TestParseIndexInvalidMagic(t *testing.T) { + data := make([]byte, 20) + copy(data, "BAAD") + _, err := ParseIndex(data) + if err == nil || !strings.Contains(err.Error(), "magic") { + t.Fatalf("expected invalid magic error, got: %v", err) + } +} + +func TestParseIndexBadVersion(t *testing.T) { + // Build valid magic + version=99 + data := make([]byte, 20) + copy(data, "IDX\x00") + data[4] = 0 // version high byte + data[5] = 99 // version low byte = 99 + _, err := ParseIndex(data) + if err == nil || !strings.Contains(err.Error(), "version") { + t.Fatalf("expected version error, got: %v", err) + } +} + +func TestIndexRoundTrip(t *testing.T) { + b := NewIndexBuilder(1) + for i := int64(0); i < 10; i++ { + b.MaybeAdd(i*100, int32(i*64), 1) + } + data, err := b.BuildBytes() + if err != nil { + t.Fatal(err) + } + entries, err := ParseIndex(data) + if err != nil { + t.Fatal(err) + } + if len(entries) != 10 { + t.Fatalf("expected 10 entries, got %d", len(entries)) + } + for i, e := range entries { + if e.Offset != int64(i)*100 { + t.Fatalf("entry %d: expected offset %d, got %d", i, i*100, e.Offset) + } + } +} diff --git a/pkg/storage/log_test.go b/pkg/storage/log_test.go index 9391f8cd..583e2ed1 100644 --- a/pkg/storage/log_test.go +++ b/pkg/storage/log_test.go @@ -26,6 +26,8 @@ import ( "time" "github.com/KafScale/platform/pkg/cache" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" "golang.org/x/sync/semaphore" ) @@ -252,7 +254,7 @@ func TestPartitionLogPrefetchSkippedWhenSemaphoreFull(t *testing.T) { // Now create a reader with a full semaphore 
and a fresh cache. sem := semaphore.NewWeighted(1) - sem.Acquire(context.Background(), 1) // exhaust the semaphore + _ = sem.Acquire(context.Background(), 1) // exhaust the semaphore c := cache.NewSegmentCache(1 << 20) reader := NewPartitionLog("default", "orders", 0, 0, s3mem, c, PartitionLogConfig{ Buffer: WriteBufferConfig{ @@ -270,7 +272,7 @@ func TestPartitionLogPrefetchSkippedWhenSemaphoreFull(t *testing.T) { if _, err := reader.RestoreFromS3(context.Background()); err != nil { t.Fatalf("RestoreFromS3: %v", err) } - sem.Acquire(context.Background(), 1) // re-exhaust + _ = sem.Acquire(context.Background(), 1) // re-exhaust // Trigger prefetch — should be skipped because TryAcquire fails. reader.startPrefetch(context.Background(), 0) @@ -592,6 +594,262 @@ func (t *transientIndexErrorS3) DownloadIndex(ctx context.Context, key string) ( return nil, fmt.Errorf("connection reset by peer") } +func TestPartitionLogReadNoCacheNoIndex(t *testing.T) { + // Read without cache forces the sliceFullSegmentData path + s3mem := NewMemoryS3Client() + log := NewPartitionLog("default", "orders", 0, 0, s3mem, nil, PartitionLogConfig{ + Buffer: WriteBufferConfig{ + MaxBytes: 1, + FlushInterval: time.Millisecond, + }, + Segment: SegmentWriterConfig{ + IndexIntervalMessages: 1000, // large interval → no index entries for range reads + }, + CacheEnabled: false, + }, nil, nil, nil) + + batchData := make([]byte, 70) + batch, _ := NewRecordBatchFromBytes(batchData) + if _, err := log.AppendBatch(context.Background(), batch); err != nil { + t.Fatalf("AppendBatch: %v", err) + } + if err := log.Flush(context.Background()); err != nil { + t.Fatalf("Flush: %v", err) + } + + data, err := log.Read(context.Background(), 0, 0) + if err != nil { + t.Fatalf("Read: %v", err) + } + if len(data) == 0 { + t.Fatal("expected non-empty data") + } +} + +func TestPartitionLogReadMaxBytes(t *testing.T) { + s3mem := NewMemoryS3Client() + log := NewPartitionLog("default", "orders", 0, 0, s3mem, nil, 
PartitionLogConfig{ + Buffer: WriteBufferConfig{ + MaxBytes: 1, + FlushInterval: time.Millisecond, + }, + Segment: SegmentWriterConfig{ + IndexIntervalMessages: 1, + }, + CacheEnabled: false, + }, nil, nil, nil) + + batch1, _ := NewRecordBatchFromBytes(makeBatchBytes(0, 0, 1, 0x11)) + batch2, _ := NewRecordBatchFromBytes(makeBatchBytes(1, 0, 1, 0x22)) + if _, err := log.AppendBatch(context.Background(), batch1); err != nil { + t.Fatal(err) + } + if _, err := log.AppendBatch(context.Background(), batch2); err != nil { + t.Fatal(err) + } + if err := log.Flush(context.Background()); err != nil { + t.Fatal(err) + } + + // Read with maxBytes = 10 (should truncate) + data, err := log.Read(context.Background(), 0, 10) + if err != nil { + t.Fatalf("Read: %v", err) + } + if len(data) > 10 { + t.Fatalf("expected max 10 bytes, got %d", len(data)) + } +} + +func TestPartitionLogReadOffsetOutOfRange(t *testing.T) { + s3mem := NewMemoryS3Client() + log := NewPartitionLog("default", "orders", 0, 0, s3mem, nil, PartitionLogConfig{ + Buffer: WriteBufferConfig{MaxBytes: 1}, + Segment: SegmentWriterConfig{IndexIntervalMessages: 1}, + }, nil, nil, nil) + + batch, _ := NewRecordBatchFromBytes(makeBatchBytes(0, 0, 1, 0x11)) + if _, err := log.AppendBatch(context.Background(), batch); err != nil { + t.Fatal(err) + } + if err := log.Flush(context.Background()); err != nil { + t.Fatal(err) + } + + // Read at offset 999 (beyond last segment) + _, err := log.Read(context.Background(), 999, 0) + if !errors.Is(err, ErrOffsetOutOfRange) { + t.Fatalf("expected ErrOffsetOutOfRange, got: %v", err) + } +} + +func TestPartitionLogRestoreFromS3Empty(t *testing.T) { + s3mem := NewMemoryS3Client() + log := NewPartitionLog("default", "empty", 0, 0, s3mem, nil, PartitionLogConfig{ + Buffer: WriteBufferConfig{MaxBytes: 1}, + Segment: SegmentWriterConfig{IndexIntervalMessages: 1}, + }, nil, nil, nil) + + lastOffset, err := log.RestoreFromS3(context.Background()) + if err != nil { + t.Fatal(err) + } + if 
lastOffset != -1 { + t.Fatalf("expected -1 for empty S3, got %d", lastOffset) + } +} + +func TestPartitionLogEarliestOffsetNoSegments(t *testing.T) { + s3mem := NewMemoryS3Client() + log := NewPartitionLog("default", "empty", 0, 0, s3mem, nil, PartitionLogConfig{ + Buffer: WriteBufferConfig{MaxBytes: 1}, + Segment: SegmentWriterConfig{IndexIntervalMessages: 1}, + }, nil, nil, nil) + // With no segments, EarliestOffset returns the configured start offset (0) + earliest := log.EarliestOffset() + if earliest != 0 { + t.Fatalf("expected 0 for no segments, got %d", earliest) + } +} + +func TestFindIndexEntry(t *testing.T) { + // Empty entries + e := findIndexEntry(nil, 0) + if e.Offset != 0 || e.Position != 0 { + t.Fatal("empty entries should return zero entry") + } + + entries := []*IndexEntry{ + {Offset: 0, Position: 32}, + {Offset: 10, Position: 100}, + {Offset: 20, Position: 200}, + {Offset: 30, Position: 300}, + } + + // Exact match + e = findIndexEntry(entries, 10) + if e.Offset != 10 { + t.Fatalf("expected offset 10, got %d", e.Offset) + } + + // Before first + e = findIndexEntry(entries, -5) + if e.Offset != 0 { + t.Fatalf("expected offset 0 for before-first, got %d", e.Offset) + } + + // After last + e = findIndexEntry(entries, 100) + if e.Offset != 30 { + t.Fatalf("expected offset 30 for after-last, got %d", e.Offset) + } + + // Between entries (should return floor entry) + e = findIndexEntry(entries, 15) + if e.Offset != 10 { + t.Fatalf("expected floor offset 10 for offset 15, got %d", e.Offset) + } + + e = findIndexEntry(entries, 25) + if e.Offset != 20 { + t.Fatalf("expected floor offset 20 for offset 25, got %d", e.Offset) + } +} + +func TestSliceFullSegmentData(t *testing.T) { + // Build data with header (32 bytes) + body + footer (16 bytes) + header := make([]byte, 32) + body := []byte("BODY_DATA_HERE!") + footer := make([]byte, segmentFooterLen) + copy(footer, "END!") + data := append(header, body...) + data = append(data, footer...) 
+ + result := sliceFullSegmentData(data, 0) + if string(result) != "BODY_DATA_HERE!" { + t.Fatalf("expected body data, got %q", result) + } + + // With maxBytes + result = sliceFullSegmentData(data, 4) + if string(result) != "BODY" { + t.Fatalf("expected 'BODY', got %q", result) + } + + // Very short data (< header) + short := make([]byte, 10) + result = sliceFullSegmentData(short, 0) + if len(result) != 0 { + t.Fatalf("expected empty for short data, got %d bytes", len(result)) + } +} + +func TestMemoryS3Client_DownloadSegmentInvalidRange(t *testing.T) { + m := NewMemoryS3Client() + _ = m.UploadSegment(context.Background(), "key", []byte("data")) + + // Start beyond data length + _, err := m.DownloadSegment(context.Background(), "key", &ByteRange{Start: 100, End: 200}) + if err == nil { + t.Fatal("expected error for invalid range") + } +} + +func TestAWSS3ListSegments(t *testing.T) { + api := &fakeS3WithList{ + fakeS3: fakeS3{}, + objects: []s3ListObject{ + {key: "topic/0/seg-0", size: 100}, + {key: "topic/0/seg-1", size: 200}, + }, + } + client := newAWSClientWithAPI("bucket", "us-east-1", "", api) + + objs, err := client.ListSegments(context.Background(), "topic/0/") + if err != nil { + t.Fatal(err) + } + if len(objs) != 2 { + t.Fatalf("expected 2 objects, got %d", len(objs)) + } + // Verify size tracking + totalSize := int64(0) + for _, o := range objs { + totalSize += o.Size + } + if totalSize != 300 { + t.Fatalf("expected total size 300, got %d", totalSize) + } +} + +type s3ListObject struct { + key string + size int64 +} + +type fakeS3WithList struct { + fakeS3 + objects []s3ListObject +} + +func (f *fakeS3WithList) ListObjectsV2(ctx context.Context, params *s3.ListObjectsV2Input, optFns ...func(*s3.Options)) (*s3.ListObjectsV2Output, error) { + if f.listErr != nil { + return nil, f.listErr + } + var contents []types.Object + for _, o := range f.objects { + key := o.key + size := o.size + contents = append(contents, types.Object{ + Key: &key, + Size: &size, + 
}) + } + return &s3.ListObjectsV2Output{ + Contents: contents, + }, nil +} + func makeBatchBytes(baseOffset int64, lastOffsetDelta int32, messageCount int32, marker byte) []byte { const size = 70 data := make([]byte, size) diff --git a/pkg/storage/s3_aws.go b/pkg/storage/s3_aws.go index 1ccd47a0..daf1de4d 100644 --- a/pkg/storage/s3_aws.go +++ b/pkg/storage/s3_aws.go @@ -62,26 +62,15 @@ func NewS3Client(ctx context.Context, cfg S3Config) (S3Client, error) { if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { loadOpts = append(loadOpts, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(cfg.AccessKeyID, cfg.SecretAccessKey, cfg.SessionToken))) } - if cfg.Endpoint != "" { - customResolver := aws.EndpointResolverWithOptionsFunc(func(service, region string, options ...interface{}) (aws.Endpoint, error) { - if service == s3.ServiceID { - return aws.Endpoint{ - URL: cfg.Endpoint, - PartitionID: "aws", - SigningRegion: cfg.Region, - }, nil - } - return aws.Endpoint{}, &aws.EndpointNotFoundError{} - }) - loadOpts = append(loadOpts, config.WithEndpointResolverWithOptions(customResolver)) - } - awsCfg, err := config.LoadDefaultConfig(ctx, loadOpts...) 
if err != nil { return nil, fmt.Errorf("load aws config: %w", err) } client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { + if cfg.Endpoint != "" { + o.BaseEndpoint = aws.String(cfg.Endpoint) + } o.UsePathStyle = cfg.ForcePathStyle if cfg.MaxConnections > 0 { o.HTTPClient = awshttp.NewBuildableClient().WithTransportOptions(func(t *http.Transport) { @@ -228,7 +217,7 @@ func (c *awsS3Client) DownloadSegment(ctx context.Context, key string, rng *Byte if err != nil { return nil, fmt.Errorf("get object %s: %w", key, err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() data, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("read body %s: %w", key, err) @@ -248,7 +237,7 @@ func (c *awsS3Client) DownloadIndex(ctx context.Context, key string) ([]byte, er } return nil, fmt.Errorf("get object %s: %w", key, err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() data, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("read body %s: %w", key, err) diff --git a/pkg/storage/s3client_test.go b/pkg/storage/s3client_test.go index 7e26955d..56ca4169 100644 --- a/pkg/storage/s3client_test.go +++ b/pkg/storage/s3client_test.go @@ -18,11 +18,13 @@ package storage import ( "bytes" "context" + "errors" "io" "testing" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/smithy-go" ) type fakeS3 struct { @@ -102,3 +104,263 @@ func TestAWSS3Client_Download(t *testing.T) { t.Fatalf("bucket mismatch: %s", aws.ToString(api.getInput.Bucket)) } } + +func TestAWSS3Client_DownloadNoRange(t *testing.T) { + api := &fakeS3{getData: []byte("fulldata")} + client := newAWSClientWithAPI("test-bucket", "us-east-1", "", api) + + data, err := client.DownloadSegment(context.Background(), "key", nil) + if err != nil { + t.Fatalf("DownloadSegment: %v", err) + } + if string(data) != "fulldata" { + t.Fatalf("unexpected data: %s", data) + } + if api.getInput.Range != nil { + 
t.Fatal("expected nil range header") + } +} + +func TestAWSS3Client_DownloadError(t *testing.T) { + api := &fakeS3{getErr: errors.New("access denied")} + client := newAWSClientWithAPI("test-bucket", "us-east-1", "", api) + + _, err := client.DownloadSegment(context.Background(), "key", nil) + if err == nil { + t.Fatal("expected error") + } +} + +func TestAWSS3Client_DownloadIndex(t *testing.T) { + api := &fakeS3{getData: []byte("index-data")} + client := newAWSClientWithAPI("test-bucket", "us-east-1", "", api) + + data, err := client.DownloadIndex(context.Background(), "topic/index") + if err != nil { + t.Fatalf("DownloadIndex: %v", err) + } + if string(data) != "index-data" { + t.Fatalf("unexpected data: %s", data) + } +} + +func TestAWSS3Client_DownloadIndexNotFound(t *testing.T) { + api := &fakeS3{getErr: &fakeAPIError{code: "NoSuchKey"}} + client := newAWSClientWithAPI("test-bucket", "us-east-1", "", api) + + _, err := client.DownloadIndex(context.Background(), "missing") + if err == nil { + t.Fatal("expected error") + } + if !errors.Is(err, ErrNotFound) { + t.Fatalf("expected ErrNotFound, got: %v", err) + } +} + +func TestAWSS3Client_UploadIndex(t *testing.T) { + api := &fakeS3{} + client := newAWSClientWithAPI("test-bucket", "us-east-1", "", api) + + err := client.UploadIndex(context.Background(), "index/key", []byte("idx")) + if err != nil { + t.Fatalf("UploadIndex: %v", err) + } + if len(api.putInputs) != 1 { + t.Fatalf("expected 1 put, got %d", len(api.putInputs)) + } +} + +func TestAWSS3Client_UploadNoKMS(t *testing.T) { + api := &fakeS3{} + client := newAWSClientWithAPI("test-bucket", "us-east-1", "", api) + + err := client.UploadSegment(context.Background(), "key", []byte("data")) + if err != nil { + t.Fatalf("UploadSegment: %v", err) + } + if api.putInputs[0].SSEKMSKeyId != nil { + t.Fatal("expected no KMS key when empty") + } +} + +func TestAWSS3Client_EnsureBucket(t *testing.T) { + api := &fakeS3{} + client := newAWSClientWithAPI("test-bucket", 
"us-east-1", "", api) + + err := client.EnsureBucket(context.Background()) + if err != nil { + t.Fatalf("EnsureBucket: %v", err) + } +} + +func TestAWSS3Client_EnsureBucketAlreadyExists(t *testing.T) { + api := &fakeS3{ + headErr: &fakeAPIError{code: "NotFound"}, + createErr: &fakeAPIError{code: "BucketAlreadyOwnedByYou"}, + } + client := newAWSClientWithAPI("test-bucket", "us-east-1", "", api) + + err := client.EnsureBucket(context.Background()) + if err != nil { + t.Fatalf("EnsureBucket: %v", err) + } +} + +func TestBucketLocationConfig(t *testing.T) { + c := &awsS3Client{region: "us-east-1"} + if c.bucketLocationConfig() != nil { + t.Fatal("us-east-1 should return nil config") + } + c2 := &awsS3Client{region: ""} + if c2.bucketLocationConfig() != nil { + t.Fatal("empty region should return nil config") + } + c3 := &awsS3Client{region: "eu-west-1"} + cfg := c3.bucketLocationConfig() + if cfg == nil { + t.Fatal("non-us-east-1 should return config") + } +} + +func TestIsNotFoundErr(t *testing.T) { + if isNotFoundErr(nil) { + t.Fatal("nil should not be not-found") + } + if !isNotFoundErr(&fakeAPIError{code: "NoSuchKey"}) { + t.Fatal("NoSuchKey should be not-found") + } + if !isNotFoundErr(&fakeAPIError{code: "NotFound"}) { + t.Fatal("NotFound should be not-found") + } + if isNotFoundErr(errors.New("random error")) { + t.Fatal("random error should not be not-found") + } +} + +func TestIsBucketMissingErr(t *testing.T) { + if isBucketMissingErr(nil) { + t.Fatal("nil should not be bucket-missing") + } + if !isBucketMissingErr(&fakeAPIError{code: "NoSuchBucket"}) { + t.Fatal("NoSuchBucket should be bucket-missing") + } + if !isBucketMissingErr(&fakeAPIError{code: "NotFound"}) { + t.Fatal("NotFound should be bucket-missing") + } + if isBucketMissingErr(errors.New("random error")) { + t.Fatal("random error should not be bucket-missing") + } +} + +func TestByteRangeHeaderValue(t *testing.T) { + br := &ByteRange{Start: 10, End: 20} + val := br.headerValue() + if val == nil 
|| *val != "bytes=10-20" { + t.Fatalf("expected bytes=10-20, got %v", val) + } + + var nilBR *ByteRange + if nilBR.headerValue() != nil { + t.Fatal("nil ByteRange should return nil header") + } +} + +func TestNewS3ClientValidation(t *testing.T) { + _, err := NewS3Client(context.Background(), S3Config{Bucket: "", Region: "us-east-1"}) + if err == nil { + t.Fatal("expected error for empty bucket") + } + _, err = NewS3Client(context.Background(), S3Config{Bucket: "b", Region: ""}) + if err == nil { + t.Fatal("expected error for empty region") + } +} + +// fakeAPIError implements smithy.APIError +type fakeAPIError struct { + code string +} + +func (e *fakeAPIError) Error() string { return e.code } +func (e *fakeAPIError) ErrorCode() string { return e.code } +func (e *fakeAPIError) ErrorMessage() string { return e.code } +func (e *fakeAPIError) ErrorFault() smithy.ErrorFault { return smithy.FaultUnknown } + +func TestMemoryS3Client_EnsureBucket(t *testing.T) { + m := NewMemoryS3Client() + err := m.EnsureBucket(context.Background()) + if err != nil { + t.Fatalf("EnsureBucket: %v", err) + } +} + +func TestMemoryS3Client_UploadAndDownload(t *testing.T) { + m := NewMemoryS3Client() + _ = m.UploadIndex(context.Background(), "idx/key", []byte("index")) + + data, err := m.DownloadIndex(context.Background(), "idx/key") + if err != nil { + t.Fatalf("DownloadIndex: %v", err) + } + if string(data) != "index" { + t.Fatalf("unexpected index data: %s", data) + } + + _, err = m.DownloadIndex(context.Background(), "missing") + if err == nil { + t.Fatal("expected error for missing index") + } +} + +func TestMemoryS3Client_DownloadSegmentRange(t *testing.T) { + m := NewMemoryS3Client() + _ = m.UploadSegment(context.Background(), "seg", []byte("0123456789")) + + // Full download + data, err := m.DownloadSegment(context.Background(), "seg", nil) + if err != nil { + t.Fatal(err) + } + if string(data) != "0123456789" { + t.Fatalf("expected full data, got %s", data) + } + + // Range download 
+ data, err = m.DownloadSegment(context.Background(), "seg", &ByteRange{Start: 2, End: 5}) + if err != nil { + t.Fatal(err) + } + if string(data) != "2345" { + t.Fatalf("expected '2345', got '%s'", data) + } + + // Not found + _, err = m.DownloadSegment(context.Background(), "missing", nil) + if err == nil { + t.Fatal("expected error for missing segment") + } +} + +func TestMemoryS3Client_ListSegments(t *testing.T) { + m := NewMemoryS3Client() + _ = m.UploadSegment(context.Background(), "topic/0/seg-0", []byte("a")) + _ = m.UploadSegment(context.Background(), "topic/0/seg-1", []byte("bb")) + _ = m.UploadSegment(context.Background(), "topic/1/seg-0", []byte("ccc")) + + objs, err := m.ListSegments(context.Background(), "topic/0/") + if err != nil { + t.Fatal(err) + } + if len(objs) != 2 { + t.Fatalf("expected 2 objects, got %d", len(objs)) + } + + // Non-matching prefix + objs, err = m.ListSegments(context.Background(), "other/") + if err != nil { + t.Fatal(err) + } + if len(objs) != 0 { + t.Fatalf("expected 0 objects, got %d", len(objs)) + } +} diff --git a/pkg/storage/segment.go b/pkg/storage/segment.go index 829c0585..01d500e2 100644 --- a/pkg/storage/segment.go +++ b/pkg/storage/segment.go @@ -87,19 +87,19 @@ func BuildSegment(cfg SegmentWriterConfig, batches []RecordBatch, created time.T func buildHeader(baseOffset int64, messageCount int32, created time.Time) []byte { buf := bytes.NewBuffer(make([]byte, 0, 32)) buf.WriteString(segmentMagic) - binary.Write(buf, binary.BigEndian, uint16(1)) // version - binary.Write(buf, binary.BigEndian, uint16(0)) // flags - binary.Write(buf, binary.BigEndian, baseOffset) // base offset - binary.Write(buf, binary.BigEndian, messageCount) - binary.Write(buf, binary.BigEndian, created.UnixMilli()) - binary.Write(buf, binary.BigEndian, uint32(0)) // reserved + _ = binary.Write(buf, binary.BigEndian, uint16(1)) // version + _ = binary.Write(buf, binary.BigEndian, uint16(0)) // flags + _ = binary.Write(buf, binary.BigEndian, 
baseOffset) // base offset + _ = binary.Write(buf, binary.BigEndian, messageCount) + _ = binary.Write(buf, binary.BigEndian, created.UnixMilli()) + _ = binary.Write(buf, binary.BigEndian, uint32(0)) // reserved return buf.Bytes() } func buildFooter(crc uint32, lastOffset int64) []byte { buf := bytes.NewBuffer(make([]byte, 0, 16)) - binary.Write(buf, binary.BigEndian, crc) - binary.Write(buf, binary.BigEndian, lastOffset) + _ = binary.Write(buf, binary.BigEndian, crc) + _ = binary.Write(buf, binary.BigEndian, lastOffset) buf.WriteString(footerMagic) return buf.Bytes() } diff --git a/test/e2e/README.md b/test/e2e/README.md index 4f1578bc..17d2b91e 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -25,24 +25,122 @@ These tests spin up a full cluster (via [kind](https://kind.sigs.k8s.io)), insta 2. `kind`, `kubectl`, and `helm` binaries on your `$PATH` 3. Internet access to pull the Bitnami `etcd` chart (the harness installs a single-node etcd for the operator) +## Test Categories and Dependencies + +Tests have different dependency requirements. Tests will **skip gracefully** if their dependencies aren't available. 
+ +| Category | MinIO | Docker | Kind | Make Target | +|----------|-------|--------|------|-------------| +| Console tests | No | No | No | `go test -run Console` | +| Consumer group tests | No | No | No | `make test-consumer-group` | +| Ops API tests | No | No | No | `make test-ops-api` | +| MCP tests | No | No | No | `make test-mcp` | +| LFS proxy tests | No (fake S3) | No | No | `make test-lfs-proxy-broker` | +| Produce/consume tests | **Yes** | Yes | No | `make test-produce-consume` | +| Multi-segment durability | **Yes** | No | No | `make test-multi-segment-durability` | +| Kind cluster tests | No | Yes | Yes | Requires `KAFSCALE_E2E_KIND=1` | + +### MinIO Dependency + +Tests that require MinIO (produce/consume, durability) will automatically skip if MinIO isn't available: + +``` +=== RUN TestFranzGoProduceConsume + franz_test.go:42: MinIO not available at http://127.0.0.1:9000; run 'make ensure-minio' first or use 'make test-produce-consume' +--- SKIP: TestFranzGoProduceConsume (0.00s) +``` + +To run MinIO-dependent tests: + +```bash +# Option 1: Use make targets (automatically starts MinIO) +make test-produce-consume + +# Option 2: Start MinIO manually, then run tests +make ensure-minio +KAFSCALE_E2E=1 go test -tags=e2e ./test/e2e -run TestFranzGoProduceConsume -v +``` + ## Running ```bash KAFSCALE_E2E=1 go test -tags=e2e ./test/e2e -v ``` +## LFS Go SDK (Kind Cluster) + +This test validates the Go SDK against a running Kind cluster (LFS proxy + Kafka + MinIO). 
+ +Required environment: +- `KAFSCALE_E2E=1` +- `KAFSCALE_E2E_KIND=1` +- `KAFSCALE_E2E_BROKER_ADDR` (host:port for broker) +- `LFS_PROXY_HTTP_URL` (full URL), or `LFS_PROXY_SERVICE_HOST` + `LFS_PROXY_HTTP_PORT` + `LFS_PROXY_HTTP_PATH` +- `KAFSCALE_LFS_PROXY_S3_BUCKET` +- `KAFSCALE_LFS_PROXY_S3_REGION` +- `KAFSCALE_LFS_PROXY_S3_ENDPOINT` +- `KAFSCALE_LFS_PROXY_S3_ACCESS_KEY` +- `KAFSCALE_LFS_PROXY_S3_SECRET_KEY` +- `KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE` (optional) + +Run: +```bash +KAFSCALE_E2E=1 KAFSCALE_E2E_KIND=1 \ +KAFSCALE_E2E_BROKER_ADDR=127.0.0.1:9092 \ +LFS_PROXY_HTTP_URL=http://127.0.0.1:8080 \ +KAFSCALE_LFS_PROXY_S3_BUCKET=kafscale \ +KAFSCALE_LFS_PROXY_S3_REGION=us-east-1 \ +KAFSCALE_LFS_PROXY_S3_ENDPOINT=http://127.0.0.1:9000 \ +KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=minioadmin \ +KAFSCALE_LFS_PROXY_S3_SECRET_KEY=minioadmin \ +go test -tags=e2e ./test/e2e -run TestLfsSDKKindE2E -v +``` + +**Note:** Running all tests with `go test` will skip tests whose dependencies aren't available. For complete test coverage, use `make test-full`. 
+ For local developer workflows, prefer the Makefile targets: ```bash make test-consumer-group # embedded etcd + in-memory S3 make test-ops-api # embedded etcd + in-memory S3 +make test-mcp # MCP server tests +make test-lfs-proxy-broker # LFS proxy with fake S3 make test-multi-segment-durability # embedded etcd + MinIO make test-produce-consume # MinIO-backed produce/consume suite make test-full # unit tests + local e2e suites ``` -Optional environment variables: +### Kind LFS SDK Helper Makefile + +The Kind-based SDK test uses `lfs-client-sdk/Makefile` to orchestrate: +- `lfs-demo-up`: start the LFS demo stack on Kind (keeps it running) +- `pf-start`: port-forward broker, LFS proxy HTTP, and MinIO +- `test-lfs-sdk-kind`: run the Go SDK E2E test + +Run all: +```bash +make -C lfs-client-sdk run-all +``` + +Stop port-forwards: +```bash +make -C lfs-client-sdk pf-stop +``` + +If you already ran `make lfs-demo`, ensure port-forwards are up before running the SDK test: +```bash +make -C lfs-client-sdk pf-start +make -C lfs-client-sdk test-lfs-sdk-kind +``` + +## Optional Environment Variables -- `KAFSCALE_KIND_CLUSTER`: reuse an existing kind cluster without creating/deleting one. +| Variable | Description | +|----------|-------------| +| `KAFSCALE_KIND_CLUSTER` | Reuse an existing kind cluster without creating/deleting one | +| `KAFSCALE_S3_ENDPOINT` | MinIO endpoint (default: `http://127.0.0.1:9000`) | +| `KAFSCALE_E2E_DEBUG` | Enable verbose logging | +| `KAFSCALE_TRACE_KAFKA` | Enable Kafka protocol tracing | +| `KAFSCALE_E2E_OPEN_UI` | Open console UI in browser after test | The harness installs everything into the `kafscale-e2e` namespace and removes it after the test (unless you reused a cluster). 
diff --git a/test/e2e/franz_test.go b/test/e2e/franz_test.go index d71df91d..3a837c55 100644 --- a/test/e2e/franz_test.go +++ b/test/e2e/franz_test.go @@ -39,6 +39,7 @@ func TestFranzGoProduceConsume(t *testing.T) { if os.Getenv(enableEnv) != "1" { t.Skipf("set %s=1 to run integration harness", enableEnv) } + requireMinIO(t) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) defer cancel() diff --git a/test/e2e/kafka_cli_test.go b/test/e2e/kafka_cli_test.go index 39e42cb3..b08cd586 100644 --- a/test/e2e/kafka_cli_test.go +++ b/test/e2e/kafka_cli_test.go @@ -50,6 +50,7 @@ func newKafkaCliHarness(t *testing.T) *kafkaCliHarness { if os.Getenv(enableEnv) != "1" { t.Skipf("set %s=1 to run integration harness", enableEnv) } + requireMinIO(t) requireBinaries(t, "docker") @@ -233,6 +234,7 @@ func TestKafkaCliAdminTopics(t *testing.T) { if os.Getenv(enableEnv) != "1" { t.Skipf("set %s=1 to run integration harness", enableEnv) } + requireMinIO(t) requireBinaries(t, "docker") diff --git a/test/e2e/lfs_iceberg_processor_test.go b/test/e2e/lfs_iceberg_processor_test.go new file mode 100644 index 00000000..608869d4 --- /dev/null +++ b/test/e2e/lfs_iceberg_processor_test.go @@ -0,0 +1,252 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build e2e + +package e2e + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/twmb/franz-go/pkg/kgo" +) + +func TestLfsIcebergProcessorE2E(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + required := []string{ + "KAFSCALE_E2E_S3_ENDPOINT", + "KAFSCALE_E2E_S3_BUCKET", + "KAFSCALE_E2E_S3_REGION", + "KAFSCALE_E2E_S3_ACCESS_KEY", + "KAFSCALE_E2E_S3_SECRET_KEY", + "ICEBERG_PROCESSOR_CATALOG_URI", + "ICEBERG_PROCESSOR_WAREHOUSE", + } + for _, key := range required { + if os.Getenv(key) == "" { + t.Skipf("%s not set", key) + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) + t.Cleanup(cancel) + + etcd, endpoints := startEmbeddedEtcd(t) + t.Cleanup(func() { etcd.Close() }) + + brokerAddr := freeAddr(t) + metricsAddr := freeAddr(t) + controlAddr := freeAddr(t) + + brokerCmd, brokerLogs := startBrokerWithEtcdS3ForIceberg(t, ctx, brokerAddr, metricsAddr, controlAddr, endpoints) + t.Cleanup(func() { stopBroker(t, brokerCmd) }) + waitForBroker(t, brokerLogs, brokerAddr) + + proxyPort := pickFreePort(t) + healthPort := pickFreePort(t) + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(endpoints, ",")), + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_BUCKET=%s", os.Getenv("KAFSCALE_E2E_LFS_BUCKET")), + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_REGION=%s", 
os.Getenv("KAFSCALE_E2E_S3_REGION")), + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", os.Getenv("KAFSCALE_E2E_S3_ENDPOINT")), + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=%s", os.Getenv("KAFSCALE_E2E_S3_ACCESS_KEY")), + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_SECRET_KEY=%s", os.Getenv("KAFSCALE_E2E_S3_SECRET_KEY")), + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + if os.Getenv("KAFSCALE_E2E_LFS_BUCKET") == "" { + proxyCmd.Env = append(proxyCmd.Env, fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_BUCKET=%s", os.Getenv("KAFSCALE_E2E_S3_BUCKET"))) + } + var proxyLogs bytes.Buffer + proxyCmd.Stdout = io.MultiWriter(&proxyLogs, mustLogFile(t, "lfs-iceberg-proxy.log")) + proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + t.Cleanup(func() { _ = signalProcessGroup(proxyCmd, os.Interrupt) }) + waitForPortWithTimeout(t, fmt.Sprintf("127.0.0.1:%s", proxyPort), 10*time.Second) + + configPath := writeIcebergProcessorConfig(t, brokerAddr, endpoints) + processorCmd := exec.CommandContext(ctx, "go", "run", "./cmd/processor", "-config", configPath) + processorCmd.Dir = filepath.Join(repoRoot(t), "addons", "processors", "iceberg-processor") + configureProcessGroup(processorCmd) + var processorLogs bytes.Buffer + processorCmd.Stdout = io.MultiWriter(&processorLogs, mustLogFile(t, "lfs-iceberg-processor.log")) + processorCmd.Stderr = processorCmd.Stdout + if err := processorCmd.Start(); err != nil { + t.Fatalf("start iceberg-processor: %v", err) + } + t.Cleanup(func() { _ = signalProcessGroup(processorCmd, os.Interrupt) }) + + producer, err := kgo.NewClient( + kgo.SeedBrokers("127.0.0.1:"+proxyPort), + kgo.AllowAutoTopicCreation(), + kgo.DisableIdempotentWrite(), + ) + if err != nil { + t.Fatalf("create producer: %v", err) + } + defer producer.Close() + + topic := "lfs-iceberg-topic" + record := &kgo.Record{ + Topic: topic, + Key: []byte("k1"), + Value: []byte("hello world"), 
+ Headers: []kgo.RecordHeader{{Key: "LFS_BLOB", Value: []byte("1")}}, + } + if res := producer.ProduceSync(ctx, record); res.FirstErr() != nil { + t.Fatalf("produce: %v", res.FirstErr()) + } + + waitForLog(t, &processorLogs, "sink write failed", 30*time.Second) + if strings.Contains(processorLogs.String(), "sink write failed") { + t.Fatalf("processor reported sink write failure") + } +} + +func startBrokerWithEtcdS3ForIceberg(t *testing.T, ctx context.Context, brokerAddr, metricsAddr, controlAddr string, endpoints []string) (*exec.Cmd, *bytes.Buffer) { + t.Helper() + brokerCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "broker")) + configureProcessGroup(brokerCmd) + brokerCmd.Env = append(os.Environ(), + "KAFSCALE_AUTO_CREATE_TOPICS=true", + "KAFSCALE_AUTO_CREATE_PARTITIONS=1", + fmt.Sprintf("KAFSCALE_BROKER_ADDR=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_METRICS_ADDR=%s", metricsAddr), + fmt.Sprintf("KAFSCALE_CONTROL_ADDR=%s", controlAddr), + fmt.Sprintf("KAFSCALE_ETCD_ENDPOINTS=%s", strings.Join(endpoints, ",")), + fmt.Sprintf("KAFSCALE_S3_BUCKET=%s", os.Getenv("KAFSCALE_E2E_S3_BUCKET")), + fmt.Sprintf("KAFSCALE_S3_REGION=%s", os.Getenv("KAFSCALE_E2E_S3_REGION")), + fmt.Sprintf("KAFSCALE_S3_ENDPOINT=%s", os.Getenv("KAFSCALE_E2E_S3_ENDPOINT")), + fmt.Sprintf("KAFSCALE_S3_ACCESS_KEY=%s", os.Getenv("KAFSCALE_E2E_S3_ACCESS_KEY")), + fmt.Sprintf("KAFSCALE_S3_SECRET_KEY=%s", os.Getenv("KAFSCALE_E2E_S3_SECRET_KEY")), + "KAFSCALE_S3_PATH_STYLE=true", + ) + var brokerLogs bytes.Buffer + logWriter := io.MultiWriter(&brokerLogs, mustLogFile(t, "broker-lfs-iceberg.log")) + brokerCmd.Stdout = logWriter + brokerCmd.Stderr = logWriter + if err := brokerCmd.Start(); err != nil { + t.Fatalf("start broker: %v", err) + } + return brokerCmd, &brokerLogs +} + +func writeIcebergProcessorConfig(t *testing.T, brokerAddr string, endpoints []string) string { + t.Helper() + config := fmt.Sprintf(`s3: + bucket: %s + namespace: default + region: %s + endpoint: 
%s + path_style: true +iceberg: + catalog: + type: %s + uri: %s + token: "%s" + warehouse: %s +offsets: + backend: etcd + lease_ttl_seconds: 30 + key_prefix: processors +discovery: + mode: auto +etcd: + endpoints: + - %s +schema: + mode: "off" +mappings: + - topic: lfs-iceberg-topic + table: default.lfs_iceberg_topic + mode: append + create_table_if_missing: true + lfs: + mode: resolve + max_inline_size: 1048576 + store_metadata: true + validate_checksum: true + resolve_concurrency: 2 +`, + os.Getenv("KAFSCALE_E2E_S3_BUCKET"), + os.Getenv("KAFSCALE_E2E_S3_REGION"), + os.Getenv("KAFSCALE_E2E_S3_ENDPOINT"), + envOrDefault("ICEBERG_PROCESSOR_CATALOG_TYPE", "rest"), + os.Getenv("ICEBERG_PROCESSOR_CATALOG_URI"), + os.Getenv("ICEBERG_PROCESSOR_CATALOG_TOKEN"), + os.Getenv("ICEBERG_PROCESSOR_WAREHOUSE"), + endpoints[0], + ) + + path := filepath.Join(t.TempDir(), "config.yaml") + if err := os.WriteFile(path, []byte(config), 0644); err != nil { + t.Fatalf("write config: %v", err) + } + return path +} + +func waitForLog(t *testing.T, logs *bytes.Buffer, needle string, timeout time.Duration) { + t.Helper() + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if strings.Contains(logs.String(), needle) { + return + } + time.Sleep(200 * time.Millisecond) + } +} + +func TestLfsIcebergQueryValidation(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + cmdLine := os.Getenv("KAFSCALE_E2E_QUERY_CMD") + if cmdLine == "" { + t.Skip("KAFSCALE_E2E_QUERY_CMD not set") + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + cmd := exec.CommandContext(ctx, "sh", "-c", cmdLine) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + t.Fatalf("query command failed: %v", err) + } +} diff --git a/test/e2e/lfs_proxy_broker_test.go b/test/e2e/lfs_proxy_broker_test.go new file mode 100644 index 
00000000..11799e06 --- /dev/null +++ b/test/e2e/lfs_proxy_broker_test.go @@ -0,0 +1,234 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build e2e + +package e2e + +import ( + "bytes" + "context" + "crypto/rand" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "testing" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/KafScale/platform/pkg/metadata" + "github.com/KafScale/platform/pkg/protocol" + "github.com/twmb/franz-go/pkg/kgo" +) + +func TestLfsProxyBrokerE2E(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + s3Server := newFakeS3Server(t) + t.Cleanup(s3Server.Close) + + etcd, endpoints := startEmbeddedEtcd(t) + t.Cleanup(func() { + etcd.Close() + }) + + brokerAddr := freeAddr(t) + metricsAddr := freeAddr(t) + controlAddr := freeAddr(t) + + brokerHost, brokerPort := splitHostPort(t, brokerAddr) + store, err := metadata.NewEtcdStore(ctx, metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{ + NodeID: 0, + Host: brokerHost, + Port: brokerPort, + }}, + }, 
metadata.EtcdStoreConfig{Endpoints: endpoints}) + if err != nil { + t.Fatalf("create etcd store: %v", err) + } + + topic := "lfs-broker-topic" + if _, err := store.CreateTopic(ctx, metadata.TopicSpec{ + Name: topic, + NumPartitions: 1, + ReplicationFactor: 1, + }); err != nil { + t.Fatalf("create topic: %v", err) + } + + brokerCmd, brokerLogs := startBrokerWithEtcd(t, ctx, brokerAddr, metricsAddr, controlAddr, endpoints) + t.Cleanup(func() { stopBroker(t, brokerCmd) }) + waitForBroker(t, brokerLogs, brokerAddr) + + proxyPort := pickFreePort(t) + healthPort := pickFreePort(t) + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(endpoints, ",")), + "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-e2e-broker", + "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), + "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + var proxyLogs bytes.Buffer + proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-broker.log")} + proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) 
+ proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + t.Cleanup(func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + done := make(chan struct{}) + go func() { + _ = proxyCmd.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(2 * time.Second): + _ = signalProcessGroup(proxyCmd, os.Kill) + } + }) + waitForPortWithTimeout(t, "127.0.0.1:"+proxyPort, 15*time.Second) + + producer, err := kgo.NewClient( + kgo.SeedBrokers("127.0.0.1:"+proxyPort), + kgo.AllowAutoTopicCreation(), + kgo.DisableIdempotentWrite(), + ) + if err != nil { + t.Fatalf("create producer: %v", err) + } + defer producer.Close() + + consumer, err := kgo.NewClient( + kgo.SeedBrokers(brokerAddr), + kgo.ConsumeTopics(topic), + kgo.ConsumerGroup("lfs-proxy-broker-e2e"), + kgo.BlockRebalanceOnPoll(), + ) + if err != nil { + t.Fatalf("create consumer: %v", err) + } + consumerClosed := false + defer func() { + if !consumerClosed { + consumer.CloseAllowingRebalance() + } + }() + + blob := make([]byte, 1024) + if _, err := rand.Read(blob); err != nil { + t.Fatalf("generate blob: %v", err) + } + + record := &kgo.Record{ + Topic: topic, + Key: []byte("test-key"), + Value: blob, + Headers: []kgo.RecordHeader{ + {Key: "LFS_BLOB", Value: nil}, + }, + } + res := producer.ProduceSync(ctx, record) + if err := res.FirstErr(); err != nil { + t.Fatalf("produce: %v\nproxy logs:\n%s\nbroker logs:\n%s", err, proxyLogs.String(), brokerLogs.String()) + } + + deadline := time.Now().Add(15 * time.Second) + for { + if time.Now().After(deadline) { + t.Fatalf("timed out waiting for broker record\nproxy logs:\n%s\nbroker logs:\n%s", proxyLogs.String(), brokerLogs.String()) + } + fetches := consumer.PollFetches(ctx) + if errs := fetches.Errors(); len(errs) > 0 { + t.Fatalf("fetch errors: %+v\nproxy logs:\n%s\nbroker logs:\n%s", errs, proxyLogs.String(), brokerLogs.String()) + } + var got []byte + fetches.EachRecord(func(r *kgo.Record) { + 
if r.Topic != topic || got != nil { + return + } + got = append([]byte(nil), r.Value...) + }) + if got == nil { + time.Sleep(200 * time.Millisecond) + continue + } + if !lfs.IsLfsEnvelope(got) { + t.Fatalf("expected LFS envelope, got: %s", string(got)) + } + var env lfs.Envelope + if err := json.Unmarshal(got, &env); err != nil { + t.Fatalf("decode envelope: %v", err) + } + expectedHash := sha256.Sum256(blob) + expectedChecksum := hex.EncodeToString(expectedHash[:]) + if env.SHA256 != expectedChecksum { + t.Fatalf("SHA256 = %s, want %s", env.SHA256, expectedChecksum) + } + s3Key := env.Key + s3Server.mu.Lock() + storedBlob, ok := s3Server.objects["lfs-e2e-broker/"+s3Key] + s3Server.mu.Unlock() + if !ok { + t.Fatalf("blob not found in S3 at key: %s", s3Key) + } + if !bytes.Equal(storedBlob, blob) { + t.Fatalf("stored blob does not match original") + } + consumer.CloseAllowingRebalance() + consumerClosed = true + return + } +} + +func splitHostPort(t *testing.T, addr string) (string, int32) { + t.Helper() + host, portStr, err := net.SplitHostPort(addr) + if err != nil { + t.Fatalf("split addr %s: %v", addr, err) + } + port, err := strconv.Atoi(portStr) + if err != nil { + t.Fatalf("parse port %s: %v", portStr, err) + } + return host, int32(port) +} diff --git a/test/e2e/lfs_proxy_etcd_test.go b/test/e2e/lfs_proxy_etcd_test.go new file mode 100644 index 00000000..e383294e --- /dev/null +++ b/test/e2e/lfs_proxy_etcd_test.go @@ -0,0 +1,65 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build e2e + +package e2e + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/KafScale/platform/pkg/metadata" + "github.com/KafScale/platform/pkg/protocol" +) + +func startLfsProxyEtcd(t *testing.T, brokerHost string, brokerPort int32, topics ...string) []string { + t.Helper() + etcd, endpoints := startEmbeddedEtcd(t) + t.Cleanup(func() { + etcd.Close() + }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + t.Cleanup(cancel) + + store, err := metadata.NewEtcdStore(ctx, metadata.ClusterMetadata{ + Brokers: []protocol.MetadataBroker{{ + NodeID: 0, + Host: brokerHost, + Port: brokerPort, + }}, + }, metadata.EtcdStoreConfig{Endpoints: endpoints}) + if err != nil { + t.Fatalf("create etcd store: %v", err) + } + + for _, topic := range topics { + if topic == "" { + continue + } + if _, err := store.CreateTopic(ctx, metadata.TopicSpec{ + Name: topic, + NumPartitions: 1, + ReplicationFactor: 1, + }); err != nil && !errors.Is(err, metadata.ErrTopicExists) { + t.Fatalf("create topic %s: %v", topic, err) + } + } + + return endpoints +} diff --git a/test/e2e/lfs_proxy_http_test.go b/test/e2e/lfs_proxy_http_test.go new file mode 100644 index 00000000..d06ca060 --- /dev/null +++ b/test/e2e/lfs_proxy_http_test.go @@ -0,0 +1,641 @@ +// Copyright 2025 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build e2e + +package e2e + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/base64" + "encoding/binary" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/KafScale/platform/pkg/protocol" + "github.com/twmb/franz-go/pkg/kmsg" +) + +func TestLfsProxyHTTPProduce(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + s3Server := newFakeS3Server(t) + t.Cleanup(s3Server.Close) + + brokerAddr, received, closeBackend := startFakeKafkaBackend(t) + // Start embedded etcd and seed topics for metadata responses + etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "http-limited") + t.Cleanup(closeBackend) + + proxyPort := pickFreePort(t) + httpPort := pickFreePort(t) + healthPort := pickFreePort(t) + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + 
fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HTTP_ADDR=127.0.0.1:%s", httpPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), + "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-e2e", + "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), + "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + var proxyLogs bytes.Buffer + proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-http.log")} + proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) + proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + t.Cleanup(func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + done := make(chan struct{}) + go func() { + _ = proxyCmd.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(2 * time.Second): + _ = signalProcessGroup(proxyCmd, os.Kill) + } + }) + waitForPortWithTimeout(t, "127.0.0.1:"+httpPort, 15*time.Second) + + payload := []byte("hello-lfs-stream") + checksum := sha256.Sum256(payload) + checksumHex := hex.EncodeToString(checksum[:]) + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%s/lfs/produce", httpPort), bytes.NewReader(payload)) + if err != nil { + t.Fatalf("build request: %v", err) + } + req.Header.Set("X-Kafka-Topic", "http-limited") + req.Header.Set("X-Kafka-Key", base64.StdEncoding.EncodeToString([]byte("key-1"))) + req.Header.Set("X-LFS-Checksum", checksumHex) + req.Header.Set("Content-Type", "application/octet-stream") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("http produce 
failed: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("unexpected status %d: %s", resp.StatusCode, string(body)) + } + + var env lfs.Envelope + if err := json.NewDecoder(resp.Body).Decode(&env); err != nil { + t.Fatalf("decode response: %v", err) + } + if env.SHA256 != checksumHex { + t.Fatalf("checksum mismatch: %s", env.SHA256) + } + + deadline := time.After(10 * time.Second) + for { + select { + case value := <-received: + var got lfs.Envelope + if err := json.Unmarshal(value, &got); err != nil { + t.Fatalf("expected envelope json: %v", err) + } + if got.Key == "" || got.Bucket == "" { + t.Fatalf("unexpected envelope: %+v", got) + } + return + case <-deadline: + t.Fatalf("timed out waiting for backend record") + } + } +} + +func TestLfsProxyHTTPProduceRestart(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + t.Cleanup(cancel) + + s3Server := newFakeS3Server(t) + t.Cleanup(s3Server.Close) + + brokerAddr, received, closeBackend := startFakeKafkaBackend(t) + etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "http-restart") + t.Cleanup(closeBackend) + + proxyPort := pickFreePort(t) + httpPort := pickFreePort(t) + healthPort := pickFreePort(t) + + startProxy := func() (*exec.Cmd, *bytes.Buffer) { + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HTTP_ADDR=127.0.0.1:%s", httpPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + 
fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), + "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-e2e", + "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), + "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + var proxyLogs bytes.Buffer + proxyCmd.Stdout = io.MultiWriter(&proxyLogs, mustLogFile(t, "lfs-proxy-http-restart.log")) + proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + return proxyCmd, &proxyLogs + } + + proxyCmd, _ := startProxy() + defer func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + _ = proxyCmd.Wait() + }() + waitForPortWithTimeout(t, "127.0.0.1:"+httpPort, 15*time.Second) + + slowPayload := bytes.Repeat([]byte("a"), 1024*1024) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%s/lfs/produce", httpPort), newSlowReader(slowPayload, 32*1024, 10*time.Millisecond)) + if err != nil { + t.Fatalf("build request: %v", err) + } + req.Header.Set("X-Kafka-Topic", "http-restart") + req.Header.Set("Content-Type", "application/octet-stream") + + clientErr := make(chan error, 1) + go func() { + resp, err := http.DefaultClient.Do(req) + if err == nil && resp != nil { + resp.Body.Close() + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + clientErr <- nil + return + } + err = fmt.Errorf("status %d", resp.StatusCode) + } + clientErr <- err + }() + + time.Sleep(50 * time.Millisecond) + _ = signalProcessGroup(proxyCmd, os.Interrupt) + _ = proxyCmd.Wait() + + <-clientErr + + proxyCmd, _ = startProxy() + defer func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + _ = proxyCmd.Wait() + }() + waitForPortWithTimeout(t, "127.0.0.1:"+httpPort, 15*time.Second) + + 
payload := []byte("restart-ok") + req2, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%s/lfs/produce", httpPort), bytes.NewReader(payload)) + if err != nil { + t.Fatalf("build request: %v", err) + } + req2.Header.Set("X-Kafka-Topic", "http-restart") + req2.Header.Set("Content-Type", "application/octet-stream") + + resp, err := http.DefaultClient.Do(req2) + if err != nil { + t.Fatalf("http produce failed: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("unexpected status %d: %s", resp.StatusCode, string(body)) + } + + deadline := time.After(10 * time.Second) + for { + select { + case value := <-received: + var got lfs.Envelope + if err := json.Unmarshal(value, &got); err != nil { + t.Fatalf("expected envelope json: %v", err) + } + if got.Key == "" || got.Bucket == "" { + t.Fatalf("unexpected envelope: %+v", got) + } + return + case <-deadline: + t.Fatalf("timed out waiting for backend record") + } + } +} + +func TestLfsProxyHTTPBackendUnavailable(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + s3Server := newFakeS3Server(t) + t.Cleanup(s3Server.Close) + + brokerAddr, _, closeBackend := startFakeKafkaBackend(t) + etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "http-backend-down") + t.Cleanup(closeBackend) + + proxyPort := pickFreePort(t) + httpPort := pickFreePort(t) + healthPort := pickFreePort(t) + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", 
proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HTTP_ADDR=127.0.0.1:%s", httpPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), + "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-e2e", + "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), + "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + proxyCmd.Stdout = io.MultiWriter(mustLogFile(t, "lfs-proxy-http-backend-down.log")) + proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + t.Cleanup(func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + _ = proxyCmd.Wait() + }) + waitForPortWithTimeout(t, "127.0.0.1:"+httpPort, 15*time.Second) + + closeBackend() + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%s/lfs/produce", httpPort), bytes.NewReader([]byte("payload"))) + if err != nil { + t.Fatalf("build request: %v", err) + } + req.Header.Set("X-Kafka-Topic", "http-backend-down") + req.Header.Set("Content-Type", "application/octet-stream") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("http produce failed: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusServiceUnavailable && resp.StatusCode != http.StatusBadGateway { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("unexpected status %d: %s", resp.StatusCode, string(body)) + } + var body httpErrorResponse + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + t.Fatalf("decode error response: %v", err) + } + if body.Code == "" { + t.Fatalf("expected error code in response") + } +} + +func newSlowReader(payload []byte, chunk int, delay 
time.Duration) io.Reader { + return &slowReader{payload: payload, chunk: chunk, delay: delay} +} + +type slowReader struct { + payload []byte + chunk int + delay time.Duration + idx int +} + +func (r *slowReader) Read(p []byte) (int, error) { + if r.idx >= len(r.payload) { + return 0, io.EOF + } + if r.delay > 0 { + time.Sleep(r.delay) + } + end := r.idx + r.chunk + if end > len(r.payload) { + end = len(r.payload) + } + n := copy(p, r.payload[r.idx:end]) + r.idx += n + return n, nil +} + +type fakeS3Server struct { + *httptest.Server + mu sync.Mutex + buckets map[string]struct{} + uploads map[string]*multipartUpload + objects map[string][]byte + counter int64 +} + +type multipartUpload struct { + bucket string + key string + data []byte +} + +func newFakeS3Server(t *testing.T) *fakeS3Server { + t.Helper() + fs := &fakeS3Server{ + buckets: make(map[string]struct{}), + uploads: make(map[string]*multipartUpload), + objects: make(map[string][]byte), + } + handler := http.NewServeMux() + handler.HandleFunc("/", fs.serve) + fs.Server = httptest.NewServer(handler) + return fs +} + +func (f *fakeS3Server) serve(w http.ResponseWriter, r *http.Request) { + bucket, key := splitBucketKey(r.URL.Path) + switch r.Method { + case http.MethodHead: + f.headBucket(w, bucket) + return + case http.MethodPut: + if r.URL.Query().Get("partNumber") != "" && r.URL.Query().Get("uploadId") != "" { + f.uploadPart(w, r, bucket, key) + return + } + if key == "" { + f.putBucket(w, bucket) + return + } + f.putObject(w, r, bucket, key) + return + case http.MethodPost: + if _, ok := r.URL.Query()["uploads"]; ok { + f.createMultipart(w, bucket, key) + return + } + if r.URL.Query().Get("uploadId") != "" { + f.completeMultipart(w, r.URL.Query().Get("uploadId")) + return + } + } + http.Error(w, "not implemented", http.StatusNotImplemented) +} + +func (f *fakeS3Server) headBucket(w http.ResponseWriter, bucket string) { + f.mu.Lock() + defer f.mu.Unlock() + if _, ok := f.buckets[bucket]; !ok { + 
w.WriteHeader(http.StatusNotFound)
+		return
+	}
+	w.WriteHeader(http.StatusOK)
+}
+
+func (f *fakeS3Server) putBucket(w http.ResponseWriter, bucket string) {
+	f.mu.Lock()
+	f.buckets[bucket] = struct{}{}
+	f.mu.Unlock()
+	w.WriteHeader(http.StatusOK)
+}
+
+func (f *fakeS3Server) putObject(w http.ResponseWriter, r *http.Request, bucket, key string) {
+	body, _ := io.ReadAll(r.Body)
+	f.mu.Lock()
+	f.objects[bucket+"/"+key] = body
+	f.buckets[bucket] = struct{}{}
+	f.mu.Unlock()
+	w.Header().Set("ETag", "\"fake\"")
+	w.WriteHeader(http.StatusOK)
+}
+
+func (f *fakeS3Server) createMultipart(w http.ResponseWriter, bucket, key string) {
+	f.mu.Lock()
+	f.counter++
+	uploadID := fmt.Sprintf("upload-%d", f.counter)
+	f.uploads[uploadID] = &multipartUpload{bucket: bucket, key: key}
+	f.buckets[bucket] = struct{}{}
+	f.mu.Unlock()
+	w.Header().Set("Content-Type", "application/xml")
+	// The AWS SDK parses the UploadId out of the InitiateMultipartUploadResult XML body.
+	fmt.Fprintf(w, "<InitiateMultipartUploadResult><UploadId>%s</UploadId></InitiateMultipartUploadResult>", uploadID)
+}
+
+func (f *fakeS3Server) uploadPart(w http.ResponseWriter, r *http.Request, bucket, key string) {
+	uploadID := r.URL.Query().Get("uploadId")
+	body, _ := io.ReadAll(r.Body)
+	f.mu.Lock()
+	upload := f.uploads[uploadID]
+	if upload != nil {
+		upload.data = append(upload.data, body...)
+ } + f.mu.Unlock() + w.Header().Set("ETag", "\"part\"") + w.WriteHeader(http.StatusOK) +} + +func (f *fakeS3Server) completeMultipart(w http.ResponseWriter, uploadID string) { + f.mu.Lock() + upload := f.uploads[uploadID] + if upload != nil { + f.objects[upload.bucket+"/"+upload.key] = upload.data + delete(f.uploads, uploadID) + } + f.mu.Unlock() + w.Header().Set("Content-Type", "application/xml") + fmt.Fprintf(w, "\"fake\"") +} + +func splitBucketKey(path string) (string, string) { + trimmed := strings.TrimPrefix(path, "/") + if trimmed == "" { + return "", "" + } + parts := strings.SplitN(trimmed, "/", 2) + bucket := parts[0] + if len(parts) == 1 { + return bucket, "" + } + return bucket, parts[1] +} + +func waitForPortWithTimeout(t *testing.T, addr string, timeout time.Duration) { + t.Helper() + deadline := time.After(timeout) + for { + conn, err := net.DialTimeout("tcp", addr, 200*time.Millisecond) + if err == nil { + _ = conn.Close() + return + } + select { + case <-deadline: + t.Fatalf("broker did not start listening on %s: %v", addr, err) + case <-time.After(100 * time.Millisecond): + } + } +} + +func startFakeKafkaBackend(t *testing.T) (string, <-chan []byte, func()) { + t.Helper() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("listen: %v", err) + } + addr := ln.Addr().String() + received := make(chan []byte, 1) + done := make(chan struct{}) + go func() { + defer close(done) + for { + conn, err := ln.Accept() + if err != nil { + return + } + go handleKafkaConn(t, conn, received) + } + }() + return addr, received, func() { + _ = ln.Close() + <-done + } +} + +func handleKafkaConn(t *testing.T, conn net.Conn, received chan<- []byte) { + t.Helper() + defer conn.Close() + frame, err := protocol.ReadFrame(conn) + if err != nil { + return + } + header, req, err := protocol.ParseRequest(frame.Payload) + if err != nil { + return + } + prodReq, ok := req.(*protocol.ProduceRequest) + if !ok { + return + } + if len(prodReq.Topics) > 0 && 
len(prodReq.Topics[0].Partitions) > 0 { + records := prodReq.Topics[0].Partitions[0].Records + value := extractFirstRecordValue(records) + if len(value) > 0 { + select { + case received <- value: + default: + } + } + } + respPayload, _ := buildProduceResponse(prodReq, header.CorrelationID, header.APIVersion) + _ = protocol.WriteFrame(conn, respPayload) +} + +func buildProduceResponse(req *protocol.ProduceRequest, correlationID int32, version int16) ([]byte, error) { + resp := &kmsg.ProduceResponse{} + for _, topic := range req.Topics { + rt := kmsg.NewProduceResponseTopic() + rt.Topic = topic.Topic + for _, part := range topic.Partitions { + rp := kmsg.NewProduceResponseTopicPartition() + rp.Partition = part.Partition + rp.ErrorCode = protocol.NONE + rt.Partitions = append(rt.Partitions, rp) + } + resp.Topics = append(resp.Topics, rt) + } + return protocol.EncodeResponse(correlationID, version, resp), nil +} + +func extractFirstRecordValue(records []byte) []byte { + if len(records) == 0 { + return nil + } + var batch kmsg.RecordBatch + if err := batch.ReadFrom(records); err != nil { + return nil + } + raw := batch.Records + recordsOut := make([]kmsg.Record, int(batch.NumRecords)) + recordsOut = readRawRecordsInto(recordsOut, raw) + if len(recordsOut) == 0 { + return nil + } + return recordsOut[0].Value +} + +func readRawRecordsInto(rs []kmsg.Record, in []byte) []kmsg.Record { + for i := range rs { + length, used := binary.Varint(in) + total := used + int(length) + if used == 0 || length < 0 || len(in) < total { + return rs[:i] + } + if err := (&rs[i]).ReadFrom(in[:total]); err != nil { + rs[i] = kmsg.Record{} + return rs[:i] + } + in = in[total:] + } + return rs +} + +type httpErrorResponse struct { + Code string `json:"code"` + Message string `json:"message"` + RequestID string `json:"request_id"` +} diff --git a/test/e2e/lfs_proxy_test.go b/test/e2e/lfs_proxy_test.go new file mode 100644 index 00000000..401bb422 --- /dev/null +++ b/test/e2e/lfs_proxy_test.go @@ 
-0,0 +1,462 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build e2e + +package e2e + +import ( + "bytes" + "context" + "crypto/rand" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/twmb/franz-go/pkg/kgo" +) + +// TestLfsProxyKafkaProtocol tests the LFS proxy with native Kafka protocol. +// Uses franz-go client to produce messages with LFS_BLOB header. 
+func TestLfsProxyKafkaProtocol(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + // Start fake S3 server + s3Server := newFakeS3Server(t) + t.Cleanup(s3Server.Close) + + // Start fake Kafka backend + brokerAddr, received, closeBackend := startFakeKafkaBackend(t) + // Start embedded etcd and seed topics for metadata responses + etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "lfs-test-topic", "regular-topic", "checksum-test") + t.Cleanup(closeBackend) + + // Start LFS proxy + proxyPort := pickFreePort(t) + healthPort := pickFreePort(t) + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), + "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-test", + "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), + "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + var proxyLogs bytes.Buffer + proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-kafka.log")} + proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) 
+ proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + t.Cleanup(func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + done := make(chan struct{}) + go func() { + _ = proxyCmd.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(2 * time.Second): + _ = signalProcessGroup(proxyCmd, os.Kill) + } + }) + waitForPortWithTimeout(t, "127.0.0.1:"+proxyPort, 15*time.Second) + + // Create franz-go client pointing to proxy + client, err := kgo.NewClient( + kgo.SeedBrokers("127.0.0.1:"+proxyPort), + kgo.AllowAutoTopicCreation(), + ) + if err != nil { + t.Fatalf("create client: %v", err) + } + defer client.Close() + + // Generate random blob + blob := make([]byte, 1024) + if _, err := rand.Read(blob); err != nil { + t.Fatalf("generate blob: %v", err) + } + + // Produce with LFS_BLOB header + record := &kgo.Record{ + Topic: "lfs-test-topic", + Key: []byte("test-key"), + Value: blob, + Headers: []kgo.RecordHeader{ + {Key: "LFS_BLOB", Value: nil}, + }, + } + res := client.ProduceSync(ctx, record) + if err := res.FirstErr(); err != nil { + t.Fatalf("produce: %v", err) + } + + // Wait for backend to receive the envelope + deadline := time.After(10 * time.Second) + for { + select { + case value := <-received: + // Should receive an LFS envelope, not the original blob + if !lfs.IsLfsEnvelope(value) { + t.Fatalf("expected LFS envelope, got: %s", string(value)) + } + + var env lfs.Envelope + if err := json.Unmarshal(value, &env); err != nil { + t.Fatalf("decode envelope: %v", err) + } + + // Verify envelope fields + if env.Version != 1 { + t.Errorf("Version = %d, want 1", env.Version) + } + if env.Bucket != "lfs-test" { + t.Errorf("Bucket = %s, want lfs-test", env.Bucket) + } + if env.Size != int64(len(blob)) { + t.Errorf("Size = %d, want %d", env.Size, len(blob)) + } + + // Verify checksum matches + expectedHash := sha256.Sum256(blob) + expectedChecksum := hex.EncodeToString(expectedHash[:]) 
+ if env.SHA256 != expectedChecksum { + t.Errorf("SHA256 = %s, want %s", env.SHA256, expectedChecksum) + } + + // Verify blob was stored in S3 + s3Key := env.Key + s3Server.mu.Lock() + storedBlob, ok := s3Server.objects["lfs-test/"+s3Key] + s3Server.mu.Unlock() + if !ok { + t.Errorf("blob not found in S3 at key: %s", s3Key) + } else if !bytes.Equal(storedBlob, blob) { + t.Errorf("stored blob does not match original") + } + + return + case <-deadline: + t.Fatalf("timed out waiting for backend record") + } + } +} + +// TestLfsProxyPassthrough tests that non-LFS messages pass through unchanged. +func TestLfsProxyPassthrough(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + // Start fake S3 server + s3Server := newFakeS3Server(t) + t.Cleanup(s3Server.Close) + + // Start fake Kafka backend + brokerAddr, received, closeBackend := startFakeKafkaBackend(t) + // Start embedded etcd and seed topics for metadata responses + etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "regular-topic") + t.Cleanup(closeBackend) + + // Start LFS proxy + proxyPort := pickFreePort(t) + healthPort := pickFreePort(t) + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), + "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-test", + "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", + 
fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), + "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + var proxyLogs bytes.Buffer + proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-passthrough.log")} + proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) + proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + t.Cleanup(func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + done := make(chan struct{}) + go func() { + _ = proxyCmd.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(2 * time.Second): + _ = signalProcessGroup(proxyCmd, os.Kill) + } + }) + waitForPortWithTimeout(t, "127.0.0.1:"+proxyPort, 15*time.Second) + + // Create franz-go client + client, err := kgo.NewClient( + kgo.SeedBrokers("127.0.0.1:"+proxyPort), + kgo.AllowAutoTopicCreation(), + ) + if err != nil { + t.Fatalf("create client: %v", err) + } + defer client.Close() + + // Produce without LFS_BLOB header (regular message) + plainValue := []byte("regular message without LFS") + record := &kgo.Record{ + Topic: "regular-topic", + Key: []byte("key"), + Value: plainValue, + } + res := client.ProduceSync(ctx, record) + if err := res.FirstErr(); err != nil { + t.Fatalf("produce: %v", err) + } + + // Wait for backend to receive the message + deadline := time.After(10 * time.Second) + for { + select { + case value := <-received: + // Should receive the original message unchanged + if lfs.IsLfsEnvelope(value) { + t.Fatalf("expected plain message, got LFS envelope") + } + if !bytes.Equal(value, plainValue) { + t.Errorf("value = %q, want %q", value, plainValue) + } + return + case <-deadline: + t.Fatalf("timed out waiting for backend record") + } + } +} + +// TestLfsProxyChecksumValidation tests that checksum validation works. 
+func TestLfsProxyChecksumValidation(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + // Start fake S3 server + s3Server := newFakeS3Server(t) + t.Cleanup(s3Server.Close) + + // Start fake Kafka backend + brokerAddr, _, closeBackend := startFakeKafkaBackend(t) + // Start embedded etcd and seed topics for metadata responses + etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "checksum-test") + t.Cleanup(closeBackend) + + // Start LFS proxy + proxyPort := pickFreePort(t) + healthPort := pickFreePort(t) + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), + "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-test", + "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), + "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + var proxyLogs bytes.Buffer + proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-checksum.log")} + proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) 
+ proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + t.Cleanup(func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + done := make(chan struct{}) + go func() { + _ = proxyCmd.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(2 * time.Second): + _ = signalProcessGroup(proxyCmd, os.Kill) + } + }) + waitForPortWithTimeout(t, "127.0.0.1:"+proxyPort, 15*time.Second) + + // Create franz-go client + client, err := kgo.NewClient( + kgo.SeedBrokers("127.0.0.1:"+proxyPort), + kgo.AllowAutoTopicCreation(), + ) + if err != nil { + t.Fatalf("create client: %v", err) + } + defer client.Close() + + // Produce with wrong checksum in LFS_BLOB header + blob := []byte("test blob data") + wrongChecksum := "0000000000000000000000000000000000000000000000000000000000000000" + record := &kgo.Record{ + Topic: "checksum-test", + Key: []byte("key"), + Value: blob, + Headers: []kgo.RecordHeader{ + {Key: "LFS_BLOB", Value: []byte(wrongChecksum)}, + }, + } + res := client.ProduceSync(ctx, record) + err = res.FirstErr() + + // Should fail with checksum error + if err == nil { + t.Fatalf("expected checksum error, got nil") + } + t.Logf("got expected error: %v", err) +} + +// TestLfsProxyHealthEndpoint tests the health endpoints. 
+func TestLfsProxyHealthEndpoint(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" { + t.Skipf("set %s=1 to run integration harness", enableEnv) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + // Start fake S3 server + s3Server := newFakeS3Server(t) + t.Cleanup(s3Server.Close) + + // Start fake Kafka backend + brokerAddr, _, closeBackend := startFakeKafkaBackend(t) + // Start embedded etcd and seed topics for metadata responses + etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "health-check") + t.Cleanup(closeBackend) + + // Start LFS proxy + proxyPort := pickFreePort(t) + healthPort := pickFreePort(t) + proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) + configureProcessGroup(proxyCmd) + proxyCmd.Env = append(os.Environ(), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), + "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), + fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), + fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), + "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-test", + "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", + fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), + "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", + "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", + "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", + ) + var proxyLogs bytes.Buffer + proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-health.log")} + proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) 
+ proxyCmd.Stderr = proxyCmd.Stdout + if err := proxyCmd.Start(); err != nil { + t.Fatalf("start lfs-proxy: %v", err) + } + t.Cleanup(func() { + _ = signalProcessGroup(proxyCmd, os.Interrupt) + done := make(chan struct{}) + go func() { + _ = proxyCmd.Wait() + close(done) + }() + select { + case <-done: + case <-time.After(2 * time.Second): + _ = signalProcessGroup(proxyCmd, os.Kill) + } + }) + waitForPortWithTimeout(t, "127.0.0.1:"+healthPort, 15*time.Second) + + // Test /livez endpoint + resp, err := http.Get(fmt.Sprintf("http://127.0.0.1:%s/livez", healthPort)) + if err != nil { + t.Fatalf("livez request failed: %v", err) + } + resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Errorf("/livez status = %d, want 200", resp.StatusCode) + } + + // Test /readyz endpoint + resp, err = http.Get(fmt.Sprintf("http://127.0.0.1:%s/readyz", healthPort)) + if err != nil { + t.Fatalf("readyz request failed: %v", err) + } + resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Errorf("/readyz status = %d, want 200", resp.StatusCode) + } +} diff --git a/test/e2e/lfs_sdk_test.go b/test/e2e/lfs_sdk_test.go new file mode 100644 index 00000000..a0568fd2 --- /dev/null +++ b/test/e2e/lfs_sdk_test.go @@ -0,0 +1,184 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build e2e + +package e2e + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "math/rand" + "os" + "strconv" + "strings" + "testing" + "time" + + "github.com/KafScale/platform/pkg/lfs" + "github.com/twmb/franz-go/pkg/kgo" +) + +func TestLfsSDKKindE2E(t *testing.T) { + const enableEnv = "KAFSCALE_E2E" + if os.Getenv(enableEnv) != "1" || !parseBoolEnvLocal("KAFSCALE_E2E_KIND") { + t.Skipf("set %s=1 and KAFSCALE_E2E_KIND=1 to run kind integration test", enableEnv) + } + + brokerAddr := strings.TrimSpace(os.Getenv("KAFSCALE_E2E_BROKER_ADDR")) + if brokerAddr == "" { + t.Skip("KAFSCALE_E2E_BROKER_ADDR not set") + } + + httpURL := lfsProxyHTTPURL(t) + if httpURL == "" { + t.Skip("LFS proxy HTTP URL not configured (set LFS_PROXY_HTTP_URL or LFS_PROXY_SERVICE_HOST)") + } + + cfg, err := s3ConfigFromEnv() + if err != nil { + t.Skip(err.Error()) + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + t.Cleanup(cancel) + + topic := envOrDefaultLocal("LFS_DEMO_TOPIC", "lfs-demo-topic") + payloadSize := ensureMinBlobSize(envOrDefaultLocal("LFS_DEMO_BLOB_SIZE", "2097152")) + payload := buildPayload(payloadSize) + checksum := sha256.Sum256(payload) + checksumHex := hex.EncodeToString(checksum[:]) + + producer := lfs.NewProducer(httpURL) + result, err := producer.Produce(ctx, topic, fmt.Sprintf("sdk-e2e-%d", rand.Int63()), bytes.NewReader(payload)) + if err != nil { + t.Fatalf("produce via lfs proxy: %v", err) + } + if result.Envelope.SHA256 != "" && result.Envelope.SHA256 != checksumHex { + t.Fatalf("checksum mismatch: expected %s got %s", checksumHex, result.Envelope.SHA256) + } + + s3Client, err := lfs.NewS3Client(ctx, cfg) + if err != nil { + t.Fatalf("create s3 client: %v", err) + } + consumer := lfs.NewConsumer(s3Client) + + client, err := kgo.NewClient( + kgo.SeedBrokers(brokerAddr), + kgo.ConsumeTopics(topic), + kgo.ConsumerGroup(fmt.Sprintf("lfs-sdk-e2e-%d", rand.Int63())), + 
kgo.ConsumeResetOffset(kgo.NewOffset().AtStart()), + ) + if err != nil { + t.Fatalf("create kafka client: %v", err) + } + t.Cleanup(client.Close) + + deadline := time.Now().Add(45 * time.Second) + for time.Now().Before(deadline) { + fetches := client.PollFetches(ctx) + if err := fetches.Err(); err != nil { + t.Fatalf("poll fetches: %v", err) + } + iter := fetches.RecordIter() + for !iter.Done() { + record := iter.Next() + resolved, err := consumer.Unwrap(ctx, record.Value) + if err != nil { + t.Fatalf("unwrap lfs record: %v", err) + } + if !bytes.Equal(resolved, payload) { + t.Fatalf("resolved payload mismatch: got %d bytes", len(resolved)) + } + return + } + time.Sleep(500 * time.Millisecond) + } + + t.Fatalf("timed out waiting for LFS envelope on topic %s", topic) +} + +func lfsProxyHTTPURL(t *testing.T) string { + t.Helper() + if url := strings.TrimSpace(os.Getenv("LFS_PROXY_HTTP_URL")); url != "" { + return url + } + host := strings.TrimSpace(os.Getenv("LFS_PROXY_SERVICE_HOST")) + if host == "" { + return "" + } + port := envOrDefaultLocal("LFS_PROXY_HTTP_PORT", "8080") + path := envOrDefaultLocal("LFS_PROXY_HTTP_PATH", "/lfs/produce") + return fmt.Sprintf("http://%s:%s%s", host, port, path) +} + +func s3ConfigFromEnv() (lfs.S3Config, error) { + bucket := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_BUCKET")) + region := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_REGION")) + endpoint := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ENDPOINT")) + accessKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ACCESS_KEY")) + secretKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SECRET_KEY")) + if bucket == "" || region == "" || endpoint == "" || accessKey == "" || secretKey == "" { + return lfs.S3Config{}, fmt.Errorf("set KAFSCALE_LFS_PROXY_S3_BUCKET/REGION/ENDPOINT/ACCESS_KEY/SECRET_KEY") + } + forcePathStyle := parseBoolEnvLocal("KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE") + return lfs.S3Config{ + Bucket: bucket, + Region: region, + 
Endpoint: endpoint, + AccessKeyID: accessKey, + SecretAccessKey: secretKey, + ForcePathStyle: forcePathStyle, + }, nil +} + +func ensureMinBlobSize(raw string) int { + val, err := strconv.Atoi(strings.TrimSpace(raw)) + if err != nil || val <= 0 { + return 2 * 1024 * 1024 + } + if val < 1024*1024 { + return 2 * 1024 * 1024 + } + return val +} + +func buildPayload(size int) []byte { + payload := make([]byte, size) + for i := range payload { + payload[i] = byte(i % 251) + } + return payload +} + +func parseBoolEnvLocal(name string) bool { + switch strings.ToLower(strings.TrimSpace(os.Getenv(name))) { + case "1", "true", "yes", "on": + return true + default: + return false + } +} + +func envOrDefaultLocal(name, fallback string) string { + if val := strings.TrimSpace(os.Getenv(name)); val != "" { + return val + } + return fallback +} diff --git a/test/e2e/log_test.go b/test/e2e/log_test.go index ee72eb8f..703d570b 100644 --- a/test/e2e/log_test.go +++ b/test/e2e/log_test.go @@ -22,6 +22,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" ) +//nolint:unused // kept for e2e test debugging func setupTestLogger() { ctrl.SetLogger(zap.New(zap.UseDevMode(false), zap.WriteTo(io.Discard))) } diff --git a/test/e2e/multi_segment_restart_test.go b/test/e2e/multi_segment_restart_test.go index eda4325a..e8c94566 100644 --- a/test/e2e/multi_segment_restart_test.go +++ b/test/e2e/multi_segment_restart_test.go @@ -40,6 +40,7 @@ func TestMultiSegmentRestartDurability(t *testing.T) { if os.Getenv(enableEnv) != "1" { t.Skipf("set %s=1 to run integration harness", enableEnv) } + requireMinIO(t) ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) defer cancel() diff --git a/test/e2e/operator_console_test.go b/test/e2e/operator_console_test.go index 77f3ed22..ffc25803 100644 --- a/test/e2e/operator_console_test.go +++ b/test/e2e/operator_console_test.go @@ -153,8 +153,11 @@ func TestOperatorConsoleEndToEnd(t *testing.T) { if err := 
json.NewDecoder(statusResp.Body).Decode(&status); err != nil { t.Fatalf("decode status: %v", err) } - if status.Cluster != string(cluster.UID) { - t.Fatalf("expected cluster %s, got %s", cluster.UID, status.Cluster) + if status.Cluster != cluster.Name { + t.Fatalf("expected cluster %s, got %s", cluster.Name, status.Cluster) + } + if status.ClusterID != string(cluster.UID) { + t.Fatalf("expected cluster id %s, got %s", cluster.UID, status.ClusterID) } if status.Brokers.Ready != int(replicas) { t.Fatalf("expected %d brokers ready, got %d", replicas, status.Brokers.Ready) @@ -170,8 +173,9 @@ func TestOperatorConsoleEndToEnd(t *testing.T) { } type consoleStatusPayload struct { - Cluster string `json:"cluster"` - Brokers struct { + Cluster string `json:"cluster"` + ClusterID string `json:"cluster_id"` + Brokers struct { Ready int `json:"ready"` } `json:"brokers"` Topics []struct { diff --git a/test/e2e/ports.go b/test/e2e/ports.go index dc94bdff..91a02eaa 100644 --- a/test/e2e/ports.go +++ b/test/e2e/ports.go @@ -22,6 +22,7 @@ import ( "os" "strings" "testing" + "time" ) func brokerAddrs(t *testing.T) (string, string, string) { @@ -50,3 +51,35 @@ func pickFreePort(t *testing.T) string { } return port } + +// minioEndpoint returns the MinIO endpoint from environment or the default localhost:9000. +func minioEndpoint() string { + if val := strings.TrimSpace(os.Getenv("KAFSCALE_S3_ENDPOINT")); val != "" { + return val + } + return "http://127.0.0.1:9000" +} + +// minioAvailable checks if MinIO is reachable at the configured endpoint. +// Tests that require MinIO should call requireMinIO(t) at the start. 
+func minioAvailable() bool { + endpoint := minioEndpoint() + // Extract host:port from http://host:port + addr := strings.TrimPrefix(endpoint, "http://") + addr = strings.TrimPrefix(addr, "https://") + conn, err := net.DialTimeout("tcp", addr, 2*time.Second) + if err != nil { + return false + } + _ = conn.Close() + return true +} + +// requireMinIO skips the test if MinIO is not available. +// Use this at the start of tests that require a real MinIO instance. +func requireMinIO(t *testing.T) { + t.Helper() + if !minioAvailable() { + t.Skipf("MinIO not available at %s; run 'make ensure-minio' first or use 'make test-produce-consume'", minioEndpoint()) + } +} diff --git a/test/e2e/process_group_unix.go b/test/e2e/process_group_unix.go index d2bf4d50..de7d458e 100644 --- a/test/e2e/process_group_unix.go +++ b/test/e2e/process_group_unix.go @@ -24,6 +24,7 @@ import ( "syscall" ) +//nolint:unused // kept for process group management in e2e tests func configureProcessGroup(cmd *exec.Cmd) { if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} @@ -31,6 +32,7 @@ func configureProcessGroup(cmd *exec.Cmd) { cmd.SysProcAttr.Setpgid = true } +//nolint:unused // kept for process group management in e2e tests func signalProcessGroup(cmd *exec.Cmd, sig os.Signal) error { if cmd == nil || cmd.Process == nil { return nil diff --git a/ui/embed_test.go b/ui/embed_test.go new file mode 100644 index 00000000..11a47e66 --- /dev/null +++ b/ui/embed_test.go @@ -0,0 +1,68 @@ +// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ui + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestStaticHandler(t *testing.T) { + handler, err := StaticHandler() + if err != nil { + t.Fatalf("StaticHandler() error: %v", err) + } + if handler == nil { + t.Fatal("StaticHandler() returned nil handler") + } + + // Serve the root (index.html) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/", nil) + handler.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 for /, got %d", rec.Code) + } + if ct := rec.Header().Get("Content-Type"); ct == "" { + t.Fatal("expected Content-Type header") + } + + // Serve CSS file + rec2 := httptest.NewRecorder() + req2 := httptest.NewRequest(http.MethodGet, "/style.css", nil) + handler.ServeHTTP(rec2, req2) + + if rec2.Code != http.StatusOK { + t.Fatalf("expected 200 for /style.css, got %d", rec2.Code) + } +} + +func TestStaticHandlerNotFound(t *testing.T) { + handler, err := StaticHandler() + if err != nil { + t.Fatalf("StaticHandler() error: %v", err) + } + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/nonexistent.xyz", nil) + handler.ServeHTTP(rec, req) + + if rec.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rec.Code) + } +} diff --git a/ui/public/app.js b/ui/public/app.js index 91eafe4c..4c4d56ef 100644 --- a/ui/public/app.js +++ b/ui/public/app.js @@ -7,6 +7,11 @@ let metricsSource = null; let statusInterval = null; let brokersExpanded = false; +// LFS state +let lfsEventsSource = null; +let lfsCurrentPath = ''; +let 
lfsTopicsCache = []; + const loginScreen = document.getElementById('login-screen'); const appScreen = document.getElementById('app'); const loginForm = document.getElementById('login-form'); @@ -522,7 +527,499 @@ if (logoutButton) { }); } +// LFS Dashboard Functions +function formatBytes(bytes) { + if (bytes === 0) return '0 B'; + const k = 1024; + const sizes = ['B', 'KB', 'MB', 'GB', 'TB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; +} + +function formatRelativeTime(timestamp) { + if (!timestamp) return '-'; + const date = new Date(timestamp); + const now = new Date(); + const diff = now - date; + if (diff < 60000) return 'just now'; + if (diff < 3600000) return `${Math.floor(diff / 60000)}m ago`; + if (diff < 86400000) return `${Math.floor(diff / 3600000)}h ago`; + return date.toLocaleDateString(); +} + +async function fetchLfsStatus() { + try { + const resp = await fetch('/ui/api/lfs/status'); + if (resp.status === 401) { + return; + } + if (!resp.ok) { + updateLfsStatusIndicator(false, 'LFS unavailable'); + return; + } + const data = await resp.json(); + renderLfsStatus(data); + } catch (err) { + updateLfsStatusIndicator(false, `Error: ${err.message}`); + } +} + +function updateLfsStatusIndicator(enabled, message) { + const indicator = document.getElementById('lfs-status-indicator'); + const text = document.getElementById('lfs-status-text'); + const dot = indicator ? indicator.querySelector('.status-dot') : null; + if (dot) { + dot.classList.remove('healthy', 'degraded', 'unavailable'); + dot.classList.add(enabled ? 'healthy' : 'unavailable'); + } + if (text) { + text.textContent = message || (enabled ? 'LFS Enabled' : 'LFS Disabled'); + } +} + +function updateLfsConsumerIndicator(connected, message) { + const indicator = document.getElementById('lfs-consumer-indicator'); + const text = document.getElementById('lfs-consumer-text'); + const dot = indicator ? 
indicator.querySelector('.status-dot') : null; + if (dot) { + dot.classList.remove('healthy', 'degraded', 'unavailable'); + dot.classList.add(connected ? 'healthy' : 'unavailable'); + } + if (text) { + text.textContent = message || (connected ? 'LFS connection: connected' : 'LFS connection: disconnected'); + } +} + +function renderLfsStatus(data) { + updateLfsStatusIndicator(data.enabled, data.enabled ? 'LFS Enabled' : 'LFS Disabled'); + if (data.consumer_status) { + const status = data.consumer_status; + const message = status.connected + ? `LFS connection: connected (last poll ${formatRelativeTime(status.last_poll_at)})` + : `LFS connection: disconnected${status.last_error ? `: ${status.last_error}` : ''}`; + updateLfsConsumerIndicator(Boolean(status.connected), message); + } else { + updateLfsConsumerIndicator(false, 'Tracker consumer unavailable'); + } + + const stats = data.stats || {}; + document.getElementById('lfs-total-objects').textContent = metricFormatter.format(stats.total_objects || 0); + document.getElementById('lfs-total-storage').textContent = formatBytes(stats.total_bytes || 0); + document.getElementById('lfs-uploads-24h').textContent = metricFormatter.format(stats.uploads_24h || 0); + document.getElementById('lfs-downloads-24h').textContent = metricFormatter.format(stats.downloads_24h || 0); + document.getElementById('lfs-errors-24h').textContent = metricFormatter.format(stats.errors_24h || 0); + document.getElementById('lfs-orphans-count').textContent = metricFormatter.format(stats.orphans_pending || 0); + + document.getElementById('lfs-s3-bucket').textContent = data.s3_bucket || '-'; + document.getElementById('lfs-tracker-topic').textContent = data.tracker_topic || '-'; + + // Update topic filter dropdown + const topicFilter = document.getElementById('lfs-objects-topic-filter'); + if (topicFilter && data.topics_with_lfs) { + lfsTopicsCache = data.topics_with_lfs; + topicFilter.innerHTML = ''; + data.topics_with_lfs.forEach(topic => { + const 
option = document.createElement('option'); + option.value = topic; + option.textContent = topic; + topicFilter.appendChild(option); + }); + } +} + +async function fetchLfsTopics() { + try { + const resp = await fetch('/ui/api/lfs/topics'); + if (!resp.ok) return; + const data = await resp.json(); + renderLfsTopics(data.topics || []); + } catch (err) { + console.error('Failed to fetch LFS topics:', err); + } +} + +function renderLfsTopics(topics) { + const grid = document.getElementById('lfs-topics-grid'); + if (!grid) return; + grid.innerHTML = ''; + + if (!topics.length) { + grid.innerHTML = '

No topics with LFS data yet.

'; + return; + } + + topics.forEach(topic => { + const card = document.createElement('div'); + card.className = 'lfs-topic-card'; + const statusDot = topic.has_lfs ? '' : ''; + card.innerHTML = ` +

${statusDot}${topic.name}

+
+
Objects${metricFormatter.format(topic.object_count || 0)}
+
Size${formatBytes(topic.total_bytes || 0)}
+
Uploads (24h)${topic.uploads_24h || 0}
+
Downloads (24h)${topic.downloads_24h || 0}
+
Errors (24h)${topic.errors_24h || 0}
+
Last Event${formatRelativeTime(topic.last_event)}
+
+ `; + grid.appendChild(card); + }); +} + +async function fetchLfsObjects(topic = '') { + const status = document.getElementById('lfs-objects-status'); + if (status) status.textContent = 'Loading objects...'; + + try { + const url = topic ? `/ui/api/lfs/objects?topic=${encodeURIComponent(topic)}&limit=50` : '/ui/api/lfs/objects?limit=50'; + const resp = await fetch(url); + if (!resp.ok) { + if (status) status.textContent = 'Failed to load objects'; + return; + } + const data = await resp.json(); + renderLfsObjects(data.objects || [], data.total_count || 0); + } catch (err) { + if (status) status.textContent = `Error: ${err.message}`; + } +} + +function renderLfsObjects(objects, totalCount) { + const tbody = document.querySelector('#lfs-objects-table tbody'); + const status = document.getElementById('lfs-objects-status'); + + if (!tbody) return; + tbody.innerHTML = ''; + + if (!objects.length) { + tbody.innerHTML = 'No objects found'; + if (status) status.textContent = 'No objects found'; + return; + } + + objects.forEach(obj => { + const row = document.createElement('tr'); + const shortKey = obj.s3_key ? 
obj.s3_key.split('/').pop() : '-'; + row.innerHTML = ` + ${shortKey} + ${obj.topic || '-'} + ${formatBytes(obj.size || 0)} + ${formatRelativeTime(obj.created_at)} + + + + `; + tbody.appendChild(row); + }); + + if (status) status.textContent = `Showing ${objects.length} of ${totalCount} objects`; + + // Add click handlers for presign buttons + tbody.querySelectorAll('.lfs-presign-btn').forEach(btn => { + btn.addEventListener('click', async () => { + const key = btn.dataset.key; + if (!key) return; + try { + const resp = await fetch('/ui/api/lfs/s3/presign', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ s3_key: key, ttl_seconds: 300 }) + }); + if (!resp.ok) { + alert('Failed to generate download URL'); + return; + } + const data = await resp.json(); + if (data.url) { + window.open(data.url, '_blank'); + } + } catch (err) { + alert(`Error: ${err.message}`); + } + }); + }); +} + +function startLfsEventsStream(filter = '') { + const status = document.getElementById('lfs-events-status'); + const list = document.getElementById('lfs-events-list'); + + if (lfsEventsSource) { + lfsEventsSource.close(); + } + + const url = filter ? 
`/ui/api/lfs/events?types=${encodeURIComponent(filter)}` : '/ui/api/lfs/events'; + lfsEventsSource = new EventSource(url); + + lfsEventsSource.onopen = () => { + if (status) status.textContent = 'Connected to event stream'; + }; + + lfsEventsSource.onmessage = event => { + try { + const data = JSON.parse(event.data); + addLfsEvent(data); + } catch (err) { + // Ignore parse errors for keepalive messages + } + }; + + lfsEventsSource.onerror = () => { + if (status) status.textContent = 'Event stream disconnected'; + lfsEventsSource.close(); + }; +} + +function addLfsEvent(event) { + const list = document.getElementById('lfs-events-list'); + if (!list) return; + + const item = document.createElement('div'); + item.className = `lfs-event-item lfs-event-${event.event_type || 'unknown'}`; + + const typeLabels = { + 'upload_started': 'Upload Started', + 'upload_completed': 'Upload Complete', + 'upload_failed': 'Upload Failed', + 'download_requested': 'Download Requested', + 'download_completed': 'Download Complete', + 'orphan_detected': 'Orphan Detected' + }; + + const label = typeLabels[event.event_type] || event.event_type; + const time = event.timestamp ? new Date(event.timestamp).toLocaleTimeString() : ''; + + item.innerHTML = ` + ${label} + ${event.topic || '-'} + ${event.s3_key ? 
event.s3_key.split('/').pop() : '-'} + ${time} + `; + + // Keep only last 100 events + if (list.children.length >= 100) { + list.removeChild(list.lastChild); + } + + list.insertBefore(item, list.firstChild); +} + +function clearLfsEvents() { + const list = document.getElementById('lfs-events-list'); + if (list) list.innerHTML = ''; +} + +async function fetchLfsOrphans() { + try { + const resp = await fetch('/ui/api/lfs/orphans'); + if (!resp.ok) return; + const data = await resp.json(); + renderLfsOrphans(data.orphans || [], data.total_size || 0); + } catch (err) { + console.error('Failed to fetch LFS orphans:', err); + } +} + +function renderLfsOrphans(orphans, totalSize) { + const summary = document.getElementById('lfs-orphans-summary'); + const tbody = document.querySelector('#lfs-orphans-table tbody'); + + if (summary) { + if (orphans.length === 0) { + summary.innerHTML = '

No orphaned objects detected.

'; + } else { + summary.innerHTML = `

${orphans.length} orphaned objects (${formatBytes(totalSize)} total)

`; + } + } + + if (!tbody) return; + tbody.innerHTML = ''; + + if (!orphans.length) { + tbody.innerHTML = 'No orphans detected'; + return; + } + + orphans.forEach(orphan => { + const row = document.createElement('tr'); + const shortKey = orphan.s3_key ? orphan.s3_key.split('/').pop() : '-'; + row.innerHTML = ` + ${shortKey} + ${orphan.topic || '-'} + ${formatRelativeTime(orphan.detected_at)} + ${orphan.reason || '-'} + `; + tbody.appendChild(row); + }); +} + +async function fetchLfsS3Browse(prefix = '') { + const status = document.getElementById('lfs-s3-status'); + const pathEl = document.getElementById('lfs-s3-path'); + + lfsCurrentPath = prefix; + if (pathEl) pathEl.textContent = '/' + prefix; + if (status) status.textContent = 'Loading...'; + + try { + const url = `/ui/api/lfs/s3/browse?prefix=${encodeURIComponent(prefix)}&delimiter=/&max_keys=100`; + const resp = await fetch(url); + if (!resp.ok) { + if (status) status.textContent = 'Failed to browse S3'; + return; + } + const data = await resp.json(); + renderLfsS3Browser(data.common_prefixes || [], data.objects || [], data.is_truncated); + } catch (err) { + if (status) status.textContent = `Error: ${err.message}`; + } +} + +function renderLfsS3Browser(prefixes, objects, isTruncated) { + const tbody = document.querySelector('#lfs-s3-table tbody'); + const status = document.getElementById('lfs-s3-status'); + + if (!tbody) return; + tbody.innerHTML = ''; + + // Render directories (prefixes) + prefixes.forEach(prefix => { + const row = document.createElement('tr'); + row.className = 'lfs-s3-dir'; + const name = prefix.replace(lfsCurrentPath, '').replace(/\/$/, ''); + row.innerHTML = ` + 📁 ${name} + - + - + - + `; + row.addEventListener('click', () => fetchLfsS3Browse(prefix)); + tbody.appendChild(row); + }); + + // Render files (objects) + objects.forEach(obj => { + const row = document.createElement('tr'); + const name = obj.key ? 
obj.key.replace(lfsCurrentPath, '') : '-'; + row.innerHTML = ` + 📄 ${name} + ${formatBytes(obj.size || 0)} + ${formatRelativeTime(obj.last_modified)} + + + + `; + tbody.appendChild(row); + }); + + if (prefixes.length === 0 && objects.length === 0) { + tbody.innerHTML = 'No objects at this path'; + } + + if (status) { + const count = prefixes.length + objects.length; + status.textContent = `${count} items${isTruncated ? ' (truncated)' : ''}`; + } + + // Add click handlers for download buttons + tbody.querySelectorAll('.lfs-s3-download-btn').forEach(btn => { + btn.addEventListener('click', async (e) => { + e.stopPropagation(); + const key = btn.dataset.key; + if (!key) return; + try { + const resp = await fetch('/ui/api/lfs/s3/presign', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ s3_key: key, ttl_seconds: 300 }) + }); + if (!resp.ok) { + alert('Failed to generate download URL'); + return; + } + const data = await resp.json(); + if (data.url) { + window.open(data.url, '_blank'); + } + } catch (err) { + alert(`Error: ${err.message}`); + } + }); + }); +} + +function navigateLfsS3Up() { + if (!lfsCurrentPath) return; + const parts = lfsCurrentPath.replace(/\/$/, '').split('/'); + parts.pop(); + const newPath = parts.length > 0 ? parts.join('/') + '/' : ''; + fetchLfsS3Browse(newPath); +} + +function initLfsTab() { + // Topic filter change + const topicFilter = document.getElementById('lfs-objects-topic-filter'); + if (topicFilter) { + topicFilter.addEventListener('change', () => { + fetchLfsObjects(topicFilter.value); + }); + } + + // Objects refresh button + const objectsRefresh = document.getElementById('lfs-objects-refresh'); + if (objectsRefresh) { + objectsRefresh.addEventListener('click', () => { + const filter = document.getElementById('lfs-objects-topic-filter'); + fetchLfsObjects(filter ? 
filter.value : ''); + }); + } + + // Events filter change + const eventsFilter = document.getElementById('lfs-events-filter'); + if (eventsFilter) { + eventsFilter.addEventListener('change', () => { + clearLfsEvents(); + startLfsEventsStream(eventsFilter.value); + }); + } + + // Events clear button + const eventsClear = document.getElementById('lfs-events-clear'); + if (eventsClear) { + eventsClear.addEventListener('click', clearLfsEvents); + } + + // S3 browser navigation + const s3Up = document.getElementById('lfs-s3-up'); + if (s3Up) { + s3Up.addEventListener('click', navigateLfsS3Up); + } + + const s3Refresh = document.getElementById('lfs-s3-refresh'); + if (s3Refresh) { + s3Refresh.addEventListener('click', () => fetchLfsS3Browse(lfsCurrentPath)); + } +} + +function startLfsConsole() { + fetchLfsStatus(); + fetchLfsTopics(); + fetchLfsObjects(); + fetchLfsOrphans(); + fetchLfsS3Browse(''); + startLfsEventsStream(); +} + +function stopLfsConsole() { + if (lfsEventsSource) { + lfsEventsSource.close(); + lfsEventsSource = null; + } +} + initAuth(); +initLfsTab(); document.querySelectorAll('.tab-button').forEach(button => { button.addEventListener('click', () => { @@ -533,5 +1030,12 @@ document.querySelectorAll('.tab-button').forEach(button => { document.querySelectorAll('.tab-panel').forEach(panel => { panel.classList.toggle('active', panel.id === `tab-${target}`); }); + + // Start/stop LFS streams based on tab visibility + if (target === 'lfs') { + startLfsConsole(); + } else { + stopLfsConsole(); + } }); }); diff --git a/ui/public/index.html b/ui/public/index.html index 0279be0e..731f95a8 100644 --- a/ui/public/index.html +++ b/ui/public/index.html @@ -60,6 +60,7 @@

KafScale Operations Console

+
@@ -145,6 +146,144 @@

S3 Error Rate

+
+
+

LFS Overview

+
+ + Checking status... +
+
+ + LFS connection status… +
+
+
+

Total Objects

+ 0 +
+
+

Total Storage

+ 0 B +
+
+

Uploads (24h)

+ 0 +
+
+

Downloads (24h)

+ 0 +
+
+

Errors (24h)

+ 0 +
+
+

Orphans

+ 0 +
+
+
+

S3 Bucket: -

+

Tracker Topic: -

+
+
+ +
+

Topics with LFS

+
+
+ +
+

Recent Objects

+
+ + +
+
+ + + + + + + + + + + +
S3 KeyTopicSizeCreatedActions
+
+

Loading objects...

+
+ +
+

Live Events

+
+ + +
+
+

Connecting to event stream...

+
+ +
+

S3 Browser

+
+
+ Path: + / +
+ + +
+
+ + + + + + + + + + +
NameSizeModifiedActions
+
+

Enter a path to browse S3...

+
+ +
+

Orphaned Objects

+
+

No orphaned objects detected.

+
+
+ + + + + + + + + + +
S3 KeyTopicDetectedReason
+
+
+
+

Admin & Control Plane

diff --git a/ui/public/style.css b/ui/public/style.css index 9de20813..546d2de6 100644 --- a/ui/public/style.css +++ b/ui/public/style.css @@ -597,3 +597,346 @@ button { align-items: flex-start; } } + +/* LFS Dashboard Styles */ +#tab-lfs.active { + display: grid; + grid-template-columns: repeat(2, 1fr); + gap: 1.5rem; +} + +#tab-lfs .lfs-overview-section { + grid-column: 1 / -1; +} + +#tab-lfs .lfs-topics-section { + grid-column: 1 / -1; +} + +#tab-lfs .lfs-objects-section { + grid-column: 1; +} + +#tab-lfs .lfs-events-section { + grid-column: 2; +} + +#tab-lfs .lfs-s3-browser-section { + grid-column: 1; +} + +#tab-lfs .lfs-orphans-section { + grid-column: 2; +} + +.lfs-status-indicator { + display: flex; + align-items: center; + gap: 0.5rem; + margin-bottom: 1rem; + padding: 0.5rem 0.75rem; + background: rgba(8, 8, 8, 0.6); + border-radius: 8px; + border: 1px solid rgba(255, 255, 255, 0.08); + width: fit-content; +} + +.lfs-config-info { + margin-top: 1rem; + padding: 0.75rem; + background: rgba(8, 8, 8, 0.5); + border-radius: 8px; + border: 1px solid rgba(255, 255, 255, 0.06); +} + +.lfs-config-info p { + margin: 0.3rem 0; + font-size: 0.85rem; +} + +.lfs-config-info span { + color: var(--fog); +} + +/* LFS Topics Grid */ +.lfs-topics-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); + gap: 1rem; +} + +.lfs-topic-card { + padding: 1rem; + border-radius: 10px; + border: 1px solid rgba(255, 255, 255, 0.08); + background: rgba(12, 12, 12, 0.9); +} + +.lfs-topic-card h4 { + margin: 0 0 0.75rem 0; + font-size: 0.95rem; + color: var(--mint); + word-break: break-word; + display: flex; + align-items: center; + gap: 0.5rem; +} + +.lfs-topic-indicator { + width: 10px; + height: 10px; + border-radius: 50%; + background: var(--success); + display: inline-block; + flex: 0 0 10px; +} + +.lfs-topic-indicator.muted { + background: rgba(255, 255, 255, 0.2); +} + +.lfs-topic-stats { + display: grid; + grid-template-columns: repeat(2, 1fr); + 
gap: 0.5rem; +} + +.lfs-topic-stats div { + display: flex; + flex-direction: column; + gap: 0.2rem; +} + +.lfs-topic-stats span { + font-size: 0.65rem; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--fog); +} + +.lfs-topic-stats strong { + font-size: 0.85rem; +} + +/* LFS Tables */ +.lfs-table-wrap { + overflow-x: auto; + border-radius: 10px; + border: 1px solid rgba(255, 255, 255, 0.08); + margin-top: 0.75rem; + max-height: 350px; + overflow-y: auto; +} + +.lfs-table { + width: 100%; + border-collapse: collapse; + font-size: 0.8rem; +} + +.lfs-table th, +.lfs-table td { + padding: 0.55rem 0.65rem; + text-align: left; + border-bottom: 1px solid rgba(255, 255, 255, 0.06); +} + +.lfs-table th { + font-size: 0.65rem; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--fog); + background: rgba(10, 10, 10, 0.8); + position: sticky; + top: 0; +} + +.lfs-table tbody tr:hover { + background: rgba(255, 255, 255, 0.03); +} + +.lfs-table td:first-child { + max-width: 200px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.lfs-table .ghost-button { + padding: 0.25rem 0.5rem; + font-size: 0.7rem; +} + +/* LFS Toolbars */ +.lfs-objects-toolbar, +.lfs-events-toolbar, +.lfs-s3-toolbar { + display: flex; + align-items: center; + gap: 0.75rem; + flex-wrap: wrap; +} + +.lfs-objects-toolbar select, +.lfs-events-toolbar select { + background: rgba(15, 15, 15, 0.9); + color: #f8fafc; + border: 1px solid rgba(255, 255, 255, 0.12); + border-radius: 8px; + padding: 0.4rem 0.6rem; + font-size: 0.8rem; +} + +.lfs-events-toolbar label { + display: flex; + align-items: center; + gap: 0.4rem; + font-size: 0.8rem; + color: var(--fog); +} + +/* LFS Events List */ +.lfs-events-list { + max-height: 300px; + overflow-y: auto; + margin-top: 0.75rem; + border: 1px solid rgba(255, 255, 255, 0.08); + border-radius: 10px; + background: rgba(8, 8, 8, 0.6); +} + +.lfs-event-item { + display: grid; + grid-template-columns: 120px 1fr 1fr 
auto; + gap: 0.5rem; + padding: 0.5rem 0.75rem; + border-bottom: 1px solid rgba(255, 255, 255, 0.04); + font-size: 0.75rem; + align-items: center; +} + +.lfs-event-item:last-child { + border-bottom: none; +} + +.lfs-event-type { + font-weight: 600; + padding: 0.15rem 0.4rem; + border-radius: 4px; + font-size: 0.65rem; + text-align: center; +} + +.lfs-event-upload_started .lfs-event-type, +.lfs-event-upload_completed .lfs-event-type { + background: rgba(22, 163, 74, 0.2); + color: #4ade80; +} + +.lfs-event-upload_failed .lfs-event-type, +.lfs-event-orphan_detected .lfs-event-type { + background: rgba(220, 38, 38, 0.2); + color: #f87171; +} + +.lfs-event-download_requested .lfs-event-type, +.lfs-event-download_completed .lfs-event-type { + background: rgba(59, 130, 246, 0.2); + color: #93c5fd; +} + +.lfs-event-topic { + color: var(--mint); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.lfs-event-detail { + color: var(--fog); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.lfs-event-time { + color: var(--fog); + font-size: 0.7rem; +} + +/* LFS S3 Browser */ +.lfs-s3-breadcrumb { + flex: 1; + display: flex; + align-items: center; + gap: 0.4rem; + font-size: 0.85rem; + overflow: hidden; +} + +.lfs-s3-breadcrumb span:first-child { + color: var(--fog); +} + +#lfs-s3-path { + color: var(--mint); + word-break: break-all; +} + +.lfs-s3-dir { + cursor: pointer; +} + +.lfs-s3-dir:hover { + background: rgba(24, 198, 172, 0.08) !important; +} + +.lfs-s3-icon { + margin-right: 0.4rem; +} + +/* LFS Orphans */ +.lfs-orphans-summary { + padding: 0.75rem; + border-radius: 8px; + margin-bottom: 0.75rem; +} + +.lfs-orphans-ok { + margin: 0; + color: #4ade80; +} + +.lfs-orphans-warning { + margin: 0; + color: #f87171; + background: rgba(220, 38, 38, 0.1); + padding: 0.5rem 0.75rem; + border-radius: 6px; + border: 1px solid rgba(220, 38, 38, 0.3); +} + +/* Responsive adjustments for LFS tab */ +@media (max-width: 1200px) { + 
#tab-lfs.active { + grid-template-columns: 1fr; + } + + #tab-lfs .lfs-objects-section, + #tab-lfs .lfs-events-section, + #tab-lfs .lfs-s3-browser-section, + #tab-lfs .lfs-orphans-section { + grid-column: 1; + } +} + +@media (max-width: 600px) { + .lfs-event-item { + grid-template-columns: 1fr; + gap: 0.25rem; + } + + .lfs-topics-grid { + grid-template-columns: 1fr; + } +} From ebdbdb84033fa3e09e306aa8e00d2b776bfd4640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20K=C3=A4mpf?= Date: Fri, 6 Mar 2026 09:49:05 +0100 Subject: [PATCH 2/6] chore: strip demo/deployment targets from Makefile, add .claude/ to .gitignore Enforce strict separation: core platform PR must not contain demo scripts, deployment guides, staging infra, or spring-boot demo targets. Only core LFS additions remain (build-sdk, docker-build-lfs-proxy, test-lfs-proxy-broker). Co-Authored-By: Claude Opus 4.6 --- .gitignore | 1 + Makefile | 332 +---------------------------------------------------- 2 files changed, 4 insertions(+), 329 deletions(-) diff --git a/.gitignore b/.gitignore index 954b01ec..c6b9a19f 100644 --- a/.gitignore +++ b/.gitignore @@ -58,6 +58,7 @@ spark-warehouse/ *.swp *.swo .DS_Store +.claude/ # Addon processor artifacts addons/processors/**/bin/ diff --git a/Makefile b/Makefile index d8575624..30bd44f3 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-.PHONY: proto build test tidy lint generate build-sdk docker-build docker-build-e2e-client docker-build-etcd-tools docker-build-lfs-proxy docker-clean ensure-minio start-minio stop-containers release-broker-ports test-produce-consume test-produce-consume-debug test-consumer-group test-ops-api test-mcp test-multi-segment-durability test-lfs-proxy-broker test-full test-operator test-acl demo demo-long demo-platform demo-platform-bootstrap iceberg-demo kafsql-demo lfs-demo lfs-demo-medical lfs-demo-video lfs-demo-industrial platform-demo lfs-demo-idoc act-runnable demo-guide-pf demo-guide-pf-clean help clean-kind-all +.PHONY: proto build test tidy lint generate build-sdk docker-build docker-build-e2e-client docker-build-etcd-tools docker-build-lfs-proxy docker-clean ensure-minio start-minio stop-containers release-broker-ports test-produce-consume test-produce-consume-debug test-consumer-group test-ops-api test-mcp test-multi-segment-durability test-lfs-proxy-broker test-full test-operator test-acl demo demo-platform demo-platform-bootstrap iceberg-demo kafsql-demo platform-demo help clean-kind-all REGISTRY ?= ghcr.io/kafscale STAMP_DIR ?= .build @@ -24,18 +24,9 @@ PROXY_IMAGE ?= $(REGISTRY)/kafscale-proxy:dev LFS_PROXY_IMAGE ?= $(REGISTRY)/kafscale-lfs-proxy:dev SQL_PROCESSOR_IMAGE ?= $(REGISTRY)/kafscale-sql-processor:dev MCP_IMAGE ?= $(REGISTRY)/kafscale-mcp:dev -SPRING_DEMO_IMAGE ?= $(REGISTRY)/kafscale-spring-demo:dev - -OPERATOR_REPO := $(shell echo $(OPERATOR_IMAGE) | sed 's/:[^:]*$$//') -OPERATOR_TAG := $(shell echo $(OPERATOR_IMAGE) | sed 's/.*://') -CONSOLE_REPO := $(shell echo $(CONSOLE_IMAGE) | sed 's/:[^:]*$$//') -CONSOLE_TAG := $(shell echo $(CONSOLE_IMAGE) | sed 's/.*://') -SPRING_DEMO_REPO := $(shell echo $(SPRING_DEMO_IMAGE) | sed 's/:[^:]*$$//') -SPRING_DEMO_TAG := $(shell echo $(SPRING_DEMO_IMAGE) | sed 's/.*://') E2E_CLIENT_IMAGE ?= $(REGISTRY)/kafscale-e2e-client:dev ETCD_TOOLS_IMAGE ?= $(REGISTRY)/kafscale-etcd-tools:dev ICEBERG_PROCESSOR_IMAGE 
?= iceberg-processor:dev -E72_BROWSER_DEMO_IMAGE ?= $(REGISTRY)/kafscale-e72-browser-demo:dev ICEBERG_REST_IMAGE ?= tabulario/iceberg-rest:1.6.0 ICEBERG_REST_PORT ?= 8181 ICEBERG_WAREHOUSE_BUCKET ?= kafscale-snapshots @@ -56,11 +47,6 @@ KAFSQL_DEMO_TOPIC ?= kafsql-demo-topic KAFSQL_DEMO_RECORDS ?= 200 KAFSQL_DEMO_TIMEOUT_SEC ?= 120 KAFSQL_PROCESSOR_RELEASE ?= kafsql-processor-dev -LFS_DEMO_NAMESPACE ?= $(KAFSCALE_DEMO_NAMESPACE) -LFS_DEMO_TOPIC ?= lfs-demo-topic -LFS_DEMO_BLOB_SIZE ?= 524288 -LFS_DEMO_BLOB_COUNT ?= 5 -LFS_DEMO_TIMEOUT_SEC ?= 120 MINIO_CONTAINER ?= kafscale-minio MINIO_IMAGE ?= quay.io/minio/minio:RELEASE.2024-09-22T00-33-43Z MINIO_PORT ?= 9000 @@ -119,7 +105,7 @@ test: ## Run unit tests + vet + race test-acl: ## Run ACL e2e test (requires KAFSCALE_E2E=1) KAFSCALE_E2E=1 go test -tags=e2e ./test/e2e -run TestACLsE2E -docker-build: docker-build-broker docker-build-operator docker-build-console docker-build-proxy docker-build-mcp docker-build-spring-demo docker-build-e2e-client docker-build-etcd-tools docker-build-sql-processor ## Build all container images +docker-build: docker-build-broker docker-build-operator docker-build-console docker-build-proxy docker-build-mcp docker-build-e2e-client docker-build-etcd-tools docker-build-sql-processor ## Build all container images @mkdir -p $(STAMP_DIR) DOCKER_BUILD_CMD := $(shell \ @@ -198,23 +184,10 @@ $(STAMP_DIR)/sql-processor.image: $(SQL_PROCESSOR_SRCS) $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(SQL_PROCESSOR_IMAGE) -f addons/processors/sql-processor/Dockerfile addons/processors/sql-processor @touch $(STAMP_DIR)/sql-processor.image -SPRING_DEMO_SRCS := $(shell find examples/E20_spring-boot-kafscale-demo -type f) -docker-build-spring-demo: $(STAMP_DIR)/spring-demo.image ## Build Spring Boot demo container image -$(STAMP_DIR)/spring-demo.image: $(SPRING_DEMO_SRCS) - @mkdir -p $(STAMP_DIR) - $(DOCKER_BUILD_CMD) -t $(SPRING_DEMO_IMAGE) examples/E20_spring-boot-kafscale-demo - @touch 
$(STAMP_DIR)/spring-demo.image - -docker-build-e72-browser-demo: ## Build E72 browser demo container image - $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(E72_BROWSER_DEMO_IMAGE) -f examples/E72_browser-lfs-sdk-demo/Dockerfile examples/E72_browser-lfs-sdk-demo - -docker-build-iceberg-processor: ## Build Iceberg processor container image - $(MAKE) -C addons/processors/iceberg-processor docker-build IMAGE=$(ICEBERG_PROCESSOR_IMAGE) DOCKER_BUILD_ARGS="$(DOCKER_BUILD_ARGS) --build-arg GO_BUILD_FLAGS='$(ICEBERG_PROCESSOR_BUILD_FLAGS)'" - docker-clean: ## Remove local dev images and prune dangling Docker data @echo "WARNING: this resets Docker build caches (buildx/builder) and removes local images." @printf "Type YES to continue: "; read ans; [ "$$ans" = "YES" ] || { echo "aborted"; exit 1; } - -docker image rm -f $(BROKER_IMAGE) $(OPERATOR_IMAGE) $(CONSOLE_IMAGE) $(PROXY_IMAGE) $(MCP_IMAGE) $(E2E_CLIENT_IMAGE) $(ETCD_TOOLS_IMAGE) $(SQL_PROCESSOR_IMAGE) $(SPRING_DEMO_IMAGE) + -docker image rm -f $(BROKER_IMAGE) $(OPERATOR_IMAGE) $(CONSOLE_IMAGE) $(PROXY_IMAGE) $(MCP_IMAGE) $(E2E_CLIENT_IMAGE) $(ETCD_TOOLS_IMAGE) $(SQL_PROCESSOR_IMAGE) -rm -rf $(STAMP_DIR) docker system prune --force --volumes docker buildx prune --force @@ -322,7 +295,6 @@ test-multi-segment-durability: release-broker-ports ensure-minio ## Run multi-se KAFSCALE_E2E=1 \ go test -tags=e2e ./test/e2e -run TestMultiSegmentRestartDurability -v - test-lfs-proxy-broker: ## Run LFS proxy e2e with real broker (embedded etcd + in-memory S3). KAFSCALE_E2E=1 \ go test -tags=e2e ./test/e2e -run TestLfsProxyBrokerE2E -v @@ -593,100 +565,6 @@ kafsql-demo: demo-platform-bootstrap ## Run the KAFSQL processor e2e demo on kin MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ bash scripts/kafsql-demo.sh -lfs-demo: KAFSCALE_DEMO_PROXY=0 -lfs-demo: KAFSCALE_DEMO_CONSOLE=1 -lfs-demo: KAFSCALE_DEMO_BROKER_REPLICAS=1 -lfs-demo: demo-platform-bootstrap ## Run the LFS proxy demo on kind. 
- $(MAKE) docker-build-lfs-proxy - KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ - KAFSCALE_DEMO_NAMESPACE=$(KAFSCALE_DEMO_NAMESPACE) \ - KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ - LFS_DEMO_NAMESPACE=$(LFS_DEMO_NAMESPACE) \ - LFS_DEMO_TOPIC=$(LFS_DEMO_TOPIC) \ - LFS_DEMO_BLOB_SIZE=$(LFS_DEMO_BLOB_SIZE) \ - LFS_DEMO_BLOB_COUNT=$(LFS_DEMO_BLOB_COUNT) \ - LFS_DEMO_TIMEOUT_SEC=$(LFS_DEMO_TIMEOUT_SEC) \ - LFS_PROXY_IMAGE=$(LFS_PROXY_IMAGE) \ - E2E_CLIENT_IMAGE=$(E2E_CLIENT_IMAGE) \ - MINIO_BUCKET=$(MINIO_BUCKET) \ - MINIO_REGION=$(MINIO_REGION) \ - MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ - MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ - bash scripts/lfs-demo.sh - -lfs-demo-medical: KAFSCALE_DEMO_PROXY=0 -lfs-demo-medical: KAFSCALE_DEMO_CONSOLE=0 -lfs-demo-medical: KAFSCALE_DEMO_BROKER_REPLICAS=1 -lfs-demo-medical: demo-platform-bootstrap ## Run the Medical LFS demo (E60) - healthcare imaging with content explosion. - $(MAKE) docker-build-lfs-proxy - KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ - KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ - LFS_PROXY_IMAGE=$(LFS_PROXY_IMAGE) \ - E2E_CLIENT_IMAGE=$(E2E_CLIENT_IMAGE) \ - MINIO_BUCKET=$(MINIO_BUCKET) \ - MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ - MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ - bash scripts/medical-lfs-demo.sh - -lfs-demo-video: KAFSCALE_DEMO_PROXY=0 -lfs-demo-video: KAFSCALE_DEMO_CONSOLE=0 -lfs-demo-video: KAFSCALE_DEMO_BROKER_REPLICAS=1 -lfs-demo-video: demo-platform-bootstrap ## Run the Video LFS demo (E61) - media streaming with content explosion. 
- $(MAKE) docker-build-lfs-proxy - KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ - KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ - LFS_PROXY_IMAGE=$(LFS_PROXY_IMAGE) \ - E2E_CLIENT_IMAGE=$(E2E_CLIENT_IMAGE) \ - MINIO_BUCKET=$(MINIO_BUCKET) \ - MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ - MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ - bash scripts/video-lfs-demo.sh - -lfs-demo-industrial: KAFSCALE_DEMO_PROXY=0 -lfs-demo-industrial: KAFSCALE_DEMO_CONSOLE=0 -lfs-demo-industrial: KAFSCALE_DEMO_BROKER_REPLICAS=1 -lfs-demo-industrial: demo-platform-bootstrap ## Run the Industrial LFS demo (E62) - mixed telemetry + images. - $(MAKE) docker-build-lfs-proxy - KUBECONFIG=$(KAFSCALE_KIND_KUBECONFIG) \ - KAFSCALE_KIND_CLUSTER=$(KAFSCALE_KIND_CLUSTER) \ - LFS_PROXY_IMAGE=$(LFS_PROXY_IMAGE) \ - E2E_CLIENT_IMAGE=$(E2E_CLIENT_IMAGE) \ - MINIO_BUCKET=$(MINIO_BUCKET) \ - MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ - MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ - bash scripts/industrial-lfs-demo.sh - -e72-browser-demo: ## Run the E72 Browser LFS SDK demo (local, requires port-forward). - @echo "=== E72 Browser LFS SDK Demo (Local) ===" - @echo "Prerequisites: LFS proxy must be port-forwarded to localhost:8080" - @echo " kubectl -n kafscale-demo port-forward svc/lfs-proxy 8080:8080" - @echo "" - cd examples/E72_browser-lfs-sdk-demo && $(MAKE) test - -E72_PROXY_LOCAL_PORT ?= 8080 -E72_MINIO_LOCAL_PORT ?= 9000 -E72_S3_PUBLIC_ENDPOINT ?= http://localhost:$(E72_MINIO_LOCAL_PORT) - -e72-browser-demo-test: ## Rebuild/redeploy LFS proxy, refresh demo, port-forward, and open the SPA. 
- @echo "=== E72 Browser LFS SDK Demo (Rebuild + Test) ===" - $(MAKE) docker-build-lfs-proxy - kind load docker-image $(LFS_PROXY_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) - kubectl -n $(KAFSCALE_DEMO_NAMESPACE) set env deployment/lfs-proxy KAFSCALE_LFS_PROXY_S3_PUBLIC_ENDPOINT=$(E72_S3_PUBLIC_ENDPOINT) - kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout restart deployment/lfs-proxy - kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout status deployment/lfs-proxy --timeout=60s - kubectl -n $(KAFSCALE_DEMO_NAMESPACE) apply -f examples/E72_browser-lfs-sdk-demo/k8s-deploy.yaml - kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout restart deployment/e72-browser-demo - kubectl -n $(KAFSCALE_DEMO_NAMESPACE) rollout status deployment/e72-browser-demo --timeout=60s - @pkill -f "port-forward.*$(E72_PROXY_LOCAL_PORT)" 2>/dev/null || true - @pkill -f "port-forward.*$(E72_MINIO_LOCAL_PORT)" 2>/dev/null || true - @kubectl -n $(KAFSCALE_DEMO_NAMESPACE) port-forward svc/lfs-proxy $(E72_PROXY_LOCAL_PORT):8080 >/dev/null 2>&1 & - @kubectl -n $(KAFSCALE_DEMO_NAMESPACE) port-forward svc/minio $(E72_MINIO_LOCAL_PORT):9000 >/dev/null 2>&1 & - @sleep 2 - cd examples/E72_browser-lfs-sdk-demo && $(MAKE) test PORT=3000 - -e72-browser-demo-k8s: ## Run the E72 Browser LFS SDK demo inside the kind cluster. - bash scripts/e72-browser-demo.sh - platform-demo: demo-platform ## Alias for demo-platform. demo: release-broker-ports ensure-minio ## Launch the broker + console demo stack and open the UI (Ctrl-C to stop). @@ -723,215 +601,11 @@ demo-long: release-broker-ports ensure-minio ## Launch the broker + console demo KAFSCALE_S3_SECRET_KEY=$(MINIO_ROOT_PASSWORD) \ go test -count=1 -timeout 0 -tags=e2e ./test/e2e -run TestDemoStack -v -demo-bridge: release-broker-ports ensure-minio ## Launch the broker + console demo stack and open the UI (Ctrl-C to stop) + expose host for docker. 
- KAFSCALE_E2E=1 \ - KAFSCALE_E2E_DEMO=1 \ - KAFSCALE_E2E_OPEN_UI=1 \ - KAFSCALE_UI_USERNAME=kafscaleadmin \ - KAFSCALE_UI_PASSWORD=kafscale \ - KAFSCALE_CONSOLE_BROKER_METRICS_URL=http://127.0.0.1:39093/metrics \ - KAFSCALE_CONSOLE_OPERATOR_METRICS_URL=http://127.0.0.1:8080/metrics \ - KAFSCALE_S3_BUCKET=$(MINIO_BUCKET) \ - KAFSCALE_S3_REGION=$(MINIO_REGION) \ - KAFSCALE_S3_NAMESPACE=default \ - KAFSCALE_S3_ENDPOINT=http://127.0.0.1:$(MINIO_PORT) \ - KAFSCALE_S3_PATH_STYLE=true \ - KAFSCALE_S3_ACCESS_KEY=$(MINIO_ROOT_USER) \ - KAFSCALE_S3_SECRET_KEY=$(MINIO_ROOT_PASSWORD) \ - KAFSCALE_BROKERS_ADVERTISED_HOST=host.docker.internal \ - KAFSCALE_BROKERS_ADVERTISED_PORT=39092 \ - go test -count=1 -tags=e2e ./test/e2e -run TestDemoStack -v - -demo-guide-pf: docker-build ## Launch a full platform demo on kind. - @command -v docker >/dev/null 2>&1 || { echo "docker is required"; exit 1; } - @command -v kind >/dev/null 2>&1 || { echo "kind is required"; exit 1; } - @command -v kubectl >/dev/null 2>&1 || { echo "kubectl is required"; exit 1; } - @command -v helm >/dev/null 2>&1 || { echo "helm is required"; exit 1; } - - @kind delete cluster --name $(KAFSCALE_KIND_CLUSTER) >/dev/null 2>&1 || true - @kind create cluster --name $(KAFSCALE_KIND_CLUSTER) - - @kind load docker-image $(BROKER_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) - @kind load docker-image $(OPERATOR_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) - @kind load docker-image $(CONSOLE_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) - @kind load docker-image $(SPRING_DEMO_IMAGE) --name $(KAFSCALE_KIND_CLUSTER) - - kubectl apply -f deploy/demo/namespace.yaml - kubectl apply -f deploy/demo/minio.yaml - - kubectl -n kafscale-demo rollout status deployment/minio --timeout=120s - - kubectl apply -f deploy/demo/s3-secret.yaml - - helm upgrade --install kafscale deploy/helm/kafscale \ - --namespace $(KAFSCALE_DEMO_NAMESPACE) \ - --create-namespace \ - --set operator.replicaCount=1 \ - --set operator.image.repository=$(OPERATOR_REPO) \ - 
--set operator.image.tag=$(OPERATOR_TAG) \ - --set operator.image.pullPolicy=IfNotPresent \ - --set console.image.repository=$(CONSOLE_REPO) \ - --set console.image.tag=$(CONSOLE_TAG) \ - --set console.auth.username=admin \ - --set console.auth.password=admin \ - --set operator.etcdEndpoints[0]= - - @echo "[CONSOLE_TAG] CONSOLE_TAG = $(CONSOLE_TAG)" - @echo "[CONSOLE_REPO ] CONSOLE_REPO = $(CONSOLE_REPO)" - @echo "[OPERATOR_REPO] OPERATOR_REPO = $(OPERATOR_REPO)" - @echo "[SPRING_DEMO_REPO] SPRING_DEMO_REPO = $(SPRING_DEMO_REPO)" - - @echo "[CONSOLE_REPO] CONSOLE_REPO =$(CONSOLE_REPO)" - @echo "[OPERATOR_REPO] OPERATOR_REPO =$(OPERATOR_REPO)" - - @echo "[IMAGENAME] BROKER_IMAGE. =$(BROKER_IMAGE)" - @echo "[IMAGENAME] OPERATOR_IMAGE =$(OPERATOR_IMAGE)" - @echo "[IMAGENAME] CONSOLE_IMAGE =$(CONSOLE_IMAGE)" - @echo "[IMAGENAME] SPRING_DEMO_IMAGE = $(SPRING_DEMO_IMAGE)" - - @echo "[CONSOLE_TAG] CONSOLE_TAG =$(CONSOLE_TAG)" - - @bash -c 'set -e; \ - OPERATOR_DEPLOY=$$(kubectl -n kafscale-demo get deployments \ - -l app.kubernetes.io/component=operator \ - -o jsonpath="{.items[0].metadata.name}"); \ - echo "Using operator deployment: $$OPERATOR_DEPLOY"; \ - kubectl -n kafscale-demo set env deployment/$$OPERATOR_DEPLOY \ - BROKER_IMAGE=$(BROKER_IMAGE) \ - KAFSCALE_OPERATOR_ETCD_ENDPOINTS= \ - KAFSCALE_OPERATOR_ETCD_SNAPSHOT_BUCKET=kafscale-snapshots \ - KAFSCALE_OPERATOR_ETCD_SNAPSHOT_CREATE_BUCKET=1 \ - KAFSCALE_OPERATOR_ETCD_SNAPSHOT_PROTECT_BUCKET=1 \ - KAFSCALE_OPERATOR_LEADER_KEY=kafscale-operator-leader \ - KAFSCALE_OPERATOR_ETCD_SNAPSHOT_S3_ENDPOINT=http://minio.kafscale-demo.svc.cluster.local:9000; \ - kubectl -n kafscale-demo rollout status deployment/$$OPERATOR_DEPLOY --timeout=120s; \ - kubectl apply -f deploy/demo/kafscale-cluster.yaml; \ - kubectl apply -f deploy/demo/kafscale-topics.yaml; \ - echo "Waiting for broker deployment to be created ..."; \ - while ! 
kubectl -n kafscale-demo get deployment kafscale-broker >/dev/null 2>&1; do sleep 1; done; \ - kubectl -n kafscale-demo wait --for=condition=available deployment/kafscale-broker --timeout=180s; \ - console_svc=$$(kubectl -n kafscale-demo get svc -l app.kubernetes.io/component=console -o jsonpath="{.items[0].metadata.name}"); \ - echo "Exposing Console at http://localhost:8080/ui"; \ - nohup kubectl -n kafscale-demo port-forward svc/$$console_svc 8080:80 >/tmp/kafscale-demo-console.log 2>&1 & \ - kubectl apply -f deploy/demo/spring-boot-app.yaml; \ - kubectl apply -f deploy/demo/flink-wordcount-app.yaml; \ - kubectl -n kafscale-demo wait --for=condition=available deployment/spring-demo-app --timeout=120s; \ - nohup kubectl -n kafscale-demo port-forward svc/spring-demo-app 8083:8083 >/tmp/kafscale-demo-spring.log 2>&1 & \ - nohup kubectl -n kafscale-demo port-forward svc/kafscale-broker 9093:9093 >/tmp/kafscale-demo-metrics.log 2>&1 & \ - nohup kubectl -n kafscale-demo port-forward svc/kafscale-broker 39092:9092 >/tmp/kafscale-demo-broker.log 2>&1 & \ - echo "Exposing SpringBootApp at http://localhost:8083"; \ - echo "Exposing Metrics at localhost:9093"; \ - echo "Services exposed in background. 
Logs at /tmp/kafscale-demo-*.log"' - -demo-guide-pf-app: docker-build - kubectl apply -f deploy/demo/spring-boot-app.yaml; - kubectl -n kafscale-demo wait --for=condition=available deployment/spring-demo-app --timeout=120s; - # Start Nginx Load Balancer - kubectl apply -f deploy/demo/nginx-lb.yaml; - kubectl -n kafscale-demo wait --for=condition=available deployment/nginx-lb --timeout=120s; - echo "Exposing SpringBootApp at http://localhost:8083"; - nohup kubectl -n kafscale-demo port-forward svc/spring-demo-app 8083:8083 >/tmp/kafscale-demo-spring.log 2>&1 & - echo "Exposing Kafka via Nginx LB at localhost:59092"; - nohup kubectl -n kafscale-demo port-forward svc/nginx-lb 59092:59092 >/tmp/kafscale-demo-nginx.log 2>&1 & - -demo-guide-pf-clean: ## Clean up the platform demo environment - @echo "Cleaning up demo-platform2..." - @pkill -f 'kubectl -n kafscale-demo port-forward' || true - @kind delete cluster --name $(KAFSCALE_KIND_CLUSTER) >/dev/null 2>&1 || true - @echo "Cleanup complete. \nKIND CLUSTER: [$(KAFSCALE_KIND_CLUSTER)] removed." - tidy: go mod tidy lint: golangci-lint run -ACT ?= act -ACT_PLATFORM ?= linux/amd64 -ACT_FLAGS ?= --container-architecture $(ACT_PLATFORM) -ACT_IMAGE ?= local/act-runner:latest -STAGE_REGISTRY ?= 192.168.0.131:5100 -STAGE_TAG ?= stage -STAGE_PLATFORMS ?= linux/amd64,linux/arm64 -STAGE_NO_CACHE ?= 1 -STAGE_SOURCE_REGISTRY ?= ghcr.io/kafscale -STAGE_SOURCE_TAG ?= dev -STAGE_IMAGES ?= kafscale-broker kafscale-lfs-proxy kafscale-operator kafscale-console \ - kafscale-etcd-tools kafscale-iceberg-processor kafscale-sql-processor \ - kafscale-e72-browser-demo - -act-runnable: ## Run runnable GitHub Actions locally (ci.yml, docker.yml) - $(ACT) -W .github/workflows/ci.yml $(ACT_FLAGS) - $(ACT) -W .github/workflows/docker.yml $(ACT_FLAGS) - -act-image: ## Build local act runner image. - docker build -t $(ACT_IMAGE) .devcontainer/act-runner - -stage-release: ## Push stage images to local registry (local buildx). 
- STAGE_REGISTRY=$(STAGE_REGISTRY) STAGE_TAG=$(STAGE_TAG) STAGE_PLATFORMS=$(STAGE_PLATFORMS) STAGE_NO_CACHE=$(STAGE_NO_CACHE) \ - bash scripts/stage-release-local.sh - -stage-release-push: docker-build docker-build-lfs-proxy docker-build-iceberg-processor docker-build-e72-browser-demo ## Retag and push locally built images to STAGE_REGISTRY. - @set -e; \ - for img in $(STAGE_IMAGES); do \ - dst="$(STAGE_REGISTRY)/kafscale/$${img}:$(STAGE_TAG)"; \ - found=0; \ - for src in \ - "$(STAGE_SOURCE_REGISTRY)/$${img}:$(STAGE_SOURCE_TAG)" \ - "$${img}:$(STAGE_SOURCE_TAG)" \ - "$$(case $$img in \ - kafscale-broker) echo $(BROKER_IMAGE) ;; \ - kafscale-operator) echo $(OPERATOR_IMAGE) ;; \ - kafscale-console) echo $(CONSOLE_IMAGE) ;; \ - kafscale-lfs-proxy) echo $(LFS_PROXY_IMAGE) ;; \ - kafscale-etcd-tools) echo $(ETCD_TOOLS_IMAGE) ;; \ - kafscale-sql-processor) echo $(SQL_PROCESSOR_IMAGE) ;; \ - kafscale-iceberg-processor) echo $(ICEBERG_PROCESSOR_IMAGE) ;; \ - kafscale-e72-browser-demo) echo $(E72_BROWSER_DEMO_IMAGE) ;; \ - *) echo "" ;; \ - esac)"; do \ - [ -z "$$src" ] && continue; \ - if docker image inspect "$$src" >/dev/null 2>&1; then \ - echo "Pushing $$src -> $$dst"; \ - docker tag "$$src" "$$dst"; \ - docker push "$$dst"; \ - found=1; \ - break; \ - fi; \ - done; \ - if [ "$$found" -ne 1 ]; then \ - echo "Skipping $$img (source image not found)"; \ - fi; \ - done - -stage-release-clean: ## Remove stage release builder and prune local stage images. - @docker buildx rm stage-release-builder >/dev/null 2>&1 || true - @docker image rm -f $(E72_BROWSER_DEMO_IMAGE) $(BROKER_IMAGE) $(OPERATOR_IMAGE) $(CONSOLE_IMAGE) \ - $(LFS_PROXY_IMAGE) $(ETCD_TOOLS_IMAGE) $(SQL_PROCESSOR_IMAGE) $(ICEBERG_PROCESSOR_IMAGE) >/dev/null 2>&1 || true - -stage-release-act: act-image ## Push stage images to local registry via workflow (containerized act). 
- docker run --rm \ - --privileged \ - --network host \ - -v /var/run/docker.sock:/var/run/docker.sock \ - -v $(PWD):/workspace \ - -w /workspace \ - $(ACT_IMAGE) \ - -W .github/workflows/stage-release.yml $(ACT_FLAGS) \ - -P ubuntu-latest=catthehacker/ubuntu:act-latest \ - --input registry=$(STAGE_REGISTRY) --input tag=$(STAGE_TAG) - -IDOC_EXPLODE_BIN ?= bin/idoc-explode - -lfs-demo-idoc: ensure-minio ## Run IDoc explode demo — uploads IDoc XML to S3 via LFS, then explodes into topic streams. - @mkdir -p bin - go build -o $(IDOC_EXPLODE_BIN) ./cmd/idoc-explode - MINIO_PORT=$(MINIO_PORT) \ - MINIO_BUCKET=$(MINIO_BUCKET) \ - MINIO_REGION=$(MINIO_REGION) \ - MINIO_ROOT_USER=$(MINIO_ROOT_USER) \ - MINIO_ROOT_PASSWORD=$(MINIO_ROOT_PASSWORD) \ - ./scripts/idoc-explode-demo.sh - help: ## Show targets @grep -E '^[a-zA-Z_-]+:.*?##' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "%-20s %s\n", $$1, $$2}' From fc96d6044bec865d2228e61cefffcc907e43d987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20K=C3=A4mpf?= Date: Mon, 9 Mar 2026 17:39:15 +0100 Subject: [PATCH 3/6] refactor: consolidate Consumer.Unwrap() and Consumer.UnwrapEnvelope() into single method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses reviewer feedback (klaudworks): pkg/lfs had four different ways to resolve an LFS envelope. This removes the redundancy between Unwrap() and UnwrapEnvelope() by consolidating into a single Unwrap() that returns (*Envelope, []byte, error). Callers that don't need the envelope metadata simply ignore the first return value. 
Before: Unwrap() -> ([]byte, error) — discards envelope UnwrapEnvelope() -> (*Envelope, []byte, error) — preserves envelope After: Unwrap() -> (*Envelope, []byte, error) — always available Co-Authored-By: Claude Opus 4.6 --- pkg/lfs/consumer.go | 44 +++++----------------------------------- pkg/lfs/consumer_test.go | 20 +++++++++--------- pkg/lfs/doc.go | 7 ++++--- pkg/lfs/record.go | 2 +- test/e2e/lfs_sdk_test.go | 2 +- 5 files changed, 21 insertions(+), 54 deletions(-) diff --git a/pkg/lfs/consumer.go b/pkg/lfs/consumer.go index e57e2b54..f19b17c5 100644 --- a/pkg/lfs/consumer.go +++ b/pkg/lfs/consumer.go @@ -52,45 +52,11 @@ func NewConsumer(fetcher BlobFetcher, opts ...ConsumerOption) *Consumer { return c } -// Unwrap checks if value is an LFS envelope and fetches the blob. -// Returns the original value if not an envelope. -func (c *Consumer) Unwrap(ctx context.Context, value []byte) ([]byte, error) { - if !IsLfsEnvelope(value) { - return value, nil - } - - env, err := DecodeEnvelope(value) - if err != nil { - return nil, &LfsError{Op: "decode", Err: err} - } - - blob, err := c.fetcher.Fetch(ctx, env.Key) - if err != nil { - return nil, &LfsError{Op: "fetch", Err: err} - } - - if c.validateChecksum { - alg, expected, ok, err := EnvelopeChecksum(env) - if err != nil { - return nil, &LfsError{Op: "checksum", Err: err} - } - if ok { - actual, err := ComputeChecksum(alg, blob) - if err != nil { - return nil, &LfsError{Op: "checksum", Err: err} - } - if actual != expected { - return nil, &ChecksumError{Expected: expected, Actual: actual} - } - } - } - - return blob, nil -} - -// UnwrapEnvelope returns the envelope and fetched blob for records that are envelopes. -// Returns nil envelope and original value if not an envelope. -func (c *Consumer) UnwrapEnvelope(ctx context.Context, value []byte) (*Envelope, []byte, error) { +// Unwrap checks if value is an LFS envelope and fetches the blob from storage. +// Returns (nil, original value, nil) for non-envelope values. 
+// Returns (envelope, blob, nil) for successfully resolved envelopes. +// Callers that don't need the envelope can ignore the first return value. +func (c *Consumer) Unwrap(ctx context.Context, value []byte) (*Envelope, []byte, error) { if !IsLfsEnvelope(value) { return nil, value, nil } diff --git a/pkg/lfs/consumer_test.go b/pkg/lfs/consumer_test.go index 849cbf64..71beed6f 100644 --- a/pkg/lfs/consumer_test.go +++ b/pkg/lfs/consumer_test.go @@ -47,7 +47,7 @@ func TestConsumerUnwrapNonLFS(t *testing.T) { // Plain text should pass through unchanged plainText := []byte("hello world") - result, err := consumer.Unwrap(context.Background(), plainText) + _, result, err := consumer.Unwrap(context.Background(), plainText) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -80,7 +80,7 @@ func TestConsumerUnwrapLFS(t *testing.T) { t.Fatalf("failed to encode envelope: %v", err) } - result, err := consumer.Unwrap(context.Background(), envBytes) + _, result, err := consumer.Unwrap(context.Background(), envBytes) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -115,7 +115,7 @@ func TestConsumerUnwrapMD5Checksum(t *testing.T) { t.Fatalf("failed to encode envelope: %v", err) } - result, err := consumer.Unwrap(context.Background(), envBytes) + _, result, err := consumer.Unwrap(context.Background(), envBytes) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -147,7 +147,7 @@ func TestConsumerUnwrapChecksumMismatch(t *testing.T) { t.Fatalf("failed to encode envelope: %v", err) } - _, err = consumer.Unwrap(context.Background(), envBytes) + _, _, err = consumer.Unwrap(context.Background(), envBytes) if err == nil { t.Fatal("expected checksum error, got nil") } @@ -185,7 +185,7 @@ func TestConsumerUnwrapChecksumDisabled(t *testing.T) { } // Should succeed because checksum validation is disabled - result, err := consumer.Unwrap(context.Background(), envBytes) + _, result, err := consumer.Unwrap(context.Background(), envBytes) if err != nil { 
t.Fatalf("unexpected error: %v", err) } @@ -213,7 +213,7 @@ func TestConsumerUnwrapFetchError(t *testing.T) { t.Fatalf("failed to encode envelope: %v", err) } - _, err = consumer.Unwrap(context.Background(), envBytes) + _, _, err = consumer.Unwrap(context.Background(), envBytes) if err == nil { t.Fatal("expected error, got nil") } @@ -235,7 +235,7 @@ func TestConsumerUnwrapInvalidEnvelope(t *testing.T) { // Must be > 15 bytes to pass IsLfsEnvelope length check invalid := []byte(`{"kfs_lfs": 1, "bucket": "b"}`) - _, err := consumer.Unwrap(context.Background(), invalid) + _, _, err := consumer.Unwrap(context.Background(), invalid) if err == nil { t.Fatal("expected error for invalid envelope, got nil") } @@ -270,7 +270,7 @@ func TestConsumerUnwrapEnvelope(t *testing.T) { } envBytes, _ := EncodeEnvelope(envelope) - env, data, err := consumer.UnwrapEnvelope(context.Background(), envBytes) + env, data, err := consumer.Unwrap(context.Background(), envBytes) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -288,12 +288,12 @@ func TestConsumerUnwrapEnvelope(t *testing.T) { } } -func TestConsumerUnwrapEnvelopeNonLFS(t *testing.T) { +func TestConsumerUnwrapNonLFSReturnsNilEnvelope(t *testing.T) { fetcher := &mockFetcher{} consumer := NewConsumer(fetcher) plain := []byte("not an envelope") - env, data, err := consumer.UnwrapEnvelope(context.Background(), plain) + env, data, err := consumer.Unwrap(context.Background(), plain) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/pkg/lfs/doc.go b/pkg/lfs/doc.go index 9b1f2c99..00eb8ba5 100644 --- a/pkg/lfs/doc.go +++ b/pkg/lfs/doc.go @@ -64,8 +64,9 @@ Basic usage with franz-go: // Process Kafka records for _, record := range kafkaRecords { - // Unwrap automatically fetches LFS blobs from S3 - data, err := consumer.Unwrap(ctx, record.Value) + // Unwrap automatically fetches LFS blobs from S3. + // First return is the envelope (nil for non-LFS records). 
+ _, data, err := consumer.Unwrap(ctx, record.Value) if err != nil { log.Error("failed to unwrap", "error", err) continue @@ -137,7 +138,7 @@ stored in the envelope. This can be disabled for performance: The package defines specific error types for common failures: - data, err := consumer.Unwrap(ctx, value) + _, data, err := consumer.Unwrap(ctx, value) if err != nil { var checksumErr *lfs.ChecksumError if errors.As(err, &checksumErr) { diff --git a/pkg/lfs/record.go b/pkg/lfs/record.go index b43192ab..b03786aa 100644 --- a/pkg/lfs/record.go +++ b/pkg/lfs/record.go @@ -134,7 +134,7 @@ func (r *Record) Value(ctx context.Context) ([]byte, error) { return nil, r.err } - env, blob, err := r.consumer.UnwrapEnvelope(ctx, r.raw) + env, blob, err := r.consumer.Unwrap(ctx, r.raw) r.envelope = env if err != nil { r.err = err diff --git a/test/e2e/lfs_sdk_test.go b/test/e2e/lfs_sdk_test.go index a0568fd2..cc79f08e 100644 --- a/test/e2e/lfs_sdk_test.go +++ b/test/e2e/lfs_sdk_test.go @@ -99,7 +99,7 @@ func TestLfsSDKKindE2E(t *testing.T) { iter := fetches.RecordIter() for !iter.Done() { record := iter.Next() - resolved, err := consumer.Unwrap(ctx, record.Value) + _, resolved, err := consumer.Unwrap(ctx, record.Value) if err != nil { t.Fatalf("unwrap lfs record: %v", err) } From 4cc089fe00fa19a46e49778b05b1bfb31ceba63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20K=C3=A4mpf?= Date: Mon, 9 Mar 2026 17:44:15 +0100 Subject: [PATCH 4/6] refactor: remove deprecated standalone lfs-proxy binary Addresses reviewer feedback (klaudworks): cmd/lfs-proxy/ duplicated ~30% of the unified proxy's infrastructure (TCP listener, Kafka protocol handling, connection management, health checks). LFS is now exclusively a feature-flag on the unified proxy, enabled with KAFSCALE_PROXY_LFS_ENABLED=true. The unified proxy provides partition-aware routing (vs round-robin) and consumer group support. 
Removed: - cmd/lfs-proxy/ (5,733 lines of Go source + tests) - deploy/docker/lfs-proxy.Dockerfile - deploy/helm/kafscale/templates/lfs-proxy-*.yaml (6 Helm templates) - test/e2e/lfs_proxy_*.go (e2e tests that exec'd the standalone binary) - CI jobs: build-lfs-proxy, e2e-lfs-proxy - Makefile targets: docker-build-lfs-proxy, test-lfs-proxy-broker Kept: - cmd/proxy/lfs_*.go (unified proxy LFS module) - pkg/lfs/ (shared LFS library) - api/lfs-proxy/openapi.yaml (API spec) - lfs-client-sdk/ (all client SDKs) Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 42 - Makefile | 14 +- cmd/lfs-proxy/backend_auth.go | 105 -- cmd/lfs-proxy/backend_tls.go | 68 - cmd/lfs-proxy/handler.go | 1120 ----------------- cmd/lfs-proxy/handler_test.go | 325 ----- cmd/lfs-proxy/http.go | 1013 --------------- cmd/lfs-proxy/http_test.go | 274 ---- cmd/lfs-proxy/http_tls.go | 59 - cmd/lfs-proxy/http_tls_test.go | 39 - cmd/lfs-proxy/main.go | 440 ------- cmd/lfs-proxy/metrics.go | 221 ---- cmd/lfs-proxy/openapi.yaml | 433 ------- cmd/lfs-proxy/record.go | 277 ---- cmd/lfs-proxy/s3.go | 582 --------- cmd/lfs-proxy/sasl_encode.go | 77 -- cmd/lfs-proxy/sasl_encode_test.go | 45 - cmd/lfs-proxy/swagger.go | 73 -- cmd/lfs-proxy/tracker.go | 372 ------ cmd/lfs-proxy/tracker_test.go | 383 ------ cmd/lfs-proxy/tracker_types.go | 238 ---- cmd/lfs-proxy/uuid.go | 22 - deploy/docker-compose/Makefile | 1 - deploy/docker-compose/docker-compose.yaml | 65 +- deploy/docker/lfs-proxy.Dockerfile | 46 - .../templates/lfs-proxy-deployment.yaml | 251 ---- .../templates/lfs-proxy-http-ingress.yaml | 57 - .../templates/lfs-proxy-metrics-service.yaml | 36 - .../templates/lfs-proxy-prometheusrule.yaml | 46 - .../kafscale/templates/lfs-proxy-service.yaml | 47 - .../templates/lfs-proxy-servicemonitor.yaml | 34 - deploy/helm/kafscale/values.yaml | 87 +- test/e2e/lfs_iceberg_processor_test.go | 252 ---- test/e2e/lfs_proxy_broker_test.go | 234 ---- test/e2e/lfs_proxy_etcd_test.go | 65 - 
test/e2e/lfs_proxy_http_test.go | 641 ---------- test/e2e/lfs_proxy_test.go | 462 ------- 37 files changed, 9 insertions(+), 8537 deletions(-) delete mode 100644 cmd/lfs-proxy/backend_auth.go delete mode 100644 cmd/lfs-proxy/backend_tls.go delete mode 100644 cmd/lfs-proxy/handler.go delete mode 100644 cmd/lfs-proxy/handler_test.go delete mode 100644 cmd/lfs-proxy/http.go delete mode 100644 cmd/lfs-proxy/http_test.go delete mode 100644 cmd/lfs-proxy/http_tls.go delete mode 100644 cmd/lfs-proxy/http_tls_test.go delete mode 100644 cmd/lfs-proxy/main.go delete mode 100644 cmd/lfs-proxy/metrics.go delete mode 100644 cmd/lfs-proxy/openapi.yaml delete mode 100644 cmd/lfs-proxy/record.go delete mode 100644 cmd/lfs-proxy/s3.go delete mode 100644 cmd/lfs-proxy/sasl_encode.go delete mode 100644 cmd/lfs-proxy/sasl_encode_test.go delete mode 100644 cmd/lfs-proxy/swagger.go delete mode 100644 cmd/lfs-proxy/tracker.go delete mode 100644 cmd/lfs-proxy/tracker_test.go delete mode 100644 cmd/lfs-proxy/tracker_types.go delete mode 100644 cmd/lfs-proxy/uuid.go delete mode 100644 deploy/docker/lfs-proxy.Dockerfile delete mode 100644 deploy/helm/kafscale/templates/lfs-proxy-deployment.yaml delete mode 100644 deploy/helm/kafscale/templates/lfs-proxy-http-ingress.yaml delete mode 100644 deploy/helm/kafscale/templates/lfs-proxy-metrics-service.yaml delete mode 100644 deploy/helm/kafscale/templates/lfs-proxy-prometheusrule.yaml delete mode 100644 deploy/helm/kafscale/templates/lfs-proxy-service.yaml delete mode 100644 deploy/helm/kafscale/templates/lfs-proxy-servicemonitor.yaml delete mode 100644 test/e2e/lfs_iceberg_processor_test.go delete mode 100644 test/e2e/lfs_proxy_broker_test.go delete mode 100644 test/e2e/lfs_proxy_etcd_test.go delete mode 100644 test/e2e/lfs_proxy_http_test.go delete mode 100644 test/e2e/lfs_proxy_test.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 932c1541..8e7f6974 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ 
-57,28 +57,6 @@ jobs: - name: Run go test -race ./... run: go test -race ./... - build-lfs-proxy: - name: Build LFS Proxy - runs-on: ubuntu-latest - env: - GOCACHE: ${{ github.workspace }}/.gocache - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4 - - - uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v5 - with: - go-version-file: go.mod - cache-dependency-path: go.sum - - - name: Prepare Go build cache - run: mkdir -p "$GOCACHE" - - - name: Build lfs-proxy - run: go build -o lfs-proxy ./cmd/lfs-proxy - - - name: Run lfs-proxy tests - run: go test ./cmd/lfs-proxy/... - go-coverage: name: Go Coverage Gate runs-on: ubuntu-latest @@ -99,26 +77,6 @@ jobs: run: bash hack/check_coverage.sh 45 - e2e-lfs-proxy: - name: LFS Proxy E2E - runs-on: ubuntu-latest - env: - GOCACHE: ${{ github.workspace }}/.gocache - KAFSCALE_E2E: "1" - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v4 - - - uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v5 - with: - go-version-file: go.mod - cache-dependency-path: go.sum - - - name: Prepare Go build cache - run: mkdir -p "$GOCACHE" - - - name: Run LFS proxy E2E tests - run: go test -tags=e2e ./test/e2e -run LfsProxy - helm-lint: name: Helm Lint runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index 30bd44f3..a463a181 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-.PHONY: proto build test tidy lint generate build-sdk docker-build docker-build-e2e-client docker-build-etcd-tools docker-build-lfs-proxy docker-clean ensure-minio start-minio stop-containers release-broker-ports test-produce-consume test-produce-consume-debug test-consumer-group test-ops-api test-mcp test-multi-segment-durability test-lfs-proxy-broker test-full test-operator test-acl demo demo-platform demo-platform-bootstrap iceberg-demo kafsql-demo platform-demo help clean-kind-all +.PHONY: proto build test tidy lint generate build-sdk docker-build docker-build-e2e-client docker-build-etcd-tools docker-clean ensure-minio start-minio stop-containers release-broker-ports test-produce-consume test-produce-consume-debug test-consumer-group test-ops-api test-mcp test-multi-segment-durability test-full test-operator test-acl demo demo-platform demo-platform-bootstrap iceberg-demo kafsql-demo platform-demo help clean-kind-all REGISTRY ?= ghcr.io/kafscale STAMP_DIR ?= .build @@ -21,7 +21,6 @@ BROKER_IMAGE ?= $(REGISTRY)/kafscale-broker:dev OPERATOR_IMAGE ?= $(REGISTRY)/kafscale-operator:dev CONSOLE_IMAGE ?= $(REGISTRY)/kafscale-console:dev PROXY_IMAGE ?= $(REGISTRY)/kafscale-proxy:dev -LFS_PROXY_IMAGE ?= $(REGISTRY)/kafscale-lfs-proxy:dev SQL_PROCESSOR_IMAGE ?= $(REGISTRY)/kafscale-sql-processor:dev MCP_IMAGE ?= $(REGISTRY)/kafscale-mcp:dev E2E_CLIENT_IMAGE ?= $(REGISTRY)/kafscale-e2e-client:dev @@ -149,13 +148,6 @@ $(STAMP_DIR)/proxy.image: $(PROXY_SRCS) $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(PROXY_IMAGE) -f deploy/docker/proxy.Dockerfile . @touch $(STAMP_DIR)/proxy.image -LFS_PROXY_SRCS := $(shell find cmd/lfs-proxy pkg go.mod go.sum) -docker-build-lfs-proxy: $(STAMP_DIR)/lfs-proxy.image ## Build LFS proxy container image -$(STAMP_DIR)/lfs-proxy.image: $(LFS_PROXY_SRCS) - @mkdir -p $(STAMP_DIR) - $(DOCKER_BUILD_CMD) $(DOCKER_BUILD_ARGS) -t $(LFS_PROXY_IMAGE) -f deploy/docker/lfs-proxy.Dockerfile . 
- @touch $(STAMP_DIR)/lfs-proxy.image - MCP_SRCS := $(shell find cmd/mcp internal/mcpserver go.mod go.sum) docker-build-mcp: $(STAMP_DIR)/mcp.image ## Build MCP container image $(STAMP_DIR)/mcp.image: $(MCP_SRCS) @@ -295,10 +287,6 @@ test-multi-segment-durability: release-broker-ports ensure-minio ## Run multi-se KAFSCALE_E2E=1 \ go test -tags=e2e ./test/e2e -run TestMultiSegmentRestartDurability -v -test-lfs-proxy-broker: ## Run LFS proxy e2e with real broker (embedded etcd + in-memory S3). - KAFSCALE_E2E=1 \ - go test -tags=e2e ./test/e2e -run TestLfsProxyBrokerE2E -v - test-full: ## Run unit tests plus local + MinIO-backed e2e suites. $(MAKE) test $(MAKE) test-consumer-group diff --git a/cmd/lfs-proxy/backend_auth.go b/cmd/lfs-proxy/backend_auth.go deleted file mode 100644 index bfb0f784..00000000 --- a/cmd/lfs-proxy/backend_auth.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "context" - "crypto/tls" - "errors" - "fmt" - "net" - "strings" - "time" - - "github.com/KafScale/platform/pkg/protocol" -) - -const ( - apiKeySaslHandshake int16 = 17 - apiKeySaslAuthenticate int16 = 36 -) - -func (p *lfsProxy) wrapBackendTLS(ctx context.Context, conn net.Conn, addr string) (net.Conn, error) { - if p.backendTLSConfig == nil { - return conn, nil - } - cfg := p.backendTLSConfig.Clone() - if cfg.ServerName == "" { - if host, _, err := net.SplitHostPort(addr); err == nil { - cfg.ServerName = host - } - } - tlsConn := tls.Client(conn, cfg) - deadline := time.Now().Add(p.dialTimeout) - if ctxDeadline, ok := ctx.Deadline(); ok { - deadline = ctxDeadline - } - _ = tlsConn.SetDeadline(deadline) - if err := tlsConn.Handshake(); err != nil { - return nil, err - } - _ = tlsConn.SetDeadline(time.Time{}) - return tlsConn, nil -} - -func (p *lfsProxy) performBackendSASL(ctx context.Context, conn net.Conn) error { - mech := strings.TrimSpace(p.backendSASLMechanism) - if mech == "" { - return nil - } - if strings.ToUpper(mech) != "PLAIN" { - return fmt.Errorf("unsupported SASL mechanism %q", mech) - } - if p.backendSASLUsername == "" { - return errors.New("backend SASL username required") - } - - // 1) Handshake - correlationID := int32(1) - handshakeReq, err := encodeSaslHandshakeRequest(&protocol.RequestHeader{ - APIKey: apiKeySaslHandshake, - APIVersion: 1, - CorrelationID: correlationID, - }, mech) - if err != nil { - return err - } - if err := protocol.WriteFrame(conn, handshakeReq); err != nil { - return err - } - if err := readSaslResponse(conn); err != nil { - return fmt.Errorf("sasl handshake failed: %w", err) - } - - // 2) Authenticate - authBytes := buildSaslPlainAuthBytes(p.backendSASLUsername, p.backendSASLPassword) - authReq, err := encodeSaslAuthenticateRequest(&protocol.RequestHeader{ - APIKey: apiKeySaslAuthenticate, - APIVersion: 1, - CorrelationID: correlationID + 1, - }, authBytes) - if err != nil { - return err - 
} - if err := protocol.WriteFrame(conn, authReq); err != nil { - return err - } - if err := readSaslResponse(conn); err != nil { - return fmt.Errorf("sasl authenticate failed: %w", err) - } - - return nil -} diff --git a/cmd/lfs-proxy/backend_tls.go b/cmd/lfs-proxy/backend_tls.go deleted file mode 100644 index a691cdfa..00000000 --- a/cmd/lfs-proxy/backend_tls.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "crypto/tls" - "crypto/x509" - "errors" - "os" - "strings" -) - -func buildBackendTLSConfig() (*tls.Config, error) { - enabled := envBoolDefault("KAFSCALE_LFS_PROXY_BACKEND_TLS_ENABLED", false) - if !enabled { - return nil, nil - } - caFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_CA_FILE")) - certFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_CERT_FILE")) - keyFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_KEY_FILE")) - serverName := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_TLS_SERVER_NAME")) - insecureSkip := envBoolDefault("KAFSCALE_LFS_PROXY_BACKEND_TLS_INSECURE_SKIP_VERIFY", false) - - var rootCAs *x509.CertPool - if caFile != "" { - caPEM, err := os.ReadFile(caFile) - if err != nil { - return nil, err - } - rootCAs = x509.NewCertPool() - if !rootCAs.AppendCertsFromPEM(caPEM) { - return nil, errors.New("failed to parse backend TLS CA file") - } - } - - var certs []tls.Certificate - if certFile != "" || keyFile != "" { - if certFile == "" || keyFile == "" { - return nil, errors.New("backend TLS cert and key must both be set") - } - cert, err := tls.LoadX509KeyPair(certFile, keyFile) - if err != nil { - return nil, err - } - certs = append(certs, cert) - } - - return &tls.Config{ - RootCAs: rootCAs, - Certificates: certs, - ServerName: serverName, - InsecureSkipVerify: insecureSkip, - MinVersion: tls.VersionTLS12, - }, nil -} diff --git a/cmd/lfs-proxy/handler.go b/cmd/lfs-proxy/handler.go deleted file mode 100644 index ad302b36..00000000 --- a/cmd/lfs-proxy/handler.go +++ /dev/null @@ -1,1120 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "bytes" - "context" - "errors" - "fmt" - "hash/crc32" - "log/slog" - "net" - "net/http" - "strings" - "sync/atomic" - "time" - - "github.com/KafScale/platform/pkg/lfs" - "github.com/KafScale/platform/pkg/metadata" - "github.com/KafScale/platform/pkg/protocol" - "github.com/twmb/franz-go/pkg/kgo" - "github.com/twmb/franz-go/pkg/kmsg" -) - -func (p *lfsProxy) listenAndServe(ctx context.Context) error { - ln, err := net.Listen("tcp", p.addr) - if err != nil { - return err - } - p.logger.Info("lfs proxy listening", "addr", ln.Addr().String()) - - go func() { - <-ctx.Done() - _ = ln.Close() - }() - - for { - conn, err := ln.Accept() - if err != nil { - select { - case <-ctx.Done(): - return nil - default: - } - if ne, ok := err.(net.Error); ok && !ne.Timeout() { - p.logger.Warn("accept temporary error", "error", err) - continue - } - return err - } - p.logger.Debug("connection accepted", "remote", conn.RemoteAddr().String()) - go p.handleConnection(ctx, conn) - } -} - -func (p *lfsProxy) setReady(ready bool) { - prev := atomic.LoadUint32(&p.ready) - if ready { - atomic.StoreUint32(&p.ready, 1) - if prev == 0 { - p.logger.Info("proxy ready state changed", "ready", true) - } - return - } - atomic.StoreUint32(&p.ready, 0) - if prev == 1 { - p.logger.Warn("proxy ready state changed", "ready", false) - } -} - -func (p *lfsProxy) isReady() bool { - readyFlag := atomic.LoadUint32(&p.ready) == 1 - cacheFresh := p.cacheFresh() - s3Healthy := p.isS3Healthy() - ready := readyFlag && cacheFresh && s3Healthy - if !ready { - 
p.logger.Debug("ready check failed", "readyFlag", readyFlag, "cacheFresh", cacheFresh, "s3Healthy", s3Healthy) - } - return ready -} - -func (p *lfsProxy) markS3Healthy(ok bool) { - if ok { - atomic.StoreUint32(&p.s3Healthy, 1) - return - } - atomic.StoreUint32(&p.s3Healthy, 0) -} - -func (p *lfsProxy) isS3Healthy() bool { - return atomic.LoadUint32(&p.s3Healthy) == 1 -} - -func (p *lfsProxy) startS3HealthCheck(ctx context.Context, interval time.Duration) { - if interval <= 0 { - interval = time.Duration(defaultS3HealthIntervalSec) * time.Second - } - ticker := time.NewTicker(interval) - go func() { - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - err := p.s3Uploader.HeadBucket(ctx) - wasHealthy := p.isS3Healthy() - p.markS3Healthy(err == nil) - if err != nil && wasHealthy { - p.logger.Warn("s3 health check failed", "error", err) - } else if err == nil && !wasHealthy { - p.logger.Info("s3 health check recovered") - } - } - } - }() -} - -func (p *lfsProxy) setCachedBackends(backends []string) { - if len(backends) == 0 { - return - } - copied := make([]string, len(backends)) - copy(copied, backends) - p.cacheMu.Lock() - p.cachedBackends = copied - p.cacheMu.Unlock() -} - -func (p *lfsProxy) cachedBackendsSnapshot() []string { - p.cacheMu.RLock() - if len(p.cachedBackends) == 0 { - p.cacheMu.RUnlock() - return nil - } - copied := make([]string, len(p.cachedBackends)) - copy(copied, p.cachedBackends) - p.cacheMu.RUnlock() - return copied -} - -func (p *lfsProxy) touchHealthy() { - atomic.StoreInt64(&p.lastHealthy, time.Now().UnixNano()) -} - -func (p *lfsProxy) cacheFresh() bool { - // Static backends are always fresh (no TTL expiry) - if len(p.backends) > 0 { - return true - } - last := atomic.LoadInt64(&p.lastHealthy) - if last == 0 { - return false - } - return time.Since(time.Unix(0, last)) <= p.cacheTTL -} - -func (p *lfsProxy) startBackendRefresh(ctx context.Context, backoff time.Duration, interval time.Duration) { - if 
p.store == nil || len(p.backends) > 0 { - p.logger.Debug("backend refresh disabled", "hasStore", p.store != nil, "staticBackends", len(p.backends)) - return - } - if backoff <= 0 { - backoff = time.Duration(defaultBackendBackoffMs) * time.Millisecond - } - if interval <= 0 { - interval = time.Duration(defaultBackendRefreshIntervalSec) * time.Second - } - ticker := time.NewTicker(interval) - go func() { - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - backends, err := p.refreshBackends(ctx) - if err != nil { - p.logger.Warn("backend refresh failed", "error", err) - if !p.cacheFresh() { - p.setReady(false) - } - time.Sleep(backoff) - } else { - p.logger.Debug("backend refresh succeeded", "count", len(backends)) - } - } - } - }() -} - -func (p *lfsProxy) refreshBackends(ctx context.Context) ([]string, error) { - backends, err := p.currentBackends(ctx) - if err != nil { - return nil, err - } - if len(backends) > 0 { - p.touchHealthy() - p.setReady(true) - } - return backends, nil -} - -func (p *lfsProxy) startHealthServer(ctx context.Context, addr string) { - mux := http.NewServeMux() - mux.HandleFunc("/readyz", func(w http.ResponseWriter, _ *http.Request) { - if p.isReady() || (len(p.cachedBackendsSnapshot()) > 0 && p.cacheFresh() && p.isS3Healthy()) { - w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte("ready\n")) - return - } - http.Error(w, "not ready", http.StatusServiceUnavailable) - }) - mux.HandleFunc("/livez", func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte("ok\n")) - }) - srv := &http.Server{ - Addr: addr, - Handler: mux, - ReadTimeout: p.httpReadTimeout, - WriteTimeout: p.httpWriteTimeout, - IdleTimeout: p.httpIdleTimeout, - ReadHeaderTimeout: p.httpHeaderTimeout, - MaxHeaderBytes: p.httpMaxHeaderBytes, - } - go func() { - <-ctx.Done() - shutdownCtx, cancel := context.WithTimeout(context.Background(), p.httpShutdownTimeout) - defer cancel() - _ = 
srv.Shutdown(shutdownCtx) - }() - go func() { - p.logger.Info("lfs proxy health listening", "addr", addr) - if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { - p.logger.Warn("lfs proxy health server error", "error", err) - } - }() -} - -func (p *lfsProxy) handleConnection(ctx context.Context, conn net.Conn) { - defer func() { _ = conn.Close() }() - var backendConn net.Conn - var backendAddr string - - for { - frame, err := protocol.ReadFrame(conn) - if err != nil { - p.logger.Debug("connection read ended", "remote", conn.RemoteAddr().String(), "error", err) - return - } - header, _, err := protocol.ParseRequestHeader(frame.Payload) - if err != nil { - p.logger.Warn("parse request header failed", "error", err) - return - } - p.logger.Debug("request received", "apiKey", header.APIKey, "correlationId", header.CorrelationID, "remote", conn.RemoteAddr().String()) - - if header.APIKey == protocol.APIKeyApiVersion { - resp, err := p.handleApiVersions(header) - if err != nil { - p.logger.Warn("api versions handling failed", "error", err) - return - } - if err := protocol.WriteFrame(conn, resp); err != nil { - p.logger.Warn("write api versions response failed", "error", err) - return - } - continue - } - - if !p.isReady() { - p.logger.Warn("rejecting request: proxy not ready", "apiKey", header.APIKey, "remote", conn.RemoteAddr().String()) - resp, ok, err := p.buildNotReadyResponse(header, frame.Payload) - if err != nil { - p.logger.Warn("not-ready response build failed", "error", err) - return - } - if ok { - if err := protocol.WriteFrame(conn, resp); err != nil { - p.logger.Warn("write not-ready response failed", "error", err) - } - } - return - } - - switch header.APIKey { - case protocol.APIKeyMetadata: - resp, err := p.handleMetadata(ctx, header, frame.Payload) - if err != nil { - p.logger.Warn("metadata handling failed", "error", err) - return - } - if err := protocol.WriteFrame(conn, resp); err != nil { - p.logger.Warn("write metadata 
response failed", "error", err) - return - } - continue - case protocol.APIKeyFindCoordinator: - resp, err := p.handleFindCoordinator(header) - if err != nil { - p.logger.Warn("find coordinator handling failed", "error", err) - return - } - if err := protocol.WriteFrame(conn, resp); err != nil { - p.logger.Warn("write coordinator response failed", "error", err) - return - } - continue - case protocol.APIKeyProduce: - resp, handled, err := p.handleProduce(ctx, header, frame.Payload) - if err != nil { - p.logger.Warn("produce handling failed", "error", err) - if resp != nil { - _ = protocol.WriteFrame(conn, resp) - } - return - } - if handled { - if err := protocol.WriteFrame(conn, resp); err != nil { - p.logger.Warn("write produce response failed", "error", err) - } - continue - } - default: - } - - if backendConn == nil { - backendConn, backendAddr, err = p.connectBackend(ctx) - if err != nil { - p.logger.Error("backend connect failed", "error", err) - p.respondBackendError(conn, header, frame.Payload) - return - } - } - - resp, err := p.forwardToBackend(ctx, backendConn, backendAddr, frame.Payload) - if err != nil { - _ = backendConn.Close() - backendConn, backendAddr, err = p.connectBackend(ctx) - if err != nil { - p.logger.Warn("backend reconnect failed", "error", err) - p.respondBackendError(conn, header, frame.Payload) - return - } - resp, err = p.forwardToBackend(ctx, backendConn, backendAddr, frame.Payload) - if err != nil { - p.logger.Warn("backend forward failed", "error", err) - p.respondBackendError(conn, header, frame.Payload) - return - } - } - if err := protocol.WriteFrame(conn, resp); err != nil { - p.logger.Warn("write response failed", "error", err) - return - } - } -} - -func (p *lfsProxy) handleApiVersions(header *protocol.RequestHeader) ([]byte, error) { - resp := kmsg.NewPtrApiVersionsResponse() - resp.ErrorCode = protocol.NONE - resp.ApiKeys = p.apiVersions - return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp), nil -} 
- -func (p *lfsProxy) respondBackendError(conn net.Conn, header *protocol.RequestHeader, payload []byte) { - resp, ok, err := p.buildNotReadyResponse(header, payload) - if err != nil || !ok { - return - } - _ = protocol.WriteFrame(conn, resp) -} - -func (p *lfsProxy) handleMetadata(ctx context.Context, header *protocol.RequestHeader, payload []byte) ([]byte, error) { - _, req, err := protocol.ParseRequest(payload) - if err != nil { - return nil, err - } - metaReq, ok := req.(*kmsg.MetadataRequest) - if !ok { - return nil, fmt.Errorf("unexpected metadata request type %T", req) - } - - meta, err := p.loadMetadata(ctx, metaReq) - if err != nil { - return nil, err - } - p.logger.Debug("metadata response", "advertisedHost", p.advertisedHost, "advertisedPort", p.advertisedPort, "topics", len(meta.Topics)) - resp := buildProxyMetadataResponse(meta, header.CorrelationID, header.APIVersion, p.advertisedHost, p.advertisedPort) - return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp), nil -} - -func (p *lfsProxy) handleFindCoordinator(header *protocol.RequestHeader) ([]byte, error) { - resp := kmsg.NewPtrFindCoordinatorResponse() - resp.ErrorCode = protocol.NONE - resp.NodeID = 0 - resp.Host = p.advertisedHost - resp.Port = p.advertisedPort - return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp), nil -} - -func (p *lfsProxy) loadMetadata(ctx context.Context, req *kmsg.MetadataRequest) (*metadata.ClusterMetadata, error) { - var zeroID [16]byte - useIDs := false - var topicNames []string - if req.Topics != nil { - for _, t := range req.Topics { - if t.TopicID != zeroID { - useIDs = true - break - } - if t.Topic != nil { - topicNames = append(topicNames, *t.Topic) - } - } - } - if !useIDs { - return p.store.Metadata(ctx, topicNames) - } - all, err := p.store.Metadata(ctx, nil) - if err != nil { - return nil, err - } - index := make(map[[16]byte]protocol.MetadataTopic, len(all.Topics)) - for _, topic := range all.Topics { - 
index[topic.TopicID] = topic - } - filtered := make([]protocol.MetadataTopic, 0, len(req.Topics)) - for _, t := range req.Topics { - if t.TopicID == zeroID { - continue - } - if topic, ok := index[t.TopicID]; ok { - filtered = append(filtered, topic) - } else { - filtered = append(filtered, protocol.MetadataTopic{ - ErrorCode: protocol.UNKNOWN_TOPIC_ID, - TopicID: t.TopicID, - }) - } - } - return &metadata.ClusterMetadata{ - Brokers: all.Brokers, - ClusterID: all.ClusterID, - ControllerID: all.ControllerID, - Topics: filtered, - }, nil -} - -func (p *lfsProxy) handleProduce(ctx context.Context, header *protocol.RequestHeader, payload []byte) ([]byte, bool, error) { - start := time.Now() - _, req, err := protocol.ParseRequest(payload) - if err != nil { - return nil, false, err - } - prodReq, ok := req.(*kmsg.ProduceRequest) - if !ok { - return nil, false, fmt.Errorf("unexpected produce request type %T", req) - } - - p.logger.Debug("handling produce request", "topics", topicsFromProduce(prodReq)) - lfsResult, err := p.rewriteProduceRecords(ctx, header, prodReq) - if err != nil { - for _, topic := range topicsFromProduce(prodReq) { - p.metrics.IncRequests(topic, "error", "lfs") - } - resp, errResp := buildProduceErrorResponse(prodReq, header.CorrelationID, header.APIVersion, protocol.UNKNOWN_SERVER_ERROR) - if errResp != nil { - return nil, true, err - } - return resp, true, err - } - if !lfsResult.modified { - for _, topic := range topicsFromProduce(prodReq) { - p.metrics.IncRequests(topic, "ok", "passthrough") - } - return nil, false, nil - } - for topic := range lfsResult.topics { - p.metrics.IncRequests(topic, "ok", "lfs") - } - p.metrics.ObserveUploadDuration(time.Since(start).Seconds()) - p.metrics.AddUploadBytes(lfsResult.uploadBytes) - - backendConn, backendAddr, err := p.connectBackend(ctx) - if err != nil { - p.trackOrphans(lfsResult.orphans) - return nil, true, err - } - defer func() { _ = backendConn.Close() }() - - resp, err := p.forwardToBackend(ctx, 
backendConn, backendAddr, lfsResult.payload) - if err != nil { - p.trackOrphans(lfsResult.orphans) - } - return resp, true, err -} - -func (p *lfsProxy) rewriteProduceRecords(ctx context.Context, header *protocol.RequestHeader, req *kmsg.ProduceRequest) (rewriteResult, error) { - if p.logger == nil { - p.logger = slog.Default() - } - - if req == nil { - return rewriteResult{}, errors.New("nil produce request") - } - - modified := false - uploadBytes := int64(0) - decompressor := kgo.DefaultDecompressor() - topics := make(map[string]struct{}) - orphans := make([]orphanInfo, 0, 4) - - for ti := range req.Topics { - topic := &req.Topics[ti] - for pi := range topic.Partitions { - partition := &topic.Partitions[pi] - if len(partition.Records) == 0 { - continue - } - batches, err := decodeRecordBatches(partition.Records) - if err != nil { - return rewriteResult{}, err - } - batchModified := false - for bi := range batches { - batch := &batches[bi] - records, codec, err := decodeBatchRecords(batch, decompressor) - if err != nil { - return rewriteResult{}, err - } - if len(records) == 0 { - continue - } - recordChanged := false - for ri := range records { - rec := &records[ri] - headers := rec.Headers - lfsValue, ok := findHeaderValue(headers, "LFS_BLOB") - if !ok { - continue - } - recordChanged = true - modified = true - topics[topic.Topic] = struct{}{} - checksumHeader := strings.TrimSpace(string(lfsValue)) - algHeader, _ := findHeaderValue(headers, "LFS_BLOB_ALG") - alg, err := p.resolveChecksumAlg(string(algHeader)) - if err != nil { - return rewriteResult{}, err - } - if checksumHeader != "" && alg == lfs.ChecksumNone { - return rewriteResult{}, errors.New("checksum provided but checksum algorithm is none") - } - payload := rec.Value - p.logger.Info("LFS blob detected", "topic", topic.Topic, "size", len(payload)) - if int64(len(payload)) > p.maxBlob { - p.logger.Error("blob exceeds max size", "size", len(payload), "max", p.maxBlob) - return rewriteResult{}, 
fmt.Errorf("blob size %d exceeds max %d", len(payload), p.maxBlob) - } - key := p.buildObjectKey(topic.Topic) - sha256Hex, checksum, checksumAlg, err := p.s3Uploader.Upload(ctx, key, payload, alg) - if err != nil { - p.metrics.IncS3Errors() - return rewriteResult{}, err - } - if checksumHeader != "" && checksum != "" && !strings.EqualFold(checksumHeader, checksum) { - if err := p.s3Uploader.DeleteObject(ctx, key); err != nil { - p.trackOrphans([]orphanInfo{{Topic: topic.Topic, Key: key, RequestID: "", Reason: "checksum_mismatch_delete_failed"}}) - return rewriteResult{}, fmt.Errorf("checksum mismatch; delete failed: %w", err) - } - return rewriteResult{}, &lfs.ChecksumError{Expected: checksumHeader, Actual: checksum} - } - env := lfs.Envelope{ - Version: 1, - Bucket: p.s3Bucket, - Key: key, - Size: int64(len(payload)), - SHA256: sha256Hex, - Checksum: checksum, - ChecksumAlg: checksumAlg, - ContentType: headerValue(headers, "content-type"), - OriginalHeaders: headersToMap(headers), - CreatedAt: time.Now().UTC().Format(time.RFC3339), - ProxyID: p.proxyID, - } - encoded, err := lfs.EncodeEnvelope(env) - if err != nil { - return rewriteResult{}, err - } - rec.Value = encoded - rec.Headers = dropHeader(headers, "LFS_BLOB") - uploadBytes += int64(len(payload)) - orphans = append(orphans, orphanInfo{Topic: topic.Topic, Key: key, RequestID: "", Reason: "kafka_produce_failed"}) - } - if !recordChanged { - continue - } - newRecords := encodeRecords(records) - compressedRecords, usedCodec, err := compressRecords(codec, newRecords) - if err != nil { - return rewriteResult{}, err - } - batch.Records = compressedRecords - batch.NumRecords = int32(len(records)) - batch.Attributes = (batch.Attributes &^ 0x0007) | int16(usedCodec) - batch.Length = 0 - batch.CRC = 0 - batchBytes := batch.AppendTo(nil) - batch.Length = int32(len(batchBytes) - 12) - batchBytes = batch.AppendTo(nil) - batch.CRC = int32(crc32.Checksum(batchBytes[21:], crc32cTable)) - batchBytes = batch.AppendTo(nil) - 
batch.Raw = batchBytes - batchModified = true - } - if !batchModified { - continue - } - partition.Records = joinRecordBatches(batches) - } - } - if !modified { - return rewriteResult{modified: false}, nil - } - - payloadBytes, err := encodeProduceRequest(header, req) - if err != nil { - return rewriteResult{}, err - } - return rewriteResult{modified: true, payload: payloadBytes, uploadBytes: uploadBytes, topics: topics, orphans: orphans}, nil -} - -func (p *lfsProxy) buildObjectKey(topic string) string { - ns := strings.TrimSpace(p.s3Namespace) - if ns == "" { - ns = "default" - } - now := time.Now().UTC() - return fmt.Sprintf("%s/%s/lfs/%04d/%02d/%02d/obj-%s", ns, topic, now.Year(), now.Month(), now.Day(), newUUID()) -} - -func (p *lfsProxy) connectBackend(ctx context.Context) (net.Conn, string, error) { - retries := envInt("KAFSCALE_LFS_PROXY_BACKEND_RETRIES", 6) - if retries < 1 { - retries = 1 - } - backoff := time.Duration(envInt("KAFSCALE_LFS_PROXY_BACKEND_BACKOFF_MS", 500)) * time.Millisecond - if backoff <= 0 { - backoff = time.Duration(defaultBackendBackoffMs) * time.Millisecond - } - var lastErr error - for attempt := 0; attempt < retries; attempt++ { - backends, err := p.currentBackends(ctx) - if err != nil || len(backends) == 0 { - if cached := p.cachedBackendsSnapshot(); len(cached) > 0 && p.cacheFresh() { - backends = cached - err = nil - } - } - if err != nil || len(backends) == 0 { - lastErr = err - time.Sleep(backoff) - continue - } - index := atomic.AddUint32(&p.rr, 1) - addr := backends[int(index)%len(backends)] - dialer := net.Dialer{Timeout: p.dialTimeout} - conn, dialErr := dialer.DialContext(ctx, "tcp", addr) - if dialErr == nil { - wrapped, err := p.wrapBackendTLS(ctx, conn, addr) - if err != nil { - _ = conn.Close() - lastErr = err - time.Sleep(backoff) - continue - } - if err := p.performBackendSASL(ctx, wrapped); err != nil { - _ = wrapped.Close() - lastErr = err - time.Sleep(backoff) - continue - } - return wrapped, addr, nil - } - 
lastErr = dialErr - time.Sleep(backoff) - } - if lastErr == nil { - lastErr = errors.New("no backends available") - } - return nil, "", lastErr -} - -func (p *lfsProxy) currentBackends(ctx context.Context) ([]string, error) { - if len(p.backends) > 0 { - return p.backends, nil - } - meta, err := p.store.Metadata(ctx, nil) - if err != nil { - return nil, err - } - addrs := make([]string, 0, len(meta.Brokers)) - for _, broker := range meta.Brokers { - if broker.Host == "" || broker.Port == 0 { - continue - } - addrs = append(addrs, fmt.Sprintf("%s:%d", broker.Host, broker.Port)) - } - if len(addrs) > 0 { - p.setCachedBackends(addrs) - p.touchHealthy() - p.setReady(true) - } - return addrs, nil -} - -func (p *lfsProxy) forwardToBackend(ctx context.Context, conn net.Conn, backendAddr string, payload []byte) ([]byte, error) { - if err := protocol.WriteFrame(conn, payload); err != nil { - return nil, err - } - frame, err := protocol.ReadFrame(conn) - if err != nil { - return nil, err - } - return frame.Payload, nil -} - -func buildProxyMetadataResponse(meta *metadata.ClusterMetadata, correlationID int32, version int16, host string, port int32) *kmsg.MetadataResponse { - brokers := []protocol.MetadataBroker{{ - NodeID: 0, - Host: host, - Port: port, - }} - topics := make([]protocol.MetadataTopic, 0, len(meta.Topics)) - for _, topic := range meta.Topics { - if topic.ErrorCode != protocol.NONE { - topics = append(topics, topic) - continue - } - partitions := make([]protocol.MetadataPartition, 0, len(topic.Partitions)) - for _, part := range topic.Partitions { - partitions = append(partitions, protocol.MetadataPartition{ - ErrorCode: part.ErrorCode, - Partition: part.Partition, - Leader: 0, - LeaderEpoch: part.LeaderEpoch, - Replicas: []int32{0}, - ISR: []int32{0}, - }) - } - topics = append(topics, protocol.MetadataTopic{ - ErrorCode: topic.ErrorCode, - Topic: topic.Topic, - TopicID: topic.TopicID, - IsInternal: topic.IsInternal, - Partitions: partitions, - }) - } - resp := 
kmsg.NewPtrMetadataResponse() - resp.Brokers = brokers - resp.ClusterID = meta.ClusterID - resp.ControllerID = 0 - resp.Topics = topics - return resp -} - -func (p *lfsProxy) buildNotReadyResponse(header *protocol.RequestHeader, payload []byte) ([]byte, bool, error) { - _, req, err := protocol.ParseRequest(payload) - if err != nil { - return nil, false, err - } - encode := func(resp kmsg.Response) ([]byte, bool, error) { - return protocol.EncodeResponse(header.CorrelationID, header.APIVersion, resp), true, nil - } - switch header.APIKey { - case protocol.APIKeyMetadata: - metaReq := req.(*kmsg.MetadataRequest) - resp := kmsg.NewPtrMetadataResponse() - resp.ControllerID = -1 - for _, t := range metaReq.Topics { - mt := kmsg.NewMetadataResponseTopic() - mt.ErrorCode = protocol.REQUEST_TIMED_OUT - mt.Topic = t.Topic - mt.TopicID = t.TopicID - resp.Topics = append(resp.Topics, mt) - } - return encode(resp) - case protocol.APIKeyFindCoordinator: - resp := kmsg.NewPtrFindCoordinatorResponse() - resp.ErrorCode = protocol.REQUEST_TIMED_OUT - resp.NodeID = -1 - return encode(resp) - case protocol.APIKeyProduce: - prodReq := req.(*kmsg.ProduceRequest) - resp := kmsg.NewPtrProduceResponse() - for _, topic := range prodReq.Topics { - rt := kmsg.NewProduceResponseTopic() - rt.Topic = topic.Topic - for _, part := range topic.Partitions { - rp := kmsg.NewProduceResponseTopicPartition() - rp.Partition = part.Partition - rp.ErrorCode = protocol.REQUEST_TIMED_OUT - rp.BaseOffset = -1 - rp.LogAppendTime = -1 - rp.LogStartOffset = -1 - rt.Partitions = append(rt.Partitions, rp) - } - resp.Topics = append(resp.Topics, rt) - } - return encode(resp) - default: - return nil, false, nil - } -} - -func buildProduceErrorResponse(req *kmsg.ProduceRequest, correlationID int32, version int16, code int16) ([]byte, error) { - resp := kmsg.NewPtrProduceResponse() - for _, topic := range req.Topics { - rt := kmsg.NewProduceResponseTopic() - rt.Topic = topic.Topic - for _, part := range 
topic.Partitions { - rp := kmsg.NewProduceResponseTopicPartition() - rp.Partition = part.Partition - rp.ErrorCode = code - rp.BaseOffset = -1 - rp.LogAppendTime = -1 - rp.LogStartOffset = -1 - rt.Partitions = append(rt.Partitions, rp) - } - resp.Topics = append(resp.Topics, rt) - } - return protocol.EncodeResponse(correlationID, version, resp), nil -} - -func generateProxyApiVersions() []kmsg.ApiVersionsResponseApiKey { - supported := []struct { - key int16 - min, max int16 - }{ - {key: protocol.APIKeyApiVersion, min: 0, max: 4}, - {key: protocol.APIKeyMetadata, min: 0, max: 12}, - {key: protocol.APIKeyProduce, min: 0, max: 9}, - {key: protocol.APIKeyFetch, min: 11, max: 13}, - {key: protocol.APIKeyFindCoordinator, min: 3, max: 3}, - {key: protocol.APIKeyListOffsets, min: 0, max: 4}, - {key: protocol.APIKeyJoinGroup, min: 4, max: 4}, - {key: protocol.APIKeySyncGroup, min: 4, max: 4}, - {key: protocol.APIKeyHeartbeat, min: 4, max: 4}, - {key: protocol.APIKeyLeaveGroup, min: 4, max: 4}, - {key: protocol.APIKeyOffsetCommit, min: 3, max: 3}, - {key: protocol.APIKeyOffsetFetch, min: 5, max: 5}, - {key: protocol.APIKeyDescribeGroups, min: 5, max: 5}, - {key: protocol.APIKeyListGroups, min: 5, max: 5}, - {key: protocol.APIKeyOffsetForLeaderEpoch, min: 3, max: 3}, - {key: protocol.APIKeyDescribeConfigs, min: 4, max: 4}, - {key: protocol.APIKeyAlterConfigs, min: 1, max: 1}, - {key: protocol.APIKeyCreatePartitions, min: 0, max: 3}, - {key: protocol.APIKeyCreateTopics, min: 0, max: 2}, - {key: protocol.APIKeyDeleteTopics, min: 0, max: 2}, - {key: protocol.APIKeyDeleteGroups, min: 0, max: 2}, - } - unsupported := []int16{4, 5, 6, 7, 21, 22, 24, 25, 26} - entries := make([]kmsg.ApiVersionsResponseApiKey, 0, len(supported)+len(unsupported)) - for _, entry := range supported { - entries = append(entries, kmsg.ApiVersionsResponseApiKey{ - ApiKey: entry.key, - MinVersion: entry.min, - MaxVersion: entry.max, - }) - } - for _, key := range unsupported { - entries = append(entries, 
kmsg.ApiVersionsResponseApiKey{ - ApiKey: key, - MinVersion: -1, - MaxVersion: -1, - }) - } - return entries -} - -func topicsFromProduce(req *kmsg.ProduceRequest) []string { - if req == nil { - return nil - } - seen := make(map[string]struct{}, len(req.Topics)) - out := make([]string, 0, len(req.Topics)) - for _, topic := range req.Topics { - if _, ok := seen[topic.Topic]; ok { - continue - } - seen[topic.Topic] = struct{}{} - out = append(out, topic.Topic) - } - if len(out) == 0 { - return []string{"unknown"} - } - return out -} - -type recordBatch struct { - kmsg.RecordBatch - Raw []byte -} - -type rewriteResult struct { - modified bool - payload []byte - uploadBytes int64 - topics map[string]struct{} - orphans []orphanInfo -} - -type orphanInfo struct { - Topic string - Key string - RequestID string - Reason string -} - -func (p *lfsProxy) trackOrphans(orphans []orphanInfo) { - if len(orphans) == 0 { - return - } - p.metrics.IncOrphans(len(orphans)) - for _, orphan := range orphans { - p.logger.Warn("lfs orphaned object", "topic", orphan.Topic, "key", orphan.Key, "reason", orphan.Reason) - // Emit orphan_detected event - reason := orphan.Reason - if reason == "" { - reason = "kafka_produce_failed" - } - p.tracker.EmitOrphanDetected(orphan.RequestID, "upload_failure", orphan.Topic, p.s3Bucket, orphan.Key, orphan.RequestID, reason, 0) - } -} - -func decodeRecordBatches(records []byte) ([]recordBatch, error) { - out := make([]recordBatch, 0, 4) - buf := records - for len(buf) > 0 { - if len(buf) < 12 { - return nil, fmt.Errorf("record batch too short: %d", len(buf)) - } - length := int(int32FromBytes(buf[8:12])) - total := 12 + length - if length < 0 || len(buf) < total { - return nil, fmt.Errorf("invalid record batch length %d", length) - } - batchBytes := buf[:total] - var batch kmsg.RecordBatch - if err := batch.ReadFrom(batchBytes); err != nil { - return nil, err - } - out = append(out, recordBatch{RecordBatch: batch, Raw: batchBytes}) - buf = buf[total:] - } 
- return out, nil -} - -func joinRecordBatches(batches []recordBatch) []byte { - if len(batches) == 0 { - return nil - } - size := 0 - for _, batch := range batches { - size += len(batch.Raw) - } - out := make([]byte, 0, size) - for _, batch := range batches { - out = append(out, batch.Raw...) - } - return out -} - -func decodeBatchRecords(batch *recordBatch, decompressor kgo.Decompressor) ([]kmsg.Record, kgo.CompressionCodecType, error) { - codec := kgo.CompressionCodecType(batch.Attributes & 0x0007) - rawRecords := batch.Records - if codec != kgo.CodecNone { - var err error - rawRecords, err = decompressor.Decompress(rawRecords, codec) - if err != nil { - return nil, codec, err - } - } - numRecords := int(batch.NumRecords) - records := make([]kmsg.Record, numRecords) - records = readRawRecordsInto(records, rawRecords) - return records, codec, nil -} - -func readRawRecordsInto(rs []kmsg.Record, in []byte) []kmsg.Record { - for i := range rs { - length, used := varint(in) - total := used + int(length) - if used == 0 || length < 0 || len(in) < total { - return rs[:i] - } - if err := (&rs[i]).ReadFrom(in[:total]); err != nil { - rs[i] = kmsg.Record{} - return rs[:i] - } - in = in[total:] - } - return rs -} - -func compressRecords(codec kgo.CompressionCodecType, raw []byte) ([]byte, kgo.CompressionCodecType, error) { - if codec == kgo.CodecNone { - return raw, kgo.CodecNone, nil - } - var comp kgo.Compressor - var err error - switch codec { - case kgo.CodecGzip: - comp, err = kgo.DefaultCompressor(kgo.GzipCompression()) - case kgo.CodecSnappy: - comp, err = kgo.DefaultCompressor(kgo.SnappyCompression()) - case kgo.CodecLz4: - comp, err = kgo.DefaultCompressor(kgo.Lz4Compression()) - case kgo.CodecZstd: - comp, err = kgo.DefaultCompressor(kgo.ZstdCompression()) - default: - return raw, kgo.CodecNone, nil - } - if err != nil || comp == nil { - return raw, kgo.CodecNone, err - } - out, usedCodec := comp.Compress(bytes.NewBuffer(nil), raw) - return out, usedCodec, nil -} 
- -func findHeaderValue(headers []kmsg.Header, key string) ([]byte, bool) { - for _, header := range headers { - if header.Key == key { - return header.Value, true - } - } - return nil, false -} - -func headerValue(headers []kmsg.Header, key string) string { - for _, header := range headers { - if header.Key == key { - return string(header.Value) - } - } - return "" -} - -// safeHeaderAllowlist defines headers that are safe to include in the LFS envelope. -// Headers not in this list are redacted to prevent leaking sensitive information. -var safeHeaderAllowlist = map[string]bool{ - "content-type": true, - "content-encoding": true, - "correlation-id": true, - "message-id": true, - "x-correlation-id": true, - "x-request-id": true, - "traceparent": true, // W3C trace context - "tracestate": true, // W3C trace context -} - -func headersToMap(headers []kmsg.Header) map[string]string { - if len(headers) == 0 { - return nil - } - out := make(map[string]string) - for _, header := range headers { - key := strings.ToLower(header.Key) - // Only include safe headers in the envelope - if safeHeaderAllowlist[key] { - out[header.Key] = string(header.Value) - } - } - if len(out) == 0 { - return nil - } - return out -} - -func dropHeader(headers []kmsg.Header, key string) []kmsg.Header { - if len(headers) == 0 { - return headers - } - out := headers[:0] - for _, header := range headers { - if header.Key == key { - continue - } - out = append(out, header) - } - return out -} - -func int32FromBytes(b []byte) int32 { - return int32(uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])) -} - -var crc32cTable = crc32.MakeTable(crc32.Castagnoli) - -func (p *lfsProxy) resolveChecksumAlg(raw string) (lfs.ChecksumAlg, error) { - if strings.TrimSpace(raw) == "" { - return lfs.NormalizeChecksumAlg(p.checksumAlg) - } - return lfs.NormalizeChecksumAlg(raw) -} diff --git a/cmd/lfs-proxy/handler_test.go b/cmd/lfs-proxy/handler_test.go deleted file mode 100644 index 
d338f1c2..00000000 --- a/cmd/lfs-proxy/handler_test.go +++ /dev/null @@ -1,325 +0,0 @@ -// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "io" - "testing" - - "github.com/KafScale/platform/pkg/lfs" - "github.com/KafScale/platform/pkg/protocol" - "github.com/aws/aws-sdk-go-v2/aws" - "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/twmb/franz-go/pkg/kgo" - "github.com/twmb/franz-go/pkg/kmsg" -) - -type fakeS3API struct{} - -func (fakeS3API) CreateMultipartUpload(ctx context.Context, params *s3.CreateMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error) { - return &s3.CreateMultipartUploadOutput{UploadId: aws.String("upload")}, nil -} -func (fakeS3API) UploadPart(ctx context.Context, params *s3.UploadPartInput, optFns ...func(*s3.Options)) (*s3.UploadPartOutput, error) { - return &s3.UploadPartOutput{ETag: aws.String("etag")}, nil -} -func (fakeS3API) CompleteMultipartUpload(ctx context.Context, params *s3.CompleteMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) { - return &s3.CompleteMultipartUploadOutput{}, nil -} -func (fakeS3API) AbortMultipartUpload(ctx context.Context, params *s3.AbortMultipartUploadInput, optFns 
...func(*s3.Options)) (*s3.AbortMultipartUploadOutput, error) { - return &s3.AbortMultipartUploadOutput{}, nil -} -func (fakeS3API) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) { - return &s3.PutObjectOutput{}, nil -} -func (fakeS3API) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) { - body := io.NopCloser(bytes.NewReader([]byte("payload"))) - return &s3.GetObjectOutput{ - Body: body, - ContentLength: aws.Int64(int64(len("payload"))), - ContentType: aws.String("application/octet-stream"), - }, nil -} - -func (fakeS3API) DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) { - return &s3.DeleteObjectOutput{}, nil -} -func (fakeS3API) HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) { - return &s3.HeadBucketOutput{}, nil -} -func (fakeS3API) CreateBucket(ctx context.Context, params *s3.CreateBucketInput, optFns ...func(*s3.Options)) (*s3.CreateBucketOutput, error) { - return &s3.CreateBucketOutput{}, nil -} - -type failingS3API struct { - err error -} - -func (f failingS3API) CreateMultipartUpload(ctx context.Context, params *s3.CreateMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error) { - return nil, f.err -} -func (f failingS3API) UploadPart(ctx context.Context, params *s3.UploadPartInput, optFns ...func(*s3.Options)) (*s3.UploadPartOutput, error) { - return nil, f.err -} -func (f failingS3API) CompleteMultipartUpload(ctx context.Context, params *s3.CompleteMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) { - return nil, f.err -} -func (f failingS3API) AbortMultipartUpload(ctx context.Context, params *s3.AbortMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.AbortMultipartUploadOutput, error) { - 
return nil, f.err -} -func (f failingS3API) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) { - return nil, f.err -} -func (f failingS3API) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) { - return nil, f.err -} - -func (f failingS3API) DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) { - return nil, f.err -} -func (f failingS3API) HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) { - return nil, f.err -} -func (f failingS3API) CreateBucket(ctx context.Context, params *s3.CreateBucketInput, optFns ...func(*s3.Options)) (*s3.CreateBucketOutput, error) { - return nil, f.err -} - -func TestRewriteProduceRecords(t *testing.T) { - proxy := &lfsProxy{ - s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: fakeS3API{}}, - s3Bucket: "bucket", - s3Namespace: "ns", - maxBlob: 1024 * 1024, - proxyID: "proxy-1", - metrics: newLfsMetrics(), - } - - rec := kmsg.Record{ - TimestampDelta64: 0, - OffsetDelta: 0, - Value: []byte("payload"), - Headers: []kmsg.Header{ - {Key: "LFS_BLOB", Value: nil}, - {Key: "content-type", Value: []byte("application/octet-stream")}, - }, - } - batchBytes := buildRecordBatch([]kmsg.Record{rec}) - - req := &kmsg.ProduceRequest{ - Acks: 1, - TimeoutMillis: 1000, - Topics: []kmsg.ProduceRequestTopic{ - { - Topic: "topic", - Partitions: []kmsg.ProduceRequestTopicPartition{{ - Partition: 0, - Records: batchBytes, - }}, - }, - }, - } - header := &protocol.RequestHeader{ - APIKey: protocol.APIKeyProduce, - APIVersion: 9, - CorrelationID: 1, - ClientID: strPtr("client"), - } - - result, err := proxy.rewriteProduceRecords(context.Background(), header, req) - if err != nil { - t.Fatalf("rewriteProduceRecords error: %v", err) - } - if !result.modified { - t.Fatalf("expected 
modified payload") - } - parsedHeader, parsedReq, err := protocol.ParseRequest(result.payload) - if err != nil { - t.Fatalf("parse rewritten request: %v", err) - } - if parsedHeader.APIKey != protocol.APIKeyProduce { - t.Fatalf("unexpected api key %d", parsedHeader.APIKey) - } - prodReq := parsedReq.(*kmsg.ProduceRequest) - batches, err := decodeRecordBatches(prodReq.Topics[0].Partitions[0].Records) - if err != nil { - t.Fatalf("decode record batches: %v", err) - } - records, _, err := decodeBatchRecords(&batches[0], kgo.DefaultDecompressor()) - if err != nil { - t.Fatalf("decode records: %v", err) - } - var env lfs.Envelope - if err := json.Unmarshal(records[0].Value, &env); err != nil { - t.Fatalf("unmarshal envelope: %v", err) - } - if env.Bucket != "bucket" || env.Key == "" || env.Version != 1 { - t.Fatalf("unexpected envelope: %+v", env) - } -} - -func TestRewriteProduceRecordsPassthrough(t *testing.T) { - proxy := &lfsProxy{ - s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: fakeS3API{}}, - s3Bucket: "bucket", - s3Namespace: "ns", - maxBlob: 1024 * 1024, - metrics: newLfsMetrics(), - } - - rec := kmsg.Record{ - TimestampDelta64: 0, - OffsetDelta: 0, - Value: []byte("payload"), - Headers: nil, - } - batchBytes := buildRecordBatch([]kmsg.Record{rec}) - - req := &kmsg.ProduceRequest{ - Acks: 1, - TimeoutMillis: 1000, - Topics: []kmsg.ProduceRequestTopic{ - { - Topic: "topic", - Partitions: []kmsg.ProduceRequestTopicPartition{{ - Partition: 0, - Records: batchBytes, - }}, - }, - }, - } - header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} - - result, err := proxy.rewriteProduceRecords(context.Background(), header, req) - if err != nil { - t.Fatalf("rewriteProduceRecords error: %v", err) - } - if result.modified { - t.Fatalf("expected passthrough") - } -} - -func TestRewriteProduceRecordsS3Failure(t *testing.T) { - proxy := &lfsProxy{ - s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: 
failingS3API{err: errors.New("boom")}}, - s3Bucket: "bucket", - s3Namespace: "ns", - maxBlob: 1024 * 1024, - metrics: newLfsMetrics(), - } - - rec := kmsg.Record{ - TimestampDelta64: 0, - OffsetDelta: 0, - Value: []byte("payload"), - Headers: []kmsg.Header{{Key: "LFS_BLOB", Value: nil}}, - } - batchBytes := buildRecordBatch([]kmsg.Record{rec}) - - req := &kmsg.ProduceRequest{ - Acks: 1, - TimeoutMillis: 1000, - Topics: []kmsg.ProduceRequestTopic{{ - Topic: "topic", - Partitions: []kmsg.ProduceRequestTopicPartition{{ - Partition: 0, - Records: batchBytes, - }}, - }}, - } - header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} - - _, err := proxy.rewriteProduceRecords(context.Background(), header, req) - if err == nil { - t.Fatalf("expected error") - } -} - -func TestRewriteProduceRecordsChecksumMismatch(t *testing.T) { - proxy := &lfsProxy{ - s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: fakeS3API{}}, - s3Bucket: "bucket", - s3Namespace: "ns", - maxBlob: 1024 * 1024, - metrics: newLfsMetrics(), - } - - rec := kmsg.Record{ - TimestampDelta64: 0, - OffsetDelta: 0, - Value: []byte("payload"), - Headers: []kmsg.Header{{Key: "LFS_BLOB", Value: []byte("deadbeef")}}, - } - batchBytes := buildRecordBatch([]kmsg.Record{rec}) - - req := &kmsg.ProduceRequest{ - Acks: 1, - TimeoutMillis: 1000, - Topics: []kmsg.ProduceRequestTopic{{ - Topic: "topic", - Partitions: []kmsg.ProduceRequestTopicPartition{{ - Partition: 0, - Records: batchBytes, - }}, - }}, - } - header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} - - _, err := proxy.rewriteProduceRecords(context.Background(), header, req) - if err == nil { - t.Fatalf("expected error") - } -} - -func TestRewriteProduceRecordsMaxBlobSize(t *testing.T) { - proxy := &lfsProxy{ - s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: 1024, api: fakeS3API{}}, - s3Bucket: "bucket", - s3Namespace: "ns", - maxBlob: 3, - metrics: 
newLfsMetrics(), - } - - rec := kmsg.Record{ - TimestampDelta64: 0, - OffsetDelta: 0, - Value: []byte("payload"), - Headers: []kmsg.Header{{Key: "LFS_BLOB", Value: nil}}, - } - batchBytes := buildRecordBatch([]kmsg.Record{rec}) - - req := &kmsg.ProduceRequest{ - Acks: 1, - TimeoutMillis: 1000, - Topics: []kmsg.ProduceRequestTopic{{ - Topic: "topic", - Partitions: []kmsg.ProduceRequestTopicPartition{{ - Partition: 0, - Records: batchBytes, - }}, - }}, - } - header := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: 1} - - _, err := proxy.rewriteProduceRecords(context.Background(), header, req) - if err == nil { - t.Fatalf("expected error") - } -} - -func strPtr(v string) *string { return &v } diff --git a/cmd/lfs-proxy/http.go b/cmd/lfs-proxy/http.go deleted file mode 100644 index 27c16325..00000000 --- a/cmd/lfs-proxy/http.go +++ /dev/null @@ -1,1013 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "context" - "crypto/sha256" - "crypto/subtle" - "encoding/base64" - "encoding/hex" - "encoding/json" - "errors" - "io" - "math" - "net/http" - "regexp" - "strconv" - "strings" - "sync" - "sync/atomic" - "time" - - "github.com/KafScale/platform/pkg/lfs" - "github.com/KafScale/platform/pkg/protocol" - "github.com/aws/aws-sdk-go-v2/aws" - "github.com/aws/aws-sdk-go-v2/service/s3/types" - "github.com/twmb/franz-go/pkg/kmsg" -) - -const ( - headerTopic = "X-Kafka-Topic" - headerKey = "X-Kafka-Key" - headerPartition = "X-Kafka-Partition" - headerChecksum = "X-LFS-Checksum" - headerChecksumAlg = "X-LFS-Checksum-Alg" - headerRequestID = "X-Request-ID" -) - -// validTopicPattern matches valid Kafka topic names (alphanumeric, dots, underscores, hyphens) -var validTopicPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+$`) - -type errorResponse struct { - Code string `json:"code"` - Message string `json:"message"` - RequestID string `json:"request_id"` -} - -type downloadRequest struct { - Bucket string `json:"bucket"` - Key string `json:"key"` - Mode string `json:"mode"` - ExpiresSeconds int `json:"expires_seconds"` -} - -type downloadResponse struct { - Mode string `json:"mode"` - URL string `json:"url"` - ExpiresAt string `json:"expires_at"` -} - -type uploadInitRequest struct { - Topic string `json:"topic"` - Key string `json:"key"` - Partition *int32 `json:"partition,omitempty"` - ContentType string `json:"content_type"` - SizeBytes int64 `json:"size_bytes"` - Checksum string `json:"checksum,omitempty"` - ChecksumAlg string `json:"checksum_alg,omitempty"` -} - -type uploadInitResponse struct { - UploadID string `json:"upload_id"` - S3Key string `json:"s3_key"` - PartSize int64 `json:"part_size"` - ExpiresAt string `json:"expires_at"` -} - -type uploadPartResponse struct { - UploadID string `json:"upload_id"` - PartNumber int32 `json:"part_number"` - ETag string `json:"etag"` -} - -type uploadCompleteRequest struct { - Parts []struct { - 
PartNumber int32 `json:"part_number"` - ETag string `json:"etag"` - } `json:"parts"` -} - -type uploadSession struct { - mu sync.Mutex - ID string - Topic string - S3Key string - UploadID string - ContentType string - SizeBytes int64 - KeyBytes []byte - Partition int32 - Checksum string - ChecksumAlg lfs.ChecksumAlg - CreatedAt time.Time - ExpiresAt time.Time - PartSize int64 - NextPart int32 - TotalUploaded int64 - Parts map[int32]string - PartSizes map[int32]int64 - sha256Hasher hashWriter - checksumHasher hashWriter -} - -type hashWriter interface { - Write([]byte) (int, error) - Sum([]byte) []byte -} - -func (p *lfsProxy) startHTTPServer(ctx context.Context, addr string) { - mux := http.NewServeMux() - mux.HandleFunc("/lfs/produce", p.corsMiddleware(p.handleHTTPProduce)) - mux.HandleFunc("/lfs/download", p.corsMiddleware(p.handleHTTPDownload)) - mux.HandleFunc("/lfs/uploads", p.corsMiddleware(p.handleHTTPUploadInit)) - mux.HandleFunc("/lfs/uploads/", p.corsMiddleware(p.handleHTTPUploadSession)) - // Swagger UI and OpenAPI spec endpoints - mux.HandleFunc("/swagger", p.handleSwaggerUI) - mux.HandleFunc("/swagger/", p.handleSwaggerUI) - mux.HandleFunc("/api/openapi.yaml", p.handleOpenAPISpec) - srv := &http.Server{ - Addr: addr, - Handler: mux, - ReadTimeout: p.httpReadTimeout, - WriteTimeout: p.httpWriteTimeout, - IdleTimeout: p.httpIdleTimeout, - ReadHeaderTimeout: p.httpHeaderTimeout, - MaxHeaderBytes: p.httpMaxHeaderBytes, - } - go func() { - <-ctx.Done() - shutdownCtx, cancel := context.WithTimeout(context.Background(), p.httpShutdownTimeout) - defer cancel() - _ = srv.Shutdown(shutdownCtx) - }() - go func() { - p.logger.Info("lfs proxy http listening", "addr", addr, "tls", p.httpTLSConfig != nil) - var err error - if p.httpTLSConfig != nil { - srv.TLSConfig = p.httpTLSConfig - err = srv.ListenAndServeTLS(p.httpTLSCertFile, p.httpTLSKeyFile) - } else { - err = srv.ListenAndServe() - } - if err != nil && err != http.ErrServerClosed { - p.logger.Warn("lfs proxy 
http server error", "error", err) - } - }() -} - -// corsMiddleware adds CORS headers to allow browser-based clients. -func (p *lfsProxy) corsMiddleware(next http.HandlerFunc) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - // Set CORS headers for all responses - w.Header().Set("Access-Control-Allow-Origin", "*") - w.Header().Set("Access-Control-Allow-Methods", "POST, PUT, DELETE, OPTIONS") - w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Range, X-Kafka-Topic, X-Kafka-Key, X-Kafka-Partition, X-LFS-Checksum, X-LFS-Checksum-Alg, X-LFS-Size, X-LFS-Mode, X-Request-ID, X-API-Key, Authorization") - w.Header().Set("Access-Control-Expose-Headers", "X-Request-ID") - - // Handle preflight OPTIONS request - if r.Method == http.MethodOptions { - w.WriteHeader(http.StatusNoContent) - return - } - - next(w, r) - } -} - -func (p *lfsProxy) handleHTTPProduce(w http.ResponseWriter, r *http.Request) { - requestID := strings.TrimSpace(r.Header.Get(headerRequestID)) - if requestID == "" { - requestID = newUUID() - } - w.Header().Set(headerRequestID, requestID) - if r.Method != http.MethodPost { - p.writeHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed") - return - } - if p.httpAPIKey != "" && !p.validateHTTPAPIKey(r) { - p.writeHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized") - return - } - if !p.isReady() { - p.writeHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready") - return - } - topic := strings.TrimSpace(r.Header.Get(headerTopic)) - if topic == "" { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "missing_topic", "missing topic") - return - } - if !p.isValidTopicName(topic) { - p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_topic", "invalid topic name") - return - } - - var keyBytes []byte - if keyHeader := strings.TrimSpace(r.Header.Get(headerKey)); keyHeader != "" 
{ - decoded, err := base64.StdEncoding.DecodeString(keyHeader) - if err != nil { - p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_key", "invalid key") - return - } - keyBytes = decoded - } - - partition := int32(0) - if partitionHeader := strings.TrimSpace(r.Header.Get(headerPartition)); partitionHeader != "" { - parsed, err := strconv.ParseInt(partitionHeader, 10, 32) - if err != nil { - p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_partition", "invalid partition") - return - } - partition = int32(parsed) - } - - checksumHeader := strings.TrimSpace(r.Header.Get(headerChecksum)) - checksumAlgHeader := strings.TrimSpace(r.Header.Get(headerChecksumAlg)) - alg, err := p.resolveChecksumAlg(checksumAlgHeader) - if err != nil { - p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_request", err.Error()) - return - } - if checksumHeader != "" && alg == lfs.ChecksumNone { - p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "invalid_checksum", "checksum provided but checksum algorithm is none") - return - } - objectKey := p.buildObjectKey(topic) - clientIP := getClientIP(r) - contentType := r.Header.Get("Content-Type") - - start := time.Now() - - // Emit upload_started event - p.tracker.EmitUploadStarted(requestID, topic, partition, objectKey, contentType, clientIP, "http", r.ContentLength) - - sha256Hex, checksum, checksumAlg, size, err := p.s3Uploader.UploadStream(r.Context(), objectKey, r.Body, p.maxBlob, alg) - if err != nil { - p.metrics.IncRequests(topic, "error", "lfs") - p.metrics.IncS3Errors() - status, code := statusForUploadError(err) - p.tracker.EmitUploadFailed(requestID, topic, objectKey, code, err.Error(), "s3_upload", 0, time.Since(start)) - p.writeHTTPError(w, requestID, topic, status, code, err.Error()) - return - } - if checksumHeader != "" && checksum != "" && !strings.EqualFold(checksumHeader, checksum) { - if err := p.s3Uploader.DeleteObject(r.Context(), objectKey); err != nil 
{ - p.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}}) - p.metrics.IncRequests(topic, "error", "lfs") - p.tracker.EmitUploadFailed(requestID, topic, objectKey, "checksum_mismatch", "checksum mismatch; delete failed", "validation", size, time.Since(start)) - p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "checksum_mismatch", "checksum mismatch; delete failed") - return - } - p.metrics.IncRequests(topic, "error", "lfs") - p.tracker.EmitUploadFailed(requestID, topic, objectKey, "checksum_mismatch", (&lfs.ChecksumError{Expected: checksumHeader, Actual: checksum}).Error(), "validation", size, time.Since(start)) - p.writeHTTPError(w, requestID, topic, http.StatusBadRequest, "checksum_mismatch", (&lfs.ChecksumError{Expected: checksumHeader, Actual: checksum}).Error()) - return - } - - env := lfs.Envelope{ - Version: 1, - Bucket: p.s3Bucket, - Key: objectKey, - Size: size, - SHA256: sha256Hex, - Checksum: checksum, - ChecksumAlg: checksumAlg, - ContentType: r.Header.Get("Content-Type"), - CreatedAt: time.Now().UTC().Format(time.RFC3339), - ProxyID: p.proxyID, - } - encoded, err := lfs.EncodeEnvelope(env) - if err != nil { - p.metrics.IncRequests(topic, "error", "lfs") - p.writeHTTPError(w, requestID, topic, http.StatusInternalServerError, "encode_failed", err.Error()) - return - } - - record := kmsg.Record{ - TimestampDelta64: 0, - OffsetDelta: 0, - Key: keyBytes, - Value: encoded, - } - batchBytes := buildRecordBatch([]kmsg.Record{record}) - - produceReq := &kmsg.ProduceRequest{ - Acks: 1, - TimeoutMillis: 15000, - Topics: []kmsg.ProduceRequestTopic{{ - Topic: topic, - Partitions: []kmsg.ProduceRequestTopicPartition{{ - Partition: partition, - Records: batchBytes, - }}, - }}, - } - - correlationID := int32(atomic.AddUint32(&p.corrID, 1)) - reqHeader := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: correlationID} - payload, err := 
encodeProduceRequest(reqHeader, produceReq) - if err != nil { - p.metrics.IncRequests(topic, "error", "lfs") - p.writeHTTPError(w, requestID, topic, http.StatusInternalServerError, "encode_failed", err.Error()) - return - } - - backendConn, backendAddr, err := p.connectBackend(r.Context()) - if err != nil { - p.metrics.IncRequests(topic, "error", "lfs") - p.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}}) - p.tracker.EmitUploadFailed(requestID, topic, objectKey, "backend_unavailable", err.Error(), "kafka_produce", size, time.Since(start)) - p.writeHTTPError(w, requestID, topic, http.StatusServiceUnavailable, "backend_unavailable", err.Error()) - return - } - defer func() { _ = backendConn.Close() }() - - _, err = p.forwardToBackend(r.Context(), backendConn, backendAddr, payload) - if err != nil { - p.metrics.IncRequests(topic, "error", "lfs") - p.trackOrphans([]orphanInfo{{Topic: topic, Key: objectKey, RequestID: requestID, Reason: "kafka_produce_failed"}}) - p.tracker.EmitUploadFailed(requestID, topic, objectKey, "backend_error", err.Error(), "kafka_produce", size, time.Since(start)) - p.writeHTTPError(w, requestID, topic, http.StatusBadGateway, "backend_error", err.Error()) - return - } - - p.metrics.IncRequests(topic, "ok", "lfs") - p.metrics.AddUploadBytes(size) - p.metrics.ObserveUploadDuration(time.Since(start).Seconds()) - - // Emit upload_completed event - p.tracker.EmitUploadCompleted(requestID, topic, partition, 0, p.s3Bucket, objectKey, size, sha256Hex, checksum, checksumAlg, contentType, time.Since(start)) - - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _ = json.NewEncoder(w).Encode(env) -} - -func (p *lfsProxy) handleHTTPDownload(w http.ResponseWriter, r *http.Request) { - requestID := strings.TrimSpace(r.Header.Get(headerRequestID)) - if requestID == "" { - requestID = newUUID() - } - w.Header().Set(headerRequestID, requestID) - if r.Method != 
http.MethodPost { - p.writeHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed") - return - } - if p.httpAPIKey != "" && !p.validateHTTPAPIKey(r) { - p.writeHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized") - return - } - if !p.isReady() { - p.writeHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready") - return - } - - var req downloadRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "invalid JSON body") - return - } - req.Bucket = strings.TrimSpace(req.Bucket) - req.Key = strings.TrimSpace(req.Key) - if req.Bucket == "" || req.Key == "" { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "bucket and key required") - return - } - if req.Bucket != p.s3Bucket { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_bucket", "bucket not allowed") - return - } - if err := p.validateObjectKey(req.Key); err != nil { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_key", err.Error()) - return - } - - mode := strings.ToLower(strings.TrimSpace(req.Mode)) - if mode == "" { - mode = "presign" - } - if mode != "presign" && mode != "stream" { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_mode", "mode must be presign or stream") - return - } - - clientIP := getClientIP(r) - start := time.Now() - - // Emit download_requested event - ttlSeconds := 0 - if mode == "presign" { - ttlSeconds = req.ExpiresSeconds - if ttlSeconds <= 0 { - ttlSeconds = int(p.downloadTTLMax.Seconds()) - } - } - p.tracker.EmitDownloadRequested(requestID, req.Bucket, req.Key, mode, clientIP, ttlSeconds) - - switch mode { - case "presign": - ttl := p.downloadTTLMax - if req.ExpiresSeconds > 0 { - requested := time.Duration(req.ExpiresSeconds) * time.Second - if requested < ttl { - ttl = requested - } - } 
- url, err := p.s3Uploader.PresignGetObject(r.Context(), req.Key, ttl) - if err != nil { - p.metrics.IncS3Errors() - p.writeHTTPError(w, requestID, "", http.StatusBadGateway, "s3_presign_failed", err.Error()) - return - } - // Emit download_completed for presign (URL generated) - p.tracker.EmitDownloadCompleted(requestID, req.Key, mode, time.Since(start), 0) - - resp := downloadResponse{ - Mode: "presign", - URL: url, - ExpiresAt: time.Now().UTC().Add(ttl).Format(time.RFC3339), - } - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _ = json.NewEncoder(w).Encode(resp) - case "stream": - obj, err := p.s3Uploader.GetObject(r.Context(), req.Key) - if err != nil { - p.metrics.IncS3Errors() - p.writeHTTPError(w, requestID, "", http.StatusBadGateway, "s3_get_failed", err.Error()) - return - } - defer func() { _ = obj.Body.Close() }() - contentType := "application/octet-stream" - if obj.ContentType != nil && *obj.ContentType != "" { - contentType = *obj.ContentType - } - w.Header().Set("Content-Type", contentType) - var size int64 - if obj.ContentLength != nil { - size = *obj.ContentLength - w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) - } - if _, err := io.Copy(w, obj.Body); err != nil { - p.logger.Warn("download stream failed", "error", err) - } - // Emit download_completed for stream - p.tracker.EmitDownloadCompleted(requestID, req.Key, mode, time.Since(start), size) - } -} - -func (p *lfsProxy) handleHTTPUploadInit(w http.ResponseWriter, r *http.Request) { - requestID := strings.TrimSpace(r.Header.Get(headerRequestID)) - if requestID == "" { - requestID = newUUID() - } - w.Header().Set(headerRequestID, requestID) - if r.Method != http.MethodPost { - p.writeHTTPError(w, requestID, "", http.StatusMethodNotAllowed, "method_not_allowed", "method not allowed") - return - } - if p.httpAPIKey != "" && !p.validateHTTPAPIKey(r) { - p.writeHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized") - 
return - } - if !p.isReady() { - p.writeHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready") - return - } - - var req uploadInitRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_request", "invalid JSON body") - return - } - - req.Topic = strings.TrimSpace(req.Topic) - req.ContentType = strings.TrimSpace(req.ContentType) - req.Checksum = strings.TrimSpace(req.Checksum) - req.ChecksumAlg = strings.TrimSpace(req.ChecksumAlg) - if req.Topic == "" { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "missing_topic", "missing topic") - return - } - if !p.isValidTopicName(req.Topic) { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_topic", "invalid topic name") - return - } - if req.ContentType == "" { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "missing_content_type", "content_type required") - return - } - if req.SizeBytes <= 0 { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_size", "size_bytes must be > 0") - return - } - if p.maxBlob > 0 && req.SizeBytes > p.maxBlob { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "payload_too_large", "payload exceeds max size") - return - } - - keyBytes := []byte(nil) - if req.Key != "" { - decoded, err := base64.StdEncoding.DecodeString(req.Key) - if err != nil { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_key", "invalid key") - return - } - keyBytes = decoded - } - - partition := int32(0) - if req.Partition != nil { - partition = *req.Partition - if partition < 0 { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_partition", "invalid partition") - return - } - } - - alg, err := p.resolveChecksumAlg(req.ChecksumAlg) - if err != nil { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_request", err.Error()) - 
return - } - if req.Checksum != "" && alg == lfs.ChecksumNone { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_checksum", "checksum provided but checksum algorithm is none") - return - } - - objectKey := p.buildObjectKey(req.Topic) - uploadID, err := p.s3Uploader.StartMultipartUpload(r.Context(), objectKey, req.ContentType) - if err != nil { - p.metrics.IncS3Errors() - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error()) - return - } - p.logger.Info("http chunked upload init", "requestId", requestID, "topic", req.Topic, "s3Key", objectKey, "uploadId", uploadID, "sizeBytes", req.SizeBytes, "partSize", p.chunkSize) - - partSize := normalizeChunkSize(p.chunkSize) - session := &uploadSession{ - ID: newUUID(), - Topic: req.Topic, - S3Key: objectKey, - UploadID: uploadID, - ContentType: req.ContentType, - SizeBytes: req.SizeBytes, - KeyBytes: keyBytes, - Partition: partition, - Checksum: req.Checksum, - ChecksumAlg: alg, - CreatedAt: time.Now().UTC(), - ExpiresAt: time.Now().UTC().Add(p.uploadSessionTTL), - PartSize: partSize, - NextPart: 1, - Parts: make(map[int32]string), - PartSizes: make(map[int32]int64), - sha256Hasher: sha256.New(), - } - if alg != lfs.ChecksumNone { - if alg == lfs.ChecksumSHA256 { - session.checksumHasher = session.sha256Hasher - } else if h, err := lfs.NewChecksumHasher(alg); err == nil { - session.checksumHasher = h - } else if err != nil { - p.writeHTTPError(w, requestID, req.Topic, http.StatusBadRequest, "invalid_checksum", err.Error()) - return - } - } - - p.storeUploadSession(session) - p.tracker.EmitUploadStarted(requestID, req.Topic, partition, objectKey, req.ContentType, getClientIP(r), "http-chunked", req.SizeBytes) - - resp := uploadInitResponse{ - UploadID: session.ID, - S3Key: session.S3Key, - PartSize: session.PartSize, - ExpiresAt: session.ExpiresAt.Format(time.RFC3339), - } - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - 
_ = json.NewEncoder(w).Encode(resp) -} - -func (p *lfsProxy) handleHTTPUploadSession(w http.ResponseWriter, r *http.Request) { - requestID := strings.TrimSpace(r.Header.Get(headerRequestID)) - if requestID == "" { - requestID = newUUID() - } - w.Header().Set(headerRequestID, requestID) - if p.httpAPIKey != "" && !p.validateHTTPAPIKey(r) { - p.writeHTTPError(w, requestID, "", http.StatusUnauthorized, "unauthorized", "unauthorized") - return - } - if !p.isReady() { - p.writeHTTPError(w, requestID, "", http.StatusServiceUnavailable, "proxy_not_ready", "proxy not ready") - return - } - - path := strings.TrimPrefix(r.URL.Path, "/lfs/uploads/") - parts := strings.Split(strings.Trim(path, "/"), "/") - if len(parts) == 0 || parts[0] == "" { - p.writeHTTPError(w, requestID, "", http.StatusNotFound, "not_found", "not found") - return - } - uploadID := parts[0] - - switch { - case len(parts) == 1 && r.Method == http.MethodDelete: - p.handleHTTPUploadAbort(w, r, requestID, uploadID) - return - case len(parts) == 2 && parts[1] == "complete" && r.Method == http.MethodPost: - p.handleHTTPUploadComplete(w, r, requestID, uploadID) - return - case len(parts) == 3 && parts[1] == "parts" && r.Method == http.MethodPut: - partNum, err := strconv.ParseInt(parts[2], 10, 32) - if err != nil || partNum <= 0 || partNum > math.MaxInt32 { - p.writeHTTPError(w, requestID, "", http.StatusBadRequest, "invalid_part", "invalid part number") - return - } - p.handleHTTPUploadPart(w, r, requestID, uploadID, int32(partNum)) - return - default: - p.writeHTTPError(w, requestID, "", http.StatusNotFound, "not_found", "not found") - return - } -} - -func (p *lfsProxy) handleHTTPUploadPart(w http.ResponseWriter, r *http.Request, requestID, sessionID string, partNumber int32) { - session, ok := p.getUploadSession(sessionID) - if !ok { - p.writeHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found") - return - } - - session.mu.Lock() - defer session.mu.Unlock() - if 
time.Now().UTC().After(session.ExpiresAt) { - p.deleteUploadSession(sessionID) - p.writeHTTPError(w, requestID, session.Topic, http.StatusGone, "upload_expired", "upload session expired") - return - } - - if etag, exists := session.Parts[partNumber]; exists { - _, _ = io.Copy(io.Discard, r.Body) - p.logger.Info("http chunked upload part already received", "requestId", requestID, "uploadId", sessionID, "part", partNumber, "etag", etag) - resp := uploadPartResponse{UploadID: sessionID, PartNumber: partNumber, ETag: etag} - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _ = json.NewEncoder(w).Encode(resp) - return - } - - if partNumber != session.NextPart { - p.writeHTTPError(w, requestID, session.Topic, http.StatusConflict, "out_of_order", "part out of order") - return - } - - limit := session.PartSize + 1 - body, err := io.ReadAll(io.LimitReader(r.Body, limit)) - if err != nil { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", err.Error()) - return - } - if int64(len(body)) == 0 { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "empty part") - return - } - if int64(len(body)) > session.PartSize { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part too large") - return - } - if session.TotalUploaded+int64(len(body)) > session.SizeBytes { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part exceeds declared size") - return - } - if session.TotalUploaded+int64(len(body)) < session.SizeBytes && int64(len(body)) < minMultipartChunkSize { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part too small") - return - } - - if _, err := session.sha256Hasher.Write(body); err != nil { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "hash_error", err.Error()) - return - } - if session.checksumHasher != nil && 
session.checksumHasher != session.sha256Hasher { - if _, err := session.checksumHasher.Write(body); err != nil { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "hash_error", err.Error()) - return - } - } - - etag, err := p.s3Uploader.UploadPart(r.Context(), session.S3Key, session.UploadID, partNumber, body) - if err != nil { - p.metrics.IncS3Errors() - p.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "s3_upload_failed", err.Error(), "upload_part", session.TotalUploaded, 0) - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error()) - return - } - p.logger.Info("http chunked upload part stored", "requestId", requestID, "uploadId", sessionID, "part", partNumber, "etag", etag, "bytes", len(body)) - - session.Parts[partNumber] = etag - session.PartSizes[partNumber] = int64(len(body)) - session.TotalUploaded += int64(len(body)) - session.NextPart++ - - resp := uploadPartResponse{UploadID: sessionID, PartNumber: partNumber, ETag: etag} - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _ = json.NewEncoder(w).Encode(resp) -} - -func (p *lfsProxy) handleHTTPUploadComplete(w http.ResponseWriter, r *http.Request, requestID, sessionID string) { - session, ok := p.getUploadSession(sessionID) - if !ok { - p.writeHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found") - return - } - - session.mu.Lock() - defer session.mu.Unlock() - if time.Now().UTC().After(session.ExpiresAt) { - p.deleteUploadSession(sessionID) - p.writeHTTPError(w, requestID, session.Topic, http.StatusGone, "upload_expired", "upload session expired") - return - } - if session.TotalUploaded != session.SizeBytes { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "incomplete_upload", "not all bytes uploaded") - return - } - - var req uploadCompleteRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - 
p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_request", "invalid JSON body") - return - } - if len(req.Parts) == 0 { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_request", "parts required") - return - } - - completed := make([]types.CompletedPart, 0, len(req.Parts)) - for _, part := range req.Parts { - etag, ok := session.Parts[part.PartNumber] - if !ok || etag == "" || part.ETag == "" || etag != part.ETag { - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "invalid_part", "part etag mismatch") - return - } - completed = append(completed, types.CompletedPart{ - ETag: aws.String(part.ETag), - PartNumber: aws.Int32(part.PartNumber), - }) - } - - if err := p.s3Uploader.CompleteMultipartUpload(r.Context(), session.S3Key, session.UploadID, completed); err != nil { - p.metrics.IncS3Errors() - p.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "s3_upload_failed", err.Error(), "upload_complete", session.TotalUploaded, 0) - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "s3_upload_failed", err.Error()) - return - } - p.logger.Info("http chunked upload completed", "requestId", requestID, "uploadId", sessionID, "parts", len(completed), "bytes", session.TotalUploaded) - - shaHex := hex.EncodeToString(session.sha256Hasher.Sum(nil)) - checksum := "" - if session.ChecksumAlg != lfs.ChecksumNone { - if session.ChecksumAlg == lfs.ChecksumSHA256 { - checksum = shaHex - } else if session.checksumHasher != nil { - checksum = hex.EncodeToString(session.checksumHasher.Sum(nil)) - } - } - if session.Checksum != "" && checksum != "" && !strings.EqualFold(session.Checksum, checksum) { - _ = p.s3Uploader.AbortMultipartUpload(r.Context(), session.S3Key, session.UploadID) - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadRequest, "checksum_mismatch", "checksum mismatch") - return - } - - env := lfs.Envelope{ - Version: 1, - Bucket: p.s3Bucket, - Key: 
session.S3Key, - Size: session.TotalUploaded, - SHA256: shaHex, - Checksum: checksum, - ChecksumAlg: string(session.ChecksumAlg), - ContentType: session.ContentType, - CreatedAt: time.Now().UTC().Format(time.RFC3339), - ProxyID: p.proxyID, - } - encoded, err := lfs.EncodeEnvelope(env) - if err != nil { - p.writeHTTPError(w, requestID, session.Topic, http.StatusInternalServerError, "encode_failed", err.Error()) - return - } - - record := kmsg.Record{ - TimestampDelta64: 0, - OffsetDelta: 0, - Key: session.KeyBytes, - Value: encoded, - } - batchBytes := buildRecordBatch([]kmsg.Record{record}) - - produceReq := &kmsg.ProduceRequest{ - Acks: 1, - TimeoutMillis: 15000, - Topics: []kmsg.ProduceRequestTopic{{ - Topic: session.Topic, - Partitions: []kmsg.ProduceRequestTopicPartition{{ - Partition: session.Partition, - Records: batchBytes, - }}, - }}, - } - - correlationID := int32(atomic.AddUint32(&p.corrID, 1)) - reqHeader := &protocol.RequestHeader{APIKey: protocol.APIKeyProduce, APIVersion: 9, CorrelationID: correlationID} - payload, err := encodeProduceRequest(reqHeader, produceReq) - if err != nil { - p.writeHTTPError(w, requestID, session.Topic, http.StatusInternalServerError, "encode_failed", err.Error()) - return - } - - backendConn, backendAddr, err := p.connectBackend(r.Context()) - if err != nil { - p.trackOrphans([]orphanInfo{{Topic: session.Topic, Key: session.S3Key, RequestID: requestID, Reason: "kafka_produce_failed"}}) - p.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "backend_unavailable", err.Error(), "kafka_produce", session.TotalUploaded, 0) - p.writeHTTPError(w, requestID, session.Topic, http.StatusServiceUnavailable, "backend_unavailable", err.Error()) - return - } - defer func() { _ = backendConn.Close() }() - - if _, err := p.forwardToBackend(r.Context(), backendConn, backendAddr, payload); err != nil { - p.trackOrphans([]orphanInfo{{Topic: session.Topic, Key: session.S3Key, RequestID: requestID, Reason: "kafka_produce_failed"}}) 
- p.tracker.EmitUploadFailed(requestID, session.Topic, session.S3Key, "backend_error", err.Error(), "kafka_produce", session.TotalUploaded, 0) - p.writeHTTPError(w, requestID, session.Topic, http.StatusBadGateway, "backend_error", err.Error()) - return - } - - p.metrics.IncRequests(session.Topic, "ok", "lfs") - p.metrics.AddUploadBytes(session.TotalUploaded) - - p.tracker.EmitUploadCompleted(requestID, session.Topic, session.Partition, 0, p.s3Bucket, session.S3Key, session.TotalUploaded, shaHex, checksum, string(session.ChecksumAlg), session.ContentType, 0) - - p.deleteUploadSession(sessionID) - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _ = json.NewEncoder(w).Encode(env) -} - -func (p *lfsProxy) handleHTTPUploadAbort(w http.ResponseWriter, r *http.Request, requestID, sessionID string) { - session, ok := p.getUploadSession(sessionID) - if !ok { - p.writeHTTPError(w, requestID, "", http.StatusNotFound, "upload_not_found", "upload session not found") - return - } - session.mu.Lock() - defer session.mu.Unlock() - _ = p.s3Uploader.AbortMultipartUpload(r.Context(), session.S3Key, session.UploadID) - p.deleteUploadSession(sessionID) - w.WriteHeader(http.StatusNoContent) -} - -func (p *lfsProxy) storeUploadSession(session *uploadSession) { - if session == nil { - return - } - p.uploadMu.Lock() - defer p.uploadMu.Unlock() - p.cleanupUploadSessionsLocked() - p.uploadSessions[session.ID] = session -} - -func (p *lfsProxy) getUploadSession(id string) (*uploadSession, bool) { - p.uploadMu.Lock() - defer p.uploadMu.Unlock() - p.cleanupUploadSessionsLocked() - session, ok := p.uploadSessions[id] - return session, ok -} - -func (p *lfsProxy) deleteUploadSession(id string) { - p.uploadMu.Lock() - defer p.uploadMu.Unlock() - delete(p.uploadSessions, id) -} - -func (p *lfsProxy) cleanupUploadSessionsLocked() { - now := time.Now().UTC() - for id, session := range p.uploadSessions { - if session.ExpiresAt.Before(now) { - 
delete(p.uploadSessions, id) - } - } -} - -func statusForUploadError(err error) (int, string) { - msg := err.Error() - switch { - case strings.Contains(msg, "exceeds max"): - return http.StatusBadRequest, "payload_too_large" - case strings.Contains(msg, "empty upload"): - return http.StatusBadRequest, "empty_upload" - case strings.Contains(msg, "s3 key required"): - return http.StatusBadRequest, "invalid_key" - case strings.Contains(msg, "reader required"): - return http.StatusBadRequest, "invalid_reader" - default: - return http.StatusBadGateway, "s3_upload_failed" - } -} - -func (p *lfsProxy) writeHTTPError(w http.ResponseWriter, requestID, topic string, status int, code, message string) { - if topic != "" { - p.logger.Warn("http produce failed", "status", status, "code", code, "requestId", requestID, "topic", topic, "error", message) - } else { - p.logger.Warn("http produce failed", "status", status, "code", code, "requestId", requestID, "error", message) - } - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(errorResponse{ - Code: code, - Message: message, - RequestID: requestID, - }) -} - -func (p *lfsProxy) validateHTTPAPIKey(r *http.Request) bool { - if r == nil { - return false - } - key := strings.TrimSpace(r.Header.Get("X-API-Key")) - if key == "" { - auth := strings.TrimSpace(r.Header.Get("Authorization")) - if strings.HasPrefix(strings.ToLower(auth), "bearer ") { - key = strings.TrimSpace(auth[len("bearer "):]) - } - } - if key == "" { - return false - } - // Use constant-time comparison to prevent timing attacks - return subtle.ConstantTimeCompare([]byte(key), []byte(p.httpAPIKey)) == 1 -} - -func (p *lfsProxy) validateObjectKey(key string) error { - if strings.HasPrefix(key, "/") { - return errors.New("key must be relative") - } - if strings.Contains(key, "..") { - return errors.New("key must not contain '..'") - } - ns := strings.TrimSpace(p.s3Namespace) - if ns != "" && 
!strings.HasPrefix(key, ns+"/") { - return errors.New("key outside namespace") - } - if !strings.Contains(key, "/lfs/") { - return errors.New("key must include /lfs/ segment") - } - return nil -} - -// isValidTopicName validates a Kafka topic name. -// Topics must be 1-249 characters, containing only alphanumeric, dots, underscores, or hyphens. -func (p *lfsProxy) isValidTopicName(topic string) bool { - if len(topic) == 0 || len(topic) > p.topicMaxLength { - return false - } - return validTopicPattern.MatchString(topic) -} - -// getClientIP extracts the client IP address from the request. -// It checks X-Forwarded-For and X-Real-IP headers first, then falls back to RemoteAddr. -func getClientIP(r *http.Request) string { - if xff := r.Header.Get("X-Forwarded-For"); xff != "" { - // X-Forwarded-For can contain multiple IPs; take the first one - if idx := strings.Index(xff, ","); idx > 0 { - return strings.TrimSpace(xff[:idx]) - } - return strings.TrimSpace(xff) - } - if xri := r.Header.Get("X-Real-IP"); xri != "" { - return strings.TrimSpace(xri) - } - // Extract IP from RemoteAddr (host:port format) - host, _, err := strings.Cut(r.RemoteAddr, ":") - if err { - return host - } - return r.RemoteAddr -} diff --git a/cmd/lfs-proxy/http_test.go b/cmd/lfs-proxy/http_test.go deleted file mode 100644 index fbae909f..00000000 --- a/cmd/lfs-proxy/http_test.go +++ /dev/null @@ -1,274 +0,0 @@ -// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "io" - "net/http" - "net/http/httptest" - "testing" - "time" - - "log/slog" - - "github.com/aws/aws-sdk-go-v2/aws/signer/v4" - "github.com/aws/aws-sdk-go-v2/service/s3" -) - -type fakePresignAPI struct { - url string - err error -} - -func (f fakePresignAPI) PresignGetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.PresignOptions)) (*v4.PresignedHTTPRequest, error) { - if f.err != nil { - return nil, f.err - } - return &v4.PresignedHTTPRequest{URL: f.url}, nil -} - -func newReadyProxy(api s3API) *lfsProxy { - proxy := &lfsProxy{ - logger: slog.New(slog.NewTextHandler(io.Discard, nil)), - topicMaxLength: defaultTopicMaxLength, - cacheTTL: time.Minute, - metrics: newLfsMetrics(), - s3Uploader: &s3Uploader{bucket: "bucket", chunkSize: minMultipartChunkSize, api: api, presign: fakePresignAPI{url: "https://example.com/object"}}, - s3Bucket: "bucket", - s3Namespace: "default", - downloadTTLMax: 2 * time.Minute, - } - proxy.setReady(true) - proxy.markS3Healthy(true) - proxy.touchHealthy() - return proxy -} - -func TestHTTPProduceNotReadyReturnsJSON(t *testing.T) { - proxy := &lfsProxy{ - logger: slog.New(slog.NewTextHandler(io.Discard, nil)), - topicMaxLength: defaultTopicMaxLength, - cacheTTL: time.Minute, - metrics: newLfsMetrics(), - } - - req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) - req.Header.Set(headerTopic, "lfs-demo-topic") - rec := httptest.NewRecorder() - - proxy.handleHTTPProduce(rec, 
req) - - resp := rec.Result() - if resp.StatusCode != http.StatusServiceUnavailable { - t.Fatalf("expected status %d, got %d", http.StatusServiceUnavailable, resp.StatusCode) - } - if got := resp.Header.Get(headerRequestID); got == "" { - t.Fatalf("expected %s header to be set", headerRequestID) - } - var body errorResponse - if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { - t.Fatalf("decode response: %v", err) - } - if body.Code != "proxy_not_ready" { - t.Fatalf("unexpected code: %s", body.Code) - } - if body.RequestID == "" { - t.Fatalf("expected request_id in body") - } -} - -func TestHTTPProduceInvalidTopic(t *testing.T) { - proxy := newReadyProxy(fakeS3API{}) - req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) - req.Header.Set(headerTopic, "bad topic") - rec := httptest.NewRecorder() - - proxy.handleHTTPProduce(rec, req) - - resp := rec.Result() - if resp.StatusCode != http.StatusBadRequest { - t.Fatalf("expected status %d, got %d", http.StatusBadRequest, resp.StatusCode) - } - var body errorResponse - if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { - t.Fatalf("decode response: %v", err) - } - if body.Code != "invalid_topic" { - t.Fatalf("unexpected code: %s", body.Code) - } -} - -func TestHTTPProduceUploadFailureReturnsBadGateway(t *testing.T) { - proxy := newReadyProxy(failingS3API{err: errors.New("boom")}) - req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) - req.Header.Set(headerTopic, "lfs-demo-topic") - rec := httptest.NewRecorder() - - proxy.handleHTTPProduce(rec, req) - - resp := rec.Result() - if resp.StatusCode != http.StatusBadGateway { - t.Fatalf("expected status %d, got %d", http.StatusBadGateway, resp.StatusCode) - } - var body errorResponse - if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { - t.Fatalf("decode response: %v", err) - } - if body.Code != "s3_upload_failed" { - t.Fatalf("unexpected code: %s", 
body.Code) - } -} - -func TestHTTPProduceRequestIDPreserved(t *testing.T) { - proxy := newReadyProxy(failingS3API{err: errors.New("boom")}) - req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) - req.Header.Set(headerTopic, "lfs-demo-topic") - req.Header.Set(headerRequestID, "req-123") - rec := httptest.NewRecorder() - - proxy.handleHTTPProduce(rec, req) - - resp := rec.Result() - if got := resp.Header.Get(headerRequestID); got != "req-123" { - t.Fatalf("expected request id to be preserved, got %q", got) - } - var body errorResponse - if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { - t.Fatalf("decode response: %v", err) - } - if body.RequestID != "req-123" { - t.Fatalf("expected request_id in body to be preserved, got %q", body.RequestID) - } -} - -func TestHTTPProduceUnauthorized(t *testing.T) { - proxy := newReadyProxy(fakeS3API{}) - proxy.httpAPIKey = "secret" - req := httptest.NewRequest(http.MethodPost, "/lfs/produce", bytes.NewReader([]byte("payload"))) - req.Header.Set(headerTopic, "lfs-demo-topic") - rec := httptest.NewRecorder() - - proxy.handleHTTPProduce(rec, req) - - resp := rec.Result() - if resp.StatusCode != http.StatusUnauthorized { - t.Fatalf("expected status %d, got %d", http.StatusUnauthorized, resp.StatusCode) - } -} - -func TestHTTPProduceMethodNotAllowed(t *testing.T) { - proxy := newReadyProxy(fakeS3API{}) - req := httptest.NewRequest(http.MethodGet, "/lfs/produce", nil) - rec := httptest.NewRecorder() - - proxy.handleHTTPProduce(rec, req) - - resp := rec.Result() - if resp.StatusCode != http.StatusMethodNotAllowed { - t.Fatalf("expected status %d, got %d", http.StatusMethodNotAllowed, resp.StatusCode) - } -} - -func TestHTTPDownloadMethodNotAllowed(t *testing.T) { - proxy := newReadyProxy(fakeS3API{}) - req := httptest.NewRequest(http.MethodGet, "/lfs/download", nil) - rec := httptest.NewRecorder() - - proxy.handleHTTPDownload(rec, req) - - if rec.Result().StatusCode != 
http.StatusMethodNotAllowed { - t.Fatalf("expected status %d, got %d", http.StatusMethodNotAllowed, rec.Result().StatusCode) - } -} - -func TestHTTPDownloadUnauthorized(t *testing.T) { - proxy := newReadyProxy(fakeS3API{}) - proxy.httpAPIKey = "secret" - req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader([]byte(`{"bucket":"bucket","key":"default/topic/lfs/2026/02/03/obj-1"}`))) - rec := httptest.NewRecorder() - - proxy.handleHTTPDownload(rec, req) - - if rec.Result().StatusCode != http.StatusUnauthorized { - t.Fatalf("expected status %d, got %d", http.StatusUnauthorized, rec.Result().StatusCode) - } -} - -func TestHTTPDownloadInvalidKey(t *testing.T) { - proxy := newReadyProxy(fakeS3API{}) - req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader([]byte(`{"bucket":"bucket","key":"other/topic/obj-1"}`))) - rec := httptest.NewRecorder() - - proxy.handleHTTPDownload(rec, req) - - if rec.Result().StatusCode != http.StatusBadRequest { - t.Fatalf("expected status %d, got %d", http.StatusBadRequest, rec.Result().StatusCode) - } - var body errorResponse - if err := json.NewDecoder(rec.Body).Decode(&body); err != nil { - t.Fatalf("decode response: %v", err) - } - if body.Code != "invalid_key" { - t.Fatalf("unexpected code: %s", body.Code) - } -} - -func TestHTTPDownloadPresignOK(t *testing.T) { - proxy := newReadyProxy(fakeS3API{}) - req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader([]byte(`{"bucket":"bucket","key":"default/topic/lfs/2026/02/03/obj-1","mode":"presign","expires_seconds":120}`))) - rec := httptest.NewRecorder() - - proxy.handleHTTPDownload(rec, req) - - resp := rec.Result() - if resp.StatusCode != http.StatusOK { - t.Fatalf("expected status %d, got %d", http.StatusOK, resp.StatusCode) - } - var body downloadResponse - if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { - t.Fatalf("decode response: %v", err) - } - if body.URL == "" || body.Mode != "presign" { - 
t.Fatalf("expected presign response, got %+v", body) - } -} - -func TestHTTPDownloadStreamOK(t *testing.T) { - proxy := newReadyProxy(fakeS3API{}) - req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader([]byte(`{"bucket":"bucket","key":"default/topic/lfs/2026/02/03/obj-1","mode":"stream"}`))) - rec := httptest.NewRecorder() - - proxy.handleHTTPDownload(rec, req) - - resp := rec.Result() - if resp.StatusCode != http.StatusOK { - t.Fatalf("expected status %d, got %d", http.StatusOK, resp.StatusCode) - } - if resp.Header.Get("Content-Type") == "" { - t.Fatalf("expected content-type header to be set") - } - payload, err := io.ReadAll(resp.Body) - if err != nil { - t.Fatalf("read body: %v", err) - } - if string(payload) == "" { - t.Fatalf("expected body payload") - } -} diff --git a/cmd/lfs-proxy/http_tls.go b/cmd/lfs-proxy/http_tls.go deleted file mode 100644 index d659dcd0..00000000 --- a/cmd/lfs-proxy/http_tls.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "crypto/tls" - "crypto/x509" - "errors" - "os" - "strings" -) - -func buildHTTPServerTLSConfig() (*tls.Config, string, string, error) { - enabled := envBoolDefault("KAFSCALE_LFS_PROXY_HTTP_TLS_ENABLED", false) - if !enabled { - return nil, "", "", nil - } - certFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_CERT_FILE")) - keyFile := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_KEY_FILE")) - clientCA := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_TLS_CLIENT_CA_FILE")) - requireClient := envBoolDefault("KAFSCALE_LFS_PROXY_HTTP_TLS_REQUIRE_CLIENT_CERT", false) - - if certFile == "" || keyFile == "" { - return nil, "", "", errors.New("http TLS cert and key must be set when enabled") - } - - cfg := &tls.Config{MinVersion: tls.VersionTLS12} - if clientCA != "" { - caPEM, err := os.ReadFile(clientCA) - if err != nil { - return nil, "", "", err - } - pool := x509.NewCertPool() - if !pool.AppendCertsFromPEM(caPEM) { - return nil, "", "", errors.New("failed to parse http TLS client CA file") - } - cfg.ClientCAs = pool - if requireClient { - cfg.ClientAuth = tls.RequireAndVerifyClientCert - } else { - cfg.ClientAuth = tls.VerifyClientCertIfGiven - } - } - - return cfg, certFile, keyFile, nil -} diff --git a/cmd/lfs-proxy/http_tls_test.go b/cmd/lfs-proxy/http_tls_test.go deleted file mode 100644 index ac2813a1..00000000 --- a/cmd/lfs-proxy/http_tls_test.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import "testing" - -func TestBuildHTTPServerTLSConfigDisabled(t *testing.T) { - t.Setenv("KAFSCALE_LFS_PROXY_HTTP_TLS_ENABLED", "false") - cfg, certFile, keyFile, err := buildHTTPServerTLSConfig() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if cfg != nil || certFile != "" || keyFile != "" { - t.Fatalf("expected empty TLS config when disabled") - } -} - -func TestBuildHTTPServerTLSConfigMissingCert(t *testing.T) { - t.Setenv("KAFSCALE_LFS_PROXY_HTTP_TLS_ENABLED", "true") - t.Setenv("KAFSCALE_LFS_PROXY_HTTP_TLS_CERT_FILE", "") - t.Setenv("KAFSCALE_LFS_PROXY_HTTP_TLS_KEY_FILE", "") - _, _, _, err := buildHTTPServerTLSConfig() - if err == nil { - t.Fatal("expected error when cert/key missing") - } -} diff --git a/cmd/lfs-proxy/main.go b/cmd/lfs-proxy/main.go deleted file mode 100644 index 79df2e29..00000000 --- a/cmd/lfs-proxy/main.go +++ /dev/null @@ -1,440 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "context" - "crypto/tls" - "errors" - "log/slog" - "net" - "net/http" - "os" - "os/signal" - "strconv" - "strings" - "sync" - "syscall" - "time" - - "github.com/KafScale/platform/pkg/metadata" - "github.com/twmb/franz-go/pkg/kmsg" -) - -const ( - defaultProxyAddr = ":9092" - defaultMaxBlob = int64(5 << 30) - defaultChunkSize = int64(5 << 20) - defaultDialTimeoutMs = 5000 - defaultBackendBackoffMs = 500 - defaultBackendRefreshIntervalSec = 3 - defaultS3HealthIntervalSec = 30 - defaultHTTPReadTimeoutSec = 30 - defaultHTTPWriteTimeoutSec = 300 - defaultHTTPIdleTimeoutSec = 60 - defaultHTTPHeaderTimeoutSec = 10 - defaultHTTPMaxHeaderBytes = 1 << 20 - defaultHTTPShutdownTimeoutSec = 10 - defaultTopicMaxLength = 249 - defaultDownloadTTLSec = 120 - defaultUploadSessionTTLSec = 3600 -) - -type lfsProxy struct { - addr string - advertisedHost string - advertisedPort int32 - store metadata.Store - backends []string - logger *slog.Logger - rr uint32 - dialTimeout time.Duration - httpReadTimeout time.Duration - httpWriteTimeout time.Duration - httpIdleTimeout time.Duration - httpHeaderTimeout time.Duration - httpMaxHeaderBytes int - httpShutdownTimeout time.Duration - topicMaxLength int - downloadTTLMax time.Duration - checksumAlg string - backendTLSConfig *tls.Config - backendSASLMechanism string - backendSASLUsername string - backendSASLPassword string - httpTLSConfig *tls.Config - httpTLSCertFile string - httpTLSKeyFile string - ready uint32 - lastHealthy int64 - cacheTTL time.Duration - cacheMu sync.RWMutex - cachedBackends []string - apiVersions []kmsg.ApiVersionsResponseApiKey - metrics *lfsMetrics - - s3Uploader *s3Uploader - s3Bucket string - s3Namespace string - maxBlob int64 - chunkSize int64 - proxyID string - s3Healthy uint32 - corrID uint32 - httpAPIKey string - - // LFS Operations Tracker - tracker *LfsOpsTracker - - 
uploadSessionTTL time.Duration - uploadMu sync.Mutex - uploadSessions map[string]*uploadSession -} - -func main() { - ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) - defer cancel() - - logLevel := slog.LevelInfo - if strings.EqualFold(os.Getenv("KAFSCALE_LFS_PROXY_LOG_LEVEL"), "debug") { - logLevel = slog.LevelDebug - } - logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: logLevel})) - - logger.Warn("DEPRECATED: standalone lfs-proxy is deprecated; use the unified proxy with KAFSCALE_PROXY_LFS_ENABLED=true instead") - - addr := envOrDefault("KAFSCALE_LFS_PROXY_ADDR", defaultProxyAddr) - healthAddr := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HEALTH_ADDR")) - metricsAddr := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_METRICS_ADDR")) - httpAddr := envOrDefault("KAFSCALE_LFS_PROXY_HTTP_ADDR", ":8080") - httpAPIKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_HTTP_API_KEY")) - advertisedHost := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_ADVERTISED_HOST")) - advertisedPort := envPort("KAFSCALE_LFS_PROXY_ADVERTISED_PORT", portFromAddr(addr, 9092)) - logger.Info("advertised address configured", "host", advertisedHost, "port", advertisedPort) - backends := splitCSV(os.Getenv("KAFSCALE_LFS_PROXY_BACKENDS")) - backendBackoff := time.Duration(envInt("KAFSCALE_LFS_PROXY_BACKEND_BACKOFF_MS", defaultBackendBackoffMs)) * time.Millisecond - backendRefreshInterval := time.Duration(envInt("KAFSCALE_LFS_PROXY_BACKEND_REFRESH_INTERVAL_SEC", defaultBackendRefreshIntervalSec)) * time.Second - cacheTTL := time.Duration(envInt("KAFSCALE_LFS_PROXY_BACKEND_CACHE_TTL_SEC", 60)) * time.Second - if cacheTTL <= 0 { - cacheTTL = 60 * time.Second - } - - s3Bucket := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_BUCKET")) - s3Region := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_REGION")) - s3Endpoint := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ENDPOINT")) - s3PublicURL := 
strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_PUBLIC_ENDPOINT")) - s3AccessKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_ACCESS_KEY")) - s3SecretKey := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SECRET_KEY")) - s3SessionToken := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_S3_SESSION_TOKEN")) - forcePathStyle := envBoolDefault("KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE", s3Endpoint != "") - s3EnsureBucket := envBoolDefault("KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET", false) - maxBlob := envInt64("KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE", defaultMaxBlob) - chunkSize := envInt64("KAFSCALE_LFS_PROXY_CHUNK_SIZE", defaultChunkSize) - proxyID := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_ID")) - s3Namespace := envOrDefault("KAFSCALE_S3_NAMESPACE", "default") - dialTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_DIAL_TIMEOUT_MS", defaultDialTimeoutMs)) * time.Millisecond - s3HealthInterval := time.Duration(envInt("KAFSCALE_LFS_PROXY_S3_HEALTH_INTERVAL_SEC", defaultS3HealthIntervalSec)) * time.Second - httpReadTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_READ_TIMEOUT_SEC", defaultHTTPReadTimeoutSec)) * time.Second - httpWriteTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_WRITE_TIMEOUT_SEC", defaultHTTPWriteTimeoutSec)) * time.Second - httpIdleTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_IDLE_TIMEOUT_SEC", defaultHTTPIdleTimeoutSec)) * time.Second - httpHeaderTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_HEADER_TIMEOUT_SEC", defaultHTTPHeaderTimeoutSec)) * time.Second - httpMaxHeaderBytes := envInt("KAFSCALE_LFS_PROXY_HTTP_MAX_HEADER_BYTES", defaultHTTPMaxHeaderBytes) - httpShutdownTimeout := time.Duration(envInt("KAFSCALE_LFS_PROXY_HTTP_SHUTDOWN_TIMEOUT_SEC", defaultHTTPShutdownTimeoutSec)) * time.Second - uploadSessionTTL := time.Duration(envInt("KAFSCALE_LFS_PROXY_UPLOAD_SESSION_TTL_SEC", defaultUploadSessionTTLSec)) * time.Second - topicMaxLength := envInt("KAFSCALE_LFS_PROXY_TOPIC_MAX_LENGTH", 
defaultTopicMaxLength) - downloadTTLSec := envInt("KAFSCALE_LFS_PROXY_DOWNLOAD_TTL_SEC", defaultDownloadTTLSec) - if downloadTTLSec <= 0 { - downloadTTLSec = defaultDownloadTTLSec - } - checksumAlg := envOrDefault("KAFSCALE_LFS_PROXY_CHECKSUM_ALGO", "sha256") - backendTLSConfig, err := buildBackendTLSConfig() - if err != nil { - logger.Error("backend tls config failed", "error", err) - os.Exit(1) - } - backendSASLMechanism := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_MECHANISM")) - backendSASLUsername := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_USERNAME")) - backendSASLPassword := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_BACKEND_SASL_PASSWORD")) - httpTLSConfig, httpTLSCertFile, httpTLSKeyFile, err := buildHTTPServerTLSConfig() - if err != nil { - logger.Error("http tls config failed", "error", err) - os.Exit(1) - } - - store, err := buildMetadataStore(ctx) - if err != nil { - logger.Error("metadata store init failed", "error", err) - os.Exit(1) - } - if store == nil { - logger.Error("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS not set; proxy cannot build metadata responses") - os.Exit(1) - } - - if advertisedHost == "" { - logger.Warn("KAFSCALE_LFS_PROXY_ADVERTISED_HOST not set; clients may not resolve the proxy address") - } - - s3Uploader, err := newS3Uploader(ctx, s3Config{ - Bucket: s3Bucket, - Region: s3Region, - Endpoint: s3Endpoint, - PublicEndpoint: s3PublicURL, - AccessKeyID: s3AccessKey, - SecretAccessKey: s3SecretKey, - SessionToken: s3SessionToken, - ForcePathStyle: forcePathStyle, - ChunkSize: chunkSize, - }) - if err != nil { - logger.Error("s3 client init failed", "error", err) - os.Exit(1) - } - if s3EnsureBucket { - if err := s3Uploader.EnsureBucket(ctx); err != nil { - logger.Error("s3 bucket ensure failed", "error", err) - } - } - - metrics := newLfsMetrics() - - // LFS Ops Tracker configuration - trackerEnabled := envBoolDefault("KAFSCALE_LFS_TRACKER_ENABLED", true) - trackerTopic := 
envOrDefault("KAFSCALE_LFS_TRACKER_TOPIC", defaultTrackerTopic) - trackerBatchSize := envInt("KAFSCALE_LFS_TRACKER_BATCH_SIZE", defaultTrackerBatchSize) - trackerFlushMs := envInt("KAFSCALE_LFS_TRACKER_FLUSH_MS", defaultTrackerFlushMs) - trackerEnsureTopic := envBoolDefault("KAFSCALE_LFS_TRACKER_ENSURE_TOPIC", true) - trackerPartitions := envInt("KAFSCALE_LFS_TRACKER_PARTITIONS", defaultTrackerPartitions) - trackerReplication := envInt("KAFSCALE_LFS_TRACKER_REPLICATION_FACTOR", defaultTrackerReplication) - - trackerCfg := TrackerConfig{ - Enabled: trackerEnabled, - Topic: trackerTopic, - Brokers: backends, - BatchSize: trackerBatchSize, - FlushMs: trackerFlushMs, - ProxyID: proxyID, - EnsureTopic: trackerEnsureTopic, - Partitions: trackerPartitions, - ReplicationFactor: trackerReplication, - } - - tracker, err := NewLfsOpsTracker(ctx, trackerCfg, logger) - if err != nil { - logger.Warn("lfs ops tracker init failed, continuing without tracker", "error", err) - tracker = &LfsOpsTracker{config: trackerCfg, logger: logger} - } - - p := &lfsProxy{ - addr: addr, - advertisedHost: advertisedHost, - advertisedPort: advertisedPort, - store: store, - backends: backends, - logger: logger, - dialTimeout: dialTimeout, - cacheTTL: cacheTTL, - apiVersions: generateProxyApiVersions(), - metrics: metrics, - s3Uploader: s3Uploader, - s3Bucket: s3Bucket, - s3Namespace: s3Namespace, - maxBlob: maxBlob, - chunkSize: chunkSize, - proxyID: proxyID, - httpAPIKey: httpAPIKey, - httpReadTimeout: httpReadTimeout, - httpWriteTimeout: httpWriteTimeout, - httpIdleTimeout: httpIdleTimeout, - httpHeaderTimeout: httpHeaderTimeout, - httpMaxHeaderBytes: httpMaxHeaderBytes, - httpShutdownTimeout: httpShutdownTimeout, - topicMaxLength: topicMaxLength, - downloadTTLMax: time.Duration(downloadTTLSec) * time.Second, - checksumAlg: checksumAlg, - backendTLSConfig: backendTLSConfig, - backendSASLMechanism: backendSASLMechanism, - backendSASLUsername: backendSASLUsername, - backendSASLPassword: 
backendSASLPassword, - httpTLSConfig: httpTLSConfig, - httpTLSCertFile: httpTLSCertFile, - httpTLSKeyFile: httpTLSKeyFile, - tracker: tracker, - uploadSessionTTL: uploadSessionTTL, - uploadSessions: make(map[string]*uploadSession), - } - if len(backends) > 0 { - p.setCachedBackends(backends) - p.touchHealthy() - p.setReady(true) - } - p.markS3Healthy(true) - p.startBackendRefresh(ctx, backendBackoff, backendRefreshInterval) - p.startS3HealthCheck(ctx, s3HealthInterval) - if healthAddr != "" { - p.startHealthServer(ctx, healthAddr) - } - if metricsAddr != "" { - p.startMetricsServer(ctx, metricsAddr) - } - if httpAddr != "" { - p.startHTTPServer(ctx, httpAddr) - } - if err := p.listenAndServe(ctx); err != nil && !errors.Is(err, context.Canceled) { - logger.Error("lfs proxy server error", "error", err) - os.Exit(1) - } - - // Graceful shutdown of tracker - if p.tracker != nil { - if err := p.tracker.Close(); err != nil { - logger.Warn("tracker close error", "error", err) - } - } -} - -func envOrDefault(key, fallback string) string { - if val := os.Getenv(key); val != "" { - return val - } - return fallback -} - -func envPort(key string, fallback int32) int32 { - val := strings.TrimSpace(os.Getenv(key)) - if val == "" { - return fallback - } - parsed, err := strconv.ParseInt(val, 10, 32) - if err != nil || parsed <= 0 { - return fallback - } - return int32(parsed) -} - -func envInt(key string, fallback int) int { - val := strings.TrimSpace(os.Getenv(key)) - if val == "" { - return fallback - } - parsed, err := strconv.Atoi(val) - if err != nil { - return fallback - } - return parsed -} - -func envInt64(key string, fallback int64) int64 { - val := strings.TrimSpace(os.Getenv(key)) - if val == "" { - return fallback - } - parsed, err := strconv.ParseInt(val, 10, 64) - if err != nil { - return fallback - } - return parsed -} - -func envBoolDefault(key string, fallback bool) bool { - val := strings.TrimSpace(os.Getenv(key)) - if val == "" { - return fallback - } - switch 
strings.ToLower(val) { - case "1", "true", "yes", "y", "on": - return true - case "0", "false", "no", "n", "off": - return false - default: - return fallback - } -} - -func portFromAddr(addr string, fallback int32) int32 { - _, portStr, err := net.SplitHostPort(addr) - if err != nil { - return fallback - } - port, err := strconv.ParseInt(portStr, 10, 32) - if err != nil || port <= 0 || port > 65535 { - return fallback - } - return int32(port) -} - -func splitCSV(raw string) []string { - if strings.TrimSpace(raw) == "" { - return nil - } - parts := strings.Split(raw, ",") - out := make([]string, 0, len(parts)) - for _, part := range parts { - val := strings.TrimSpace(part) - if val != "" { - out = append(out, val) - } - } - return out -} - -func buildMetadataStore(ctx context.Context) (metadata.Store, error) { - cfg, ok := proxyEtcdConfigFromEnv() - if !ok { - return nil, nil - } - return metadata.NewEtcdStore(ctx, metadata.ClusterMetadata{}, cfg) -} - -func proxyEtcdConfigFromEnv() (metadata.EtcdStoreConfig, bool) { - endpoints := strings.TrimSpace(os.Getenv("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS")) - if endpoints == "" { - return metadata.EtcdStoreConfig{}, false - } - return metadata.EtcdStoreConfig{ - Endpoints: strings.Split(endpoints, ","), - Username: os.Getenv("KAFSCALE_LFS_PROXY_ETCD_USERNAME"), - Password: os.Getenv("KAFSCALE_LFS_PROXY_ETCD_PASSWORD"), - }, true -} - -func (p *lfsProxy) startMetricsServer(ctx context.Context, addr string) { - mux := http.NewServeMux() - mux.HandleFunc("/metrics", func(w http.ResponseWriter, _ *http.Request) { - p.metrics.WritePrometheus(w) - }) - srv := &http.Server{ - Addr: addr, - Handler: mux, - ReadTimeout: p.httpReadTimeout, - WriteTimeout: p.httpWriteTimeout, - IdleTimeout: p.httpIdleTimeout, - ReadHeaderTimeout: p.httpHeaderTimeout, - MaxHeaderBytes: p.httpMaxHeaderBytes, - } - go func() { - <-ctx.Done() - shutdownCtx, cancel := context.WithTimeout(context.Background(), p.httpShutdownTimeout) - defer cancel() - _ = 
srv.Shutdown(shutdownCtx) - }() - go func() { - p.logger.Info("lfs proxy metrics listening", "addr", addr) - if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { - p.logger.Warn("lfs proxy metrics server error", "error", err) - } - }() -} diff --git a/cmd/lfs-proxy/metrics.go b/cmd/lfs-proxy/metrics.go deleted file mode 100644 index a24c3030..00000000 --- a/cmd/lfs-proxy/metrics.go +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "fmt" - "io" - "runtime" - "sort" - "sync" - "sync/atomic" -) - -type lfsMetrics struct { - uploadDuration *histogram - uploadBytes uint64 - s3Errors uint64 - orphans uint64 - mu sync.Mutex - requests map[string]*topicCounters -} - -func newLfsMetrics() *lfsMetrics { - buckets := []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30} - return &lfsMetrics{ - uploadDuration: newHistogram(buckets), - requests: make(map[string]*topicCounters), - } -} - -func (m *lfsMetrics) ObserveUploadDuration(seconds float64) { - if m == nil || m.uploadDuration == nil { - return - } - m.uploadDuration.Observe(seconds) -} - -func (m *lfsMetrics) AddUploadBytes(n int64) { - if m == nil || n <= 0 { - return - } - atomic.AddUint64(&m.uploadBytes, uint64(n)) -} - -func (m *lfsMetrics) IncRequests(topic, status, typ string) { - if m == nil { - return - } - if topic == "" { - topic = "unknown" - } - m.mu.Lock() - counters := m.requests[topic] - if counters == nil { - counters = &topicCounters{} - m.requests[topic] = counters - } - m.mu.Unlock() - switch { - case status == "ok" && typ == "lfs": - atomic.AddUint64(&counters.okLfs, 1) - case status == "error" && typ == "lfs": - atomic.AddUint64(&counters.errLfs, 1) - case status == "ok" && typ == "passthrough": - atomic.AddUint64(&counters.okPas, 1) - case status == "error" && typ == "passthrough": - atomic.AddUint64(&counters.errPas, 1) - } -} - -func (m *lfsMetrics) IncS3Errors() { - if m == nil { - return - } - atomic.AddUint64(&m.s3Errors, 1) -} - -func (m *lfsMetrics) IncOrphans(count int) { - if m == nil || count <= 0 { - return - } - atomic.AddUint64(&m.orphans, uint64(count)) -} - -func (m *lfsMetrics) WritePrometheus(w io.Writer) { - if m == nil { - return - } - m.uploadDuration.WritePrometheus(w, "kafscale_lfs_proxy_upload_duration_seconds", "LFS proxy upload durations in seconds") - _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_upload_bytes_total Total bytes uploaded via LFS\n") - _, _ = 
fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_upload_bytes_total counter\n") - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_upload_bytes_total %d\n", atomic.LoadUint64(&m.uploadBytes)) - _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_requests_total LFS proxy requests\n") - _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_requests_total counter\n") - topics := m.snapshotTopics() - for _, topic := range topics { - counters := m.requests[topic] - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"ok\",type=\"lfs\"} %d\n", topic, atomic.LoadUint64(&counters.okLfs)) - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"error\",type=\"lfs\"} %d\n", topic, atomic.LoadUint64(&counters.errLfs)) - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"ok\",type=\"passthrough\"} %d\n", topic, atomic.LoadUint64(&counters.okPas)) - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_requests_total{topic=\"%s\",status=\"error\",type=\"passthrough\"} %d\n", topic, atomic.LoadUint64(&counters.errPas)) - } - _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_s3_errors_total Total S3 errors\n") - _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_s3_errors_total counter\n") - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_s3_errors_total %d\n", atomic.LoadUint64(&m.s3Errors)) - _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_orphan_objects_total LFS objects uploaded but not committed to Kafka\n") - _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_orphan_objects_total counter\n") - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_orphan_objects_total %d\n", atomic.LoadUint64(&m.orphans)) - - // Runtime metrics - var memStats runtime.MemStats - runtime.ReadMemStats(&memStats) - _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_goroutines Number of goroutines\n") - _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_goroutines gauge\n") - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_goroutines %d\n", runtime.NumGoroutine()) - _, _ = 
fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_memory_alloc_bytes Bytes allocated and in use\n") - _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_memory_alloc_bytes gauge\n") - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_memory_alloc_bytes %d\n", memStats.Alloc) - _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_memory_sys_bytes Bytes obtained from system\n") - _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_memory_sys_bytes gauge\n") - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_memory_sys_bytes %d\n", memStats.Sys) - _, _ = fmt.Fprintf(w, "# HELP kafscale_lfs_proxy_gc_pause_total_ns Total GC pause time in nanoseconds\n") - _, _ = fmt.Fprintf(w, "# TYPE kafscale_lfs_proxy_gc_pause_total_ns counter\n") - _, _ = fmt.Fprintf(w, "kafscale_lfs_proxy_gc_pause_total_ns %d\n", memStats.PauseTotalNs) -} - -func (m *lfsMetrics) snapshotTopics() []string { - m.mu.Lock() - defer m.mu.Unlock() - out := make([]string, 0, len(m.requests)) - for topic := range m.requests { - out = append(out, topic) - } - sort.Strings(out) - return out -} - -type topicCounters struct { - okLfs uint64 - errLfs uint64 - okPas uint64 - errPas uint64 -} - -type histogram struct { - mu sync.Mutex - buckets []float64 - counts []int64 - sum float64 - count int64 -} - -func newHistogram(buckets []float64) *histogram { - if len(buckets) == 0 { - buckets = []float64{1, 2, 5, 10, 25, 50, 100} - } - cp := append([]float64(nil), buckets...) - sort.Float64s(cp) - return &histogram{ - buckets: cp, - counts: make([]int64, len(cp)+1), - } -} - -func (h *histogram) Observe(value float64) { - if h == nil { - return - } - h.mu.Lock() - defer h.mu.Unlock() - h.sum += value - h.count++ - idx := sort.SearchFloat64s(h.buckets, value) - h.counts[idx]++ -} - -func (h *histogram) Snapshot() ([]float64, []int64, float64, int64) { - if h == nil { - return nil, nil, 0, 0 - } - h.mu.Lock() - defer h.mu.Unlock() - buckets := append([]float64(nil), h.buckets...) - counts := append([]int64(nil), h.counts...) 
- return buckets, counts, h.sum, h.count -} - -func (h *histogram) WritePrometheus(w io.Writer, name, help string) { - if h == nil { - return - } - buckets, counts, sum, count := h.Snapshot() - _, _ = fmt.Fprintf(w, "# HELP %s %s\n", name, help) - _, _ = fmt.Fprintf(w, "# TYPE %s histogram\n", name) - var cumulative int64 - for i, upper := range buckets { - cumulative += counts[i] - _, _ = fmt.Fprintf(w, "%s_bucket{le=%q} %d\n", name, formatFloat(upper), cumulative) - } - cumulative += counts[len(counts)-1] - _, _ = fmt.Fprintf(w, "%s_bucket{le=\"+Inf\"} %d\n", name, cumulative) - _, _ = fmt.Fprintf(w, "%s_sum %f\n", name, sum) - _, _ = fmt.Fprintf(w, "%s_count %d\n", name, count) -} - -func formatFloat(val float64) string { - return fmt.Sprintf("%g", val) -} diff --git a/cmd/lfs-proxy/openapi.yaml b/cmd/lfs-proxy/openapi.yaml deleted file mode 100644 index 065ad0e1..00000000 --- a/cmd/lfs-proxy/openapi.yaml +++ /dev/null @@ -1,433 +0,0 @@ -# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -openapi: 3.0.3 -info: - title: KafScale LFS Proxy HTTP API - version: 1.0.0 - description: | - The KafScale LFS (Large File Support) Proxy provides HTTP endpoints for producing - large binary objects to Kafka via S3-backed storage. 
Instead of sending large payloads - directly through Kafka, clients upload blobs to S3 and receive an envelope (pointer) - that is stored in Kafka. - - ## Authentication - - When API key authentication is enabled (via `KAFSCALE_LFS_PROXY_HTTP_API_KEY`), - requests must include one of: - - `X-API-Key` header with the API key - - `Authorization: Bearer ` header - - ## CORS - - The API supports CORS for browser-based clients. Preflight OPTIONS requests are handled automatically. - - ## Request Tracing - - All requests can include an optional `X-Request-ID` header for tracing. If not provided, - the proxy generates one and returns it in the response. - contact: - name: KafScale - url: https://github.com/KafScale/platform - license: - name: Apache 2.0 - url: https://www.apache.org/licenses/LICENSE-2.0 -servers: - - url: http://localhost:8080 - description: Local development - - url: http://lfs-proxy:8080 - description: Kubernetes in-cluster -tags: - - name: LFS - description: Large File Support operations -paths: - /lfs/produce: - post: - tags: - - LFS - summary: Upload and produce an LFS record - description: | - Streams a binary payload to the LFS proxy, which: - 1. Uploads the blob to S3 storage - 2. Computes checksums (SHA256 by default) - 3. Creates an LFS envelope with blob metadata - 4. Produces the envelope to the specified Kafka topic - - The response contains the full LFS envelope that was stored in Kafka. 
- operationId: lfsProduce - security: - - ApiKeyAuth: [] - - BearerAuth: [] - - {} - parameters: - - in: header - name: X-Kafka-Topic - required: true - schema: - type: string - pattern: '^[a-zA-Z0-9._-]+$' - maxLength: 249 - description: Target Kafka topic name (alphanumeric, dots, underscores, hyphens only) - example: video-uploads - - in: header - name: X-Kafka-Key - required: false - schema: - type: string - description: Base64-encoded Kafka record key for partitioning - example: dXNlci0xMjM= - - in: header - name: X-Kafka-Partition - required: false - schema: - type: integer - format: int32 - minimum: 0 - description: Explicit partition number (overrides key-based partitioning) - example: 0 - - in: header - name: X-LFS-Checksum - required: false - schema: - type: string - description: Expected checksum of the payload for verification - example: abc123def456... - - in: header - name: X-LFS-Checksum-Alg - required: false - schema: - type: string - enum: [sha256, md5, crc32, none] - default: sha256 - description: Checksum algorithm for verification - - in: header - name: X-Request-ID - required: false - schema: - type: string - format: uuid - description: Request correlation ID for tracing - - in: header - name: Content-Type - required: false - schema: - type: string - description: MIME type of the payload (stored in envelope) - example: video/mp4 - requestBody: - required: true - description: Binary payload to upload - content: - application/octet-stream: - schema: - type: string - format: binary - '*/*': - schema: - type: string - format: binary - responses: - "200": - description: LFS envelope successfully produced to Kafka - headers: - X-Request-ID: - schema: - type: string - description: Request correlation ID - content: - application/json: - schema: - $ref: "#/components/schemas/LfsEnvelope" - example: - kfs_lfs: 1 - bucket: kafscale-lfs - key: default/video-uploads/lfs/2026/02/05/abc123 - size: 10485760 - sha256: 
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 - checksum: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 - checksum_alg: sha256 - content_type: video/mp4 - created_at: "2026-02-05T10:30:00Z" - proxy_id: lfs-proxy-0 - "400": - description: Invalid request (missing topic, invalid checksum, etc.) - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorResponse" - examples: - missing_topic: - value: - code: missing_topic - message: missing topic - request_id: abc-123 - checksum_mismatch: - value: - code: checksum_mismatch - message: "expected abc123, got def456" - request_id: abc-123 - "401": - description: Unauthorized - API key required or invalid - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorResponse" - "502": - description: Upstream storage or Kafka failure - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorResponse" - "503": - description: Proxy not ready (backends unavailable) - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorResponse" - options: - tags: - - LFS - summary: CORS preflight for produce endpoint - description: Handles CORS preflight requests for browser clients - responses: - "204": - description: CORS headers returned - headers: - Access-Control-Allow-Origin: - schema: - type: string - Access-Control-Allow-Methods: - schema: - type: string - Access-Control-Allow-Headers: - schema: - type: string - - /lfs/download: - post: - tags: - - LFS - summary: Download an LFS object - description: | - Retrieves an LFS object from S3 storage. Supports two modes: - - - **presign**: Returns a presigned S3 URL for direct download (default) - - **stream**: Streams the object content through the proxy - - For presign mode, the URL TTL is capped by server configuration. 
- operationId: lfsDownload - security: - - ApiKeyAuth: [] - - BearerAuth: [] - - {} - parameters: - - in: header - name: X-Request-ID - required: false - schema: - type: string - format: uuid - description: Request correlation ID for tracing - requestBody: - required: true - content: - application/json: - schema: - $ref: "#/components/schemas/DownloadRequest" - examples: - presign: - summary: Get presigned URL - value: - bucket: kafscale-lfs - key: default/video-uploads/lfs/2026/02/05/abc123 - mode: presign - expires_seconds: 300 - stream: - summary: Stream content - value: - bucket: kafscale-lfs - key: default/video-uploads/lfs/2026/02/05/abc123 - mode: stream - responses: - "200": - description: Presigned URL or streamed object content - content: - application/json: - schema: - $ref: "#/components/schemas/DownloadResponse" - example: - mode: presign - url: https://s3.amazonaws.com/kafscale-lfs/... - expires_at: "2026-02-05T10:35:00Z" - application/octet-stream: - schema: - type: string - format: binary - description: Streamed object content (when mode=stream) - "400": - description: Invalid request - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorResponse" - "401": - description: Unauthorized - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorResponse" - "502": - description: Upstream storage failure - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorResponse" - "503": - description: Proxy not ready - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorResponse" - options: - tags: - - LFS - summary: CORS preflight for download endpoint - responses: - "204": - description: CORS headers returned - -components: - securitySchemes: - ApiKeyAuth: - type: apiKey - in: header - name: X-API-Key - description: API key for authentication - BearerAuth: - type: http - scheme: bearer - description: Bearer token authentication (same API key) - - schemas: - LfsEnvelope: - type: 
object - description: LFS envelope containing blob metadata and S3 location - properties: - kfs_lfs: - type: integer - format: int32 - description: LFS envelope version - example: 1 - bucket: - type: string - description: S3 bucket name - example: kafscale-lfs - key: - type: string - description: S3 object key - example: default/video-uploads/lfs/2026/02/05/abc123 - size: - type: integer - format: int64 - description: Blob size in bytes - example: 10485760 - sha256: - type: string - description: SHA256 hash of the blob - example: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 - checksum: - type: string - description: Checksum value (algorithm depends on checksum_alg) - checksum_alg: - type: string - description: Checksum algorithm used - enum: [sha256, md5, crc32, none] - example: sha256 - content_type: - type: string - description: MIME type of the blob - example: video/mp4 - created_at: - type: string - format: date-time - description: Timestamp when the blob was created - example: "2026-02-05T10:30:00Z" - proxy_id: - type: string - description: ID of the proxy instance that handled the upload - example: lfs-proxy-0 - - DownloadRequest: - type: object - required: [bucket, key] - description: Request to download an LFS object - properties: - bucket: - type: string - description: S3 bucket name (must match proxy's configured bucket) - example: kafscale-lfs - key: - type: string - description: S3 object key from the LFS envelope - example: default/video-uploads/lfs/2026/02/05/abc123 - mode: - type: string - enum: [presign, stream] - default: presign - description: | - Download mode: - - presign: Return a presigned URL for direct S3 download - - stream: Stream content through the proxy - expires_seconds: - type: integer - format: int32 - default: 120 - minimum: 1 - maximum: 3600 - description: Requested presign URL TTL in seconds (capped by server) - - DownloadResponse: - type: object - description: Response for presign download mode - properties: - 
mode: - type: string - enum: [presign] - description: Download mode used - url: - type: string - format: uri - description: Presigned S3 URL for direct download - expires_at: - type: string - format: date-time - description: URL expiration timestamp - - ErrorResponse: - type: object - description: Error response returned for all error conditions - properties: - code: - type: string - description: Machine-readable error code - example: missing_topic - message: - type: string - description: Human-readable error message - example: missing topic - request_id: - type: string - description: Request correlation ID for support/debugging - example: abc-123-def-456 diff --git a/cmd/lfs-proxy/record.go b/cmd/lfs-proxy/record.go deleted file mode 100644 index 447da53f..00000000 --- a/cmd/lfs-proxy/record.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "encoding/binary" - "errors" - "hash/crc32" - - "github.com/KafScale/platform/pkg/protocol" - "github.com/twmb/franz-go/pkg/kmsg" -) - -type byteWriter struct { - buf []byte -} - -func newByteWriter(capacity int) *byteWriter { - return &byteWriter{buf: make([]byte, 0, capacity)} -} - -func (w *byteWriter) write(b []byte) { - w.buf = append(w.buf, b...) 
-} - -func (w *byteWriter) Int16(v int16) { - var tmp [2]byte - binary.BigEndian.PutUint16(tmp[:], uint16(v)) - w.write(tmp[:]) -} - -func (w *byteWriter) Int32(v int32) { - var tmp [4]byte - binary.BigEndian.PutUint32(tmp[:], uint32(v)) - w.write(tmp[:]) -} - -func (w *byteWriter) Int64(v int64) { - var tmp [8]byte - binary.BigEndian.PutUint64(tmp[:], uint64(v)) - w.write(tmp[:]) -} - -func (w *byteWriter) String(v string) { - w.Int16(int16(len(v))) - if len(v) > 0 { - w.write([]byte(v)) - } -} - -func (w *byteWriter) NullableString(v *string) { - if v == nil { - w.Int16(-1) - return - } - w.String(*v) -} - -func (w *byteWriter) CompactString(v string) { - w.compactLength(len(v)) - if len(v) > 0 { - w.write([]byte(v)) - } -} - -func (w *byteWriter) CompactNullableString(v *string) { - if v == nil { - w.compactLength(-1) - return - } - w.CompactString(*v) -} - -func (w *byteWriter) BytesWithLength(b []byte) { - w.Int32(int32(len(b))) - w.write(b) -} - -func (w *byteWriter) CompactBytes(b []byte) { - if b == nil { - w.compactLength(-1) - return - } - w.compactLength(len(b)) - w.write(b) -} - -func (w *byteWriter) UVarint(v uint64) { - var tmp [binary.MaxVarintLen64]byte - n := binary.PutUvarint(tmp[:], v) - w.write(tmp[:n]) -} - -func (w *byteWriter) CompactArrayLen(length int) { - if length < 0 { - w.UVarint(0) - return - } - w.UVarint(uint64(length) + 1) -} - -func (w *byteWriter) WriteTaggedFields(count int) { - if count == 0 { - w.UVarint(0) - return - } - w.UVarint(uint64(count)) -} - -func (w *byteWriter) compactLength(length int) { - if length < 0 { - w.UVarint(0) - return - } - w.UVarint(uint64(length) + 1) -} - -func (w *byteWriter) Bytes() []byte { - return w.buf -} - -func encodeProduceRequest(header *protocol.RequestHeader, req *kmsg.ProduceRequest) ([]byte, error) { - if header == nil || req == nil { - return nil, errors.New("nil header or request") - } - req.SetVersion(header.APIVersion) - // Build header manually (kmsg doesn't handle request headers) 
- flexible := req.IsFlexible() - w := newByteWriter(0) - w.Int16(header.APIKey) - w.Int16(header.APIVersion) - w.Int32(header.CorrelationID) - w.NullableString(header.ClientID) - if flexible { - w.WriteTaggedFields(0) - } - // Append kmsg-encoded body - body := req.AppendTo(nil) - w.write(body) - return w.Bytes(), nil -} - -func isFlexibleRequest(apiKey, version int16) bool { - switch apiKey { - case protocol.APIKeyApiVersion: - return version >= 3 - case protocol.APIKeyProduce: - return version >= 9 - case protocol.APIKeyMetadata: - return version >= 9 - case protocol.APIKeyFetch: - return version >= 12 - case protocol.APIKeyFindCoordinator: - return version >= 3 - case protocol.APIKeySyncGroup: - return version >= 4 - case protocol.APIKeyHeartbeat: - return version >= 4 - case protocol.APIKeyListGroups: - return version >= 3 - case protocol.APIKeyDescribeGroups: - return version >= 5 - case protocol.APIKeyOffsetForLeaderEpoch: - return version >= 4 - case protocol.APIKeyDescribeConfigs: - return version >= 4 - case protocol.APIKeyAlterConfigs: - return version >= 2 - case protocol.APIKeyCreatePartitions: - return version >= 2 - case protocol.APIKeyDeleteGroups: - return version >= 2 - default: - return false - } -} - -func encodeRecords(records []kmsg.Record) []byte { - if len(records) == 0 { - return nil - } - out := make([]byte, 0, 256) - for _, record := range records { - out = append(out, encodeRecord(record)...) 
- } - return out -} - -func encodeRecord(record kmsg.Record) []byte { - body := make([]byte, 0, 128) - body = append(body, byte(record.Attributes)) - body = appendVarlong(body, record.TimestampDelta64) - body = appendVarint(body, record.OffsetDelta) - body = appendVarintBytes(body, record.Key) - body = appendVarintBytes(body, record.Value) - body = appendVarint(body, int32(len(record.Headers))) - for _, header := range record.Headers { - body = appendVarintString(body, header.Key) - body = appendVarintBytes(body, header.Value) - } - - cap64 := int64(len(body)) + int64(binary.MaxVarintLen32) - out := make([]byte, 0, cap64) - out = appendVarint(out, int32(len(body))) - out = append(out, body...) - return out -} - -func appendVarint(dst []byte, v int32) []byte { - var tmp [binary.MaxVarintLen32]byte - n := binary.PutVarint(tmp[:], int64(v)) - return append(dst, tmp[:n]...) -} - -func appendVarlong(dst []byte, v int64) []byte { - var tmp [binary.MaxVarintLen64]byte - n := binary.PutVarint(tmp[:], v) - return append(dst, tmp[:n]...) -} - -func appendVarintBytes(dst []byte, b []byte) []byte { - if b == nil { - dst = appendVarint(dst, -1) - return dst - } - dst = appendVarint(dst, int32(len(b))) - return append(dst, b...) -} - -func appendVarintString(dst []byte, s string) []byte { - dst = appendVarint(dst, int32(len(s))) - return append(dst, s...) 
-} - -func varint(buf []byte) (int32, int) { - val, n := binary.Varint(buf) - if n <= 0 { - return 0, 0 - } - return int32(val), n -} - -func buildRecordBatch(records []kmsg.Record) []byte { - encoded := encodeRecords(records) - batch := kmsg.RecordBatch{ - FirstOffset: 0, - PartitionLeaderEpoch: -1, - Magic: 2, - Attributes: 0, - LastOffsetDelta: int32(len(records) - 1), - FirstTimestamp: 0, - MaxTimestamp: 0, - ProducerID: -1, - ProducerEpoch: -1, - FirstSequence: 0, - NumRecords: int32(len(records)), - Records: encoded, - } - batchBytes := batch.AppendTo(nil) - batch.Length = int32(len(batchBytes) - 12) - batchBytes = batch.AppendTo(nil) - batch.CRC = int32(crc32.Checksum(batchBytes[21:], crc32cTable)) - return batch.AppendTo(nil) -} diff --git a/cmd/lfs-proxy/s3.go b/cmd/lfs-proxy/s3.go deleted file mode 100644 index a66bc449..00000000 --- a/cmd/lfs-proxy/s3.go +++ /dev/null @@ -1,582 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "bytes" - "context" - "crypto/sha256" - "encoding/hex" - "errors" - "fmt" - "io" - "time" - - "github.com/KafScale/platform/pkg/lfs" - "github.com/aws/aws-sdk-go-v2/aws" - "github.com/aws/aws-sdk-go-v2/aws/signer/v4" - "github.com/aws/aws-sdk-go-v2/config" - "github.com/aws/aws-sdk-go-v2/credentials" - "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/aws/aws-sdk-go-v2/service/s3/types" - "github.com/aws/smithy-go" -) - -const minMultipartChunkSize int64 = 5 * 1024 * 1024 - -type s3Config struct { - Bucket string - Region string - Endpoint string - PublicEndpoint string - AccessKeyID string - SecretAccessKey string - SessionToken string - ForcePathStyle bool - ChunkSize int64 -} - -type s3API interface { - CreateMultipartUpload(ctx context.Context, params *s3.CreateMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error) - UploadPart(ctx context.Context, params *s3.UploadPartInput, optFns ...func(*s3.Options)) (*s3.UploadPartOutput, error) - CompleteMultipartUpload(ctx context.Context, params *s3.CompleteMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) - AbortMultipartUpload(ctx context.Context, params *s3.AbortMultipartUploadInput, optFns ...func(*s3.Options)) (*s3.AbortMultipartUploadOutput, error) - PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) - GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) - DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) - HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) - CreateBucket(ctx context.Context, params *s3.CreateBucketInput, optFns ...func(*s3.Options)) (*s3.CreateBucketOutput, error) -} - -type s3PresignAPI interface { - 
PresignGetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.PresignOptions)) (*v4.PresignedHTTPRequest, error) -} - -type s3Uploader struct { - bucket string - region string - chunkSize int64 - api s3API - presign s3PresignAPI -} - -func normalizeChunkSize(chunk int64) int64 { - if chunk <= 0 { - chunk = defaultChunkSize - } - if chunk < minMultipartChunkSize { - chunk = minMultipartChunkSize - } - return chunk -} - -func newS3Uploader(ctx context.Context, cfg s3Config) (*s3Uploader, error) { - if cfg.Bucket == "" { - return nil, errors.New("s3 bucket required") - } - if cfg.Region == "" { - return nil, errors.New("s3 region required") - } - cfg.ChunkSize = normalizeChunkSize(cfg.ChunkSize) - - loadOpts := []func(*config.LoadOptions) error{ - config.WithRegion(cfg.Region), - } - if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { - loadOpts = append(loadOpts, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(cfg.AccessKeyID, cfg.SecretAccessKey, cfg.SessionToken))) - } - awsCfg, err := config.LoadDefaultConfig(ctx, loadOpts...) 
- if err != nil { - return nil, fmt.Errorf("load aws config: %w", err) - } - client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { - if cfg.Endpoint != "" { - o.BaseEndpoint = aws.String(cfg.Endpoint) - } - o.UsePathStyle = cfg.ForcePathStyle - }) - presignEndpoint := cfg.Endpoint - if cfg.PublicEndpoint != "" { - presignEndpoint = cfg.PublicEndpoint - } - presignClient := s3.NewFromConfig(awsCfg, func(o *s3.Options) { - if presignEndpoint != "" { - o.BaseEndpoint = aws.String(presignEndpoint) - } - o.UsePathStyle = cfg.ForcePathStyle - }) - presigner := s3.NewPresignClient(presignClient) - - return &s3Uploader{ - bucket: cfg.Bucket, - region: cfg.Region, - chunkSize: cfg.ChunkSize, - api: client, - presign: presigner, - }, nil -} - -func (u *s3Uploader) PresignGetObject(ctx context.Context, key string, ttl time.Duration) (string, error) { - if key == "" { - return "", errors.New("s3 key required") - } - if u.presign == nil { - return "", errors.New("presign client not configured") - } - out, err := u.presign.PresignGetObject(ctx, &s3.GetObjectInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - }, func(opts *s3.PresignOptions) { - opts.Expires = ttl - }) - if err != nil { - return "", err - } - return out.URL, nil -} - -func (u *s3Uploader) GetObject(ctx context.Context, key string) (*s3.GetObjectOutput, error) { - if key == "" { - return nil, errors.New("s3 key required") - } - return u.api.GetObject(ctx, &s3.GetObjectInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - }) -} - -func (u *s3Uploader) HeadBucket(ctx context.Context) error { - _, err := u.api.HeadBucket(ctx, &s3.HeadBucketInput{Bucket: aws.String(u.bucket)}) - if err == nil { - return nil - } - return err -} - -func (u *s3Uploader) EnsureBucket(ctx context.Context) error { - if err := u.HeadBucket(ctx); err == nil { - return nil - } - input := &s3.CreateBucketInput{Bucket: aws.String(u.bucket)} - if u.region != "" && u.region != "us-east-1" { - 
input.CreateBucketConfiguration = &types.CreateBucketConfiguration{LocationConstraint: types.BucketLocationConstraint(u.region)} - } - _, err := u.api.CreateBucket(ctx, input) - if err != nil { - var apiErr smithy.APIError - if errors.As(err, &apiErr) { - switch apiErr.ErrorCode() { - case "BucketAlreadyOwnedByYou", "BucketAlreadyExists": - return nil - } - } - return fmt.Errorf("create bucket %s: %w", u.bucket, err) - } - return nil -} - -func (u *s3Uploader) Upload(ctx context.Context, key string, payload []byte, alg lfs.ChecksumAlg) (string, string, string, error) { - if key == "" { - return "", "", "", errors.New("s3 key required") - } - shaHasher := sha256.New() - if _, err := shaHasher.Write(payload); err != nil { - return "", "", "", err - } - shaHex := hex.EncodeToString(shaHasher.Sum(nil)) - - checksumAlg := alg - if checksumAlg == "" { - checksumAlg = lfs.ChecksumSHA256 - } - var checksum string - if checksumAlg != lfs.ChecksumNone { - if checksumAlg == lfs.ChecksumSHA256 { - checksum = shaHex - } else { - computed, err := lfs.ComputeChecksum(checksumAlg, payload) - if err != nil { - return "", "", "", err - } - checksum = computed - } - } - - size := int64(len(payload)) - if size <= u.chunkSize { - _, err := u.api.PutObject(ctx, &s3.PutObjectInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - Body: bytes.NewReader(payload), - ContentLength: aws.Int64(size), - }) - return shaHex, checksum, string(checksumAlg), err - } - return shaHex, checksum, string(checksumAlg), u.multipartUpload(ctx, key, payload) -} - -func (u *s3Uploader) UploadStream(ctx context.Context, key string, reader io.Reader, maxSize int64, alg lfs.ChecksumAlg) (string, string, string, int64, error) { - if key == "" { - return "", "", "", 0, errors.New("s3 key required") - } - if reader == nil { - return "", "", "", 0, errors.New("reader required") - } - u.chunkSize = normalizeChunkSize(u.chunkSize) - - checksumAlg := alg - if checksumAlg == "" { - checksumAlg = 
lfs.ChecksumSHA256 - } - - // Read first chunk to determine if we need multipart upload - firstBuf := make([]byte, u.chunkSize) - firstN, firstErr := io.ReadFull(reader, firstBuf) - if firstErr != nil && firstErr != io.EOF && firstErr != io.ErrUnexpectedEOF { - return "", "", "", 0, firstErr - } - if firstN == 0 { - return "", "", "", 0, errors.New("empty upload") - } - - firstReadHitEOF := firstErr == io.EOF || firstErr == io.ErrUnexpectedEOF - - // If data fits in one chunk and is smaller than minMultipartChunkSize, use PutObject - if firstReadHitEOF && int64(firstN) < minMultipartChunkSize { - data := firstBuf[:firstN] - shaHasher := sha256.New() - shaHasher.Write(data) - shaHex := hex.EncodeToString(shaHasher.Sum(nil)) - - checksum := "" - if checksumAlg != lfs.ChecksumNone { - if checksumAlg == lfs.ChecksumSHA256 { - checksum = shaHex - } else { - computed, err := lfs.ComputeChecksum(checksumAlg, data) - if err != nil { - return "", "", "", 0, err - } - checksum = computed - } - } - - _, err := u.api.PutObject(ctx, &s3.PutObjectInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - Body: bytes.NewReader(data), - ContentLength: aws.Int64(int64(firstN)), - }) - if err != nil { - return "", "", "", 0, fmt.Errorf("put object: %w", err) - } - return shaHex, checksum, string(checksumAlg), int64(firstN), nil - } - - // Use multipart upload for larger files - createResp, err := u.api.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - }) - if err != nil { - return "", "", "", 0, fmt.Errorf("create multipart upload: %w", err) - } - uploadID := createResp.UploadId - if uploadID == nil { - return "", "", "", 0, errors.New("missing upload id") - } - - shaHasher := sha256.New() - var checksumHasher interface { - Write([]byte) (int, error) - Sum([]byte) []byte - } - if checksumAlg != lfs.ChecksumNone { - if checksumAlg == lfs.ChecksumSHA256 { - checksumHasher = shaHasher - } else { - h, err := 
lfs.NewChecksumHasher(checksumAlg) - if err != nil { - _ = u.abortUpload(ctx, key, *uploadID) - return "", "", "", 0, err - } - checksumHasher = h - } - } - parts := make([]types.CompletedPart, 0, 4) - partNum := int32(1) - var total int64 - - // Upload first chunk - total += int64(firstN) - if maxSize > 0 && total > maxSize { - _ = u.abortUpload(ctx, key, *uploadID) - return "", "", "", total, fmt.Errorf("blob size %d exceeds max %d", total, maxSize) - } - shaHasher.Write(firstBuf[:firstN]) - if checksumHasher != nil && checksumHasher != shaHasher { - _, _ = checksumHasher.Write(firstBuf[:firstN]) - } - partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: uploadID, - PartNumber: aws.Int32(partNum), - Body: bytes.NewReader(firstBuf[:firstN]), - }) - if err != nil { - _ = u.abortUpload(ctx, key, *uploadID) - return "", "", "", total, fmt.Errorf("upload part %d: %w", partNum, err) - } - parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)}) - partNum++ - - // Continue reading remaining chunks - buf := make([]byte, u.chunkSize) - for { - n, readErr := io.ReadFull(reader, buf) - if n > 0 { - total += int64(n) - if maxSize > 0 && total > maxSize { - _ = u.abortUpload(ctx, key, *uploadID) - return "", "", "", total, fmt.Errorf("blob size %d exceeds max %d", total, maxSize) - } - if _, err := shaHasher.Write(buf[:n]); err != nil { - _ = u.abortUpload(ctx, key, *uploadID) - return "", "", "", total, err - } - if checksumHasher != nil && checksumHasher != shaHasher { - if _, err := checksumHasher.Write(buf[:n]); err != nil { - _ = u.abortUpload(ctx, key, *uploadID) - return "", "", "", total, err - } - } - partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: uploadID, - PartNumber: aws.Int32(partNum), - Body: bytes.NewReader(buf[:n]), - }) - if err != nil { - _ = u.abortUpload(ctx, key, 
*uploadID) - return "", "", "", total, fmt.Errorf("upload part %d: %w", partNum, err) - } - parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)}) - partNum++ - } - if readErr == io.EOF { - break - } - if readErr == io.ErrUnexpectedEOF { - break - } - if readErr != nil { - _ = u.abortUpload(ctx, key, *uploadID) - return "", "", "", total, readErr - } - } - - _, err = u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: uploadID, - MultipartUpload: &types.CompletedMultipartUpload{ - Parts: parts, - }, - }) - if err != nil { - _ = u.abortUpload(ctx, key, *uploadID) - return "", "", "", total, fmt.Errorf("complete multipart upload: %w", err) - } - shaHex := hex.EncodeToString(shaHasher.Sum(nil)) - checksum := "" - if checksumAlg != lfs.ChecksumNone { - if checksumAlg == lfs.ChecksumSHA256 { - checksum = shaHex - } else if checksumHasher != nil { - checksum = hex.EncodeToString(checksumHasher.Sum(nil)) - } - } - return shaHex, checksum, string(checksumAlg), total, nil -} - -func (u *s3Uploader) StartMultipartUpload(ctx context.Context, key, contentType string) (string, error) { - if key == "" { - return "", errors.New("s3 key required") - } - input := &s3.CreateMultipartUploadInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - } - if contentType != "" { - input.ContentType = aws.String(contentType) - } - resp, err := u.api.CreateMultipartUpload(ctx, input) - if err != nil { - return "", fmt.Errorf("create multipart upload: %w", err) - } - if resp.UploadId == nil || *resp.UploadId == "" { - return "", errors.New("missing upload id") - } - return *resp.UploadId, nil -} - -func (u *s3Uploader) UploadPart(ctx context.Context, key, uploadID string, partNumber int32, payload []byte) (string, error) { - if key == "" { - return "", errors.New("s3 key required") - } - if uploadID == "" { - return "", errors.New("upload id required") - } - 
resp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: aws.String(uploadID), - PartNumber: aws.Int32(partNumber), - Body: bytes.NewReader(payload), - }) - if err != nil { - return "", fmt.Errorf("upload part %d: %w", partNumber, err) - } - if resp.ETag == nil || *resp.ETag == "" { - return "", errors.New("missing etag") - } - return *resp.ETag, nil -} - -func (u *s3Uploader) CompleteMultipartUpload(ctx context.Context, key, uploadID string, parts []types.CompletedPart) error { - if key == "" { - return errors.New("s3 key required") - } - if uploadID == "" { - return errors.New("upload id required") - } - _, err := u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: aws.String(uploadID), - MultipartUpload: &types.CompletedMultipartUpload{ - Parts: parts, - }, - }) - if err != nil { - return fmt.Errorf("complete multipart upload: %w", err) - } - return nil -} - -func (u *s3Uploader) AbortMultipartUpload(ctx context.Context, key, uploadID string) error { - if key == "" { - return errors.New("s3 key required") - } - if uploadID == "" { - return errors.New("upload id required") - } - _, err := u.api.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: aws.String(uploadID), - }) - return err -} - -func (u *s3Uploader) multipartUpload(ctx context.Context, key string, payload []byte) error { - createResp, err := u.api.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - }) - if err != nil { - return fmt.Errorf("create multipart upload: %w", err) - } - uploadID := createResp.UploadId - if uploadID == nil { - return errors.New("missing upload id") - } - - numParts := int64(len(payload))/u.chunkSize + 1 - parts := make([]types.CompletedPart, 0, numParts) - reader := bytes.NewReader(payload) 
- partNum := int32(1) - buf := make([]byte, u.chunkSize) - for { - n, readErr := io.ReadFull(reader, buf) - if readErr == io.EOF || readErr == io.ErrUnexpectedEOF { - if n == 0 { - break - } - } - if n > 0 { - partResp, err := u.api.UploadPart(ctx, &s3.UploadPartInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: uploadID, - PartNumber: aws.Int32(partNum), - Body: bytes.NewReader(buf[:n]), - }) - if err != nil { - _ = u.abortUpload(ctx, key, *uploadID) - return fmt.Errorf("upload part %d: %w", partNum, err) - } - parts = append(parts, types.CompletedPart{ETag: partResp.ETag, PartNumber: aws.Int32(partNum)}) - partNum++ - } - if readErr == io.EOF { - break - } - if readErr != nil && readErr != io.ErrUnexpectedEOF { - _ = u.abortUpload(ctx, key, *uploadID) - return fmt.Errorf("read payload: %w", readErr) - } - if readErr == io.ErrUnexpectedEOF { - break - } - } - - _, err = u.api.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: uploadID, - MultipartUpload: &types.CompletedMultipartUpload{ - Parts: parts, - }, - }) - if err != nil { - _ = u.abortUpload(ctx, key, *uploadID) - return fmt.Errorf("complete multipart upload: %w", err) - } - return nil -} - -func (u *s3Uploader) abortUpload(ctx context.Context, key, uploadID string) error { - _, err := u.api.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - UploadId: aws.String(uploadID), - }) - return err -} - -func (u *s3Uploader) DeleteObject(ctx context.Context, key string) error { - if key == "" { - return errors.New("s3 key required") - } - _, err := u.api.DeleteObject(ctx, &s3.DeleteObjectInput{ - Bucket: aws.String(u.bucket), - Key: aws.String(key), - }) - return err -} diff --git a/cmd/lfs-proxy/sasl_encode.go b/cmd/lfs-proxy/sasl_encode.go deleted file mode 100644 index 8729099d..00000000 --- a/cmd/lfs-proxy/sasl_encode.go +++ /dev/null @@ -1,77 
+0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "encoding/binary" - "errors" - "fmt" - "io" - - "github.com/KafScale/platform/pkg/protocol" -) - -func encodeSaslHandshakeRequest(header *protocol.RequestHeader, mechanism string) ([]byte, error) { - if header == nil { - return nil, errors.New("nil header") - } - w := newByteWriter(0) - w.Int16(header.APIKey) - w.Int16(header.APIVersion) - w.Int32(header.CorrelationID) - w.NullableString(header.ClientID) - w.String(mechanism) - return w.Bytes(), nil -} - -func encodeSaslAuthenticateRequest(header *protocol.RequestHeader, authBytes []byte) ([]byte, error) { - if header == nil { - return nil, errors.New("nil header") - } - w := newByteWriter(0) - w.Int16(header.APIKey) - w.Int16(header.APIVersion) - w.Int32(header.CorrelationID) - w.NullableString(header.ClientID) - w.BytesWithLength(authBytes) - return w.Bytes(), nil -} - -func buildSaslPlainAuthBytes(username, password string) []byte { - // PLAIN: 0x00 + username + 0x00 + password - buf := make([]byte, 0, len(username)+len(password)+2) - buf = append(buf, 0) - buf = append(buf, []byte(username)...) - buf = append(buf, 0) - buf = append(buf, []byte(password)...) 
- return buf -} - -func readSaslResponse(r io.Reader) error { - frame, err := protocol.ReadFrame(r) - if err != nil { - return err - } - if len(frame.Payload) < 6 { - return fmt.Errorf("invalid SASL response length %d", len(frame.Payload)) - } - // First 4 bytes are correlation ID - errorCode := int16(binary.BigEndian.Uint16(frame.Payload[4:6])) - if errorCode != 0 { - return fmt.Errorf("sasl error code %d", errorCode) - } - return nil -} diff --git a/cmd/lfs-proxy/sasl_encode_test.go b/cmd/lfs-proxy/sasl_encode_test.go deleted file mode 100644 index a32baa3d..00000000 --- a/cmd/lfs-proxy/sasl_encode_test.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "bytes" - "encoding/binary" - "testing" - - "github.com/KafScale/platform/pkg/protocol" -) - -func TestBuildSaslPlainAuthBytes(t *testing.T) { - got := buildSaslPlainAuthBytes("user", "pass") - want := []byte{0, 'u', 's', 'e', 'r', 0, 'p', 'a', 's', 's'} - if !bytes.Equal(got, want) { - t.Fatalf("auth bytes mismatch: got %v want %v", got, want) - } -} - -func TestReadSaslResponseError(t *testing.T) { - buf := &bytes.Buffer{} - payload := make([]byte, 6) - binary.BigEndian.PutUint32(payload[:4], 1) - binary.BigEndian.PutUint16(payload[4:6], 1) // error code 1 - if err := protocol.WriteFrame(buf, payload); err != nil { - t.Fatalf("write frame: %v", err) - } - if err := readSaslResponse(buf); err == nil { - t.Fatal("expected error") - } -} diff --git a/cmd/lfs-proxy/swagger.go b/cmd/lfs-proxy/swagger.go deleted file mode 100644 index 32fb14c3..00000000 --- a/cmd/lfs-proxy/swagger.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - _ "embed" - "net/http" -) - -//go:embed openapi.yaml -var openAPISpec []byte - -const swaggerUIHTML = ` - - - - KafScale LFS Proxy - API Documentation - - - - -
- - - - - -` - -// handleSwaggerUI serves the Swagger UI HTML page. -func (p *lfsProxy) handleSwaggerUI(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "text/html; charset=utf-8") - w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte(swaggerUIHTML)) -} - -// handleOpenAPISpec serves the OpenAPI specification YAML file. -func (p *lfsProxy) handleOpenAPISpec(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/yaml") - w.Header().Set("Access-Control-Allow-Origin", "*") - w.WriteHeader(http.StatusOK) - _, _ = w.Write(openAPISpec) -} diff --git a/cmd/lfs-proxy/tracker.go b/cmd/lfs-proxy/tracker.go deleted file mode 100644 index dd7c4e4d..00000000 --- a/cmd/lfs-proxy/tracker.go +++ /dev/null @@ -1,372 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "context" - "errors" - "log/slog" - "math" - "sync" - "sync/atomic" - "time" - - "github.com/twmb/franz-go/pkg/kadm" - "github.com/twmb/franz-go/pkg/kerr" - "github.com/twmb/franz-go/pkg/kgo" -) - -const ( - defaultTrackerTopic = "__lfs_ops_state" - defaultTrackerBatchSize = 100 - defaultTrackerFlushMs = 100 - defaultTrackerChanSize = 10000 - defaultTrackerPartitions = 3 - defaultTrackerReplication = 1 -) - -// TrackerConfig holds configuration for the LFS operations tracker. -type TrackerConfig struct { - Enabled bool - Topic string - Brokers []string - BatchSize int - FlushMs int - ProxyID string - EnsureTopic bool - Partitions int - ReplicationFactor int -} - -// LfsOpsTracker tracks LFS operations by emitting events to a Kafka topic. -type LfsOpsTracker struct { - config TrackerConfig - client *kgo.Client - logger *slog.Logger - eventCh chan TrackerEvent - wg sync.WaitGroup - ctx context.Context - cancel context.CancelFunc - - // Circuit breaker state - circuitOpen uint32 - failures uint32 - lastSuccess int64 - failureThreshold uint32 - resetTimeout time.Duration - - // Metrics - eventsEmitted uint64 - eventsDropped uint64 - batchesSent uint64 -} - -// NewLfsOpsTracker creates a new tracker instance. 
-func NewLfsOpsTracker(ctx context.Context, cfg TrackerConfig, logger *slog.Logger) (*LfsOpsTracker, error) { - if !cfg.Enabled { - logger.Info("lfs ops tracker disabled") - return &LfsOpsTracker{config: cfg, logger: logger}, nil - } - - if cfg.Topic == "" { - cfg.Topic = defaultTrackerTopic - } - if cfg.BatchSize <= 0 { - cfg.BatchSize = defaultTrackerBatchSize - } - if cfg.FlushMs <= 0 { - cfg.FlushMs = defaultTrackerFlushMs - } - if cfg.Partitions <= 0 { - cfg.Partitions = defaultTrackerPartitions - } - if cfg.ReplicationFactor <= 0 { - cfg.ReplicationFactor = defaultTrackerReplication - } - if len(cfg.Brokers) == 0 { - logger.Warn("lfs ops tracker: no brokers configured, tracker disabled") - return &LfsOpsTracker{config: cfg, logger: logger}, nil - } - - opts := []kgo.Opt{ - kgo.SeedBrokers(cfg.Brokers...), - kgo.DefaultProduceTopic(cfg.Topic), - kgo.ProducerBatchMaxBytes(1024 * 1024), // 1MB max batch - kgo.ProducerLinger(time.Duration(cfg.FlushMs) * time.Millisecond), - kgo.RequiredAcks(kgo.LeaderAck()), - kgo.DisableIdempotentWrite(), // Not required for tracking events - } - - client, err := kgo.NewClient(opts...) - if err != nil { - return nil, err - } - - if cfg.EnsureTopic { - if err := ensureTrackerTopic(ctx, client, cfg, logger); err != nil { - logger.Warn("lfs ops tracker: ensure topic failed", "topic", cfg.Topic, "error", err) - } - } - - trackerCtx, cancel := context.WithCancel(ctx) - t := &LfsOpsTracker{ - config: cfg, - client: client, - logger: logger, - eventCh: make(chan TrackerEvent, defaultTrackerChanSize), - ctx: trackerCtx, - cancel: cancel, - failureThreshold: 5, - resetTimeout: 30 * time.Second, - } - - t.wg.Add(1) - go t.runBatcher() - - logger.Info("lfs ops tracker started", "topic", cfg.Topic, "brokers", cfg.Brokers) - return t, nil -} - -// Emit sends a tracker event to the channel for async processing. 
-func (t *LfsOpsTracker) Emit(event TrackerEvent) { - if t == nil || !t.config.Enabled || t.client == nil { - return - } - - // Check circuit breaker - if atomic.LoadUint32(&t.circuitOpen) == 1 { - // Check if we should try to reset - if time.Now().UnixNano()-atomic.LoadInt64(&t.lastSuccess) > t.resetTimeout.Nanoseconds() { - atomic.StoreUint32(&t.circuitOpen, 0) - atomic.StoreUint32(&t.failures, 0) - t.logger.Info("lfs ops tracker: circuit breaker reset") - } else { - atomic.AddUint64(&t.eventsDropped, 1) - return - } - } - - select { - case t.eventCh <- event: - atomic.AddUint64(&t.eventsEmitted, 1) - default: - // Channel full, drop the event - atomic.AddUint64(&t.eventsDropped, 1) - t.logger.Debug("lfs ops tracker: event dropped, channel full") - } -} - -// runBatcher processes events from the channel and sends them in batches. -func (t *LfsOpsTracker) runBatcher() { - defer t.wg.Done() - - batch := make([]*kgo.Record, 0, t.config.BatchSize) - flushInterval := time.Duration(t.config.FlushMs) * time.Millisecond - ticker := time.NewTicker(flushInterval) - defer ticker.Stop() - - flush := func() { - if len(batch) == 0 { - return - } - - // Produce batch - results := t.client.ProduceSync(t.ctx, batch...) 
- hasError := false - for _, result := range results { - if result.Err != nil { - hasError = true - t.logger.Warn("lfs ops tracker: produce failed", "error", result.Err) - } - } - - if hasError { - failures := atomic.AddUint32(&t.failures, 1) - if failures >= t.failureThreshold { - atomic.StoreUint32(&t.circuitOpen, 1) - t.logger.Warn("lfs ops tracker: circuit breaker opened", "failures", failures) - } - } else { - atomic.StoreUint32(&t.failures, 0) - atomic.StoreInt64(&t.lastSuccess, time.Now().UnixNano()) - atomic.AddUint64(&t.batchesSent, 1) - } - - batch = batch[:0] - } - - for { - select { - case <-t.ctx.Done(): - flush() - return - - case event := <-t.eventCh: - record, err := t.eventToRecord(event) - if err != nil { - t.logger.Warn("lfs ops tracker: failed to serialize event", "error", err, "type", event.GetEventType()) - continue - } - batch = append(batch, record) - if len(batch) >= t.config.BatchSize { - flush() - } - - case <-ticker.C: - flush() - } - } -} - -// eventToRecord converts a TrackerEvent to a Kafka record. 
-func (t *LfsOpsTracker) eventToRecord(event TrackerEvent) (*kgo.Record, error) { - value, err := event.Marshal() - if err != nil { - return nil, err - } - - return &kgo.Record{ - Key: []byte(event.GetTopic()), - Value: value, - }, nil -} - -func ensureTrackerTopic(ctx context.Context, client *kgo.Client, cfg TrackerConfig, logger *slog.Logger) error { - admin := kadm.NewClient(client) - var partitions int32 = defaultTrackerPartitions - if cfg.Partitions > 0 && cfg.Partitions <= math.MaxInt32 { - partitions = int32(cfg.Partitions) //nolint:gosec // bounds checked - } - var replication int16 = defaultTrackerReplication - if cfg.ReplicationFactor > 0 && cfg.ReplicationFactor <= math.MaxInt16 { - replication = int16(cfg.ReplicationFactor) //nolint:gosec // bounds checked - } - responses, err := admin.CreateTopics(ctx, partitions, replication, nil, cfg.Topic) - if err != nil { - return err - } - resp, ok := responses[cfg.Topic] - if !ok { - return errors.New("tracker topic response missing") - } - if resp.Err == nil || errors.Is(resp.Err, kerr.TopicAlreadyExists) { - logger.Info("lfs ops tracker topic ready", "topic", cfg.Topic, "partitions", cfg.Partitions, "replication", cfg.ReplicationFactor) - return nil - } - return resp.Err -} - -// Close gracefully shuts down the tracker. -func (t *LfsOpsTracker) Close() error { - if t == nil || t.client == nil { - return nil - } - - t.cancel() - t.wg.Wait() - t.client.Close() - - t.logger.Info("lfs ops tracker closed", - "events_emitted", atomic.LoadUint64(&t.eventsEmitted), - "events_dropped", atomic.LoadUint64(&t.eventsDropped), - "batches_sent", atomic.LoadUint64(&t.batchesSent), - ) - return nil -} - -// Stats returns tracker statistics. 
-func (t *LfsOpsTracker) Stats() TrackerStats { - if t == nil { - return TrackerStats{} - } - return TrackerStats{ - Enabled: t.config.Enabled, - Topic: t.config.Topic, - EventsEmitted: atomic.LoadUint64(&t.eventsEmitted), - EventsDropped: atomic.LoadUint64(&t.eventsDropped), - BatchesSent: atomic.LoadUint64(&t.batchesSent), - CircuitOpen: atomic.LoadUint32(&t.circuitOpen) == 1, - } -} - -// TrackerStats holds statistics about the tracker. -type TrackerStats struct { - Enabled bool `json:"enabled"` - Topic string `json:"topic"` - EventsEmitted uint64 `json:"events_emitted"` - EventsDropped uint64 `json:"events_dropped"` - BatchesSent uint64 `json:"batches_sent"` - CircuitOpen bool `json:"circuit_open"` -} - -// IsEnabled returns true if the tracker is enabled and ready. -func (t *LfsOpsTracker) IsEnabled() bool { - return t != nil && t.config.Enabled && t.client != nil -} - -// EmitUploadStarted emits an upload started event. -func (t *LfsOpsTracker) EmitUploadStarted(requestID, topic string, partition int32, s3Key, contentType, clientIP, apiType string, expectedSize int64) { - if !t.IsEnabled() { - return - } - event := NewUploadStartedEvent(t.config.ProxyID, requestID, topic, partition, s3Key, contentType, clientIP, apiType, expectedSize) - t.Emit(event) -} - -// EmitUploadCompleted emits an upload completed event. -func (t *LfsOpsTracker) EmitUploadCompleted(requestID, topic string, partition int32, kafkaOffset int64, s3Bucket, s3Key string, size int64, sha256, checksum, checksumAlg, contentType string, duration time.Duration) { - if !t.IsEnabled() { - return - } - event := NewUploadCompletedEvent(t.config.ProxyID, requestID, topic, partition, kafkaOffset, s3Bucket, s3Key, size, sha256, checksum, checksumAlg, contentType, duration.Milliseconds()) - t.Emit(event) -} - -// EmitUploadFailed emits an upload failed event. 
-func (t *LfsOpsTracker) EmitUploadFailed(requestID, topic, s3Key, errorCode, errorMessage, stage string, sizeUploaded int64, duration time.Duration) { - if !t.IsEnabled() { - return - } - event := NewUploadFailedEvent(t.config.ProxyID, requestID, topic, s3Key, errorCode, errorMessage, stage, sizeUploaded, duration.Milliseconds()) - t.Emit(event) -} - -// EmitDownloadRequested emits a download requested event. -func (t *LfsOpsTracker) EmitDownloadRequested(requestID, s3Bucket, s3Key, mode, clientIP string, ttlSeconds int) { - if !t.IsEnabled() { - return - } - event := NewDownloadRequestedEvent(t.config.ProxyID, requestID, s3Bucket, s3Key, mode, clientIP, ttlSeconds) - t.Emit(event) -} - -// EmitDownloadCompleted emits a download completed event. -func (t *LfsOpsTracker) EmitDownloadCompleted(requestID, s3Key, mode string, duration time.Duration, size int64) { - if !t.IsEnabled() { - return - } - event := NewDownloadCompletedEvent(t.config.ProxyID, requestID, s3Key, mode, duration.Milliseconds(), size) - t.Emit(event) -} - -// EmitOrphanDetected emits an orphan detected event. -func (t *LfsOpsTracker) EmitOrphanDetected(requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason string, size int64) { - if !t.IsEnabled() { - return - } - event := NewOrphanDetectedEvent(t.config.ProxyID, requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason, size) - t.Emit(event) -} diff --git a/cmd/lfs-proxy/tracker_test.go b/cmd/lfs-proxy/tracker_test.go deleted file mode 100644 index 91441fab..00000000 --- a/cmd/lfs-proxy/tracker_test.go +++ /dev/null @@ -1,383 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "context" - "encoding/json" - "log/slog" - "os" - "testing" - "time" -) - -func TestTrackerEventTypes(t *testing.T) { - proxyID := "test-proxy" - requestID := "req-123" - - t.Run("UploadStartedEvent", func(t *testing.T) { - event := NewUploadStartedEvent(proxyID, requestID, "test-topic", 0, "s3/key", "application/json", "127.0.0.1", "http", 1024) - - if event.EventType != EventTypeUploadStarted { - t.Errorf("expected event type %s, got %s", EventTypeUploadStarted, event.EventType) - } - if event.Topic != "test-topic" { - t.Errorf("expected topic test-topic, got %s", event.Topic) - } - if event.ProxyID != proxyID { - t.Errorf("expected proxy ID %s, got %s", proxyID, event.ProxyID) - } - if event.RequestID != requestID { - t.Errorf("expected request ID %s, got %s", requestID, event.RequestID) - } - if event.Version != TrackerEventVersion { - t.Errorf("expected version %d, got %d", TrackerEventVersion, event.Version) - } - - // Test marshaling - data, err := event.Marshal() - if err != nil { - t.Fatalf("failed to marshal event: %v", err) - } - var decoded UploadStartedEvent - if err := json.Unmarshal(data, &decoded); err != nil { - t.Fatalf("failed to unmarshal event: %v", err) - } - if decoded.Topic != event.Topic { - t.Errorf("decoded topic mismatch: %s vs %s", decoded.Topic, event.Topic) - } - }) - - t.Run("UploadCompletedEvent", func(t *testing.T) { - event := NewUploadCompletedEvent(proxyID, requestID, "test-topic", 0, 42, "bucket", "s3/key", 1024, "sha256hex", "checksum", "sha256", "application/json", 500) - - if 
event.EventType != EventTypeUploadCompleted { - t.Errorf("expected event type %s, got %s", EventTypeUploadCompleted, event.EventType) - } - if event.KafkaOffset != 42 { - t.Errorf("expected kafka offset 42, got %d", event.KafkaOffset) - } - if event.Size != 1024 { - t.Errorf("expected size 1024, got %d", event.Size) - } - if event.DurationMs != 500 { - t.Errorf("expected duration 500ms, got %d", event.DurationMs) - } - - data, err := event.Marshal() - if err != nil { - t.Fatalf("failed to marshal event: %v", err) - } - var decoded UploadCompletedEvent - if err := json.Unmarshal(data, &decoded); err != nil { - t.Fatalf("failed to unmarshal event: %v", err) - } - }) - - t.Run("UploadFailedEvent", func(t *testing.T) { - event := NewUploadFailedEvent(proxyID, requestID, "test-topic", "s3/key", "s3_error", "connection refused", "s3_upload", 512, 250) - - if event.EventType != EventTypeUploadFailed { - t.Errorf("expected event type %s, got %s", EventTypeUploadFailed, event.EventType) - } - if event.ErrorCode != "s3_error" { - t.Errorf("expected error code s3_error, got %s", event.ErrorCode) - } - if event.Stage != "s3_upload" { - t.Errorf("expected stage s3_upload, got %s", event.Stage) - } - - data, err := event.Marshal() - if err != nil { - t.Fatalf("failed to marshal event: %v", err) - } - var decoded UploadFailedEvent - if err := json.Unmarshal(data, &decoded); err != nil { - t.Fatalf("failed to unmarshal event: %v", err) - } - }) - - t.Run("DownloadRequestedEvent", func(t *testing.T) { - event := NewDownloadRequestedEvent(proxyID, requestID, "bucket", "s3/key", "presign", "192.168.1.1", 120) - - if event.EventType != EventTypeDownloadRequested { - t.Errorf("expected event type %s, got %s", EventTypeDownloadRequested, event.EventType) - } - if event.Mode != "presign" { - t.Errorf("expected mode presign, got %s", event.Mode) - } - if event.TTLSeconds != 120 { - t.Errorf("expected TTL 120, got %d", event.TTLSeconds) - } - - data, err := event.Marshal() - if err != nil 
{ - t.Fatalf("failed to marshal event: %v", err) - } - var decoded DownloadRequestedEvent - if err := json.Unmarshal(data, &decoded); err != nil { - t.Fatalf("failed to unmarshal event: %v", err) - } - }) - - t.Run("DownloadCompletedEvent", func(t *testing.T) { - event := NewDownloadCompletedEvent(proxyID, requestID, "s3/key", "stream", 150, 2048) - - if event.EventType != EventTypeDownloadCompleted { - t.Errorf("expected event type %s, got %s", EventTypeDownloadCompleted, event.EventType) - } - if event.DurationMs != 150 { - t.Errorf("expected duration 150ms, got %d", event.DurationMs) - } - if event.Size != 2048 { - t.Errorf("expected size 2048, got %d", event.Size) - } - - data, err := event.Marshal() - if err != nil { - t.Fatalf("failed to marshal event: %v", err) - } - var decoded DownloadCompletedEvent - if err := json.Unmarshal(data, &decoded); err != nil { - t.Fatalf("failed to unmarshal event: %v", err) - } - }) - - t.Run("OrphanDetectedEvent", func(t *testing.T) { - event := NewOrphanDetectedEvent(proxyID, requestID, "upload_failure", "test-topic", "bucket", "s3/key", "orig-req-456", "kafka_produce_failed", 4096) - - if event.EventType != EventTypeOrphanDetected { - t.Errorf("expected event type %s, got %s", EventTypeOrphanDetected, event.EventType) - } - if event.DetectionSource != "upload_failure" { - t.Errorf("expected detection source upload_failure, got %s", event.DetectionSource) - } - if event.Reason != "kafka_produce_failed" { - t.Errorf("expected reason kafka_produce_failed, got %s", event.Reason) - } - if event.OriginalRequestID != "orig-req-456" { - t.Errorf("expected original request ID orig-req-456, got %s", event.OriginalRequestID) - } - - data, err := event.Marshal() - if err != nil { - t.Fatalf("failed to marshal event: %v", err) - } - var decoded OrphanDetectedEvent - if err := json.Unmarshal(data, &decoded); err != nil { - t.Fatalf("failed to unmarshal event: %v", err) - } - }) -} - -func TestTrackerDisabled(t *testing.T) { - logger := 
slog.New(slog.NewTextHandler(os.Stdout, nil)) - ctx := context.Background() - - cfg := TrackerConfig{ - Enabled: false, - ProxyID: "test-proxy", - } - - tracker, err := NewLfsOpsTracker(ctx, cfg, logger) - if err != nil { - t.Fatalf("failed to create disabled tracker: %v", err) - } - - if tracker.IsEnabled() { - t.Error("expected tracker to be disabled") - } - - // Should not panic when emitting to disabled tracker - tracker.EmitUploadStarted("req-1", "topic", 0, "key", "ct", "ip", "http", 100) - tracker.EmitUploadCompleted("req-1", "topic", 0, 0, "bucket", "key", 100, "sha", "cs", "alg", "ct", time.Second) - tracker.EmitUploadFailed("req-1", "topic", "key", "code", "msg", "stage", 0, time.Second) - tracker.EmitDownloadRequested("req-1", "bucket", "key", "presign", "ip", 60) - tracker.EmitDownloadCompleted("req-1", "key", "presign", time.Second, 100) - tracker.EmitOrphanDetected("req-1", "source", "topic", "bucket", "key", "orig", "reason", 100) - - stats := tracker.Stats() - if stats.Enabled { - t.Error("expected stats.Enabled to be false") - } -} - -func TestTrackerNoBrokers(t *testing.T) { - logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) - ctx := context.Background() - - cfg := TrackerConfig{ - Enabled: true, - Topic: "__lfs_ops_state", - Brokers: nil, // No brokers - ProxyID: "test-proxy", - } - - tracker, err := NewLfsOpsTracker(ctx, cfg, logger) - if err != nil { - t.Fatalf("failed to create tracker without brokers: %v", err) - } - - if tracker.IsEnabled() { - t.Error("expected tracker to be disabled when no brokers configured") - } -} - -func TestTrackerConfigDefaults(t *testing.T) { - logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) - ctx := context.Background() - - cfg := TrackerConfig{ - Enabled: true, - Topic: "", // Should default to __lfs_ops_state - Brokers: []string{"localhost:9092"}, - BatchSize: 0, // Should default - FlushMs: 0, // Should default - ProxyID: "test-proxy", - } - - // This will fail to connect but should not error on 
config defaults - tracker, err := NewLfsOpsTracker(ctx, cfg, logger) - if err != nil { - // May fail to connect, but defaults should be set - t.Logf("tracker creation returned error (expected if Kafka not running): %v", err) - } - if tracker != nil { - defer func() { _ = tracker.Close() }() - } -} - -func TestEventToRecordUsesTopicKey(t *testing.T) { - tracker := &LfsOpsTracker{} - event := NewUploadCompletedEvent( - "proxy-1", - "req-1", - "topic-a", - 0, - 10, - "bucket", - "key", - 123, - "sha", - "chk", - "sha256", - "application/octet-stream", - 10, - ) - - record, err := tracker.eventToRecord(event) - if err != nil { - t.Fatalf("eventToRecord error: %v", err) - } - if string(record.Key) != "topic-a" { - t.Fatalf("expected record key topic-a, got %q", string(record.Key)) - } - if record.Partition != 0 { - t.Fatalf("expected partition 0 (unset), got %d", record.Partition) - } -} - -func TestTrackerStats(t *testing.T) { - logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) - - tracker := &LfsOpsTracker{ - config: TrackerConfig{ - Enabled: true, - Topic: "__lfs_ops_state", - }, - logger: logger, - } - - stats := tracker.Stats() - if !stats.Enabled { - t.Error("expected stats.Enabled to be true") - } - if stats.Topic != "__lfs_ops_state" { - t.Errorf("expected topic __lfs_ops_state, got %s", stats.Topic) - } -} - -func TestNilTrackerSafe(t *testing.T) { - var tracker *LfsOpsTracker - - // All these should not panic on nil tracker - tracker.Emit(nil) - tracker.EmitUploadStarted("", "", 0, "", "", "", "", 0) - tracker.EmitUploadCompleted("", "", 0, 0, "", "", 0, "", "", "", "", 0) - tracker.EmitUploadFailed("", "", "", "", "", "", 0, 0) - tracker.EmitDownloadRequested("", "", "", "", "", 0) - tracker.EmitDownloadCompleted("", "", "", 0, 0) - tracker.EmitOrphanDetected("", "", "", "", "", "", "", 0) - - if tracker.IsEnabled() { - t.Error("nil tracker should not be enabled") - } - - stats := tracker.Stats() - if stats.Enabled { - t.Error("nil tracker stats should 
show disabled") - } - - // Close should not panic - err := tracker.Close() - if err != nil { - t.Errorf("nil tracker close should not error: %v", err) - } -} - -func TestGetTopic(t *testing.T) { - tests := []struct { - event TrackerEvent - expected string - }{ - {&UploadStartedEvent{Topic: "topic-a"}, "topic-a"}, - {&UploadCompletedEvent{Topic: "topic-b"}, "topic-b"}, - {&UploadFailedEvent{Topic: "topic-c"}, "topic-c"}, - {&DownloadRequestedEvent{}, ""}, - {&DownloadCompletedEvent{}, ""}, - {&OrphanDetectedEvent{Topic: "topic-d"}, "topic-d"}, - } - - for _, tt := range tests { - result := tt.event.GetTopic() - if result != tt.expected { - t.Errorf("GetTopic() = %q, expected %q", result, tt.expected) - } - } -} - -func TestBaseEventFields(t *testing.T) { - base := newBaseEvent("test_event", "proxy-1", "req-abc") - - if base.EventType != "test_event" { - t.Errorf("expected event type test_event, got %s", base.EventType) - } - if base.ProxyID != "proxy-1" { - t.Errorf("expected proxy ID proxy-1, got %s", base.ProxyID) - } - if base.RequestID != "req-abc" { - t.Errorf("expected request ID req-abc, got %s", base.RequestID) - } - if base.Version != TrackerEventVersion { - t.Errorf("expected version %d, got %d", TrackerEventVersion, base.Version) - } - if base.EventID == "" { - t.Error("expected non-empty event ID") - } - if base.Timestamp == "" { - t.Error("expected non-empty timestamp") - } -} diff --git a/cmd/lfs-proxy/tracker_types.go b/cmd/lfs-proxy/tracker_types.go deleted file mode 100644 index 455a5835..00000000 --- a/cmd/lfs-proxy/tracker_types.go +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "encoding/json" - "time" -) - -// Event types for LFS operations tracking. -const ( - EventTypeUploadStarted = "upload_started" - EventTypeUploadCompleted = "upload_completed" - EventTypeUploadFailed = "upload_failed" - EventTypeDownloadRequested = "download_requested" - EventTypeDownloadCompleted = "download_completed" - EventTypeOrphanDetected = "orphan_detected" -) - -// TrackerEventVersion is the current schema version for tracker events. -const TrackerEventVersion = 1 - -// BaseEvent contains common fields for all tracker events. -type BaseEvent struct { - EventType string `json:"event_type"` - EventID string `json:"event_id"` - Timestamp string `json:"timestamp"` - ProxyID string `json:"proxy_id"` - RequestID string `json:"request_id"` - Version int `json:"version"` -} - -// UploadStartedEvent is emitted when an upload operation begins. -type UploadStartedEvent struct { - BaseEvent - Topic string `json:"topic"` - Partition int32 `json:"partition"` - S3Key string `json:"s3_key"` - ContentType string `json:"content_type,omitempty"` - ExpectedSize int64 `json:"expected_size,omitempty"` - ClientIP string `json:"client_ip,omitempty"` - APIType string `json:"api_type"` // "http" or "kafka" -} - -// UploadCompletedEvent is emitted when an upload operation succeeds. 
-type UploadCompletedEvent struct { - BaseEvent - Topic string `json:"topic"` - Partition int32 `json:"partition"` - KafkaOffset int64 `json:"kafka_offset,omitempty"` - S3Bucket string `json:"s3_bucket"` - S3Key string `json:"s3_key"` - Size int64 `json:"size"` - SHA256 string `json:"sha256"` - Checksum string `json:"checksum,omitempty"` - ChecksumAlg string `json:"checksum_alg,omitempty"` - DurationMs int64 `json:"duration_ms"` - ContentType string `json:"content_type,omitempty"` -} - -// UploadFailedEvent is emitted when an upload operation fails. -type UploadFailedEvent struct { - BaseEvent - Topic string `json:"topic"` - S3Key string `json:"s3_key,omitempty"` - ErrorCode string `json:"error_code"` - ErrorMessage string `json:"error_message"` - Stage string `json:"stage"` // "validation", "s3_upload", "kafka_produce" - SizeUploaded int64 `json:"size_uploaded,omitempty"` - DurationMs int64 `json:"duration_ms"` -} - -// DownloadRequestedEvent is emitted when a download operation is requested. -type DownloadRequestedEvent struct { - BaseEvent - S3Bucket string `json:"s3_bucket"` - S3Key string `json:"s3_key"` - Mode string `json:"mode"` // "presign" or "stream" - ClientIP string `json:"client_ip,omitempty"` - TTLSeconds int `json:"ttl_seconds,omitempty"` -} - -// DownloadCompletedEvent is emitted when a download operation completes. -type DownloadCompletedEvent struct { - BaseEvent - S3Key string `json:"s3_key"` - Mode string `json:"mode"` - DurationMs int64 `json:"duration_ms"` - Size int64 `json:"size,omitempty"` -} - -// OrphanDetectedEvent is emitted when an orphaned S3 object is detected. 
-type OrphanDetectedEvent struct { - BaseEvent - DetectionSource string `json:"detection_source"` // "upload_failure", "reconciliation" - Topic string `json:"topic"` - S3Bucket string `json:"s3_bucket"` - S3Key string `json:"s3_key"` - Size int64 `json:"size,omitempty"` - OriginalRequestID string `json:"original_request_id,omitempty"` - Reason string `json:"reason"` // "kafka_produce_failed", "checksum_mismatch", etc. -} - -// TrackerEvent is a union type that can hold any tracker event. -type TrackerEvent interface { - GetEventType() string - GetTopic() string - Marshal() ([]byte, error) -} - -// GetEventType returns the event type. -func (e *BaseEvent) GetEventType() string { - return e.EventType -} - -// GetTopic returns the topic for partitioning. -func (e *UploadStartedEvent) GetTopic() string { return e.Topic } -func (e *UploadCompletedEvent) GetTopic() string { return e.Topic } -func (e *UploadFailedEvent) GetTopic() string { return e.Topic } -func (e *DownloadRequestedEvent) GetTopic() string { return "" } -func (e *DownloadCompletedEvent) GetTopic() string { return "" } -func (e *OrphanDetectedEvent) GetTopic() string { return e.Topic } - -// Marshal serializes the event to JSON. -func (e *UploadStartedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } -func (e *UploadCompletedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } -func (e *UploadFailedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } -func (e *DownloadRequestedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } -func (e *DownloadCompletedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } -func (e *OrphanDetectedEvent) Marshal() ([]byte, error) { return json.Marshal(e) } - -// newBaseEvent creates a new base event with common fields. 
-func newBaseEvent(eventType, proxyID, requestID string) BaseEvent { - return BaseEvent{ - EventType: eventType, - EventID: newUUID(), - Timestamp: time.Now().UTC().Format(time.RFC3339Nano), - ProxyID: proxyID, - RequestID: requestID, - Version: TrackerEventVersion, - } -} - -// NewUploadStartedEvent creates a new upload started event. -func NewUploadStartedEvent(proxyID, requestID, topic string, partition int32, s3Key, contentType, clientIP, apiType string, expectedSize int64) *UploadStartedEvent { - return &UploadStartedEvent{ - BaseEvent: newBaseEvent(EventTypeUploadStarted, proxyID, requestID), - Topic: topic, - Partition: partition, - S3Key: s3Key, - ContentType: contentType, - ExpectedSize: expectedSize, - ClientIP: clientIP, - APIType: apiType, - } -} - -// NewUploadCompletedEvent creates a new upload completed event. -func NewUploadCompletedEvent(proxyID, requestID, topic string, partition int32, kafkaOffset int64, s3Bucket, s3Key string, size int64, sha256, checksum, checksumAlg, contentType string, durationMs int64) *UploadCompletedEvent { - return &UploadCompletedEvent{ - BaseEvent: newBaseEvent(EventTypeUploadCompleted, proxyID, requestID), - Topic: topic, - Partition: partition, - KafkaOffset: kafkaOffset, - S3Bucket: s3Bucket, - S3Key: s3Key, - Size: size, - SHA256: sha256, - Checksum: checksum, - ChecksumAlg: checksumAlg, - DurationMs: durationMs, - ContentType: contentType, - } -} - -// NewUploadFailedEvent creates a new upload failed event. -func NewUploadFailedEvent(proxyID, requestID, topic, s3Key, errorCode, errorMessage, stage string, sizeUploaded, durationMs int64) *UploadFailedEvent { - return &UploadFailedEvent{ - BaseEvent: newBaseEvent(EventTypeUploadFailed, proxyID, requestID), - Topic: topic, - S3Key: s3Key, - ErrorCode: errorCode, - ErrorMessage: errorMessage, - Stage: stage, - SizeUploaded: sizeUploaded, - DurationMs: durationMs, - } -} - -// NewDownloadRequestedEvent creates a new download requested event. 
-func NewDownloadRequestedEvent(proxyID, requestID, s3Bucket, s3Key, mode, clientIP string, ttlSeconds int) *DownloadRequestedEvent { - return &DownloadRequestedEvent{ - BaseEvent: newBaseEvent(EventTypeDownloadRequested, proxyID, requestID), - S3Bucket: s3Bucket, - S3Key: s3Key, - Mode: mode, - ClientIP: clientIP, - TTLSeconds: ttlSeconds, - } -} - -// NewDownloadCompletedEvent creates a new download completed event. -func NewDownloadCompletedEvent(proxyID, requestID, s3Key, mode string, durationMs, size int64) *DownloadCompletedEvent { - return &DownloadCompletedEvent{ - BaseEvent: newBaseEvent(EventTypeDownloadCompleted, proxyID, requestID), - S3Key: s3Key, - Mode: mode, - DurationMs: durationMs, - Size: size, - } -} - -// NewOrphanDetectedEvent creates a new orphan detected event. -func NewOrphanDetectedEvent(proxyID, requestID, detectionSource, topic, s3Bucket, s3Key, originalRequestID, reason string, size int64) *OrphanDetectedEvent { - return &OrphanDetectedEvent{ - BaseEvent: newBaseEvent(EventTypeOrphanDetected, proxyID, requestID), - DetectionSource: detectionSource, - Topic: topic, - S3Bucket: s3Bucket, - S3Key: s3Key, - Size: size, - OriginalRequestID: originalRequestID, - Reason: reason, - } -} diff --git a/cmd/lfs-proxy/uuid.go b/cmd/lfs-proxy/uuid.go deleted file mode 100644 index aa1fa49c..00000000 --- a/cmd/lfs-proxy/uuid.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import "github.com/google/uuid" - -func newUUID() string { - return uuid.NewString() -} diff --git a/deploy/docker-compose/Makefile b/deploy/docker-compose/Makefile index 87b8fd23..da1b53f9 100644 --- a/deploy/docker-compose/Makefile +++ b/deploy/docker-compose/Makefile @@ -37,7 +37,6 @@ health: ## Check service health @echo "=== Health Checks ===" @echo -n "etcd: "; curl -s http://localhost:2379/health | head -c 50 || echo "FAIL" @echo -n "minio: "; curl -s http://localhost:9000/minio/health/live || echo "FAIL" - @echo -n "lfs-proxy: "; curl -s http://localhost:9094/readyz || echo "FAIL" @echo -n "broker: "; nc -z localhost 9092 && echo "OK" || echo "FAIL" test-upload: ## Test LFS upload (creates 1KB test file) diff --git a/deploy/docker-compose/docker-compose.yaml b/deploy/docker-compose/docker-compose.yaml index a06682b0..92d78fcd 100644 --- a/deploy/docker-compose/docker-compose.yaml +++ b/deploy/docker-compose/docker-compose.yaml @@ -145,65 +145,10 @@ services: networks: - kafscale - lfs-proxy: - image: ${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-lfs-proxy:${TAG:-dev} - container_name: kafscale-lfs-proxy-dc - depends_on: - etcd: - condition: service_healthy - minio-init: - condition: service_completed_successfully - broker: - condition: service_healthy - environment: - # Kafka proxy settings - KAFSCALE_LFS_PROXY_ADDR: ":9093" - KAFSCALE_LFS_PROXY_ADVERTISED_HOST: "lfs-proxy" - KAFSCALE_LFS_PROXY_ADVERTISED_PORT: "9093" - KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS: "http://etcd:2379" - KAFSCALE_LFS_PROXY_BACKENDS: "broker:9092" 
- # HTTP API settings - KAFSCALE_LFS_PROXY_HTTP_ADDR: ":8080" - # Health & Metrics - KAFSCALE_LFS_PROXY_HEALTH_ADDR: ":9094" - KAFSCALE_LFS_PROXY_METRICS_ADDR: ":9095" - # S3 settings - KAFSCALE_LFS_PROXY_S3_BUCKET: "kafscale" - KAFSCALE_LFS_PROXY_S3_REGION: "us-east-1" - KAFSCALE_LFS_PROXY_S3_ENDPOINT: "http://minio:9000" - KAFSCALE_LFS_PROXY_S3_ACCESS_KEY: "minioadmin" - KAFSCALE_LFS_PROXY_S3_SECRET_KEY: "minioadmin" - KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE: "true" - KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET: "true" - # Blob settings (Beast mode) - KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE: "7516192768" # 7GB - KAFSCALE_LFS_PROXY_CHUNK_SIZE: "16777216" # 16MB - # HTTP timeouts for large uploads - KAFSCALE_LFS_PROXY_HTTP_READ_TIMEOUT_SEC: "1800" - KAFSCALE_LFS_PROXY_HTTP_WRITE_TIMEOUT_SEC: "1800" - KAFSCALE_LFS_PROXY_HTTP_IDLE_TIMEOUT_SEC: "120" - # Logging - KAFSCALE_LFS_PROXY_LOG_LEVEL: "info" - # Traceability (LFS Ops Tracker) - KAFSCALE_LFS_TRACKER_ENABLED: "true" - KAFSCALE_LFS_TRACKER_TOPIC: "__lfs_ops_state" - KAFSCALE_LFS_TRACKER_BATCH_SIZE: "100" - KAFSCALE_LFS_TRACKER_FLUSH_MS: "100" - KAFSCALE_LFS_TRACKER_ENSURE_TOPIC: "true" - KAFSCALE_LFS_TRACKER_PARTITIONS: "3" - KAFSCALE_LFS_TRACKER_REPLICATION_FACTOR: "1" - ports: - - "9093:9093" # Kafka protocol (LFS) - - "8080:8080" # HTTP API - - "9094:9094" # Health - - "9095:9095" # Metrics - healthcheck: - test: ["CMD-SHELL", "wget -qO- http://localhost:9094/readyz || exit 1"] - interval: 10s - timeout: 5s - retries: 10 - networks: - - kafscale + # NOTE: The standalone lfs-proxy service has been removed. + # LFS is now a feature-flag on the unified proxy: + # KAFSCALE_PROXY_LFS_ENABLED=true + # See the proxy service above for configuration. 
console: image: ${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-console:${TAG:-dev} @@ -248,7 +193,7 @@ services: image: ${REGISTRY:-192.168.0.131:5100}/kafscale/kafscale-e72-browser-demo:${TAG:-dev} container_name: kafscale-e72-demo-dc depends_on: - lfs-proxy: + proxy: condition: service_healthy ports: - "3072:80" diff --git a/deploy/docker/lfs-proxy.Dockerfile b/deploy/docker/lfs-proxy.Dockerfile deleted file mode 100644 index ff227352..00000000 --- a/deploy/docker/lfs-proxy.Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# syntax=docker/dockerfile:1.7 - -ARG GO_VERSION=1.25.2 -FROM golang:${GO_VERSION}-alpine@sha256:06cdd34bd531b810650e47762c01e025eb9b1c7eadd191553b91c9f2d549fae8 AS builder - -ARG TARGETOS=linux -ARG TARGETARCH=amd64 - -WORKDIR /src -RUN apk add --no-cache git ca-certificates - -COPY go.mod go.sum ./ -RUN --mount=type=cache,target=/go/pkg/mod \ - --mount=type=cache,target=/root/.cache/go-build \ - go mod download -COPY . . 
- -RUN --mount=type=cache,target=/go/pkg/mod \ - --mount=type=cache,target=/root/.cache/go-build \ - CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \ - go build -ldflags="-s -w" -o /out/lfs-proxy ./cmd/lfs-proxy - -FROM alpine:3.19@sha256:6baf43584bcb78f2e5847d1de515f23499913ac9f12bdf834811a3145eb11ca1 -RUN apk add --no-cache ca-certificates && adduser -D -u 10001 kafscale -USER 10001 -WORKDIR /app - -COPY --from=builder /out/lfs-proxy /usr/local/bin/kafscale-lfs-proxy - -EXPOSE 9092 -ENTRYPOINT ["/usr/local/bin/kafscale-lfs-proxy"] diff --git a/deploy/helm/kafscale/templates/lfs-proxy-deployment.yaml b/deploy/helm/kafscale/templates/lfs-proxy-deployment.yaml deleted file mode 100644 index 9b7dba92..00000000 --- a/deploy/helm/kafscale/templates/lfs-proxy-deployment.yaml +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -{{- if .Values.lfsProxy.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }} - labels: -{{ include "kafscale.labels" . | indent 4 }} - app.kubernetes.io/component: lfs-proxy -spec: - replicas: {{ .Values.lfsProxy.replicaCount }} - selector: - matchLabels: -{{ include "kafscale.componentSelectorLabels" (dict "root" . 
"component" "lfs-proxy") | indent 6 }} - template: - metadata: - labels: -{{ include "kafscale.componentSelectorLabels" (dict "root" . "component" "lfs-proxy") | indent 8 }} -{{- with .Values.lfsProxy.podAnnotations }} - annotations: -{{ toYaml . | indent 8 }} -{{- end }} - spec: -{{- if .Values.imagePullSecrets }} - imagePullSecrets: -{{- range .Values.imagePullSecrets }} - - name: {{ . }} -{{- end }} -{{- end }} - containers: - - name: lfs-proxy - image: "{{ .Values.lfsProxy.image.repository }}:{{ ternary "latest" (default .Chart.AppVersion .Values.lfsProxy.image.tag) .Values.lfsProxy.image.useLatest }}" - imagePullPolicy: {{ ternary "Always" .Values.lfsProxy.image.pullPolicy .Values.lfsProxy.image.useLatest }} - env: - - name: KAFSCALE_LFS_PROXY_ADDR - value: ":{{ .Values.lfsProxy.service.port }}" - - name: KAFSCALE_LFS_PROXY_ADVERTISED_PORT - value: "{{ .Values.lfsProxy.advertisedPort }}" -{{- if .Values.lfsProxy.http.enabled }} - - name: KAFSCALE_LFS_PROXY_HTTP_ADDR - value: ":{{ .Values.lfsProxy.http.port }}" -{{- end }} -{{- if .Values.lfsProxy.http.apiKey }} - - name: KAFSCALE_LFS_PROXY_HTTP_API_KEY - value: "{{ .Values.lfsProxy.http.apiKey }}" -{{- end }} -{{- if and .Values.lfsProxy.http.cors .Values.lfsProxy.http.cors.enabled }} - - name: KAFSCALE_LFS_PROXY_HTTP_CORS_ENABLED - value: "true" -{{- if .Values.lfsProxy.http.cors.allowOrigins }} - - name: KAFSCALE_LFS_PROXY_HTTP_CORS_ALLOW_ORIGINS - value: "{{ join "," .Values.lfsProxy.http.cors.allowOrigins }}" -{{- end }} -{{- if .Values.lfsProxy.http.cors.allowMethods }} - - name: KAFSCALE_LFS_PROXY_HTTP_CORS_ALLOW_METHODS - value: "{{ join "," .Values.lfsProxy.http.cors.allowMethods }}" -{{- end }} -{{- if .Values.lfsProxy.http.cors.allowHeaders }} - - name: KAFSCALE_LFS_PROXY_HTTP_CORS_ALLOW_HEADERS - value: "{{ join "," .Values.lfsProxy.http.cors.allowHeaders }}" -{{- end }} -{{- if .Values.lfsProxy.http.cors.exposeHeaders }} - - name: KAFSCALE_LFS_PROXY_HTTP_CORS_EXPOSE_HEADERS - value: "{{ join "," 
.Values.lfsProxy.http.cors.exposeHeaders }}" -{{- end }} -{{- end }} -{{- if .Values.lfsProxy.health.enabled }} - - name: KAFSCALE_LFS_PROXY_HEALTH_ADDR - value: ":{{ .Values.lfsProxy.health.port }}" -{{- end }} -{{- if .Values.lfsProxy.metrics.enabled }} - - name: KAFSCALE_LFS_PROXY_METRICS_ADDR - value: ":{{ .Values.lfsProxy.metrics.port }}" -{{- end }} -{{- if .Values.lfsProxy.backendCacheTTLSeconds }} - - name: KAFSCALE_LFS_PROXY_BACKEND_CACHE_TTL_SEC - value: "{{ .Values.lfsProxy.backendCacheTTLSeconds }}" -{{- end }} -{{- if .Values.lfsProxy.advertisedHost }} - - name: KAFSCALE_LFS_PROXY_ADVERTISED_HOST - value: "{{ .Values.lfsProxy.advertisedHost }}" -{{- end }} -{{- if .Values.lfsProxy.etcdEndpoints }} - - name: KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS - value: "{{ join "," .Values.lfsProxy.etcdEndpoints }}" -{{- end }} -{{- if .Values.lfsProxy.etcd.existingSecret }} - - name: KAFSCALE_LFS_PROXY_ETCD_USERNAME - valueFrom: - secretKeyRef: - name: {{ .Values.lfsProxy.etcd.existingSecret }} - key: ETCD_USERNAME - - name: KAFSCALE_LFS_PROXY_ETCD_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.lfsProxy.etcd.existingSecret }} - key: ETCD_PASSWORD -{{- else }} -{{- if .Values.lfsProxy.etcd.username }} - - name: KAFSCALE_LFS_PROXY_ETCD_USERNAME - value: "{{ .Values.lfsProxy.etcd.username }}" -{{- end }} -{{- if .Values.lfsProxy.etcd.password }} - - name: KAFSCALE_LFS_PROXY_ETCD_PASSWORD - value: "{{ .Values.lfsProxy.etcd.password }}" -{{- end }} -{{- end }} -{{- if .Values.lfsProxy.backends }} - - name: KAFSCALE_LFS_PROXY_BACKENDS - value: "{{ join "," .Values.lfsProxy.backends }}" -{{- end }} -{{- if .Values.lfsProxy.s3.bucket }} - - name: KAFSCALE_LFS_PROXY_S3_BUCKET - value: "{{ .Values.lfsProxy.s3.bucket }}" -{{- end }} -{{- if .Values.lfsProxy.s3.region }} - - name: KAFSCALE_LFS_PROXY_S3_REGION - value: "{{ .Values.lfsProxy.s3.region }}" -{{- end }} -{{- if .Values.lfsProxy.s3.endpoint }} - - name: KAFSCALE_LFS_PROXY_S3_ENDPOINT - value: "{{ 
.Values.lfsProxy.s3.endpoint }}" -{{- end }} -{{- if .Values.lfsProxy.s3.existingSecret }} - - name: KAFSCALE_LFS_PROXY_S3_ACCESS_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.lfsProxy.s3.existingSecret }} - key: AWS_ACCESS_KEY_ID - - name: KAFSCALE_LFS_PROXY_S3_SECRET_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.lfsProxy.s3.existingSecret }} - key: AWS_SECRET_ACCESS_KEY -{{- else }} -{{- if .Values.lfsProxy.s3.accessKey }} - - name: KAFSCALE_LFS_PROXY_S3_ACCESS_KEY - value: "{{ .Values.lfsProxy.s3.accessKey }}" -{{- end }} -{{- if .Values.lfsProxy.s3.secretKey }} - - name: KAFSCALE_LFS_PROXY_S3_SECRET_KEY - value: "{{ .Values.lfsProxy.s3.secretKey }}" -{{- end }} -{{- end }} -{{- if .Values.lfsProxy.s3.sessionToken }} - - name: KAFSCALE_LFS_PROXY_S3_SESSION_TOKEN - value: "{{ .Values.lfsProxy.s3.sessionToken }}" -{{- end }} -{{- if .Values.lfsProxy.s3.forcePathStyle }} - - name: KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE - value: "true" -{{- end }} -{{- if .Values.lfsProxy.s3.ensureBucket }} - - name: KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET - value: "true" -{{- end }} -{{- if .Values.lfsProxy.s3.maxBlobSize }} - - name: KAFSCALE_LFS_PROXY_MAX_BLOB_SIZE - value: "{{ .Values.lfsProxy.s3.maxBlobSize }}" -{{- end }} -{{- if .Values.lfsProxy.s3.chunkSize }} - - name: KAFSCALE_LFS_PROXY_CHUNK_SIZE - value: "{{ .Values.lfsProxy.s3.chunkSize }}" -{{- end }} -{{- if .Values.lfsProxy.tracker.enabled }} - - name: KAFSCALE_LFS_TRACKER_ENABLED - value: "true" - - name: KAFSCALE_LFS_TRACKER_TOPIC - value: "{{ .Values.lfsProxy.tracker.topic }}" - - name: KAFSCALE_LFS_TRACKER_BATCH_SIZE - value: "{{ .Values.lfsProxy.tracker.batchSize }}" - - name: KAFSCALE_LFS_TRACKER_FLUSH_MS - value: "{{ .Values.lfsProxy.tracker.flushMs }}" - - name: KAFSCALE_LFS_TRACKER_ENSURE_TOPIC - value: "{{ .Values.lfsProxy.tracker.ensureTopic }}" - - name: KAFSCALE_LFS_TRACKER_PARTITIONS - value: "{{ .Values.lfsProxy.tracker.partitions }}" - - name: KAFSCALE_LFS_TRACKER_REPLICATION_FACTOR - value: 
"{{ .Values.lfsProxy.tracker.replicationFactor }}" -{{- else }} - - name: KAFSCALE_LFS_TRACKER_ENABLED - value: "false" -{{- end }} - ports: - - name: kafka - containerPort: {{ .Values.lfsProxy.service.port }} - protocol: TCP -{{- if .Values.lfsProxy.http.enabled }} - - name: http - containerPort: {{ .Values.lfsProxy.http.port }} - protocol: TCP -{{- end }} -{{- if .Values.lfsProxy.health.enabled }} - - name: health - containerPort: {{ .Values.lfsProxy.health.port }} - protocol: TCP -{{- end }} -{{- if .Values.lfsProxy.metrics.enabled }} - - name: metrics - containerPort: {{ .Values.lfsProxy.metrics.port }} - protocol: TCP -{{- end }} -{{- if .Values.lfsProxy.health.enabled }} - readinessProbe: - httpGet: - path: /readyz - port: health - initialDelaySeconds: 2 - periodSeconds: 5 - failureThreshold: 6 - livenessProbe: - httpGet: - path: /livez - port: health - initialDelaySeconds: 5 - periodSeconds: 10 - failureThreshold: 3 -{{- end }} - resources: -{{- if .Values.lfsProxy.resources }} -{{ toYaml .Values.lfsProxy.resources | indent 12 }} -{{- else }} - {} -{{- end }} -{{- with .Values.lfsProxy.nodeSelector }} - nodeSelector: -{{ toYaml . | indent 8 }} -{{- end }} -{{- with .Values.lfsProxy.tolerations }} - tolerations: -{{ toYaml . | indent 8 }} -{{- end }} -{{- with .Values.lfsProxy.affinity }} - affinity: -{{ toYaml . | indent 8 }} -{{- end }} -{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-http-ingress.yaml b/deploy/helm/kafscale/templates/lfs-proxy-http-ingress.yaml deleted file mode 100644 index 43f522d4..00000000 --- a/deploy/helm/kafscale/templates/lfs-proxy-http-ingress.yaml +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -{{- if and .Values.lfsProxy.enabled .Values.lfsProxy.http.enabled .Values.lfsProxy.ingress.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy-http") }} - labels: -{{ include "kafscale.labels" . | indent 4 }} - app.kubernetes.io/component: lfs-proxy-http -{{- with .Values.lfsProxy.ingress.annotations }} - annotations: -{{ toYaml . | indent 4 }} -{{- end }} -spec: -{{- if .Values.lfsProxy.ingress.className }} - ingressClassName: {{ .Values.lfsProxy.ingress.className }} -{{- end }} -{{- if .Values.lfsProxy.ingress.tls }} - tls: -{{- range .Values.lfsProxy.ingress.tls }} - - hosts: -{{- range .hosts }} - - {{ . | quote }} -{{- end }} - secretName: {{ .secretName }} -{{- end }} -{{- end }} - rules: -{{- range .Values.lfsProxy.ingress.hosts }} - - host: {{ .host | quote }} - http: - paths: -{{- range .paths }} - - path: {{ .path }} - pathType: {{ .pathType }} - backend: - service: - name: {{ include "kafscale.componentName" (dict "root" $ "component" "lfs-proxy") }} - port: - number: {{ $.Values.lfsProxy.http.port }} -{{- end }} -{{- end }} -{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-metrics-service.yaml b/deploy/helm/kafscale/templates/lfs-proxy-metrics-service.yaml deleted file mode 100644 index 6183ad4e..00000000 --- a/deploy/helm/kafscale/templates/lfs-proxy-metrics-service.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). 
-# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -{{- if .Values.lfsProxy.metrics.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }}-metrics - labels: -{{ include "kafscale.labels" . | indent 4 }} - app.kubernetes.io/component: lfs-proxy -{{- with .Values.lfsProxy.metrics.service.annotations }} - annotations: -{{ toYaml . | indent 4 }} -{{- end }} -spec: - type: ClusterIP - ports: - - name: metrics - port: {{ .Values.lfsProxy.metrics.port }} - targetPort: metrics - selector: -{{ include "kafscale.componentSelectorLabels" (dict "root" . "component" "lfs-proxy") | indent 4 }} -{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-prometheusrule.yaml b/deploy/helm/kafscale/templates/lfs-proxy-prometheusrule.yaml deleted file mode 100644 index 3cd1c886..00000000 --- a/deploy/helm/kafscale/templates/lfs-proxy-prometheusrule.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -{{- if and .Values.lfsProxy.metrics.enabled .Values.lfsProxy.metrics.prometheusRule.enabled }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }} - labels: -{{ include "kafscale.labels" . | indent 4 }} -{{- with .Values.lfsProxy.metrics.prometheusRule.labels }} -{{ toYaml . | indent 4 }} -{{- end }} -spec: - groups: - - name: kafscale-lfs-proxy.rules - rules: - - alert: KafscaleLfsProxyS3Errors - expr: increase(kafscale_lfs_proxy_s3_errors_total[5m]) > 0 - for: 5m - labels: - severity: warning - annotations: - summary: LFS proxy S3 errors detected - description: LFS proxy is encountering S3 errors in the last 5 minutes. - - alert: KafscaleLfsProxyOrphanedObjects - expr: increase(kafscale_lfs_proxy_orphan_objects_total[10m]) > 0 - for: 10m - labels: - severity: warning - annotations: - summary: LFS proxy orphaned objects detected - description: LFS proxy created orphaned objects in the last 10 minutes. -{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-service.yaml b/deploy/helm/kafscale/templates/lfs-proxy-service.yaml deleted file mode 100644 index 06dd1c73..00000000 --- a/deploy/helm/kafscale/templates/lfs-proxy-service.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -{{- if .Values.lfsProxy.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }} - labels: -{{ include "kafscale.labels" . | indent 4 }} - app.kubernetes.io/component: lfs-proxy - {{- with .Values.lfsProxy.service.annotations }} - annotations: -{{ toYaml . | indent 4 }} - {{- end }} -spec: - type: {{ .Values.lfsProxy.service.type }} - {{- if .Values.lfsProxy.service.loadBalancerSourceRanges }} - loadBalancerSourceRanges: -{{ toYaml .Values.lfsProxy.service.loadBalancerSourceRanges | indent 4 }} - {{- end }} - selector: -{{ include "kafscale.componentSelectorLabels" (dict "root" . "component" "lfs-proxy") | indent 4 }} - ports: - - name: kafka - port: {{ .Values.lfsProxy.service.port }} - targetPort: kafka - protocol: TCP -{{- if .Values.lfsProxy.http.enabled }} - - name: http - port: {{ .Values.lfsProxy.http.port }} - targetPort: http - protocol: TCP -{{- end }} -{{- end }} diff --git a/deploy/helm/kafscale/templates/lfs-proxy-servicemonitor.yaml b/deploy/helm/kafscale/templates/lfs-proxy-servicemonitor.yaml deleted file mode 100644 index 1d9548e6..00000000 --- a/deploy/helm/kafscale/templates/lfs-proxy-servicemonitor.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -# This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -{{- if and .Values.lfsProxy.metrics.enabled .Values.lfsProxy.metrics.serviceMonitor.enabled }} -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: {{ include "kafscale.componentName" (dict "root" . "component" "lfs-proxy") }} - labels: -{{ include "kafscale.labels" . | indent 4 }} -{{- with .Values.lfsProxy.metrics.serviceMonitor.labels }} -{{ toYaml . | indent 4 }} -{{- end }} -spec: - selector: - matchLabels: -{{ include "kafscale.componentSelectorLabels" (dict "root" . 
"component" "lfs-proxy") | indent 6 }} - endpoints: - - port: metrics - interval: {{ .Values.lfsProxy.metrics.serviceMonitor.interval }} - scrapeTimeout: {{ .Values.lfsProxy.metrics.serviceMonitor.scrapeTimeout }} -{{- end }} diff --git a/deploy/helm/kafscale/values.yaml b/deploy/helm/kafscale/values.yaml index 93106932..a5ae4f9e 100644 --- a/deploy/helm/kafscale/values.yaml +++ b/deploy/helm/kafscale/values.yaml @@ -152,90 +152,9 @@ proxy: annotations: {} loadBalancerSourceRanges: [] -lfsProxy: - enabled: false - replicaCount: 2 - image: - repository: ghcr.io/kafscale/kafscale-lfs-proxy - tag: "" - useLatest: false - pullPolicy: IfNotPresent - health: - enabled: true - port: 9094 - metrics: - enabled: true - port: 9095 - service: - annotations: {} - serviceMonitor: - enabled: false - interval: 30s - scrapeTimeout: 10s - labels: {} - prometheusRule: - enabled: false - labels: {} - http: - enabled: false # Disabled by default for security; enable with apiKey set - port: 8080 - apiKey: "" # Required when http.enabled=true - cors: - enabled: false # Enable for browser access - allowOrigins: ["*"] # Restrict in production - allowMethods: ["POST", "OPTIONS"] - allowHeaders: ["Content-Type", "X-Kafka-Topic", "X-Kafka-Key", "X-Kafka-Partition", "X-LFS-Checksum", "X-LFS-Checksum-Alg", "X-LFS-Size", "X-LFS-Mode", "X-Request-ID", "X-API-Key", "Authorization"] - exposeHeaders: ["X-Request-ID"] - ingress: - enabled: false - className: "" - annotations: {} - hosts: - - host: lfs.local - paths: - - path: /lfs - pathType: Prefix - tls: [] - backendCacheTTLSeconds: 60 - advertisedHost: "" - advertisedPort: 9092 - etcdEndpoints: [] - etcd: - existingSecret: "" # Name of existing Secret with ETCD_USERNAME and ETCD_PASSWORD keys - username: "" - password: null # Set via --set or use existingSecret - backends: [] - s3: - bucket: "" - region: "" - endpoint: "" - # Credentials: use existingSecret (preferred) or inline values (not recommended) - existingSecret: "" # Name of existing 
Secret with AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY keys - accessKey: "" # Deprecated: use existingSecret instead - secretKey: "" # Deprecated: use existingSecret instead - sessionToken: "" - forcePathStyle: false - ensureBucket: false - maxBlobSize: 7516192768 - chunkSize: 16777216 - tracker: - enabled: true - topic: "__lfs_ops_state" - batchSize: 100 - flushMs: 100 - ensureTopic: true - partitions: 3 - replicationFactor: 1 - podAnnotations: {} - resources: {} - nodeSelector: {} - tolerations: [] - affinity: {} - service: - type: ClusterIP # Changed from LoadBalancer for security; use Ingress for external access - port: 9092 - annotations: {} - loadBalancerSourceRanges: [] +# NOTE: The standalone lfs-proxy has been removed. +# LFS is now a feature-flag on the unified proxy: set proxy.lfs.enabled=true. +# See proxy values above for LFS configuration. lfsDemos: enabled: false diff --git a/test/e2e/lfs_iceberg_processor_test.go b/test/e2e/lfs_iceberg_processor_test.go deleted file mode 100644 index 608869d4..00000000 --- a/test/e2e/lfs_iceberg_processor_test.go +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build e2e - -package e2e - -import ( - "bytes" - "context" - "fmt" - "io" - "os" - "os/exec" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/twmb/franz-go/pkg/kgo" -) - -func TestLfsIcebergProcessorE2E(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - required := []string{ - "KAFSCALE_E2E_S3_ENDPOINT", - "KAFSCALE_E2E_S3_BUCKET", - "KAFSCALE_E2E_S3_REGION", - "KAFSCALE_E2E_S3_ACCESS_KEY", - "KAFSCALE_E2E_S3_SECRET_KEY", - "ICEBERG_PROCESSOR_CATALOG_URI", - "ICEBERG_PROCESSOR_WAREHOUSE", - } - for _, key := range required { - if os.Getenv(key) == "" { - t.Skipf("%s not set", key) - } - } - - ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) - t.Cleanup(cancel) - - etcd, endpoints := startEmbeddedEtcd(t) - t.Cleanup(func() { etcd.Close() }) - - brokerAddr := freeAddr(t) - metricsAddr := freeAddr(t) - controlAddr := freeAddr(t) - - brokerCmd, brokerLogs := startBrokerWithEtcdS3ForIceberg(t, ctx, brokerAddr, metricsAddr, controlAddr, endpoints) - t.Cleanup(func() { stopBroker(t, brokerCmd) }) - waitForBroker(t, brokerLogs, brokerAddr) - - proxyPort := pickFreePort(t) - healthPort := pickFreePort(t) - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(endpoints, ",")), - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_BUCKET=%s", os.Getenv("KAFSCALE_E2E_LFS_BUCKET")), - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_REGION=%s", 
os.Getenv("KAFSCALE_E2E_S3_REGION")), - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", os.Getenv("KAFSCALE_E2E_S3_ENDPOINT")), - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=%s", os.Getenv("KAFSCALE_E2E_S3_ACCESS_KEY")), - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_SECRET_KEY=%s", os.Getenv("KAFSCALE_E2E_S3_SECRET_KEY")), - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - if os.Getenv("KAFSCALE_E2E_LFS_BUCKET") == "" { - proxyCmd.Env = append(proxyCmd.Env, fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_BUCKET=%s", os.Getenv("KAFSCALE_E2E_S3_BUCKET"))) - } - var proxyLogs bytes.Buffer - proxyCmd.Stdout = io.MultiWriter(&proxyLogs, mustLogFile(t, "lfs-iceberg-proxy.log")) - proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - t.Cleanup(func() { _ = signalProcessGroup(proxyCmd, os.Interrupt) }) - waitForPortWithTimeout(t, fmt.Sprintf("127.0.0.1:%s", proxyPort), 10*time.Second) - - configPath := writeIcebergProcessorConfig(t, brokerAddr, endpoints) - processorCmd := exec.CommandContext(ctx, "go", "run", "./cmd/processor", "-config", configPath) - processorCmd.Dir = filepath.Join(repoRoot(t), "addons", "processors", "iceberg-processor") - configureProcessGroup(processorCmd) - var processorLogs bytes.Buffer - processorCmd.Stdout = io.MultiWriter(&processorLogs, mustLogFile(t, "lfs-iceberg-processor.log")) - processorCmd.Stderr = processorCmd.Stdout - if err := processorCmd.Start(); err != nil { - t.Fatalf("start iceberg-processor: %v", err) - } - t.Cleanup(func() { _ = signalProcessGroup(processorCmd, os.Interrupt) }) - - producer, err := kgo.NewClient( - kgo.SeedBrokers("127.0.0.1:"+proxyPort), - kgo.AllowAutoTopicCreation(), - kgo.DisableIdempotentWrite(), - ) - if err != nil { - t.Fatalf("create producer: %v", err) - } - defer producer.Close() - - topic := "lfs-iceberg-topic" - record := &kgo.Record{ - Topic: topic, - Key: []byte("k1"), - Value: []byte("hello world"), 
- Headers: []kgo.RecordHeader{{Key: "LFS_BLOB", Value: []byte("1")}}, - } - if res := producer.ProduceSync(ctx, record); res.FirstErr() != nil { - t.Fatalf("produce: %v", res.FirstErr()) - } - - waitForLog(t, &processorLogs, "sink write failed", 30*time.Second) - if strings.Contains(processorLogs.String(), "sink write failed") { - t.Fatalf("processor reported sink write failure") - } -} - -func startBrokerWithEtcdS3ForIceberg(t *testing.T, ctx context.Context, brokerAddr, metricsAddr, controlAddr string, endpoints []string) (*exec.Cmd, *bytes.Buffer) { - t.Helper() - brokerCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "broker")) - configureProcessGroup(brokerCmd) - brokerCmd.Env = append(os.Environ(), - "KAFSCALE_AUTO_CREATE_TOPICS=true", - "KAFSCALE_AUTO_CREATE_PARTITIONS=1", - fmt.Sprintf("KAFSCALE_BROKER_ADDR=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_METRICS_ADDR=%s", metricsAddr), - fmt.Sprintf("KAFSCALE_CONTROL_ADDR=%s", controlAddr), - fmt.Sprintf("KAFSCALE_ETCD_ENDPOINTS=%s", strings.Join(endpoints, ",")), - fmt.Sprintf("KAFSCALE_S3_BUCKET=%s", os.Getenv("KAFSCALE_E2E_S3_BUCKET")), - fmt.Sprintf("KAFSCALE_S3_REGION=%s", os.Getenv("KAFSCALE_E2E_S3_REGION")), - fmt.Sprintf("KAFSCALE_S3_ENDPOINT=%s", os.Getenv("KAFSCALE_E2E_S3_ENDPOINT")), - fmt.Sprintf("KAFSCALE_S3_ACCESS_KEY=%s", os.Getenv("KAFSCALE_E2E_S3_ACCESS_KEY")), - fmt.Sprintf("KAFSCALE_S3_SECRET_KEY=%s", os.Getenv("KAFSCALE_E2E_S3_SECRET_KEY")), - "KAFSCALE_S3_PATH_STYLE=true", - ) - var brokerLogs bytes.Buffer - logWriter := io.MultiWriter(&brokerLogs, mustLogFile(t, "broker-lfs-iceberg.log")) - brokerCmd.Stdout = logWriter - brokerCmd.Stderr = logWriter - if err := brokerCmd.Start(); err != nil { - t.Fatalf("start broker: %v", err) - } - return brokerCmd, &brokerLogs -} - -func writeIcebergProcessorConfig(t *testing.T, brokerAddr string, endpoints []string) string { - t.Helper() - config := fmt.Sprintf(`s3: - bucket: %s - namespace: default - region: %s - endpoint: 
%s - path_style: true -iceberg: - catalog: - type: %s - uri: %s - token: "%s" - warehouse: %s -offsets: - backend: etcd - lease_ttl_seconds: 30 - key_prefix: processors -discovery: - mode: auto -etcd: - endpoints: - - %s -schema: - mode: "off" -mappings: - - topic: lfs-iceberg-topic - table: default.lfs_iceberg_topic - mode: append - create_table_if_missing: true - lfs: - mode: resolve - max_inline_size: 1048576 - store_metadata: true - validate_checksum: true - resolve_concurrency: 2 -`, - os.Getenv("KAFSCALE_E2E_S3_BUCKET"), - os.Getenv("KAFSCALE_E2E_S3_REGION"), - os.Getenv("KAFSCALE_E2E_S3_ENDPOINT"), - envOrDefault("ICEBERG_PROCESSOR_CATALOG_TYPE", "rest"), - os.Getenv("ICEBERG_PROCESSOR_CATALOG_URI"), - os.Getenv("ICEBERG_PROCESSOR_CATALOG_TOKEN"), - os.Getenv("ICEBERG_PROCESSOR_WAREHOUSE"), - endpoints[0], - ) - - path := filepath.Join(t.TempDir(), "config.yaml") - if err := os.WriteFile(path, []byte(config), 0644); err != nil { - t.Fatalf("write config: %v", err) - } - return path -} - -func waitForLog(t *testing.T, logs *bytes.Buffer, needle string, timeout time.Duration) { - t.Helper() - deadline := time.Now().Add(timeout) - for time.Now().Before(deadline) { - if strings.Contains(logs.String(), needle) { - return - } - time.Sleep(200 * time.Millisecond) - } -} - -func TestLfsIcebergQueryValidation(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - cmdLine := os.Getenv("KAFSCALE_E2E_QUERY_CMD") - if cmdLine == "" { - t.Skip("KAFSCALE_E2E_QUERY_CMD not set") - } - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - t.Cleanup(cancel) - - cmd := exec.CommandContext(ctx, "sh", "-c", cmdLine) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - t.Fatalf("query command failed: %v", err) - } -} diff --git a/test/e2e/lfs_proxy_broker_test.go b/test/e2e/lfs_proxy_broker_test.go deleted file mode 100644 index 
11799e06..00000000 --- a/test/e2e/lfs_proxy_broker_test.go +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build e2e - -package e2e - -import ( - "bytes" - "context" - "crypto/rand" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "fmt" - "io" - "net" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" - "testing" - "time" - - "github.com/KafScale/platform/pkg/lfs" - "github.com/KafScale/platform/pkg/metadata" - "github.com/KafScale/platform/pkg/protocol" - "github.com/twmb/franz-go/pkg/kgo" -) - -func TestLfsProxyBrokerE2E(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - t.Cleanup(cancel) - - s3Server := newFakeS3Server(t) - t.Cleanup(s3Server.Close) - - etcd, endpoints := startEmbeddedEtcd(t) - t.Cleanup(func() { - etcd.Close() - }) - - brokerAddr := freeAddr(t) - metricsAddr := freeAddr(t) - controlAddr := freeAddr(t) - - brokerHost, brokerPort := splitHostPort(t, brokerAddr) - store, err := metadata.NewEtcdStore(ctx, metadata.ClusterMetadata{ - Brokers: []protocol.MetadataBroker{{ - NodeID: 0, - Host: brokerHost, - Port: brokerPort, - }}, - }, 
metadata.EtcdStoreConfig{Endpoints: endpoints}) - if err != nil { - t.Fatalf("create etcd store: %v", err) - } - - topic := "lfs-broker-topic" - if _, err := store.CreateTopic(ctx, metadata.TopicSpec{ - Name: topic, - NumPartitions: 1, - ReplicationFactor: 1, - }); err != nil { - t.Fatalf("create topic: %v", err) - } - - brokerCmd, brokerLogs := startBrokerWithEtcd(t, ctx, brokerAddr, metricsAddr, controlAddr, endpoints) - t.Cleanup(func() { stopBroker(t, brokerCmd) }) - waitForBroker(t, brokerLogs, brokerAddr) - - proxyPort := pickFreePort(t) - healthPort := pickFreePort(t) - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(endpoints, ",")), - "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-e2e-broker", - "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), - "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - var proxyLogs bytes.Buffer - proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-broker.log")} - proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) 
- proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - t.Cleanup(func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - done := make(chan struct{}) - go func() { - _ = proxyCmd.Wait() - close(done) - }() - select { - case <-done: - case <-time.After(2 * time.Second): - _ = signalProcessGroup(proxyCmd, os.Kill) - } - }) - waitForPortWithTimeout(t, "127.0.0.1:"+proxyPort, 15*time.Second) - - producer, err := kgo.NewClient( - kgo.SeedBrokers("127.0.0.1:"+proxyPort), - kgo.AllowAutoTopicCreation(), - kgo.DisableIdempotentWrite(), - ) - if err != nil { - t.Fatalf("create producer: %v", err) - } - defer producer.Close() - - consumer, err := kgo.NewClient( - kgo.SeedBrokers(brokerAddr), - kgo.ConsumeTopics(topic), - kgo.ConsumerGroup("lfs-proxy-broker-e2e"), - kgo.BlockRebalanceOnPoll(), - ) - if err != nil { - t.Fatalf("create consumer: %v", err) - } - consumerClosed := false - defer func() { - if !consumerClosed { - consumer.CloseAllowingRebalance() - } - }() - - blob := make([]byte, 1024) - if _, err := rand.Read(blob); err != nil { - t.Fatalf("generate blob: %v", err) - } - - record := &kgo.Record{ - Topic: topic, - Key: []byte("test-key"), - Value: blob, - Headers: []kgo.RecordHeader{ - {Key: "LFS_BLOB", Value: nil}, - }, - } - res := producer.ProduceSync(ctx, record) - if err := res.FirstErr(); err != nil { - t.Fatalf("produce: %v\nproxy logs:\n%s\nbroker logs:\n%s", err, proxyLogs.String(), brokerLogs.String()) - } - - deadline := time.Now().Add(15 * time.Second) - for { - if time.Now().After(deadline) { - t.Fatalf("timed out waiting for broker record\nproxy logs:\n%s\nbroker logs:\n%s", proxyLogs.String(), brokerLogs.String()) - } - fetches := consumer.PollFetches(ctx) - if errs := fetches.Errors(); len(errs) > 0 { - t.Fatalf("fetch errors: %+v\nproxy logs:\n%s\nbroker logs:\n%s", errs, proxyLogs.String(), brokerLogs.String()) - } - var got []byte - fetches.EachRecord(func(r *kgo.Record) { - 
if r.Topic != topic || got != nil { - return - } - got = append([]byte(nil), r.Value...) - }) - if got == nil { - time.Sleep(200 * time.Millisecond) - continue - } - if !lfs.IsLfsEnvelope(got) { - t.Fatalf("expected LFS envelope, got: %s", string(got)) - } - var env lfs.Envelope - if err := json.Unmarshal(got, &env); err != nil { - t.Fatalf("decode envelope: %v", err) - } - expectedHash := sha256.Sum256(blob) - expectedChecksum := hex.EncodeToString(expectedHash[:]) - if env.SHA256 != expectedChecksum { - t.Fatalf("SHA256 = %s, want %s", env.SHA256, expectedChecksum) - } - s3Key := env.Key - s3Server.mu.Lock() - storedBlob, ok := s3Server.objects["lfs-e2e-broker/"+s3Key] - s3Server.mu.Unlock() - if !ok { - t.Fatalf("blob not found in S3 at key: %s", s3Key) - } - if !bytes.Equal(storedBlob, blob) { - t.Fatalf("stored blob does not match original") - } - consumer.CloseAllowingRebalance() - consumerClosed = true - return - } -} - -func splitHostPort(t *testing.T, addr string) (string, int32) { - t.Helper() - host, portStr, err := net.SplitHostPort(addr) - if err != nil { - t.Fatalf("split addr %s: %v", addr, err) - } - port, err := strconv.Atoi(portStr) - if err != nil { - t.Fatalf("parse port %s: %v", portStr, err) - } - return host, int32(port) -} diff --git a/test/e2e/lfs_proxy_etcd_test.go b/test/e2e/lfs_proxy_etcd_test.go deleted file mode 100644 index e383294e..00000000 --- a/test/e2e/lfs_proxy_etcd_test.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build e2e - -package e2e - -import ( - "context" - "errors" - "testing" - "time" - - "github.com/KafScale/platform/pkg/metadata" - "github.com/KafScale/platform/pkg/protocol" -) - -func startLfsProxyEtcd(t *testing.T, brokerHost string, brokerPort int32, topics ...string) []string { - t.Helper() - etcd, endpoints := startEmbeddedEtcd(t) - t.Cleanup(func() { - etcd.Close() - }) - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - t.Cleanup(cancel) - - store, err := metadata.NewEtcdStore(ctx, metadata.ClusterMetadata{ - Brokers: []protocol.MetadataBroker{{ - NodeID: 0, - Host: brokerHost, - Port: brokerPort, - }}, - }, metadata.EtcdStoreConfig{Endpoints: endpoints}) - if err != nil { - t.Fatalf("create etcd store: %v", err) - } - - for _, topic := range topics { - if topic == "" { - continue - } - if _, err := store.CreateTopic(ctx, metadata.TopicSpec{ - Name: topic, - NumPartitions: 1, - ReplicationFactor: 1, - }); err != nil && !errors.Is(err, metadata.ErrTopicExists) { - t.Fatalf("create topic %s: %v", topic, err) - } - } - - return endpoints -} diff --git a/test/e2e/lfs_proxy_http_test.go b/test/e2e/lfs_proxy_http_test.go deleted file mode 100644 index d06ca060..00000000 --- a/test/e2e/lfs_proxy_http_test.go +++ /dev/null @@ -1,641 +0,0 @@ -// Copyright 2025 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build e2e - -package e2e - -import ( - "bytes" - "context" - "crypto/sha256" - "encoding/base64" - "encoding/binary" - "encoding/hex" - "encoding/json" - "fmt" - "io" - "net" - "net/http" - "net/http/httptest" - "os" - "os/exec" - "path/filepath" - "strings" - "sync" - "testing" - "time" - - "github.com/KafScale/platform/pkg/lfs" - "github.com/KafScale/platform/pkg/protocol" - "github.com/twmb/franz-go/pkg/kmsg" -) - -func TestLfsProxyHTTPProduce(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - t.Cleanup(cancel) - - s3Server := newFakeS3Server(t) - t.Cleanup(s3Server.Close) - - brokerAddr, received, closeBackend := startFakeKafkaBackend(t) - // Start embedded etcd and seed topics for metadata responses - etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "http-limited") - t.Cleanup(closeBackend) - - proxyPort := pickFreePort(t) - httpPort := pickFreePort(t) - healthPort := pickFreePort(t) - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - 
fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HTTP_ADDR=127.0.0.1:%s", httpPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), - "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-e2e", - "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), - "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - var proxyLogs bytes.Buffer - proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-http.log")} - proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) - proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - t.Cleanup(func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - done := make(chan struct{}) - go func() { - _ = proxyCmd.Wait() - close(done) - }() - select { - case <-done: - case <-time.After(2 * time.Second): - _ = signalProcessGroup(proxyCmd, os.Kill) - } - }) - waitForPortWithTimeout(t, "127.0.0.1:"+httpPort, 15*time.Second) - - payload := []byte("hello-lfs-stream") - checksum := sha256.Sum256(payload) - checksumHex := hex.EncodeToString(checksum[:]) - - req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%s/lfs/produce", httpPort), bytes.NewReader(payload)) - if err != nil { - t.Fatalf("build request: %v", err) - } - req.Header.Set("X-Kafka-Topic", "http-limited") - req.Header.Set("X-Kafka-Key", base64.StdEncoding.EncodeToString([]byte("key-1"))) - req.Header.Set("X-LFS-Checksum", checksumHex) - req.Header.Set("Content-Type", "application/octet-stream") - - resp, err := http.DefaultClient.Do(req) - if err != nil { - t.Fatalf("http produce 
failed: %v", err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - t.Fatalf("unexpected status %d: %s", resp.StatusCode, string(body)) - } - - var env lfs.Envelope - if err := json.NewDecoder(resp.Body).Decode(&env); err != nil { - t.Fatalf("decode response: %v", err) - } - if env.SHA256 != checksumHex { - t.Fatalf("checksum mismatch: %s", env.SHA256) - } - - deadline := time.After(10 * time.Second) - for { - select { - case value := <-received: - var got lfs.Envelope - if err := json.Unmarshal(value, &got); err != nil { - t.Fatalf("expected envelope json: %v", err) - } - if got.Key == "" || got.Bucket == "" { - t.Fatalf("unexpected envelope: %+v", got) - } - return - case <-deadline: - t.Fatalf("timed out waiting for backend record") - } - } -} - -func TestLfsProxyHTTPProduceRestart(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) - t.Cleanup(cancel) - - s3Server := newFakeS3Server(t) - t.Cleanup(s3Server.Close) - - brokerAddr, received, closeBackend := startFakeKafkaBackend(t) - etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "http-restart") - t.Cleanup(closeBackend) - - proxyPort := pickFreePort(t) - httpPort := pickFreePort(t) - healthPort := pickFreePort(t) - - startProxy := func() (*exec.Cmd, *bytes.Buffer) { - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HTTP_ADDR=127.0.0.1:%s", httpPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - 
fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), - "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-e2e", - "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), - "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - var proxyLogs bytes.Buffer - proxyCmd.Stdout = io.MultiWriter(&proxyLogs, mustLogFile(t, "lfs-proxy-http-restart.log")) - proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - return proxyCmd, &proxyLogs - } - - proxyCmd, _ := startProxy() - defer func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - _ = proxyCmd.Wait() - }() - waitForPortWithTimeout(t, "127.0.0.1:"+httpPort, 15*time.Second) - - slowPayload := bytes.Repeat([]byte("a"), 1024*1024) - req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%s/lfs/produce", httpPort), newSlowReader(slowPayload, 32*1024, 10*time.Millisecond)) - if err != nil { - t.Fatalf("build request: %v", err) - } - req.Header.Set("X-Kafka-Topic", "http-restart") - req.Header.Set("Content-Type", "application/octet-stream") - - clientErr := make(chan error, 1) - go func() { - resp, err := http.DefaultClient.Do(req) - if err == nil && resp != nil { - resp.Body.Close() - if resp.StatusCode >= 200 && resp.StatusCode < 300 { - clientErr <- nil - return - } - err = fmt.Errorf("status %d", resp.StatusCode) - } - clientErr <- err - }() - - time.Sleep(50 * time.Millisecond) - _ = signalProcessGroup(proxyCmd, os.Interrupt) - _ = proxyCmd.Wait() - - <-clientErr - - proxyCmd, _ = startProxy() - defer func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - _ = proxyCmd.Wait() - }() - waitForPortWithTimeout(t, "127.0.0.1:"+httpPort, 15*time.Second) - - 
payload := []byte("restart-ok") - req2, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%s/lfs/produce", httpPort), bytes.NewReader(payload)) - if err != nil { - t.Fatalf("build request: %v", err) - } - req2.Header.Set("X-Kafka-Topic", "http-restart") - req2.Header.Set("Content-Type", "application/octet-stream") - - resp, err := http.DefaultClient.Do(req2) - if err != nil { - t.Fatalf("http produce failed: %v", err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - t.Fatalf("unexpected status %d: %s", resp.StatusCode, string(body)) - } - - deadline := time.After(10 * time.Second) - for { - select { - case value := <-received: - var got lfs.Envelope - if err := json.Unmarshal(value, &got); err != nil { - t.Fatalf("expected envelope json: %v", err) - } - if got.Key == "" || got.Bucket == "" { - t.Fatalf("unexpected envelope: %+v", got) - } - return - case <-deadline: - t.Fatalf("timed out waiting for backend record") - } - } -} - -func TestLfsProxyHTTPBackendUnavailable(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - t.Cleanup(cancel) - - s3Server := newFakeS3Server(t) - t.Cleanup(s3Server.Close) - - brokerAddr, _, closeBackend := startFakeKafkaBackend(t) - etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "http-backend-down") - t.Cleanup(closeBackend) - - proxyPort := pickFreePort(t) - httpPort := pickFreePort(t) - healthPort := pickFreePort(t) - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", 
proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HTTP_ADDR=127.0.0.1:%s", httpPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), - "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-e2e", - "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), - "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - proxyCmd.Stdout = io.MultiWriter(mustLogFile(t, "lfs-proxy-http-backend-down.log")) - proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - t.Cleanup(func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - _ = proxyCmd.Wait() - }) - waitForPortWithTimeout(t, "127.0.0.1:"+httpPort, 15*time.Second) - - closeBackend() - - req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%s/lfs/produce", httpPort), bytes.NewReader([]byte("payload"))) - if err != nil { - t.Fatalf("build request: %v", err) - } - req.Header.Set("X-Kafka-Topic", "http-backend-down") - req.Header.Set("Content-Type", "application/octet-stream") - - resp, err := http.DefaultClient.Do(req) - if err != nil { - t.Fatalf("http produce failed: %v", err) - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusServiceUnavailable && resp.StatusCode != http.StatusBadGateway { - body, _ := io.ReadAll(resp.Body) - t.Fatalf("unexpected status %d: %s", resp.StatusCode, string(body)) - } - var body httpErrorResponse - if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { - t.Fatalf("decode error response: %v", err) - } - if body.Code == "" { - t.Fatalf("expected error code in response") - } -} - -func newSlowReader(payload []byte, chunk int, delay 
time.Duration) io.Reader { - return &slowReader{payload: payload, chunk: chunk, delay: delay} -} - -type slowReader struct { - payload []byte - chunk int - delay time.Duration - idx int -} - -func (r *slowReader) Read(p []byte) (int, error) { - if r.idx >= len(r.payload) { - return 0, io.EOF - } - if r.delay > 0 { - time.Sleep(r.delay) - } - end := r.idx + r.chunk - if end > len(r.payload) { - end = len(r.payload) - } - n := copy(p, r.payload[r.idx:end]) - r.idx += n - return n, nil -} - -type fakeS3Server struct { - *httptest.Server - mu sync.Mutex - buckets map[string]struct{} - uploads map[string]*multipartUpload - objects map[string][]byte - counter int64 -} - -type multipartUpload struct { - bucket string - key string - data []byte -} - -func newFakeS3Server(t *testing.T) *fakeS3Server { - t.Helper() - fs := &fakeS3Server{ - buckets: make(map[string]struct{}), - uploads: make(map[string]*multipartUpload), - objects: make(map[string][]byte), - } - handler := http.NewServeMux() - handler.HandleFunc("/", fs.serve) - fs.Server = httptest.NewServer(handler) - return fs -} - -func (f *fakeS3Server) serve(w http.ResponseWriter, r *http.Request) { - bucket, key := splitBucketKey(r.URL.Path) - switch r.Method { - case http.MethodHead: - f.headBucket(w, bucket) - return - case http.MethodPut: - if r.URL.Query().Get("partNumber") != "" && r.URL.Query().Get("uploadId") != "" { - f.uploadPart(w, r, bucket, key) - return - } - if key == "" { - f.putBucket(w, bucket) - return - } - f.putObject(w, r, bucket, key) - return - case http.MethodPost: - if _, ok := r.URL.Query()["uploads"]; ok { - f.createMultipart(w, bucket, key) - return - } - if r.URL.Query().Get("uploadId") != "" { - f.completeMultipart(w, r.URL.Query().Get("uploadId")) - return - } - } - http.Error(w, "not implemented", http.StatusNotImplemented) -} - -func (f *fakeS3Server) headBucket(w http.ResponseWriter, bucket string) { - f.mu.Lock() - defer f.mu.Unlock() - if _, ok := f.buckets[bucket]; !ok { - 
w.WriteHeader(http.StatusNotFound) - return - } - w.WriteHeader(http.StatusOK) -} - -func (f *fakeS3Server) putBucket(w http.ResponseWriter, bucket string) { - f.mu.Lock() - f.buckets[bucket] = struct{}{} - f.mu.Unlock() - w.WriteHeader(http.StatusOK) -} - -func (f *fakeS3Server) putObject(w http.ResponseWriter, r *http.Request, bucket, key string) { - body, _ := io.ReadAll(r.Body) - f.mu.Lock() - f.objects[bucket+"/"+key] = body - f.buckets[bucket] = struct{}{} - f.mu.Unlock() - w.Header().Set("ETag", "\"fake\"") - w.WriteHeader(http.StatusOK) -} - -func (f *fakeS3Server) createMultipart(w http.ResponseWriter, bucket, key string) { - f.mu.Lock() - f.counter++ - uploadID := fmt.Sprintf("upload-%d", f.counter) - f.uploads[uploadID] = &multipartUpload{bucket: bucket, key: key} - f.buckets[bucket] = struct{}{} - f.mu.Unlock() - w.Header().Set("Content-Type", "application/xml") - fmt.Fprintf(w, "%s", uploadID) -} - -func (f *fakeS3Server) uploadPart(w http.ResponseWriter, r *http.Request, bucket, key string) { - uploadID := r.URL.Query().Get("uploadId") - body, _ := io.ReadAll(r.Body) - f.mu.Lock() - upload := f.uploads[uploadID] - if upload != nil { - upload.data = append(upload.data, body...) 
- } - f.mu.Unlock() - w.Header().Set("ETag", "\"part\"") - w.WriteHeader(http.StatusOK) -} - -func (f *fakeS3Server) completeMultipart(w http.ResponseWriter, uploadID string) { - f.mu.Lock() - upload := f.uploads[uploadID] - if upload != nil { - f.objects[upload.bucket+"/"+upload.key] = upload.data - delete(f.uploads, uploadID) - } - f.mu.Unlock() - w.Header().Set("Content-Type", "application/xml") - fmt.Fprintf(w, "\"fake\"") -} - -func splitBucketKey(path string) (string, string) { - trimmed := strings.TrimPrefix(path, "/") - if trimmed == "" { - return "", "" - } - parts := strings.SplitN(trimmed, "/", 2) - bucket := parts[0] - if len(parts) == 1 { - return bucket, "" - } - return bucket, parts[1] -} - -func waitForPortWithTimeout(t *testing.T, addr string, timeout time.Duration) { - t.Helper() - deadline := time.After(timeout) - for { - conn, err := net.DialTimeout("tcp", addr, 200*time.Millisecond) - if err == nil { - _ = conn.Close() - return - } - select { - case <-deadline: - t.Fatalf("broker did not start listening on %s: %v", addr, err) - case <-time.After(100 * time.Millisecond): - } - } -} - -func startFakeKafkaBackend(t *testing.T) (string, <-chan []byte, func()) { - t.Helper() - ln, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatalf("listen: %v", err) - } - addr := ln.Addr().String() - received := make(chan []byte, 1) - done := make(chan struct{}) - go func() { - defer close(done) - for { - conn, err := ln.Accept() - if err != nil { - return - } - go handleKafkaConn(t, conn, received) - } - }() - return addr, received, func() { - _ = ln.Close() - <-done - } -} - -func handleKafkaConn(t *testing.T, conn net.Conn, received chan<- []byte) { - t.Helper() - defer conn.Close() - frame, err := protocol.ReadFrame(conn) - if err != nil { - return - } - header, req, err := protocol.ParseRequest(frame.Payload) - if err != nil { - return - } - prodReq, ok := req.(*protocol.ProduceRequest) - if !ok { - return - } - if len(prodReq.Topics) > 0 && 
len(prodReq.Topics[0].Partitions) > 0 { - records := prodReq.Topics[0].Partitions[0].Records - value := extractFirstRecordValue(records) - if len(value) > 0 { - select { - case received <- value: - default: - } - } - } - respPayload, _ := buildProduceResponse(prodReq, header.CorrelationID, header.APIVersion) - _ = protocol.WriteFrame(conn, respPayload) -} - -func buildProduceResponse(req *protocol.ProduceRequest, correlationID int32, version int16) ([]byte, error) { - resp := &kmsg.ProduceResponse{} - for _, topic := range req.Topics { - rt := kmsg.NewProduceResponseTopic() - rt.Topic = topic.Topic - for _, part := range topic.Partitions { - rp := kmsg.NewProduceResponseTopicPartition() - rp.Partition = part.Partition - rp.ErrorCode = protocol.NONE - rt.Partitions = append(rt.Partitions, rp) - } - resp.Topics = append(resp.Topics, rt) - } - return protocol.EncodeResponse(correlationID, version, resp), nil -} - -func extractFirstRecordValue(records []byte) []byte { - if len(records) == 0 { - return nil - } - var batch kmsg.RecordBatch - if err := batch.ReadFrom(records); err != nil { - return nil - } - raw := batch.Records - recordsOut := make([]kmsg.Record, int(batch.NumRecords)) - recordsOut = readRawRecordsInto(recordsOut, raw) - if len(recordsOut) == 0 { - return nil - } - return recordsOut[0].Value -} - -func readRawRecordsInto(rs []kmsg.Record, in []byte) []kmsg.Record { - for i := range rs { - length, used := binary.Varint(in) - total := used + int(length) - if used == 0 || length < 0 || len(in) < total { - return rs[:i] - } - if err := (&rs[i]).ReadFrom(in[:total]); err != nil { - rs[i] = kmsg.Record{} - return rs[:i] - } - in = in[total:] - } - return rs -} - -type httpErrorResponse struct { - Code string `json:"code"` - Message string `json:"message"` - RequestID string `json:"request_id"` -} diff --git a/test/e2e/lfs_proxy_test.go b/test/e2e/lfs_proxy_test.go deleted file mode 100644 index 401bb422..00000000 --- a/test/e2e/lfs_proxy_test.go +++ /dev/null 
@@ -1,462 +0,0 @@ -// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). -// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build e2e - -package e2e - -import ( - "bytes" - "context" - "crypto/rand" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "fmt" - "io" - "net/http" - "os" - "os/exec" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/KafScale/platform/pkg/lfs" - "github.com/twmb/franz-go/pkg/kgo" -) - -// TestLfsProxyKafkaProtocol tests the LFS proxy with native Kafka protocol. -// Uses franz-go client to produce messages with LFS_BLOB header. 
-func TestLfsProxyKafkaProtocol(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - t.Cleanup(cancel) - - // Start fake S3 server - s3Server := newFakeS3Server(t) - t.Cleanup(s3Server.Close) - - // Start fake Kafka backend - brokerAddr, received, closeBackend := startFakeKafkaBackend(t) - // Start embedded etcd and seed topics for metadata responses - etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "lfs-test-topic", "regular-topic", "checksum-test") - t.Cleanup(closeBackend) - - // Start LFS proxy - proxyPort := pickFreePort(t) - healthPort := pickFreePort(t) - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), - "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-test", - "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), - "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - var proxyLogs bytes.Buffer - proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-kafka.log")} - proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) 
- proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - t.Cleanup(func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - done := make(chan struct{}) - go func() { - _ = proxyCmd.Wait() - close(done) - }() - select { - case <-done: - case <-time.After(2 * time.Second): - _ = signalProcessGroup(proxyCmd, os.Kill) - } - }) - waitForPortWithTimeout(t, "127.0.0.1:"+proxyPort, 15*time.Second) - - // Create franz-go client pointing to proxy - client, err := kgo.NewClient( - kgo.SeedBrokers("127.0.0.1:"+proxyPort), - kgo.AllowAutoTopicCreation(), - ) - if err != nil { - t.Fatalf("create client: %v", err) - } - defer client.Close() - - // Generate random blob - blob := make([]byte, 1024) - if _, err := rand.Read(blob); err != nil { - t.Fatalf("generate blob: %v", err) - } - - // Produce with LFS_BLOB header - record := &kgo.Record{ - Topic: "lfs-test-topic", - Key: []byte("test-key"), - Value: blob, - Headers: []kgo.RecordHeader{ - {Key: "LFS_BLOB", Value: nil}, - }, - } - res := client.ProduceSync(ctx, record) - if err := res.FirstErr(); err != nil { - t.Fatalf("produce: %v", err) - } - - // Wait for backend to receive the envelope - deadline := time.After(10 * time.Second) - for { - select { - case value := <-received: - // Should receive an LFS envelope, not the original blob - if !lfs.IsLfsEnvelope(value) { - t.Fatalf("expected LFS envelope, got: %s", string(value)) - } - - var env lfs.Envelope - if err := json.Unmarshal(value, &env); err != nil { - t.Fatalf("decode envelope: %v", err) - } - - // Verify envelope fields - if env.Version != 1 { - t.Errorf("Version = %d, want 1", env.Version) - } - if env.Bucket != "lfs-test" { - t.Errorf("Bucket = %s, want lfs-test", env.Bucket) - } - if env.Size != int64(len(blob)) { - t.Errorf("Size = %d, want %d", env.Size, len(blob)) - } - - // Verify checksum matches - expectedHash := sha256.Sum256(blob) - expectedChecksum := hex.EncodeToString(expectedHash[:]) 
- if env.SHA256 != expectedChecksum { - t.Errorf("SHA256 = %s, want %s", env.SHA256, expectedChecksum) - } - - // Verify blob was stored in S3 - s3Key := env.Key - s3Server.mu.Lock() - storedBlob, ok := s3Server.objects["lfs-test/"+s3Key] - s3Server.mu.Unlock() - if !ok { - t.Errorf("blob not found in S3 at key: %s", s3Key) - } else if !bytes.Equal(storedBlob, blob) { - t.Errorf("stored blob does not match original") - } - - return - case <-deadline: - t.Fatalf("timed out waiting for backend record") - } - } -} - -// TestLfsProxyPassthrough tests that non-LFS messages pass through unchanged. -func TestLfsProxyPassthrough(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - t.Cleanup(cancel) - - // Start fake S3 server - s3Server := newFakeS3Server(t) - t.Cleanup(s3Server.Close) - - // Start fake Kafka backend - brokerAddr, received, closeBackend := startFakeKafkaBackend(t) - // Start embedded etcd and seed topics for metadata responses - etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "regular-topic") - t.Cleanup(closeBackend) - - // Start LFS proxy - proxyPort := pickFreePort(t) - healthPort := pickFreePort(t) - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), - "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-test", - "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", - 
fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), - "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - var proxyLogs bytes.Buffer - proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-passthrough.log")} - proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) - proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - t.Cleanup(func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - done := make(chan struct{}) - go func() { - _ = proxyCmd.Wait() - close(done) - }() - select { - case <-done: - case <-time.After(2 * time.Second): - _ = signalProcessGroup(proxyCmd, os.Kill) - } - }) - waitForPortWithTimeout(t, "127.0.0.1:"+proxyPort, 15*time.Second) - - // Create franz-go client - client, err := kgo.NewClient( - kgo.SeedBrokers("127.0.0.1:"+proxyPort), - kgo.AllowAutoTopicCreation(), - ) - if err != nil { - t.Fatalf("create client: %v", err) - } - defer client.Close() - - // Produce without LFS_BLOB header (regular message) - plainValue := []byte("regular message without LFS") - record := &kgo.Record{ - Topic: "regular-topic", - Key: []byte("key"), - Value: plainValue, - } - res := client.ProduceSync(ctx, record) - if err := res.FirstErr(); err != nil { - t.Fatalf("produce: %v", err) - } - - // Wait for backend to receive the message - deadline := time.After(10 * time.Second) - for { - select { - case value := <-received: - // Should receive the original message unchanged - if lfs.IsLfsEnvelope(value) { - t.Fatalf("expected plain message, got LFS envelope") - } - if !bytes.Equal(value, plainValue) { - t.Errorf("value = %q, want %q", value, plainValue) - } - return - case <-deadline: - t.Fatalf("timed out waiting for backend record") - } - } -} - -// TestLfsProxyChecksumValidation tests that checksum validation works. 
-func TestLfsProxyChecksumValidation(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - t.Cleanup(cancel) - - // Start fake S3 server - s3Server := newFakeS3Server(t) - t.Cleanup(s3Server.Close) - - // Start fake Kafka backend - brokerAddr, _, closeBackend := startFakeKafkaBackend(t) - // Start embedded etcd and seed topics for metadata responses - etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "checksum-test") - t.Cleanup(closeBackend) - - // Start LFS proxy - proxyPort := pickFreePort(t) - healthPort := pickFreePort(t) - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), - "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-test", - "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), - "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - var proxyLogs bytes.Buffer - proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-checksum.log")} - proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) 
- proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - t.Cleanup(func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - done := make(chan struct{}) - go func() { - _ = proxyCmd.Wait() - close(done) - }() - select { - case <-done: - case <-time.After(2 * time.Second): - _ = signalProcessGroup(proxyCmd, os.Kill) - } - }) - waitForPortWithTimeout(t, "127.0.0.1:"+proxyPort, 15*time.Second) - - // Create franz-go client - client, err := kgo.NewClient( - kgo.SeedBrokers("127.0.0.1:"+proxyPort), - kgo.AllowAutoTopicCreation(), - ) - if err != nil { - t.Fatalf("create client: %v", err) - } - defer client.Close() - - // Produce with wrong checksum in LFS_BLOB header - blob := []byte("test blob data") - wrongChecksum := "0000000000000000000000000000000000000000000000000000000000000000" - record := &kgo.Record{ - Topic: "checksum-test", - Key: []byte("key"), - Value: blob, - Headers: []kgo.RecordHeader{ - {Key: "LFS_BLOB", Value: []byte(wrongChecksum)}, - }, - } - res := client.ProduceSync(ctx, record) - err = res.FirstErr() - - // Should fail with checksum error - if err == nil { - t.Fatalf("expected checksum error, got nil") - } - t.Logf("got expected error: %v", err) -} - -// TestLfsProxyHealthEndpoint tests the health endpoints. 
-func TestLfsProxyHealthEndpoint(t *testing.T) { - const enableEnv = "KAFSCALE_E2E" - if os.Getenv(enableEnv) != "1" { - t.Skipf("set %s=1 to run integration harness", enableEnv) - } - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - t.Cleanup(cancel) - - // Start fake S3 server - s3Server := newFakeS3Server(t) - t.Cleanup(s3Server.Close) - - // Start fake Kafka backend - brokerAddr, _, closeBackend := startFakeKafkaBackend(t) - // Start embedded etcd and seed topics for metadata responses - etcdEndpoints := startLfsProxyEtcd(t, "127.0.0.1", 9092, "health-check") - t.Cleanup(closeBackend) - - // Start LFS proxy - proxyPort := pickFreePort(t) - healthPort := pickFreePort(t) - proxyCmd := exec.CommandContext(ctx, "go", "run", filepath.Join(repoRoot(t), "cmd", "lfs-proxy")) - configureProcessGroup(proxyCmd) - proxyCmd.Env = append(os.Environ(), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADDR=127.0.0.1:%s", proxyPort), - "KAFSCALE_LFS_PROXY_ADVERTISED_HOST=127.0.0.1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_ADVERTISED_PORT=%s", proxyPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_HEALTH_ADDR=127.0.0.1:%s", healthPort), - fmt.Sprintf("KAFSCALE_LFS_PROXY_BACKENDS=%s", brokerAddr), - fmt.Sprintf("KAFSCALE_LFS_PROXY_ETCD_ENDPOINTS=%s", strings.Join(etcdEndpoints, ",")), - "KAFSCALE_LFS_PROXY_S3_BUCKET=lfs-test", - "KAFSCALE_LFS_PROXY_S3_REGION=us-east-1", - fmt.Sprintf("KAFSCALE_LFS_PROXY_S3_ENDPOINT=%s", s3Server.URL), - "KAFSCALE_LFS_PROXY_S3_ACCESS_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_SECRET_KEY=fake", - "KAFSCALE_LFS_PROXY_S3_FORCE_PATH_STYLE=true", - "KAFSCALE_LFS_PROXY_S3_ENSURE_BUCKET=true", - ) - var proxyLogs bytes.Buffer - proxyWriterTargets := []io.Writer{&proxyLogs, mustLogFile(t, "lfs-proxy-health.log")} - proxyCmd.Stdout = io.MultiWriter(proxyWriterTargets...) 
- proxyCmd.Stderr = proxyCmd.Stdout - if err := proxyCmd.Start(); err != nil { - t.Fatalf("start lfs-proxy: %v", err) - } - t.Cleanup(func() { - _ = signalProcessGroup(proxyCmd, os.Interrupt) - done := make(chan struct{}) - go func() { - _ = proxyCmd.Wait() - close(done) - }() - select { - case <-done: - case <-time.After(2 * time.Second): - _ = signalProcessGroup(proxyCmd, os.Kill) - } - }) - waitForPortWithTimeout(t, "127.0.0.1:"+healthPort, 15*time.Second) - - // Test /livez endpoint - resp, err := http.Get(fmt.Sprintf("http://127.0.0.1:%s/livez", healthPort)) - if err != nil { - t.Fatalf("livez request failed: %v", err) - } - resp.Body.Close() - if resp.StatusCode != http.StatusOK { - t.Errorf("/livez status = %d, want 200", resp.StatusCode) - } - - // Test /readyz endpoint - resp, err = http.Get(fmt.Sprintf("http://127.0.0.1:%s/readyz", healthPort)) - if err != nil { - t.Fatalf("readyz request failed: %v", err) - } - resp.Body.Close() - if resp.StatusCode != http.StatusOK { - t.Errorf("/readyz status = %d, want 200", resp.StatusCode) - } -} From c688877f94a6b369c5a3c8638962dda69f3386e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20K=C3=A4mpf?= Date: Mon, 9 Mar 2026 17:56:57 +0100 Subject: [PATCH 5/6] fix: resolve CodeQL notices in Python LFS SDK - Remove unused 'Any' import in envelope.py - Add explanatory comment to empty except clause in producer.py Co-Authored-By: Claude Opus 4.6 --- lfs-client-sdk/python/lfs_sdk/envelope.py | 2 +- lfs-client-sdk/python/lfs_sdk/producer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lfs-client-sdk/python/lfs_sdk/envelope.py b/lfs-client-sdk/python/lfs_sdk/envelope.py index e6271007..979a647a 100644 --- a/lfs-client-sdk/python/lfs_sdk/envelope.py +++ b/lfs-client-sdk/python/lfs_sdk/envelope.py @@ -2,7 +2,7 @@ from dataclasses import dataclass import json -from typing import Any, Dict +from typing import Dict @dataclass diff --git a/lfs-client-sdk/python/lfs_sdk/producer.py 
b/lfs-client-sdk/python/lfs_sdk/producer.py index 782e1e43..2f868d18 100644 --- a/lfs-client-sdk/python/lfs_sdk/producer.py +++ b/lfs-client-sdk/python/lfs_sdk/producer.py @@ -110,7 +110,7 @@ def _send_with_retry(self, data: bytes, headers: Dict[str, str]) -> dict: message = err.get("message", body) request_id = err.get("request_id", request_id) except Exception: - pass + pass # response body is not JSON; use raw text as message http_error = LfsHttpException( status_code=resp.status_code, From 623f09f5182e801e5dcfd2ce1a778c04bf159ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20K=C3=A4mpf?= Date: Mon, 9 Mar 2026 19:31:32 +0100 Subject: [PATCH 6/6] test: add comprehensive LFS unit tests for unified proxy Add 123 new tests covering all LFS code paths in the unified proxy: - lfs_http_test.go (70 tests): HTTP handlers, validation, CORS, multipart sessions - lfs_test.go (53 tests): record encoding, compression, headers, metrics, histogram All 171 cmd/proxy tests pass (including pre-existing proxy tests). Co-Authored-By: Claude Opus 4.6 --- cmd/proxy/lfs_http_test.go | 1349 ++++++++++++++++++++++++++++++++++++ cmd/proxy/lfs_test.go | 884 +++++++++++++++++++++++ 2 files changed, 2233 insertions(+) create mode 100644 cmd/proxy/lfs_http_test.go diff --git a/cmd/proxy/lfs_http_test.go b/cmd/proxy/lfs_http_test.go new file mode 100644 index 00000000..20ba891e --- /dev/null +++ b/cmd/proxy/lfs_http_test.go @@ -0,0 +1,1349 @@ +// Copyright 2025-2026 Alexander Alten (novatechflow), NovaTechflow (novatechflow.com). +// This project is supported and financed by Scalytics, Inc. (www.scalytics.io). +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "encoding/json" + "errors" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" +) + +// testHTTPModule builds a fully-configured lfsModule for HTTP handler testing. +// It reuses the fakeS3/fakePresign types from lfs_test.go and populates the +// fields required by the HTTP API handlers. +func testHTTPModule(t *testing.T) *lfsModule { + t.Helper() + fs3 := newFakeS3() + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + m := &lfsModule{ + logger: logger, + s3Uploader: &s3Uploader{bucket: "test-bucket", region: "us-east-1", chunkSize: 5 << 20, api: fs3, presign: &fakePresign{}}, + s3Bucket: "test-bucket", + s3Namespace: "test-ns", + maxBlob: 5 << 30, + chunkSize: 5 << 20, + checksumAlg: "sha256", + proxyID: "test-proxy", + metrics: newLfsMetrics(), + tracker: &LfsOpsTracker{config: TrackerConfig{}, logger: logger}, + httpAPIKey: "test-secret-key", + topicMaxLength: 249, + downloadTTLMax: 2 * time.Minute, + uploadSessionTTL: 1 * time.Hour, + uploadSessions: make(map[string]*uploadSession), + } + atomic.StoreUint32(&m.s3Healthy, 1) + return m +} + +// --------------------------------------------------------------------------- +// 1. 
lfsValidateHTTPAPIKey +// --------------------------------------------------------------------------- + +func TestLfsValidateHTTPAPIKey_XAPIKeyHeader(t *testing.T) { + m := testHTTPModule(t) + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", nil) + req.Header.Set("X-API-Key", "test-secret-key") + if !m.lfsValidateHTTPAPIKey(req) { + t.Fatal("expected valid X-API-Key header to pass") + } +} + +func TestLfsValidateHTTPAPIKey_BearerToken(t *testing.T) { + m := testHTTPModule(t) + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", nil) + req.Header.Set("Authorization", "Bearer test-secret-key") + if !m.lfsValidateHTTPAPIKey(req) { + t.Fatal("expected valid Bearer token to pass") + } +} + +func TestLfsValidateHTTPAPIKey_EmptyKey(t *testing.T) { + m := testHTTPModule(t) + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", nil) + // No auth headers set + if m.lfsValidateHTTPAPIKey(req) { + t.Fatal("expected empty key to fail") + } +} + +func TestLfsValidateHTTPAPIKey_WrongKey(t *testing.T) { + m := testHTTPModule(t) + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", nil) + req.Header.Set("X-API-Key", "wrong-key") + if m.lfsValidateHTTPAPIKey(req) { + t.Fatal("expected wrong key to fail") + } +} + +func TestLfsValidateHTTPAPIKey_NilRequest(t *testing.T) { + m := testHTTPModule(t) + if m.lfsValidateHTTPAPIKey(nil) { + t.Fatal("expected nil request to fail") + } +} + +func TestLfsValidateHTTPAPIKey_BearerCaseInsensitive(t *testing.T) { + m := testHTTPModule(t) + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", nil) + req.Header.Set("Authorization", "BEARER test-secret-key") + if !m.lfsValidateHTTPAPIKey(req) { + t.Fatal("expected case-insensitive Bearer prefix to pass") + } +} + +// --------------------------------------------------------------------------- +// 2. 
lfsIsValidTopicName +// --------------------------------------------------------------------------- + +func TestLfsIsValidTopicName_Valid(t *testing.T) { + m := testHTTPModule(t) + valid := []string{"my-topic", "topic.name", "topic_name", "Topic123", "a", "A-B.C_D"} + for _, name := range valid { + if !m.lfsIsValidTopicName(name) { + t.Fatalf("expected %q to be valid", name) + } + } +} + +func TestLfsIsValidTopicName_Empty(t *testing.T) { + m := testHTTPModule(t) + if m.lfsIsValidTopicName("") { + t.Fatal("expected empty topic to be invalid") + } +} + +func TestLfsIsValidTopicName_TooLong(t *testing.T) { + m := testHTTPModule(t) + long := strings.Repeat("a", 250) + if m.lfsIsValidTopicName(long) { + t.Fatal("expected 250-char topic to be invalid") + } +} + +func TestLfsIsValidTopicName_ExactMaxLength(t *testing.T) { + m := testHTTPModule(t) + exact := strings.Repeat("a", 249) + if !m.lfsIsValidTopicName(exact) { + t.Fatal("expected 249-char topic to be valid") + } +} + +func TestLfsIsValidTopicName_SpecialChars(t *testing.T) { + m := testHTTPModule(t) + invalid := []string{"topic name", "topic/name", "topic@name", "topic#name", "topic$name", "topic!name"} + for _, name := range invalid { + if m.lfsIsValidTopicName(name) { + t.Fatalf("expected %q to be invalid", name) + } + } +} + +// --------------------------------------------------------------------------- +// 3. 
lfsValidateObjectKey +// --------------------------------------------------------------------------- + +func TestLfsValidateObjectKey_Valid(t *testing.T) { + m := testHTTPModule(t) + err := m.lfsValidateObjectKey("test-ns/my-topic/lfs/2025/01/01/obj-abc") + if err != nil { + t.Fatalf("expected valid key, got: %v", err) + } +} + +func TestLfsValidateObjectKey_AbsolutePath(t *testing.T) { + m := testHTTPModule(t) + err := m.lfsValidateObjectKey("/test-ns/my-topic/lfs/2025/01/01/obj-abc") + if err == nil { + t.Fatal("expected error for absolute path") + } + if !strings.Contains(err.Error(), "relative") { + t.Fatalf("expected 'relative' in error, got: %v", err) + } +} + +func TestLfsValidateObjectKey_ParentRefs(t *testing.T) { + m := testHTTPModule(t) + err := m.lfsValidateObjectKey("test-ns/../secret/lfs/data") + if err == nil { + t.Fatal("expected error for parent refs") + } + if !strings.Contains(err.Error(), "..") { + t.Fatalf("expected '..' in error, got: %v", err) + } +} + +func TestLfsValidateObjectKey_WrongNamespace(t *testing.T) { + m := testHTTPModule(t) + err := m.lfsValidateObjectKey("wrong-ns/my-topic/lfs/2025/01/01/obj-abc") + if err == nil { + t.Fatal("expected error for wrong namespace") + } + if !strings.Contains(err.Error(), "namespace") { + t.Fatalf("expected 'namespace' in error, got: %v", err) + } +} + +func TestLfsValidateObjectKey_MissingLFSSegment(t *testing.T) { + m := testHTTPModule(t) + err := m.lfsValidateObjectKey("test-ns/my-topic/data/2025/01/01/obj-abc") + if err == nil { + t.Fatal("expected error for missing /lfs/ segment") + } + if !strings.Contains(err.Error(), "/lfs/") { + t.Fatalf("expected '/lfs/' in error, got: %v", err) + } +} + +// --------------------------------------------------------------------------- +// 4. 
lfsGetClientIP +// --------------------------------------------------------------------------- + +func TestLfsGetClientIP_XForwardedForSingle(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + req.Header.Set("X-Forwarded-For", "1.2.3.4") + ip := lfsGetClientIP(req) + if ip != "1.2.3.4" { + t.Fatalf("expected 1.2.3.4, got %s", ip) + } +} + +func TestLfsGetClientIP_XForwardedForMultiple(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + req.Header.Set("X-Forwarded-For", "1.2.3.4, 5.6.7.8, 9.10.11.12") + ip := lfsGetClientIP(req) + if ip != "1.2.3.4" { + t.Fatalf("expected 1.2.3.4 (first in chain), got %s", ip) + } +} + +func TestLfsGetClientIP_XRealIP(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + req.Header.Set("X-Real-IP", "10.0.0.1") + ip := lfsGetClientIP(req) + if ip != "10.0.0.1" { + t.Fatalf("expected 10.0.0.1, got %s", ip) + } +} + +func TestLfsGetClientIP_RemoteAddrFallback(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + req.RemoteAddr = "192.168.1.1:12345" + ip := lfsGetClientIP(req) + if ip != "192.168.1.1" { + t.Fatalf("expected 192.168.1.1, got %s", ip) + } +} + +func TestLfsGetClientIP_RemoteAddrNoPort(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + req.RemoteAddr = "192.168.1.1" + ip := lfsGetClientIP(req) + if ip != "192.168.1.1" { + t.Fatalf("expected 192.168.1.1, got %s", ip) + } +} + +func TestLfsGetClientIP_XForwardedForPrecedence(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + req.Header.Set("X-Forwarded-For", "1.1.1.1") + req.Header.Set("X-Real-IP", "2.2.2.2") + req.RemoteAddr = "3.3.3.3:999" + ip := lfsGetClientIP(req) + if ip != "1.1.1.1" { + t.Fatalf("expected X-Forwarded-For to take precedence, got %s", ip) + } +} + +// --------------------------------------------------------------------------- +// 5. 
lfsStatusForUploadError +// --------------------------------------------------------------------------- + +func TestLfsStatusForUploadError_ExceedsMax(t *testing.T) { + status, code := lfsStatusForUploadError(errors.New("upload exceeds max size")) + if status != http.StatusBadRequest || code != "payload_too_large" { + t.Fatalf("expected 400/payload_too_large, got %d/%s", status, code) + } +} + +func TestLfsStatusForUploadError_EmptyUpload(t *testing.T) { + status, code := lfsStatusForUploadError(errors.New("empty upload")) + if status != http.StatusBadRequest || code != "empty_upload" { + t.Fatalf("expected 400/empty_upload, got %d/%s", status, code) + } +} + +func TestLfsStatusForUploadError_S3KeyRequired(t *testing.T) { + status, code := lfsStatusForUploadError(errors.New("s3 key required")) + if status != http.StatusBadRequest || code != "invalid_key" { + t.Fatalf("expected 400/invalid_key, got %d/%s", status, code) + } +} + +func TestLfsStatusForUploadError_ReaderRequired(t *testing.T) { + status, code := lfsStatusForUploadError(errors.New("reader required")) + if status != http.StatusBadRequest || code != "invalid_reader" { + t.Fatalf("expected 400/invalid_reader, got %d/%s", status, code) + } +} + +func TestLfsStatusForUploadError_Default(t *testing.T) { + status, code := lfsStatusForUploadError(errors.New("some unknown s3 error")) + if status != http.StatusBadGateway || code != "s3_upload_failed" { + t.Fatalf("expected 502/s3_upload_failed, got %d/%s", status, code) + } +} + +// --------------------------------------------------------------------------- +// 6. 
lfsCORSMiddleware +// --------------------------------------------------------------------------- + +func TestLfsCORSMiddleware_Preflight(t *testing.T) { + m := testHTTPModule(t) + innerCalled := false + inner := func(w http.ResponseWriter, r *http.Request) { + innerCalled = true + } + handler := m.lfsCORSMiddleware(inner) + + req := httptest.NewRequest(http.MethodOptions, "/lfs/produce", nil) + rr := httptest.NewRecorder() + handler(rr, req) + + if rr.Code != http.StatusNoContent { + t.Fatalf("expected 204, got %d", rr.Code) + } + if innerCalled { + t.Fatal("inner handler should not be called on OPTIONS preflight") + } + if rr.Header().Get("Access-Control-Allow-Origin") != "*" { + t.Fatal("expected Access-Control-Allow-Origin: *") + } + if rr.Header().Get("Access-Control-Allow-Methods") == "" { + t.Fatal("expected Access-Control-Allow-Methods header") + } + if rr.Header().Get("Access-Control-Allow-Headers") == "" { + t.Fatal("expected Access-Control-Allow-Headers header") + } + if rr.Header().Get("Access-Control-Expose-Headers") != "X-Request-ID" { + t.Fatal("expected Access-Control-Expose-Headers: X-Request-ID") + } +} + +func TestLfsCORSMiddleware_NormalRequest(t *testing.T) { + m := testHTTPModule(t) + innerCalled := false + inner := func(w http.ResponseWriter, r *http.Request) { + innerCalled = true + w.WriteHeader(http.StatusOK) + } + handler := m.lfsCORSMiddleware(inner) + + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", nil) + rr := httptest.NewRecorder() + handler(rr, req) + + if !innerCalled { + t.Fatal("inner handler should be called for non-OPTIONS requests") + } + if rr.Header().Get("Access-Control-Allow-Origin") != "*" { + t.Fatal("expected CORS headers on normal request") + } +} + +// --------------------------------------------------------------------------- +// 7. 
handleHTTPProduce +// --------------------------------------------------------------------------- + +func TestHandleHTTPProduce_MethodNotAllowed(t *testing.T) { + m := testHTTPModule(t) + // Disable API key check for this test + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodGet, "/lfs/produce", nil) + rr := httptest.NewRecorder() + m.handleHTTPProduce(rr, req) + + if rr.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "method_not_allowed" { + t.Fatalf("expected code=method_not_allowed, got %s", errResp.Code) + } +} + +func TestHandleHTTPProduce_Unauthorized(t *testing.T) { + m := testHTTPModule(t) + + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", nil) + // No auth headers + rr := httptest.NewRecorder() + m.handleHTTPProduce(rr, req) + + if rr.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", rr.Code) + } +} + +func TestHandleHTTPProduce_MissingTopic(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", strings.NewReader("data")) + rr := httptest.NewRecorder() + m.handleHTTPProduce(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "missing_topic" { + t.Fatalf("expected code=missing_topic, got %s", errResp.Code) + } +} + +func TestHandleHTTPProduce_InvalidTopic(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", strings.NewReader("data")) + req.Header.Set("X-Kafka-Topic", "invalid topic!") + rr := httptest.NewRecorder() + 
m.handleHTTPProduce(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_topic" { + t.Fatalf("expected code=invalid_topic, got %s", errResp.Code) + } +} + +func TestHandleHTTPProduce_S3Unhealthy(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + atomic.StoreUint32(&m.s3Healthy, 0) + + req := httptest.NewRequest(http.MethodPost, "/lfs/produce", strings.NewReader("data")) + req.Header.Set("X-Kafka-Topic", "test-topic") + rr := httptest.NewRecorder() + m.handleHTTPProduce(rr, req) + + if rr.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "proxy_not_ready" { + t.Fatalf("expected code=proxy_not_ready, got %s", errResp.Code) + } +} + +func TestHandleHTTPProduce_RequestIDEcho(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodGet, "/lfs/produce", nil) + req.Header.Set("X-Request-ID", "my-custom-id") + rr := httptest.NewRecorder() + m.handleHTTPProduce(rr, req) + + if rr.Header().Get("X-Request-ID") != "my-custom-id" { + t.Fatalf("expected X-Request-ID echoed, got %s", rr.Header().Get("X-Request-ID")) + } +} + +// --------------------------------------------------------------------------- +// 8. 
handleHTTPDownload +// --------------------------------------------------------------------------- + +func TestHandleHTTPDownload_MethodNotAllowed(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodGet, "/lfs/download", nil) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", rr.Code) + } +} + +func TestHandleHTTPDownload_InvalidJSON(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodPost, "/lfs/download", strings.NewReader("not json")) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_request" { + t.Fatalf("expected code=invalid_request, got %s", errResp.Code) + } +} + +func TestHandleHTTPDownload_MissingBucketKey(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsDownloadRequest{Bucket: "", Key: ""}) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_request" { + t.Fatalf("expected code=invalid_request, got %s", errResp.Code) + } +} + +func TestHandleHTTPDownload_WrongBucket(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsDownloadRequest{ + Bucket: "wrong-bucket", + Key: "test-ns/topic/lfs/2025/01/01/obj-123", + }) + req := 
httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_bucket" { + t.Fatalf("expected code=invalid_bucket, got %s", errResp.Code) + } +} + +func TestHandleHTTPDownload_PresignMode(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsDownloadRequest{ + Bucket: "test-bucket", + Key: "test-ns/topic/lfs/2025/01/01/obj-123", + Mode: "presign", + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusOK { + t.Fatalf("expected 200, got %d; body: %s", rr.Code, rr.Body.String()) + } + var resp lfsDownloadResponse + if err := json.NewDecoder(rr.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Mode != "presign" { + t.Fatalf("expected mode=presign, got %s", resp.Mode) + } + if resp.URL == "" { + t.Fatal("expected non-empty presigned URL") + } + if resp.ExpiresAt == "" { + t.Fatal("expected non-empty expires_at") + } +} + +func TestHandleHTTPDownload_DefaultModeIsPresign(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsDownloadRequest{ + Bucket: "test-bucket", + Key: "test-ns/topic/lfs/2025/01/01/obj-123", + // Mode intentionally omitted (empty string) + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusOK { + t.Fatalf("expected 200, got %d; body: %s", rr.Code, rr.Body.String()) + } + var resp lfsDownloadResponse + if err := 
json.NewDecoder(rr.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.Mode != "presign" { + t.Fatalf("expected default mode=presign, got %s", resp.Mode) + } +} + +func TestHandleHTTPDownload_InvalidMode(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsDownloadRequest{ + Bucket: "test-bucket", + Key: "test-ns/topic/lfs/2025/01/01/obj-123", + Mode: "invalid", + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_mode" { + t.Fatalf("expected code=invalid_mode, got %s", errResp.Code) + } +} + +func TestHandleHTTPDownload_InvalidObjectKey(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsDownloadRequest{ + Bucket: "test-bucket", + Key: "/absolute/path/lfs/obj", + Mode: "presign", + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_key" { + t.Fatalf("expected code=invalid_key, got %s", errResp.Code) + } +} + +func TestHandleHTTPDownload_S3Unhealthy(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + atomic.StoreUint32(&m.s3Healthy, 0) + + body, _ := json.Marshal(lfsDownloadRequest{ + Bucket: "test-bucket", + Key: "test-ns/topic/lfs/obj", + Mode: "presign", + }) + req := 
httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503, got %d", rr.Code) + } +} + +// --------------------------------------------------------------------------- +// 9. handleHTTPUploadInit +// --------------------------------------------------------------------------- + +func TestHandleHTTPUploadInit_MethodNotAllowed(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodGet, "/lfs/uploads", nil) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusMethodNotAllowed { + t.Fatalf("expected 405, got %d", rr.Code) + } +} + +func TestHandleHTTPUploadInit_MissingTopic(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsUploadInitRequest{ + ContentType: "application/octet-stream", + SizeBytes: 1024, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "missing_topic" { + t.Fatalf("expected code=missing_topic, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadInit_MissingContentType(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsUploadInitRequest{ + Topic: "my-topic", + SizeBytes: 1024, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := 
json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "missing_content_type" { + t.Fatalf("expected code=missing_content_type, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadInit_InvalidSize(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsUploadInitRequest{ + Topic: "my-topic", + ContentType: "application/octet-stream", + SizeBytes: 0, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_size" { + t.Fatalf("expected code=invalid_size, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadInit_NegativeSize(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsUploadInitRequest{ + Topic: "my-topic", + ContentType: "application/octet-stream", + SizeBytes: -100, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_size" { + t.Fatalf("expected code=invalid_size, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadInit_PayloadTooLarge(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + m.maxBlob = 1000 // Set small limit for test + + body, _ := json.Marshal(lfsUploadInitRequest{ + Topic: "my-topic", + ContentType: 
"application/octet-stream", + SizeBytes: 2000, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "payload_too_large" { + t.Fatalf("expected code=payload_too_large, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadInit_Success(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsUploadInitRequest{ + Topic: "my-topic", + ContentType: "application/octet-stream", + SizeBytes: 10 << 20, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusOK { + t.Fatalf("expected 200, got %d; body: %s", rr.Code, rr.Body.String()) + } + var resp lfsUploadInitResponse + if err := json.NewDecoder(rr.Body).Decode(&resp); err != nil { + t.Fatalf("failed to decode response: %v", err) + } + if resp.UploadID == "" { + t.Fatal("expected non-empty upload_id") + } + if resp.S3Key == "" { + t.Fatal("expected non-empty s3_key") + } + if resp.PartSize <= 0 { + t.Fatal("expected positive part_size") + } + if resp.ExpiresAt == "" { + t.Fatal("expected non-empty expires_at") + } +} + +func TestHandleHTTPUploadInit_InvalidTopic(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + body, _ := json.Marshal(lfsUploadInitRequest{ + Topic: "invalid topic!", + ContentType: "application/octet-stream", + SizeBytes: 1024, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) 
+ } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_topic" { + t.Fatalf("expected code=invalid_topic, got %s", errResp.Code) + } +} + +// --------------------------------------------------------------------------- +// 10. handleHTTPUploadSession routing +// --------------------------------------------------------------------------- + +func TestHandleHTTPUploadSession_PartUploadRoute(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + // PUT /lfs/uploads/{id}/parts/1 with no session => not_found (upload_not_found) + req := httptest.NewRequest(http.MethodPut, "/lfs/uploads/nonexistent/parts/1", strings.NewReader("data")) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "upload_not_found" { + t.Fatalf("expected code=upload_not_found, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadSession_CompleteRoute(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + // POST /lfs/uploads/{id}/complete with no session => not_found (upload_not_found) + body, _ := json.Marshal(lfsUploadCompleteRequest{}) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads/nonexistent/complete", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "upload_not_found" { + t.Fatalf("expected code=upload_not_found, got %s", errResp.Code) + } +} + 
+func TestHandleHTTPUploadSession_AbortRoute(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + // DELETE /lfs/uploads/{id} with no session => not_found (upload_not_found) + req := httptest.NewRequest(http.MethodDelete, "/lfs/uploads/nonexistent", nil) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "upload_not_found" { + t.Fatalf("expected code=upload_not_found, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadSession_InvalidPath(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + // GET /lfs/uploads/some-id/unknown => not_found + req := httptest.NewRequest(http.MethodGet, "/lfs/uploads/some-id/unknown", nil) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "not_found" { + t.Fatalf("expected code=not_found, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadSession_EmptyPath(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodGet, "/lfs/uploads/", nil) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rr.Code) + } +} + +func TestHandleHTTPUploadSession_InvalidPartNumber(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodPut, "/lfs/uploads/some-id/parts/abc", strings.NewReader("data")) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code 
!= http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_part" { + t.Fatalf("expected code=invalid_part, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadSession_ZeroPartNumber(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := httptest.NewRequest(http.MethodPut, "/lfs/uploads/some-id/parts/0", strings.NewReader("data")) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_part" { + t.Fatalf("expected code=invalid_part, got %s", errResp.Code) + } +} + +// --------------------------------------------------------------------------- +// 11. 
lfsWriteHTTPError +// --------------------------------------------------------------------------- + +func TestLfsWriteHTTPError_JSONFormat(t *testing.T) { + m := testHTTPModule(t) + rr := httptest.NewRecorder() + m.lfsWriteHTTPError(rr, "req-123", "test-topic", http.StatusBadRequest, "test_code", "test message") + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + if ct := rr.Header().Get("Content-Type"); ct != "application/json" { + t.Fatalf("expected Content-Type=application/json, got %s", ct) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "test_code" { + t.Fatalf("expected code=test_code, got %s", errResp.Code) + } + if errResp.Message != "test message" { + t.Fatalf("expected message='test message', got %s", errResp.Message) + } + if errResp.RequestID != "req-123" { + t.Fatalf("expected request_id=req-123, got %s", errResp.RequestID) + } +} + +func TestLfsWriteHTTPError_NoTopic(t *testing.T) { + m := testHTTPModule(t) + rr := httptest.NewRecorder() + m.lfsWriteHTTPError(rr, "req-456", "", http.StatusInternalServerError, "internal", "something went wrong") + + if rr.Code != http.StatusInternalServerError { + t.Fatalf("expected 500, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "internal" { + t.Fatalf("expected code=internal, got %s", errResp.Code) + } +} + +// --------------------------------------------------------------------------- +// 12. 
Upload session management +// --------------------------------------------------------------------------- + +func TestUploadSession_StoreGetDelete(t *testing.T) { + m := testHTTPModule(t) + + session := &uploadSession{ + ID: "session-1", + Topic: "my-topic", + S3Key: "test-ns/my-topic/lfs/2025/01/01/obj-1", + UploadID: "s3-upload-id", + ExpiresAt: time.Now().UTC().Add(1 * time.Hour), + Parts: make(map[int32]string), + PartSizes: make(map[int32]int64), + } + + // Store + m.lfsStoreUploadSession(session) + + // Get + got, ok := m.lfsGetUploadSession("session-1") + if !ok { + t.Fatal("expected to find session-1") + } + if got.Topic != "my-topic" { + t.Fatalf("expected topic=my-topic, got %s", got.Topic) + } + + // Get non-existent + _, ok = m.lfsGetUploadSession("nonexistent") + if ok { + t.Fatal("expected not to find nonexistent session") + } + + // Delete + m.lfsDeleteUploadSession("session-1") + _, ok = m.lfsGetUploadSession("session-1") + if ok { + t.Fatal("expected session to be deleted") + } +} + +func TestUploadSession_TTLCleanup(t *testing.T) { + m := testHTTPModule(t) + + expiredSession := &uploadSession{ + ID: "expired-session", + Topic: "topic-a", + ExpiresAt: time.Now().UTC().Add(-1 * time.Second), // Already expired + Parts: make(map[int32]string), + PartSizes: make(map[int32]int64), + } + activeSession := &uploadSession{ + ID: "active-session", + Topic: "topic-b", + ExpiresAt: time.Now().UTC().Add(1 * time.Hour), + Parts: make(map[int32]string), + PartSizes: make(map[int32]int64), + } + + // Store both directly to bypass cleanup during store + m.uploadMu.Lock() + m.uploadSessions[expiredSession.ID] = expiredSession + m.uploadSessions[activeSession.ID] = activeSession + m.uploadMu.Unlock() + + // Get triggers cleanup + _, ok := m.lfsGetUploadSession("expired-session") + if ok { + t.Fatal("expected expired session to be cleaned up") + } + + got, ok := m.lfsGetUploadSession("active-session") + if !ok { + t.Fatal("expected active session to survive cleanup") 
+ } + if got.Topic != "topic-b" { + t.Fatalf("expected topic-b, got %s", got.Topic) + } +} + +func TestUploadSession_StoreNilSafe(t *testing.T) { + m := testHTTPModule(t) + // Should not panic + m.lfsStoreUploadSession(nil) + + // No sessions should be stored + m.uploadMu.Lock() + count := len(m.uploadSessions) + m.uploadMu.Unlock() + if count != 0 { + t.Fatalf("expected 0 sessions, got %d", count) + } +} + +func TestUploadSession_DeleteNonExistent(t *testing.T) { + m := testHTTPModule(t) + // Should not panic + m.lfsDeleteUploadSession("does-not-exist") +} + +func TestUploadSession_MultipleSessionsConcurrent(t *testing.T) { + m := testHTTPModule(t) + + for i := 0; i < 10; i++ { + session := &uploadSession{ + ID: "session-" + strings.Repeat("x", i+1), + Topic: "topic", + ExpiresAt: time.Now().UTC().Add(1 * time.Hour), + Parts: make(map[int32]string), + PartSizes: make(map[int32]int64), + } + m.lfsStoreUploadSession(session) + } + + m.uploadMu.Lock() + count := len(m.uploadSessions) + m.uploadMu.Unlock() + if count != 10 { + t.Fatalf("expected 10 sessions, got %d", count) + } +} + +// --------------------------------------------------------------------------- +// handleHTTPUploadSession with real session: abort flow +// --------------------------------------------------------------------------- + +func TestHandleHTTPUploadAbort_WithSession(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + session := &uploadSession{ + ID: "abort-me", + Topic: "my-topic", + S3Key: "test-ns/my-topic/lfs/2025/01/01/obj-abort", + UploadID: "s3-upload-abort", + ExpiresAt: time.Now().UTC().Add(1 * time.Hour), + Parts: make(map[int32]string), + PartSizes: make(map[int32]int64), + } + m.lfsStoreUploadSession(session) + + req := httptest.NewRequest(http.MethodDelete, "/lfs/uploads/abort-me", nil) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusNoContent { + t.Fatalf("expected 204, got %d; body: %s", rr.Code, rr.Body.String()) + } 
+ + // Session should be cleaned up + _, ok := m.lfsGetUploadSession("abort-me") + if ok { + t.Fatal("expected session to be deleted after abort") + } +} + +// --------------------------------------------------------------------------- +// Auth integration with handlers +// --------------------------------------------------------------------------- + +func TestHandleHTTPDownload_Unauthorized(t *testing.T) { + m := testHTTPModule(t) + // httpAPIKey is set by default in testHTTPModule + + body, _ := json.Marshal(lfsDownloadRequest{ + Bucket: "test-bucket", + Key: "test-ns/topic/lfs/2025/01/01/obj-123", + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/download", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPDownload(rr, req) + + if rr.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", rr.Code) + } +} + +func TestHandleHTTPUploadInit_Unauthorized(t *testing.T) { + m := testHTTPModule(t) + + body, _ := json.Marshal(lfsUploadInitRequest{ + Topic: "my-topic", + ContentType: "application/octet-stream", + SizeBytes: 1024, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", rr.Code) + } +} + +func TestHandleHTTPUploadSession_Unauthorized(t *testing.T) { + m := testHTTPModule(t) + + req := httptest.NewRequest(http.MethodDelete, "/lfs/uploads/some-id", nil) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusUnauthorized { + t.Fatalf("expected 401, got %d", rr.Code) + } +} + +// --------------------------------------------------------------------------- +// handleHTTPUploadInit with invalid JSON +// --------------------------------------------------------------------------- + +func TestHandleHTTPUploadInit_InvalidJSON(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + + req := 
httptest.NewRequest(http.MethodPost, "/lfs/uploads", strings.NewReader("not json at all")) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rr.Code) + } + var errResp lfsErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&errResp); err != nil { + t.Fatalf("failed to decode error response: %v", err) + } + if errResp.Code != "invalid_request" { + t.Fatalf("expected code=invalid_request, got %s", errResp.Code) + } +} + +func TestHandleHTTPUploadInit_S3Unhealthy(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + atomic.StoreUint32(&m.s3Healthy, 0) + + body, _ := json.Marshal(lfsUploadInitRequest{ + Topic: "my-topic", + ContentType: "application/octet-stream", + SizeBytes: 1024, + }) + req := httptest.NewRequest(http.MethodPost, "/lfs/uploads", bytes.NewReader(body)) + rr := httptest.NewRecorder() + m.handleHTTPUploadInit(rr, req) + + if rr.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503, got %d", rr.Code) + } +} + +func TestHandleHTTPUploadSession_S3Unhealthy(t *testing.T) { + m := testHTTPModule(t) + m.httpAPIKey = "" + atomic.StoreUint32(&m.s3Healthy, 0) + + req := httptest.NewRequest(http.MethodDelete, "/lfs/uploads/some-id", nil) + rr := httptest.NewRecorder() + m.handleHTTPUploadSession(rr, req) + + if rr.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503, got %d", rr.Code) + } +} diff --git a/cmd/proxy/lfs_test.go b/cmd/proxy/lfs_test.go index 3a21deb6..cc1d5820 100644 --- a/cmd/proxy/lfs_test.go +++ b/cmd/proxy/lfs_test.go @@ -16,13 +16,17 @@ package main import ( + "bytes" "context" "crypto/sha256" + "encoding/binary" "encoding/hex" "errors" "hash/crc32" "io" "log/slog" + "math" + "strings" "testing" "github.com/KafScale/platform/pkg/lfs" @@ -31,6 +35,7 @@ import ( "github.com/aws/aws-sdk-go-v2/aws/signer/v4" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" + 
"github.com/twmb/franz-go/pkg/kgo" "github.com/twmb/franz-go/pkg/kmsg" ) @@ -424,3 +429,882 @@ func TestLFSTopicsFromProduceEmpty(t *testing.T) { var _ s3API = (*fakeS3)(nil) var _ s3PresignAPI = (*fakePresign)(nil) var _ types.CompletedPart + +// --------------------------------------------------------------------------- +// Tests for lfsEncodeRecords / lfsEncodeRecord +// --------------------------------------------------------------------------- + +func TestLfsEncodeRecordSingle(t *testing.T) { + rec := kmsg.Record{ + Key: []byte("key1"), + Value: []byte("val1"), + } + encoded := lfsEncodeRecord(rec) + if len(encoded) == 0 { + t.Fatal("expected non-empty encoded record") + } + // The encoded bytes must begin with a varint length prefix. + length, n := binary.Varint(encoded) + if n <= 0 { + t.Fatal("expected valid varint length prefix") + } + if int(length) != len(encoded)-n { + t.Fatalf("varint length %d does not match body length %d", length, len(encoded)-n) + } +} + +func TestLfsEncodeRecordsMultiple(t *testing.T) { + records := []kmsg.Record{ + {Key: []byte("k1"), Value: []byte("v1")}, + {Key: []byte("k2"), Value: []byte("v2")}, + {Key: []byte("k3"), Value: []byte("v3")}, + } + encoded := lfsEncodeRecords(records) + if len(encoded) == 0 { + t.Fatal("expected non-empty output for multiple records") + } + // Encoded length must be the sum of individually encoded records. 
+ sum := 0 + for _, r := range records { + sum += len(lfsEncodeRecord(r)) + } + if len(encoded) != sum { + t.Fatalf("encoded length %d != sum of individual %d", len(encoded), sum) + } +} + +func TestLfsEncodeRecordsEmpty(t *testing.T) { + encoded := lfsEncodeRecords(nil) + if encoded != nil { + t.Fatalf("expected nil for empty records, got %v", encoded) + } + encoded = lfsEncodeRecords([]kmsg.Record{}) + if encoded != nil { + t.Fatalf("expected nil for zero-length records, got %v", encoded) + } +} + +func TestLfsEncodeRecordNilKeyValue(t *testing.T) { + rec := kmsg.Record{ + Key: nil, + Value: nil, + } + encoded := lfsEncodeRecord(rec) + if len(encoded) == 0 { + t.Fatal("expected non-empty encoded record even with nil key/value") + } + // Build a batch and decode to verify round-trip. + batchBytes := lfsBuildRecordBatch([]kmsg.Record{rec}) + batches, err := lfsDecodeRecordBatches(batchBytes) + if err != nil { + t.Fatalf("decode batches: %v", err) + } + recs, _, err := lfsDecodeBatchRecords(&batches[0], nil) + if err != nil { + t.Fatalf("decode records: %v", err) + } + if len(recs) != 1 { + t.Fatalf("expected 1 record, got %d", len(recs)) + } + if recs[0].Key != nil { + t.Fatalf("expected nil key, got %v", recs[0].Key) + } + if recs[0].Value != nil { + t.Fatalf("expected nil value, got %v", recs[0].Value) + } +} + +func TestLfsEncodeRecordWithHeaders(t *testing.T) { + rec := kmsg.Record{ + Key: []byte("hk"), + Value: []byte("hv"), + Headers: []kmsg.Header{ + {Key: "h1", Value: []byte("v1")}, + {Key: "h2", Value: []byte("v2")}, + }, + } + batchBytes := lfsBuildRecordBatch([]kmsg.Record{rec}) + batches, err := lfsDecodeRecordBatches(batchBytes) + if err != nil { + t.Fatalf("decode batches: %v", err) + } + recs, _, err := lfsDecodeBatchRecords(&batches[0], nil) + if err != nil { + t.Fatalf("decode records: %v", err) + } + if len(recs) != 1 { + t.Fatalf("expected 1 record, got %d", len(recs)) + } + if len(recs[0].Headers) != 2 { + t.Fatalf("expected 2 headers, got %d", 
len(recs[0].Headers)) + } + if recs[0].Headers[0].Key != "h1" || string(recs[0].Headers[0].Value) != "v1" { + t.Fatalf("header 0 mismatch: %v", recs[0].Headers[0]) + } + if recs[0].Headers[1].Key != "h2" || string(recs[0].Headers[1].Value) != "v2" { + t.Fatalf("header 1 mismatch: %v", recs[0].Headers[1]) + } +} + +// --------------------------------------------------------------------------- +// Tests for lfsBuildRecordBatch +// --------------------------------------------------------------------------- + +func TestLfsBuildRecordBatch(t *testing.T) { + records := []kmsg.Record{ + {Key: []byte("k"), Value: []byte("v")}, + } + batchBytes := lfsBuildRecordBatch(records) + if len(batchBytes) == 0 { + t.Fatal("expected non-empty batch bytes") + } + // Must be decodable. + batches, err := lfsDecodeRecordBatches(batchBytes) + if err != nil { + t.Fatalf("decode: %v", err) + } + if len(batches) != 1 { + t.Fatalf("expected 1 batch, got %d", len(batches)) + } + if batches[0].NumRecords != 1 { + t.Fatalf("expected NumRecords=1, got %d", batches[0].NumRecords) + } +} + +func TestLfsBuildRecordBatchRoundTrip(t *testing.T) { + records := []kmsg.Record{ + {Key: []byte("a"), Value: []byte("1")}, + {Key: []byte("b"), Value: []byte("2")}, + } + batchBytes := lfsBuildRecordBatch(records) + batches, err := lfsDecodeRecordBatches(batchBytes) + if err != nil { + t.Fatalf("decode: %v", err) + } + recs, _, err := lfsDecodeBatchRecords(&batches[0], nil) + if err != nil { + t.Fatalf("decode records: %v", err) + } + if len(recs) != 2 { + t.Fatalf("expected 2 records, got %d", len(recs)) + } + if string(recs[0].Key) != "a" || string(recs[0].Value) != "1" { + t.Fatalf("record 0 mismatch: key=%q val=%q", recs[0].Key, recs[0].Value) + } + if string(recs[1].Key) != "b" || string(recs[1].Value) != "2" { + t.Fatalf("record 1 mismatch: key=%q val=%q", recs[1].Key, recs[1].Value) + } +} + +// --------------------------------------------------------------------------- +// Tests for lfsVarint +// 
--------------------------------------------------------------------------- + +func TestLfsVarintValid(t *testing.T) { + var buf [binary.MaxVarintLen32]byte + n := binary.PutVarint(buf[:], 42) + val, consumed := lfsVarint(buf[:n]) + if consumed != n { + t.Fatalf("expected consumed=%d, got %d", n, consumed) + } + if val != 42 { + t.Fatalf("expected 42, got %d", val) + } +} + +func TestLfsVarintZero(t *testing.T) { + var buf [binary.MaxVarintLen32]byte + n := binary.PutVarint(buf[:], 0) + val, consumed := lfsVarint(buf[:n]) + if consumed != n || val != 0 { + t.Fatalf("expected (0, %d), got (%d, %d)", n, val, consumed) + } +} + +func TestLfsVarintNegative(t *testing.T) { + var buf [binary.MaxVarintLen32]byte + n := binary.PutVarint(buf[:], -1) + val, consumed := lfsVarint(buf[:n]) + if consumed != n { + t.Fatalf("expected consumed=%d, got %d", n, consumed) + } + if val != -1 { + t.Fatalf("expected -1, got %d", val) + } +} + +func TestLfsVarintEmptyBuffer(t *testing.T) { + val, consumed := lfsVarint(nil) + if consumed != 0 || val != 0 { + t.Fatalf("expected (0, 0), got (%d, %d)", val, consumed) + } + val, consumed = lfsVarint([]byte{}) + if consumed != 0 || val != 0 { + t.Fatalf("expected (0, 0) for empty slice, got (%d, %d)", val, consumed) + } +} + +// --------------------------------------------------------------------------- +// Tests for lfsCompressRecords +// --------------------------------------------------------------------------- + +func TestLfsCompressRecordsCodecNone(t *testing.T) { + raw := []byte("no compression needed") + out, codec, err := lfsCompressRecords(kgo.CodecNone, raw) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if codec != kgo.CodecNone { + t.Fatalf("expected CodecNone, got %d", codec) + } + if !bytes.Equal(out, raw) { + t.Fatal("expected passthrough for CodecNone") + } +} + +func TestLfsCompressRecordsGzipRoundtrip(t *testing.T) { + raw := []byte("gzip test payload data that should compress") + compressed, codec, err := 
lfsCompressRecords(kgo.CodecGzip, raw) + if err != nil { + t.Fatalf("compress: %v", err) + } + if codec != kgo.CodecGzip { + t.Fatalf("expected CodecGzip, got %d", codec) + } + // Decompress and verify round-trip. + decompressor := kgo.DefaultDecompressor() + decompressed, err := decompressor.Decompress(compressed, kgo.CodecGzip) + if err != nil { + t.Fatalf("decompress: %v", err) + } + if !bytes.Equal(decompressed, raw) { + t.Fatal("round-trip mismatch for gzip") + } +} + +func TestLfsCompressRecordsSnappyRoundtrip(t *testing.T) { + raw := []byte("snappy test payload data that should compress") + compressed, codec, err := lfsCompressRecords(kgo.CodecSnappy, raw) + if err != nil { + t.Fatalf("compress: %v", err) + } + if codec != kgo.CodecSnappy { + t.Fatalf("expected CodecSnappy, got %d", codec) + } + decompressor := kgo.DefaultDecompressor() + decompressed, err := decompressor.Decompress(compressed, kgo.CodecSnappy) + if err != nil { + t.Fatalf("decompress: %v", err) + } + if !bytes.Equal(decompressed, raw) { + t.Fatal("round-trip mismatch for snappy") + } +} + +// --------------------------------------------------------------------------- +// Tests for lfsDecodeRecordBatches +// --------------------------------------------------------------------------- + +func TestLfsDecodeRecordBatchesValid(t *testing.T) { + records := []kmsg.Record{ + {Key: []byte("x"), Value: []byte("y")}, + } + batchBytes := lfsBuildRecordBatch(records) + batches, err := lfsDecodeRecordBatches(batchBytes) + if err != nil { + t.Fatalf("decode: %v", err) + } + if len(batches) != 1 { + t.Fatalf("expected 1 batch, got %d", len(batches)) + } + if batches[0].Magic != 2 { + t.Fatalf("expected magic=2, got %d", batches[0].Magic) + } +} + +func TestLfsDecodeRecordBatchesEmptyInput(t *testing.T) { + batches, err := lfsDecodeRecordBatches(nil) + if err != nil { + t.Fatalf("expected no error on nil, got: %v", err) + } + if len(batches) != 0 { + t.Fatalf("expected 0 batches, got %d", len(batches)) + } + 
+ batches, err = lfsDecodeRecordBatches([]byte{}) + if err != nil { + t.Fatalf("expected no error on empty, got: %v", err) + } + if len(batches) != 0 { + t.Fatalf("expected 0 batches, got %d", len(batches)) + } +} + +func TestLfsDecodeRecordBatchesTruncatedInput(t *testing.T) { + // Less than 12 bytes triggers the "too short" error. + _, err := lfsDecodeRecordBatches([]byte{0, 1, 2, 3, 4}) + if err == nil { + t.Fatal("expected error for truncated input") + } + if !strings.Contains(err.Error(), "too short") { + t.Fatalf("expected 'too short' error, got: %v", err) + } +} + +func TestLfsDecodeRecordBatchesInvalidLength(t *testing.T) { + // 12 bytes: first 8 are baseOffset, bytes 8..11 encode the length. + // Set length to a huge value that exceeds the buffer. + buf := make([]byte, 12) + buf[8] = 0x7F + buf[9] = 0xFF + buf[10] = 0xFF + buf[11] = 0xFF + _, err := lfsDecodeRecordBatches(buf) + if err == nil { + t.Fatal("expected error for invalid length") + } + if !strings.Contains(err.Error(), "invalid record batch length") { + t.Fatalf("expected 'invalid record batch length' error, got: %v", err) + } +} + +// --------------------------------------------------------------------------- +// Tests for lfsJoinRecordBatches +// --------------------------------------------------------------------------- + +func TestLfsJoinRecordBatchesEmpty(t *testing.T) { + out := lfsJoinRecordBatches(nil) + if out != nil { + t.Fatalf("expected nil, got %v", out) + } + out = lfsJoinRecordBatches([]lfsRecordBatch{}) + if out != nil { + t.Fatalf("expected nil, got %v", out) + } +} + +func TestLfsJoinRecordBatchesSingle(t *testing.T) { + batchBytes := lfsBuildRecordBatch([]kmsg.Record{ + {Key: []byte("k"), Value: []byte("v")}, + }) + batches, _ := lfsDecodeRecordBatches(batchBytes) + joined := lfsJoinRecordBatches(batches) + if !bytes.Equal(joined, batchBytes) { + t.Fatal("single batch join should equal original bytes") + } +} + +func TestLfsJoinRecordBatchesMultiple(t *testing.T) { + batch1 := 
lfsBuildRecordBatch([]kmsg.Record{
+ {Key: []byte("k1"), Value: []byte("v1")},
+ })
+ batch2 := lfsBuildRecordBatch([]kmsg.Record{
+ {Key: []byte("k2"), Value: []byte("v2")},
+ })
+ // Concatenating two encoded batches must decode into exactly two batches
+ // and re-join byte-identically (round-trip property).
+ combined := append(append([]byte(nil), batch1...), batch2...)
+ allBatches, err := lfsDecodeRecordBatches(combined)
+ if err != nil {
+ t.Fatalf("decode combined: %v", err)
+ }
+ if len(allBatches) != 2 {
+ t.Fatalf("expected 2 batches, got %d", len(allBatches))
+ }
+ joined := lfsJoinRecordBatches(allBatches)
+ if !bytes.Equal(joined, combined) {
+ t.Fatal("joined bytes should equal combined input")
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for lfsFindHeaderValue
+// ---------------------------------------------------------------------------
+// NOTE(review): duplicate-key precedence (first vs last match wins) is not
+// covered by these tests — confirm against the lfsFindHeaderValue implementation.
+
+func TestLfsFindHeaderValueFound(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "a", Value: []byte("1")},
+ {Key: "b", Value: []byte("2")},
+ }
+ val, ok := lfsFindHeaderValue(headers, "b")
+ if !ok {
+ t.Fatal("expected found=true")
+ }
+ if string(val) != "2" {
+ t.Fatalf("expected '2', got '%s'", val)
+ }
+}
+
+func TestLfsFindHeaderValueNotFound(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "a", Value: []byte("1")},
+ }
+ val, ok := lfsFindHeaderValue(headers, "missing")
+ if ok {
+ t.Fatal("expected found=false")
+ }
+ if val != nil {
+ t.Fatalf("expected nil, got %v", val)
+ }
+}
+
+func TestLfsFindHeaderValueEmptyHeaders(t *testing.T) {
+ val, ok := lfsFindHeaderValue(nil, "any")
+ if ok {
+ t.Fatal("expected found=false for nil headers")
+ }
+ if val != nil {
+ t.Fatalf("expected nil, got %v", val)
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for lfsHeaderValue
+// ---------------------------------------------------------------------------
+
+func TestLfsHeaderValueFound(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "content-type", Value: []byte("application/json")},
+ }
+ val := lfsHeaderValue(headers, "content-type")
+ if val != "application/json" {
+ t.Fatalf("expected 'application/json', got '%s'", val)
+ }
+}
+
+func TestLfsHeaderValueNotFound(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "other", Value: []byte("val")},
+ }
+ val := lfsHeaderValue(headers, "content-type")
+ if val != "" {
+ t.Fatalf("expected empty string, got '%s'", val)
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for lfsHeadersToMap
+// ---------------------------------------------------------------------------
+// NOTE(review): these tests pin an allowlist behavior — content-type,
+// correlation-id, and traceparent survive; other keys (x-custom, LFS_BLOB)
+// are dropped, and when nothing survives the result is a nil map rather than
+// an empty one.
+
+func TestLfsHeadersToMapAllowlisted(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "content-type", Value: []byte("text/plain")},
+ {Key: "correlation-id", Value: []byte("abc-123")},
+ }
+ m := lfsHeadersToMap(headers)
+ if m == nil {
+ t.Fatal("expected non-nil map")
+ }
+ if m["content-type"] != "text/plain" {
+ t.Fatalf("expected 'text/plain', got '%s'", m["content-type"])
+ }
+ if m["correlation-id"] != "abc-123" {
+ t.Fatalf("expected 'abc-123', got '%s'", m["correlation-id"])
+ }
+}
+
+func TestLfsHeadersToMapNonAllowlisted(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "x-custom-header", Value: []byte("secret")},
+ {Key: "random", Value: []byte("data")},
+ }
+ m := lfsHeadersToMap(headers)
+ if m != nil {
+ t.Fatalf("expected nil map for non-allowlisted headers, got %v", m)
+ }
+}
+
+func TestLfsHeadersToMapEmpty(t *testing.T) {
+ m := lfsHeadersToMap(nil)
+ if m != nil {
+ t.Fatalf("expected nil for nil headers, got %v", m)
+ }
+ m = lfsHeadersToMap([]kmsg.Header{})
+ if m != nil {
+ t.Fatalf("expected nil for empty headers, got %v", m)
+ }
+}
+
+func TestLfsHeadersToMapMixed(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "content-type", Value: []byte("application/octet-stream")},
+ {Key: "x-custom", Value: []byte("ignored")},
+ {Key: "traceparent", Value: []byte("00-abc-def-01")},
+ {Key: "LFS_BLOB", Value: []byte("sha256")},
+ }
+ m := lfsHeadersToMap(headers)
+ if m == nil {
+ t.Fatal("expected non-nil map")
+ }
+ if len(m) != 2 {
+ t.Fatalf("expected 2 entries, got %d: %v", len(m), m)
+ }
+ if m["content-type"] != "application/octet-stream" {
+ t.Fatalf("content-type mismatch: %s", m["content-type"])
+ }
+ if m["traceparent"] != "00-abc-def-01" {
+ t.Fatalf("traceparent mismatch: %s", m["traceparent"])
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for lfsDropHeader
+// ---------------------------------------------------------------------------
+
+func TestLfsDropHeaderExisting(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "keep", Value: []byte("1")},
+ {Key: "drop-me", Value: []byte("2")},
+ {Key: "also-keep", Value: []byte("3")},
+ }
+ result := lfsDropHeader(headers, "drop-me")
+ if len(result) != 2 {
+ t.Fatalf("expected 2 headers, got %d", len(result))
+ }
+ for _, h := range result {
+ if h.Key == "drop-me" {
+ t.Fatal("drop-me should have been removed")
+ }
+ }
+}
+
+func TestLfsDropHeaderNonExisting(t *testing.T) {
+ headers := []kmsg.Header{
+ {Key: "a", Value: []byte("1")},
+ }
+ result := lfsDropHeader(headers, "nonexistent")
+ if len(result) != 1 {
+ t.Fatalf("expected 1 header, got %d", len(result))
+ }
+ if result[0].Key != "a" {
+ t.Fatalf("expected 'a', got '%s'", result[0].Key)
+ }
+}
+
+func TestLfsDropHeaderEmpty(t *testing.T) {
+ result := lfsDropHeader(nil, "any")
+ if result != nil {
+ t.Fatalf("expected nil for nil headers, got %v", result)
+ }
+ // Deliberately asserts only length for the empty (non-nil) input: the
+ // result may come back as nil or as an empty slice.
+ result = lfsDropHeader([]kmsg.Header{}, "any")
+ if len(result) != 0 {
+ t.Fatalf("expected empty for empty headers, got %v", result)
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for lfsInt32FromBytes
+// ---------------------------------------------------------------------------
+
+func TestLfsInt32FromBytesPositive(t *testing.T) {
+ // 1 in big-endian: 0x00000001
+ val := lfsInt32FromBytes([]byte{0x00, 0x00, 0x00, 0x01})
+ if val != 1 {
+ t.Fatalf("expected 1, got %d", val)
+ }
+ // 256 in big-endian: 0x00000100
+ val = lfsInt32FromBytes([]byte{0x00, 0x00, 0x01, 0x00})
+ if val != 256 {
+ t.Fatalf("expected 256, got %d", val)
+ }
+}
+
+func TestLfsInt32FromBytesZero(t *testing.T) {
+ val := lfsInt32FromBytes([]byte{0x00, 0x00, 0x00, 0x00})
+ if val != 0 {
+ t.Fatalf("expected 0, got %d", val)
+ }
+}
+
+func TestLfsInt32FromBytesMaxInt32(t *testing.T) {
+ // math.MaxInt32 = 0x7FFFFFFF
+ val := lfsInt32FromBytes([]byte{0x7F, 0xFF, 0xFF, 0xFF})
+ if val != math.MaxInt32 {
+ t.Fatalf("expected %d, got %d", math.MaxInt32, val)
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for newLfsMetrics
+// ---------------------------------------------------------------------------
+
+func TestNewLfsMetrics(t *testing.T) {
+ m := newLfsMetrics()
+ if m == nil {
+ t.Fatal("expected non-nil metrics")
+ }
+ if m.uploadDuration == nil {
+ t.Fatal("expected non-nil uploadDuration histogram")
+ }
+ if m.requests == nil {
+ t.Fatal("expected non-nil requests map")
+ }
+ if m.uploadBytes != 0 {
+ t.Fatalf("expected uploadBytes=0, got %d", m.uploadBytes)
+ }
+ if m.s3Errors != 0 {
+ t.Fatalf("expected s3Errors=0, got %d", m.s3Errors)
+ }
+ if m.orphans != 0 {
+ t.Fatalf("expected orphans=0, got %d", m.orphans)
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for ObserveUploadDuration
+// ---------------------------------------------------------------------------
+
+func TestObserveUploadDurationNilSafety(t *testing.T) {
+ var m *lfsMetrics
+ // Must not panic.
+ m.ObserveUploadDuration(1.0)
+}
+
+func TestObserveUploadDurationNormal(t *testing.T) {
+ m := newLfsMetrics()
+ m.ObserveUploadDuration(0.5)
+ m.ObserveUploadDuration(1.5)
+ _, _, sum, count := m.uploadDuration.Snapshot()
+ if count != 2 {
+ t.Fatalf("expected count=2, got %d", count)
+ }
+ // 0.5 and 1.5 are exactly representable in float64, so exact equality is safe.
+ if sum != 2.0 {
+ t.Fatalf("expected sum=2.0, got %f", sum)
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for AddUploadBytes
+// ---------------------------------------------------------------------------
+
+func TestAddUploadBytesNormal(t *testing.T) {
+ m := newLfsMetrics()
+ m.AddUploadBytes(100)
+ m.AddUploadBytes(200)
+ if m.uploadBytes != 300 {
+ t.Fatalf("expected 300, got %d", m.uploadBytes)
+ }
+}
+
+func TestAddUploadBytesNegativeIgnored(t *testing.T) {
+ m := newLfsMetrics()
+ m.AddUploadBytes(100)
+ m.AddUploadBytes(-50)
+ // Negative values are ignored (n <= 0 guard).
+ if m.uploadBytes != 100 {
+ t.Fatalf("expected 100 (negative ignored), got %d", m.uploadBytes)
+ }
+}
+
+func TestAddUploadBytesNilSafety(t *testing.T) {
+ var m *lfsMetrics
+ // Must not panic.
+ m.AddUploadBytes(100)
+}
+
+// ---------------------------------------------------------------------------
+// Tests for IncRequests
+// ---------------------------------------------------------------------------
+
+func TestIncRequestsAllCombinations(t *testing.T) {
+ // Exercises the full 2x2 matrix of (status, mode): ok/error x lfs/passthrough.
+ m := newLfsMetrics()
+ m.IncRequests("topic1", "ok", "lfs")
+ m.IncRequests("topic1", "error", "lfs")
+ m.IncRequests("topic1", "ok", "passthrough")
+ m.IncRequests("topic1", "error", "passthrough")
+
+ counters := m.requests["topic1"]
+ if counters == nil {
+ t.Fatal("expected counters for topic1")
+ }
+ if counters.okLfs != 1 {
+ t.Fatalf("expected okLfs=1, got %d", counters.okLfs)
+ }
+ if counters.errLfs != 1 {
+ t.Fatalf("expected errLfs=1, got %d", counters.errLfs)
+ }
+ if counters.okPas != 1 {
+ t.Fatalf("expected okPas=1, got %d", counters.okPas)
+ }
+ if counters.errPas != 1 {
+ t.Fatalf("expected errPas=1, got %d", counters.errPas)
+ }
+}
+
+func TestIncRequestsEmptyTopicDefaultsToUnknown(t *testing.T) {
+ m := newLfsMetrics()
+ m.IncRequests("", "ok", "lfs")
+ // An empty topic label is bucketed under the literal key "unknown".
+ counters := m.requests["unknown"]
+ if counters == nil {
+ t.Fatal("expected counters for 'unknown'")
+ }
+ if counters.okLfs != 1 {
+ t.Fatalf("expected okLfs=1 for unknown topic, got %d", counters.okLfs)
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for IncS3Errors
+// ---------------------------------------------------------------------------
+
+func TestIncS3Errors(t *testing.T) {
+ m := newLfsMetrics()
+ m.IncS3Errors()
+ m.IncS3Errors()
+ if m.s3Errors != 2 {
+ t.Fatalf("expected 2, got %d", m.s3Errors)
+ }
+}
+
+func TestIncS3ErrorsNilSafety(t *testing.T) {
+ var m *lfsMetrics
+ // Must not panic.
+ m.IncS3Errors()
+}
+
+// ---------------------------------------------------------------------------
+// Tests for IncOrphans
+// ---------------------------------------------------------------------------
+
+func TestIncOrphans(t *testing.T) {
+ m := newLfsMetrics()
+ m.IncOrphans(3)
+ m.IncOrphans(2)
+ if m.orphans != 5 {
+ t.Fatalf("expected 5, got %d", m.orphans)
+ }
+}
+
+func TestIncOrphansZeroAndNegativeIgnored(t *testing.T) {
+ m := newLfsMetrics()
+ m.IncOrphans(5)
+ m.IncOrphans(0)
+ m.IncOrphans(-1)
+ if m.orphans != 5 {
+ t.Fatalf("expected 5 (zero/negative ignored), got %d", m.orphans)
+ }
+}
+
+func TestIncOrphansNilSafety(t *testing.T) {
+ var m *lfsMetrics
+ // Must not panic.
+ m.IncOrphans(1)
+}
+
+// ---------------------------------------------------------------------------
+// Tests for WritePrometheus
+// ---------------------------------------------------------------------------
+
+func TestWritePrometheus(t *testing.T) {
+ m := newLfsMetrics()
+ m.ObserveUploadDuration(0.1)
+ m.AddUploadBytes(1024)
+ m.IncRequests("test-topic", "ok", "lfs")
+ m.IncS3Errors()
+ m.IncOrphans(2)
+
+ var buf bytes.Buffer
+ m.WritePrometheus(&buf)
+ output := buf.String()
+
+ // Presence check only: each metric family name must appear somewhere in
+ // the exposition output; exact formatting is not pinned here.
+ expectedMetrics := []string{
+ "kafscale_lfs_proxy_upload_duration_seconds",
+ "kafscale_lfs_proxy_upload_bytes_total",
+ "kafscale_lfs_proxy_requests_total",
+ "kafscale_lfs_proxy_s3_errors_total",
+ "kafscale_lfs_proxy_orphan_objects_total",
+ "kafscale_lfs_proxy_goroutines",
+ "kafscale_lfs_proxy_memory_alloc_bytes",
+ "kafscale_lfs_proxy_memory_sys_bytes",
+ "kafscale_lfs_proxy_gc_pause_total_ns",
+ }
+ for _, metric := range expectedMetrics {
+ if !strings.Contains(output, metric) {
+ t.Fatalf("expected output to contain %q", metric)
+ }
+ }
+ // Verify specific values.
+ if !strings.Contains(output, "kafscale_lfs_proxy_upload_bytes_total 1024") {
+ t.Fatal("expected upload_bytes_total 1024 in output")
+ }
+ if !strings.Contains(output, "kafscale_lfs_proxy_s3_errors_total 1") {
+ t.Fatal("expected s3_errors_total 1 in output")
+ }
+ if !strings.Contains(output, "kafscale_lfs_proxy_orphan_objects_total 2") {
+ t.Fatal("expected orphan_objects_total 2 in output")
+ }
+}
+
+func TestWritePrometheusNilSafety(t *testing.T) {
+ var m *lfsMetrics
+ var buf bytes.Buffer
+ // Must not panic.
+ m.WritePrometheus(&buf)
+ if buf.Len() != 0 {
+ t.Fatalf("expected empty output for nil metrics, got %d bytes", buf.Len())
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Tests for histogram
+// ---------------------------------------------------------------------------
+
+func TestHistogramObserve(t *testing.T) {
+ // Buckets 1/5/10; observations chosen to land one per bucket plus one overflow.
+ h := newHistogram([]float64{1, 5, 10})
+ h.Observe(0.5)
+ h.Observe(3.0)
+ h.Observe(7.0)
+ h.Observe(15.0)
+
+ buckets, counts, sum, count := h.Snapshot()
+ if count != 4 {
+ t.Fatalf("expected count=4, got %d", count)
+ }
+ expectedSum := 0.5 + 3.0 + 7.0 + 15.0
+ if sum != expectedSum {
+ t.Fatalf("expected sum=%f, got %f", expectedSum, sum)
+ }
+ if len(buckets) != 3 {
+ t.Fatalf("expected 3 buckets, got %d", len(buckets))
+ }
+ // counts has len(buckets)+1 entries: one per bucket boundary + overflow.
+ if len(counts) != 4 {
+ t.Fatalf("expected 4 count slots, got %d", len(counts))
+ }
+}
+
+func TestHistogramSnapshot(t *testing.T) {
+ h := newHistogram([]float64{1, 10})
+ buckets, counts, sum, count := h.Snapshot()
+ if count != 0 || sum != 0 {
+ t.Fatal("expected empty histogram")
+ }
+ if len(buckets) != 2 || len(counts) != 3 {
+ t.Fatalf("expected 2 buckets / 3 counts, got %d / %d", len(buckets), len(counts))
+ }
+}
+
+func TestHistogramNilSafety(t *testing.T) {
+ var h *histogram
+ // Must not panic.
+ h.Observe(1.0)
+ buckets, counts, sum, count := h.Snapshot()
+ if buckets != nil || counts != nil || sum != 0 || count != 0 {
+ t.Fatal("expected zero values for nil histogram snapshot")
+ }
+}
+
+func TestHistogramMultipleObservations(t *testing.T) {
+ h := newHistogram([]float64{1, 5, 10})
+ for i := 0; i < 100; i++ {
+ h.Observe(float64(i) * 0.1)
+ }
+ _, _, sum, count := h.Snapshot()
+ if count != 100 {
+ t.Fatalf("expected count=100, got %d", count)
+ }
+ // Sum of 0.0, 0.1, 0.2, ..., 9.9 = 99*100/2 * 0.1 = 495.0
+ // Tolerance needed: 0.1 is not exactly representable in float64.
+ expectedSum := 495.0
+ if math.Abs(sum-expectedSum) > 0.001 {
+ t.Fatalf("expected sum~%f, got %f", expectedSum, sum)
+ }
+}
+