From af0e9d3062e82b95f4f2bf2dbe51894fa50b0dfa Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 2 Jun 2026 10:46:55 +0300 Subject: [PATCH 01/21] EAI-1500 update documentation --- .github/workflows/README.md | 52 +++++++++++++++++++++++++++++++++++++ README.md | 1 + 2 files changed, 53 insertions(+) create mode 100644 .github/workflows/README.md diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 00000000..be81912d --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,52 @@ +# GitHub Actions Workflows + +This directory contains CI/CD workflows for cluster-forge. + +## Workflow files + +| Workflow | Trigger | Purpose | +|---|---|---| +| `helm-chart-checks.yaml` | `pull_request` | Validates Helm charts and Kyverno policy test coverage. | +| `pr-component-validation.yaml` | `pull_request` (path-filtered), `workflow_dispatch` | Validates SBOM/component sync when key files change. | +| `release-pipeline.yaml` | `workflow_dispatch` | Calculates release version, creates prerelease artifact, and publishes SBOM. | + +## Workflow details + +### `helm-chart-checks.yaml` + +- Runs on PR events (`opened`, `synchronize`, `reopened`, `ready_for_review`, `converted_to_draft`). +- Validates `root` chart with all sizing values files (`values`, `values_small`, `values_medium`, `values_large`). +- Lints and templates Kyverno policy charts. +- Enforces Kyverno test coverage (test folder, `kyverno-test.yaml`, resource files, and policy mapping). +- Runs `kyverno test` against generated policy manifests. +- Includes a comprehensive coverage job to ensure all charts under `sources/kyverno-policies` are included in CI. + +### `pr-component-validation.yaml` + +- Runs on manual dispatch and PRs to `main` when these files change: + - `sbom/components.yaml` + - `root/values.yaml` + - `sbom/*.sh` +- Installs `yq` and executes `sbom/validate-sync.sh`. +- Acts as a gate to keep SBOM/component definitions consistent. 
+ +### `release-pipeline.yaml` + +- Manual workflow with optional input: `version_override`. +- Job `release`: + - Checks out full history. + - Computes next semantic version (`ietf-tools/semver-action`) unless overridden. + - Warns when `scripts/bootstrap.sh` `LATEST_RELEASE` base version does not match release base version. + - Packages `root/`, `scripts/`, and `sources/` into `release-enterprise-ai-.tar.gz`. + - Creates a GitHub prerelease with generated notes. +- Job `sbom` (depends on `release`): + - Generates SBOM via `sbom/generate-sbom.sh`. + - Renames output to `sbom--.md`. + - Uploads SBOM asset to the GitHub release with `--clobber`. + +## Operating notes + +- PR workflows perform validation only and do not publish releases. +- Use **Actions -> Release Pipeline -> Run workflow** to cut a release. +- Set `version_override` when you need a specific tag. +- Keep `LATEST_RELEASE` in `scripts/bootstrap.sh` aligned with the release stream to avoid warnings. diff --git a/README.md b/README.md index 1502c9f3..7160631d 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,7 @@ Comprehensive documentation is available in the `/docs` folder: | **Policy System** | [Kyverno Modular Design](docs/kyverno_modular_design.md) | | **Storage Policies** | [Kyverno Access Mode Policy](docs/kyverno_access_mode_policy.md) | | **Operations** | [Backup and Restore](docs/backup_and_restore.md) | +| **CI/CD** | [Workflow Documentation](.github/workflows/README.md) | Additional documentation: - **SBOM**: See `/sbom` folder for software bill of materials generation and validation From 60e5acd61b948e3cc11263cc5c93cd2edd0ec8db Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Wed, 3 Jun 2026 17:36:20 +0300 Subject: [PATCH 02/21] EAI-5821 add ai-gateway config --- root/values.yaml | 51 + root/values_large.yaml | 3 + root/values_medium.yaml | 3 + root/values_small.yaml | 3 + .../envoy-ai-gateway-crds/v0.6.0/Chart.yaml | 20 + ...gateway.envoyproxy.io_aigatewayroutes.yaml | 1730 +++ 
...teway.envoyproxy.io_aiservicebackends.yaml | 832 ++ ...envoyproxy.io_backendsecuritypolicies.yaml | 10127 ++++++++++++++++ ...igateway.envoyproxy.io_gatewayconfigs.yaml | 1422 +++ .../aigateway.envoyproxy.io_mcproutes.yaml | 9346 ++++++++++++++ ...aigateway.envoyproxy.io_quotapolicies.yaml | 497 + .../envoy-ai-gateway-crds/v0.6.0/values.yaml | 5 + sources/envoy-ai-gateway/v0.6.0/Chart.yaml | 20 + .../v0.6.0/templates/NOTES.txt | 0 .../v0.6.0/templates/_helpers.tpl | 133 + .../v0.6.0/templates/admission_webhook.yaml | 106 + .../v0.6.0/templates/deployment.yaml | 171 + ...gateway_cluster_role_for_ai_resources.yaml | 35 + ...teway_cluster_role_for_inference_pool.yaml | 38 + .../v0.6.0/templates/service.yaml | 20 + .../v0.6.0/templates/serviceaccount.yaml | 115 + sources/envoy-ai-gateway/v0.6.0/values.yaml | 263 + sources/envoy-gateway-config/README.md | 25 + .../templates/client-trafic-policy.yaml | 4 +- .../templates/envoy-proxy-access-logs.yaml | 11 +- .../templates/gateway-config-ai.yaml | 12 + .../templates/gateway.yaml | 8 +- .../templates/https-service.yaml | 4 + .../templates/tlsroute-k8s-passthrough.yaml | 11 +- sources/envoy-gateway-config/values.yaml | 12 +- .../v1.5.0/Chart.yaml | 14 + .../v1.5.0/templates/inferencepools.yaml | 373 + .../v1.5.0/values.yaml | 1 + .../collectors-logs-metrics-k8s.yaml | 27 + 34 files changed, 25430 insertions(+), 12 deletions(-) create mode 100644 sources/envoy-ai-gateway-crds/v0.6.0/Chart.yaml create mode 100644 sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml create mode 100644 sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_aiservicebackends.yaml create mode 100644 sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_backendsecuritypolicies.yaml create mode 100644 sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_gatewayconfigs.yaml create mode 100644 
sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_mcproutes.yaml create mode 100644 sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_quotapolicies.yaml create mode 100644 sources/envoy-ai-gateway-crds/v0.6.0/values.yaml create mode 100644 sources/envoy-ai-gateway/v0.6.0/Chart.yaml create mode 100644 sources/envoy-ai-gateway/v0.6.0/templates/NOTES.txt create mode 100644 sources/envoy-ai-gateway/v0.6.0/templates/_helpers.tpl create mode 100644 sources/envoy-ai-gateway/v0.6.0/templates/admission_webhook.yaml create mode 100644 sources/envoy-ai-gateway/v0.6.0/templates/deployment.yaml create mode 100644 sources/envoy-ai-gateway/v0.6.0/templates/envoy_gateway_cluster_role_for_ai_resources.yaml create mode 100644 sources/envoy-ai-gateway/v0.6.0/templates/envoy_gateway_cluster_role_for_inference_pool.yaml create mode 100644 sources/envoy-ai-gateway/v0.6.0/templates/service.yaml create mode 100644 sources/envoy-ai-gateway/v0.6.0/templates/serviceaccount.yaml create mode 100644 sources/envoy-ai-gateway/v0.6.0/values.yaml create mode 100644 sources/envoy-gateway-config/README.md create mode 100644 sources/envoy-gateway-config/templates/gateway-config-ai.yaml create mode 100644 sources/inference-extension-crds/v1.5.0/Chart.yaml create mode 100644 sources/inference-extension-crds/v1.5.0/templates/inferencepools.yaml create mode 100644 sources/inference-extension-crds/v1.5.0/values.yaml diff --git a/root/values.yaml b/root/values.yaml index a0c65eb3..1a245c55 100644 --- a/root/values.yaml +++ b/root/values.yaml @@ -582,6 +582,40 @@ apps: syncWave: -30 valuesObject: kubernetesClusterDomain: cluster.local + config: + envoyGateway: + extensionApis: + enableBackend: true + extensionManager: + resources: + - group: aigateway.envoyproxy.io + version: v1beta1 + kind: AIGatewayRoute + - group: aigateway.envoyproxy.io + version: v1beta1 + kind: AIServiceBackend + backendResources: + - group: inference.networking.k8s.io + version: v1 + kind: 
InferencePool + hooks: + xdsTranslator: + translation: + listener: + includeAll: true + route: + includeAll: true + cluster: + includeAll: true + secret: + includeAll: true + post: + - Translation + - Cluster + - Route + service: + host: ai-gateway-controller.envoy-ai-gateway-system.svc.cluster.local + port: 1063 envoy-gateway-config: helmParameters: - name: domain @@ -590,6 +624,23 @@ apps: path: envoy-gateway-config syncWave: -15 valuesFile: values.yaml + envoy-ai-gateway-crds: + namespace: envoy-ai-gateway-system + path: envoy-ai-gateway-crds/v0.6.0 + syncWave: -10 + envoy-ai-gateway: + namespace: envoy-ai-gateway-system + path: envoy-ai-gateway/v0.6.0 + syncWave: -5 + valuesObject: + controller: + mcp: + sessionEncryption: + seed: "cluster-forge-default-seed-override-in-production" + inference-extension-crds: + namespace: envoy-ai-gateway-system + path: inference-extension-crds/v1.5.0 + syncWave: -10 kserve: namespace: kserve-system path: kserve/v0.16.0 diff --git a/root/values_large.yaml b/root/values_large.yaml index e98ee421..b006ec63 100644 --- a/root/values_large.yaml +++ b/root/values_large.yaml @@ -20,8 +20,11 @@ enabledApps: - cnpg-operator - external-secrets - external-secrets-config + - inference-extension-crds - envoy-gateway - envoy-gateway-config + - envoy-ai-gateway-crds + - envoy-ai-gateway - gitea - gitea-config - kaiwo diff --git a/root/values_medium.yaml b/root/values_medium.yaml index 4429597a..32ee9ea7 100644 --- a/root/values_medium.yaml +++ b/root/values_medium.yaml @@ -22,8 +22,11 @@ enabledApps: - cnpg-operator - external-secrets - external-secrets-config + - inference-extension-crds - envoy-gateway - envoy-gateway-config + - envoy-ai-gateway-crds + - envoy-ai-gateway - gitea - gitea-config - kaiwo diff --git a/root/values_small.yaml b/root/values_small.yaml index e995aec6..9fdc6cb6 100644 --- a/root/values_small.yaml +++ b/root/values_small.yaml @@ -27,8 +27,11 @@ enabledApps: - cnpg-operator - external-secrets - external-secrets-config 
+ - inference-extension-crds - envoy-gateway - envoy-gateway-config + - envoy-ai-gateway-crds + - envoy-ai-gateway - gitea - gitea-config - kaiwo diff --git a/sources/envoy-ai-gateway-crds/v0.6.0/Chart.yaml b/sources/envoy-ai-gateway-crds/v0.6.0/Chart.yaml new file mode 100644 index 00000000..44caf635 --- /dev/null +++ b/sources/envoy-ai-gateway-crds/v0.6.0/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +appVersion: v0.6.0 +description: The Helm chart for Envoy AI Gateway CRD +home: https://aigateway.envoyproxy.io/ +icon: https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/heads/main/site/static/img/logo.svg +keywords: +- gateway-api +- envoyproxy +- envoy-gateway +- eg +- ai-gateway +- ai +maintainers: +- name: envoy-ai-gateway-maintainers + url: https://github.com/envoyproxy/ai-gateway/blob/main/CODEOWNERS +name: ai-gateway-crds-helm +sources: +- https://github.com/envoyproxy/ai-gateway +type: application +version: v0.6.0 diff --git a/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml new file mode 100644 index 00000000..83705814 --- /dev/null +++ b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml @@ -0,0 +1,1730 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. 
+ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.0 + name: aigatewayroutes.aigateway.envoyproxy.io +spec: + group: aigateway.envoyproxy.io + names: + kind: AIGatewayRoute + listKind: AIGatewayRouteList + plural: aigatewayroutes + singular: aigatewayroute + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + deprecated: true + deprecationWarning: aigateway.envoyproxy.io/v1alpha1 is deprecated; use aigateway.envoyproxy.io/v1beta1 + instead + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + AIGatewayRoute combines multiple AIServiceBackends and attaching them to Gateway(s) resources. + + This serves as a way to define a "unified" AI API for a Gateway which allows downstream + clients to use a single schema API to interact with multiple AI backends. + + Envoy AI Gateway will generate the following k8s resources corresponding to the AIGatewayRoute: + + - HTTPRoute of the Gateway API as a top-level resource to bind all backends. + The name of the HTTPRoute is the same as the AIGatewayRoute. + - HTTPRouteFilter of the Envoy Gateway API per namespace for automatic hostname rewrite. + The name of the HTTPRouteFilter is `ai-eg-host-rewrite-${AIGatewayRoute.Name}`. + + All of these resources are created in the same namespace as the AIGatewayRoute. Note that this is the implementation + detail subject to change. If you want to customize the default behavior of the Envoy AI Gateway, you can use these + resources as a reference and create your own resources. Alternatively, you can use EnvoyPatchPolicy API of the Envoy + Gateway to patch the generated resources. For example, you can configure the retry fallback behavior by attaching + BackendTrafficPolicy API of Envoy Gateway to the generated HTTPRoute. 
+ properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the details of the AIGatewayRoute. + properties: + llmRequestCosts: + description: "LLMRequestCosts specifies how to capture the cost of + the LLM-related request, notably the token usage.\nThe AI Gateway + filter will capture each specified number and store it in the Envoy's + dynamic\nmetadata per HTTP request. The namespaced key is \"io.envoy.ai_gateway\".\n\nThese + route-level costs override any global defaults defined in GatewayConfig.Spec.GlobalLLMRequestCosts\nfor + the same metadataKey. 
If a metadataKey is not defined in either + place, no cost is calculated for it.\n\nThis allows you to define + common cost formulas once at the gateway level (e.g., via GatewayConfig)\nand + only override them in specific routes when needed (e.g., premium + routes with different pricing).\n\nFor example, let's say we have + the following LLMRequestCosts configuration:\n```yaml\n\tllmRequestCosts:\n\t- + metadataKey: llm_input_token\n\t type: InputToken\n\t- metadataKey: + llm_output_token\n\t type: OutputToken\n\t- metadataKey: llm_total_token\n\t + \ type: TotalToken\n\t- metadataKey: llm_cached_input_token\n\t + \ type: CachedInputToken\n- metadataKey: llm_cache_creation_input_token\n + \ type: CacheCreationInputToken\n```\nThen, with the following + BackendTrafficPolicy of Envoy Gateway, you can have three\nrate + limit buckets for each unique x-tenant-id header value. One bucket + is for the input token,\nthe other is for the output token, and + the last one is for the total token.\nEach bucket will be reduced + by the corresponding token usage captured by the AI Gateway filter.\n\n```yaml\n\tapiVersion: + gateway.envoyproxy.io/v1alpha1\n\tkind: BackendTrafficPolicy\n\tmetadata:\n\t + \ name: some-example-token-rate-limit\n\t namespace: default\n\tspec:\n\t + \ targetRefs:\n\t - group: gateway.networking.k8s.io\n\t kind: + HTTPRoute\n\t name: usage-rate-limit\n\t rateLimit:\n\t type: + Global\n\t global:\n\t rules:\n\t - clientSelectors:\n\t + \ # Do the rate limiting based on the x-tenant-id header.\n\t + \ - headers:\n\t - name: x-tenant-id\n\t + \ type: Distinct\n\t limit:\n\t # + Configures the number of \"tokens\" allowed per hour.\n\t requests: + 10000\n\t unit: Hour\n\t cost:\n\t request:\n\t + \ from: Number\n\t # Setting the request + cost to zero allows to only check the rate limit budget,\n\t # + and not consume the budget on the request path.\n\t number: + 0\n\t # This specifies the cost of the response retrieved + from the dynamic metadata set by 
the AI Gateway filter.\n\t # + The extracted value will be used to consume the rate limit budget, + and subsequent requests will be rate limited\n\t # if + the budget is exhausted.\n\t response:\n\t from: + Metadata\n\t metadata:\n\t namespace: + io.envoy.ai_gateway\n\t key: llm_input_token\n\t + \ - clientSelectors:\n\t - headers:\n\t - + name: x-tenant-id\n\t type: Distinct\n\t limit:\n\t + \ requests: 10000\n\t unit: Hour\n\t cost:\n\t + \ request:\n\t from: Number\n\t number: + 0\n\t response:\n\t from: Metadata\n\t metadata:\n\t + \ namespace: io.envoy.ai_gateway\n\t key: + llm_output_token\n\t - clientSelectors:\n\t - + headers:\n\t - name: x-tenant-id\n\t type: + Distinct\n\t limit:\n\t requests: 10000\n\t + \ unit: Hour\n\t cost:\n\t request:\n\t + \ from: Number\n\t number: 0\n\t response:\n\t + \ from: Metadata\n\t metadata:\n\t namespace: + io.envoy.ai_gateway\n\t key: llm_total_token\n```\n\nNote + that when multiple AIGatewayRoute resources are attached to the + same Gateway, and\ndifferent costs are configured for the same metadata + key, each route's rule is carried in\nthe filter configuration with + the route identity; the data plane selects the matching rule\nper + request (by route), so each route can define its own cost for the + same metadata key." + items: + description: LLMRequestCost configures each request cost. + properties: + cel: + description: "CEL is the CEL expression to calculate the cost + of the request.\nThe CEL expression must return a signed or + unsigned integer. If the\nreturn value is negative, it will + be error.\n\nThe expression can use the following variables:\n\n\t* + model: the model name extracted from the request content. + Type: string.\n\t* backend: the backend name in the form of + \"name.namespace\". Type: string.\n\t* input_tokens: the number + of input tokens. Type: unsigned integer.\n\t* cached_input_tokens: + the number of cached read input tokens. 
Type: unsigned integer.\n\t* + cache_creation_input_tokens: the number of cache creation + input tokens. Type: unsigned integer.\n\t* output_tokens: + the number of output tokens. Type: unsigned integer.\n\t* + total_tokens: the total number of tokens. Type: unsigned integer.\n\t* + reasoning_tokens: the number of reasoning tokens. Type: unsigned + integer.\n\nFor example, the following expressions are valid:\n\n\t* + \"model == 'llama' ? input_tokens + output_token * 0.5 : + total_tokens\"\n\t* \"backend == 'foo.default' ? input_tokens + + output_tokens : total_tokens\"\n\t* \"backend == 'bar.default' + ? (input_tokens - cached_input_tokens) + cached_input_tokens + * 0.1 + cache_creation_input_tokens * 1.25 + output_tokens + : total_tokens\"\n\t* \"input_tokens + output_tokens + total_tokens\"\n\t* + \"input_tokens * output_tokens\"" + type: string + metadataKey: + description: MetadataKey is the key of the metadata to store + this cost of the request. + type: string + type: + description: |- + Type specifies the type of the request cost. The default is "OutputToken", + and it uses "output token" as the cost. The other types are "InputToken", "TotalToken", + "CachedInputToken", "CacheCreationInputToken", "ReasoningToken", and "CEL". + enum: + - OutputToken + - InputToken + - CachedInputToken + - CacheCreationInputToken + - TotalToken + - ReasoningToken + - CEL + type: string + required: + - metadataKey + - type + type: object + maxItems: 36 + type: array + parentRefs: + description: |- + ParentRefs are the names of the Gateway resources this AIGatewayRoute is being attached to. + Currently, each reference's Kind must be Gateway. + items: + description: |- + ParentReference identifies an API object (usually a Gateway) that can be considered + a parent of this resource (usually a route). 
There are two kinds of parent resources + with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + This API may be extended in the future to support additional kinds of parent + resources. + + The API object must be valid in the cluster; the Group and Kind must + be registered in the cluster for this reference to be valid. + properties: + group: + default: gateway.networking.k8s.io + description: |- + Group is the group of the referent. + When unspecified, "gateway.networking.k8s.io" is inferred. + To set the core API group (such as for a "Service" kind referent), + Group must be explicitly set to "" (empty string). + + Support: Core + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Gateway + description: |- + Kind is kind of the referent. + + There are two kinds of parent resources with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + Support for other resources is Implementation-Specific. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: |- + Name is the name of the referent. + + Support: Core + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referent. When unspecified, this refers + to the local namespace of the Route. + + Note that there are specific rules for ParentRefs which cross namespace + boundaries. Cross-namespace references are only valid if they are explicitly + allowed by something in the namespace they are referring to. For example: + Gateway has the AllowedRoutes field, and ReferenceGrant provides a + generic way to enable any other kind of cross-namespace reference. 
+ + + ParentRefs from a Route to a Service in the same namespace are "producer" + routes, which apply default routing rules to inbound connections from + any namespace to the Service. + + ParentRefs from a Route to a Service in a different namespace are + "consumer" routes, and these routing rules are only applied to outbound + connections originating from the same namespace as the Route, for which + the intended destination of the connections are a Service targeted as a + ParentRef of the Route. + + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port is the network port this Route targets. It can be interpreted + differently based on the type of parent resource. + + When the parent resource is a Gateway, this targets all listeners + listening on the specified port that also support this kind of Route(and + select this Route). It's not recommended to set `Port` unless the + networking behaviors specified in a Route must apply to a specific port + as opposed to a listener(s) whose port(s) may be changed. When both Port + and SectionName are specified, the name and port of the selected listener + must match both specified values. + + + When the parent resource is a Service, this targets a specific port in the + Service spec. When both Port (experimental) and SectionName are specified, + the name and port of the selected port must match both specified values. + + + Implementations MAY choose to support other parent resources. + Implementations supporting other types of parent resources MUST clearly + document how/if Port is interpreted. + + For the purpose of status, an attachment is considered successful as + long as the parent resource accepts it partially. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. 
If 1 of 2 Gateway listeners accept attachment + from the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, + the Route MUST be considered detached from the Gateway. + + Support: Extended + format: int32 + maximum: 65535 + minimum: 1 + type: integer + sectionName: + description: |- + SectionName is the name of a section within the target resource. In the + following resources, SectionName is interpreted as the following: + + * Gateway: Listener name. When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + * Service: Port name. When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + + Implementations MAY choose to support attaching Routes to other resources. + If that is the case, they MUST clearly document how SectionName is + interpreted. + + When unspecified (empty string), this will reference the entire resource. + For the purpose of status, an attachment is considered successful if at + least one section in the parent resource accepts it. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment from + the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, the + Route MUST be considered detached from the Gateway. 
+ + Support: Core + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-validations: + - message: only Gateway is supported + rule: self.all(match, match.kind == 'Gateway') + rules: + description: |- + Rules is the list of AIGatewayRouteRule that this AIGatewayRoute will match the traffic to. + Each rule is a subset of the HTTPRoute in the Gateway API (https://gateway-api.sigs.k8s.io/api-types/httproute/). + + AI Gateway controller will generate a HTTPRoute based on the configuration given here with the additional + modifications to achieve the necessary jobs, notably inserting the AI Gateway filter responsible for + the transformation of the request and response, etc. + + In the matching conditions in the AIGatewayRouteRule, `x-ai-eg-model` header is available + if we want to describe the routing behavior based on the model name. The model name is extracted + from the request content before the routing decision. + + How multiple rules are matched is the same as the Gateway API. See for the details: + https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPRoute + items: + description: AIGatewayRouteRule is a rule that defines the routing + behavior of the AIGatewayRoute. + properties: + backendRefs: + description: |- + BackendRefs is the list of backends that this rule will route the traffic to. + Each backend can have a weight that determines the traffic distribution. + + The namespace of each backend defaults to the same namespace as the AIGatewayRoute when not specified. + Cross-namespace references are supported by specifying the namespace field. + When a namespace different than the AIGatewayRoute's namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. 
+ + BackendRefs can reference either AIServiceBackend resources (default) or InferencePool resources + from the Gateway API Inference Extension. When referencing InferencePool resources: + - Only one InferencePool backend is allowed per rule + - Cannot mix InferencePool with AIServiceBackend references in the same rule + - Fallback behavior is handled by the InferencePool's endpoint picker + + For AIServiceBackend references, you can achieve fallback behavior by configuring multiple backends + combined with the BackendTrafficPolicy of Envoy Gateway. + Please refer to https://gateway.envoyproxy.io/docs/tasks/traffic/failover/ as well as + https://gateway.envoyproxy.io/docs/tasks/traffic/retry/. + items: + description: |- + AIGatewayRouteRuleBackendRef is a reference to a backend with a weight. + It can reference either an AIServiceBackend or an InferencePool resource. + properties: + bodyMutation: + description: |- + BodyMutation defines the request body mutation to be applied to this backend. + This allows modification of JSON fields in the request body before sending to the backend. + When both route-level and backend-level BodyMutation are defined, + route-level takes precedence over backend-level for conflicting operations. + This field is ignored when referencing InferencePool resources. + properties: + remove: + description: |- + Remove the given JSON field(s) from the HTTP request body before sending to the backend. + The value of Remove is a list of top-level field names to remove. + + Input: + { + "model": "gpt-4", + "service_tier": "default", + "internal_flag": true + } + + Config: + remove: ["service_tier", "internal_flag"] + + Output: + { + "model": "gpt-4" + } + items: + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + set: + description: |- + Set overwrites/adds the request body with the given JSON field (name, value) + before sending to the backend. Only top-level fields are currently supported. 
+ + Input: + { + "model": "gpt-4", + "service_tier": "default" + } + + Config: + set: + - path: "service_tier" + value: "scale" + + Output: + { + "model": "gpt-4", + "service_tier": "scale" + } + items: + description: HTTPBodyField represents a JSON field + name and value for body mutation + properties: + path: + description: |- + Path is the top-level field name to set in the request body. + Examples: "service_tier", "max_tokens", "temperature" + minLength: 1 + type: string + value: + description: |- + Value is the JSON value to set at the specified field. This can be any valid JSON value: + string, number, boolean, object, array, or null. + The value will be parsed as JSON and inserted at the specified field. + + Examples: + - "\"scale\"" (string) + - "42" (number) + - "true" (boolean) + - "{\"key\": \"value\"}" (object) + - "[1, 2, 3]" (array) + - "null" (null) + type: string + required: + - path + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - path + x-kubernetes-list-type: map + type: object + group: + description: |- + Group is the group of the backend resource. + When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend). + Currently, only "inference.networking.k8s.io" is supported for InferencePool resources. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + headerMutation: + description: |- + HeaderMutation defines the request header mutation to be applied to this backend. + When both route-level and backend-level HeaderMutation are defined, + route-level takes precedence over backend-level for conflicting operations. + This field is ignored when referencing InferencePool resources. + properties: + remove: + description: |- + Remove the given header(s) from the HTTP request before the action. The + value of Remove is a list of HTTP header names. 
Note that the header + names are case-insensitive (see + https://datatracker.ietf.org/doc/html/rfc2616#section-4.2). + + Input: + GET /foo HTTP/1.1 + my-header1: foo + my-header2: bar + my-header3: baz + + Config: + remove: ["my-header1", "my-header3"] + + Output: + GET /foo HTTP/1.1 + my-header2: bar + items: + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + set: + description: |- + Set overwrites/adds the request with the given header (name, value) + before the action. + + Input: + GET /foo HTTP/1.1 + my-header: foo + + Config: + set: + - name: "my-header" + value: "bar" + + Output: + GET /foo HTTP/1.1 + my-header: bar + items: + description: HTTPHeader represents an HTTP Header + name and value as defined by RFC 7230. + properties: + name: + description: |- + Name is the name of the HTTP Header to be matched. Name matching MUST be + case-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2). + + If multiple entries specify equivalent header names, the first entry with + an equivalent name MUST be considered for a match. Subsequent entries + with an equivalent header name MUST be ignored. Due to the + case-insensitivity of header names, "foo" and "Foo" are considered + equivalent. + maxLength: 256 + minLength: 1 + pattern: ^[A-Za-z0-9!#$%&'*+\-.^_\x60|~]+$ + type: string + value: + description: Value is the value of HTTP Header + to be matched. + maxLength: 4096 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + kind: + description: |- + Kind is the kind of the backend resource. + When not specified, defaults to AIServiceBackend. + Currently, only "InferencePool" is supported when Group is specified. + maxLength: 63 + pattern: ^$|^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + modelNameOverride: + description: |- + Name of the model in the backend. 
If provided this will override the name provided in the request. + This field is ignored when referencing InferencePool resources. + type: string + name: + description: |- + Name is the name of the backend resource. + When Group and Kind are not specified, this refers to an AIServiceBackend. + When Group and Kind are specified, this refers to the resource of the specified type. + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend resource. + When unspecified (or empty string), this refers to the local namespace of the AIGatewayRoute. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + priority: + default: 0 + description: |- + Priority is the priority of the backend. This sets the priority on the underlying endpoints. + See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority + Note: This will override the `fallback` property of the underlying Envoy Gateway Backend + This field is ignored when referencing InferencePool resources. + + Default is 0. + format: int32 + minimum: 0 + type: integer + weight: + default: 1 + description: |- + Weight is the weight of the backend. This is exactly the same as the weight in + the BackendRef in the Gateway API. See for the details: + https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef + + Default is 1.
+ format: int32 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: group and kind must be specified together + rule: '!has(self.group) && !has(self.kind) || (has(self.group) + && has(self.kind))' + - message: only InferencePool from inference.networking.k8s.io + group is supported + rule: '!has(self.group) || (self.group == ''inference.networking.k8s.io'' + && self.kind == ''InferencePool'')' + maxItems: 128 + type: array + matches: + description: |- + Matches is the list of AIGatewayRouteMatch that this rule will match the traffic to. + This is a subset of the HTTPRouteMatch in the Gateway API. See for the details: + https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPRouteMatch + items: + properties: + headers: + description: |- + Headers specifies HTTP request header matchers. See HeaderMatch in the Gateway API for the details: + https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPHeaderMatch + items: + description: |- + HTTPHeaderMatch describes how to select a HTTP route by matching HTTP request + headers. + properties: + name: + description: |- + Name is the name of the HTTP Header to be matched. Name matching MUST be + case-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2). + + If multiple entries specify equivalent header names, only the first + entry with an equivalent name MUST be considered for a match. Subsequent + entries with an equivalent header name MUST be ignored. Due to the + case-insensitivity of header names, "foo" and "Foo" are considered + equivalent. + + When a header is repeated in an HTTP request, it is + implementation-specific behavior as to how this is represented. + Generally, proxies should follow the guidance from the RFC: + https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2.2 regarding + processing a repeated header, with special handling for "Set-Cookie". 
+ maxLength: 256 + minLength: 1 + pattern: ^[A-Za-z0-9!#$%&'*+\-.^_\x60|~]+$ + type: string + type: + default: Exact + description: |- + Type specifies how to match against the value of the header. + + Support: Core (Exact) + + Support: Implementation-specific (RegularExpression) + + Since RegularExpression HeaderMatchType has implementation-specific + conformance, implementations can support POSIX, PCRE or any other dialects + of regular expressions. Please read the implementation's documentation to + determine the supported dialect. + enum: + - Exact + - RegularExpression + type: string + value: + description: Value is the value of HTTP Header to + be matched. + maxLength: 4096 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + maxItems: 128 + type: array + modelsCreatedAt: + description: |- + ModelsCreatedAt represents the creation timestamp of the running models serving by the backends, + which will be exported as the field of "Created" in openai-compatible API "/models". + It follows the format of RFC 3339, for example "2024-05-21T10:00:00Z". + + This is used only when this rule contains "x-ai-eg-model" in its header matching + where the header value will be recognized as a "model" in "/models" endpoint. + All the matched models will share the same creation time. + + Default to the creation timestamp of the AIGatewayRoute if not set. + format: date-time + type: string + modelsOwnedBy: + default: Envoy AI Gateway + description: |- + ModelsOwnedBy represents the owner of the running models serving by the backends, + which will be exported as the field of "OwnedBy" in openai-compatible API "/models". + + This is used only when this rule contains "x-ai-eg-model" in its header matching + where the header value will be recognized as a "model" in "/models" endpoint. + All the matched models will share the same owner. 
+ + Default to "Envoy AI Gateway" if not set. + type: string + timeouts: + description: |- + Timeouts defines the timeouts that can be configured for an HTTP request. + + If this field is not set, or the timeout.requestTimeout is nil, Envoy AI Gateway defaults to + set 60s for the request timeout as opposed to 15s of the Envoy Gateway's default value. + + For streaming responses (like chat completions with stream=true), consider setting + longer timeouts as the response may take time until the completion. + properties: + backendRequest: + description: |- + BackendRequest specifies a timeout for an individual request from the gateway + to a backend. This covers the time from when the request first starts being + sent from the gateway to when the full response has been received from the backend. + + Setting a timeout to the zero duration (e.g. "0s") SHOULD disable the timeout + completely. Implementations that cannot completely disable the timeout MUST + instead interpret the zero duration as the longest possible value to which + the timeout can be set. + + An entire client HTTP transaction with a gateway, covered by the Request timeout, + may result in more than one call from the gateway to the destination backend, + for example, if automatic retries are supported. + + The value of BackendRequest must be a Gateway API Duration string as defined by + GEP-2257. When this field is unspecified, its behavior is implementation-specific; + when specified, the value of BackendRequest must be no more than the value of the + Request timeout (since the Request timeout encompasses the BackendRequest timeout). + + Support: Extended + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + request: + description: |- + Request specifies the maximum duration for a gateway to respond to an HTTP request. + If the gateway has not been able to respond before this deadline is met, the gateway + MUST return a timeout error. 
+ + For example, setting the `rules.timeouts.request` field to the value `10s` in an + `HTTPRoute` will cause a timeout if a client request is taking longer than 10 seconds + to complete. + + Setting a timeout to the zero duration (e.g. "0s") SHOULD disable the timeout + completely. Implementations that cannot completely disable the timeout MUST + instead interpret the zero duration as the longest possible value to which + the timeout can be set. + + This timeout is intended to cover as close to the whole request-response transaction + as possible although an implementation MAY choose to start the timeout after the entire + request stream has been received instead of immediately after the transaction is + initiated by the client. + + The value of Request is a Gateway API Duration string as defined by GEP-2257. When this + field is unspecified, request timeout behavior is implementation-specific. + + Support: Extended + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + x-kubernetes-validations: + - message: backendRequest timeout cannot be longer than request + timeout + rule: '!(has(self.request) && has(self.backendRequest) && + duration(self.request) != duration(''0s'') && duration(self.backendRequest) + > duration(self.request))' + type: object + x-kubernetes-validations: + - message: cannot mix InferencePool and AIServiceBackend references + in the same rule + rule: '!has(self.backendRefs) || size(self.backendRefs) == 0 || + (self.backendRefs.all(ref, !has(ref.group) && !has(ref.kind)) + || self.backendRefs.all(ref, has(ref.group) && has(ref.kind)))' + - message: only one InferencePool backend is allowed per rule + rule: '!has(self.backendRefs) || size(self.backendRefs) == 0 || + !self.backendRefs.exists(ref, has(ref.group) && has(ref.kind)) + || size(self.backendRefs) == 1' + maxItems: 128 + type: array + required: + - rules + type: object + status: + description: Status defines the status details of the AIGatewayRoute. 
+ properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + AIGatewayRoute combines multiple AIServiceBackends and attaching them to Gateway(s) resources. + + This serves as a way to define a "unified" AI API for a Gateway which allows downstream + clients to use a single schema API to interact with multiple AI backends. + + Envoy AI Gateway will generate the following k8s resources corresponding to the AIGatewayRoute: + + - HTTPRoute of the Gateway API as a top-level resource to bind all backends. + The name of the HTTPRoute is the same as the AIGatewayRoute. + - HTTPRouteFilter of the Envoy Gateway API per namespace for automatic hostname rewrite. + The name of the HTTPRouteFilter is `ai-eg-host-rewrite-${AIGatewayRoute.Name}`. + + All of these resources are created in the same namespace as the AIGatewayRoute. Note that this is the implementation + detail subject to change. If you want to customize the default behavior of the Envoy AI Gateway, you can use these + resources as a reference and create your own resources. Alternatively, you can use EnvoyPatchPolicy API of the Envoy + Gateway to patch the generated resources. For example, you can configure the retry fallback behavior by attaching + BackendTrafficPolicy API of Envoy Gateway to the generated HTTPRoute. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the details of the AIGatewayRoute. + properties: + llmRequestCosts: + description: "LLMRequestCosts specifies how to capture the cost of + the LLM-related request, notably the token usage.\nThe AI Gateway + filter will capture each specified number and store it in the Envoy's + dynamic\nmetadata per HTTP request. The namespaced key is \"io.envoy.ai_gateway\".\n\nThese + route-level costs override any global defaults defined in GatewayConfig.Spec.GlobalLLMRequestCosts\nfor + the same metadataKey. If a metadataKey is not defined in either + place, no cost is calculated for it.\n\nThis allows you to define + common cost formulas once at the gateway level (e.g., via GatewayConfig)\nand + only override them in specific routes when needed (e.g., premium + routes with different pricing).\n\nFor example, let's say we have + the following LLMRequestCosts configuration:\n```yaml\n\tllmRequestCosts:\n\t- + metadataKey: llm_input_token\n\t type: InputToken\n\t- metadataKey: + llm_output_token\n\t type: OutputToken\n\t- metadataKey: llm_total_token\n\t + \ type: TotalToken\n\t- metadataKey: llm_cached_input_token\n\t + \ type: CachedInputToken\n- metadataKey: llm_cache_creation_input_token\n + \ type: CacheCreationInputToken\n```\nThen, with the following + BackendTrafficPolicy of Envoy Gateway, you can have three\nrate + limit buckets for each unique x-tenant-id header value. 
One bucket + is for the input token,\nthe other is for the output token, and + the last one is for the total token.\nEach bucket will be reduced + by the corresponding token usage captured by the AI Gateway filter.\n\n```yaml\n\tapiVersion: + gateway.envoyproxy.io/v1alpha1\n\tkind: BackendTrafficPolicy\n\tmetadata:\n\t + \ name: some-example-token-rate-limit\n\t namespace: default\n\tspec:\n\t + \ targetRefs:\n\t - group: gateway.networking.k8s.io\n\t kind: + HTTPRoute\n\t name: usage-rate-limit\n\t rateLimit:\n\t type: + Global\n\t global:\n\t rules:\n\t - clientSelectors:\n\t + \ # Do the rate limiting based on the x-tenant-id header.\n\t + \ - headers:\n\t - name: x-tenant-id\n\t + \ type: Distinct\n\t limit:\n\t # + Configures the number of \"tokens\" allowed per hour.\n\t requests: + 10000\n\t unit: Hour\n\t cost:\n\t request:\n\t + \ from: Number\n\t # Setting the request + cost to zero allows to only check the rate limit budget,\n\t # + and not consume the budget on the request path.\n\t number: + 0\n\t # This specifies the cost of the response retrieved + from the dynamic metadata set by the AI Gateway filter.\n\t # + The extracted value will be used to consume the rate limit budget, + and subsequent requests will be rate limited\n\t # if + the budget is exhausted.\n\t response:\n\t from: + Metadata\n\t metadata:\n\t namespace: + io.envoy.ai_gateway\n\t key: llm_input_token\n\t + \ - clientSelectors:\n\t - headers:\n\t - + name: x-tenant-id\n\t type: Distinct\n\t limit:\n\t + \ requests: 10000\n\t unit: Hour\n\t cost:\n\t + \ request:\n\t from: Number\n\t number: + 0\n\t response:\n\t from: Metadata\n\t metadata:\n\t + \ namespace: io.envoy.ai_gateway\n\t key: + llm_output_token\n\t - clientSelectors:\n\t - + headers:\n\t - name: x-tenant-id\n\t type: + Distinct\n\t limit:\n\t requests: 10000\n\t + \ unit: Hour\n\t cost:\n\t request:\n\t + \ from: Number\n\t number: 0\n\t response:\n\t + \ from: Metadata\n\t metadata:\n\t namespace: + 
io.envoy.ai_gateway\n\t key: llm_total_token\n```\n\nNote + that when multiple AIGatewayRoute resources are attached to the + same Gateway, and\ndifferent costs are configured for the same metadata + key, each route's rule is carried in\nthe filter configuration with + the route identity; the data plane selects the matching rule\nper + request (by route), so each route can define its own cost for the + same metadata key." + items: + description: LLMRequestCost configures each request cost. + properties: + cel: + description: "CEL is the CEL expression to calculate the cost + of the request.\nThe CEL expression must return a signed or + unsigned integer. If the\nreturn value is negative, it will + be an error.\n\nThe expression can use the following variables:\n\n\t* + model: the model name extracted from the request content. + Type: string.\n\t* backend: the backend name in the form of + \"name.namespace\". Type: string.\n\t* input_tokens: the number + of input tokens. Type: unsigned integer.\n\t* cached_input_tokens: + the number of cached read input tokens. Type: unsigned integer.\n\t* + cache_creation_input_tokens: the number of cache creation + input tokens. Type: unsigned integer.\n\t* output_tokens: + the number of output tokens. Type: unsigned integer.\n\t* + total_tokens: the total number of tokens. Type: unsigned integer.\n\t* + reasoning_tokens: the number of reasoning tokens. Type: unsigned + integer.\n\nFor example, the following expressions are valid:\n\n\t* + \"model == 'llama' ? input_tokens + output_tokens * 0.5 : + total_tokens\"\n\t* \"backend == 'foo.default' ? input_tokens + + output_tokens : total_tokens\"\n\t* \"backend == 'bar.default' + ?
(input_tokens - cached_input_tokens) + cached_input_tokens + * 0.1 + cache_creation_input_tokens * 1.25 + output_tokens + : total_tokens\"\n\t* \"input_tokens + output_tokens + total_tokens\"\n\t* + \"input_tokens * output_tokens\"" + type: string + metadataKey: + description: MetadataKey is the key of the metadata to store + this cost of the request. + type: string + type: + description: |- + Type specifies the type of the request cost. The default is "OutputToken", + and it uses "output token" as the cost. The other types are "InputToken", "TotalToken", + "CachedInputToken", "CacheCreationInputToken", "ReasoningToken", and "CEL". + enum: + - OutputToken + - InputToken + - CachedInputToken + - CacheCreationInputToken + - TotalToken + - ReasoningToken + - CEL + type: string + required: + - metadataKey + - type + type: object + maxItems: 36 + type: array + parentRefs: + description: |- + ParentRefs are the names of the Gateway resources this AIGatewayRoute is being attached to. + Currently, each reference's Kind must be Gateway. + items: + description: |- + ParentReference identifies an API object (usually a Gateway) that can be considered + a parent of this resource (usually a route). There are two kinds of parent resources + with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + This API may be extended in the future to support additional kinds of parent + resources. + + The API object must be valid in the cluster; the Group and Kind must + be registered in the cluster for this reference to be valid. + properties: + group: + default: gateway.networking.k8s.io + description: |- + Group is the group of the referent. + When unspecified, "gateway.networking.k8s.io" is inferred. + To set the core API group (such as for a "Service" kind referent), + Group must be explicitly set to "" (empty string). 
+ + Support: Core + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Gateway + description: |- + Kind is kind of the referent. + + There are two kinds of parent resources with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + Support for other resources is Implementation-Specific. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: |- + Name is the name of the referent. + + Support: Core + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referent. When unspecified, this refers + to the local namespace of the Route. + + Note that there are specific rules for ParentRefs which cross namespace + boundaries. Cross-namespace references are only valid if they are explicitly + allowed by something in the namespace they are referring to. For example: + Gateway has the AllowedRoutes field, and ReferenceGrant provides a + generic way to enable any other kind of cross-namespace reference. + + + ParentRefs from a Route to a Service in the same namespace are "producer" + routes, which apply default routing rules to inbound connections from + any namespace to the Service. + + ParentRefs from a Route to a Service in a different namespace are + "consumer" routes, and these routing rules are only applied to outbound + connections originating from the same namespace as the Route, for which + the intended destination of the connections are a Service targeted as a + ParentRef of the Route. + + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port is the network port this Route targets. It can be interpreted + differently based on the type of parent resource. 
+ + When the parent resource is a Gateway, this targets all listeners + listening on the specified port that also support this kind of Route(and + select this Route). It's not recommended to set `Port` unless the + networking behaviors specified in a Route must apply to a specific port + as opposed to a listener(s) whose port(s) may be changed. When both Port + and SectionName are specified, the name and port of the selected listener + must match both specified values. + + + When the parent resource is a Service, this targets a specific port in the + Service spec. When both Port (experimental) and SectionName are specified, + the name and port of the selected port must match both specified values. + + + Implementations MAY choose to support other parent resources. + Implementations supporting other types of parent resources MUST clearly + document how/if Port is interpreted. + + For the purpose of status, an attachment is considered successful as + long as the parent resource accepts it partially. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment + from the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, + the Route MUST be considered detached from the Gateway. + + Support: Extended + format: int32 + maximum: 65535 + minimum: 1 + type: integer + sectionName: + description: |- + SectionName is the name of a section within the target resource. In the + following resources, SectionName is interpreted as the following: + + * Gateway: Listener name. When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + * Service: Port name. When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. 
+ + Implementations MAY choose to support attaching Routes to other resources. + If that is the case, they MUST clearly document how SectionName is + interpreted. + + When unspecified (empty string), this will reference the entire resource. + For the purpose of status, an attachment is considered successful if at + least one section in the parent resource accepts it. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment from + the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, the + Route MUST be considered detached from the Gateway. + + Support: Core + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-validations: + - message: only Gateway is supported + rule: self.all(match, match.kind == 'Gateway') + rules: + description: |- + Rules is the list of AIGatewayRouteRule that this AIGatewayRoute will match the traffic to. + Each rule is a subset of the HTTPRoute in the Gateway API (https://gateway-api.sigs.k8s.io/api-types/httproute/). + + AI Gateway controller will generate a HTTPRoute based on the configuration given here with the additional + modifications to achieve the necessary jobs, notably inserting the AI Gateway filter responsible for + the transformation of the request and response, etc. + + In the matching conditions in the AIGatewayRouteRule, `x-ai-eg-model` header is available + if we want to describe the routing behavior based on the model name. The model name is extracted + from the request content before the routing decision. + + How multiple rules are matched is the same as the Gateway API. 
See for the details: + https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPRoute + items: + description: AIGatewayRouteRule is a rule that defines the routing + behavior of the AIGatewayRoute. + properties: + backendRefs: + description: |- + BackendRefs is the list of backends that this rule will route the traffic to. + Each backend can have a weight that determines the traffic distribution. + + The namespace of each backend defaults to the same namespace as the AIGatewayRoute when not specified. + Cross-namespace references are supported by specifying the namespace field. + When a namespace different than the AIGatewayRoute's namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. + + BackendRefs can reference either AIServiceBackend resources (default) or InferencePool resources + from the Gateway API Inference Extension. When referencing InferencePool resources: + - Only one InferencePool backend is allowed per rule + - Cannot mix InferencePool with AIServiceBackend references in the same rule + - Fallback behavior is handled by the InferencePool's endpoint picker + + For AIServiceBackend references, you can achieve fallback behavior by configuring multiple backends + combined with the BackendTrafficPolicy of Envoy Gateway. + Please refer to https://gateway.envoyproxy.io/docs/tasks/traffic/failover/ as well as + https://gateway.envoyproxy.io/docs/tasks/traffic/retry/. + items: + description: |- + AIGatewayRouteRuleBackendRef is a reference to a backend with a weight. + It can reference either an AIServiceBackend or an InferencePool resource. + properties: + bodyMutation: + description: |- + BodyMutation defines the request body mutation to be applied to this backend. + This allows modification of JSON fields in the request body before sending to the backend. 
+ When both route-level and backend-level BodyMutation are defined, + route-level takes precedence over backend-level for conflicting operations. + This field is ignored when referencing InferencePool resources. + properties: + remove: + description: |- + Remove the given JSON field(s) from the HTTP request body before sending to the backend. + The value of Remove is a list of top-level field names to remove. + + Input: + { + "model": "gpt-4", + "service_tier": "default", + "internal_flag": true + } + + Config: + remove: ["service_tier", "internal_flag"] + + Output: + { + "model": "gpt-4" + } + items: + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + set: + description: |- + Set overwrites/adds the request body with the given JSON field (name, value) + before sending to the backend. Only top-level fields are currently supported. + + Input: + { + "model": "gpt-4", + "service_tier": "default" + } + + Config: + set: + - path: "service_tier" + value: "scale" + + Output: + { + "model": "gpt-4", + "service_tier": "scale" + } + items: + description: HTTPBodyField represents a JSON field + name and value for body mutation + properties: + path: + description: |- + Path is the top-level field name to set in the request body. + Examples: "service_tier", "max_tokens", "temperature" + minLength: 1 + type: string + value: + description: |- + Value is the JSON value to set at the specified field. This can be any valid JSON value: + string, number, boolean, object, array, or null. + The value will be parsed as JSON and inserted at the specified field. + + Examples: + - "\"scale\"" (string) + - "42" (number) + - "true" (boolean) + - "{\"key\": \"value\"}" (object) + - "[1, 2, 3]" (array) + - "null" (null) + type: string + required: + - path + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - path + x-kubernetes-list-type: map + type: object + group: + description: |- + Group is the group of the backend resource. 
+ When not specified, defaults to aigateway.envoyproxy.io (AIServiceBackend). + Currently, only "inference.networking.k8s.io" is supported for InferencePool resources. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + headerMutation: + description: |- + HeaderMutation defines the request header mutation to be applied to this backend. + When both route-level and backend-level HeaderMutation are defined, + route-level takes precedence over backend-level for conflicting operations. + This field is ignored when referencing InferencePool resources. + properties: + remove: + description: |- + Remove the given header(s) from the HTTP request before the action. The + value of Remove is a list of HTTP header names. Note that the header + names are case-insensitive (see + https://datatracker.ietf.org/doc/html/rfc2616#section-4.2). + + Input: + GET /foo HTTP/1.1 + my-header1: foo + my-header2: bar + my-header3: baz + + Config: + remove: ["my-header1", "my-header3"] + + Output: + GET /foo HTTP/1.1 + my-header2: bar + items: + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + set: + description: |- + Set overwrites/adds the request with the given header (name, value) + before the action. + + Input: + GET /foo HTTP/1.1 + my-header: foo + + Config: + set: + - name: "my-header" + value: "bar" + + Output: + GET /foo HTTP/1.1 + my-header: bar + items: + description: HTTPHeader represents an HTTP Header + name and value as defined by RFC 7230. + properties: + name: + description: |- + Name is the name of the HTTP Header to be matched. Name matching MUST be + case-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2). + + If multiple entries specify equivalent header names, the first entry with + an equivalent name MUST be considered for a match. Subsequent entries + with an equivalent header name MUST be ignored. 
Due to the + case-insensitivity of header names, "foo" and "Foo" are considered + equivalent. + maxLength: 256 + minLength: 1 + pattern: ^[A-Za-z0-9!#$%&'*+\-.^_\x60|~]+$ + type: string + value: + description: Value is the value of HTTP Header + to be matched. + maxLength: 4096 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + kind: + description: |- + Kind is the kind of the backend resource. + When not specified, defaults to AIServiceBackend. + Currently, only "InferencePool" is supported when Group is specified. + maxLength: 63 + pattern: ^$|^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + modelNameOverride: + description: |- + Name of the model in the backend. If provided this will override the name provided in the request. + This field is ignored when referencing InferencePool resources. + type: string + name: + description: |- + Name is the name of the backend resource. + When Group and Kind are not specified, this refers to an AIServiceBackend. + When Group and Kind are specified, this refers to the resource of the specified type. + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend resource. + When unspecified (or empty string), this refers to the local namespace of the AIGatewayRoute. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + priority: + default: 0 + description: |- + Priority is the priority of the backend. This sets the priority on the underlying endpoints. 
+ See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority + Note: This will override the `faillback` property of the underlying Envoy Gateway Backend + This field is ignored when referencing InferencePool resources. + + Default is 0. + format: int32 + minimum: 0 + type: integer + weight: + default: 1 + description: |- + Weight is the weight of the backend. This is exactly the same as the weight in + the BackendRef in the Gateway API. See for the details: + https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef + + Default is 1. + format: int32 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: group and kind must be specified together + rule: '!has(self.group) && !has(self.kind) || (has(self.group) + && has(self.kind))' + - message: only InferencePool from inference.networking.k8s.io + group is supported + rule: '!has(self.group) || (self.group == ''inference.networking.k8s.io'' + && self.kind == ''InferencePool'')' + maxItems: 128 + type: array + matches: + description: |- + Matches is the list of AIGatewayRouteMatch that this rule will match the traffic to. + This is a subset of the HTTPRouteMatch in the Gateway API. See for the details: + https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPRouteMatch + items: + properties: + headers: + description: |- + Headers specifies HTTP request header matchers. See HeaderMatch in the Gateway API for the details: + https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.HTTPHeaderMatch + items: + description: |- + HTTPHeaderMatch describes how to select a HTTP route by matching HTTP request + headers. + properties: + name: + description: |- + Name is the name of the HTTP Header to be matched. Name matching MUST be + case-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2). 
+ + If multiple entries specify equivalent header names, only the first + entry with an equivalent name MUST be considered for a match. Subsequent + entries with an equivalent header name MUST be ignored. Due to the + case-insensitivity of header names, "foo" and "Foo" are considered + equivalent. + + When a header is repeated in an HTTP request, it is + implementation-specific behavior as to how this is represented. + Generally, proxies should follow the guidance from the RFC: + https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2.2 regarding + processing a repeated header, with special handling for "Set-Cookie". + maxLength: 256 + minLength: 1 + pattern: ^[A-Za-z0-9!#$%&'*+\-.^_\x60|~]+$ + type: string + type: + default: Exact + description: |- + Type specifies how to match against the value of the header. + + Support: Core (Exact) + + Support: Implementation-specific (RegularExpression) + + Since RegularExpression HeaderMatchType has implementation-specific + conformance, implementations can support POSIX, PCRE or any other dialects + of regular expressions. Please read the implementation's documentation to + determine the supported dialect. + enum: + - Exact + - RegularExpression + type: string + value: + description: Value is the value of HTTP Header to + be matched. + maxLength: 4096 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + maxItems: 128 + type: array + modelsCreatedAt: + description: |- + ModelsCreatedAt represents the creation timestamp of the running models serving by the backends, + which will be exported as the field of "Created" in openai-compatible API "/models". + It follows the format of RFC 3339, for example "2024-05-21T10:00:00Z". + + This is used only when this rule contains "x-ai-eg-model" in its header matching + where the header value will be recognized as a "model" in "/models" endpoint. 
+ All the matched models will share the same creation time. + + Default to the creation timestamp of the AIGatewayRoute if not set. + format: date-time + type: string + modelsOwnedBy: + default: Envoy AI Gateway + description: |- + ModelsOwnedBy represents the owner of the running models serving by the backends, + which will be exported as the field of "OwnedBy" in openai-compatible API "/models". + + This is used only when this rule contains "x-ai-eg-model" in its header matching + where the header value will be recognized as a "model" in "/models" endpoint. + All the matched models will share the same owner. + + Default to "Envoy AI Gateway" if not set. + type: string + timeouts: + description: |- + Timeouts defines the timeouts that can be configured for an HTTP request. + + If this field is not set, or the timeout.requestTimeout is nil, Envoy AI Gateway defaults to + set 60s for the request timeout as opposed to 15s of the Envoy Gateway's default value. + + For streaming responses (like chat completions with stream=true), consider setting + longer timeouts as the response may take time until the completion. + properties: + backendRequest: + description: |- + BackendRequest specifies a timeout for an individual request from the gateway + to a backend. This covers the time from when the request first starts being + sent from the gateway to when the full response has been received from the backend. + + Setting a timeout to the zero duration (e.g. "0s") SHOULD disable the timeout + completely. Implementations that cannot completely disable the timeout MUST + instead interpret the zero duration as the longest possible value to which + the timeout can be set. + + An entire client HTTP transaction with a gateway, covered by the Request timeout, + may result in more than one call from the gateway to the destination backend, + for example, if automatic retries are supported. + + The value of BackendRequest must be a Gateway API Duration string as defined by + GEP-2257. 
When this field is unspecified, its behavior is implementation-specific; + when specified, the value of BackendRequest must be no more than the value of the + Request timeout (since the Request timeout encompasses the BackendRequest timeout). + + Support: Extended + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + request: + description: |- + Request specifies the maximum duration for a gateway to respond to an HTTP request. + If the gateway has not been able to respond before this deadline is met, the gateway + MUST return a timeout error. + + For example, setting the `rules.timeouts.request` field to the value `10s` in an + `HTTPRoute` will cause a timeout if a client request is taking longer than 10 seconds + to complete. + + Setting a timeout to the zero duration (e.g. "0s") SHOULD disable the timeout + completely. Implementations that cannot completely disable the timeout MUST + instead interpret the zero duration as the longest possible value to which + the timeout can be set. + + This timeout is intended to cover as close to the whole request-response transaction + as possible although an implementation MAY choose to start the timeout after the entire + request stream has been received instead of immediately after the transaction is + initiated by the client. + + The value of Request is a Gateway API Duration string as defined by GEP-2257. When this + field is unspecified, request timeout behavior is implementation-specific. 
+ + Support: Extended + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + x-kubernetes-validations: + - message: backendRequest timeout cannot be longer than request + timeout + rule: '!(has(self.request) && has(self.backendRequest) && + duration(self.request) != duration(''0s'') && duration(self.backendRequest) + > duration(self.request))' + type: object + x-kubernetes-validations: + - message: cannot mix InferencePool and AIServiceBackend references + in the same rule + rule: '!has(self.backendRefs) || size(self.backendRefs) == 0 || + (self.backendRefs.all(ref, !has(ref.group) && !has(ref.kind)) + || self.backendRefs.all(ref, has(ref.group) && has(ref.kind)))' + - message: only one InferencePool backend is allowed per rule + rule: '!has(self.backendRefs) || size(self.backendRefs) == 0 || + !self.backendRefs.exists(ref, has(ref.group) && has(ref.kind)) + || size(self.backendRefs) == 1' + maxItems: 128 + type: array + required: + - rules + type: object + status: + description: Status defines the status details of the AIGatewayRoute. + properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_aiservicebackends.yaml b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_aiservicebackends.yaml new file mode 100644 index 00000000..590054da --- /dev/null +++ b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_aiservicebackends.yaml @@ -0,0 +1,832 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.0 + name: aiservicebackends.aigateway.envoyproxy.io +spec: + group: aigateway.envoyproxy.io + names: + kind: AIServiceBackend + listKind: AIServiceBackendList + plural: aiservicebackends + singular: aiservicebackend + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + deprecated: true + deprecationWarning: aigateway.envoyproxy.io/v1alpha1 is deprecated; use aigateway.envoyproxy.io/v1beta1 + instead + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + AIServiceBackend is a resource that represents a single backend for AIGatewayRoute. + A backend is a service that handles traffic with a concrete API specification. + + A AIServiceBackend is "attached" to a Backend which is either a k8s Service or a Backend resource of the Envoy Gateway. 
+ + When a backend with an attached AIServiceBackend is used as a routing target in the AIGatewayRoute (more precisely, the + HTTPRouteSpec defined in the AIGatewayRoute), the ai-gateway will generate the necessary configuration to do + the backend specific logic in the final HTTPRoute. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the details of AIServiceBackend. + properties: + backendRef: + description: |- + BackendRef is the reference to the Backend resource that this AIServiceBackend corresponds to. + + A backend must be a Backend resource of Envoy Gateway. Note that k8s Service will be supported + as a backend in the future. See https://github.com/envoyproxy/ai-gateway/issues/902 for more details. + + This is required to be set. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. 
+ + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: BackendRef must be a Backend resource of Envoy Gateway. + See https://github.com/envoyproxy/ai-gateway/issues/902 for more + details. + rule: has(self.kind) && self.kind == 'Backend' && has(self.group) + && self.group == 'gateway.envoyproxy.io' + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') ? 
has(self.port) + : true' + bodyMutation: + description: |- + BodyMutation defines the mutation of HTTP request body JSON fields that will be applied to the request + before sending it to the backend. + properties: + remove: + description: |- + Remove the given JSON field(s) from the HTTP request body before sending to the backend. + The value of Remove is a list of top-level field names to remove. + + Input: + { + "model": "gpt-4", + "service_tier": "default", + "internal_flag": true + } + + Config: + remove: ["service_tier", "internal_flag"] + + Output: + { + "model": "gpt-4" + } + items: + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + set: + description: |- + Set overwrites/adds the request body with the given JSON field (name, value) + before sending to the backend. Only top-level fields are currently supported. + + Input: + { + "model": "gpt-4", + "service_tier": "default" + } + + Config: + set: + - path: "service_tier" + value: "scale" + + Output: + { + "model": "gpt-4", + "service_tier": "scale" + } + items: + description: HTTPBodyField represents a JSON field name and + value for body mutation + properties: + path: + description: |- + Path is the top-level field name to set in the request body. + Examples: "service_tier", "max_tokens", "temperature" + minLength: 1 + type: string + value: + description: |- + Value is the JSON value to set at the specified field. This can be any valid JSON value: + string, number, boolean, object, array, or null. + The value will be parsed as JSON and inserted at the specified field. 
+ + Examples: + - "\"scale\"" (string) + - "42" (number) + - "true" (boolean) + - "{\"key\": \"value\"}" (object) + - "[1, 2, 3]" (array) + - "null" (null) + type: string + required: + - path + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - path + x-kubernetes-list-type: map + type: object + headerMutation: + description: |- + HeaderMutation defines the mutation of HTTP headers that will be applied to the request + before sending it to the backend. + properties: + remove: + description: |- + Remove the given header(s) from the HTTP request before the action. The + value of Remove is a list of HTTP header names. Note that the header + names are case-insensitive (see + https://datatracker.ietf.org/doc/html/rfc2616#section-4.2). + + Input: + GET /foo HTTP/1.1 + my-header1: foo + my-header2: bar + my-header3: baz + + Config: + remove: ["my-header1", "my-header3"] + + Output: + GET /foo HTTP/1.1 + my-header2: bar + items: + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + set: + description: |- + Set overwrites/adds the request with the given header (name, value) + before the action. + + Input: + GET /foo HTTP/1.1 + my-header: foo + + Config: + set: + - name: "my-header" + value: "bar" + + Output: + GET /foo HTTP/1.1 + my-header: bar + items: + description: HTTPHeader represents an HTTP Header name and value + as defined by RFC 7230. + properties: + name: + description: |- + Name is the name of the HTTP Header to be matched. Name matching MUST be + case-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2). + + If multiple entries specify equivalent header names, the first entry with + an equivalent name MUST be considered for a match. Subsequent entries + with an equivalent header name MUST be ignored. Due to the + case-insensitivity of header names, "foo" and "Foo" are considered + equivalent. 
+ maxLength: 256 + minLength: 1 + pattern: ^[A-Za-z0-9!#$%&'*+\-.^_\x60|~]+$ + type: string + value: + description: Value is the value of HTTP Header to be matched. + maxLength: 4096 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + schema: + description: |- + APISchema specifies the API schema of the output format of requests from + Envoy that this AIServiceBackend can accept as incoming requests. + Based on this schema, the ai-gateway will perform the necessary transformation for + the pair of AIGatewayRouteSpec.APISchema and AIServiceBackendSpec.APISchema. + + This is required to be set. + properties: + name: + description: Name is the name of the API schema of the AIGatewayRoute + or AIServiceBackend. + enum: + - OpenAI + - Cohere + - AWSBedrock + - AzureOpenAI + - GCPVertexAI + - GCPAnthropic + - Anthropic + - AWSAnthropic + type: string + prefix: + description: |- + Prefix is the prefix for the API. + + When the name is set to "OpenAI", "chat completions" API endpoint will be "${this_field}/chat/completions". + It can be with or without a leading slash ("/"). + + This is especially useful when routing to the backend that has an OpenAI compatible API but has a different + prefix. For example, Gemini OpenAI compatible API (https://ai.google.dev/gemini-api/docs/openai) uses + "/v1beta/openai" prefix. Another example is that Cohere AI (https://docs.cohere.com/v2/docs/compatibility-api) + uses "/compatibility/v1" prefix. On the other hand, DeepSeek (https://api-docs.deepseek.com/) doesn't + use prefix, so you can leave this field unset. + + See https://aigateway.envoyproxy.io/docs/capabilities/llm-integrations/supported-providers for details. + type: string + version: + description: |- + Version is the version of the API schema. 
+ + When the name is set to AzureOpenAI, this version maps to "API Version" in the + Azure OpenAI API documentation (https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning). + + See https://aigateway.envoyproxy.io/docs/capabilities/llm-integrations/supported-providers for details. + type: string + required: + - name + type: object + required: + - backendRef + - schema + type: object + status: + description: Status defines the status details of the AIServiceBackend. + properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. 
+ The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + AIServiceBackend is a resource that represents a single backend for AIGatewayRoute. + A backend is a service that handles traffic with a concrete API specification. + + A AIServiceBackend is "attached" to a Backend which is either a k8s Service or a Backend resource of the Envoy Gateway. + + When a backend with an attached AIServiceBackend is used as a routing target in the AIGatewayRoute (more precisely, the + HTTPRouteSpec defined in the AIGatewayRoute), the ai-gateway will generate the necessary configuration to do + the backend specific logic in the final HTTPRoute. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. 
+ Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the details of AIServiceBackend. + properties: + backendRef: + description: |- + BackendRef is the reference to the Backend resource that this AIServiceBackend corresponds to. + + A backend must be a Backend resource of Envoy Gateway. Note that k8s Service will be supported + as a backend in the future. See https://github.com/envoyproxy/ai-gateway/issues/902 for more details. + + This is required to be set. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: BackendRef must be a Backend resource of Envoy Gateway. + See https://github.com/envoyproxy/ai-gateway/issues/902 for more + details. + rule: has(self.kind) && self.kind == 'Backend' && has(self.group) + && self.group == 'gateway.envoyproxy.io' + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') ? has(self.port) + : true' + bodyMutation: + description: |- + BodyMutation defines the mutation of HTTP request body JSON fields that will be applied to the request + before sending it to the backend. + properties: + remove: + description: |- + Remove the given JSON field(s) from the HTTP request body before sending to the backend. + The value of Remove is a list of top-level field names to remove. 
+ + Input: + { + "model": "gpt-4", + "service_tier": "default", + "internal_flag": true + } + + Config: + remove: ["service_tier", "internal_flag"] + + Output: + { + "model": "gpt-4" + } + items: + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + set: + description: |- + Set overwrites/adds the request body with the given JSON field (name, value) + before sending to the backend. Only top-level fields are currently supported. + + Input: + { + "model": "gpt-4", + "service_tier": "default" + } + + Config: + set: + - path: "service_tier" + value: "scale" + + Output: + { + "model": "gpt-4", + "service_tier": "scale" + } + items: + description: HTTPBodyField represents a JSON field name and + value for body mutation + properties: + path: + description: |- + Path is the top-level field name to set in the request body. + Examples: "service_tier", "max_tokens", "temperature" + minLength: 1 + type: string + value: + description: |- + Value is the JSON value to set at the specified field. This can be any valid JSON value: + string, number, boolean, object, array, or null. + The value will be parsed as JSON and inserted at the specified field. + + Examples: + - "\"scale\"" (string) + - "42" (number) + - "true" (boolean) + - "{\"key\": \"value\"}" (object) + - "[1, 2, 3]" (array) + - "null" (null) + type: string + required: + - path + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - path + x-kubernetes-list-type: map + type: object + headerMutation: + description: |- + HeaderMutation defines the mutation of HTTP headers that will be applied to the request + before sending it to the backend. + properties: + remove: + description: |- + Remove the given header(s) from the HTTP request before the action. The + value of Remove is a list of HTTP header names. Note that the header + names are case-insensitive (see + https://datatracker.ietf.org/doc/html/rfc2616#section-4.2). 
+ + Input: + GET /foo HTTP/1.1 + my-header1: foo + my-header2: bar + my-header3: baz + + Config: + remove: ["my-header1", "my-header3"] + + Output: + GET /foo HTTP/1.1 + my-header2: bar + items: + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + set: + description: |- + Set overwrites/adds the request with the given header (name, value) + before the action. + + Input: + GET /foo HTTP/1.1 + my-header: foo + + Config: + set: + - name: "my-header" + value: "bar" + + Output: + GET /foo HTTP/1.1 + my-header: bar + items: + description: HTTPHeader represents an HTTP Header name and value + as defined by RFC 7230. + properties: + name: + description: |- + Name is the name of the HTTP Header to be matched. Name matching MUST be + case-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2). + + If multiple entries specify equivalent header names, the first entry with + an equivalent name MUST be considered for a match. Subsequent entries + with an equivalent header name MUST be ignored. Due to the + case-insensitivity of header names, "foo" and "Foo" are considered + equivalent. + maxLength: 256 + minLength: 1 + pattern: ^[A-Za-z0-9!#$%&'*+\-.^_\x60|~]+$ + type: string + value: + description: Value is the value of HTTP Header to be matched. + maxLength: 4096 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + schema: + description: |- + APISchema specifies the API schema of the output format of requests from + Envoy that this AIServiceBackend can accept as incoming requests. + Based on this schema, the ai-gateway will perform the necessary transformation for + the pair of AIGatewayRouteSpec.APISchema and AIServiceBackendSpec.APISchema. + + This is required to be set. + properties: + name: + description: Name is the name of the API schema of the AIGatewayRoute + or AIServiceBackend. 
+ enum: + - OpenAI + - Cohere + - AWSBedrock + - AzureOpenAI + - GCPVertexAI + - GCPAnthropic + - Anthropic + - AWSAnthropic + type: string + prefix: + description: |- + Prefix is the prefix for the API. + + When the name is set to "OpenAI", "chat completions" API endpoint will be "${this_field}/chat/completions". + It can be with or without a leading slash ("/"). + + This is especially useful when routing to the backend that has an OpenAI compatible API but has a different + prefix. For example, Gemini OpenAI compatible API (https://ai.google.dev/gemini-api/docs/openai) uses + "/v1beta/openai" prefix. Another example is that Cohere AI (https://docs.cohere.com/v2/docs/compatibility-api) + uses "/compatibility/v1" prefix. On the other hand, DeepSeek (https://api-docs.deepseek.com/) doesn't + use prefix, so you can leave this field unset. + + See https://aigateway.envoyproxy.io/docs/capabilities/llm-integrations/supported-providers for details. + type: string + version: + description: |- + Version is the version of the API schema. + + When the name is set to AzureOpenAI, this version maps to "API Version" in the + Azure OpenAI API documentation (https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning). + + See https://aigateway.envoyproxy.io/docs/capabilities/llm-integrations/supported-providers for details. + type: string + required: + - name + type: object + required: + - backendRef + - schema + type: object + status: + description: Status defines the status details of the AIServiceBackend. + properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. 
+ properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_backendsecuritypolicies.yaml b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_backendsecuritypolicies.yaml new file mode 100644 index 00000000..80e171f5 --- /dev/null +++ b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_backendsecuritypolicies.yaml @@ -0,0 +1,10127 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.0 + labels: + gateway.networking.k8s.io/policy: direct + name: backendsecuritypolicies.aigateway.envoyproxy.io +spec: + group: aigateway.envoyproxy.io + names: + kind: BackendSecurityPolicy + listKind: BackendSecurityPolicyList + plural: backendsecuritypolicies + singular: backendsecuritypolicy + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + deprecated: true + deprecationWarning: aigateway.envoyproxy.io/v1alpha1 is deprecated; use aigateway.envoyproxy.io/v1beta1 + instead + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + BackendSecurityPolicy specifies configuration for authentication and authorization rules on the traffic + exiting the gateway to the backend. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + BackendSecurityPolicySpec specifies authentication rules for accessing the provider from the Gateway. + Only one mechanism to access a backend(s) can be specified. + + Only one type of BackendSecurityPolicy can be defined. + maxProperties: 3 + properties: + anthropicAPIKey: + description: |- + AnthropicAPIKey is a mechanism to access Anthropic backend(s). The API key will be injected into the "x-api-key" header. + https://docs.claude.com/en/api/overview#authentication + properties: + secretRef: + description: |- + SecretRef is the reference to the secret containing the Anthropic API key. + ai-gateway must be given the permission to read this secret. + The key of the secret should be "apiKey". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent.
+ maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + apiKey: + description: APIKey is a mechanism to access a backend(s). The API + key will be injected into the Authorization header. + properties: + secretRef: + description: |- + SecretRef is the reference to the secret containing the API key. + ai-gateway must be given the permission to read this secret. + The key of the secret should be "apiKey". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + awsCredentials: + description: AWSCredentials is a mechanism to access a backend(s). + AWS specific logic will be applied. + properties: + credentialsFile: + description: |- + CredentialsFile specifies the credentials file to use for the AWS provider. + When specified, this takes precedence over the default credential chain. + properties: + profile: + default: default + description: Profile is the profile to use in the credentials + file. + type: string + secretRef: + description: |- + SecretRef is the reference to the credential file. + + The secret should contain the AWS credentials file keyed on "credentials". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + oidcExchangeToken: + description: |- + OIDCExchangeToken specifies the oidc configurations used to obtain an oidc token. The oidc token will be + used to obtain temporary credentials to access AWS. + When specified, this takes precedence over the default credential chain. + properties: + aud: + description: Aud defines the audience that this ID Token is + intended for. + type: string + awsRoleArn: + description: |- + AwsRoleArn is the AWS IAM Role with the permission to use specific resources in AWS account + which maps to the temporary AWS security credentials exchanged using the authentication token issued by OIDC provider. + minLength: 1 + type: string + grantType: + description: GrantType is the method application gets access + token. + type: string + oidc: + description: OIDC is used to obtain oidc tokens via an SSO + server which will be used to exchange for provider credentials. + properties: + clientID: + description: |- + The client ID to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + Only one of clientID or clientIDRef must be set. + minLength: 1 + type: string + clientIDRef: + description: |- + The Kubernetes secret which contains the client ID to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + Exactly one of clientID or clientIDRef must be set. + This is an Opaque secret. The client ID should be stored in the key "client-id". + + Only one of clientID or clientIDRef must be set. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + clientSecret: + description: |- + The Kubernetes secret which contains the OIDC client secret to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + This is an Opaque secret. The client secret should be stored in the key + "client-secret". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + cookieConfig: + description: |- + CookieConfigs allows setting the SameSite attribute for OIDC cookies. + By default, it's unset. + properties: + sameSite: + enum: + - Lax + - Strict + - None + type: string + type: object + cookieDomain: + description: |- + The optional domain to set the access and ID token cookies on. + If not set, the cookies will default to the host of the request, not including the subdomains. + If set, the cookies will be set on the specified domain and all subdomains. + This means that requests to any subdomain will not require reauthentication after users log in to the parent domain. + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9]))*$ + type: string + cookieNames: + description: |- + The optional cookie name overrides to be used for Bearer and IdToken cookies in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses a randomly generated suffix + properties: + accessToken: + description: |- + The name of the cookie used to store the AccessToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, defaults to "AccessToken-(randomly generated uid)" + type: string + idToken: + description: |- + The name of the cookie used to store the IdToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest).
+ If not specified, defaults to "IdToken-(randomly generated uid)" + type: string + type: object + csrfTokenTTL: + description: |- + CSRFTokenTTL defines how long the CSRF token generated during the OAuth2 authorization flow remains valid. + + This duration determines the lifetime of the CSRF cookie, which is validated against the CSRF token + in the "state" parameter when the provider redirects back to the callback endpoint. + + If omitted, Envoy Gateway defaults the token expiration to 10 minutes. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultRefreshTokenTTL: + description: |- + DefaultRefreshTokenTTL is the default lifetime of the refresh token. + This field is only used when the exp (expiration time) claim is omitted in + the refresh token or the refresh token is not JWT. + + If not specified, defaults to 604800s (one week). + Note: this field is only applicable when the "refreshToken" field is set to true. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultTokenTTL: + description: |- + DefaultTokenTTL is the default lifetime of the id token and access token. + Please note that Envoy will always use the expiry time from the response + of the authorization server if it is provided. This field is only used when + the expiry time is not provided by the authorization server. + + If not specified, defaults to 0. In this case, the "expires_in" field in + the authorization response must be set by the authorization server, or the + OAuth flow will fail. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + denyRedirect: + description: |- + Any request that matches any of the provided matchers (with either tokens that are expired or missing tokens) will not be redirected to the OIDC Provider. + This behavior can be useful for AJAX or machine requests. + properties: + headers: + description: Defines the headers to match against + the request to deny redirect to the OIDC Provider.
+ items: + description: OIDCDenyRedirectHeader defines how + a header is matched + properties: + name: + description: Specifies the name of the header + in the request. + minLength: 1 + type: string + type: + default: Exact + description: Type specifies how to match against + a string. + enum: + - Exact + - Prefix + - Suffix + - RegularExpression + type: string + value: + description: Value specifies the string value + that the match must have. + maxLength: 1024 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + minItems: 1 + type: array + required: + - headers + type: object + disableTokenEncryption: + description: |- + Disable token encryption. When set to true, both the access token and the ID token will be stored in plain text. + This option should only be used in secure environments where token encryption is not required. + Default is false (tokens are encrypted). + type: boolean + forwardAccessToken: + description: |- + ForwardAccessToken indicates whether the Envoy should forward the access token + via the Authorization header Bearer scheme to the upstream. + If not specified, defaults to false. + type: boolean + logoutPath: + description: |- + The path to log a user out, clearing their credential cookies. + + If not specified, uses a default logout path "/logout" + type: string + passThroughAuthHeader: + description: |- + Skips OIDC authentication when the request contains a header that will be extracted by the JWT filter. Unless + explicitly stated otherwise in the extractFrom field, this will be the "Authorization: Bearer ..." header. + + The passThroughAuthHeader option is typically used for non-browser clients that may not be able to handle OIDC + redirects and wish to directly supply a token instead. + + If not specified, defaults to false. + type: boolean + provider: + description: The OIDC Provider configuration. 
+ properties: + authorizationEndpoint: + description: |- + The OIDC Provider's [authorization endpoint](https://openid.net/specs/openid-connect-core-1_0.html#AuthorizationEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. 
This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == + ''Service'') ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel + requests that Envoy will make to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel + retries that Envoy will make to the referenced + backend defined within a xRoute rule. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending + requests that Envoy will queue to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures + the maximum number of connections that + Envoy will establish per-endpoint to + the referenced backend defined within + a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit is a soft limit on the size of the cluster’s connection read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes.
+ x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). 
+ format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). + If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. 
+ If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines + the number of healthy health checks + required before a backend host is marked + healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses + to match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. 
+ Defaults to GET + type: string + path: + description: Path defines the HTTP + path that will be requested during + health checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time + between active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time + to wait for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of + health checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines + the number of unhealthy health checks + required before a backend host is marked + unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, + http field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, + tcp field needs to be set. + rule: 'self.type == ''TCP'' ? has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set + if the Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines + the base duration for which a host will + be ejected on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets + the number of consecutive 5xx errors + triggering ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors + sets the number of consecutive gateway + errors triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time + between passive health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the + maximum percentage of hosts in a cluster + that can be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between + external and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration + for backend connections. 
+ properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. + https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash + type is set to Cookie. 
+ properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes + to set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to + hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the + consistent hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header + to hash. + type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the + query parameter hash policy when the + consistent hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited + to 5000011. 
+ format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. + If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint + override information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. 
+ The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". + enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests + between locality zones. + properties: + preferLocal: + description: PreferLocalZone configures + zone-aware routing to prefer sending + traffic to the local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. 
+ properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold + is the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage + of requests that will be considered + for zone aware routing if zone aware + routing is configured. If not specified, + Envoy defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported + for RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? !has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported + for LeastRequest, Random, and RoundRobin load + balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. 
+ properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy + to be applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per + retry attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. 
+ items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the + conditions that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend + connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. 
This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time + until which entire response is received + from the upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy + only works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + endSessionEndpoint: + description: |- + The OIDC Provider's [end session endpoint](https://openid.net/specs/openid-connect-core-1_0.html#RPLogout). + + If the end session endpoint is provided, EG will use it to log out the user from the OIDC Provider when the user accesses the logout path. + EG will also try to discover the end session endpoint from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse) when authorizationEndpoint or tokenEndpoint is not provided. + type: string + issuer: + description: |- + The OIDC Provider's [issuer identifier](https://openid.net/specs/openid-connect-discovery-1_0.html#IssuerDiscovery). 
+ Issuer MUST be a URI RFC 3986 [RFC3986] with a scheme component that MUST + be https, a host component, and optionally, port and path components and + no query or fragment components. + minLength: 1 + type: string + tokenEndpoint: + description: |- + The OIDC Provider's [token endpoint](https://openid.net/specs/openid-connect-core-1_0.html#TokenEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + required: + - issuer + type: object + x-kubernetes-validations: + - message: BackendRefs must be used, backendRef is not + supported. + rule: '!has(self.backendRef)' + - message: Retry timeout is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.perRetry)? + !has(self.backendSettings.retry.perRetry.timeout):true):true):true + - message: HTTPStatusCodes is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.retryOn)? + !has(self.backendSettings.retry.retryOn.httpStatusCodes):true):true):true + redirectURL: + description: |- + The redirect URL to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses the default redirect URI "%REQ(x-forwarded-proto)%://%REQ(:authority)%/oauth2/callback" + type: string + refreshToken: + default: true + description: |- + RefreshToken indicates whether the Envoy should automatically refresh the + id token and access token when they expire. + When set to true, the Envoy will use the refresh token to get a new id token + and access token when they expire. + + If not specified, defaults to true. 
+ type: boolean + resources: + description: |- + The OIDC resources to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + items: + type: string + type: array + scopes: + description: |- + The OIDC scopes to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + The "openid" scope is always added to the list of scopes if not already + specified. + items: + type: string + type: array + required: + - clientSecret + - provider + type: object + x-kubernetes-validations: + - message: only one of clientID or clientIDRef must be set + rule: (has(self.clientID) && !has(self.clientIDRef)) || + (!has(self.clientID) && has(self.clientIDRef)) + required: + - awsRoleArn + - oidc + type: object + region: + description: Region specifies the AWS region associated with the + policy. + minLength: 1 + type: string + required: + - region + type: object + azureAPIKey: + description: AzureAPIKey is a mechanism to access Azure OpenAI backend(s). + The API key will be injected into the api-key header. + properties: + secretRef: + description: |- + SecretRef is the reference to the secret containing the Azure API key. + ai-gateway must be given the permission to read this secret. + The key of the secret should be "apiKey". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. 
+ maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + azureCredentials: + description: AzureCredentials is a mechanism to access a backend(s). + Azure OpenAI specific logic will be applied. + properties: + clientID: + description: ClientID is a unique identifier for an application + in Azure. + minLength: 1 + type: string + clientSecretRef: + description: |- + ClientSecretRef is the reference to the secret containing the Azure client secret. + ai-gateway must be given the permission to read this secret. + The key of secret should be "client-secret". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + oidcExchangeToken: + description: |- + OIDCExchangeToken specifies the oidc configurations used to obtain an oidc token. The oidc token will be + used to obtain temporary credentials to access Azure. + properties: + aud: + description: Aud defines the audience that this ID Token is + intended for. + type: string + grantType: + description: GrantType is the method application gets access + token. + type: string + oidc: + description: OIDC is used to obtain oidc tokens via an SSO + server which will be used to exchange for provider credentials. + properties: + clientID: + description: |- + The client ID to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + Only one of clientID or clientIDRef must be set. + minLength: 1 + type: string + clientIDRef: + description: |- + The Kubernetes secret which contains the client ID to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + Exactly one of clientID or clientIDRef must be set. + This is an Opaque secret. The client ID should be stored in the key "client-id". + + Only one of clientID or clientIDRef must be set. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. 
For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + clientSecret: + description: |- + The Kubernetes secret which contains the OIDC client secret to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + This is an Opaque secret. The client secret should be stored in the key + "client-secret". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + cookieConfig: + description: |- + CookieConfigs allows setting the SameSite attribute for OIDC cookies. + By default, its unset. + properties: + sameSite: + enum: + - Lax + - Strict + - None + type: string + type: object + cookieDomain: + description: |- + The optional domain to set the access and ID token cookies on. + If not set, the cookies will default to the host of the request, not including the subdomains. + If set, the cookies will be set on the specified domain and all subdomains. + This means that requests to any subdomain will not require reauthentication after users log in to the parent domain. + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9]))*$ + type: string + cookieNames: + description: |- + The optional cookie name overrides to be used for Bearer and IdToken cookies in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses a randomly generated suffix + properties: + accessToken: + description: |- + The name of the cookie used to store the AccessToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, defaults to "AccessToken-(randomly generated uid)" + type: string + idToken: + description: |- + The name of the cookie used to store the IdToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). 
+ If not specified, defaults to "IdToken-(randomly generated uid)" + type: string + type: object + csrfTokenTTL: + description: |- + CSRFTokenTTL defines how long the CSRF token generated during the OAuth2 authorization flow remains valid. + + This duration determines the lifetime of the CSRF cookie, which is validated against the CSRF token + in the "state" parameter when the provider redirects back to the callback endpoint. + + If omitted, Envoy Gateway defaults the token expiration to 10 minutes. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultRefreshTokenTTL: + description: |- + DefaultRefreshTokenTTL is the default lifetime of the refresh token. + This field is only used when the exp (expiration time) claim is omitted in + the refresh token or the refresh token is not JWT. + + If not specified, defaults to 604800s (one week). + Note: this field is only applicable when the "refreshToken" field is set to true. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultTokenTTL: + description: |- + DefaultTokenTTL is the default lifetime of the id token and access token. + Please note that Envoy will always use the expiry time from the response + of the authorization server if it is provided. This field is only used when + the expiry time is not provided by the authorization. + + If not specified, defaults to 0. In this case, the "expires_in" field in + the authorization response must be set by the authorization server, or the + OAuth flow will fail. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + denyRedirect: + description: |- + Any request that matches any of the provided matchers (with either tokens that are expired or missing tokens) will not be redirected to the OIDC Provider. + This behavior can be useful for AJAX or machine requests. + properties: + headers: + description: Defines the headers to match against + the request to deny redirect to the OIDC Provider. 
+ items: + description: OIDCDenyRedirectHeader defines how + a header is matched + properties: + name: + description: Specifies the name of the header + in the request. + minLength: 1 + type: string + type: + default: Exact + description: Type specifies how to match against + a string. + enum: + - Exact + - Prefix + - Suffix + - RegularExpression + type: string + value: + description: Value specifies the string value + that the match must have. + maxLength: 1024 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + minItems: 1 + type: array + required: + - headers + type: object + disableTokenEncryption: + description: |- + Disable token encryption. When set to true, both the access token and the ID token will be stored in plain text. + This option should only be used in secure environments where token encryption is not required. + Default is false (tokens are encrypted). + type: boolean + forwardAccessToken: + description: |- + ForwardAccessToken indicates whether the Envoy should forward the access token + via the Authorization header Bearer scheme to the upstream. + If not specified, defaults to false. + type: boolean + logoutPath: + description: |- + The path to log a user out, clearing their credential cookies. + + If not specified, uses a default logout path "/logout" + type: string + passThroughAuthHeader: + description: |- + Skips OIDC authentication when the request contains a header that will be extracted by the JWT filter. Unless + explicitly stated otherwise in the extractFrom field, this will be the "Authorization: Bearer ..." header. + + The passThroughAuthHeader option is typically used for non-browser clients that may not be able to handle OIDC + redirects and wish to directly supply a token instead. + + If not specified, defaults to false. + type: boolean + provider: + description: The OIDC Provider configuration. 
+ properties: + authorizationEndpoint: + description: |- + The OIDC Provider's [authorization endpoint](https://openid.net/specs/openid-connect-core-1_0.html#AuthorizationEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. 
This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == + ''Service'') ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel + requests that Envoy will make to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel + retries that Envoy will make to the referenced + backend defined within a xRoute rule. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending + requests that Envoy will queue to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures + the maximum number of connections that + Envoy will establish per-endpoint to + the referenced backend defined within + a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. 
+ x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). 
+ format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). + If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. 
+ If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines + the number of healthy health checks + required before a backend host is marked + healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses + to match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. 
+ Defaults to GET + type: string + path: + description: Path defines the HTTP + path that will be requested during + health checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time + between active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time + to wait for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of + health checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines + the number of unhealthy health checks + required before a backend host is marked + unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, + http field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, + tcp field needs to be set. + rule: 'self.type == ''TCP'' ? has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set + if the Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines + the base duration for which a host will + be ejected on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets + the number of consecutive 5xx errors + triggering ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors + sets the number of consecutive gateway + errors triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time + between passive health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the + maximum percentage of hosts in a cluster + that can be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between + external and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration + for backend connections. 
+ properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. + https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash + type is set to Cookie. 
+ properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes + to set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to + hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the + consistent hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header + to hash. + type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the + query parameter hash policy when the + consistent hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited + to 5000011. 
+ format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. + If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint + override information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. 
+ The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". + enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests + between locality zones. + properties: + preferLocal: + description: PreferLocalZone configures + zone-aware routing to prefer sending + traffic to the local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. 
+ properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold + is the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage + of requests that will be considered + for zone aware routing if zone aware + routing is configured. If not specified, + Envoy defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported + for RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? !has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported + for LeastRequest, Random, and RoundRobin load + balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. 
+ properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy + to be applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per + retry attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. 
+ items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the + conditions that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend + connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. 
This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time + until which entire response is received + from the upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy + only works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + endSessionEndpoint: + description: |- + The OIDC Provider's [end session endpoint](https://openid.net/specs/openid-connect-core-1_0.html#RPLogout). + + If the end session endpoint is provided, EG will use it to log out the user from the OIDC Provider when the user accesses the logout path. + EG will also try to discover the end session endpoint from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse) when authorizationEndpoint or tokenEndpoint is not provided. + type: string + issuer: + description: |- + The OIDC Provider's [issuer identifier](https://openid.net/specs/openid-connect-discovery-1_0.html#IssuerDiscovery). 
+ Issuer MUST be a URI RFC 3986 [RFC3986] with a scheme component that MUST + be https, a host component, and optionally, port and path components and + no query or fragment components. + minLength: 1 + type: string + tokenEndpoint: + description: |- + The OIDC Provider's [token endpoint](https://openid.net/specs/openid-connect-core-1_0.html#TokenEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + required: + - issuer + type: object + x-kubernetes-validations: + - message: BackendRefs must be used, backendRef is not + supported. + rule: '!has(self.backendRef)' + - message: Retry timeout is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.perRetry)? + !has(self.backendSettings.retry.perRetry.timeout):true):true):true + - message: HTTPStatusCodes is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.retryOn)? + !has(self.backendSettings.retry.retryOn.httpStatusCodes):true):true):true + redirectURL: + description: |- + The redirect URL to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses the default redirect URI "%REQ(x-forwarded-proto)%://%REQ(:authority)%/oauth2/callback" + type: string + refreshToken: + default: true + description: |- + RefreshToken indicates whether the Envoy should automatically refresh the + id token and access token when they expire. + When set to true, the Envoy will use the refresh token to get a new id token + and access token when they expire. + + If not specified, defaults to true. 
+ type: boolean + resources: + description: |- + The OIDC resources to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + items: + type: string + type: array + scopes: + description: |- + The OIDC scopes to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + The "openid" scope is always added to the list of scopes if not already + specified. + items: + type: string + type: array + required: + - clientSecret + - provider + type: object + x-kubernetes-validations: + - message: only one of clientID or clientIDRef must be set + rule: (has(self.clientID) && !has(self.clientIDRef)) || + (!has(self.clientID) && has(self.clientIDRef)) + required: + - oidc + type: object + tenantID: + description: TenantId is a unique identifier for an Azure Active + Directory instance. + minLength: 1 + type: string + required: + - clientID + - tenantID + type: object + x-kubernetes-validations: + - message: Exactly one of clientSecretRef or oidcExchangeToken must + be specified + rule: (has(self.clientSecretRef) && !has(self.oidcExchangeToken)) + || (!has(self.clientSecretRef) && has(self.oidcExchangeToken)) + gcpCredentials: + description: GCPCredentials is a mechanism to access a backend(s). + GCP specific logic will be applied. + properties: + credentialsFile: + description: CredentialsFile specifies the service account credentials + file to use for the GCP provider. + properties: + secretRef: + description: |- + SecretRef is the reference to the credential file. + + The secret should contain the GCP service account credentials file keyed on "service_account.json". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + projectName: + description: ProjectName is the GCP project name. + minLength: 1 + type: string + region: + description: Region is the GCP region associated with the policy. + minLength: 1 + type: string + workloadIdentityFederationConfig: + description: WorkloadIdentityFederationConfig is the configuration + for the GCP Workload Identity Federation. + properties: + oidcExchangeToken: + description: |- + OIDCExchangeToken specifies the oidc configurations used to obtain an oidc token. The oidc token will be + used to obtain temporary credentials to access GCP. + properties: + aud: + description: Aud defines the audience that this ID Token + is intended for. + type: string + grantType: + description: GrantType is the method application gets + access token. + type: string + oidc: + description: OIDC is used to obtain oidc tokens via an + SSO server which will be used to exchange for provider + credentials. 
+ properties: + clientID: + description: |- + The client ID to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + Only one of clientID or clientIDRef must be set. + minLength: 1 + type: string + clientIDRef: + description: |- + The Kubernetes secret which contains the client ID to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + Exactly one of clientID or clientIDRef must be set. + This is an Opaque secret. The client ID should be stored in the key "client-id". + + Only one of clientID or clientIDRef must be set. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For + example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + clientSecret: + description: |- + The Kubernetes secret which contains the OIDC client secret to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + This is an Opaque secret. The client secret should be stored in the key + "client-secret". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For + example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + cookieConfig: + description: |- + CookieConfigs allows setting the SameSite attribute for OIDC cookies. + By default, its unset. + properties: + sameSite: + enum: + - Lax + - Strict + - None + type: string + type: object + cookieDomain: + description: |- + The optional domain to set the access and ID token cookies on. 
+ If not set, the cookies will default to the host of the request, not including the subdomains. + If set, the cookies will be set on the specified domain and all subdomains. + This means that requests to any subdomain will not require reauthentication after users log in to the parent domain. + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9]))*$ + type: string + cookieNames: + description: |- + The optional cookie name overrides to be used for Bearer and IdToken cookies in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses a randomly generated suffix + properties: + accessToken: + description: |- + The name of the cookie used to store the AccessToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, defaults to "AccessToken-(randomly generated uid)" + type: string + idToken: + description: |- + The name of the cookie used to store the IdToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, defaults to "IdToken-(randomly generated uid)" + type: string + type: object + csrfTokenTTL: + description: |- + CSRFTokenTTL defines how long the CSRF token generated during the OAuth2 authorization flow remains valid. + + This duration determines the lifetime of the CSRF cookie, which is validated against the CSRF token + in the "state" parameter when the provider redirects back to the callback endpoint. + + If omitted, Envoy Gateway defaults the token expiration to 10 minutes. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultRefreshTokenTTL: + description: |- + DefaultRefreshTokenTTL is the default lifetime of the refresh token. + This field is only used when the exp (expiration time) claim is omitted in + the refresh token or the refresh token is not JWT. + + If not specified, defaults to 604800s (one week). 
+ Note: this field is only applicable when the "refreshToken" field is set to true. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultTokenTTL: + description: |- + DefaultTokenTTL is the default lifetime of the id token and access token. + Please note that Envoy will always use the expiry time from the response + of the authorization server if it is provided. This field is only used when + the expiry time is not provided by the authorization. + + If not specified, defaults to 0. In this case, the "expires_in" field in + the authorization response must be set by the authorization server, or the + OAuth flow will fail. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + denyRedirect: + description: |- + Any request that matches any of the provided matchers (with either tokens that are expired or missing tokens) will not be redirected to the OIDC Provider. + This behavior can be useful for AJAX or machine requests. + properties: + headers: + description: Defines the headers to match against + the request to deny redirect to the OIDC Provider. + items: + description: OIDCDenyRedirectHeader defines + how a header is matched + properties: + name: + description: Specifies the name of the header + in the request. + minLength: 1 + type: string + type: + default: Exact + description: Type specifies how to match + against a string. + enum: + - Exact + - Prefix + - Suffix + - RegularExpression + type: string + value: + description: Value specifies the string + value that the match must have. + maxLength: 1024 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + minItems: 1 + type: array + required: + - headers + type: object + disableTokenEncryption: + description: |- + Disable token encryption. When set to true, both the access token and the ID token will be stored in plain text. + This option should only be used in secure environments where token encryption is not required. 
+ Default is false (tokens are encrypted). + type: boolean + forwardAccessToken: + description: |- + ForwardAccessToken indicates whether the Envoy should forward the access token + via the Authorization header Bearer scheme to the upstream. + If not specified, defaults to false. + type: boolean + logoutPath: + description: |- + The path to log a user out, clearing their credential cookies. + + If not specified, uses a default logout path "/logout" + type: string + passThroughAuthHeader: + description: |- + Skips OIDC authentication when the request contains a header that will be extracted by the JWT filter. Unless + explicitly stated otherwise in the extractFrom field, this will be the "Authorization: Bearer ..." header. + + The passThroughAuthHeader option is typically used for non-browser clients that may not be able to handle OIDC + redirects and wish to directly supply a token instead. + + If not specified, defaults to false. + type: boolean + provider: + description: The OIDC Provider configuration. + properties: + authorizationEndpoint: + description: |- + The OIDC Provider's [authorization endpoint](https://openid.net/specs/openid-connect-core-1_0.html#AuthorizationEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. 
+ format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == + ''Service'') ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. 
+ + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind + == ''Service'') ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel + requests that Envoy will make to the + referenced backend defined within a + xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel + retries that Envoy will make to the + referenced backend defined within a + xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending + requests that Envoy will queue to the + referenced backend defined within a + xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit + Breakers that will apply per-endpoint + for an upstream cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures + the maximum number of connections + that Envoy will establish per-endpoint + to the referenced backend defined + within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. 
When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). + format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). 
+ If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to + perform active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. + If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines + the number of healthy health checks + required before a backend host is + marked healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse + defines a list of HTTP expected + responses to match. + properties: + binary: + description: Binary payload + base64 encoded. + format: byte + type: string + text: + description: Text payload + in plain text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines + the type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is + Text, text field needs to + be set. + rule: 'self.type == ''Text'' + ? 
has(self.text) : !has(self.text)' + - message: If payload type is + Binary, binary field needs + to be set. + rule: 'self.type == ''Binary'' + ? has(self.binary) : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. + Defaults to GET + type: string + path: + description: Path defines the + HTTP path that will be requested + during health checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the + time between active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the + expected response payload. + properties: + binary: + description: Binary payload + base64 encoded. + format: byte + type: string + text: + description: Text payload + in plain text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines + the type of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is + Text, text field needs to + be set. + rule: 'self.type == ''Text'' + ? has(self.text) : !has(self.text)' + - message: If payload type is + Binary, binary field needs + to be set. + rule: 'self.type == ''Binary'' + ? has(self.binary) : !has(self.binary)' + send: + description: Send defines the + request payload. + properties: + binary: + description: Binary payload + base64 encoded. + format: byte + type: string + text: + description: Text payload + in plain text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines + the type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is + Text, text field needs to + be set. + rule: 'self.type == ''Text'' + ? has(self.text) : !has(self.text)' + - message: If payload type is + Binary, binary field needs + to be set. + rule: 'self.type == ''Binary'' + ? has(self.binary) : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time + to wait for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type + of health checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines + the number of unhealthy health checks + required before a backend host is + marked unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, + http field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, + tcp field needs to be set. + rule: 'self.type == ''TCP'' ? 
has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be + set if the Health Checker type is + GRPC. + rule: 'has(self.grpc) ? self.type == + ''GRPC'' : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. + format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines + the base duration for which a host + will be ejected on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors + sets the number of consecutive 5xx + errors triggering ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors + sets the number of consecutive gateway + errors triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the + time between passive health checks. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets + the maximum percentage of hosts + in a cluster that can be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between + external and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration + for backend connections. + properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. 
+ https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the + cookie hash policy when the consistent + hash type is set to Cookie. + properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes + to set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header + to hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the + header hash policy for each header, + when the consistent hash type is + set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header + to hash. 
+ type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures + the query parameter hash policy + when the consistent hash type is + set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query + param to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited + to 5000011. + format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is + header, the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is + headers, the headers field must be + set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is + cookie, the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is + queryParams, the queryParams field + must be set. + rule: 'self.type == ''QueryParams'' + ? has(self.queryParams) : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. 
+ If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the + sources to extract endpoint override + information from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint + override information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. + The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". 
+ enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests + between locality zones. + properties: + preferLocal: + description: PreferLocalZone configures + zone-aware routing to prefer sending + traffic to the local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. + properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold + is the minimum number of total + upstream endpoints across all + zones required to enable zone-aware + routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage + of requests that will be considered + for zone aware routing if zone + aware routing is configured. + If not specified, Envoy defaults + to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? + has(self.consistentHash) : !has(self.consistentHash)' + - message: Currently SlowStart is only supported + for RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? 
!has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported + for LeastRequest, Random, and RoundRobin + load balancers. + rule: 'self.type == ''ConsistentHash'' ? + !has(self.zoneAware) : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy + Protocol when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. + properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number + of retries to be attempted. Defaults + to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy + to be applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the + base interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. 
This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout + per retry attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the + retry trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies + the conditions that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend + connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the + time until which entire response + is received from the upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy + only works with RoundRobin or Random load + balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', + ''RoundRobin'']))' + endSessionEndpoint: + description: |- + The OIDC Provider's [end session endpoint](https://openid.net/specs/openid-connect-core-1_0.html#RPLogout). + + If the end session endpoint is provided, EG will use it to log out the user from the OIDC Provider when the user accesses the logout path. + EG will also try to discover the end session endpoint from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse) when authorizationEndpoint or tokenEndpoint is not provided. + type: string + issuer: + description: |- + The OIDC Provider's [issuer identifier](https://openid.net/specs/openid-connect-discovery-1_0.html#IssuerDiscovery). + Issuer MUST be a URI RFC 3986 [RFC3986] with a scheme component that MUST + be https, a host component, and optionally, port and path components and + no query or fragment components. + minLength: 1 + type: string + tokenEndpoint: + description: |- + The OIDC Provider's [token endpoint](https://openid.net/specs/openid-connect-core-1_0.html#TokenEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + required: + - issuer + type: object + x-kubernetes-validations: + - message: BackendRefs must be used, backendRef is + not supported. + rule: '!has(self.backendRef)' + - message: Retry timeout is not supported. 
+ rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.perRetry)? + !has(self.backendSettings.retry.perRetry.timeout):true):true):true + - message: HTTPStatusCodes is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.retryOn)? + !has(self.backendSettings.retry.retryOn.httpStatusCodes):true):true):true + redirectURL: + description: |- + The redirect URL to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses the default redirect URI "%REQ(x-forwarded-proto)%://%REQ(:authority)%/oauth2/callback" + type: string + refreshToken: + default: true + description: |- + RefreshToken indicates whether the Envoy should automatically refresh the + id token and access token when they expire. + When set to true, the Envoy will use the refresh token to get a new id token + and access token when they expire. + + If not specified, defaults to true. + type: boolean + resources: + description: |- + The OIDC resources to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + items: + type: string + type: array + scopes: + description: |- + The OIDC scopes to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + The "openid" scope is always added to the list of scopes if not already + specified. + items: + type: string + type: array + required: + - clientSecret + - provider + type: object + x-kubernetes-validations: + - message: only one of clientID or clientIDRef must be + set + rule: (has(self.clientID) && !has(self.clientIDRef)) + || (!has(self.clientID) && has(self.clientIDRef)) + required: + - oidc + type: object + projectID: + description: ProjectID is the GCP project ID. 
+ minLength: 1 + type: string + serviceAccountImpersonation: + description: |- + ServiceAccountImpersonation is the service account impersonation configuration. + This is used to impersonate a service account when getting access token. + properties: + serviceAccountName: + description: ServiceAccountName is the name of the service + account to impersonate. + minLength: 1 + type: string + required: + - serviceAccountName + type: object + workloadIdentityPoolName: + description: |- + WorkloadIdentityPoolName is the name of the workload identity pool defined in GCP. + https://cloud.google.com/iam/docs/workload-identity-federation?hl=en + minLength: 1 + type: string + workloadIdentityProviderName: + description: WorkloadIdentityProviderName is the name of the + external identity provider as registered on Google Cloud + Platform. + minLength: 1 + type: string + required: + - oidcExchangeToken + - projectID + - workloadIdentityPoolName + - workloadIdentityProviderName + type: object + required: + - projectName + - region + type: object + x-kubernetes-validations: + - message: Exactly one of GCPWorkloadIdentityFederationConfig or GCPCredentialsFile + must be specified + rule: (has(self.credentialsFile) && !has(self.workloadIdentityFederationConfig)) + || (has(self.workloadIdentityFederationConfig) && !has(self.credentialsFile)) + targetRefs: + description: |- + TargetRefs are the names of the AIServiceBackend or InferencePool resources this BackendSecurityPolicy is being attached to. + Attaching multiple BackendSecurityPolicies to the same resource is invalid and will result in an error + during the reconciliation of the resource. + items: + description: |- + LocalPolicyTargetReference identifies an API object to apply a direct or + inherited policy to. This should be used as part of Policy resources + that can target Gateway API resources. 
For more information on how this + policy attachment model works, and a sample Policy resource, refer to + the policy attachment documentation for Gateway API. + properties: + group: + description: Group is the group of the target resource. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + description: Kind is kind of the target resource. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the target resource. + maxLength: 253 + minLength: 1 + type: string + required: + - group + - kind + - name + type: object + maxItems: 16 + type: array + x-kubernetes-validations: + - message: targetRefs must reference AIServiceBackend or InferencePool + resources + rule: self.all(ref, (ref.group == 'aigateway.envoyproxy.io' && ref.kind + == 'AIServiceBackend') || (ref.group == 'inference.networking.k8s.io' + && ref.kind == 'InferencePool')) + type: + description: Type specifies the type of the backend security policy. + enum: + - APIKey + - AWSCredentials + - AzureAPIKey + - AzureCredentials + - GCPCredentials + - AnthropicAPIKey + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: When type is APIKey, only apiKey field should be set + rule: 'self.type == ''APIKey'' ? (has(self.apiKey) && !has(self.awsCredentials) + && !has(self.azureAPIKey) && !has(self.azureCredentials) && !has(self.gcpCredentials) + && !has(self.anthropicAPIKey)) : true' + - message: When type is AWSCredentials, only awsCredentials field should + be set + rule: 'self.type == ''AWSCredentials'' ? (has(self.awsCredentials) && + !has(self.apiKey) && !has(self.azureAPIKey) && !has(self.azureCredentials) + && !has(self.gcpCredentials) && !has(self.anthropicAPIKey)) : true' + - message: When type is AzureAPIKey, only azureAPIKey field should be + set + rule: 'self.type == ''AzureAPIKey'' ? 
(has(self.azureAPIKey) && !has(self.apiKey) + && !has(self.awsCredentials) && !has(self.azureCredentials) && !has(self.gcpCredentials) + && !has(self.anthropicAPIKey)) : true' + - message: When type is AzureCredentials, only azureCredentials field + should be set + rule: 'self.type == ''AzureCredentials'' ? (has(self.azureCredentials) + && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) + && !has(self.gcpCredentials) && !has(self.anthropicAPIKey)) : true' + - message: When type is GCPCredentials, only gcpCredentials field should + be set + rule: 'self.type == ''GCPCredentials'' ? (has(self.gcpCredentials) && + !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) + && !has(self.azureCredentials) && !has(self.anthropicAPIKey)) : true' + - message: When type is AnthropicAPIKey, only anthropicAPIKey field should + be set + rule: 'self.type == ''AnthropicAPIKey'' ? (has(self.anthropicAPIKey) + && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) + && !has(self.azureCredentials) && !has(self.gcpCredentials)) : true' + status: + description: Status defines the status details of the BackendSecurityPolicy. + properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + BackendSecurityPolicy specifies configuration for authentication and authorization rules on the traffic + exiting the gateway to the backend. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + BackendSecurityPolicySpec specifies authentication rules on access the provider from the Gateway. + Only one mechanism to access a backend(s) can be specified. + + Only one type of BackendSecurityPolicy can be defined. + maxProperties: 3 + properties: + anthropicAPIKey: + description: |- + AnthropicAPIKey is a mechanism to access Anthropic backend(s). The API key will be injected into the "x-api-key" header. + https://docs.claude.com/en/api/overview#authentication + properties: + secretRef: + description: |- + SecretRef is the reference to the secret containing the Anthropic API key. + ai-gateway must be given the permission to read this secret. + The key of the secret should be "apiKey". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + apiKey: + description: APIKey is a mechanism to access a backend(s). The API + key will be injected into the Authorization header. + properties: + secretRef: + description: |- + SecretRef is the reference to the secret containing the API key. + ai-gateway must be given the permission to read this secret. + The key of the secret should be "apiKey". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + awsCredentials: + description: AWSCredentials is a mechanism to access a backend(s). + AWS specific logic will be applied. + properties: + credentialsFile: + description: |- + CredentialsFile specifies the credentials file to use for the AWS provider. + When specified, this takes precedence over the default credential chain. + properties: + profile: + default: default + description: Profile is the profile to use in the credentials + file. + type: string + secretRef: + description: |- + SecretRef is the reference to the credential file. + + The secret should contain the AWS credentials file keyed on "credentials". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + oidcExchangeToken: + description: |- + OIDCExchangeToken specifies the oidc configurations used to obtain an oidc token. The oidc token will be + used to obtain temporary credentials to access AWS. + When specified, this takes precedence over the default credential chain. + properties: + aud: + description: Aud defines the audience that this ID Token is + intended for. + type: string + awsRoleArn: + description: |- + AwsRoleArn is the AWS IAM Role with the permission to use specific resources in AWS account + which maps to the temporary AWS security credentials exchanged using the authentication token issued by OIDC provider. + minLength: 1 + type: string + grantType: + description: GrantType is the method application gets access + token. + type: string + oidc: + description: OIDC is used to obtain oidc tokens via an SSO + server which will be used to exchange for provider credentials. + properties: + clientID: + description: |- + The client ID to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + Only one of clientID or clientIDRef must be set. + minLength: 1 + type: string + clientIDRef: + description: |- + The Kubernetes secret which contains the client ID to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + Exactly one of clientID or clientIDRef must be set. + This is an Opaque secret. The client ID should be stored in the key "client-id". + + Only one of clientID or clientIDRef must be set. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + clientSecret: + description: |- + The Kubernetes secret which contains the OIDC client secret to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + This is an Opaque secret. The client secret should be stored in the key + "client-secret". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + cookieConfig: + description: |- + CookieConfigs allows setting the SameSite attribute for OIDC cookies. + By default, its unset. + properties: + sameSite: + enum: + - Lax + - Strict + - None + type: string + type: object + cookieDomain: + description: |- + The optional domain to set the access and ID token cookies on. + If not set, the cookies will default to the host of the request, not including the subdomains. + If set, the cookies will be set on the specified domain and all subdomains. + This means that requests to any subdomain will not require reauthentication after users log in to the parent domain. + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9]))*$ + type: string + cookieNames: + description: |- + The optional cookie name overrides to be used for Bearer and IdToken cookies in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses a randomly generated suffix + properties: + accessToken: + description: |- + The name of the cookie used to store the AccessToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, defaults to "AccessToken-(randomly generated uid)" + type: string + idToken: + description: |- + The name of the cookie used to store the IdToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). 
+ If not specified, defaults to "IdToken-(randomly generated uid)" + type: string + type: object + csrfTokenTTL: + description: |- + CSRFTokenTTL defines how long the CSRF token generated during the OAuth2 authorization flow remains valid. + + This duration determines the lifetime of the CSRF cookie, which is validated against the CSRF token + in the "state" parameter when the provider redirects back to the callback endpoint. + + If omitted, Envoy Gateway defaults the token expiration to 10 minutes. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultRefreshTokenTTL: + description: |- + DefaultRefreshTokenTTL is the default lifetime of the refresh token. + This field is only used when the exp (expiration time) claim is omitted in + the refresh token or the refresh token is not JWT. + + If not specified, defaults to 604800s (one week). + Note: this field is only applicable when the "refreshToken" field is set to true. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultTokenTTL: + description: |- + DefaultTokenTTL is the default lifetime of the id token and access token. + Please note that Envoy will always use the expiry time from the response + of the authorization server if it is provided. This field is only used when + the expiry time is not provided by the authorization. + + If not specified, defaults to 0. In this case, the "expires_in" field in + the authorization response must be set by the authorization server, or the + OAuth flow will fail. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + denyRedirect: + description: |- + Any request that matches any of the provided matchers (with either tokens that are expired or missing tokens) will not be redirected to the OIDC Provider. + This behavior can be useful for AJAX or machine requests. + properties: + headers: + description: Defines the headers to match against + the request to deny redirect to the OIDC Provider. 
+ items: + description: OIDCDenyRedirectHeader defines how + a header is matched + properties: + name: + description: Specifies the name of the header + in the request. + minLength: 1 + type: string + type: + default: Exact + description: Type specifies how to match against + a string. + enum: + - Exact + - Prefix + - Suffix + - RegularExpression + type: string + value: + description: Value specifies the string value + that the match must have. + maxLength: 1024 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + minItems: 1 + type: array + required: + - headers + type: object + disableTokenEncryption: + description: |- + Disable token encryption. When set to true, both the access token and the ID token will be stored in plain text. + This option should only be used in secure environments where token encryption is not required. + Default is false (tokens are encrypted). + type: boolean + forwardAccessToken: + description: |- + ForwardAccessToken indicates whether the Envoy should forward the access token + via the Authorization header Bearer scheme to the upstream. + If not specified, defaults to false. + type: boolean + logoutPath: + description: |- + The path to log a user out, clearing their credential cookies. + + If not specified, uses a default logout path "/logout" + type: string + passThroughAuthHeader: + description: |- + Skips OIDC authentication when the request contains a header that will be extracted by the JWT filter. Unless + explicitly stated otherwise in the extractFrom field, this will be the "Authorization: Bearer ..." header. + + The passThroughAuthHeader option is typically used for non-browser clients that may not be able to handle OIDC + redirects and wish to directly supply a token instead. + + If not specified, defaults to false. + type: boolean + provider: + description: The OIDC Provider configuration. 
+ properties: + authorizationEndpoint: + description: |- + The OIDC Provider's [authorization endpoint](https://openid.net/specs/openid-connect-core-1_0.html#AuthorizationEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. 
This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == + ''Service'') ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel + requests that Envoy will make to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel + retries that Envoy will make to the referenced + backend defined within a xRoute rule. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending + requests that Envoy will queue to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures + the maximum number of connections that + Envoy will establish per-endpoint to + the referenced backend defined within + a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. 
+ x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). 
+ format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). + If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. 
+ If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines + the number of healthy health checks + required before a backend host is marked + healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses + to match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. 
+ Defaults to GET + type: string + path: + description: Path defines the HTTP + path that will be requested during + health checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time + between active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time + to wait for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of + health checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines + the number of unhealthy health checks + required before a backend host is marked + unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, + http field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, + tcp field needs to be set. + rule: 'self.type == ''TCP'' ? has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set + if the Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive health check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines + the base duration for which a host will + be ejected on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets + the number of consecutive 5xx errors + triggering ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors + sets the number of consecutive gateway + errors triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time + between passive health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the + maximum percentage of hosts in a cluster + that can be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between + external and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration + for backend connections.
+ properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. + https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash + type is set to Cookie. 
+ properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes + to set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to + hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the + consistent hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header + to hash. + type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the + query parameter hash policy when the + consistent hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited + to 5000011. 
+ format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. + If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint + override information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. 
+ The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". + enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests + between locality zones. + properties: + preferLocal: + description: PreferLocalZone configures + zone-aware routing to prefer sending + traffic to the local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. 
+ properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold + is the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage + of requests that will be considered + for zone aware routing if zone aware + routing is configured. If not specified, + Envoy defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported + for RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? !has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported + for LeastRequest, Random, and RoundRobin load + balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtocol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled.
+ properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy + to be applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per + retry attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. 
+ items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the + conditions that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is one to four number-and-unit pairs using the units h, m, s, or ms (for example, `1h30m`). + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend + connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete.
This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time + until which entire response is received + from the upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy + only works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + endSessionEndpoint: + description: |- + The OIDC Provider's [end session endpoint](https://openid.net/specs/openid-connect-core-1_0.html#RPLogout). + + If the end session endpoint is provided, EG will use it to log out the user from the OIDC Provider when the user accesses the logout path. + EG will also try to discover the end session endpoint from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse) when authorizationEndpoint or tokenEndpoint is not provided. + type: string + issuer: + description: |- + The OIDC Provider's [issuer identifier](https://openid.net/specs/openid-connect-discovery-1_0.html#IssuerDiscovery). 
+ Issuer MUST be a URI RFC 3986 [RFC3986] with a scheme component that MUST + be https, a host component, and optionally, port and path components and + no query or fragment components. + minLength: 1 + type: string + tokenEndpoint: + description: |- + The OIDC Provider's [token endpoint](https://openid.net/specs/openid-connect-core-1_0.html#TokenEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + required: + - issuer + type: object + x-kubernetes-validations: + - message: BackendRefs must be used, backendRef is not + supported. + rule: '!has(self.backendRef)' + - message: Retry timeout is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.perRetry)? + !has(self.backendSettings.retry.perRetry.timeout):true):true):true + - message: HTTPStatusCodes is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.retryOn)? + !has(self.backendSettings.retry.retryOn.httpStatusCodes):true):true):true + redirectURL: + description: |- + The redirect URL to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses the default redirect URI "%REQ(x-forwarded-proto)%://%REQ(:authority)%/oauth2/callback" + type: string + refreshToken: + default: true + description: |- + RefreshToken indicates whether the Envoy should automatically refresh the + id token and access token when they expire. + When set to true, the Envoy will use the refresh token to get a new id token + and access token when they expire. + + If not specified, defaults to true. 
+ type: boolean + resources: + description: |- + The OIDC resources to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + items: + type: string + type: array + scopes: + description: |- + The OIDC scopes to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + The "openid" scope is always added to the list of scopes if not already + specified. + items: + type: string + type: array + required: + - clientSecret + - provider + type: object + x-kubernetes-validations: + - message: only one of clientID or clientIDRef must be set + rule: (has(self.clientID) && !has(self.clientIDRef)) || + (!has(self.clientID) && has(self.clientIDRef)) + required: + - awsRoleArn + - oidc + type: object + region: + description: Region specifies the AWS region associated with the + policy. + minLength: 1 + type: string + required: + - region + type: object + azureAPIKey: + description: AzureAPIKey is a mechanism to access Azure OpenAI backend(s). + The API key will be injected into the api-key header. + properties: + secretRef: + description: |- + SecretRef is the reference to the secret containing the Azure API key. + ai-gateway must be given the permission to read this secret. + The key of the secret should be "apiKey". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. 
+ maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + azureCredentials: + description: AzureCredentials is a mechanism to access a backend(s). + Azure OpenAI specific logic will be applied. + properties: + clientID: + description: ClientID is a unique identifier for an application + in Azure. + minLength: 1 + type: string + clientSecretRef: + description: |- + ClientSecretRef is the reference to the secret containing the Azure client secret. + ai-gateway must be given the permission to read this secret. + The key of secret should be "client-secret". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + oidcExchangeToken: + description: |- + OIDCExchangeToken specifies the oidc configurations used to obtain an oidc token. The oidc token will be + used to obtain temporary credentials to access Azure. + properties: + aud: + description: Aud defines the audience that this ID Token is + intended for. + type: string + grantType: + description: GrantType is the method application gets access + token. + type: string + oidc: + description: OIDC is used to obtain oidc tokens via an SSO + server which will be used to exchange for provider credentials. + properties: + clientID: + description: |- + The client ID to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + Only one of clientID or clientIDRef must be set. + minLength: 1 + type: string + clientIDRef: + description: |- + The Kubernetes secret which contains the client ID to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + Exactly one of clientID or clientIDRef must be set. + This is an Opaque secret. The client ID should be stored in the key "client-id". + + Only one of clientID or clientIDRef must be set. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. 
For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + clientSecret: + description: |- + The Kubernetes secret which contains the OIDC client secret to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + This is an Opaque secret. The client secret should be stored in the key + "client-secret". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + cookieConfig: + description: |- + CookieConfigs allows setting the SameSite attribute for OIDC cookies. + By default, its unset. + properties: + sameSite: + enum: + - Lax + - Strict + - None + type: string + type: object + cookieDomain: + description: |- + The optional domain to set the access and ID token cookies on. + If not set, the cookies will default to the host of the request, not including the subdomains. + If set, the cookies will be set on the specified domain and all subdomains. + This means that requests to any subdomain will not require reauthentication after users log in to the parent domain. + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9]))*$ + type: string + cookieNames: + description: |- + The optional cookie name overrides to be used for Bearer and IdToken cookies in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses a randomly generated suffix + properties: + accessToken: + description: |- + The name of the cookie used to store the AccessToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, defaults to "AccessToken-(randomly generated uid)" + type: string + idToken: + description: |- + The name of the cookie used to store the IdToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). 
+ If not specified, defaults to "IdToken-(randomly generated uid)" + type: string + type: object + csrfTokenTTL: + description: |- + CSRFTokenTTL defines how long the CSRF token generated during the OAuth2 authorization flow remains valid. + + This duration determines the lifetime of the CSRF cookie, which is validated against the CSRF token + in the "state" parameter when the provider redirects back to the callback endpoint. + + If omitted, Envoy Gateway defaults the token expiration to 10 minutes. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultRefreshTokenTTL: + description: |- + DefaultRefreshTokenTTL is the default lifetime of the refresh token. + This field is only used when the exp (expiration time) claim is omitted in + the refresh token or the refresh token is not JWT. + + If not specified, defaults to 604800s (one week). + Note: this field is only applicable when the "refreshToken" field is set to true. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultTokenTTL: + description: |- + DefaultTokenTTL is the default lifetime of the id token and access token. + Please note that Envoy will always use the expiry time from the response + of the authorization server if it is provided. This field is only used when + the expiry time is not provided by the authorization. + + If not specified, defaults to 0. In this case, the "expires_in" field in + the authorization response must be set by the authorization server, or the + OAuth flow will fail. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + denyRedirect: + description: |- + Any request that matches any of the provided matchers (with either tokens that are expired or missing tokens) will not be redirected to the OIDC Provider. + This behavior can be useful for AJAX or machine requests. + properties: + headers: + description: Defines the headers to match against + the request to deny redirect to the OIDC Provider. 
+ items: + description: OIDCDenyRedirectHeader defines how + a header is matched + properties: + name: + description: Specifies the name of the header + in the request. + minLength: 1 + type: string + type: + default: Exact + description: Type specifies how to match against + a string. + enum: + - Exact + - Prefix + - Suffix + - RegularExpression + type: string + value: + description: Value specifies the string value + that the match must have. + maxLength: 1024 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + minItems: 1 + type: array + required: + - headers + type: object + disableTokenEncryption: + description: |- + Disable token encryption. When set to true, both the access token and the ID token will be stored in plain text. + This option should only be used in secure environments where token encryption is not required. + Default is false (tokens are encrypted). + type: boolean + forwardAccessToken: + description: |- + ForwardAccessToken indicates whether the Envoy should forward the access token + via the Authorization header Bearer scheme to the upstream. + If not specified, defaults to false. + type: boolean + logoutPath: + description: |- + The path to log a user out, clearing their credential cookies. + + If not specified, uses a default logout path "/logout" + type: string + passThroughAuthHeader: + description: |- + Skips OIDC authentication when the request contains a header that will be extracted by the JWT filter. Unless + explicitly stated otherwise in the extractFrom field, this will be the "Authorization: Bearer ..." header. + + The passThroughAuthHeader option is typically used for non-browser clients that may not be able to handle OIDC + redirects and wish to directly supply a token instead. + + If not specified, defaults to false. + type: boolean + provider: + description: The OIDC Provider configuration. 
+ properties: + authorizationEndpoint: + description: |- + The OIDC Provider's [authorization endpoint](https://openid.net/specs/openid-connect-core-1_0.html#AuthorizationEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. 
This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == + ''Service'') ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel + requests that Envoy will make to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel + retries that Envoy will make to the referenced + backend defined within a xRoute rule. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending + requests that Envoy will queue to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures + the maximum number of connections that + Envoy will establish per-endpoint to + the referenced backend defined within + a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. 
+ x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). 
+ format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). + If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. 
+ If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines + the number of healthy health checks + required before a backend host is marked + healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses + to match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. 
+ Defaults to GET + type: string + path: + description: Path defines the HTTP + path that will be requested during + health checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time + between active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time + to wait for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of + health checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines + the number of unhealthy health checks + required before a backend host is marked + unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, + http field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, + tcp field needs to be set. + rule: 'self.type == ''TCP'' ? has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set + if the Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines + the base duration for which a host will + be ejected on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets + the number of consecutive 5xx errors + triggering ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors + sets the number of consecutive gateway + errors triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time + between passive health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the + maximum percentage of hosts in a cluster + that can be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between + external and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration + for backend connections. 
+ properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. + https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash + type is set to Cookie. 
+ properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes + to set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to + hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the + consistent hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header + to hash. + type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the + query parameter hash policy when the + consistent hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited + to 5000011. 
+ format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. + If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint + override information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. 
+ The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". + enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests + between locality zones. + properties: + preferLocal: + description: PreferLocalZone configures + zone-aware routing to prefer sending + traffic to the local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. 
+ properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold + is the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage + of requests that will be considered + for zone aware routing if zone aware + routing is configured. If not specified, + Envoy defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported + for RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? !has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported + for LeastRequest, Random, and RoundRobin load + balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. 
+ properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy + to be applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per + retry attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. 
+ items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the + conditions that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend + connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. 
This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time + until which entire response is received + from the upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy + only works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + endSessionEndpoint: + description: |- + The OIDC Provider's [end session endpoint](https://openid.net/specs/openid-connect-core-1_0.html#RPLogout). + + If the end session endpoint is provided, EG will use it to log out the user from the OIDC Provider when the user accesses the logout path. + EG will also try to discover the end session endpoint from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse) when authorizationEndpoint or tokenEndpoint is not provided. + type: string + issuer: + description: |- + The OIDC Provider's [issuer identifier](https://openid.net/specs/openid-connect-discovery-1_0.html#IssuerDiscovery). 
+ Issuer MUST be a URI RFC 3986 [RFC3986] with a scheme component that MUST + be https, a host component, and optionally, port and path components and + no query or fragment components. + minLength: 1 + type: string + tokenEndpoint: + description: |- + The OIDC Provider's [token endpoint](https://openid.net/specs/openid-connect-core-1_0.html#TokenEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + required: + - issuer + type: object + x-kubernetes-validations: + - message: BackendRefs must be used, backendRef is not + supported. + rule: '!has(self.backendRef)' + - message: Retry timeout is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.perRetry)? + !has(self.backendSettings.retry.perRetry.timeout):true):true):true + - message: HTTPStatusCodes is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.retryOn)? + !has(self.backendSettings.retry.retryOn.httpStatusCodes):true):true):true + redirectURL: + description: |- + The redirect URL to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses the default redirect URI "%REQ(x-forwarded-proto)%://%REQ(:authority)%/oauth2/callback" + type: string + refreshToken: + default: true + description: |- + RefreshToken indicates whether the Envoy should automatically refresh the + id token and access token when they expire. + When set to true, the Envoy will use the refresh token to get a new id token + and access token when they expire. + + If not specified, defaults to true. 
+ type: boolean + resources: + description: |- + The OIDC resources to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + items: + type: string + type: array + scopes: + description: |- + The OIDC scopes to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + The "openid" scope is always added to the list of scopes if not already + specified. + items: + type: string + type: array + required: + - clientSecret + - provider + type: object + x-kubernetes-validations: + - message: only one of clientID or clientIDRef must be set + rule: (has(self.clientID) && !has(self.clientIDRef)) || + (!has(self.clientID) && has(self.clientIDRef)) + required: + - oidc + type: object + tenantID: + description: TenantId is a unique identifier for an Azure Active + Directory instance. + minLength: 1 + type: string + required: + - clientID + - tenantID + type: object + x-kubernetes-validations: + - message: Exactly one of clientSecretRef or oidcExchangeToken must + be specified + rule: (has(self.clientSecretRef) && !has(self.oidcExchangeToken)) + || (!has(self.clientSecretRef) && has(self.oidcExchangeToken)) + gcpCredentials: + description: GCPCredentials is a mechanism to access a backend(s). + GCP specific logic will be applied. + properties: + credentialsFile: + description: CredentialsFile specifies the service account credentials + file to use for the GCP provider. + properties: + secretRef: + description: |- + SecretRef is the reference to the credential file. + + The secret should contain the GCP service account credentials file keyed on "service_account.json". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - secretRef + type: object + projectName: + description: ProjectName is the GCP project name. + minLength: 1 + type: string + region: + description: Region is the GCP region associated with the policy. + minLength: 1 + type: string + workloadIdentityFederationConfig: + description: WorkloadIdentityFederationConfig is the configuration + for the GCP Workload Identity Federation. + properties: + oidcExchangeToken: + description: |- + OIDCExchangeToken specifies the oidc configurations used to obtain an oidc token. The oidc token will be + used to obtain temporary credentials to access GCP. + properties: + aud: + description: Aud defines the audience that this ID Token + is intended for. + type: string + grantType: + description: GrantType is the method application gets + access token. + type: string + oidc: + description: OIDC is used to obtain oidc tokens via an + SSO server which will be used to exchange for provider + credentials. 
+ properties: + clientID: + description: |- + The client ID to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + Only one of clientID or clientIDRef must be set. + minLength: 1 + type: string + clientIDRef: + description: |- + The Kubernetes secret which contains the client ID to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + Exactly one of clientID or clientIDRef must be set. + This is an Opaque secret. The client ID should be stored in the key "client-id". + + Only one of clientID or clientIDRef must be set. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For + example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + clientSecret: + description: |- + The Kubernetes secret which contains the OIDC client secret to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + + This is an Opaque secret. The client secret should be stored in the key + "client-secret". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For + example "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + cookieConfig: + description: |- + CookieConfigs allows setting the SameSite attribute for OIDC cookies. + By default, its unset. + properties: + sameSite: + enum: + - Lax + - Strict + - None + type: string + type: object + cookieDomain: + description: |- + The optional domain to set the access and ID token cookies on. 
+ If not set, the cookies will default to the host of the request, not including the subdomains. + If set, the cookies will be set on the specified domain and all subdomains. + This means that requests to any subdomain will not require reauthentication after users log in to the parent domain. + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9]))*$ + type: string + cookieNames: + description: |- + The optional cookie name overrides to be used for Bearer and IdToken cookies in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses a randomly generated suffix + properties: + accessToken: + description: |- + The name of the cookie used to store the AccessToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, defaults to "AccessToken-(randomly generated uid)" + type: string + idToken: + description: |- + The name of the cookie used to store the IdToken in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, defaults to "IdToken-(randomly generated uid)" + type: string + type: object + csrfTokenTTL: + description: |- + CSRFTokenTTL defines how long the CSRF token generated during the OAuth2 authorization flow remains valid. + + This duration determines the lifetime of the CSRF cookie, which is validated against the CSRF token + in the "state" parameter when the provider redirects back to the callback endpoint. + + If omitted, Envoy Gateway defaults the token expiration to 10 minutes. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultRefreshTokenTTL: + description: |- + DefaultRefreshTokenTTL is the default lifetime of the refresh token. + This field is only used when the exp (expiration time) claim is omitted in + the refresh token or the refresh token is not JWT. + + If not specified, defaults to 604800s (one week). 
+ Note: this field is only applicable when the "refreshToken" field is set to true. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + defaultTokenTTL: + description: |- + DefaultTokenTTL is the default lifetime of the id token and access token. + Please note that Envoy will always use the expiry time from the response + of the authorization server if it is provided. This field is only used when + the expiry time is not provided by the authorization. + + If not specified, defaults to 0. In this case, the "expires_in" field in + the authorization response must be set by the authorization server, or the + OAuth flow will fail. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + denyRedirect: + description: |- + Any request that matches any of the provided matchers (with either tokens that are expired or missing tokens) will not be redirected to the OIDC Provider. + This behavior can be useful for AJAX or machine requests. + properties: + headers: + description: Defines the headers to match against + the request to deny redirect to the OIDC Provider. + items: + description: OIDCDenyRedirectHeader defines + how a header is matched + properties: + name: + description: Specifies the name of the header + in the request. + minLength: 1 + type: string + type: + default: Exact + description: Type specifies how to match + against a string. + enum: + - Exact + - Prefix + - Suffix + - RegularExpression + type: string + value: + description: Value specifies the string + value that the match must have. + maxLength: 1024 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + minItems: 1 + type: array + required: + - headers + type: object + disableTokenEncryption: + description: |- + Disable token encryption. When set to true, both the access token and the ID token will be stored in plain text. + This option should only be used in secure environments where token encryption is not required. 
+ Default is false (tokens are encrypted). + type: boolean + forwardAccessToken: + description: |- + ForwardAccessToken indicates whether the Envoy should forward the access token + via the Authorization header Bearer scheme to the upstream. + If not specified, defaults to false. + type: boolean + logoutPath: + description: |- + The path to log a user out, clearing their credential cookies. + + If not specified, uses a default logout path "/logout" + type: string + passThroughAuthHeader: + description: |- + Skips OIDC authentication when the request contains a header that will be extracted by the JWT filter. Unless + explicitly stated otherwise in the extractFrom field, this will be the "Authorization: Bearer ..." header. + + The passThroughAuthHeader option is typically used for non-browser clients that may not be able to handle OIDC + redirects and wish to directly supply a token instead. + + If not specified, defaults to false. + type: boolean + provider: + description: The OIDC Provider configuration. + properties: + authorizationEndpoint: + description: |- + The OIDC Provider's [authorization endpoint](https://openid.net/specs/openid-connect-core-1_0.html#AuthorizationEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. 
+ format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == + ''Service'') ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. 
+ + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind + == ''Service'') ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel + requests that Envoy will make to the + referenced backend defined within a + xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel + retries that Envoy will make to the + referenced backend defined within a + xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending + requests that Envoy will queue to the + referenced backend defined within a + xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit + Breakers that will apply per-endpoint + for an upstream cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures + the maximum number of connections + that Envoy will establish per-endpoint + to the referenced backend defined + within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. 
When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). + format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). 
+ If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to + perform active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. + If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines + the number of healthy health checks + required before a backend host is + marked healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse + defines a list of HTTP expected + responses to match. + properties: + binary: + description: Binary payload + base64 encoded. + format: byte + type: string + text: + description: Text payload + in plain text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines + the type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is + Text, text field needs to + be set. + rule: 'self.type == ''Text'' + ? 
has(self.text) : !has(self.text)' + - message: If payload type is + Binary, binary field needs + to be set. + rule: 'self.type == ''Binary'' + ? has(self.binary) : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. + Defaults to GET + type: string + path: + description: Path defines the + HTTP path that will be requested + during health checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the + time between active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the + expected response payload. + properties: + binary: + description: Binary payload + base64 encoded. + format: byte + type: string + text: + description: Text payload + in plain text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines + the type of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is + Text, text field needs to + be set. + rule: 'self.type == ''Text'' + ? has(self.text) : !has(self.text)' + - message: If payload type is + Binary, binary field needs + to be set. + rule: 'self.type == ''Binary'' + ? has(self.binary) : !has(self.binary)' + send: + description: Send defines the + request payload. + properties: + binary: + description: Binary payload + base64 encoded. + format: byte + type: string + text: + description: Text payload + in plain text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines + the type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is + Text, text field needs to + be set. + rule: 'self.type == ''Text'' + ? has(self.text) : !has(self.text)' + - message: If payload type is + Binary, binary field needs + to be set. + rule: 'self.type == ''Binary'' + ? has(self.binary) : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time + to wait for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type + of health checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines + the number of unhealthy health checks + required before a backend host is + marked unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, + http field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, + tcp field needs to be set. + rule: 'self.type == ''TCP'' ? 
has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be + set if the Health Checker type is + GRPC. + rule: 'has(self.grpc) ? self.type == + ''GRPC'' : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. + format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines + the base duration for which a host + will be ejected on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors + sets the number of consecutive 5xx + errors triggering ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors + sets the number of consecutive gateway + errors triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the + time between passive health checks. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets + the maximum percentage of hosts + in a cluster that can be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between + external and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration + for backend connections. + properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. 
+ https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the + cookie hash policy when the consistent + hash type is set to Cookie. + properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes + to set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header + to hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the + header hash policy for each header, + when the consistent hash type is + set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header + to hash. 
+ type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures + the query parameter hash policy + when the consistent hash type is + set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query + param to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited + to 5000011. + format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is + header, the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is + headers, the headers field must be + set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is + cookie, the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is + queryParams, the queryParams field + must be set. + rule: 'self.type == ''QueryParams'' + ? has(self.queryParams) : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. 
+ If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the + sources to extract endpoint override + information from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint + override information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. + The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". 
+ enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests + between locality zones. + properties: + preferLocal: + description: PreferLocalZone configures + zone-aware routing to prefer sending + traffic to the local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. + properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold + is the minimum number of total + upstream endpoints across all + zones required to enable zone-aware + routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage + of requests that will be considered + for zone aware routing if zone + aware routing is configured. + If not specified, Envoy defaults + to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? + has(self.consistentHash) : !has(self.consistentHash)' + - message: Currently SlowStart is only supported + for RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? 
!has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported + for LeastRequest, Random, and RoundRobin + load balancers. + rule: 'self.type == ''ConsistentHash'' ? + !has(self.zoneAware) : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy + Protocol when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. + properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number + of retries to be attempted. Defaults + to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy + to be applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the + base interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. 
This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout + per retry attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the + retry trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies + the conditions that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend + connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the + time until which entire response + is received from the upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy + only works with RoundRobin or Random load + balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', + ''RoundRobin'']))' + endSessionEndpoint: + description: |- + The OIDC Provider's [end session endpoint](https://openid.net/specs/openid-connect-core-1_0.html#RPLogout). + + If the end session endpoint is provided, EG will use it to log out the user from the OIDC Provider when the user accesses the logout path. + EG will also try to discover the end session endpoint from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse) when authorizationEndpoint or tokenEndpoint is not provided. + type: string + issuer: + description: |- + The OIDC Provider's [issuer identifier](https://openid.net/specs/openid-connect-discovery-1_0.html#IssuerDiscovery). + Issuer MUST be a URI RFC 3986 [RFC3986] with a scheme component that MUST + be https, a host component, and optionally, port and path components and + no query or fragment components. + minLength: 1 + type: string + tokenEndpoint: + description: |- + The OIDC Provider's [token endpoint](https://openid.net/specs/openid-connect-core-1_0.html#TokenEndpoint). + If not provided, EG will try to discover it from the provider's [Well-Known Configuration Endpoint](https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderConfigurationResponse). + type: string + required: + - issuer + type: object + x-kubernetes-validations: + - message: BackendRefs must be used, backendRef is + not supported. + rule: '!has(self.backendRef)' + - message: Retry timeout is not supported. 
+ rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.perRetry)? + !has(self.backendSettings.retry.perRetry.timeout):true):true):true + - message: HTTPStatusCodes is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.retryOn)? + !has(self.backendSettings.retry.retryOn.httpStatusCodes):true):true):true + redirectURL: + description: |- + The redirect URL to be used in the OIDC + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + If not specified, uses the default redirect URI "%REQ(x-forwarded-proto)%://%REQ(:authority)%/oauth2/callback" + type: string + refreshToken: + default: true + description: |- + RefreshToken indicates whether the Envoy should automatically refresh the + id token and access token when they expire. + When set to true, the Envoy will use the refresh token to get a new id token + and access token when they expire. + + If not specified, defaults to true. + type: boolean + resources: + description: |- + The OIDC resources to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + items: + type: string + type: array + scopes: + description: |- + The OIDC scopes to be used in the + [Authentication Request](https://openid.net/specs/openid-connect-core-1_0.html#AuthRequest). + The "openid" scope is always added to the list of scopes if not already + specified. + items: + type: string + type: array + required: + - clientSecret + - provider + type: object + x-kubernetes-validations: + - message: only one of clientID or clientIDRef must be + set + rule: (has(self.clientID) && !has(self.clientIDRef)) + || (!has(self.clientID) && has(self.clientIDRef)) + required: + - oidc + type: object + projectID: + description: ProjectID is the GCP project ID. 
+ minLength: 1 + type: string + serviceAccountImpersonation: + description: |- + ServiceAccountImpersonation is the service account impersonation configuration. + This is used to impersonate a service account when getting access token. + properties: + serviceAccountName: + description: ServiceAccountName is the name of the service + account to impersonate. + minLength: 1 + type: string + required: + - serviceAccountName + type: object + workloadIdentityPoolName: + description: |- + WorkloadIdentityPoolName is the name of the workload identity pool defined in GCP. + https://cloud.google.com/iam/docs/workload-identity-federation?hl=en + minLength: 1 + type: string + workloadIdentityProviderName: + description: WorkloadIdentityProviderName is the name of the + external identity provider as registered on Google Cloud + Platform. + minLength: 1 + type: string + required: + - oidcExchangeToken + - projectID + - workloadIdentityPoolName + - workloadIdentityProviderName + type: object + required: + - projectName + - region + type: object + x-kubernetes-validations: + - message: At most one of credentialsFile or workloadIdentityFederationConfig + may be specified + rule: '!(has(self.credentialsFile) && has(self.workloadIdentityFederationConfig))' + targetRefs: + description: |- + TargetRefs are the names of the AIServiceBackend or InferencePool resources this BackendSecurityPolicy is being attached to. + Attaching multiple BackendSecurityPolicies to the same resource is invalid and will result in an error + during the reconciliation of the resource. + items: + description: |- + LocalPolicyTargetReference identifies an API object to apply a direct or + inherited policy to. This should be used as part of Policy resources + that can target Gateway API resources. For more information on how this + policy attachment model works, and a sample Policy resource, refer to + the policy attachment documentation for Gateway API. 
+ properties: + group: + description: Group is the group of the target resource. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + description: Kind is kind of the target resource. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the target resource. + maxLength: 253 + minLength: 1 + type: string + required: + - group + - kind + - name + type: object + maxItems: 16 + type: array + x-kubernetes-validations: + - message: targetRefs must reference AIServiceBackend or InferencePool + resources + rule: self.all(ref, (ref.group == 'aigateway.envoyproxy.io' && ref.kind + == 'AIServiceBackend') || (ref.group == 'inference.networking.k8s.io' + && ref.kind == 'InferencePool')) + type: + description: Type specifies the type of the backend security policy. + enum: + - APIKey + - AWSCredentials + - AzureAPIKey + - AzureCredentials + - GCPCredentials + - AnthropicAPIKey + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: When type is APIKey, only apiKey field should be set + rule: 'self.type == ''APIKey'' ? (has(self.apiKey) && !has(self.awsCredentials) + && !has(self.azureAPIKey) && !has(self.azureCredentials) && !has(self.gcpCredentials) + && !has(self.anthropicAPIKey)) : true' + - message: When type is AWSCredentials, only awsCredentials field should + be set + rule: 'self.type == ''AWSCredentials'' ? (has(self.awsCredentials) && + !has(self.apiKey) && !has(self.azureAPIKey) && !has(self.azureCredentials) + && !has(self.gcpCredentials) && !has(self.anthropicAPIKey)) : true' + - message: When type is AzureAPIKey, only azureAPIKey field should be + set + rule: 'self.type == ''AzureAPIKey'' ? 
(has(self.azureAPIKey) && !has(self.apiKey) + && !has(self.awsCredentials) && !has(self.azureCredentials) && !has(self.gcpCredentials) + && !has(self.anthropicAPIKey)) : true' + - message: When type is AzureCredentials, only azureCredentials field + should be set + rule: 'self.type == ''AzureCredentials'' ? (has(self.azureCredentials) + && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) + && !has(self.gcpCredentials) && !has(self.anthropicAPIKey)) : true' + - message: When type is GCPCredentials, only gcpCredentials field should + be set + rule: 'self.type == ''GCPCredentials'' ? (has(self.gcpCredentials) && + !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) + && !has(self.azureCredentials) && !has(self.anthropicAPIKey)) : true' + - message: When type is AnthropicAPIKey, only anthropicAPIKey field should + be set + rule: 'self.type == ''AnthropicAPIKey'' ? (has(self.anthropicAPIKey) + && !has(self.apiKey) && !has(self.awsCredentials) && !has(self.azureAPIKey) + && !has(self.azureCredentials) && !has(self.gcpCredentials)) : true' + status: + description: Status defines the status details of the BackendSecurityPolicy. + properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_gatewayconfigs.yaml b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_gatewayconfigs.yaml new file mode 100644 index 00000000..f1d4e3ac --- /dev/null +++ b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_gatewayconfigs.yaml @@ -0,0 +1,1422 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. 
+ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.0 + name: gatewayconfigs.aigateway.envoyproxy.io +spec: + group: aigateway.envoyproxy.io + names: + kind: GatewayConfig + listKind: GatewayConfigList + plural: gatewayconfigs + shortNames: + - gwconfig + singular: gatewayconfig + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + deprecated: true + deprecationWarning: aigateway.envoyproxy.io/v1alpha1 is deprecated; use aigateway.envoyproxy.io/v1beta1 + instead + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + GatewayConfig provides configuration for the AI Gateway external processor + container that is deployed alongside the Gateway. + + A GatewayConfig is referenced by a Gateway via the annotation + "aigateway.envoyproxy.io/gateway-config". The GatewayConfig must be in the + same namespace as the Gateway that references it. + + This allows gateway-level configuration of the external processor, including + environment variables (e.g., for tracing configuration) and resource requirements. + + Multiple Gateways can reference the same GatewayConfig to share configuration. + + Environment Variable Precedence: + When merging environment variables, the following precedence applies (highest to lowest): + 1. GatewayConfig.Spec.ExtProc.Kubernetes.Env (this resource) + 2. Global controller flags (extProcExtraEnvVars) + + If the same environment variable name exists in both sources, the GatewayConfig + value takes precedence. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the configuration for the external processor. + properties: + extProc: + description: ExtProc defines the configuration for the external processor + container. + properties: + kubernetes: + description: Kubernetes defines the configuration for running + the external processor as a Kubernetes container. + properties: + env: + description: List of environment variables to set in the container. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. 
+ type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in + the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. 
+ type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of + the exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select + from. Must be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + image: + description: |- + Image specifies the EnvoyProxy container image to be used including a tag, instead of the default image. + This field is mutually exclusive with ImageRepository. + type: string + x-kubernetes-validations: + - message: Image must include a tag and allowed characters + only (e.g., 'repo:tag'). + rule: self.matches('^[a-zA-Z0-9._-]+(:[0-9]+)?(/[a-zA-Z0-9._/-]+)?(:[a-zA-Z0-9._-]+)?(@sha256:[a-z0-9]+)?$') + imageRepository: + description: |- + ImageRepository specifies the container image repository to be used without specifying a tag. + The default tag will be used. + This field is mutually exclusive with Image. + type: string + x-kubernetes-validations: + - message: ImageRepository must contain only allowed characters + and must not include a tag. + rule: self.matches('^[a-zA-Z0-9._-]+(:[0-9]+)?[a-zA-Z0-9._/-]+$') + resources: + description: |- + Resources required by this container. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This field depends on the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. 
+ type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + securityContext: + description: |- + SecurityContext defines the security options the container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. 
+ AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. 
+ type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. 
+ If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. 
+ If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of + the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + volumeMounts: + description: |- + VolumeMounts are volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a Volume + within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. 
+ When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + If ReadOnly is false, this field has no meaning and must be unspecified. + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + type: object + x-kubernetes-validations: + - message: Either image or imageRepository can be set. 
+ rule: '!has(self.image) || !has(self.imageRepository)' + type: object + globalLLMRequestCosts: + description: |- + GlobalLLMRequestCosts defines default LLM request costs that apply to all + routes referencing this GatewayConfig. These costs can be overridden on a + per-route basis via AIGatewayRoute.Spec.LLMRequestCosts. + + When a request matches a route, the cost calculation proceeds as follows: + 1. If the route defines LLMRequestCosts with a matching metadataKey, use that. + 2. Otherwise, fall back to the global cost with that metadataKey (if defined here). + 3. If neither exists, the cost is not calculated for that metadataKey. + + This allows you to define common cost formulas once at the gateway level + (e.g., billing_charges = input_tokens + output_tokens) and only override + them in specific routes when needed (e.g., premium routes with different pricing). + items: + description: LLMRequestCost configures each request cost. + properties: + cel: + description: "CEL is the CEL expression to calculate the cost + of the request.\nThe CEL expression must return a signed or + unsigned integer. If the\nreturn value is negative, it will + be error.\n\nThe expression can use the following variables:\n\n\t* + model: the model name extracted from the request content. + Type: string.\n\t* backend: the backend name in the form of + \"name.namespace\". Type: string.\n\t* input_tokens: the number + of input tokens. Type: unsigned integer.\n\t* cached_input_tokens: + the number of cached read input tokens. Type: unsigned integer.\n\t* + cache_creation_input_tokens: the number of cache creation + input tokens. Type: unsigned integer.\n\t* output_tokens: + the number of output tokens. Type: unsigned integer.\n\t* + total_tokens: the total number of tokens. Type: unsigned integer.\n\t* + reasoning_tokens: the number of reasoning tokens. Type: unsigned + integer.\n\nFor example, the following expressions are valid:\n\n\t* + \"model == 'llama' ? 
input_tokens + output_token * 0.5 : + total_tokens\"\n\t* \"backend == 'foo.default' ? input_tokens + + output_tokens : total_tokens\"\n\t* \"backend == 'bar.default' + ? (input_tokens - cached_input_tokens) + cached_input_tokens + * 0.1 + cache_creation_input_tokens * 1.25 + output_tokens + : total_tokens\"\n\t* \"input_tokens + output_tokens + total_tokens\"\n\t* + \"input_tokens * output_tokens\"" + type: string + metadataKey: + description: MetadataKey is the key of the metadata to store + this cost of the request. + type: string + type: + description: |- + Type specifies the type of the request cost. The default is "OutputToken", + and it uses "output token" as the cost. The other types are "InputToken", "TotalToken", + "CachedInputToken", "CacheCreationInputToken", "ReasoningToken", and "CEL". + enum: + - OutputToken + - InputToken + - CachedInputToken + - CacheCreationInputToken + - TotalToken + - ReasoningToken + - CEL + type: string + required: + - metadataKey + - type + type: object + type: array + x-kubernetes-list-map-keys: + - metadataKey + x-kubernetes-list-type: map + type: object + status: + description: Status defines the status of the GatewayConfig. + properties: + conditions: + description: Conditions describe the current conditions of the GatewayConfig. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + GatewayConfig provides configuration for the AI Gateway external processor + container that is deployed alongside the Gateway. + + A GatewayConfig is referenced by a Gateway via the annotation + "aigateway.envoyproxy.io/gateway-config". 
The GatewayConfig must be in the + same namespace as the Gateway that references it. + + This allows gateway-level configuration of the external processor, including + environment variables (e.g., for tracing configuration) and resource requirements. + + Multiple Gateways can reference the same GatewayConfig to share configuration. + + Environment Variable Precedence: + When merging environment variables, the following precedence applies (highest to lowest): + 1. GatewayConfig.Spec.ExtProc.Kubernetes.Env (this resource) + 2. Global controller flags (extProcExtraEnvVars) + + If the same environment variable name exists in both sources, the GatewayConfig + value takes precedence. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the configuration for the external processor. + properties: + extProc: + description: ExtProc defines the configuration for the external processor + container. + properties: + kubernetes: + description: Kubernetes defines the configuration for running + the external processor as a Kubernetes container. + properties: + env: + description: List of environment variables to set in the container. + items: + description: EnvVar represents an environment variable present + in a Container. 
+ properties: + name: + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in + the specified API version. 
+ type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
+ properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of + the exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select + from. Must be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + image: + description: |- + Image specifies the EnvoyProxy container image to be used including a tag, instead of the default image. + This field is mutually exclusive with ImageRepository. + type: string + x-kubernetes-validations: + - message: Image must include a tag and allowed characters + only (e.g., 'repo:tag'). + rule: self.matches('^[a-zA-Z0-9._-]+(:[0-9]+)?(/[a-zA-Z0-9._/-]+)?(:[a-zA-Z0-9._-]+)?(@sha256:[a-z0-9]+)?$') + imageRepository: + description: |- + ImageRepository specifies the container image repository to be used without specifying a tag. + The default tag will be used. + This field is mutually exclusive with Image. 
+ type: string + x-kubernetes-validations: + - message: ImageRepository must contain only allowed characters + and must not include a tag. + rule: self.matches('^[a-zA-Z0-9._-]+(:[0-9]+)?[a-zA-Z0-9._/-]+$') + resources: + description: |- + Resources required by this container. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This field depends on the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + securityContext: + description: |- + SecurityContext defines the security options the container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. 
+ Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. 
If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. 
+ type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of + the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. 
+ All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + volumeMounts: + description: |- + VolumeMounts are volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a Volume + within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + If ReadOnly is false, this field has no meaning and must be unspecified. + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. 
If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + type: object + x-kubernetes-validations: + - message: Either image or imageRepository can be set. + rule: '!has(self.image) || !has(self.imageRepository)' + type: object + globalLLMRequestCosts: + description: |- + GlobalLLMRequestCosts defines default LLM request costs that apply to all + routes referencing this GatewayConfig. These costs can be overridden on a + per-route basis via AIGatewayRoute.Spec.LLMRequestCosts. + + When a request matches a route, the cost calculation proceeds as follows: + 1. If the route defines LLMRequestCosts with a matching metadataKey, use that. + 2. Otherwise, fall back to the global cost with that metadataKey (if defined here). + 3. If neither exists, the cost is not calculated for that metadataKey. 
+ + This allows you to define common cost formulas once at the gateway level + (e.g., billing_charges = input_tokens + output_tokens) and only override + them in specific routes when needed (e.g., premium routes with different pricing). + items: + description: LLMRequestCost configures each request cost. + properties: + cel: + description: "CEL is the CEL expression to calculate the cost + of the request.\nThe CEL expression must return a signed or + unsigned integer. If the\nreturn value is negative, it will + be error.\n\nThe expression can use the following variables:\n\n\t* + model: the model name extracted from the request content. + Type: string.\n\t* backend: the backend name in the form of + \"name.namespace\". Type: string.\n\t* input_tokens: the number + of input tokens. Type: unsigned integer.\n\t* cached_input_tokens: + the number of cached read input tokens. Type: unsigned integer.\n\t* + cache_creation_input_tokens: the number of cache creation + input tokens. Type: unsigned integer.\n\t* output_tokens: + the number of output tokens. Type: unsigned integer.\n\t* + total_tokens: the total number of tokens. Type: unsigned integer.\n\t* + reasoning_tokens: the number of reasoning tokens. Type: unsigned + integer.\n\nFor example, the following expressions are valid:\n\n\t* + \"model == 'llama' ? input_tokens + output_token * 0.5 : + total_tokens\"\n\t* \"backend == 'foo.default' ? input_tokens + + output_tokens : total_tokens\"\n\t* \"backend == 'bar.default' + ? (input_tokens - cached_input_tokens) + cached_input_tokens + * 0.1 + cache_creation_input_tokens * 1.25 + output_tokens + : total_tokens\"\n\t* \"input_tokens + output_tokens + total_tokens\"\n\t* + \"input_tokens * output_tokens\"" + type: string + metadataKey: + description: MetadataKey is the key of the metadata to store + this cost of the request. + type: string + type: + description: |- + Type specifies the type of the request cost. 
The default is "OutputToken", + and it uses "output token" as the cost. The other types are "InputToken", "TotalToken", + "CachedInputToken", "CacheCreationInputToken", "ReasoningToken", and "CEL". + enum: + - OutputToken + - InputToken + - CachedInputToken + - CacheCreationInputToken + - TotalToken + - ReasoningToken + - CEL + type: string + required: + - metadataKey + - type + type: object + type: array + x-kubernetes-list-map-keys: + - metadataKey + x-kubernetes-list-type: map + type: object + status: + description: Status defines the status of the GatewayConfig. + properties: + conditions: + description: Conditions describe the current conditions of the GatewayConfig. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. 
+ This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_mcproutes.yaml b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_mcproutes.yaml new file mode 100644 index 00000000..81047027 --- /dev/null +++ b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_mcproutes.yaml @@ -0,0 +1,9346 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. 
+ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.0 + name: mcproutes.aigateway.envoyproxy.io +spec: + group: aigateway.envoyproxy.io + names: + kind: MCPRoute + listKind: MCPRouteList + plural: mcproutes + singular: mcproute + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + deprecated: true + deprecationWarning: aigateway.envoyproxy.io/v1alpha1 is deprecated; use aigateway.envoyproxy.io/v1beta1 + instead + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + MCPRoute defines how to route MCP requests to the backend MCP servers. + + This serves as a way to define a "unified" AI API for a Gateway which allows downstream + clients to use a single schema API to interact with multiple MCP backends. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the details of the MCPRoute. + properties: + backendRefs: + description: |- + BackendRefs is a list of backend references to the MCP servers. + These MCP servers will be aggregated and exposed as a single MCP endpoint to the clients. + From the client's perspective, they only need to configure a single MCP server URL, e.g. 
"https://api.example.com/mcp", + and the Envoy AI Gateway will route the requests to the appropriate MCP server based on the requests. + + All names must be unique within this list to avoid potential tools, resources, etc. name collisions. + Also, cross-namespace references are not supported. In other words, the backend MCP servers must be in the + same namespace as the MCPRoute. + items: + description: MCPRouteBackendRef wraps a EG's BackendObjectReference + to reference an MCP server. + properties: + forwardHeaders: + description: |- + ForwardHeaders specifies HTTP headers to extract from the incoming client request + and forward to this backend MCP server. + This enables per-user authentication passthrough (e.g., personal access tokens) + without requiring OAuth configuration. + Each entry specifies a header name to extract and an optional rename for the backend. + items: + description: MCPHeaderForward specifies a header to extract + from the incoming request and forward to a backend. + properties: + backendHeader: + description: |- + BackendHeader is the header name to use when forwarding to the backend. + If not specified, the original header name is used. + minLength: 1 + type: string + name: + description: Name is the header name to extract from the + incoming client request. + minLength: 1 + type: string + required: + - name + type: object + maxItems: 32 + type: array + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. 
+ + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + path: + default: /mcp + description: |- + Path is the HTTP endpoint path of the backend MCP server. + If not specified, the default is "/mcp". + maxLength: 1024 + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + securityPolicy: + description: SecurityPolicy is the security policy to apply + to this MCP server. + properties: + apiKey: + description: APIKey is a mechanism to access a backend. + The API key will be injected into the request headers. 
+ properties: + header: + description: |- + Header is the HTTP header to inject the API key into. If not specified, + defaults to "Authorization". + When the header is "Authorization", the injected header value will be + prefixed with "Bearer ". + + Either one of Header or QueryParam can be specified to inject the API key. + minLength: 1 + type: string + inline: + description: Inline contains the API key as an inline + string. + type: string + queryParam: + description: |- + QueryParam is the HTTP query parameter to inject the API key into. + For example, if QueryParam is set to "api_key", and the API key is "mysecretkey", the request URL will be modified to include + "?api_key=mysecretkey". + + Either one of Header or QueryParam can be specified to inject the API key. + minLength: 1 + type: string + secretRef: + description: |- + secretRef is the Kubernetes secret which contains the API keys. + The key of the secret should be "apiKey". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + type: object + x-kubernetes-validations: + - message: exactly one of secretRef or inline must be set + rule: (has(self.secretRef) && !has(self.inline)) || (!has(self.secretRef) + && has(self.inline)) + - message: only one of header or queryParam can be set + rule: '!(has(self.header) && has(self.queryParam))' + type: object + toolSelector: + description: |- + ToolSelector filters the tools exposed by this MCP server. + Supports exact matches and RE2-compatible regular expressions for both include and exclude patterns. + If not specified, all tools from the MCP server are exposed. + properties: + exclude: + description: |- + Exclude is a list of tool names to exclude. The specified tools will not be available. + Exclude rules take precedence over include rules. + items: + type: string + maxItems: 32 + type: array + excludeRegex: + description: |- + ExcludeRegex is a list of RE2-compatible regular expressions that, when matched, exclude the tool. + Tools matching these patterns will not be available. Exclude rules take precedence over include rules. + items: + type: string + maxItems: 32 + type: array + include: + description: Include is a list of tool names to include. + Only the specified tools will be available. + items: + type: string + maxItems: 32 + type: array + includeRegex: + description: |- + IncludeRegex is a list of RE2-compatible regular expressions that, when matched, include the tool. + Only tools matching these patterns will be available. 
+ items: + type: string + maxItems: 32 + type: array + type: object + x-kubernetes-validations: + - message: include and includeRegex are mutually exclusive + rule: '!(has(self.include) && has(self.includeRegex))' + - message: exclude and excludeRegex are mutually exclusive + rule: '!(has(self.exclude) && has(self.excludeRegex))' + - message: at least one of include, includeRegex, exclude, or + excludeRegex must be specified + rule: has(self.include) || has(self.includeRegex) || has(self.exclude) + || has(self.excludeRegex) + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') ? has(self.port) + : true' + maxItems: 256 + minItems: 1 + type: array + x-kubernetes-validations: + - message: all backendRefs names must be unique + rule: self.all(i, self.exists_one(j, j.name == i.name)) + headers: + description: |- + Headers are HTTP headers that must match for this route to be selected. + Multiple match values are ANDed together, meaning, a request must match all the specified headers to select the route. + items: + description: |- + HTTPHeaderMatch describes how to select a HTTP route by matching HTTP request + headers. + properties: + name: + description: |- + Name is the name of the HTTP Header to be matched. Name matching MUST be + case-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2). + + If multiple entries specify equivalent header names, only the first + entry with an equivalent name MUST be considered for a match. Subsequent + entries with an equivalent header name MUST be ignored. Due to the + case-insensitivity of header names, "foo" and "Foo" are considered + equivalent. + + When a header is repeated in an HTTP request, it is + implementation-specific behavior as to how this is represented. 
+ Generally, proxies should follow the guidance from the RFC: + https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2.2 regarding + processing a repeated header, with special handling for "Set-Cookie". + maxLength: 256 + minLength: 1 + pattern: ^[A-Za-z0-9!#$%&'*+\-.^_\x60|~]+$ + type: string + type: + default: Exact + description: |- + Type specifies how to match against the value of the header. + + Support: Core (Exact) + + Support: Implementation-specific (RegularExpression) + + Since RegularExpression HeaderMatchType has implementation-specific + conformance, implementations can support POSIX, PCRE or any other dialects + of regular expressions. Please read the implementation's documentation to + determine the supported dialect. + enum: + - Exact + - RegularExpression + type: string + value: + description: Value is the value of HTTP Header to be matched. + maxLength: 4096 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + parentRefs: + description: |- + ParentRefs are the names of the Gateway resources this MCPRoute is being attached to. + Cross namespace references are not supported. In other words, the Gateway resources must be in the + same namespace as the MCPRoute. Currently, each reference's Kind must be Gateway. + items: + description: |- + ParentReference identifies an API object (usually a Gateway) that can be considered + a parent of this resource (usually a route). There are two kinds of parent resources + with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + This API may be extended in the future to support additional kinds of parent + resources. + + The API object must be valid in the cluster; the Group and Kind must + be registered in the cluster for this reference to be valid. 
+ properties: + group: + default: gateway.networking.k8s.io + description: |- + Group is the group of the referent. + When unspecified, "gateway.networking.k8s.io" is inferred. + To set the core API group (such as for a "Service" kind referent), + Group must be explicitly set to "" (empty string). + + Support: Core + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Gateway + description: |- + Kind is kind of the referent. + + There are two kinds of parent resources with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + Support for other resources is Implementation-Specific. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: |- + Name is the name of the referent. + + Support: Core + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referent. When unspecified, this refers + to the local namespace of the Route. + + Note that there are specific rules for ParentRefs which cross namespace + boundaries. Cross-namespace references are only valid if they are explicitly + allowed by something in the namespace they are referring to. For example: + Gateway has the AllowedRoutes field, and ReferenceGrant provides a + generic way to enable any other kind of cross-namespace reference. + + + ParentRefs from a Route to a Service in the same namespace are "producer" + routes, which apply default routing rules to inbound connections from + any namespace to the Service. + + ParentRefs from a Route to a Service in a different namespace are + "consumer" routes, and these routing rules are only applied to outbound + connections originating from the same namespace as the Route, for which + the intended destination of the connections are a Service targeted as a + ParentRef of the Route. 
+ + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port is the network port this Route targets. It can be interpreted + differently based on the type of parent resource. + + When the parent resource is a Gateway, this targets all listeners + listening on the specified port that also support this kind of Route(and + select this Route). It's not recommended to set `Port` unless the + networking behaviors specified in a Route must apply to a specific port + as opposed to a listener(s) whose port(s) may be changed. When both Port + and SectionName are specified, the name and port of the selected listener + must match both specified values. + + + When the parent resource is a Service, this targets a specific port in the + Service spec. When both Port (experimental) and SectionName are specified, + the name and port of the selected port must match both specified values. + + + Implementations MAY choose to support other parent resources. + Implementations supporting other types of parent resources MUST clearly + document how/if Port is interpreted. + + For the purpose of status, an attachment is considered successful as + long as the parent resource accepts it partially. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment + from the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, + the Route MUST be considered detached from the Gateway. + + Support: Extended + format: int32 + maximum: 65535 + minimum: 1 + type: integer + sectionName: + description: |- + SectionName is the name of a section within the target resource. In the + following resources, SectionName is interpreted as the following: + + * Gateway: Listener name. 
When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + * Service: Port name. When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + + Implementations MAY choose to support attaching Routes to other resources. + If that is the case, they MUST clearly document how SectionName is + interpreted. + + When unspecified (empty string), this will reference the entire resource. + For the purpose of status, an attachment is considered successful if at + least one section in the parent resource accepts it. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment from + the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, the + Route MUST be considered detached from the Gateway. + + Support: Core + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - name + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-validations: + - message: only Gateway is supported + rule: self.all(match, match.kind == 'Gateway') + path: + default: /mcp + description: |- + Path is the HTTP endpoint path that serves MCP requests over the Streamable HTTP transport. + If not specified, the default is "/mcp". + maxLength: 1024 + type: string + securityPolicy: + description: SecurityPolicy defines the security policy for this MCPRoute. + properties: + apiKeyAuth: + description: APIKeyAuth defines the configuration for the API + Key Authentication. + properties: + credentialRefs: + description: |- + CredentialRefs is the Kubernetes secret which contains the API keys. + This is an Opaque secret. 
+ Each API key is stored in the key representing the client id. + If the secrets have a key for a duplicated client, the first one will be used. + items: + description: |- + SecretObjectReference identifies an API object including its namespace, + defaulting to Secret. + + The API object must be valid in the cluster; the Group and Kind must + be registered in the cluster for this reference to be valid. + + References to objects with invalid Group and Kind are not valid, and must + be rejected by the implementation, with appropriate Conditions set + on the containing object. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + type: array + extractFrom: + description: |- + ExtractFrom is where to fetch the key from the incoming request. + The value from the first source that has a key will be used. + items: + description: |- + ExtractFrom is where to fetch the key from the incoming request.
+ Only one of headers, params or cookies is supposed to be specified. + properties: + cookies: + description: |- + Cookies lists the names of the cookies to fetch the key from. + If multiple cookies are specified, Envoy will look for the API key in the order of the list. + This field is optional, but only one of headers, params or cookies is supposed to be specified. + items: + type: string + type: array + headers: + description: |- + Headers lists the names of the headers to fetch the key from. + If multiple headers are specified, Envoy will look for the API key in the order of the list. + This field is optional, but only one of headers, params or cookies is supposed to be specified. + items: + type: string + type: array + params: + description: |- + Params lists the names of the query parameters to fetch the key from. + If multiple params are specified, Envoy will look for the API key in the order of the list. + This field is optional, but only one of headers, params or cookies is supposed to be specified. + items: + type: string + type: array + type: object + type: array + forwardClientIDHeader: + description: |- + ForwardClientIDHeader is the name of the header to forward the client identity to the backend + service. The header will be added to the request with the client id as the value. + type: string + sanitize: + description: Sanitize indicates whether to remove the API + key from the request before forwarding it to the backend + service. + type: boolean + required: + - credentialRefs + - extractFrom + type: object + authorization: + description: Authorization defines the configuration for the MCP + spec compatible authorization. + properties: + defaultAction: + default: Deny + description: DefaultAction is the action to take when no rules + match. If unspecified, defaults to Deny. + enum: + - Allow + - Deny + type: string + rules: + description: |- + Rules defines a list of authorization rules.
+ These rules are evaluated in order, the first matching rule will be applied, + and the rest will be skipped. + + If no rules are defined, the default action will be applied to all requests. + items: + description: |- + MCPRouteAuthorizationRule defines an authorization rule for MCPRoute based on the MCP authorization spec. + Reference: https://modelcontextprotocol.io/specification/draft/basic/authorization#scope-challenge-handling + properties: + action: + default: Allow + description: Action is the authorization decision for + matching requests. If unspecified, defaults to Allow. + enum: + - Allow + - Deny + type: string + cel: + description: "CEL specifies a Common Expression Language + (CEL) expression evaluated for this rule.\nThe expression + must return a boolean; evaluation errors or non-boolean + results\nare treated as \"no match\".\n\nExample CEL + expressions:\n\t* `request.method == \"POST\"`\n\t* + `request.headers[\"x-custom-header\"] == \"AllowedValue\"`\n\t* + `request.mcp.tool in [\"toolA\", \"toolB\"]`\n\nAvailable + attributes in the CEL expression:\n\n\t* request.method: + HTTP method such as GET or POST. Type: string.\n\t* + request.headers: map of headers with lowercased keys, + first value only. Type: map[string]string.\n\t* request.headers_all: + map of headers with lowercased keys, all values. Type: + map[string][]string.\n\t* request.path: request path + such as /mcp. Type: string.\n\t* request.auth.jwt.claims: + JWT claims when a bearer JWT is present. Type: map[string]any.\n\t* + request.auth.jwt.scopes: JWT scopes when a bearer + JWT is present. Type: []string.\n\t* request.mcp.method: + MCP method such as tools/list or tools/call. Type: + string.\n\t* request.mcp.backend: upstream backend + name (for example, \"kiwi\" or \"github\"). Type: + string.\n\t* request.mcp.tool: tool name without backend + prefix (for example, \"list_issues\"). 
Type: string.\n\t* + request.mcp.params: parameters of the MCP method, + including keys like \"_meta\" and \"arguments\". Type: + object.\n\nNote: The CEL expression support is experimental, + and the attributes\navailable to the expression may + change in future releases." + maxLength: 4096 + type: string + source: + description: |- + Source defines the authorization source for this rule. + If not specified, the rule will match all sources. + properties: + jwt: + description: JWT defines the JWT scopes required + for this rule to match. + properties: + claims: + description: |- + Claims defines the list of JWT claims required for the rule. Each claim must exist on the token + and have at least one of the expected values. Use to enforce tenant or subject-based access. + items: + description: JWTClaim specifies a claim in + a JWT token. + properties: + name: + description: |- + Name is the name of the claim. + If it is a nested claim, use a dot (.) separated string as the name to + represent the full path to the claim. + For example, if the claim is in the "department" field in the "organization" field, + the name should be "organization.department". + maxLength: 253 + minLength: 1 + type: string + valueType: + default: String + description: |- + ValueType is the type of the claim value. + Only String and StringArray types are supported for now. + enum: + - String + - StringArray + type: string + values: + description: |- + Values are the values that the claim must match. + If the claim is a string type, the specified value must match exactly. + If the claim is a string array type, the specified value must match one of the values in the array. + If multiple values are specified, one of the values must match for the rule to match. 
+ items: + type: string + maxItems: 128 + minItems: 1 + type: array + required: + - name + - values + type: object + maxItems: 16 + type: array + x-kubernetes-validations: + - message: '''scope'' claim name is reserved + for OAuth scopes' + rule: '!self.exists(c, c.name == ''scope'')' + scopes: + description: |- + Scopes defines the list of JWT scopes required for the rule. + If multiple scopes are specified, all scopes must be present in the JWT for the rule to match. + items: + maxLength: 253 + minLength: 1 + type: string + maxItems: 16 + type: array + type: object + x-kubernetes-validations: + - message: either scopes or claims must be specified + rule: (has(self.scopes) && size(self.scopes) > + 0) || (has(self.claims) && size(self.claims) + > 0) + required: + - jwt + type: object + target: + description: |- + Target defines the authorization target for this rule. + If not specified, the rule will match all targets. + properties: + tools: + description: Tools defines the list of tools this + rule applies to. + items: + description: ToolCall represents a tool call in + the MCP authorization target. + properties: + backend: + description: Backend is the name of the backend + this tool belongs to. + type: string + tool: + description: Tool is the name of the tool. + type: string + required: + - backend + - tool + type: object + maxItems: 16 + minItems: 1 + type: array + required: + - tools + type: object + type: object + maxItems: 32 + type: array + type: object + extAuth: + description: ExtAuth defines the configuration for External Authorization. + properties: + bodyToExtAuth: + description: BodyToExtAuth defines the Body to Ext Auth configuration. + properties: + maxRequestBytes: + description: |- + MaxRequestBytes is the maximum size of a message body that the filter will hold in memory. + Envoy will return HTTP 413 and will not initiate the authorization process when buffer + reaches the number set in this field. 
+ Note that this setting will have precedence over failOpen mode. + format: int32 + minimum: 1 + type: integer + required: + - maxRequestBytes + type: object + contextExtensions: + description: |- + ContextExtensions are analogous to http_request.headers, however these + contents will not be sent to the upstream server. This provides an + extension mechanism for sending additional information to the auth server + without modifying the proto definition. It maps to the internal opaque + context in the filter chain. + items: + description: |- + ContextExtension is analogous to http_request.headers, however these + contents will not be sent to the upstream server. This provides an + extension mechanism for sending additional information to the auth server + without modifying the proto definition. It maps to the internal opaque + context in the filter chain. + properties: + name: + description: Name of the context extension. + type: string + type: + default: Value + description: |- + Type is the type of method to use to read the ContextExtension value. + Valid values are Value and ValueRef, default is Value. + enum: + - Value + - ValueRef + type: string + value: + description: Value of the context extension. + type: string + valueRef: + description: ValueRef for the context extension's value. + properties: + group: + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + key: + description: The key to select. + type: string + kind: + description: Kind is kind of the referent. For example + "HTTPRoute" or "Service". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. 
+ maxLength: 253 + minLength: 1 + type: string + required: + - group + - key + - kind + - name + type: object + x-kubernetes-validations: + - message: Only a reference to an object of kind ConfigMap + or Secret belonging to default v1 API group is supported. + rule: self.kind in ['ConfigMap', 'Secret'] && self.group + in ['', 'v1'] + required: + - name + - type + type: object + x-kubernetes-validations: + - message: Exactly one of value or valueRef must be set + with correct type. + rule: (self.type == 'Value' && has(self.value) && !has(self.valueRef)) + || (self.type == 'ValueRef' && !has(self.value) && has(self.valueRef)) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + failOpen: + default: false + description: |- + FailOpen is a switch used to control the behavior when a response from the External Authorization service cannot be obtained. + If FailOpen is set to true, the system allows the traffic to pass through. + Otherwise, if it is set to false or not set (defaulting to false), + the system blocks the traffic and returns a HTTP 5xx error, reflecting a fail-closed approach. + This setting determines whether to prioritize accessibility over strict security in case of authorization service failure. + + If set to true, the External Authorization will also be bypassed if its configuration is invalid. + type: boolean + grpc: + description: |- + GRPC defines the gRPC External Authorization service. + Either GRPCService or HTTPService must be specified, + and only one of them can be provided. + properties: + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. 
+ format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services.
+ + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel requests + that Envoy will make to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel retries + that Envoy will make to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending requests + that Envoy will queue to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures the + maximum number of connections that Envoy + will establish per-endpoint to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. 
When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). + format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). 
+ If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. + If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines the + number of healthy health checks required + before a backend host is marked healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses to + match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? 
has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. + Defaults to GET + type: string + path: + description: Path defines the HTTP path + that will be requested during health + checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time between + active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time to wait + for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of health + checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines the + number of unhealthy health checks required + before a backend host is marked unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, http + field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, tcp + field needs to be set. + rule: 'self.type == ''TCP'' ? 
has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set if the + Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. + format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines the + base duration for which a host will be ejected + on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets the + number of consecutive 5xx errors triggering + ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors sets + the number of consecutive gateway errors + triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time between + passive health checks. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the maximum + percentage of hosts in a cluster that can + be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between external + and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration for + backend connections. + properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. 
+ https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash type + is set to Cookie. + properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes to + set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the consistent + hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header to hash. 
+ type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the query + parameter hash policy when the consistent + hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited to + 5000011. + format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. 
+ If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint override + information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. + The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". 
+ enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests between + locality zones. + properties: + preferLocal: + description: PreferLocalZone configures zone-aware + routing to prefer sending traffic to the + local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. + properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold is + the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage of + requests that will be considered for + zone aware routing if zone aware routing + is configured. If not specified, Envoy + defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported for + RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? 
!has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported for + LeastRequest, Random, and RoundRobin load balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. + properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy to be + applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. 
This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per retry + attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. + items: + description: HTTPStatus defines the http + status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the conditions + that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time until + which entire response is received from the + upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy only + works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + type: object + x-kubernetes-validations: + - message: backendRef or backendRefs needs to be set + rule: has(self.backendRef) || self.backendRefs.size() > + 0 + - message: BackendRefs only supports Service, ServiceImport, + and Backend kind. + rule: 'has(self.backendRefs) ? self.backendRefs.all(f, f.kind + == ''Service'' || f.kind == ''ServiceImport'' || f.kind + == ''Backend'') : true' + - message: BackendRefs only supports Core, multicluster.x-k8s.io, + and gateway.envoyproxy.io groups. + rule: 'has(self.backendRefs) ? (self.backendRefs.all(f, + f.group == "" || f.group == ''multicluster.x-k8s.io'' + || f.group == ''gateway.envoyproxy.io'')) : true' + headersToExtAuth: + description: |- + HeadersToExtAuth defines the client request headers that will be included + in the request to the external authorization service. + Note: If not specified, the default behavior for gRPC and HTTP external + authorization services is different due to backward compatibility reasons. + All headers will be included in the check request to a gRPC authorization server. + Only the following headers will be included in the check request to an HTTP + authorization server: Host, Method, Path, Content-Length, and Authorization. + And these headers will always be included to the check request to an HTTP + authorization server by default, no matter whether they are specified + in HeadersToExtAuth or not. 
+ items: + type: string + type: array + http: + description: |- + HTTP defines the HTTP External Authorization service. + Either GRPCService or HTTPService must be specified, + and only one of them can be provided. + properties: + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". 
+ + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. 
Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel requests + that Envoy will make to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel retries + that Envoy will make to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending requests + that Envoy will queue to the referenced backend + defined within a xRoute rule. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures the + maximum number of connections that Envoy + will establish per-endpoint to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. 
+ properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). + format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. 
+ x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). + If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. + If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines the + number of healthy health checks required + before a backend host is marked healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses to + match. 
+ properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. + Defaults to GET + type: string + path: + description: Path defines the HTTP path + that will be requested during health + checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time between + active health checks. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time to wait + for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of health + checker. 
+ type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines the + number of unhealthy health checks required + before a backend host is marked unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, http + field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, tcp + field needs to be set. + rule: 'self.type == ''TCP'' ? has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set if the + Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. + format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive health check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines the + base duration for which a host will be ejected + on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets the + number of consecutive 5xx errors triggering + ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors sets + the number of consecutive gateway errors + triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection.
+ Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time between + passive health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the maximum + percentage of hosts in a cluster that can + be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between external + and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration for + backend connections. + properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). 
+ x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. + https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash type + is set to Cookie. + properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes to + set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. 
+ + Deprecated: use Headers instead + properties: + name: + description: Name of the header to hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the consistent + hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header to hash. + type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the query + parameter hash policy when the consistent + hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be a prime number limited to + 5000011. + format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie", + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ?
has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. + If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint override + information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. + The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. 
For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". + enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests between + locality zones. + properties: + preferLocal: + description: PreferLocalZone configures zone-aware + routing to prefer sending traffic to the + local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. + properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold is + the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage of + requests that will be considered for + zone aware routing if zone aware routing + is configured. If not specified, Envoy + defaults to 100%. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported for + RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? !has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported for + LeastRequest, Random, and RoundRobin load balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtocol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. + properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy to be + applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt.
gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per retry + attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. + items: + description: HTTPStatus defines the http + status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the conditions + that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. 
+ properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is one to four number-unit pairs using the units h, m, s, or ms (for example `1h30m`). + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time until + which entire response is received from the + upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds.
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy only + works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + headersToBackend: + description: |- + HeadersToBackend are the authorization response headers that will be added + to the original client request before sending it to the backend server. + Note that coexisting headers will be overridden. + If not specified, no authorization response headers will be added to the + original client request. + items: + type: string + type: array + path: + description: |- + Path is the path of the HTTP External Authorization service. + If path is specified, the authorization request will be sent to that path, + or else the authorization request will use the path of the original request. + + Please note that the original request path will be appended to the path specified here. + For example, if the original request path is "/hello", and the path specified here is "/auth", + then the path of the authorization request will be "/auth/hello". If the path is not specified, + the path of the authorization request will be "/hello". + type: string + type: object + x-kubernetes-validations: + - message: backendRef or backendRefs needs to be set + rule: has(self.backendRef) || self.backendRefs.size() > + 0 + - message: BackendRefs only supports Service, ServiceImport, + and Backend kind. + rule: 'has(self.backendRefs) ? self.backendRefs.all(f, f.kind + == ''Service'' || f.kind == ''ServiceImport'' || f.kind + == ''Backend'') : true' + - message: BackendRefs only supports Core, multicluster.x-k8s.io, + and gateway.envoyproxy.io groups. + rule: 'has(self.backendRefs) ? 
(self.backendRefs.all(f, + f.group == "" || f.group == ''multicluster.x-k8s.io'' + || f.group == ''gateway.envoyproxy.io'')) : true' + recomputeRoute: + description: |- + RecomputeRoute clears the route cache and recalculates the routing decision. + This field must be enabled if the headers added or modified by the ExtAuth are used for + route matching decisions. If the recomputation selects a new route, features targeting + the new matched route will be applied. + type: boolean + timeout: + description: |- + Timeout defines the timeout for requests to the external authorization service. + If not specified, defaults to 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + x-kubernetes-validations: + - message: one of grpc or http must be specified + rule: (has(self.grpc) || has(self.http)) + - message: only one of grpc or http can be specified + rule: (has(self.grpc) && !has(self.http)) || (!has(self.grpc) + && has(self.http)) + oauth: + description: OAuth defines the configuration for the MCP spec + compatible OAuth authentication. + properties: + audiences: + description: |- + Audiences is a list of JWT audiences allowed access. + It is recommended to set this field for token audience validation, as it is a security best practice to prevent token misuse. + Reference: https://modelcontextprotocol.io/specification/2025-06-18/basic/authorization#token-audience-binding-and-validation + items: + type: string + maxItems: 32 + type: array + claimToHeaders: + description: |- + ClaimToHeaders specifies JWT claims to extract and forward as HTTP headers to backend MCP servers. + This enables backends to access user identity for authorization, auditing, or personalization. 
+ + Security considerations: + - Any client-provided headers matching the configured header names will be stripped to prevent forgery + - Only the specified claims are extracted; the full JWT is not forwarded to backends + - Consider using a header prefix (e.g., "X-Jwt-Claim-") to avoid conflicts with other headers + items: + description: ClaimToHeader defines a configuration to convert + JWT claims into HTTP headers + properties: + claim: + description: |- + Claim is the JWT Claim that should be saved into the header : it can be a nested claim of type + (eg. "claim.nested.key", "sub"). The nested claim name must use dot "." + to separate the JSON name path. + type: string + header: + description: Header defines the name of the HTTP request + header that the JWT Claim will be saved into. + type: string + required: + - claim + - header + type: object + maxItems: 16 + type: array + issuer: + description: Issuer is the authorization server's issuer identity. + format: uri + type: string + jwks: + description: |- + JWKS defines how a JSON Web Key Sets (JWKS) can be obtained to verify the access tokens presented by the clients. + + If not specified, the JWKS URI will be discovered from the OAuth 2.0 Authorization Server Metadata + as per RFC 8414 by querying the `/.well-known/oauth-authorization-server` endpoint on the Issuer. + properties: + localJWKS: + description: LocalJWKS defines how to get the JSON Web + Key Sets (JWKS) from a local source. + properties: + inline: + description: Inline contains the value as an inline + string. + type: string + type: + default: Inline + description: |- + Type is the type of method to use to read the body value. + Valid values are Inline and ValueRef, default is Inline. + enum: + - Inline + - ValueRef + type: string + valueRef: + description: |- + ValueRef is a reference to a local ConfigMap that contains the JSON Web Key Sets (JWKS). + + The value of key `jwks` in the ConfigMap will be used. 
+ If the key is not found, the first value in the ConfigMap will be used. + properties: + group: + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + description: Kind is kind of the referent. For + example "HTTPRoute" or "Service". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + required: + - group + - kind + - name + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: Exactly one of inline or valueRef must be set + with correct type. + rule: (self.type == 'Inline' && has(self.inline) && + !has(self.valueRef)) || (self.type == 'ValueRef' && + !has(self.inline) && has(self.valueRef)) + remoteJWKS: + description: |- + RemoteJWKS defines how to fetch and cache JSON Web Key Sets (JWKS) from a remote + HTTP/HTTPS endpoint. + properties: + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. 
+ + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. 
+ items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == + ''Service'') ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. 
+ properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel + requests that Envoy will make to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel + retries that Envoy will make to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending + requests that Envoy will queue to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures + the maximum number of connections that + Envoy will establish per-endpoint to + the referenced backend defined within + a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. 
+ properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 
100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). + format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). + If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. 
+ properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. + If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines + the number of healthy health checks + required before a backend host is marked + healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses + to match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. 
+ Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. + Defaults to GET + type: string + path: + description: Path defines the HTTP + path that will be requested during + health checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time + between active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. 
+ type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time + to wait for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of + health checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines + the number of unhealthy health checks + required before a backend host is marked + unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, + http field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, + tcp field needs to be set. + rule: 'self.type == ''TCP'' ? has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set + if the Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines + the base duration for which a host will + be ejected on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets + the number of consecutive 5xx errors + triggering ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors + sets the number of consecutive gateway + errors triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time + between passive health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the + maximum percentage of hosts in a cluster + that can be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between + external and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration + for backend connections. 
+ properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. + https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash + type is set to Cookie. 
+ properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes + to set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to + hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the + consistent hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header + to hash. + type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the + query parameter hash policy when the + consistent hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited + to 5000011. 
+ format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. + If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint + override information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. 
+ The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". + enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests + between locality zones. + properties: + preferLocal: + description: PreferLocalZone configures + zone-aware routing to prefer sending + traffic to the local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. 
+ properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold + is the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage + of requests that will be considered + for zone aware routing if zone aware + routing is configured. If not specified, + Envoy defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported + for RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? !has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported + for LeastRequest, Random, and RoundRobin load + balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtocol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled.
+ properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy + to be applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per + retry attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. 
+ items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the + conditions that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is as specified in GEP-2257. + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend + connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete.
This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time + until which entire response is received + from the upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy + only works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + cacheDuration: + default: 300s + description: |- + Duration is a string value representing a duration in time. The format is as specified + in GEP-2257, a strict subset of the syntax parsed by Golang time.ParseDuration. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + uri: + description: |- + URI is the HTTPS URI to fetch the JWKS. Envoy's system trust bundle is used to validate the server certificate. + If a custom trust bundle is needed, it can be specified in a BackendTLSConfig resource and target the BackendRefs. + maxLength: 253 + minLength: 1 + type: string + required: + - uri + type: object + x-kubernetes-validations: + - message: BackendRefs must be used, backendRef is not + supported. + rule: '!has(self.backendRef)' + - message: Retry timeout is not supported. 
+ rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.perRetry)? + !has(self.backendSettings.retry.perRetry.timeout):true):true):true + - message: HTTPStatusCodes is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.retryOn)? + !has(self.backendSettings.retry.retryOn.httpStatusCodes):true):true):true + type: object + x-kubernetes-validations: + - message: either remoteJWKS or localJWKS must be specified. + rule: has(self.remoteJWKS) || has(self.localJWKS) + - message: remoteJWKS and localJWKS cannot both be specified. + rule: '!(has(self.remoteJWKS) && has(self.localJWKS))' + protectedResourceMetadata: + description: |- + ProtectedResourceMetadata defines the OAuth 2.0 Protected Resource Metadata as per RFC 9728. + This is used to expose the metadata endpoint for mcp clients to discover the authorization servers, + supported scopes, and JWKS URI. + properties: + resource: + description: |- + Resource is the identifier of the protected resource. + This should match the MCPRoute's URL. For example, if the MCPRoute's URL is + "https://api.example.com/mcp", the Resource should be "https://api.example.com/mcp". + format: uri + type: string + resourceDocumentation: + description: ResourceDocumentation is a URL that provides + human-readable documentation for the resource. + format: uri + type: string + resourceName: + description: ResourceName is a human-readable name for + the protected resource. + maxLength: 256 + type: string + resourcePolicyUri: + description: ResourcePolicyURI is a URL that points to + the resource server's policy document. + format: uri + type: string + resourceSigningAlgValuesSupported: + description: |- + ResourceSigningAlgValuesSupported is a list of JWS signing algorithms supported by the resource server. + These algorithms are used in the "alg" field of the JOSE header in signed tokens.
+ items: + type: string + maxItems: 16 + minItems: 1 + type: array + scopesSupported: + description: |- + ScopesSupported defines the minimal set of scopes required for the basic functionality of the MCPRoute. + It should avoid broad or overly permissive scopes to prevent clients from requesting tokens with excessive privileges. + + If an operation requires additional scopes that are not present in the access token, the client will receive a + 403 Forbidden response that includes the required scopes in the `scope` field of the `WWW-Authenticate` header. + This enables incremental privilege elevation through targeted `WWW-Authenticate: scope="..."` challenges when + privileged operations are first attempted. + items: + type: string + maxItems: 32 + type: array + required: + - resource + type: object + required: + - issuer + - protectedResourceMetadata + type: object + type: object + x-kubernetes-validations: + - message: oauth must be configured when any authorization rule uses + a jwt source + rule: '!(has(self.authorization) && self.authorization.rules.exists(r, + has(r.source) && has(r.source.jwt)) && !has(self.oauth))' + required: + - backendRefs + - parentRefs + type: object + status: + description: Status defines the status details of the MCPRoute. + properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. 
+ format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: |- + MCPRoute defines how to route MCP requests to the backend MCP servers. 
+ + This serves as a way to define a "unified" AI API for a Gateway which allows downstream + clients to use a single schema API to interact with multiple MCP backends. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the details of the MCPRoute. + properties: + backendRefs: + description: |- + BackendRefs is a list of backend references to the MCP servers. + These MCP servers will be aggregated and exposed as a single MCP endpoint to the clients. + From the client's perspective, they only need to configure a single MCP server URL, e.g. "https://api.example.com/mcp", + and the Envoy AI Gateway will route the requests to the appropriate MCP server based on the requests. + + All names must be unique within this list to avoid potential tools, resources, etc. name collisions. + Also, cross-namespace references are not supported. In other words, the backend MCP servers must be in the + same namespace as the MCPRoute. + items: + description: MCPRouteBackendRef wraps a EG's BackendObjectReference + to reference an MCP server. + properties: + forwardHeaders: + description: |- + ForwardHeaders specifies HTTP headers to extract from the incoming client request + and forward to this backend MCP server. 
+ This enables per-user authentication passthrough (e.g., personal access tokens) + without requiring OAuth configuration. + Each entry specifies a header name to extract and an optional rename for the backend. + items: + description: MCPHeaderForward specifies a header to extract + from the incoming request and forward to a backend. + properties: + backendHeader: + description: |- + BackendHeader is the header name to use when forwarding to the backend. + If not specified, the original header name is used. + minLength: 1 + type: string + name: + description: Name is the header name to extract from the + incoming client request. + minLength: 1 + type: string + required: + - name + type: object + maxItems: 32 + type: array + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. 
+ + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + path: + default: /mcp + description: |- + Path is the HTTP endpoint path of the backend MCP server. + If not specified, the default is "/mcp". + maxLength: 1024 + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + securityPolicy: + description: SecurityPolicy is the security policy to apply + to this MCP server. + properties: + apiKey: + description: APIKey is a mechanism to access a backend. + The API key will be injected into the request headers. + properties: + header: + description: |- + Header is the HTTP header to inject the API key into. If not specified, + defaults to "Authorization". + When the header is "Authorization", the injected header value will be + prefixed with "Bearer ". + + Either one of Header or QueryParam can be specified to inject the API key. + minLength: 1 + type: string + inline: + description: Inline contains the API key as an inline + string. + type: string + queryParam: + description: |- + QueryParam is the HTTP query parameter to inject the API key into. + For example, if QueryParam is set to "api_key", and the API key is "mysecretkey", the request URL will be modified to include + "?api_key=mysecretkey". + + Either one of Header or QueryParam can be specified to inject the API key. 
+ minLength: 1 + type: string + secretRef: + description: |- + secretRef is the Kubernetes secret which contains the API keys. + The key of the secret should be "apiKey". + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + type: object + x-kubernetes-validations: + - message: exactly one of secretRef or inline must be set + rule: (has(self.secretRef) && !has(self.inline)) || (!has(self.secretRef) + && has(self.inline)) + - message: only one of header or queryParam can be set + rule: '!(has(self.header) && has(self.queryParam))' + type: object + toolSelector: + description: |- + ToolSelector filters the tools exposed by this MCP server. + Supports exact matches and RE2-compatible regular expressions for both include and exclude patterns. + If not specified, all tools from the MCP server are exposed. + properties: + exclude: + description: |- + Exclude is a list of tool names to exclude. 
The specified tools will not be available. + Exclude rules take precedence over include rules. + items: + type: string + maxItems: 32 + type: array + excludeRegex: + description: |- + ExcludeRegex is a list of RE2-compatible regular expressions that, when matched, exclude the tool. + Tools matching these patterns will not be available. Exclude rules take precedence over include rules. + items: + type: string + maxItems: 32 + type: array + include: + description: Include is a list of tool names to include. + Only the specified tools will be available. + items: + type: string + maxItems: 32 + type: array + includeRegex: + description: |- + IncludeRegex is a list of RE2-compatible regular expressions that, when matched, include the tool. + Only tools matching these patterns will be available. + items: + type: string + maxItems: 32 + type: array + type: object + x-kubernetes-validations: + - message: include and includeRegex are mutually exclusive + rule: '!(has(self.include) && has(self.includeRegex))' + - message: exclude and excludeRegex are mutually exclusive + rule: '!(has(self.exclude) && has(self.excludeRegex))' + - message: at least one of include, includeRegex, exclude, or + excludeRegex must be specified + rule: has(self.include) || has(self.includeRegex) || has(self.exclude) + || has(self.excludeRegex) + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') ? has(self.port) + : true' + maxItems: 256 + minItems: 1 + type: array + x-kubernetes-validations: + - message: all backendRefs names must be unique + rule: self.all(i, self.exists_one(j, j.name == i.name)) + headers: + description: |- + Headers are HTTP headers that must match for this route to be selected. + Multiple match values are ANDed together, meaning, a request must match all the specified headers to select the route. 
+ items: + description: |- + HTTPHeaderMatch describes how to select a HTTP route by matching HTTP request + headers. + properties: + name: + description: |- + Name is the name of the HTTP Header to be matched. Name matching MUST be + case-insensitive. (See https://tools.ietf.org/html/rfc7230#section-3.2). + + If multiple entries specify equivalent header names, only the first + entry with an equivalent name MUST be considered for a match. Subsequent + entries with an equivalent header name MUST be ignored. Due to the + case-insensitivity of header names, "foo" and "Foo" are considered + equivalent. + + When a header is repeated in an HTTP request, it is + implementation-specific behavior as to how this is represented. + Generally, proxies should follow the guidance from the RFC: + https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2.2 regarding + processing a repeated header, with special handling for "Set-Cookie". + maxLength: 256 + minLength: 1 + pattern: ^[A-Za-z0-9!#$%&'*+\-.^_\x60|~]+$ + type: string + type: + default: Exact + description: |- + Type specifies how to match against the value of the header. + + Support: Core (Exact) + + Support: Implementation-specific (RegularExpression) + + Since RegularExpression HeaderMatchType has implementation-specific + conformance, implementations can support POSIX, PCRE or any other dialects + of regular expressions. Please read the implementation's documentation to + determine the supported dialect. + enum: + - Exact + - RegularExpression + type: string + value: + description: Value is the value of HTTP Header to be matched. + maxLength: 4096 + minLength: 1 + type: string + required: + - name + - value + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + parentRefs: + description: |- + ParentRefs are the names of the Gateway resources this MCPRoute is being attached to. + Cross namespace references are not supported. 
In other words, the Gateway resources must be in the + same namespace as the MCPRoute. Currently, each reference's Kind must be Gateway. + items: + description: |- + ParentReference identifies an API object (usually a Gateway) that can be considered + a parent of this resource (usually a route). There are two kinds of parent resources + with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + This API may be extended in the future to support additional kinds of parent + resources. + + The API object must be valid in the cluster; the Group and Kind must + be registered in the cluster for this reference to be valid. + properties: + group: + default: gateway.networking.k8s.io + description: |- + Group is the group of the referent. + When unspecified, "gateway.networking.k8s.io" is inferred. + To set the core API group (such as for a "Service" kind referent), + Group must be explicitly set to "" (empty string). + + Support: Core + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Gateway + description: |- + Kind is kind of the referent. + + There are two kinds of parent resources with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + Support for other resources is Implementation-Specific. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: |- + Name is the name of the referent. + + Support: Core + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referent. When unspecified, this refers + to the local namespace of the Route. + + Note that there are specific rules for ParentRefs which cross namespace + boundaries. 
Cross-namespace references are only valid if they are explicitly + allowed by something in the namespace they are referring to. For example: + Gateway has the AllowedRoutes field, and ReferenceGrant provides a + generic way to enable any other kind of cross-namespace reference. + + + ParentRefs from a Route to a Service in the same namespace are "producer" + routes, which apply default routing rules to inbound connections from + any namespace to the Service. + + ParentRefs from a Route to a Service in a different namespace are + "consumer" routes, and these routing rules are only applied to outbound + connections originating from the same namespace as the Route, for which + the intended destination of the connections are a Service targeted as a + ParentRef of the Route. + + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port is the network port this Route targets. It can be interpreted + differently based on the type of parent resource. + + When the parent resource is a Gateway, this targets all listeners + listening on the specified port that also support this kind of Route(and + select this Route). It's not recommended to set `Port` unless the + networking behaviors specified in a Route must apply to a specific port + as opposed to a listener(s) whose port(s) may be changed. When both Port + and SectionName are specified, the name and port of the selected listener + must match both specified values. + + + When the parent resource is a Service, this targets a specific port in the + Service spec. When both Port (experimental) and SectionName are specified, + the name and port of the selected port must match both specified values. + + + Implementations MAY choose to support other parent resources. + Implementations supporting other types of parent resources MUST clearly + document how/if Port is interpreted. 
+ + For the purpose of status, an attachment is considered successful as + long as the parent resource accepts it partially. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment + from the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, + the Route MUST be considered detached from the Gateway. + + Support: Extended + format: int32 + maximum: 65535 + minimum: 1 + type: integer + sectionName: + description: |- + SectionName is the name of a section within the target resource. In the + following resources, SectionName is interpreted as the following: + + * Gateway: Listener name. When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + * Service: Port name. When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + + Implementations MAY choose to support attaching Routes to other resources. + If that is the case, they MUST clearly document how SectionName is + interpreted. + + When unspecified (empty string), this will reference the entire resource. + For the purpose of status, an attachment is considered successful if at + least one section in the parent resource accepts it. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment from + the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, the + Route MUST be considered detached from the Gateway. 
+ + Support: Core + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - name + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-validations: + - message: only Gateway is supported + rule: self.all(match, match.kind == 'Gateway') + path: + default: /mcp + description: |- + Path is the HTTP endpoint path that serves MCP requests over the Streamable HTTP transport. + If not specified, the default is "/mcp". + maxLength: 1024 + type: string + securityPolicy: + description: SecurityPolicy defines the security policy for this MCPRoute. + properties: + apiKeyAuth: + description: APIKeyAuth defines the configuration for the API + Key Authentication. + properties: + credentialRefs: + description: |- + CredentialRefs is the Kubernetes secret which contains the API keys. + This is an Opaque secret. + Each API key is stored in the key representing the client id. + If the secrets have a key for a duplicated client, the first one will be used. + items: + description: |- + SecretObjectReference identifies an API object including its namespace, + defaulting to Secret. + + The API object must be valid in the cluster; the Group and Kind must + be registered in the cluster for this reference to be valid. + + References to objects with invalid Group and Kind are not valid, and must + be rejected by the implementation, with appropriate Conditions set + on the containing object. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Secret + description: Kind is kind of the referent. For example + "Secret". 
+ maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + type: array + extractFrom: + description: |- + ExtractFrom is where to fetch the key from the coming request. + The value from the first source that has a key will be used. + items: + description: |- + ExtractFrom is where to fetch the key from the coming request. + Only one of header, param or cookie is supposed to be specified. + properties: + cookies: + description: |- + Cookies is the names of the cookie to fetch the key from. + If multiple cookies are specified, envoy will look for the api key in the order of the list. + This field is optional, but only one of headers, params or cookies is supposed to be specified. + items: + type: string + type: array + headers: + description: |- + Headers is the names of the header to fetch the key from. + If multiple headers are specified, envoy will look for the api key in the order of the list. + This field is optional, but only one of headers, params or cookies is supposed to be specified. + items: + type: string + type: array + params: + description: |- + Params is the names of the query parameter to fetch the key from. + If multiple params are specified, envoy will look for the api key in the order of the list. 
+ This field is optional, but only one of headers, params or cookies is supposed to be specified. + items: + type: string + type: array + type: object + type: array + forwardClientIDHeader: + description: |- + ForwardClientIDHeader is the name of the header to forward the client identity to the backend + service. The header will be added to the request with the client id as the value. + type: string + sanitize: + description: Sanitize indicates whether to remove the API + key from the request before forwarding it to the backend + service. + type: boolean + required: + - credentialRefs + - extractFrom + type: object + authorization: + description: Authorization defines the configuration for the MCP + spec compatible authorization. + properties: + defaultAction: + default: Deny + description: DefaultAction is the action to take when no rules + match. If unspecified, defaults to Deny. + enum: + - Allow + - Deny + type: string + rules: + description: |- + Rules defines a list of authorization rules. + These rules are evaluated in order, the first matching rule will be applied, + and the rest will be skipped. + + If no rules are defined, the default action will be applied to all requests. + items: + description: |- + MCPRouteAuthorizationRule defines an authorization rule for MCPRoute based on the MCP authorization spec. + Reference: https://modelcontextprotocol.io/specification/draft/basic/authorization#scope-challenge-handling + properties: + action: + default: Allow + description: Action is the authorization decision for + matching requests. If unspecified, defaults to Allow. 
+ enum: + - Allow + - Deny + type: string + cel: + description: "CEL specifies a Common Expression Language + (CEL) expression evaluated for this rule.\nThe expression + must return a boolean; evaluation errors or non-boolean + results\nare treated as \"no match\".\n\nExample CEL + expressions:\n\t* `request.method == \"POST\"`\n\t* + `request.headers[\"x-custom-header\"] == \"AllowedValue\"`\n\t* + `request.mcp.tool in [\"toolA\", \"toolB\"]`\n\nAvailable + attributes in the CEL expression:\n\n\t* request.method: + HTTP method such as GET or POST. Type: string.\n\t* + request.headers: map of headers with lowercased keys, + first value only. Type: map[string]string.\n\t* request.headers_all: + map of headers with lowercased keys, all values. Type: + map[string][]string.\n\t* request.path: request path + such as /mcp. Type: string.\n\t* request.auth.jwt.claims: + JWT claims when a bearer JWT is present. Type: map[string]any.\n\t* + request.auth.jwt.scopes: JWT scopes when a bearer + JWT is present. Type: []string.\n\t* request.mcp.method: + MCP method such as tools/list or tools/call. Type: + string.\n\t* request.mcp.backend: upstream backend + name (for example, \"kiwi\" or \"github\"). Type: + string.\n\t* request.mcp.tool: tool name without backend + prefix (for example, \"list_issues\"). Type: string.\n\t* + request.mcp.params: parameters of the MCP method, + including keys like \"_meta\" and \"arguments\". Type: + object.\n\nNote: The CEL expression support is experimental, + and the attributes\navailable to the expression may + change in future releases." + maxLength: 4096 + type: string + source: + description: |- + Source defines the authorization source for this rule. + If not specified, the rule will match all sources. + properties: + jwt: + description: JWT defines the JWT scopes required + for this rule to match. + properties: + claims: + description: |- + Claims defines the list of JWT claims required for the rule. 
Each claim must exist on the token + and have at least one of the expected values. Use to enforce tenant or subject-based access. + items: + description: JWTClaim specifies a claim in + a JWT token. + properties: + name: + description: |- + Name is the name of the claim. + If it is a nested claim, use a dot (.) separated string as the name to + represent the full path to the claim. + For example, if the claim is in the "department" field in the "organization" field, + the name should be "organization.department". + maxLength: 253 + minLength: 1 + type: string + valueType: + default: String + description: |- + ValueType is the type of the claim value. + Only String and StringArray types are supported for now. + enum: + - String + - StringArray + type: string + values: + description: |- + Values are the values that the claim must match. + If the claim is a string type, the specified value must match exactly. + If the claim is a string array type, the specified value must match one of the values in the array. + If multiple values are specified, one of the values must match for the rule to match. + items: + type: string + maxItems: 128 + minItems: 1 + type: array + required: + - name + - values + type: object + maxItems: 16 + type: array + x-kubernetes-validations: + - message: '''scope'' claim name is reserved + for OAuth scopes' + rule: '!self.exists(c, c.name == ''scope'')' + scopes: + description: |- + Scopes defines the list of JWT scopes required for the rule. + If multiple scopes are specified, all scopes must be present in the JWT for the rule to match. + items: + maxLength: 253 + minLength: 1 + type: string + maxItems: 16 + type: array + type: object + x-kubernetes-validations: + - message: either scopes or claims must be specified + rule: (has(self.scopes) && size(self.scopes) > + 0) || (has(self.claims) && size(self.claims) + > 0) + required: + - jwt + type: object + target: + description: |- + Target defines the authorization target for this rule. 
+ If not specified, the rule will match all targets. + properties: + tools: + description: Tools defines the list of tools this + rule applies to. + items: + description: ToolCall represents a tool call in + the MCP authorization target. + properties: + backend: + description: Backend is the name of the backend + this tool belongs to. + type: string + tool: + description: Tool is the name of the tool. + type: string + required: + - backend + - tool + type: object + maxItems: 16 + minItems: 1 + type: array + required: + - tools + type: object + type: object + maxItems: 32 + type: array + type: object + extAuth: + description: ExtAuth defines the configuration for External Authorization. + properties: + bodyToExtAuth: + description: BodyToExtAuth defines the Body to Ext Auth configuration. + properties: + maxRequestBytes: + description: |- + MaxRequestBytes is the maximum size of a message body that the filter will hold in memory. + Envoy will return HTTP 413 and will not initiate the authorization process when buffer + reaches the number set in this field. + Note that this setting will have precedence over failOpen mode. + format: int32 + minimum: 1 + type: integer + required: + - maxRequestBytes + type: object + contextExtensions: + description: |- + ContextExtensions are analogous to http_request.headers, however these + contents will not be sent to the upstream server. This provides an + extension mechanism for sending additional information to the auth server + without modifying the proto definition. It maps to the internal opaque + context in the filter chain. + items: + description: |- + ContextExtension is analogous to http_request.headers, however these + contents will not be sent to the upstream server. This provides an + extension mechanism for sending additional information to the auth server + without modifying the proto definition. It maps to the internal opaque + context in the filter chain. 
+ properties: + name: + description: Name of the context extension. + type: string + type: + default: Value + description: |- + Type is the type of method to use to read the ContextExtension value. + Valid values are Value and ValueRef, default is Value. + enum: + - Value + - ValueRef + type: string + value: + description: Value of the context extension. + type: string + valueRef: + description: ValueRef for the context extension's value. + properties: + group: + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + key: + description: The key to select. + type: string + kind: + description: Kind is kind of the referent. For example + "HTTPRoute" or "Service". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + required: + - group + - key + - kind + - name + type: object + x-kubernetes-validations: + - message: Only a reference to an object of kind ConfigMap + or Secret belonging to default v1 API group is supported. + rule: self.kind in ['ConfigMap', 'Secret'] && self.group + in ['', 'v1'] + required: + - name + - type + type: object + x-kubernetes-validations: + - message: Exactly one of value or valueRef must be set + with correct type. + rule: (self.type == 'Value' && has(self.value) && !has(self.valueRef)) + || (self.type == 'ValueRef' && !has(self.value) && has(self.valueRef)) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + failOpen: + default: false + description: |- + FailOpen is a switch used to control the behavior when a response from the External Authorization service cannot be obtained. 
+ If FailOpen is set to true, the system allows the traffic to pass through. + Otherwise, if it is set to false or not set (defaulting to false), + the system blocks the traffic and returns a HTTP 5xx error, reflecting a fail-closed approach. + This setting determines whether to prioritize accessibility over strict security in case of authorization service failure. + + If set to true, the External Authorization will also be bypassed if its configuration is invalid. + type: boolean + grpc: + description: |- + GRPC defines the gRPC External Authorization service. + Either GRPCService or HTTPService must be specified, + and only one of them can be provided. + properties: + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. 
+ maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. 
+ type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. 
+ format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel requests + that Envoy will make to the referenced backend + defined within a xRoute rule. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel retries + that Envoy will make to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending requests + that Envoy will queue to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures the + maximum number of connections that Envoy + will establish per-endpoint to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. 
+ Note: that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). 
+ format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). + If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. 
+ If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines the + number of healthy health checks required + before a backend host is marked healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses to + match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. 
+ Defaults to GET + type: string + path: + description: Path defines the HTTP path + that will be requested during health + checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time between + active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time to wait + for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of health + checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines the + number of unhealthy health checks required + before a backend host is marked unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, http + field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, tcp + field needs to be set. + rule: 'self.type == ''TCP'' ? has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set if the + Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines the + base duration for which a host will be ejected + on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets the + number of consecutive 5xx errors triggering + ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors sets + the number of consecutive gateway errors + triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time between + passive health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the maximum + percentage of hosts in a cluster that can + be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between external + and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration for + backend connections. 
+ properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. + https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash type + is set to Cookie. 
+ properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes to + set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the consistent + hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header to hash. + type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the query + parameter hash policy when the consistent + hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited to + 5000011. 
+ format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. + If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint override + information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. 
+ The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". + enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests between + locality zones. + properties: + preferLocal: + description: PreferLocalZone configures zone-aware + routing to prefer sending traffic to the + local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. 
+ properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold is + the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage of + requests that will be considered for + zone aware routing if zone aware routing + is configured. If not specified, Envoy + defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported for + RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? !has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported for + LeastRequest, Random, and RoundRobin load balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. 
+ properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy to be + applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per retry + attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. 
+ items: + description: HTTPStatus defines the http + status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the conditions + that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. 
This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time until + which entire response is received from the + upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy only + works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + type: object + x-kubernetes-validations: + - message: backendRef or backendRefs needs to be set + rule: has(self.backendRef) || self.backendRefs.size() > + 0 + - message: BackendRefs only supports Service, ServiceImport, + and Backend kind. + rule: 'has(self.backendRefs) ? self.backendRefs.all(f, f.kind + == ''Service'' || f.kind == ''ServiceImport'' || f.kind + == ''Backend'') : true' + - message: BackendRefs only supports Core, multicluster.x-k8s.io, + and gateway.envoyproxy.io groups. + rule: 'has(self.backendRefs) ? 
(self.backendRefs.all(f, + f.group == "" || f.group == ''multicluster.x-k8s.io'' + || f.group == ''gateway.envoyproxy.io'')) : true' + headersToExtAuth: + description: |- + HeadersToExtAuth defines the client request headers that will be included + in the request to the external authorization service. + Note: If not specified, the default behavior for gRPC and HTTP external + authorization services is different due to backward compatibility reasons. + All headers will be included in the check request to a gRPC authorization server. + Only the following headers will be included in the check request to an HTTP + authorization server: Host, Method, Path, Content-Length, and Authorization. + And these headers will always be included to the check request to an HTTP + authorization server by default, no matter whether they are specified + in HeadersToExtAuth or not. + items: + type: string + type: array + http: + description: |- + HTTP defines the HTTP External Authorization service. + Either GRPCService or HTTPService must be specified, + and only one of them can be provided. + properties: + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. + properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. 
They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. 
+ Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. 
+ + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. + format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. 
+ If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel requests + that Envoy will make to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel retries + that Envoy will make to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending requests + that Envoy will queue to the referenced backend + defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures the + maximum number of connections that Envoy + will establish per-endpoint to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. 
+ properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 
100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). + format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). + If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. 
+ properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. + If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines the + number of healthy health checks required + before a backend host is marked healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses to + match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. 
+ Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. + Defaults to GET + type: string + path: + description: Path defines the HTTP path + that will be requested during health + checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time between + active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. 
+ type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the type + of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, text + field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? has(self.binary) + : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time to wait + for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of health + checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines the + number of unhealthy health checks required + before a backend host is marked unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, http + field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, tcp + field needs to be set. + rule: 'self.type == ''TCP'' ? has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set if the + Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines the + base duration for which a host will be ejected + on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets the + number of consecutive 5xx errors triggering + ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors sets + the number of consecutive gateway errors + triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time between + passive health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the maximum + percentage of hosts in a cluster that can + be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between external + and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration for + backend connections. 
+ properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. + https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash type + is set to Cookie. 
+ properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes to + set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the consistent + hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header to hash. + type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the query + parameter hash policy when the consistent + hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited to + 5000011. 
+ format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. + If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint override + information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. 
+ The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". + enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests between + locality zones. + properties: + preferLocal: + description: PreferLocalZone configures zone-aware + routing to prefer sending traffic to the + local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. 
+ properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold is + the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage of + requests that will be considered for + zone aware routing if zone aware routing + is configured. If not specified, Envoy + defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported for + RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? !has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported for + LeastRequest, Random, and RoundRobin load balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. 
+ properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy to be + applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per retry + attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. 
+ items: + description: HTTPStatus defines the http + status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the conditions + that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. 
This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time until + which entire response is received from the + upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy only + works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + headersToBackend: + description: |- + HeadersToBackend are the authorization response headers that will be added + to the original client request before sending it to the backend server. + Note that coexisting headers will be overridden. + If not specified, no authorization response headers will be added to the + original client request. + items: + type: string + type: array + path: + description: |- + Path is the path of the HTTP External Authorization service. + If path is specified, the authorization request will be sent to that path, + or else the authorization request will use the path of the original request. + + Please note that the original request path will be appended to the path specified here. 
+ For example, if the original request path is "/hello", and the path specified here is "/auth", + then the path of the authorization request will be "/auth/hello". If the path is not specified, + the path of the authorization request will be "/hello". + type: string + type: object + x-kubernetes-validations: + - message: backendRef or backendRefs needs to be set + rule: has(self.backendRef) || self.backendRefs.size() > + 0 + - message: BackendRefs only supports Service, ServiceImport, + and Backend kind. + rule: 'has(self.backendRefs) ? self.backendRefs.all(f, f.kind + == ''Service'' || f.kind == ''ServiceImport'' || f.kind + == ''Backend'') : true' + - message: BackendRefs only supports Core, multicluster.x-k8s.io, + and gateway.envoyproxy.io groups. + rule: 'has(self.backendRefs) ? (self.backendRefs.all(f, + f.group == "" || f.group == ''multicluster.x-k8s.io'' + || f.group == ''gateway.envoyproxy.io'')) : true' + recomputeRoute: + description: |- + RecomputeRoute clears the route cache and recalculates the routing decision. + This field must be enabled if the headers added or modified by the ExtAuth are used for + route matching decisions. If the recomputation selects a new route, features targeting + the new matched route will be applied. + type: boolean + timeout: + description: |- + Timeout defines the timeout for requests to the external authorization service. + If not specified, defaults to 10 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + x-kubernetes-validations: + - message: one of grpc or http must be specified + rule: (has(self.grpc) || has(self.http)) + - message: only one of grpc or http can be specified + rule: (has(self.grpc) && !has(self.http)) || (!has(self.grpc) + && has(self.http)) + oauth: + description: OAuth defines the configuration for the MCP spec + compatible OAuth authentication. + properties: + audiences: + description: |- + Audiences is a list of JWT audiences allowed access. 
+ It is recommended to set this field for token audience validation, as it is a security best practice to prevent token misuse. + Reference: https://modelcontextprotocol.io/specification/2025-06-18/basic/authorization#token-audience-binding-and-validation + items: + type: string + maxItems: 32 + type: array + claimToHeaders: + description: |- + ClaimToHeaders specifies JWT claims to extract and forward as HTTP headers to backend MCP servers. + This enables backends to access user identity for authorization, auditing, or personalization. + + Security considerations: + - Any client-provided headers matching the configured header names will be stripped to prevent forgery + - Only the specified claims are extracted; the full JWT is not forwarded to backends + - Consider using a header prefix (e.g., "X-Jwt-Claim-") to avoid conflicts with other headers + items: + description: ClaimToHeader defines a configuration to convert + JWT claims into HTTP headers + properties: + claim: + description: |- + Claim is the JWT Claim that should be saved into the header : it can be a nested claim of type + (eg. "claim.nested.key", "sub"). The nested claim name must use dot "." + to separate the JSON name path. + type: string + header: + description: Header defines the name of the HTTP request + header that the JWT Claim will be saved into. + type: string + required: + - claim + - header + type: object + maxItems: 16 + type: array + issuer: + description: Issuer is the authorization server's issuer identity. + format: uri + type: string + jwks: + description: |- + JWKS defines how a JSON Web Key Sets (JWKS) can be obtained to verify the access tokens presented by the clients. + + If not specified, the JWKS URI will be discovered from the OAuth 2.0 Authorization Server Metadata + as per RFC 8414 by querying the `/.well-known/oauth-authorization-server` endpoint on the Issuer. 
+ properties: + localJWKS: + description: LocalJWKS defines how to get the JSON Web + Key Sets (JWKS) from a local source. + properties: + inline: + description: Inline contains the value as an inline + string. + type: string + type: + default: Inline + description: |- + Type is the type of method to use to read the body value. + Valid values are Inline and ValueRef, default is Inline. + enum: + - Inline + - ValueRef + type: string + valueRef: + description: |- + ValueRef is a reference to a local ConfigMap that contains the JSON Web Key Sets (JWKS). + + The value of key `jwks` in the ConfigMap will be used. + If the key is not found, the first value in the ConfigMap will be used. + properties: + group: + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + description: Kind is kind of the referent. For + example "HTTPRoute" or "Service". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + required: + - group + - kind + - name + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: Exactly one of inline or valueRef must be set + with correct type. + rule: (self.type == 'Inline' && has(self.inline) && + !has(self.valueRef)) || (self.type == 'ValueRef' && + !has(self.inline) && has(self.valueRef)) + remoteJWKS: + description: |- + RemoteJWKS defines how to fetch and cache JSON Web Key Sets (JWKS) from a remote + HTTP/HTTPS endpoint. + properties: + backendRef: + description: |- + BackendRef references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + + Deprecated: Use BackendRefs instead. 
+ properties: + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. 
+ format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == ''Service'') + ? has(self.port) : true' + backendRefs: + description: |- + BackendRefs references a Kubernetes object that represents the + backend server to which the authorization request will be sent. + items: + description: BackendRef defines how an ObjectReference + that is specific to BackendRef. + properties: + fallback: + description: |- + Fallback indicates whether the backend is designated as a fallback. + Multiple fallback backends can be configured. + It is highly recommended to configure active or passive health checks to ensure that failover can be detected + when the active backends become unhealthy and to automatically readjust once the primary backends are healthy again. + The overprovisioning factor is set to 1.4, meaning the fallback backends will only start receiving traffic when + the health of the active backends falls below 72%. + type: boolean + group: + default: "" + description: |- + Group is the group of the referent. For example, "gateway.networking.k8s.io". + When unspecified or empty string, core API group is inferred. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. 
+ + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the backend. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details. + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port specifies the destination port number to use for this resource. + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + weight: + default: 1 + description: |- + Weight specifies the proportion of requests forwarded to the referenced + backend. This is computed as weight/(sum of all weights in this + BackendRefs list). For non-zero values, there may be some epsilon from + the exact proportion defined here depending on the precision an + implementation supports. Weight is not a percentage and the sum of + weights does not need to equal 100. + + If only one backend is specified and it has a weight greater than 0, 100% + of the traffic is forwarded to that backend. If weight is set to 0, no + traffic should be forwarded for this entry. If unspecified, weight + defaults to 1. + + Support for this field varies based on the context where used. 
+ format: int32 + maximum: 1000000 + minimum: 0 + type: integer + required: + - name + type: object + x-kubernetes-validations: + - message: Must have port for Service reference + rule: '(size(self.group) == 0 && self.kind == + ''Service'') ? has(self.port) : true' + maxItems: 16 + type: array + backendSettings: + description: |- + BackendSettings holds configuration for managing the connection + to the backend. + properties: + circuitBreaker: + description: |- + Circuit Breaker settings for the upstream connections and requests. + If not set, circuit breakers will be enabled with the default thresholds + properties: + maxConnections: + default: 1024 + description: The maximum number of connections + that Envoy will establish to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRequests: + default: 1024 + description: The maximum number of parallel + requests that Envoy will make to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxParallelRetries: + default: 1024 + description: The maximum number of parallel + retries that Envoy will make to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxPendingRequests: + default: 1024 + description: The maximum number of pending + requests that Envoy will queue to the referenced + backend defined within a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + maxRequestsPerConnection: + description: |- + The maximum number of requests that Envoy will make over a single connection to the referenced backend defined within a xRoute rule. + Default: unlimited. 
+ format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + perEndpoint: + description: PerEndpoint defines Circuit Breakers + that will apply per-endpoint for an upstream + cluster + properties: + maxConnections: + default: 1024 + description: MaxConnections configures + the maximum number of connections that + Envoy will establish per-endpoint to + the referenced backend defined within + a xRoute rule. + format: int64 + maximum: 4294967295 + minimum: 0 + type: integer + type: object + type: object + connection: + description: Connection includes backend connection + settings. + properties: + bufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + BufferLimit Soft limit on size of the cluster’s connections read and write buffers. + BufferLimit applies to connection streaming (maybe non-streaming) channel between processes, it's in user space. + If unspecified, an implementation defined default is applied (32768 bytes). + For example, 20Mi, 1Gi, 256Ki etc. + Note: that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + preconnect: + description: |- + Preconnect configures proactive upstream connections to reduce latency by establishing + connections before they’re needed and avoiding connection establishment overhead. + + If unset, Envoy will fetch connections as needed to serve in-flight requests. + properties: + perEndpointPercent: + description: |- + PerEndpointPercent configures how many additional connections to maintain per + upstream endpoint, useful for high-QPS or latency sensitive services. Expressed as a + percentage of the connections required by active streams + (e.g. 100 = preconnect disabled, 105 = 1.05x connections per-endpoint, 200 = 2.00×). + + Allowed value range is between 100-300. 
When both PerEndpointPercent and + PredictivePercent are set, Envoy ensures both are satisfied (max of the two). + format: int32 + maximum: 300 + minimum: 100 + type: integer + predictivePercent: + description: |- + PredictivePercent configures how many additional connections to maintain + across the cluster by anticipating which upstream endpoint the load balancer + will select next, useful for low-QPS services. Relies on deterministic + loadbalancing and is only supported with Random or RoundRobin. + Expressed as a percentage of the connections required by active streams + (e.g. 100 = 1.0 (no preconnect), 105 = 1.05× connections across the cluster, 200 = 2.00×). + + Minimum allowed value is 100. When both PerEndpointPercent and PredictivePercent are + set Envoy ensures both are satisfied per host (max of the two). + format: int32 + minimum: 100 + type: integer + type: object + socketBufferLimit: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + SocketBufferLimit provides configuration for the maximum buffer size in bytes for each socket + to backend. + SocketBufferLimit applies to socket streaming channel between TCP/IP stacks, it's in kernel space. + For example, 20Mi, 1Gi, 256Ki etc. + Note that when the suffix is not provided, the value is interpreted as bytes. + x-kubernetes-int-or-string: true + type: object + dns: + description: DNS includes dns resolution settings. + properties: + dnsRefreshRate: + description: |- + DNSRefreshRate specifies the rate at which DNS records should be refreshed. + Defaults to 30 seconds. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + lookupFamily: + description: |- + LookupFamily determines how Envoy would resolve DNS for Routes where the backend is specified as a fully qualified domain name (FQDN). 
+ If set, this configuration overrides other defaults. + enum: + - IPv4 + - IPv6 + - IPv4Preferred + - IPv6Preferred + - IPv4AndIPv6 + type: string + respectDnsTtl: + description: |- + RespectDNSTTL indicates whether the DNS Time-To-Live (TTL) should be respected. + If the value is set to true, the DNS refresh rate will be set to the resource record’s TTL. + Defaults to true. + type: boolean + type: object + healthCheck: + description: HealthCheck allows gateway to perform + active health checking on backends. + properties: + active: + description: Active health check configuration + properties: + grpc: + description: |- + GRPC defines the configuration of the GRPC health checker. + It's optional, and can only be used if the specified type is GRPC. + properties: + service: + description: |- + Service to send in the health check request. + If this is not specified, then the health check request applies to the entire + server and not to a specific service. + type: string + type: object + healthyThreshold: + default: 1 + description: HealthyThreshold defines + the number of healthy health checks + required before a backend host is marked + healthy. + format: int32 + minimum: 1 + type: integer + http: + description: |- + HTTP defines the configuration of http health checker. + It's required while the health checker type is HTTP. + properties: + expectedResponse: + description: ExpectedResponse defines + a list of HTTP expected responses + to match. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? 
has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + expectedStatuses: + description: |- + ExpectedStatuses defines a list of HTTP response statuses considered healthy. + Defaults to 200 only + items: + description: HTTPStatus defines + the http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + hostname: + description: |- + Hostname defines the HTTP host that will be requested during health checking. + Default: HTTPRoute or GRPCRoute hostname. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + method: + description: |- + Method defines the HTTP method used for health checking. + Defaults to GET + type: string + path: + description: Path defines the HTTP + path that will be requested during + health checking. + maxLength: 1024 + minLength: 1 + type: string + required: + - path + type: object + initialJitter: + description: |- + InitialJitter defines the maximum time Envoy will wait before the first health check. + Envoy will randomly select a value between 0 and the initial jitter value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + default: 3s + description: Interval defines the time + between active health checks. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + tcp: + description: |- + TCP defines the configuration of tcp health checker. + It's required while the health checker type is TCP. + properties: + receive: + description: Receive defines the expected + response payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. 
+ type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + send: + description: Send defines the request + payload. + properties: + binary: + description: Binary payload base64 + encoded. + format: byte + type: string + text: + description: Text payload in plain + text. + type: string + type: + allOf: + - enum: + - Text + - Binary + - enum: + - Text + - Binary + description: Type defines the + type of the payload. + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If payload type is Text, + text field needs to be set. + rule: 'self.type == ''Text'' ? has(self.text) + : !has(self.text)' + - message: If payload type is Binary, + binary field needs to be set. + rule: 'self.type == ''Binary'' ? + has(self.binary) : !has(self.binary)' + type: object + timeout: + default: 1s + description: Timeout defines the time + to wait for a health check response. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: + allOf: + - enum: + - HTTP + - TCP + - GRPC + - enum: + - HTTP + - TCP + - GRPC + description: Type defines the type of + health checker. + type: string + unhealthyThreshold: + default: 3 + description: UnhealthyThreshold defines + the number of unhealthy health checks + required before a backend host is marked + unhealthy. + format: int32 + minimum: 1 + type: integer + required: + - type + type: object + x-kubernetes-validations: + - message: If Health Checker type is HTTP, + http field needs to be set. + rule: 'self.type == ''HTTP'' ? has(self.http) + : !has(self.http)' + - message: If Health Checker type is TCP, + tcp field needs to be set. + rule: 'self.type == ''TCP'' ? 
has(self.tcp) + : !has(self.tcp)' + - message: The grpc field can only be set + if the Health Checker type is GRPC. + rule: 'has(self.grpc) ? self.type == ''GRPC'' + : true' + panicThreshold: + description: |- + When number of unhealthy endpoints for a backend reaches this threshold + Envoy will disregard health status and balance across all endpoints. + It's designed to prevent a situation in which host failures cascade throughout the cluster + as load increases. If not set, the default value is 50%. To disable panic mode, set value to `0`. + format: int32 + maximum: 100 + minimum: 0 + type: integer + passive: + description: Passive passive check configuration + properties: + baseEjectionTime: + default: 30s + description: BaseEjectionTime defines + the base duration for which a host will + be ejected on consecutive failures. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + consecutive5XxErrors: + default: 5 + description: Consecutive5xxErrors sets + the number of consecutive 5xx errors + triggering ejection. + format: int32 + type: integer + consecutiveGatewayErrors: + description: ConsecutiveGatewayErrors + sets the number of consecutive gateway + errors triggering ejection. + format: int32 + type: integer + consecutiveLocalOriginFailures: + default: 5 + description: |- + ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection. + Parameter takes effect only when split_external_local_origin_errors is set to true. + format: int32 + type: integer + failurePercentageThreshold: + description: |- + FailurePercentageThreshold sets the failure percentage threshold for outlier detection. + If the failure percentage of a given host is greater than or equal to this value, it will be ejected. + Defaults to 85. + format: int32 + maximum: 100 + minimum: 0 + type: integer + interval: + default: 3s + description: Interval defines the time + between passive health checks. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxEjectionPercent: + default: 10 + description: MaxEjectionPercent sets the + maximum percentage of hosts in a cluster + that can be ejected. + format: int32 + type: integer + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors + enables splitting of errors between + external and local origin. + type: boolean + type: object + type: object + http2: + description: HTTP2 provides HTTP/2 configuration + for backend connections. + properties: + initialConnectionWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialConnectionWindowSize sets the initial window size for HTTP/2 connections. + If not set, the default value is 1 MiB. + x-kubernetes-int-or-string: true + initialStreamWindowSize: + allOf: + - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + - pattern: ^[1-9]+[0-9]*([EPTGMK]i|[EPTGMk])?$ + anyOf: + - type: integer + - type: string + description: |- + InitialStreamWindowSize sets the initial window size for HTTP/2 streams. + If not set, the default value is 64 KiB(64*1024). + x-kubernetes-int-or-string: true + maxConcurrentStreams: + description: |- + MaxConcurrentStreams sets the maximum number of concurrent streams allowed per connection. + If not set, the default value is 100. + format: int32 + maximum: 2147483647 + minimum: 1 + type: integer + onInvalidMessage: + description: |- + OnInvalidMessage determines if Envoy will terminate the connection or just the offending stream in the event of HTTP messaging error + It's recommended for L2 Envoy deployments to set this value to TerminateStream. 
+ https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/level_two + Default: TerminateConnection + type: string + type: object + loadBalancer: + description: |- + LoadBalancer policy to apply when routing traffic from the gateway to + the backend endpoints. Defaults to `LeastRequest`. + properties: + consistentHash: + description: |- + ConsistentHash defines the configuration when the load balancer type is + set to ConsistentHash + properties: + cookie: + description: Cookie configures the cookie + hash policy when the consistent hash + type is set to Cookie. + properties: + attributes: + additionalProperties: + type: string + description: Additional Attributes + to set for the generated cookie. + type: object + name: + description: |- + Name of the cookie to hash. + If this cookie does not exist in the request, Envoy will generate a cookie and set + the TTL on the response back to the client based on Layer 4 + attributes of the backend endpoint, to ensure that these future requests + go to the same backend endpoint. Make sure to set the TTL field for this case. + type: string + ttl: + description: |- + TTL of the generated cookie if the cookie is not present. This value sets the + Max-Age attribute value. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - name + type: object + header: + description: |- + Header configures the header hash policy when the consistent hash type is set to Header. + + Deprecated: use Headers instead + properties: + name: + description: Name of the header to + hash. + type: string + required: + - name + type: object + headers: + description: Headers configures the header + hash policy for each header, when the + consistent hash type is set to Headers. + items: + description: |- + Header defines the header hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the header + to hash. 
+ type: string + required: + - name + type: object + type: array + queryParams: + description: QueryParams configures the + query parameter hash policy when the + consistent hash type is set to QueryParams. + items: + description: |- + QueryParam defines the query parameter name hashing configuration for consistent hash based + load balancing. + properties: + name: + description: Name of the query param + to hash. + type: string + required: + - name + type: object + type: array + tableSize: + default: 65537 + description: The table size for consistent + hashing, must be prime number limited + to 5000011. + format: int64 + maximum: 5000011 + minimum: 2 + type: integer + type: + description: |- + ConsistentHashType defines the type of input to hash on. Valid Type values are + "SourceIP", + "Header", + "Headers", + "Cookie". + "QueryParams". + enum: + - SourceIP + - Header + - Headers + - Cookie + - QueryParams + type: string + required: + - type + type: object + x-kubernetes-validations: + - message: If consistent hash type is header, + the header field must be set. + rule: 'self.type == ''Header'' ? has(self.header) + : !has(self.header)' + - message: If consistent hash type is headers, + the headers field must be set. + rule: 'self.type == ''Headers'' ? has(self.headers) + : !has(self.headers)' + - message: If consistent hash type is cookie, + the cookie field must be set. + rule: 'self.type == ''Cookie'' ? has(self.cookie) + : !has(self.cookie)' + - message: If consistent hash type is queryParams, + the queryParams field must be set. + rule: 'self.type == ''QueryParams'' ? has(self.queryParams) + : !has(self.queryParams)' + endpointOverride: + description: |- + EndpointOverride defines the configuration for endpoint override. + When specified, the load balancer will attempt to route requests to endpoints + based on the override information extracted from request headers or metadata. 
+ If the override endpoints are not available, the configured load balancer policy will be used as fallback. + properties: + extractFrom: + description: ExtractFrom defines the sources + to extract endpoint override information + from. + items: + description: EndpointOverrideExtractFrom + defines a source to extract endpoint + override information from. + properties: + header: + description: |- + Header defines the header to get the override endpoint addresses. + The header value must specify at least one endpoint in `IP:Port` format or multiple endpoints in `IP:Port,IP:Port,...` format. + For example `10.0.0.5:8080` or `[2600:4040:5204::1574:24ae]:80`. + The IPv6 address is enclosed in square brackets. + type: string + type: object + maxItems: 10 + minItems: 1 + type: array + required: + - extractFrom + type: object + slowStart: + description: |- + SlowStart defines the configuration related to the slow start load balancer policy. + If set, during slow start window, traffic sent to the newly added hosts will gradually increase. + Currently this is only supported for RoundRobin and LeastRequest load balancers + properties: + window: + description: |- + Window defines the duration of the warm up period for newly added host. + During slow start window, traffic sent to the newly added hosts will gradually increase. + Currently only supports linear growth of traffic. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/api-v3/config/cluster/v3/cluster.proto#config-cluster-v3-cluster-slowstartconfig + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + required: + - window + type: object + type: + description: |- + Type decides the type of Load Balancer policy. + Valid LoadBalancerType values are + "ConsistentHash", + "LeastRequest", + "Random", + "RoundRobin". 
+ enum: + - ConsistentHash + - LeastRequest + - Random + - RoundRobin + type: string + zoneAware: + description: ZoneAware defines the configuration + related to the distribution of requests + between locality zones. + properties: + preferLocal: + description: PreferLocalZone configures + zone-aware routing to prefer sending + traffic to the local locality zone. + properties: + force: + description: |- + ForceLocalZone defines override configuration for forcing all traffic to stay within the local zone instead of the default behavior + which maintains equal distribution among upstream endpoints while sending as much traffic as possible locally. + properties: + minEndpointsInZoneThreshold: + description: |- + MinEndpointsInZoneThreshold is the minimum number of upstream endpoints in the local zone required to honor the forceLocalZone + override. This is useful for protecting zones with fewer endpoints. + format: int32 + type: integer + type: object + minEndpointsThreshold: + description: MinEndpointsThreshold + is the minimum number of total upstream + endpoints across all zones required + to enable zone-aware routing. + format: int64 + type: integer + percentageEnabled: + description: Configures percentage + of requests that will be considered + for zone aware routing if zone aware + routing is configured. If not specified, + Envoy defaults to 100%. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + type: object + required: + - type + type: object + x-kubernetes-validations: + - message: If LoadBalancer type is consistentHash, + consistentHash field needs to be set. + rule: 'self.type == ''ConsistentHash'' ? has(self.consistentHash) + : !has(self.consistentHash)' + - message: Currently SlowStart is only supported + for RoundRobin and LeastRequest load balancers. + rule: 'self.type in [''Random'', ''ConsistentHash''] + ? 
!has(self.slowStart) : true ' + - message: Currently ZoneAware is only supported + for LeastRequest, Random, and RoundRobin load + balancers. + rule: 'self.type == ''ConsistentHash'' ? !has(self.zoneAware) + : true ' + proxyProtocol: + description: ProxyProtocol enables the Proxy Protocol + when communicating with the backend. + properties: + version: + description: |- + Version of ProxyProtol + Valid ProxyProtocolVersion values are + "V1" + "V2" + enum: + - V1 + - V2 + type: string + required: + - version + type: object + retry: + description: |- + Retry provides more advanced usage, allowing users to customize the number of retries, retry fallback strategy, and retry triggering conditions. + If not set, retry will be disabled. + properties: + numAttemptsPerPriority: + description: |- + NumAttemptsPerPriority defines the number of requests (initial attempt + retries) + that should be sent to the same priority before switching to a different one. + If not specified or set to 0, all requests are sent to the highest priority that is healthy. + format: int32 + type: integer + numRetries: + default: 2 + description: NumRetries is the number of retries + to be attempted. Defaults to 2. + format: int32 + minimum: 0 + type: integer + perRetry: + description: PerRetry is the retry policy + to be applied per retry attempt. + properties: + backOff: + description: |- + Backoff is the backoff policy to be applied per retry attempt. gateway uses a fully jittered exponential + back-off algorithm for retries. For additional details, + see https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#config-http-filters-router-x-envoy-max-retries + properties: + baseInterval: + description: BaseInterval is the base + interval between retries. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxInterval: + description: |- + MaxInterval is the maximum interval between retries. 
This parameter is optional, but must be greater than or equal to the base_interval if set. + The default is 10 times the base_interval + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + timeout: + description: Timeout is the timeout per + retry attempt. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + retryOn: + description: |- + RetryOn specifies the retry trigger condition. + + If not specified, the default is to retry on connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes(503). + properties: + httpStatusCodes: + description: |- + HttpStatusCodes specifies the http status codes to be retried. + The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. + items: + description: HTTPStatus defines the + http status code. + maximum: 599 + minimum: 100 + type: integer + type: array + triggers: + description: Triggers specifies the retry + trigger condition(Http/Grpc). + items: + description: TriggerEnum specifies the + conditions that trigger retries. + enum: + - 5xx + - gateway-error + - reset + - reset-before-request + - connect-failure + - retriable-4xx + - refused-stream + - retriable-status-codes + - cancelled + - deadline-exceeded + - internal + - resource-exhausted + - unavailable + type: string + type: array + type: object + type: object + tcpKeepalive: + description: |- + TcpKeepalive settings associated with the upstream client connection. + Disabled by default. + properties: + idleTime: + description: |- + The duration a connection needs to be idle before keep-alive + probes start being sent. + The duration format is + Defaults to `7200s`. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + interval: + description: |- + The duration between keep-alive probes. + Defaults to `75s`. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + probes: + description: |- + The total number of unacknowledged probes to send before deciding + the connection is dead. + Defaults to 9. + format: int32 + type: integer + type: object + timeout: + description: Timeout settings for the backend + connections. + properties: + http: + description: Timeout settings for HTTP. + properties: + connectionIdleTimeout: + description: |- + The idle timeout for an HTTP connection. Idle time is defined as a period in which there are no active requests in the connection. + Default: 1 hour. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxConnectionDuration: + description: |- + The maximum duration of an HTTP connection. + Default: unlimited. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + maxStreamDuration: + description: |- + MaxStreamDuration is the maximum duration for a stream to complete. This timeout measures the time + from when the request is sent until the response stream is fully consumed and does not apply to + non-streaming requests. + When set to "0s", no max duration is applied and streams can run indefinitely. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + requestTimeout: + description: RequestTimeout is the time + until which entire response is received + from the upstream. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + tcp: + description: Timeout settings for TCP. + properties: + connectTimeout: + description: |- + The timeout for network connection establishment, including TCP and TLS handshakes. + Default: 10 seconds. 
+ pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + type: object + type: object + type: object + x-kubernetes-validations: + - message: predictivePercent in preconnect policy + only works with RoundRobin or Random load balancers + rule: '!((has(self.connection) && has(self.connection.preconnect) + && has(self.connection.preconnect.predictivePercent)) + && !(has(self.loadBalancer) && has(self.loadBalancer.type) + && self.loadBalancer.type in [''Random'', ''RoundRobin'']))' + cacheDuration: + default: 300s + description: |- + Duration is a string value representing a duration in time. The format is as specified + in GEP-2257, a strict subset of the syntax parsed by Golang time.ParseDuration. + pattern: ^([0-9]{1,5}(h|m|s|ms)){1,4}$ + type: string + uri: + description: |- + URI is the HTTPS URI to fetch the JWKS. Envoy's system trust bundle is used to validate the server certificate. + If a custom trust bundle is needed, it can be specified in a BackendTLSConfig resource and target the BackendRefs. + maxLength: 253 + minLength: 1 + type: string + required: + - uri + type: object + x-kubernetes-validations: + - message: BackendRefs must be used, backendRef is not + supported. + rule: '!has(self.backendRef)' + - message: Retry timeout is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.perRetry)? + !has(self.backendSettings.retry.perRetry.timeout):true):true):true + - message: HTTPStatusCodes is not supported. + rule: has(self.backendSettings)? (has(self.backendSettings.retry)?(has(self.backendSettings.retry.retryOn)? + !has(self.backendSettings.retry.retryOn.httpStatusCodes):true):true):true + type: object + x-kubernetes-validations: + - message: either remoteJWKS or localJWKS must be specified. + rule: has(self.remoteJWKS) || has(self.localJWKS) + - message: remoteJWKS and localJWKS cannot both be specified. 
+ rule: '!(has(self.remoteJWKS) && has(self.localJWKS))' + protectedResourceMetadata: + description: |- + ProtectedResourceMetadata defines the OAuth 2.0 Resource Server Metadata as per RFC 8414. + This is used to expose the metadata endpoint for mcp clients to discover the authorization servers, + supported scopes, and JWKS URI. + properties: + resource: + description: |- + Resource is the identifier of the protected resource. + This should match the MCPRoute's URL. For example, if the MCPRoute's URL is + "https://api.example.com/mcp", the Resource should be "https://api.example.com/mcp". + format: uri + type: string + resourceDocumentation: + description: ResourceDocumentation is a URL that provides + human-readable documentation for the resource. + format: uri + type: string + resourceName: + description: ResourceName is a human-readable name for + the protected resource. + maxLength: 256 + type: string + resourcePolicyUri: + description: ResourcePolicyURI is a URL that points to + the resource server's policy document. + format: uri + type: string + resourceSigningAlgValuesSupported: + description: |- + ResourceSigningAlgValuesSupported is a list of JWS signing algorithms supported by the resource server. + These algorithms are used in the "alg" field of the JOSE header in signed tokens. + items: + type: string + maxItems: 16 + minItems: 1 + type: array + scopesSupported: + description: |- + ScopesSupported defines the minimal set of scopes required for the basic functionality of the MCPRoute. + It should avoid broad or overly permissive scopes to prevent clients from requesting tokens with excessive privileges. + + If an operation requires additional scopes that are not present in the access token, the client will receive a + 403 Forbidden response that includes the required scopes in the `scope` field of the `WWW-Authenticate` header. 
+ This enables incremental privilege elevation through targeted `WWW-Authenticate: scope="..."` challenges when + privileged operations are first attempted. + items: + type: string + maxItems: 32 + type: array + required: + - resource + type: object + required: + - issuer + - protectedResourceMetadata + type: object + type: object + x-kubernetes-validations: + - message: oauth must be configured when any authorization rule uses + a jwt source + rule: '!(has(self.authorization) && self.authorization.rules.exists(r, + has(r.source) && has(r.source.jwt)) && !has(self.oauth))' + required: + - backendRefs + - parentRefs + type: object + status: + description: Status defines the status details of the MCPRoute. + properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_quotapolicies.yaml b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_quotapolicies.yaml new file mode 100644 index 00000000..540c17cd --- /dev/null +++ b/sources/envoy-ai-gateway-crds/v0.6.0/templates/aigateway.envoyproxy.io_quotapolicies.yaml @@ -0,0 +1,497 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. 
+ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.0 + labels: + gateway.networking.k8s.io/policy: direct + name: quotapolicies.aigateway.envoyproxy.io +spec: + group: aigateway.envoyproxy.io + names: + kind: QuotaPolicy + listKind: QuotaPolicyList + plural: quotapolicies + singular: quotapolicy + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[-1:].type + name: Status + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + QuotaPolicy specifies token quota configuration for inference services. + Providing a list of backends in the AIGatewayRouteRule allows failover to a different service + if token quota for a service had been exceeded. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: QuotaPolicySpec specifies rules for computing token based + costs of requests. + properties: + perModelQuotas: + description: |- + PerModelQuotas specifies quota for different models served by the AIServiceBackend(s) where this + policy is attached. + items: + properties: + modelName: + description: Model name for which the quota is specified. 
+ minLength: 1 + type: string + quota: + description: Expression for computing request cost and rules + for matching requests to quota buckets. + properties: + bucketRules: + description: |- + BucketRules are a list of client selectors and quotas. If a request + matches multiple rules, each of their associated quotas get applied, so a + single request might burn down the quota for multiple rules. + items: + properties: + clientSelectors: + description: |- + ClientSelectors holds the list of conditions to select + specific clients using attributes from the traffic flow. + All individual select conditions must hold True for this rule + and its limit to be applied. + + If no client selectors are specified, the rule applies to all traffic of + the targeted AIServiceBackend. + items: + description: |- + RateLimitSelectCondition specifies the attributes within the traffic flow that can + be used to select a subset of clients to be ratelimited. + All the individual conditions must hold True for the overall condition to hold True. + And, at least one of headers or methods or path or sourceCIDR or queryParams condition must be specified. + properties: + headers: + description: |- + Headers is a list of request headers to match. Multiple header values are ANDed together, + meaning, a request MUST match all the specified headers. + items: + description: HeaderMatch defines the match + attributes within the HTTP Headers of the + request. + properties: + invert: + default: false + description: |- + Invert specifies whether the value match result will be inverted. + Do not set this field when Type="Distinct", implying matching on any/all unique + values within the header. + type: boolean + name: + description: |- + Name of the HTTP header. + The header name is case-insensitive unless PreserveHeaderCase is set to true. + For example, "Foo" and "foo" are considered the same header. 
+ maxLength: 256 + minLength: 1 + type: string + type: + default: Exact + description: Type specifies how to match + against the value of the header. + enum: + - Exact + - RegularExpression + - Distinct + type: string + value: + description: |- + Value within the HTTP header. + Do not set this field when Type="Distinct", implying matching on any/all unique + values within the header. + maxLength: 1024 + type: string + required: + - name + type: object + maxItems: 16 + type: array + methods: + description: |- + Methods is a list of request methods to match. Multiple method values are ORed together, + meaning, a request can match any one of the specified methods. If not specified, it matches all methods. + items: + description: MethodMatch defines the matching + criteria for the HTTP method of a request. + properties: + invert: + default: false + description: Invert specifies whether + the value match result will be inverted. + type: boolean + value: + description: Value specifies the HTTP + method. + enum: + - GET + - HEAD + - POST + - PUT + - DELETE + - CONNECT + - OPTIONS + - TRACE + - PATCH + type: string + required: + - value + type: object + type: array + path: + description: |- + Path is the request path to match. + Support Exact, PathPrefix and RegularExpression match types. + properties: + invert: + default: false + description: Invert specifies whether the + value match result will be inverted. + type: boolean + type: + default: PathPrefix + description: Type specifies how to match + against the value of the path. + enum: + - Exact + - PathPrefix + - RegularExpression + type: string + value: + default: / + description: Value specifies the HTTP path. + maxLength: 1024 + type: string + required: + - value + type: object + queryParams: + description: |- + QueryParams is a list of query parameters to match. Multiple query parameter values are ANDed together, + meaning, a request MUST match all the specified query parameters. 
+ items: + description: QueryParamMatch defines the match + attributes within the query parameters of + the request. + properties: + invert: + default: false + description: |- + Invert specifies whether the value match result will be inverted. + Do not set this field when Type="Distinct", implying matching on any/all unique + values within the query parameter. + type: boolean + name: + description: Name of the query parameter. + maxLength: 256 + minLength: 1 + type: string + type: + default: Exact + description: Type specifies how to match + against the value of the query parameter. + enum: + - Exact + - RegularExpression + - Distinct + type: string + value: + description: |- + Value of the query parameter. + Do not set this field when Type="Distinct", implying matching on any/all unique + values within the query parameter. + maxLength: 1024 + type: string + required: + - name + type: object + maxItems: 16 + type: array + sourceCIDR: + description: SourceCIDR is the client IP Address + range to match on. + properties: + type: + default: Exact + enum: + - Exact + - Distinct + type: string + value: + description: |- + Value is the IP CIDR that represents the range of Source IP Addresses of the client. + These could also be the intermediate addresses through which the request has flown through and is part of the `X-Forwarded-For` header. + For example, `192.168.0.1/32`, `192.168.0.0/24`, `001:db8::/64`. + maxLength: 256 + minLength: 1 + type: string + required: + - value + type: object + type: object + x-kubernetes-validations: + - message: at least one of headers, methods, path, + sourceCIDR or queryParams must be specified + rule: has(self.headers) || has(self.methods) || + has(self.path) || has(self.sourceCIDR) || has(self.queryParams) + maxItems: 8 + type: array + quota: + description: |- + Quota value for given client selectors. + This quota is applied for traffic flows when the selectors + compute to True, causing the request to be counted towards the limit. 
+ A response with 429 HTTP status code is sent back to the client when + the selected requests have exceeded the quota. + properties: + duration: + description: |- + Time window. The suffix is used to specify units. The following + suffixes are supported: + * s - seconds (the default unit) + * m - minutes + * h - hours + type: string + limit: + description: The limit alloted for a specified + time window. + type: integer + required: + - duration + - limit + type: object + shadowMode: + description: |- + ShadowMode indicates whether this quota rule runs in shadow mode. + When enabled, all quota checks are performed (cache lookups, + counter updates, telemetry generation), but the outcome is never enforced. + The request always succeeds, even if the configured quota is exceeded. + type: boolean + required: + - quota + type: object + type: array + costExpression: + description: |- + CostExpression specifies a CEL expression for computing the quota burndown of the LLM-related request. + If no expression is specified the "total_tokens" value is used. + For example: + + * "input_tokens + cached_input_tokens * 0.1 + output_tokens * 6" + type: string + defaultBucket: + description: |- + Quota applicable to all traffic. This value can be overridden for specific classes of requests + using the "BucketRules" configuration. + properties: + duration: + description: |- + Time window. The suffix is used to specify units. The following + suffixes are supported: + * s - seconds (the default unit) + * m - minutes + * h - hours + type: string + limit: + description: The limit alloted for a specified time + window. + type: integer + required: + - duration + - limit + type: object + mode: + description: |- + The "Mode" determines how quota is charged to the "DefaultBucket" and matching "BucketRules". + In the "exclusive" mode the quota is charged to matching BucketRules or the DefaultBucket + if no BucketRules match the request. 
The request is denied if all matching buckets are out of quota. + In the "shared" mode the quota is charged to all matching "BucketRules" AND the "DefaultBucket" + and request is allowed only if the quota is available in all matching buckets. + enum: + - Exclusive + - Shared + type: string + required: + - mode + type: object + required: + - modelName + - quota + type: object + maxItems: 128 + type: array + serviceQuota: + description: |- + Quota for all models served by AIServiceBackend(s). This value can be overridden for specific models using the "PerModelQuotas" + configuration. + properties: + costExpression: + description: |- + CostExpression specifies a CEL expression for computing the quota burndown of the LLM-related request. + If no expression is specified the "total_tokens" value is used. + For example: + + * "input_tokens + cached_input_tokens * 0.1 + output_tokens * 6" + type: string + quota: + description: |- + Quota value applicable to all requests. + A response with 429 HTTP status code is sent back to the client when + the selected requests have exceeded the quota. + properties: + duration: + description: |- + Time window. The suffix is used to specify units. The following + suffixes are supported: + * s - seconds (the default unit) + * m - minutes + * h - hours + type: string + limit: + description: The limit alloted for a specified time window. + type: integer + required: + - duration + - limit + type: object + required: + - quota + type: object + targetRefs: + description: TargetRefs are the names of the AIServiceBackend resources + this QuotaPolicy is being attached to. + items: + description: |- + LocalPolicyTargetReference identifies an API object to apply a direct or + inherited policy to. This should be used as part of Policy resources + that can target Gateway API resources. For more information on how this + policy attachment model works, and a sample Policy resource, refer to + the policy attachment documentation for Gateway API. 
+ properties: + group: + description: Group is the group of the target resource. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + description: Kind is kind of the target resource. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the target resource. + maxLength: 253 + minLength: 1 + type: string + required: + - group + - kind + - name + type: object + maxItems: 16 + type: array + x-kubernetes-validations: + - message: targetRefs must reference AIServiceBackend resources + rule: self.all(ref, ref.group == 'aigateway.envoyproxy.io' && ref.kind + == 'AIServiceBackend') + type: object + status: + description: Status defines the status details of the QuotaPolicy. + properties: + conditions: + description: |- + Conditions is the list of conditions by the reconciliation result. + Currently, at most one condition is set. + + Known .status.conditions.type are: "Accepted", "NotAccepted". + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/sources/envoy-ai-gateway-crds/v0.6.0/values.yaml b/sources/envoy-ai-gateway-crds/v0.6.0/values.yaml new file mode 100644 index 00000000..2e314111 --- /dev/null +++ b/sources/envoy-ai-gateway-crds/v0.6.0/values.yaml @@ -0,0 +1,5 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. 
+ diff --git a/sources/envoy-ai-gateway/v0.6.0/Chart.yaml b/sources/envoy-ai-gateway/v0.6.0/Chart.yaml new file mode 100644 index 00000000..7e07a615 --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +appVersion: v0.6.0 +description: The Helm chart for Envoy AI Gateway +home: https://aigateway.envoyproxy.io/ +icon: https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/heads/main/site/static/img/logo.svg +keywords: +- gateway-api +- envoyproxy +- envoy-gateway +- eg +- ai-gateway +- ai +maintainers: +- name: envoy-ai-gateway-maintainers + url: https://github.com/envoyproxy/ai-gateway/blob/main/CODEOWNERS +name: ai-gateway-helm +sources: +- https://github.com/envoyproxy/ai-gateway +type: application +version: v0.6.0 diff --git a/sources/envoy-ai-gateway/v0.6.0/templates/NOTES.txt b/sources/envoy-ai-gateway/v0.6.0/templates/NOTES.txt new file mode 100644 index 00000000..e69de29b diff --git a/sources/envoy-ai-gateway/v0.6.0/templates/_helpers.tpl b/sources/envoy-ai-gateway/v0.6.0/templates/_helpers.tpl new file mode 100644 index 00000000..357098ea --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/templates/_helpers.tpl @@ -0,0 +1,133 @@ +{* + Copyright Envoy AI Gateway Authors + SPDX-License-Identifier: Apache-2.0 + The full text of the Apache license is available in the LICENSE file at + the root of the repo. +*} + +{{/* +Expand the name of the chart. +*/}} +{{- define "ai-gateway-helm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "ai-gateway-helm.controller.fullname" -}} +{{- if .Values.controller.fullnameOverride }} +{{- .Values.controller.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.controller.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ai-gateway-helm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "ai-gateway-helm.labels" -}} +helm.sh/chart: {{ include "ai-gateway-helm.chart" . }} +{{ include "ai-gateway-helm.controller.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "ai-gateway-helm.controller.selectorLabels" -}} +app.kubernetes.io/name: {{ include "ai-gateway-helm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "ai-gateway-helm.controller.serviceAccountName" -}} +{{- if .Values.controller.serviceAccount.create }} +{{- default (include "ai-gateway-helm.controller.fullname" .) .Values.controller.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.controller.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Create the name of the cluster role to use +*/}} +{{- define "ai-gateway-helm.controller.clusterRoleName" -}} +{{- $existing := lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" (include "ai-gateway-helm.controller.serviceAccountName" .) }} +{{- if $existing }} +{{- (include "ai-gateway-helm.controller.serviceAccountName" .) 
}} +{{- else }} +{{- printf "%s:%s" (include "ai-gateway-helm.controller.serviceAccountName" .) .Release.Namespace }} +{{- end }} +{{- end }} + +{{- define "ai-gateway-helm.inference-pool.clusterRoleName" -}} +{{- $existing := lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "envoy-ai-gateway-inference-pool-reader" }} +{{- if $existing }} +{{- "envoy-ai-gateway-inference-pool-reader" }} +{{- else }} +{{- printf "%s:%s" "envoy-ai-gateway-inference-pool-reader" .Release.Namespace}} +{{- end}} +{{- end -}} + +{{- define "ai-gateway-helm.inference-pool.clusterRoleBindingName" -}} +{{- $existing := lookup "rbac.authorization.k8s.io/v1" "ClusterRoleBinding" "" "envoy-ai-gateway-inference-pool-reader-binding" }} +{{- if $existing }} +{{- "envoy-ai-gateway-inference-pool-reader-binding" }} +{{- else }} +{{- printf "%s:%s" "envoy-ai-gateway-inference-pool-reader-binding" .Release.Namespace}} +{{- end}} +{{- end -}} + +{{/* +Convert extraEnvVars array to semicolon-separated string for extProc +*/}} +{{- define "ai-gateway-helm.extProc.envVarsString" -}} +{{- $envVars := list -}} +{{- range .Values.extProc.extraEnvVars -}} + {{- $envVars = append $envVars (printf "%s=%s" .name .value) -}} +{{- end -}} +{{- join ";" $envVars -}} +{{- end }} + +{{/* +Convert imagePullSecrets array to semicolon-separated string for extProc +*/}} +{{- define "ai-gateway-helm.extProc.imagePullSecretsString" -}} +{{- $src := default .Values.global.imagePullSecrets .Values.extProc.imagePullSecrets -}} +{{- $secrets := list -}} +{{- range $src -}} + {{- $secrets = append $secrets .name -}} +{{- end -}} +{{- join ";" $secrets -}} +{{- end }} + +{{/* +Returns controller imagePullSecrets if defined, otherwise falls back to global.imagePullSecrets. +This returns YAML (not an object), intended to be used with `with (include ...)` in templates. 
+*/}} +{{- define "ai-gateway-helm.controller.imagePullSecrets" -}} +{{- $src := default .Values.global.imagePullSecrets .Values.controller.imagePullSecrets -}} +{{- if $src -}} +{{- toYaml $src -}} +{{- end -}} +{{- end -}} diff --git a/sources/envoy-ai-gateway/v0.6.0/templates/admission_webhook.yaml b/sources/envoy-ai-gateway/v0.6.0/templates/admission_webhook.yaml new file mode 100644 index 00000000..f761d52a --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/templates/admission_webhook.yaml @@ -0,0 +1,106 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +{{/* Compute all cert data once up-front so both the MutatingWebhookConfiguration and + the Secret use the same CA. In the cert-manager path all variables stay empty; + cert-manager's cainjector injects the CA into the webhook via the annotation. + In the self-signed path we reuse existing data from the secret on upgrades, or + generate a fresh CA + cert on first install. */}} +{{- $caCrt := "" }} +{{- $tlsCrt := "" }} +{{- $tlsKey := "" }} +{{- if not .Values.controller.mutatingWebhook.certManager.enable }} + {{- $existing := lookup "v1" "Secret" .Release.Namespace .Values.controller.mutatingWebhook.tlsCertSecretName }} + {{- if $existing }} + {{- $caCrt = index $existing.data .Values.controller.mutatingWebhook.caBundleName }} + {{- $tlsCrt = index $existing.data .Values.controller.mutatingWebhook.tlsCertName }} + {{- $tlsKey = index $existing.data .Values.controller.mutatingWebhook.tlsKeyName }} + {{- else }} + {{- $serviceName := include "ai-gateway-helm.controller.fullname" . 
}} + {{- $ca := genCA (printf "%s-ca" $serviceName) 3650 }} + {{- $dnsNames := list + $serviceName + (printf "%s.%s" $serviceName .Release.Namespace) + (printf "%s.%s.svc" $serviceName .Release.Namespace) + (printf "%s.%s.svc.cluster.local" $serviceName .Release.Namespace) + -}} + {{- $cert := genSignedCert (printf "%s.%s.svc" $serviceName .Release.Namespace) nil $dnsNames 365 $ca }} + {{- $caCrt = $ca.Cert | b64enc }} + {{- $tlsCrt = $cert.Cert | b64enc }} + {{- $tlsKey = $cert.Key | b64enc }} + {{- end }} +{{- end }} +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: +{{- if .Values.controller.mutatingWebhook.certManager.enable }} + annotations: + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ .Values.controller.mutatingWebhook.certManager.certificateName}} +{{- end }} + name: envoy-ai-gateway-gateway-pod-mutator.{{ .Release.Namespace }} +webhooks: + - name: {{ include "ai-gateway-helm.controller.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local + clientConfig: + {{- if $caCrt }} + caBundle: {{ $caCrt }} + {{- end }} + service: + name: {{ include "ai-gateway-helm.controller.fullname" . 
}} + namespace: {{ .Release.Namespace }} + port: {{ .Values.controller.mutatingWebhook.port }} + path: /mutate + rules: + - apiGroups: [""] + apiVersions: ["v1"] + operations: ["CREATE"] + resources: ["pods"] + {{- if .Values.controller.mutatingWebhook.objectSelector }} + objectSelector: + {{- toYaml .Values.controller.mutatingWebhook.objectSelector | nindent 6 }} + {{- end}} + {{- if .Values.controller.mutatingWebhook.namespaceSelector }} + namespaceSelector: + {{- toYaml .Values.controller.mutatingWebhook.namespaceSelector | nindent 6 }} + {{- end}} + sideEffects: None + admissionReviewVersions: ["v1"] + timeoutSeconds: 10 + failurePolicy: Fail +--- +{{- if .Values.controller.mutatingWebhook.certManager.enable }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ .Values.controller.mutatingWebhook.certManager.certificateName }} + namespace: {{ .Release.Namespace }} +spec: + commonName: {{ include "ai-gateway-helm.controller.fullname" . }}.{{ .Release.Namespace }}.svc + dnsNames: + - {{ include "ai-gateway-helm.controller.fullname" . 
}}.{{ .Release.Namespace }}.svc + issuerRef: + kind: Issuer + name: {{ .Values.controller.mutatingWebhook.certManager.issuerName }} + secretName: {{ .Values.controller.mutatingWebhook.tlsCertSecretName }} +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ .Values.controller.mutatingWebhook.certManager.issuerName }} + namespace: {{ .Release.Namespace }} +spec: + selfSigned: {} +--- +{{- else }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.controller.mutatingWebhook.tlsCertSecretName }} + namespace: {{ .Release.Namespace }} +data: + {{ .Values.controller.mutatingWebhook.caBundleName }}: {{ $caCrt }} + {{ .Values.controller.mutatingWebhook.tlsCertName }}: {{ $tlsCrt }} + {{ .Values.controller.mutatingWebhook.tlsKeyName }}: {{ $tlsKey }} +--- +{{- end }} diff --git a/sources/envoy-ai-gateway/v0.6.0/templates/deployment.yaml b/sources/envoy-ai-gateway/v0.6.0/templates/deployment.yaml new file mode 100644 index 00000000..82699410 --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/templates/deployment.yaml @@ -0,0 +1,171 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ai-gateway-helm.controller.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "ai-gateway-helm.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.controller.replicaCount }} + selector: + matchLabels: + {{- include "ai-gateway-helm.controller.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.controller.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "ai-gateway-helm.controller.selectorLabels" . | nindent 8 }} + spec: + {{- with (include "ai-gateway-helm.controller.imagePullSecrets" .) }} + imagePullSecrets: + {{- . 
| nindent 8 }} + {{- end }} + serviceAccountName: {{ include "ai-gateway-helm.controller.serviceAccountName" . }} + hostNetwork: {{ .Values.controller.hostNetwork }} + {{- if .Values.controller.hostNetwork }} + dnsPolicy: ClusterFirstWithHostNet + {{- end }} + securityContext: + {{- toYaml .Values.controller.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.controller.securityContext | nindent 12 }} + image: "{{ .Values.controller.image.repository }}:{{ .Values.controller.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.controller.imagePullPolicy }} + ports: + {{- range .Values.controller.service.ports }} + - containerPort: {{ .targetPort }} + {{- end }} + args: + - -logLevel={{ .Values.controller.logLevel }} + - --extProcImage={{ .Values.extProc.image.repository }}:{{ .Values.extProc.image.tag | default .Chart.AppVersion }} + - --extProcImagePullPolicy={{ .Values.extProc.imagePullPolicy }} + - --extProcLogLevel={{ .Values.extProc.logLevel }} + {{- if .Values.extProc.enableRedaction }} + - --extProcEnableRedaction=true + {{- end }} + {{- with (include "ai-gateway-helm.extProc.imagePullSecretsString" .) }} + - --extProcImagePullSecrets={{ . 
}} + {{- end }} + {{- if .Values.controller.requestHeaderAttributes }} + - --requestHeaderAttributes={{ .Values.controller.requestHeaderAttributes }} + {{- end }} + {{- if ne .Values.controller.spanRequestHeaderAttributes nil }} + - --spanRequestHeaderAttributes={{ .Values.controller.spanRequestHeaderAttributes }} + {{- end }} + {{- if .Values.controller.metricsRequestHeaderAttributes }} + - --metricsRequestHeaderAttributes={{ .Values.controller.metricsRequestHeaderAttributes }} + {{- end }} + {{- if ne .Values.controller.logRequestHeaderAttributes nil }} + - --logRequestHeaderAttributes={{ .Values.controller.logRequestHeaderAttributes }} + {{- end }} + {{- $endpointPrefixes := list -}} + {{- if hasKey .Values.endpointConfig "openai" -}} + {{- $openai := .Values.endpointConfig.openai -}} + {{- $endpointPrefixes = append $endpointPrefixes (printf "openai:%s" $openai) -}} + {{- end -}} + {{- if hasKey .Values.endpointConfig "cohere" -}} + {{- $cohere := .Values.endpointConfig.cohere -}} + {{- $endpointPrefixes = append $endpointPrefixes (printf "cohere:%s" $cohere) -}} + {{- end -}} + {{- if hasKey .Values.endpointConfig "anthropic" -}} + {{- $anthropic := .Values.endpointConfig.anthropic -}} + {{- $endpointPrefixes = append $endpointPrefixes (printf "anthropic:%s" $anthropic) -}} + {{- end -}} + {{- if $endpointPrefixes }} + - "--endpointPrefixes={{ join "," $endpointPrefixes }}" + {{- end }} + {{- if .Values.extProc.extraEnvVars }} + - --extProcExtraEnvVars={{ include "ai-gateway-helm.extProc.envVarsString" . 
}} + {{- end }} + - --tlsCertDir=/certs + - --tlsCertName={{ .Values.controller.mutatingWebhook.tlsCertName }} + - --tlsKeyName={{ .Values.controller.mutatingWebhook.tlsKeyName }} + - --webhookPort={{ .Values.controller.mutatingWebhook.port }} + {{- if .Values.controller.leaderElection.enabled }} + - --enableLeaderElection=true + {{- end }} + - --rootPrefix={{ .Values.endpointConfig.rootPrefix }} + {{- if ne .Values.controller.maxRecvMsgSize nil }} + - --maxRecvMsgSize={{ .Values.controller.maxRecvMsgSize }} + {{- end }} + - --cacheSyncTimeout={{ .Values.controller.watch.cacheSyncTimeout }} + - --watchNamespaces={{ join "," .Values.controller.watch.namespaces }} + - --mcpSessionEncryptionSeed={{ .Values.controller.mcp.sessionEncryption.seed }} + - --mcpSessionEncryptionIterations={{ .Values.controller.mcp.sessionEncryption.iterations }} + {{- if .Values.controller.mcp.sessionEncryption.fallback.seed }} + - --mcpFallbackSessionEncryptionSeed={{ .Values.controller.mcp.sessionEncryption.fallback.seed }} + - --mcpFallbackSessionEncryptionIterations={{ .Values.controller.mcp.sessionEncryption.fallback.iterations }} + {{- end }} + livenessProbe: + grpc: + port: 1063 + initialDelaySeconds: 5 + periodSeconds: 2 + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + {{- if .Values.controller.podEnv }} + {{- range $key, $val := .Values.controller.podEnv }} + - name: {{ $key }} + value: {{ $val }} + {{- end }} + {{- end }} + {{- if .Values.controller.extraEnvVars }} + {{- toYaml .Values.controller.extraEnvVars | nindent 12 }} + {{- end }} + readinessProbe: + grpc: + port: 1063 + initialDelaySeconds: 5 + periodSeconds: 2 + resources: + {{- toYaml .Values.controller.resources | nindent 12 }} + volumeMounts: + - mountPath: /certs + name: certs + readOnly: true + {{- if .Values.controller.volumes }} + {{- range $volume := .Values.controller.volumes }} + - mountPath: {{ $volume.mountPath }} + name: {{ $volume.name }} + {{- if $volume.subPath }} + 
subPath: {{ $volume.subPath }} + {{- end }} + {{- end}} + {{- end }} + volumes: + - name: certs + secret: + secretName: {{ .Values.controller.mutatingWebhook.tlsCertSecretName }} + {{- if .Values.controller.volumes }} + {{- range $volume := .Values.controller.volumes }} + - name: {{ $volume.name }} + configMap: + defaultMode: {{ $volume.configmap.defaultMode }} + name: {{ $volume.configmap.name }} + {{- end }} + {{- end }} + {{- with .Values.controller.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.controller.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.controller.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/sources/envoy-ai-gateway/v0.6.0/templates/envoy_gateway_cluster_role_for_ai_resources.yaml b/sources/envoy-ai-gateway/v0.6.0/templates/envoy_gateway_cluster_role_for_ai_resources.yaml new file mode 100644 index 00000000..03d8ffca --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/templates/envoy_gateway_cluster_role_for_ai_resources.yaml @@ -0,0 +1,35 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +# Grants envoy-gateway service account read access to AI Gateway CRDs. +# Required when extensionManager.resources includes AIGatewayRoute/AIServiceBackend. +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "ai-gateway-helm.controller.fullname" . }}-envoy-gateway-ai-resources +rules: + - apiGroups: + - "aigateway.envoyproxy.io" + resources: + - "aigatewayroutes" + - "aiservicebackends" + verbs: + - "get" + - "list" + - "watch" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "ai-gateway-helm.controller.fullname" . 
}}-envoy-gateway-ai-resources +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "ai-gateway-helm.controller.fullname" . }}-envoy-gateway-ai-resources +subjects: + - kind: ServiceAccount + name: envoy-gateway + namespace: {{ .Values.envoyGateway.namespace }} diff --git a/sources/envoy-ai-gateway/v0.6.0/templates/envoy_gateway_cluster_role_for_inference_pool.yaml b/sources/envoy-ai-gateway/v0.6.0/templates/envoy_gateway_cluster_role_for_inference_pool.yaml new file mode 100644 index 00000000..2378e5ea --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/templates/envoy_gateway_cluster_role_for_inference_pool.yaml @@ -0,0 +1,38 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +# This file contains the RBAC roles and role bindings for the Envoy Gateway +# so that it can read the InferencePool resources that are set in the HTTPRoutes +# generated by the AI Gateway. +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "ai-gateway-helm.inference-pool.clusterRoleName" . }} +rules: + - apiGroups: + - "inference.networking.k8s.io" + resources: + - "inferencepools" + verbs: + - "get" + - "list" + - "watch" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "ai-gateway-helm.inference-pool.clusterRoleBindingName" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "ai-gateway-helm.inference-pool.clusterRoleName" . 
}} +subjects: + - kind: ServiceAccount + # The service account name is hardcoded to "envoy-gateway": + # https://github.com/envoyproxy/gateway/blob/70af785fba094929bc6044a57470d429205c4d5e/charts/gateway-helm/templates/envoy-gateway-serviceaccount.yaml#L4 + name: envoy-gateway + namespace: {{ .Values.envoyGateway.namespace }} +--- diff --git a/sources/envoy-ai-gateway/v0.6.0/templates/service.yaml b/sources/envoy-ai-gateway/v0.6.0/templates/service.yaml new file mode 100644 index 00000000..4254cd33 --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/templates/service.yaml @@ -0,0 +1,20 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ai-gateway-helm.controller.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "ai-gateway-helm.labels" . | nindent 4 }} +spec: + type: {{ .Values.controller.service.type }} + {{- with .Values.controller.service.ports }} + ports: + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + {{- include "ai-gateway-helm.controller.selectorLabels" . | nindent 4 }} diff --git a/sources/envoy-ai-gateway/v0.6.0/templates/serviceaccount.yaml b/sources/envoy-ai-gateway/v0.6.0/templates/serviceaccount.yaml new file mode 100644 index 00000000..a984ca1c --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/templates/serviceaccount.yaml @@ -0,0 +1,115 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +{{ if .Values.controller.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "ai-gateway-helm.controller.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "ai-gateway-helm.labels" . 
| nindent 4 }} + {{- with .Values.controller.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "ai-gateway-helm.controller.clusterRoleName" . }} +rules: + - apiGroups: [""] + resources: + - services + - secrets + - pods # TODO: this can be limited to EG system namespace, not the cluster level. + verbs: + - '*' + - apiGroups: ["apps"] + resources: + - deployments # TODO: this can be limited to EG system namespace, not the cluster level. + - daemonsets # TODO: this can be limited to EG system namespace, not the cluster level. + verbs: + - '*' + - apiGroups: + - inference.networking.k8s.io + resources: + - '*' + verbs: + - '*' + - apiGroups: + - gateway.networking.k8s.io + resources: + - '*' + verbs: + - '*' + - apiGroups: + - aigateway.envoyproxy.io + resources: + - '*' + verbs: + - '*' + - apiGroups: + - gateway.envoyproxy.io + resources: + - '*' + verbs: + - '*' + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - watch + - list + - create + - update + - apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get + - list + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + verbs: + - get + - list + - watch + - apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + resourceNames: + - 'envoy-ai-gateway-gateway-pod-mutator.{{ .Release.Namespace }}' + verbs: + - update + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "ai-gateway-helm.controller.clusterRoleName" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "ai-gateway-helm.controller.clusterRoleName" . 
}} +subjects: + - kind: ServiceAccount + name: {{ include "ai-gateway-helm.controller.serviceAccountName" . }} + namespace: '{{ .Release.Namespace }}' +{{- end }} diff --git a/sources/envoy-ai-gateway/v0.6.0/values.yaml b/sources/envoy-ai-gateway/v0.6.0/values.yaml new file mode 100644 index 00000000..8b08ee50 --- /dev/null +++ b/sources/envoy-ai-gateway/v0.6.0/values.yaml @@ -0,0 +1,263 @@ +# Copyright Envoy AI Gateway Authors +# SPDX-License-Identifier: Apache-2.0 +# The full text of the Apache license is available in the LICENSE file at +# the root of the repo. + +# Default values for ai-gateway-helm. + +# Global values shared across this chart and its subcharts (umbrella charts can set these once). +global: + # global.imagePullSecrets -- references to pre-created imagePullSecrets in the target namespace + # Precedence: component-specific values win. If `controller.imagePullSecrets` or + # `extProc.imagePullSecrets` are set (non-empty), those are used; otherwise this + # `global.imagePullSecrets` value is used as a fallback (values are not merged). + # Example: + # imagePullSecrets: + # - name: regcred + imagePullSecrets: [] + +# Global configuration for the endpoints supported by the AI Gateway. +endpointConfig: + # The prefix for all the routes served by the AI Gateway. Defaults to "/". All the generated routes will have this prefix. + # + # With the default "/", the AI Gateway will assume that the downstream client's OpenAI SDK will talk to the Gateway using the base_url + # set to "http://<gateway-host>/v1" which has the default "/v1" prefix in the base_url. + # + # This can be used for providing a separation between AIGatewayRoutes and normal HTTPRoutes when the top level "/v1/" is not desired. + rootPrefix: "/" + # Explicit provider prefixes. + # Defaults align with the gateway's built-in provider prefixes + # and can be set to an empty string to remove the extra provider root segment. + # Examples: + # openai: "" # results in /v1/... 
+ # cohere: "/cohere" # results in /cohere/v2/... + # anthropic: "/anthropic" # results in /anthropic/v1/... + openai: "" + cohere: "/cohere" + anthropic: "/anthropic" + +extProc: + image: + repository: docker.io/envoyproxy/ai-gateway-extproc + # Overrides the image tag whose default is the chart appVersion. + tag: "" + imagePullPolicy: IfNotPresent + imagePullSecrets: [] + # One of "info", "debug", "trace", "warn", "error", "fatal", "panic". + logLevel: info + # Enable redaction of sensitive information in debug logs (only takes effect when logLevel is "debug"). + enableRedaction: false + + ## @param extProc.extraEnvVars Array with extra environment variables to add to extProc containers + ## e.g: + ## extraEnvVars: + ## - name: OTEL_SERVICE_NAME + ## value: "ai-gateway-extproc" + ## - name: OTEL_TRACES_EXPORTER + ## value: "otlp" + ## # This disables pprof endpoint at "localhost:6060/debug/pprof" that can be accessed via port-forwarding for profiling. + ## # Enabled by default for troubleshooting purposes given the impact is negligible when not in use. + ## - name: DISABLE_PPROF + ## value: "true" + ## # GCP ADC authentication requests will use this proxy if set. + ## - name: AI_GATEWAY_GCP_AUTH_PROXY_URL + ## value: "http://proxy.example.com:8080" + ## + extraEnvVars: [] + +controller: + logLevel: info + nameOverride: "" + fullnameOverride: "ai-gateway-controller" + + # Comma-separated key-value pairs for mapping HTTP request headers to Otel attributes shared across metrics, spans, and access logs. + # Format: "header1:attribute1,header2:attribute2" + # Example: "x-tenant-id:tenant.id" + # x-api-key-id is injected by the SecurityPolicy apiKeyAuth (matched Secret data key = client ID). + # x-aim-service-id is injected by headerMutation on each AIGatewayRoute backendRef. + requestHeaderAttributes: "x-api-key-id:api_key_id,x-aim-service-id:aim_service_id" + + # Comma-separated key-value pairs for mapping HTTP request headers to otel span attributes. 
+ # Format: "header1:attribute1,header2:attribute2" + # Defaults to "agent-session-id:session.id" when unset. + # Set to "" to disable the default. + # Example: "x-forwarded-proto:url.scheme,x-tenant-id:tenant.id" + spanRequestHeaderAttributes: null + + # Comma-separated key-value pairs for mapping HTTP request headers to Otel metric attributes. + # Format: "header1:label1,header2:label2" + # Example: "x-tenant-id:tenant.id,x-tenant-id:tenant.id" + metricsRequestHeaderAttributes: "x-api-key-id:api_key_id,x-aim-service-id:aim_service_id" + + # Comma-separated key-value pairs for mapping HTTP request headers to access log attributes. + # Format: "header1:attribute1,header2:attribute2" + # Defaults to "agent-session-id:session.id" when unset. + # Set to "" to disable the default. + # Example: "x-forwarded-proto:url.scheme,x-tenant-id:tenant.id" + logRequestHeaderAttributes: null + + + # -- Service Account -- + serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + + # Enable leader election mechanism for protecting against split brain if multiple operator pods/replicas are started. + leaderElection: + enabled: true + + # Configuration for how the Kubernetes controllers watch the different resources. + watch: + # Namespaces to watch. An empty list means to watch all namespaces. + # Default is an empty list, to watch all namespaces. + namespaces: [] + # Sync timeout for the Kubernetes cache. If the cache is not synced within this time, the controller will exit. + # Default is 2 minutes. + cacheSyncTimeout: 2m + + # -- Deployment configs -- + image: + repository: docker.io/envoyproxy/ai-gateway-controller + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "" + imagePullPolicy: IfNotPresent + replicaCount: 1 + imagePullSecrets: [] + podAnnotations: {} + podSecurityContext: {} + securityContext: {} + # DEPRECATED: podEnv will be removed after v0.3. Use controller.extraEnvVars instead. + # Example of a podEnv + # AWS STS request when rotating OIDC credentials will be configured to use AI_GATEWAY_STS_PROXY_URL proxy if set. + # - key: AI_GATEWAY_STS_PROXY_URL + # value: some-proxy-placeholder + # Azure authentication request will be configured to use AI_GATEWAY_AZURE_PROXY_URL proxy if set. + # - key: AI_GATEWAY_AZURE_PROXY_URL + # value: some-proxy-placeholder + # GCP authentication request will be configured to use AI_GATEWAY_GCP_AUTH_PROXY_URL proxy if set. + # - key: AI_GATEWAY_GCP_AUTH_PROXY_URL + # value: some-proxy-placeholder + podEnv: {} + + ## @param controller.extraEnvVars Array with extra environment variables to add to controller containers + ## e.g: + ## extraEnvVars: + ## # This disables pprof endpoint at "localhost:6060/debug/pprof" that can be accessed via port-forwarding for profiling. + ## # Enabled by default for troubleshooting purposes given the impact is negligible when not in use. + ## - name: DISABLE_PPROF + ## value: "true" + extraEnvVars: [] + + # Example of volumes + # - mountPath: /placeholder/path + # name: volume-name + # subPath: placeholder-sub-path + # configmap: + # defaultMode: placeholder + # name: configmap-name + volumes: [] + service: + type: ClusterIP + ports: + # Note: the port and targetPort here must match mutatingWebhook.port, as the + # Service needs to route traffic to the webhook server on the same port. + - name: mutating-webhook + protocol: TCP + port: 9443 + appProtocol: http + targetPort: 9443 + - name: grpc + protocol: TCP + port: 1063 + appProtocol: grpc + targetPort: 1063 + - name: http-metrics + protocol: TCP + appProtocol: http + port: 8080 + targetPort: 8080 + + mutatingWebhook: + # The port on which the mutating webhook server listens. 
Must match the service port defined in service.ports. + port: 9443 + # The secret that contains the CA certificate and the server certificate for the webhook server. + # Defaults to the self-signed cert generated by the project. The namespace of the secret + # must be the same as the namespace of the controller installation. + # + # The self-signed cert is embedded as part of the helm chart, so it is not recommended for production use. + # You can use the configurations below to specify a custom secret that contains the CA certificate and the server certificate. + # + # When specifying a secret generated by cert-manager, other fields (tlsCertName, tlsKeyName and caBundleName) + # do not need to be modified since they match the default key names used by cert-manager: + # https://cert-manager.io/docs/usage/certificate/ + tlsCertSecretName: self-signed-cert-for-mutating-webhook + # The name of the server certificate in the secret to serve the webhook. + tlsCertName: tls.crt + # The name of the server private key in the secret to serve the webhook. + tlsKeyName: tls.key + # The name of the CA bundle in the secret to use for the webhook. + caBundleName: ca.crt + # When cert-manager is enabled, the self-signed cert is created and rotated by cert-manager. + certManager: + enable: false + # The name of the issuer. + issuerName: self-signed-issuer-for-mutating-webhook + # The name of the certificate. + certificateName: self-signed-cert-for-mutating-webhook + # The object selector for the mutating webhook. By default it selects envoy-gateway managed objects. + objectSelector: + matchLabels: + app.kubernetes.io/managed-by: envoy-gateway + # Example of namespaceSelector to select the webhook target based on namespaces. 
+ # See: https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-namespaceselector + # namespaceSelector: + # matchExpressions: + # - key: environment + # operator: In + # values: ["prod","staging"] + + # When true, the controller pod uses the host's network namespace. + # When enabled, dnsPolicy defaults to "ClusterFirstWithHostNet" so that + # DNS resolution continues to work correctly inside the pod. + hostNetwork: false + + resources: {} + nodeSelector: {} + tolerations: [] + affinity: {} + + # maxRecvMsgSize is the maximum message size in bytes that the gRPC extension server can receive + # from xDS (envoy-gateway). + # This value should be increased in setups where count/complexity of xDS + # resources (configuration) is big. Defaults to 4MB. + # maxRecvMsgSize: "4194304" + + # MCP Proxy settings + mcp: + # Configuration for MCP session encryption. + sessionEncryption: + # Seed used to derive the MCP session encryption key. + # This value should be set to a secure random string in production environments instead of the default below. + seed: "default-insecure-seed" + # Number of PBKDF2 iterations to use for deriving the MCP session encryption key. + # A higher number increases security but also increases CPU usage and time consumed during key derivation. + # The default value of 100,000 is a reasonable balance for most production environments. + iterations: 100000 + # Optional fallback configuration for MCP session encryption key rotation. + fallback: + # Fallback seed used to derive the MCP session encryption key. + # Set to a non-empty value to enable fallback. + # It should be the previous seed used before changing to the new seed. + seed: "" + # Number of PBKDF2 iterations to use for deriving the MCP session encryption key with the fallback seed. + iterations: 100000 + +# Configuration for the Envoy Gateway component that AI Gateway relies on to program Envoy. 
+envoyGateway: + # The namespace where the Envoy Gateway controller is installed. + namespace : envoy-gateway-system diff --git a/sources/envoy-gateway-config/README.md b/sources/envoy-gateway-config/README.md new file mode 100644 index 00000000..cf9f2fbb --- /dev/null +++ b/sources/envoy-gateway-config/README.md @@ -0,0 +1,25 @@ +# Envoy Gateway + AI Gateway (shared listener) + +A single `Gateway` named `https` on `:443` serves both apps (`HTTPRoute`) and AI traffic (`AIGatewayRoute`), routed by hostname. + +## Architecture + +- **Envoy Gateway** auto-creates a MetalLB `LoadBalancer` for external `:443` +- **`https` ClusterIP** (optional, default on) is only for CoreDNS `*.domain` rewrite — not the external front door +- **Envoy AI Gateway** controller connects via `extensionManager` on the envoy-gateway Helm chart +- **`GatewayConfig` `ai-extproc`** adds the ext_proc sidecar to the shared data plane + +## Values + +| Value | Default | Purpose | +|-------|---------|---------| +| `gatewayDnsService.enabled` | `true` | Stable ClusterIP `https` for in-cluster DNS | +| `k8sApiPassthrough.enabled` | `false` | TLS passthrough to K8s API on `:6443` | + +**Do not enable `k8sApiPassthrough`** when node-ip equals the MetalLB pool IP (cluster-bloom default). Envoy `:6443` hijacks pod→apiserver traffic and breaks controllers. + +## AI routes + +Create `AIGatewayRoute` resources parented to `Gateway/https` in `envoy-gateway-system`. Match on `Host: ai.<domain>` and the `x-ai-eg-model` header. + +See `~/dev/envoy-ai-gateway-shared-listener/` for a standalone reference bundle. 
diff --git a/sources/envoy-gateway-config/templates/client-trafic-policy.yaml b/sources/envoy-gateway-config/templates/client-trafic-policy.yaml index 3cca2931..8c9085e2 100644 --- a/sources/envoy-gateway-config/templates/client-trafic-policy.yaml +++ b/sources/envoy-gateway-config/templates/client-trafic-policy.yaml @@ -1,7 +1,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1 kind: ClientTrafficPolicy metadata: - name: preserve-url-encoding + name: shared-gateway-traffic namespace: envoy-gateway-system spec: targetRefs: @@ -10,3 +10,5 @@ spec: name: https path: escapedSlashesAction: KeepUnchanged + connection: + bufferLimit: 50Mi diff --git a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml index 6a4a991b..2ec23820 100644 --- a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml +++ b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml @@ -1,6 +1,5 @@ -# EnvoyProxy configuration for access logging and pod placement -# Replaces kgateway HTTPListenerPolicy_access-logs.yaml functionality -# Referenced by Gateway to enable per-gateway access logging and ensure data plane pods land on first node +# EnvoyProxy configuration for access logging and pod placement. +# Referenced by Gateway via infrastructure.parametersRef. 
apiVersion: gateway.envoyproxy.io/v1alpha1 kind: EnvoyProxy metadata: @@ -37,7 +36,11 @@ spec: authority: "%REQ(:AUTHORITY)%" backendHost: "%UPSTREAM_HOST%" backendCluster: "%UPSTREAM_CLUSTER%" + model: "%REQ(x-ai-eg-model)%" + llm_input_token: "%DYNAMIC_METADATA(io.envoy.ai_gateway:llm_input_token)%" + llm_output_token: "%DYNAMIC_METADATA(io.envoy.ai_gateway:llm_output_token)%" + llm_total_token: "%DYNAMIC_METADATA(io.envoy.ai_gateway:llm_total_token)%" sinks: - type: File file: - path: /dev/stdout \ No newline at end of file + path: /dev/stdout diff --git a/sources/envoy-gateway-config/templates/gateway-config-ai.yaml b/sources/envoy-gateway-config/templates/gateway-config-ai.yaml new file mode 100644 index 00000000..43b41d88 --- /dev/null +++ b/sources/envoy-gateway-config/templates/gateway-config-ai.yaml @@ -0,0 +1,12 @@ +apiVersion: aigateway.envoyproxy.io/v1beta1 +kind: GatewayConfig +metadata: + name: ai-extproc + namespace: envoy-gateway-system +spec: + extProc: + kubernetes: + resources: + requests: + cpu: 100m + memory: 256Mi diff --git a/sources/envoy-gateway-config/templates/gateway.yaml b/sources/envoy-gateway-config/templates/gateway.yaml index abdee200..16279f89 100644 --- a/sources/envoy-gateway-config/templates/gateway.yaml +++ b/sources/envoy-gateway-config/templates/gateway.yaml @@ -1,8 +1,12 @@ +# Shared listener: apps via HTTPRoute, AI via AIGatewayRoute on the same :443 Gateway. +# Do not use mergeGateways — Envoy AI Gateway does not support it (envoyproxy/ai-gateway#1638). 
apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: name: https namespace: envoy-gateway-system + annotations: + aigateway.envoyproxy.io/gateway-config: ai-extproc spec: gatewayClassName: envoy-gateway infrastructure: @@ -24,6 +28,7 @@ spec: kind: Secret name: cluster-tls mode: Terminate +{{- if .Values.k8sApiPassthrough.enabled }} - allowedRoutes: kinds: - group: gateway.networking.k8s.io @@ -32,7 +37,8 @@ spec: from: All hostname: "k8s.{{ .Values.domain }}" name: k8s-passthrough - port: 443 + port: 6443 protocol: TLS tls: mode: Passthrough +{{- end }} diff --git a/sources/envoy-gateway-config/templates/https-service.yaml b/sources/envoy-gateway-config/templates/https-service.yaml index 9eac8dfa..f68b4487 100644 --- a/sources/envoy-gateway-config/templates/https-service.yaml +++ b/sources/envoy-gateway-config/templates/https-service.yaml @@ -1,3 +1,6 @@ +{{- if .Values.gatewayDnsService.enabled }} +# Stable ClusterIP for CoreDNS *.domain rewrite — not a LoadBalancer front door. +# External traffic uses Envoy Gateway auto-generated LoadBalancer (MetalLB). apiVersion: v1 kind: Service metadata: @@ -16,3 +19,4 @@ spec: protocol: TCP targetPort: 10443 type: ClusterIP +{{- end }} diff --git a/sources/envoy-gateway-config/templates/tlsroute-k8s-passthrough.yaml b/sources/envoy-gateway-config/templates/tlsroute-k8s-passthrough.yaml index 5df90c07..e5ac4867 100644 --- a/sources/envoy-gateway-config/templates/tlsroute-k8s-passthrough.yaml +++ b/sources/envoy-gateway-config/templates/tlsroute-k8s-passthrough.yaml @@ -1,7 +1,6 @@ -# TLS Passthrough Route for Kubernetes API Access -# Copied from kgateway-config with namespace update -# Allows direct TLS access to Kubernetes API via k8s.domain ---- +{{- if .Values.k8sApiPassthrough.enabled }} +# TLS passthrough to kubernetes API on Gateway :6443 (k8s.domain). +# Requires node-ip != MetalLB VIP — see k8sApiPassthrough in values.yaml. 
apiVersion: gateway.networking.k8s.io/v1alpha2 kind: TLSRoute metadata: @@ -15,10 +14,12 @@ spec: kind: Gateway name: https namespace: envoy-gateway-system + sectionName: k8s-passthrough rules: - backendRefs: - group: "" kind: Service name: kubernetes port: 443 - weight: 1 \ No newline at end of file + weight: 1 +{{- end }} diff --git a/sources/envoy-gateway-config/values.yaml b/sources/envoy-gateway-config/values.yaml index 235cec5e..c4f58310 100644 --- a/sources/envoy-gateway-config/values.yaml +++ b/sources/envoy-gateway-config/values.yaml @@ -1 +1,11 @@ -domain: # to be filled by cluster-forge app \ No newline at end of file +domain: # to be filled by cluster-forge app + +# Stable ClusterIP DNS name for in-cluster *.domain resolution (CoreDNS rewrite). +# Does not replace Envoy Gateway auto-generated LoadBalancer from MetalLB. +gatewayDnsService: + enabled: true + +# TLS passthrough to kubernetes API via Gateway. Disabled by default on cluster-bloom +# where node-ip equals the MetalLB pool IP — a :6443 listener hijacks pod→apiserver traffic. 
+k8sApiPassthrough: + enabled: false diff --git a/sources/inference-extension-crds/v1.5.0/Chart.yaml b/sources/inference-extension-crds/v1.5.0/Chart.yaml new file mode 100644 index 00000000..ecbcd6a4 --- /dev/null +++ b/sources/inference-extension-crds/v1.5.0/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +appVersion: v1.5.0 +description: CRDs for Gateway API Inference Extension (InferencePool) +home: https://gateway-api-inference-extension.sigs.k8s.io/ +keywords: +- gateway-api +- inference +- inference-extension +- inferencepool +name: inference-extension-crds +sources: +- https://github.com/kubernetes-sigs/gateway-api-inference-extension +type: application +version: v1.5.0 diff --git a/sources/inference-extension-crds/v1.5.0/templates/inferencepools.yaml b/sources/inference-extension-crds/v1.5.0/templates/inferencepools.yaml new file mode 100644 index 00000000..363f7dfa --- /dev/null +++ b/sources/inference-extension-crds/v1.5.0/templates/inferencepools.yaml @@ -0,0 +1,373 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + api-approved.kubernetes.io: https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/1173 + inference.networking.k8s.io/bundle-version: v1.5.0 + name: inferencepools.inference.networking.k8s.io +spec: + group: inference.networking.k8s.io + names: + kind: InferencePool + listKind: InferencePoolList + plural: inferencepools + shortNames: + - infpool + singular: inferencepool + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: | + InferencePool is the Schema for the InferencePools API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the desired state of the InferencePool. + properties: + appProtocol: + default: http + description: |- + AppProtocol describes the application protocol for all the target ports. + + If unspecified, the protocol defaults to HTTP/1.1. + + Supported values include: + * "http": HTTP/1.1. This is the default. + * "kubernetes.io/h2c": HTTP/2 over cleartext. + enum: + - http + - kubernetes.io/h2c + type: string + endpointPickerRef: + description: |- + EndpointPickerRef is a reference to the Endpoint Picker extension and its + associated configuration. + properties: + failureMode: + default: FailClose + description: |- + FailureMode configures how the parent handles the case when the Endpoint Picker extension + is non-responsive. When unspecified, defaults to "FailClose". + enum: + - FailOpen + - FailClose + type: string + group: + default: "" + description: |- + Group is the group of the referent API object. When unspecified, the default value + is "", representing the Core API group. + maxLength: 253 + minLength: 0 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. + + Required if the referent is ambiguous, e.g. service with multiple ports. + + Defaults to "Service" when not specified. 
+ + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations MUST NOT + support ExternalName Services. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent API object. + maxLength: 253 + minLength: 1 + type: string + port: + description: |- + Port is the port of the Endpoint Picker extension service. + + Port is required when the referent is a Kubernetes Service. In this + case, the port number is the service port number, not the target port. + For other resources, destination port might be derived from the referent + resource or this field. + properties: + number: + description: |- + Number defines the port number to access the selected model server Pods. + The number must be in the range 1 to 65535. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - number + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: port is required when kind is 'Service' or unspecified + (defaults to 'Service') + rule: self.kind != 'Service' || has(self.port) + selector: + description: |- + Selector determines which Pods are members of this inference pool. + It matches Pods by their labels only within the same namespace; cross-namespace + selection is not supported. + + The structure of this LabelSelector is intentionally simple to be compatible + with Kubernetes Service selectors, as some implementations may translate + this configuration into a Service resource. + properties: + matchLabels: + additionalProperties: + description: |- + LabelValue is the value of a label. This is used for validation + of maps. 
This matches the Kubernetes label validation rules: + * must be 63 characters or less (can be empty), + * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]), + * could contain dashes (-), underscores (_), dots (.), and alphanumerics between. + + Valid values include: + + * MyValue + * my.name + * 123-my-value + maxLength: 63 + minLength: 0 + pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$ + type: string + description: |- + MatchLabels contains a set of required {key,value} pairs. + An object must match every label in this map to be selected. + The matching logic is an AND operation on all entries. + maxProperties: 64 + minProperties: 1 + type: object + required: + - matchLabels + type: object + targetPorts: + description: |- + TargetPorts defines a list of ports that are exposed by this InferencePool. + Every port will be treated as a distinctive endpoint by EPP, + addressable as a 'podIP:portNumber' combination. + items: + description: Port defines the network port that will be exposed + by this InferencePool. + properties: + number: + description: |- + Number defines the port number to access the selected model server Pods. + The number must be in the range 1 to 65535. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - number + type: object + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + x-kubernetes-validations: + - message: port number must be unique + rule: self.all(p1, self.exists_one(p2, p1.number==p2.number)) + required: + - endpointPickerRef + - selector + - targetPorts + type: object + status: + description: Status defines the observed state of the InferencePool. + properties: + parents: + description: |- + Parents is a list of parent resources, typically Gateways, that are associated with + the InferencePool, and the status of the InferencePool with respect to each parent. 
+ + A controller that manages the InferencePool, must add an entry for each parent it manages + and remove the parent entry when the controller no longer considers the InferencePool to + be associated with that parent. + + A maximum of 32 parents will be represented in this list. When the list is empty, + it indicates that the InferencePool is not associated with any parents. + items: + description: ParentStatus defines the observed state of InferencePool + from a Parent, i.e. Gateway. + properties: + conditions: + description: |- + Conditions is a list of status conditions that provide information about the observed + state of the InferencePool. This field is required to be set by the controller that + manages the InferencePool. + + Supported condition types are: + + * "Accepted" + * "ResolvedRefs" + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. 
+ Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + controllerName: + description: |- + ControllerName is a domain/path string that indicates the name of the controller that + wrote this status. This corresponds with the GatewayClass controllerName field when the + parentRef references a Gateway kind. + + Example: "example.net/gateway-controller". + + The format of this field is DOMAIN "/" PATH, where DOMAIN and PATH are valid Kubernetes names: + + https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + + Controllers MAY populate this field when writing status. When populating this field, controllers + should ensure that entries to status populated with their ControllerName are cleaned up when they + are no longer necessary. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*\/[A-Za-z0-9\/\-._~%!$&'()*+,;=:]+$ + type: string + parentRef: + description: |- + ParentRef is used to identify the parent resource that this status + is associated with. It is used to match the InferencePool with the parent + resource, such as a Gateway. 
+ properties: + group: + default: gateway.networking.k8s.io + description: |- + Group is the group of the referent API object. When unspecified, the referent is assumed + to be in the "gateway.networking.k8s.io" API group. + maxLength: 253 + minLength: 0 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Gateway + description: |- + Kind is the kind of the referent API object. When unspecified, the referent is assumed + to be a "Gateway" kind. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent API object. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referenced object. When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details: https://gateway-api.sigs.k8s.io/api-types/referencegrant/ + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - parentRef + type: object + maxItems: 32 + type: array + x-kubernetes-list-type: atomic + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: null + storedVersions: null diff --git a/sources/inference-extension-crds/v1.5.0/values.yaml b/sources/inference-extension-crds/v1.5.0/values.yaml new file mode 100644 index 00000000..2b477a6c --- /dev/null +++ b/sources/inference-extension-crds/v1.5.0/values.yaml @@ -0,0 +1 @@ +# No configurable values — this chart only installs the InferencePool CRD. 
diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-logs-metrics-k8s.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-logs-metrics-k8s.yaml index 0a23f2c6..3aa36e96 100644 --- a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-logs-metrics-k8s.yaml +++ b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-logs-metrics-k8s.yaml @@ -665,6 +665,33 @@ spec: target_label: node scrape_interval: 5m scrape_timeout: 30s + - job_name: ai-gateway-extproc + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - envoy-gateway-system + relabel_configs: + - action: keep + regex: https + source_labels: + - __meta_kubernetes_pod_label_gateway_envoyproxy_io_owning_gateway_name + - action: replace + regex: (.+) + replacement: $1:1064 + source_labels: + - __meta_kubernetes_pod_ip + target_label: __address__ + - action: replace + source_labels: + - __meta_kubernetes_pod_namespace + target_label: namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: pod + metrics_path: /metrics + scrape_interval: 30s processors: batch: send_batch_size: 2000 From 8857d935e9cca8cf8f923d3ca225c46a140879b5 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 4 Jun 2026 13:20:17 +0300 Subject: [PATCH 03/21] fix: render argocd application template for oci and https --- root/templates/cluster-apps.yaml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/root/templates/cluster-apps.yaml b/root/templates/cluster-apps.yaml index 70067a78..c94c3534 100644 --- a/root/templates/cluster-apps.yaml +++ b/root/templates/cluster-apps.yaml @@ -24,14 +24,12 @@ spec: sources: # Primary source: OCI/external chart - repoURL: {{ $renderedRepoURL }} - {{- if hasPrefix "oci://" $renderedRepoURL }} - {{- if eq .path "." 
}} - chart: {{ trimPrefix "oci://" $renderedRepoURL | base }} - {{- else }} - chart: {{ .path }} - {{- end }} - {{- else }} + {{- if and .path (not (hasPrefix "oci://" $renderedRepoURL)) }} path: {{ .path }} + {{- else if .chart }} + chart: {{ .chart }} + {{- else if hasPrefix "oci://" $renderedRepoURL }} + path: {{ .path | default "." }} {{- end }} targetRevision: {{ .repoVersion | default $clusterForgeTargetRevision | quote }} helm: @@ -72,16 +70,18 @@ spec: source: repoURL: {{ $renderedRepoURL | default $clusterForgeRepoUrl }} targetRevision: {{ .repoVersion | default $clusterForgeTargetRevision | quote }} - {{- if .chart }} + {{- if and .path (not (hasPrefix "oci://" $renderedRepoURL)) }} + {{- if .repoURL }} + path: {{ .path }} + {{- else }} + path: sources/{{ .path }} + {{- end }} + {{- else if .chart }} chart: {{ .chart }} {{- else if .repoURL }} {{- if hasPrefix "oci://" $renderedRepoURL }} path: {{ .path | default "." }} - {{- else }} - path: {{ .path }} {{- end }} - {{- else }} - path: sources/{{ .path }} {{- end }} {{- if or .valuesFile .valuesObject .helmParameters }} helm: From e73fac33e0063d0a58375468663a7229de778071 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 4 Jun 2026 15:10:04 +0300 Subject: [PATCH 04/21] EAI-5821 fix sbom components --- sbom/components.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sbom/components.yaml b/sbom/components.yaml index 299130e8..6d0aa621 100644 --- a/sbom/components.yaml +++ b/sbom/components.yaml @@ -119,6 +119,18 @@ components: projectUrl: https://github.com/cloudnative-pg/cloudnative-pg license: Apache License 2.0 licenseUrl: https://github.com/cloudnative-pg/cloudnative-pg/blob/main/LICENSE + envoy-ai-gateway: + path: envoy-ai-gateway/v0.6.0 + sourceUrl: oci://docker.io/envoyproxy/ai-gateway-helm + projectUrl: https://github.com/envoyproxy/ai-gateway + license: Apache License 2.0 + licenseUrl: https://github.com/envoyproxy/ai-gateway/blob/main/LICENSE + envoy-ai-gateway-crds: 
+ path: envoy-ai-gateway-crds/v0.6.0 + sourceUrl: oci://docker.io/envoyproxy/ai-gateway-crds-helm + projectUrl: https://github.com/envoyproxy/ai-gateway + license: Apache License 2.0 + licenseUrl: https://github.com/envoyproxy/ai-gateway/blob/main/LICENSE envoy-gateway: path: envoy-gateway/v1.7.1 sourceUrl: oci://docker.io/envoyproxy/gateway-helm @@ -138,6 +150,12 @@ components: projectUrl: https://github.com/go-gitea/gitea license: MIT License licenseUrl: https://github.com/go-gitea/gitea/blob/main/LICENSE + inference-extension-crds: + path: inference-extension-crds/v1.5.0 + sourceUrl: https://github.com/kubernetes-sigs/gateway-api-inference-extension + projectUrl: https://github.com/kubernetes-sigs/gateway-api-inference-extension + license: Apache License 2.0 + licenseUrl: https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/LICENSE kaiwo: path: null repoVersion: v0.2.1 From 26e90ed84339f7fca7930dc147b48f1b9ad249d0 Mon Sep 17 00:00:00 2001 From: Mika Ranta Date: Fri, 5 Jun 2026 09:51:10 +0300 Subject: [PATCH 05/21] feat(envoy-gateway-config): add tls-passthrough gateway for k8s API Replace the :6443 k8s-passthrough listener on the shared `https` gateway with a dedicated `tls-passthrough` gateway on :443 that owns the external MetalLB LoadBalancer and does SNI-based TLS passthrough: k8s.<domain> -> kube API service, *.<domain> -> apps gateway. The apps gateway moves to ClusterIP behind it. The listener and TLSRoutes carry explicit hostnames: Envoy Gateway TLS passthrough builds SNI filter chains from hostnames, so an empty hostname yields an empty Envoy config that never routes. Listening on :443 instead of :6443 avoids hijacking pod->apiserver traffic where the node IP equals the MetalLB pool IP.
Refs: EAI-5821 --- .../templates/envoy-proxy-access-logs.yaml | 4 +++ .../templates/gateway.yaml | 18 ++-------- .../templates/tls-passthrough-gateway.yaml | 36 +++++++++++++++++++ .../tls-passthrough-proxy-config.yaml | 22 ++++++++++++ .../templates/tlsroute-k8s-passthrough.yaml | 25 ------------- .../tlsroute-tls-passthrough-apps.yaml | 26 ++++++++++++++ .../tlsroute-tls-passthrough-k8s.yaml | 24 +++++++++++++ sources/envoy-gateway-config/values.yaml | 24 +++++++++---- 8 files changed, 133 insertions(+), 46 deletions(-) create mode 100644 sources/envoy-gateway-config/templates/tls-passthrough-gateway.yaml create mode 100644 sources/envoy-gateway-config/templates/tls-passthrough-proxy-config.yaml delete mode 100644 sources/envoy-gateway-config/templates/tlsroute-k8s-passthrough.yaml create mode 100644 sources/envoy-gateway-config/templates/tlsroute-tls-passthrough-apps.yaml create mode 100644 sources/envoy-gateway-config/templates/tlsroute-tls-passthrough-k8s.yaml diff --git a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml index 2ec23820..9950d034 100644 --- a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml +++ b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml @@ -9,6 +9,10 @@ spec: provider: type: Kubernetes kubernetes: + envoyService: + # ClusterIP: the apps gateway sits behind the tls-passthrough gateway and + # no longer owns the external LoadBalancer / VIP. + type: {{ .Values.appsGateway.serviceType }} envoyDeployment: pod: nodeSelector: diff --git a/sources/envoy-gateway-config/templates/gateway.yaml b/sources/envoy-gateway-config/templates/gateway.yaml index 16279f89..090a7a42 100644 --- a/sources/envoy-gateway-config/templates/gateway.yaml +++ b/sources/envoy-gateway-config/templates/gateway.yaml @@ -1,4 +1,6 @@ -# Shared listener: apps via HTTPRoute, AI via AIGatewayRoute on the same :443 Gateway. 
+# Apps gateway: terminates TLS and serves apps via HTTPRoute and AI via AIGatewayRoute. +# Exposed as ClusterIP (see appsGateway.serviceType); the tls-passthrough gateway is the +# external front door and TLS-passes-through to this gateway by SNI. # Do not use mergeGateways — Envoy AI Gateway does not support it (envoyproxy/ai-gateway#1638). apiVersion: gateway.networking.k8s.io/v1 kind: Gateway @@ -28,17 +30,3 @@ spec: kind: Secret name: cluster-tls mode: Terminate -{{- if .Values.k8sApiPassthrough.enabled }} - - allowedRoutes: - kinds: - - group: gateway.networking.k8s.io - kind: TLSRoute - namespaces: - from: All - hostname: "k8s.{{ .Values.domain }}" - name: k8s-passthrough - port: 6443 - protocol: TLS - tls: - mode: Passthrough -{{- end }} diff --git a/sources/envoy-gateway-config/templates/tls-passthrough-gateway.yaml b/sources/envoy-gateway-config/templates/tls-passthrough-gateway.yaml new file mode 100644 index 00000000..c040febc --- /dev/null +++ b/sources/envoy-gateway-config/templates/tls-passthrough-gateway.yaml @@ -0,0 +1,36 @@ +{{- if .Values.tlsPassthroughGateway.enabled }} +# tls-passthrough gateway: the external front door. Owns the MetalLB LoadBalancer on :443 and +# does pure TLS passthrough by SNI (see the tls-passthrough TLSRoutes): +# k8s. -> kubernetes API service +# everything else -> apps gateway (`https` ClusterIP), which terminates TLS. +# Separate Envoy data plane from the apps gateway (no mergeGateways), and listens on :443 +# only so it never hijacks pod->apiserver (:6443) traffic on cluster-bloom. 
+apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: tls-passthrough + namespace: envoy-gateway-system +spec: + gatewayClassName: envoy-gateway + infrastructure: + parametersRef: + group: gateway.envoyproxy.io + kind: EnvoyProxy + name: tls-passthrough-proxy-config + listeners: + # TLS passthrough is SNI-based, so the listener and its routes must carry hostnames — + # an empty hostname yields no filter chain and an empty Envoy config. *. covers + # both app hostnames and k8s.; the k8s route wins by SNI longest-match. + - name: tls + hostname: "*.{{ .Values.domain }}" + port: 443 + protocol: TLS + tls: + mode: Passthrough + allowedRoutes: + kinds: + - group: gateway.networking.k8s.io + kind: TLSRoute + namespaces: + from: All +{{- end }} diff --git a/sources/envoy-gateway-config/templates/tls-passthrough-proxy-config.yaml b/sources/envoy-gateway-config/templates/tls-passthrough-proxy-config.yaml new file mode 100644 index 00000000..b68646e1 --- /dev/null +++ b/sources/envoy-gateway-config/templates/tls-passthrough-proxy-config.yaml @@ -0,0 +1,22 @@ +{{- if .Values.tlsPassthroughGateway.enabled }} +# EnvoyProxy for the tls-passthrough gateway: external LoadBalancer (VIP) + first-node placement. +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyProxy +metadata: + name: tls-passthrough-proxy-config + namespace: envoy-gateway-system +spec: + provider: + type: Kubernetes + kubernetes: + envoyService: + type: LoadBalancer + {{- with .Values.tlsPassthroughGateway.loadBalancerIP }} + loadBalancerIP: {{ . 
| quote }} + {{- end }} + envoyDeployment: + pod: + nodeSelector: + cluster-bloom/first-node: "true" + priorityClassName: "system-cluster-critical" +{{- end }} diff --git a/sources/envoy-gateway-config/templates/tlsroute-k8s-passthrough.yaml b/sources/envoy-gateway-config/templates/tlsroute-k8s-passthrough.yaml deleted file mode 100644 index e5ac4867..00000000 --- a/sources/envoy-gateway-config/templates/tlsroute-k8s-passthrough.yaml +++ /dev/null @@ -1,25 +0,0 @@ -{{- if .Values.k8sApiPassthrough.enabled }} -# TLS passthrough to kubernetes API on Gateway :6443 (k8s.domain). -# Requires node-ip != MetalLB VIP — see k8sApiPassthrough in values.yaml. -apiVersion: gateway.networking.k8s.io/v1alpha2 -kind: TLSRoute -metadata: - name: k8s-passthrough - namespace: default -spec: - hostnames: - - k8s.{{ .Values.domain }} - parentRefs: - - group: gateway.networking.k8s.io - kind: Gateway - name: https - namespace: envoy-gateway-system - sectionName: k8s-passthrough - rules: - - backendRefs: - - group: "" - kind: Service - name: kubernetes - port: 443 - weight: 1 -{{- end }} diff --git a/sources/envoy-gateway-config/templates/tlsroute-tls-passthrough-apps.yaml b/sources/envoy-gateway-config/templates/tlsroute-tls-passthrough-apps.yaml new file mode 100644 index 00000000..a176f333 --- /dev/null +++ b/sources/envoy-gateway-config/templates/tlsroute-tls-passthrough-apps.yaml @@ -0,0 +1,26 @@ +{{- if .Values.tlsPassthroughGateway.enabled }} +# Wildcard *.: everything except k8s. (which has a more specific route and +# wins by SNI longest-match) is passed through to the apps gateway's stable ClusterIP +# (`https`), which terminates TLS and routes via HTTPRoute / AIGatewayRoute. +# TLS passthrough is SNI-based, so this must carry a hostname. Requires gatewayDnsService.enabled. 
+apiVersion: gateway.networking.k8s.io/v1alpha2 +kind: TLSRoute +metadata: + name: tls-passthrough-apps + namespace: envoy-gateway-system +spec: + hostnames: + - "*.{{ .Values.domain }}" + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: tls-passthrough + namespace: envoy-gateway-system + rules: + - backendRefs: + - group: "" + kind: Service + name: https + port: 443 + weight: 1 +{{- end }} diff --git a/sources/envoy-gateway-config/templates/tlsroute-tls-passthrough-k8s.yaml b/sources/envoy-gateway-config/templates/tlsroute-tls-passthrough-k8s.yaml new file mode 100644 index 00000000..b73e3ef3 --- /dev/null +++ b/sources/envoy-gateway-config/templates/tlsroute-tls-passthrough-k8s.yaml @@ -0,0 +1,24 @@ +{{- if .Values.tlsPassthroughGateway.enabled }} +# SNI k8s. -> kubernetes API service (TLS passthrough via the tls-passthrough gateway). +# In the `default` namespace alongside the `kubernetes` service, so no ReferenceGrant is needed. +apiVersion: gateway.networking.k8s.io/v1alpha2 +kind: TLSRoute +metadata: + name: tls-passthrough-k8s-api + namespace: default +spec: + hostnames: + - k8s.{{ .Values.domain }} + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: tls-passthrough + namespace: envoy-gateway-system + rules: + - backendRefs: + - group: "" + kind: Service + name: kubernetes + port: 443 + weight: 1 +{{- end }} diff --git a/sources/envoy-gateway-config/values.yaml b/sources/envoy-gateway-config/values.yaml index c4f58310..168a6475 100644 --- a/sources/envoy-gateway-config/values.yaml +++ b/sources/envoy-gateway-config/values.yaml @@ -1,11 +1,23 @@ domain: # to be filled by cluster-forge app -# Stable ClusterIP DNS name for in-cluster *.domain resolution (CoreDNS rewrite). -# Does not replace Envoy Gateway auto-generated LoadBalancer from MetalLB. +# Apps gateway (`https`): terminates TLS (cluster-tls) and serves HTTPRoute / AIGatewayRoute. 
+# No longer owns the external LoadBalancer — exposed as ClusterIP and fronted by the +# tls-passthrough gateway below. +appsGateway: + serviceType: ClusterIP + +# Stable ClusterIP `https` selecting the apps-gateway data plane. Used both by CoreDNS +# (*.domain rewrite) and as the passthrough backend for the tls-passthrough gateway, so it +# must stay enabled while tlsPassthroughGateway is on. gatewayDnsService: enabled: true -# TLS passthrough to kubernetes API via Gateway. Disabled by default on cluster-bloom -# where node-ip equals the MetalLB pool IP — a :6443 listener hijacks pod→apiserver traffic. -k8sApiPassthrough: - enabled: false +# tls-passthrough gateway: owns the external MetalLB LoadBalancer on :443 and does SNI-based +# TLS passthrough — k8s. -> kube API service, everything else -> apps gateway. +# Listening on :443 (not :6443) avoids hijacking pod->apiserver traffic on cluster-bloom +# where node-ip equals the MetalLB pool IP. +tlsPassthroughGateway: + enabled: true + # Pin the external VIP here if the MetalLB pool holds more than one address; otherwise + # MetalLB auto-assigns from the pool. 
+ loadBalancerIP: "" From ad36a8182ad84257ac35c594f59c01f7c4bb7341 Mon Sep 17 00:00:00 2001 From: Mika Ranta Date: Fri, 5 Jun 2026 10:20:30 +0300 Subject: [PATCH 06/21] fix(envoy-gateway-config): invert gateway front door while debugging --- .../tls-passthrough-proxy-config.yaml | 6 +++-- sources/envoy-gateway-config/values.yaml | 22 ++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/sources/envoy-gateway-config/templates/tls-passthrough-proxy-config.yaml b/sources/envoy-gateway-config/templates/tls-passthrough-proxy-config.yaml index b68646e1..66bbf323 100644 --- a/sources/envoy-gateway-config/templates/tls-passthrough-proxy-config.yaml +++ b/sources/envoy-gateway-config/templates/tls-passthrough-proxy-config.yaml @@ -1,5 +1,5 @@ {{- if .Values.tlsPassthroughGateway.enabled }} -# EnvoyProxy for the tls-passthrough gateway: external LoadBalancer (VIP) + first-node placement. +# EnvoyProxy for the tls-passthrough gateway: serviceType-controlled external service + first-node placement. apiVersion: gateway.envoyproxy.io/v1alpha1 kind: EnvoyProxy metadata: @@ -10,10 +10,12 @@ spec: type: Kubernetes kubernetes: envoyService: - type: LoadBalancer + type: {{ .Values.tlsPassthroughGateway.serviceType }} + {{- if eq .Values.tlsPassthroughGateway.serviceType "LoadBalancer" }} {{- with .Values.tlsPassthroughGateway.loadBalancerIP }} loadBalancerIP: {{ . 
| quote }} {{- end }} + {{- end }} envoyDeployment: pod: nodeSelector: diff --git a/sources/envoy-gateway-config/values.yaml b/sources/envoy-gateway-config/values.yaml index 168a6475..e2be2d23 100644 --- a/sources/envoy-gateway-config/values.yaml +++ b/sources/envoy-gateway-config/values.yaml @@ -1,10 +1,15 @@ domain: # to be filled by cluster-forge app +# TEMPORARY (EAI-5821 debug): the tls-passthrough gateway does not come up as a LoadBalancer, +# so the external front door is inverted — the apps gateway owns the external LoadBalancer (VIP) +# and the tls-passthrough gateway is demoted to ClusterIP. This sacrifices k8s. +# passthrough but keeps app/UI ingress working. Switch both serviceType values back +# (appsGateway -> ClusterIP, tlsPassthroughGateway -> LoadBalancer) once the tls-passthrough +# gateway works as the LoadBalancer. + # Apps gateway (`https`): terminates TLS (cluster-tls) and serves HTTPRoute / AIGatewayRoute. -# No longer owns the external LoadBalancer — exposed as ClusterIP and fronted by the -# tls-passthrough gateway below. appsGateway: - serviceType: ClusterIP + serviceType: LoadBalancer # normally ClusterIP (fronted by the tls-passthrough gateway) # Stable ClusterIP `https` selecting the apps-gateway data plane. Used both by CoreDNS # (*.domain rewrite) and as the passthrough backend for the tls-passthrough gateway, so it @@ -12,12 +17,13 @@ appsGateway: gatewayDnsService: enabled: true -# tls-passthrough gateway: owns the external MetalLB LoadBalancer on :443 and does SNI-based -# TLS passthrough — k8s. -> kube API service, everything else -> apps gateway. -# Listening on :443 (not :6443) avoids hijacking pod->apiserver traffic on cluster-bloom -# where node-ip equals the MetalLB pool IP. +# tls-passthrough gateway: when serviceType is LoadBalancer it owns the external MetalLB +# LoadBalancer on :443 and does SNI-based TLS passthrough — k8s. -> kube API service, +# everything else -> apps gateway. 
Listening on :443 (not :6443) avoids hijacking +# pod->apiserver traffic on cluster-bloom where node-ip equals the MetalLB pool IP. tlsPassthroughGateway: enabled: true + serviceType: ClusterIP # normally LoadBalancer; ClusterIP while debugging (apps gateway is the front door) # Pin the external VIP here if the MetalLB pool holds more than one address; otherwise - # MetalLB auto-assigns from the pool. + # MetalLB auto-assigns from the pool. Only applies when serviceType is LoadBalancer. loadBalancerIP: "" From 30aa91152e6ea9c356e1609ea6d3177c01bbf944 Mon Sep 17 00:00:00 2001 From: Mika Ranta Date: Fri, 5 Jun 2026 12:39:22 +0300 Subject: [PATCH 07/21] fix(envoy-gateway): scope AI extension to its own listeners Set extensionManager listener.includeAll=false so the AI Gateway xDS translation hook only receives listeners generated for its own resources (AIGatewayRoute/AIServiceBackend/InferencePool). With includeAll=true the hook also received the L4 tls-passthrough listener and tried to insert its request-header-metadata HTTP filter into a TCP filter chain that has no HTTPConnectionManager. That failed xDS translation for the entire GatewayClass, so the passthrough data plane got an empty snapshot and never left initialization. --- root/values.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/root/values.yaml b/root/values.yaml index 1a245c55..62e3080b 100644 --- a/root/values.yaml +++ b/root/values.yaml @@ -602,7 +602,14 @@ apps: xdsTranslator: translation: listener: - includeAll: true + # Only hand the AI extension listeners generated for its own + # resources (AIGatewayRoute/AIServiceBackend/InferencePool), not + # every listener. 
With includeAll:true the extension also received + # the L4 tls-passthrough listener and tried to insert its HTTP + # request-header-metadata filter into a TCP filter chain with no + # HTTPConnectionManager, failing xDS translation for the whole + # GatewayClass and leaving the passthrough data plane stuck in init. + includeAll: false route: includeAll: true cluster: From 6de26322eed2a6dd852708fe9427c3f70afba90c Mon Sep 17 00:00:00 2001 From: Mika Ranta Date: Fri, 5 Jun 2026 13:04:06 +0300 Subject: [PATCH 08/21] fix(envoy-gateway-config): restore passthrough gateway as front door Revert the debug inversion: the tls-passthrough gateway owns the external MetalLB LoadBalancer on :443 (SNI passthrough) and the apps gateway drops back to ClusterIP behind it. The inversion was a workaround for the passthrough data plane not starting, which is now fixed. --- sources/envoy-gateway-config/values.yaml | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/sources/envoy-gateway-config/values.yaml b/sources/envoy-gateway-config/values.yaml index e2be2d23..ff856a55 100644 --- a/sources/envoy-gateway-config/values.yaml +++ b/sources/envoy-gateway-config/values.yaml @@ -1,15 +1,10 @@ domain: # to be filled by cluster-forge app -# TEMPORARY (EAI-5821 debug): the tls-passthrough gateway does not come up as a LoadBalancer, -# so the external front door is inverted — the apps gateway owns the external LoadBalancer (VIP) -# and the tls-passthrough gateway is demoted to ClusterIP. This sacrifices k8s. -# passthrough but keeps app/UI ingress working. Switch both serviceType values back -# (appsGateway -> ClusterIP, tlsPassthroughGateway -> LoadBalancer) once the tls-passthrough -# gateway works as the LoadBalancer. - # Apps gateway (`https`): terminates TLS (cluster-tls) and serves HTTPRoute / AIGatewayRoute. +# ClusterIP — not the external front door; the tls-passthrough gateway fronts it and +# TLS-passes-through by SNI. 
appsGateway: - serviceType: LoadBalancer # normally ClusterIP (fronted by the tls-passthrough gateway) + serviceType: ClusterIP # Stable ClusterIP `https` selecting the apps-gateway data plane. Used both by CoreDNS # (*.domain rewrite) and as the passthrough backend for the tls-passthrough gateway, so it @@ -23,7 +18,7 @@ gatewayDnsService: # pod->apiserver traffic on cluster-bloom where node-ip equals the MetalLB pool IP. tlsPassthroughGateway: enabled: true - serviceType: ClusterIP # normally LoadBalancer; ClusterIP while debugging (apps gateway is the front door) + serviceType: LoadBalancer # external front door: owns the MetalLB VIP on :443 # Pin the external VIP here if the MetalLB pool holds more than one address; otherwise # MetalLB auto-assigns from the pool. Only applies when serviceType is LoadBalancer. loadBalancerIP: "" From 9a54b66f142c4ed184181c7decffb1d44ff9bcad Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Thu, 4 Jun 2026 13:02:17 +0000 Subject: [PATCH 09/21] EAI-5821: Wire API key auth and metrics for AI gateway - Bump cluster-auth to 0.6.0-rc2, which injects x-api-key-id and x-auth-username on every authenticated request and supports SecurityPolicy contextExtensions for per-IS group enforcement (required by the ai-gateway-discovery controller) - Add api_key_id and aim_service_id to access log fields so every AI gateway request is attributed to the originating API key and AIM service in structured logs --- sources/cluster-auth/0.5.9/values.yaml | 2 +- .../envoy-gateway-config/templates/envoy-proxy-access-logs.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index 2d300fd1..74fc1f35 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: Always - tag: "0.5.9" + tag: "0.6.0-rc2" imagePullSecrets: [] nameOverride: 
"" diff --git a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml index 9950d034..b1d7ff19 100644 --- a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml +++ b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml @@ -44,6 +44,8 @@ spec: llm_input_token: "%DYNAMIC_METADATA(io.envoy.ai_gateway:llm_input_token)%" llm_output_token: "%DYNAMIC_METADATA(io.envoy.ai_gateway:llm_output_token)%" llm_total_token: "%DYNAMIC_METADATA(io.envoy.ai_gateway:llm_total_token)%" + api_key_id: "%REQ(x-api-key-id)%" + aim_service_id: "%REQ(x-aim-service-id)%" sinks: - type: File file: From 8daf1759fd868b4410dcfc7c5681af99fcc37f01 Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Fri, 5 Jun 2026 11:20:44 +0000 Subject: [PATCH 10/21] EAI-5821: Bump cluster-auth to 0.6.0-rc3 --- sources/cluster-auth/0.5.9/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index 74fc1f35..fa60c54a 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: Always - tag: "0.6.0-rc2" + tag: "0.6.0-rc3" imagePullSecrets: [] nameOverride: "" From 524cda927829b0c8e15c39d4b92ddd40e3016b30 Mon Sep 17 00:00:00 2001 From: Mika Ranta Date: Mon, 8 Jun 2026 09:12:58 +0300 Subject: [PATCH 11/21] fix(envoy-gateway): wire EPP into shared listener Set listener.includeAll=true so the AI extension injects the EPP ext_proc filter into the shared https :443 listener. InferencePool routes were returning 503 (no healthy upstream) because nothing set x-gateway-destination-endpoint on that Gateway-owned listener. 
Add failOpen=true so the extension erroring on the tls-passthrough L4 listener (an HTTP filter can't splice into a TCP chain) no longer fails that proxy's xDS translation and leaves it stuck in init. mergeGateways is off, so each gateway is a separate translation pass: the https proxy gets the filter, the passthrough proxy keeps its original xDS. --- root/values.yaml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/root/values.yaml b/root/values.yaml index 62e3080b..f839826b 100644 --- a/root/values.yaml +++ b/root/values.yaml @@ -587,6 +587,13 @@ apps: extensionApis: enableBackend: true extensionManager: + # Tolerate the AI extension erroring on the tls-passthrough gateway's L4 + # listener (it tries to splice an HTTP filter into a TCP chain with no + # HTTPConnectionManager). With mergeGateways:false each gateway is its own + # translation pass, so the https proxy still gets the EPP ext_proc filter + # while the passthrough proxy keeps its original (correct) xDS instead of + # failing translation and getting stuck in init. + failOpen: true resources: - group: aigateway.envoyproxy.io version: v1beta1 @@ -602,14 +609,13 @@ apps: xdsTranslator: translation: listener: - # Only hand the AI extension listeners generated for its own - # resources (AIGatewayRoute/AIServiceBackend/InferencePool), not - # every listener. With includeAll:true the extension also received - # the L4 tls-passthrough listener and tried to insert its HTTP - # request-header-metadata filter into a TCP filter chain with no - # HTTPConnectionManager, failing xDS translation for the whole - # GatewayClass and leaving the passthrough data plane stuck in init. - includeAll: false + # Hand the AI extension every listener so the EPP ext_proc filter + # gets injected into the shared https :443 listener (owned by the + # Gateway, not by AIGatewayRoute) — required for InferencePool routes, + # which otherwise 503 because nothing sets x-gateway-destination-endpoint. 
+ # The L4 tls-passthrough listener also reaches the extension and errors, + # but failOpen:true (above) keeps that proxy's xDS intact. + includeAll: true route: includeAll: true cluster: From 28a58c44bb8414788e073b4581a5caffc3988248 Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Mon, 8 Jun 2026 08:52:13 +0000 Subject: [PATCH 12/21] EAI-5821: Add ext-proc metrics scraping, bump otel-lgtm-stack to v1.0.8 and cluster-auth to 0.6.0-rc4 --- root/values.yaml | 2 +- sbom/components.yaml | 2 +- sources/cluster-auth/0.5.9/values.yaml | 2 +- .../{v1.0.7 => v1.0.8}/Chart.yaml | 4 ++-- .../templates/chrony-node-exporter.yaml | 0 .../collectors-logs-metrics-k8s.yaml | 0 .../templates/collectors-metrics-rest.yaml | 19 +++++++++++++++++++ .../collectors-rbac-instrumentation.yaml | 0 .../dashboards-cluster-health-overview.yaml | 0 .../templates/dashboards-default.yaml | 0 .../templates/dashboards-gpu.yaml | 0 .../templates/dashboards-minio.yaml | 0 .../templates/grafana-externalsecret.yaml | 0 .../templates/grafana-httproute.yaml | 0 .../templates/kube-state-metrics.yaml | 0 .../templates/lgtm-stack.yaml | 0 .../templates/node-exporter.yaml | 0 .../{v1.0.7 => v1.0.8}/values.yaml | 0 18 files changed, 24 insertions(+), 5 deletions(-) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/Chart.yaml (95%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/chrony-node-exporter.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/collectors-logs-metrics-k8s.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/collectors-metrics-rest.yaml (91%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/collectors-rbac-instrumentation.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/dashboards-cluster-health-overview.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/dashboards-default.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/dashboards-gpu.yaml (100%) rename 
sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/dashboards-minio.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/grafana-externalsecret.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/grafana-httproute.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/kube-state-metrics.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/lgtm-stack.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/templates/node-exporter.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.7 => v1.0.8}/values.yaml (100%) diff --git a/root/values.yaml b/root/values.yaml index f839826b..b9626bbe 100644 --- a/root/values.yaml +++ b/root/values.yaml @@ -866,7 +866,7 @@ apps: - name: cluster.name value: "{{ .Values.global.domain }}" namespace: otel-lgtm-stack - path: otel-lgtm-stack/v1.0.7 + path: otel-lgtm-stack/v1.0.8 syncWave: -20 valuesObject: cluster: diff --git a/sbom/components.yaml b/sbom/components.yaml index 6d0aa621..4884ab72 100644 --- a/sbom/components.yaml +++ b/sbom/components.yaml @@ -266,7 +266,7 @@ components: license: Apache License 2.0 licenseUrl: https://github.com/open-telemetry/opentelemetry-operator/blob/main/LICENSE otel-lgtm-stack: - path: otel-lgtm-stack/v1.0.7 + path: otel-lgtm-stack/v1.0.8 sourceUrl: https://github.com/silogen/docker-otel-lgtm projectUrl: https://github.com/grafana/docker-otel-lgtm license: Apache License 2.0 diff --git a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index fa60c54a..a57ec91a 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: Always - tag: "0.6.0-rc3" + tag: "0.6.0-rc4" imagePullSecrets: [] nameOverride: "" diff --git a/sources/otel-lgtm-stack/v1.0.7/Chart.yaml b/sources/otel-lgtm-stack/v1.0.8/Chart.yaml similarity index 95% rename from 
sources/otel-lgtm-stack/v1.0.7/Chart.yaml rename to sources/otel-lgtm-stack/v1.0.8/Chart.yaml index 390773b6..0a56e4c7 100644 --- a/sources/otel-lgtm-stack/v1.0.7/Chart.yaml +++ b/sources/otel-lgtm-stack/v1.0.8/Chart.yaml @@ -8,10 +8,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0.7 +version: 1.0.8 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.0.7" \ No newline at end of file +appVersion: "1.0.8" \ No newline at end of file diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/chrony-node-exporter.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/chrony-node-exporter.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/chrony-node-exporter.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/chrony-node-exporter.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-logs-metrics-k8s.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/collectors-logs-metrics-k8s.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/collectors-logs-metrics-k8s.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/collectors-logs-metrics-k8s.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/collectors-metrics-rest.yaml similarity index 91% rename from sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/collectors-metrics-rest.yaml index 0857538b..9f5b7b24 100644 --- 
a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml +++ b/sources/otel-lgtm-stack/v1.0.8/templates/collectors-metrics-rest.yaml @@ -121,6 +121,25 @@ spec: static_configs: - targets: - longhorn-backend.longhorn.svc.cluster.local:9500 + - job_name: ai-gateway-extproc + scrape_interval: 30s + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - envoy-gateway-system + metrics_path: /metrics + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + regex: envoy + action: keep + - source_labels: [__meta_kubernetes_pod_label_gateway_envoyproxy_io_owning_gateway_name] + regex: https + action: keep + - source_labels: [__meta_kubernetes_pod_ip] + regex: (.+) + replacement: $1:1064 + target_label: __address__ service: pipelines: metrics: diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-rbac-instrumentation.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/collectors-rbac-instrumentation.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/collectors-rbac-instrumentation.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/collectors-rbac-instrumentation.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/dashboards-cluster-health-overview.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/dashboards-cluster-health-overview.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/dashboards-cluster-health-overview.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/dashboards-cluster-health-overview.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/dashboards-default.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/dashboards-default.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/dashboards-default.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/dashboards-default.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/dashboards-gpu.yaml 
b/sources/otel-lgtm-stack/v1.0.8/templates/dashboards-gpu.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/dashboards-gpu.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/dashboards-gpu.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/dashboards-minio.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/dashboards-minio.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/dashboards-minio.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/dashboards-minio.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/grafana-externalsecret.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/grafana-externalsecret.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/grafana-externalsecret.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/grafana-externalsecret.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/grafana-httproute.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/grafana-httproute.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/grafana-httproute.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/grafana-httproute.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/kube-state-metrics.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/kube-state-metrics.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/kube-state-metrics.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/kube-state-metrics.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/lgtm-stack.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/lgtm-stack.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/templates/lgtm-stack.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/lgtm-stack.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/node-exporter.yaml b/sources/otel-lgtm-stack/v1.0.8/templates/node-exporter.yaml similarity index 100% rename from 
sources/otel-lgtm-stack/v1.0.7/templates/node-exporter.yaml rename to sources/otel-lgtm-stack/v1.0.8/templates/node-exporter.yaml diff --git a/sources/otel-lgtm-stack/v1.0.7/values.yaml b/sources/otel-lgtm-stack/v1.0.8/values.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.7/values.yaml rename to sources/otel-lgtm-stack/v1.0.8/values.yaml From 7cf7870187365026b06e71abe1e979417ef784c1 Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Mon, 8 Jun 2026 08:53:25 +0000 Subject: [PATCH 13/21] Revert "EAI-5821: Add ext-proc metrics scraping, bump otel-lgtm-stack to v1.0.8 and cluster-auth to 0.6.0-rc4" This reverts commit 28a58c44bb8414788e073b4581a5caffc3988248. --- root/values.yaml | 2 +- sbom/components.yaml | 2 +- sources/cluster-auth/0.5.9/values.yaml | 2 +- .../{v1.0.8 => v1.0.7}/Chart.yaml | 4 ++-- .../templates/chrony-node-exporter.yaml | 0 .../collectors-logs-metrics-k8s.yaml | 0 .../templates/collectors-metrics-rest.yaml | 19 ------------------- .../collectors-rbac-instrumentation.yaml | 0 .../dashboards-cluster-health-overview.yaml | 0 .../templates/dashboards-default.yaml | 0 .../templates/dashboards-gpu.yaml | 0 .../templates/dashboards-minio.yaml | 0 .../templates/grafana-externalsecret.yaml | 0 .../templates/grafana-httproute.yaml | 0 .../templates/kube-state-metrics.yaml | 0 .../templates/lgtm-stack.yaml | 0 .../templates/node-exporter.yaml | 0 .../{v1.0.8 => v1.0.7}/values.yaml | 0 18 files changed, 5 insertions(+), 24 deletions(-) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/Chart.yaml (95%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/chrony-node-exporter.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/collectors-logs-metrics-k8s.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/collectors-metrics-rest.yaml (91%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/collectors-rbac-instrumentation.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => 
v1.0.7}/templates/dashboards-cluster-health-overview.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/dashboards-default.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/dashboards-gpu.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/dashboards-minio.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/grafana-externalsecret.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/grafana-httproute.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/kube-state-metrics.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/lgtm-stack.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/templates/node-exporter.yaml (100%) rename sources/otel-lgtm-stack/{v1.0.8 => v1.0.7}/values.yaml (100%) diff --git a/root/values.yaml b/root/values.yaml index b9626bbe..f839826b 100644 --- a/root/values.yaml +++ b/root/values.yaml @@ -866,7 +866,7 @@ apps: - name: cluster.name value: "{{ .Values.global.domain }}" namespace: otel-lgtm-stack - path: otel-lgtm-stack/v1.0.8 + path: otel-lgtm-stack/v1.0.7 syncWave: -20 valuesObject: cluster: diff --git a/sbom/components.yaml b/sbom/components.yaml index 4884ab72..6d0aa621 100644 --- a/sbom/components.yaml +++ b/sbom/components.yaml @@ -266,7 +266,7 @@ components: license: Apache License 2.0 licenseUrl: https://github.com/open-telemetry/opentelemetry-operator/blob/main/LICENSE otel-lgtm-stack: - path: otel-lgtm-stack/v1.0.8 + path: otel-lgtm-stack/v1.0.7 sourceUrl: https://github.com/silogen/docker-otel-lgtm projectUrl: https://github.com/grafana/docker-otel-lgtm license: Apache License 2.0 diff --git a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index a57ec91a..fa60c54a 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: 
Always - tag: "0.6.0-rc4" + tag: "0.6.0-rc3" imagePullSecrets: [] nameOverride: "" diff --git a/sources/otel-lgtm-stack/v1.0.8/Chart.yaml b/sources/otel-lgtm-stack/v1.0.7/Chart.yaml similarity index 95% rename from sources/otel-lgtm-stack/v1.0.8/Chart.yaml rename to sources/otel-lgtm-stack/v1.0.7/Chart.yaml index 0a56e4c7..390773b6 100644 --- a/sources/otel-lgtm-stack/v1.0.8/Chart.yaml +++ b/sources/otel-lgtm-stack/v1.0.7/Chart.yaml @@ -8,10 +8,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0.8 +version: 1.0.7 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "1.0.8" \ No newline at end of file +appVersion: "1.0.7" \ No newline at end of file diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/chrony-node-exporter.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/chrony-node-exporter.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/chrony-node-exporter.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/chrony-node-exporter.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/collectors-logs-metrics-k8s.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-logs-metrics-k8s.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/collectors-logs-metrics-k8s.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/collectors-logs-metrics-k8s.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/collectors-metrics-rest.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml similarity index 91% rename from sources/otel-lgtm-stack/v1.0.8/templates/collectors-metrics-rest.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml index 9f5b7b24..0857538b 100644 --- a/sources/otel-lgtm-stack/v1.0.8/templates/collectors-metrics-rest.yaml +++ b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml @@ -121,25 +121,6 @@ spec: static_configs: - targets: - longhorn-backend.longhorn.svc.cluster.local:9500 - - job_name: ai-gateway-extproc - scrape_interval: 30s - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - envoy-gateway-system - metrics_path: /metrics - relabel_configs: - - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] - regex: envoy - action: keep - - source_labels: [__meta_kubernetes_pod_label_gateway_envoyproxy_io_owning_gateway_name] - regex: https - action: keep - - source_labels: [__meta_kubernetes_pod_ip] - regex: (.+) - replacement: $1:1064 - target_label: __address__ service: pipelines: metrics: diff --git 
a/sources/otel-lgtm-stack/v1.0.8/templates/collectors-rbac-instrumentation.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-rbac-instrumentation.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/collectors-rbac-instrumentation.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/collectors-rbac-instrumentation.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/dashboards-cluster-health-overview.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/dashboards-cluster-health-overview.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/dashboards-cluster-health-overview.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/dashboards-cluster-health-overview.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/dashboards-default.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/dashboards-default.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/dashboards-default.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/dashboards-default.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/dashboards-gpu.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/dashboards-gpu.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/dashboards-gpu.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/dashboards-gpu.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/dashboards-minio.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/dashboards-minio.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/dashboards-minio.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/dashboards-minio.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/grafana-externalsecret.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/grafana-externalsecret.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/grafana-externalsecret.yaml rename to 
sources/otel-lgtm-stack/v1.0.7/templates/grafana-externalsecret.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/grafana-httproute.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/grafana-httproute.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/grafana-httproute.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/grafana-httproute.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/kube-state-metrics.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/kube-state-metrics.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/kube-state-metrics.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/kube-state-metrics.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/lgtm-stack.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/lgtm-stack.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/lgtm-stack.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/lgtm-stack.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/templates/node-exporter.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/node-exporter.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/templates/node-exporter.yaml rename to sources/otel-lgtm-stack/v1.0.7/templates/node-exporter.yaml diff --git a/sources/otel-lgtm-stack/v1.0.8/values.yaml b/sources/otel-lgtm-stack/v1.0.7/values.yaml similarity index 100% rename from sources/otel-lgtm-stack/v1.0.8/values.yaml rename to sources/otel-lgtm-stack/v1.0.7/values.yaml From e1a30f7378ad1bc2697bfdc1937505814010d961 Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Mon, 8 Jun 2026 08:58:37 +0000 Subject: [PATCH 14/21] EAI-5821: Add ext-proc metrics scraping, bump otel-lgtm-stack to 1.0.8 and cluster-auth to 0.6.0-rc4 --- sources/cluster-auth/0.5.9/values.yaml | 2 +- sources/otel-lgtm-stack/v1.0.7/Chart.yaml | 4 ++-- .../templates/collectors-metrics-rest.yaml | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git 
a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index fa60c54a..a57ec91a 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: Always - tag: "0.6.0-rc3" + tag: "0.6.0-rc4" imagePullSecrets: [] nameOverride: "" diff --git a/sources/otel-lgtm-stack/v1.0.7/Chart.yaml b/sources/otel-lgtm-stack/v1.0.7/Chart.yaml index 390773b6..0a56e4c7 100644 --- a/sources/otel-lgtm-stack/v1.0.7/Chart.yaml +++ b/sources/otel-lgtm-stack/v1.0.7/Chart.yaml @@ -8,10 +8,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.0.7 +version: 1.0.8 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "1.0.7" \ No newline at end of file +appVersion: "1.0.8" \ No newline at end of file diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml index 0857538b..9f5b7b24 100644 --- a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml +++ b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml @@ -121,6 +121,25 @@ spec: static_configs: - targets: - longhorn-backend.longhorn.svc.cluster.local:9500 + - job_name: ai-gateway-extproc + scrape_interval: 30s + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - envoy-gateway-system + metrics_path: /metrics + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + regex: envoy + action: keep + - source_labels: [__meta_kubernetes_pod_label_gateway_envoyproxy_io_owning_gateway_name] + regex: https + action: keep + - source_labels: [__meta_kubernetes_pod_ip] + regex: (.+) + replacement: $1:1064 + target_label: __address__ service: pipelines: metrics: From 7bf627ceff031d9b60eef7b4dae1fcb1187cdbb5 Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Mon, 8 Jun 2026 12:45:44 +0000 Subject: [PATCH 15/21] EAI-5821: Bump cluster-auth to 0.6.0-rc5 --- sources/cluster-auth/0.5.9/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index a57ec91a..4d9e264c 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: Always - tag: "0.6.0-rc4" + tag: "0.6.0-rc5" imagePullSecrets: [] nameOverride: "" From 6cdbc0d212ff7899e5801ba5039fe50097973bc5 Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Wed, 10 Jun 2026 12:47:06 +0000 Subject: [PATCH 16/21] EAI-5821: Bump cluster-auth image from 0.6.0-rc5 to 0.6.0-rc6 Co-Authored-By: 
Claude Sonnet 4.6 --- sources/cluster-auth/0.5.9/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index 4d9e264c..eda7ee64 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: Always - tag: "0.6.0-rc5" + tag: "0.6.0-rc6" imagePullSecrets: [] nameOverride: "" From 0fdd5e1963d38b31d867ed622110a78e331f8aab Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Thu, 11 Jun 2026 06:45:26 +0000 Subject: [PATCH 17/21] EAI-5821: bump cluster-auth to 0.6.0-rc7 --- sources/cluster-auth/0.5.9/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index eda7ee64..6e096bff 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: Always - tag: "0.6.0-rc6" + tag: "0.6.0-rc7" imagePullSecrets: [] nameOverride: "" From ca33b279ec884d990651b8236bf21062cbbfa7a0 Mon Sep 17 00:00:00 2001 From: John Lybeck Date: Thu, 11 Jun 2026 12:29:06 +0000 Subject: [PATCH 18/21] EAI-5821: bump cluster-auth to 0.6.0-rc8 --- sources/cluster-auth/0.5.9/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/cluster-auth/0.5.9/values.yaml b/sources/cluster-auth/0.5.9/values.yaml index 6e096bff..db9164c8 100644 --- a/sources/cluster-auth/0.5.9/values.yaml +++ b/sources/cluster-auth/0.5.9/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/silogen/cluster-auth pullPolicy: Always - tag: "0.6.0-rc7" + tag: "0.6.0-rc8" imagePullSecrets: [] nameOverride: "" From 5d09f1d3a6c137fb83e0563d5ef6d057956de47a Mon Sep 17 00:00:00 2001 From: Tomas Saaristola Date: Fri, 12 Jun 2026 07:54:48 +0000 Subject: 
[PATCH 19/21] fix: reorder ext_proc before ext_authz in EnvoyProxy Without filterOrder, ext_authz (cluster-auth) runs before ext_proc (AI Gateway) sets x-ai-eg-model from the request body. This causes cluster-auth to fall back to a catch-all route with no annotations, firing defaultAction: allow and bypassing per-model authorization. Fixes EAI-6805. --- .../templates/envoy-proxy-access-logs.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml index b1d7ff19..444b91f1 100644 --- a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml +++ b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml @@ -6,6 +6,10 @@ metadata: name: access-logging-config namespace: envoy-gateway-system spec: + filterOrder: + - name: envoy.filters.http.ext_proc + relativeTo: envoy.filters.http.ext_authz + position: Before provider: type: Kubernetes kubernetes: From a87c0e0a3f1a6f61144cf5b2d0357cc7418e065b Mon Sep 17 00:00:00 2001 From: Tomas Saaristola Date: Fri, 12 Jun 2026 08:05:02 +0000 Subject: [PATCH 20/21] fix: correct filterOrder syntax for EnvoyProxy CRD v1.7.1 CRD uses name+before/after, not relativeTo+position. 
--- .../templates/envoy-proxy-access-logs.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml index 444b91f1..ea5daf02 100644 --- a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml +++ b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml @@ -8,8 +8,7 @@ metadata: spec: filterOrder: - name: envoy.filters.http.ext_proc - relativeTo: envoy.filters.http.ext_authz - position: Before + before: envoy.filters.http.ext_authz provider: type: Kubernetes kubernetes: From ce4766975af1ca5ffbc28887fd67af126d74cd40 Mon Sep 17 00:00:00 2001 From: Tomas Saaristola Date: Fri, 12 Jun 2026 08:38:23 +0000 Subject: [PATCH 21/21] EAI-6805: set x-ai-eg-model via Lua before ext_authz The AI Gateway's ext_proc is inserted via xdsTranslator post-hooks, which run after filterOrder is applied, so filterOrder cannot reorder it relative to ext_authz. Without x-ai-eg-model set, cluster-auth falls through to a catch-all route with no annotations and allows any model. Add a Lua EnvoyExtensionPolicy that buffers the request body, extracts the "model" JSON field, and sets x-ai-eg-model before ext_authz runs. Order the Lua filter before ext_authz via EnvoyProxy.spec.filterOrder. 
--- .../envoy-extension-policy-model-header.yaml | 35 +++++++++++++++++++ .../templates/envoy-proxy-access-logs.yaml | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 sources/envoy-gateway-config/templates/envoy-extension-policy-model-header.yaml diff --git a/sources/envoy-gateway-config/templates/envoy-extension-policy-model-header.yaml b/sources/envoy-gateway-config/templates/envoy-extension-policy-model-header.yaml new file mode 100644 index 00000000..c230084b --- /dev/null +++ b/sources/envoy-gateway-config/templates/envoy-extension-policy-model-header.yaml @@ -0,0 +1,35 @@ +# Lua filter that sets x-ai-eg-model from the request body before ext_authz runs. +# +# The AI Gateway's ext_proc reads the body and sets x-ai-eg-model, but it is +# inserted via xdsTranslator post-hooks which run after filterOrder is applied. +# This means ext_authz (cluster-auth) runs without x-ai-eg-model set, causing +# it to fall through to a catch-all route with no auth annotations and allow +# the request regardless of which model is targeted (EAI-6805). +# +# This Lua filter runs early in the chain (ordered before ext_authz via +# EnvoyProxy.spec.filterOrder), reads the model field from the JSON body, and +# sets x-ai-eg-model so cluster-auth can match the correct per-model HTTPRoute +# and enforce the cluster-auth/allowed-group annotation. 
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: EnvoyExtensionPolicy
+metadata:
+  name: set-model-header-from-body
+  namespace: envoy-gateway-system
+spec:
+  targetRefs:
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: https
+  lua:
+    - type: Inline
+      inline: |
+        function envoy_on_request(request_handle)
+          -- body() buffers the whole request; it may be nil (e.g. no request body), then do nothing.
+          local body = request_handle:body()
+          if body == nil or body:length() == 0 then return end
+          -- The buffer is an opaque object: tostring(body) is not its content, so read the bytes.
+          local text = body:getBytes(0, body:length())
+          -- NOTE(review): first textual "model" key wins (nested too); header untouched on no match.
+          local model = text:match('"model"%s*:%s*"([^"]+)"')
+          if model ~= nil then request_handle:headers():replace("x-ai-eg-model", model) end
+        end
diff --git a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml
index ea5daf02..01616452 100644
--- a/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml
+++ b/sources/envoy-gateway-config/templates/envoy-proxy-access-logs.yaml
@@ -7,7 +7,7 @@ metadata:
   namespace: envoy-gateway-system
 spec:
   filterOrder:
-    - name: envoy.filters.http.ext_proc
+    - name: envoy.filters.http.lua
       before: envoy.filters.http.ext_authz
   provider:
     type: Kubernetes