From aa244a2203fca94864c69e2851c629471db5a166 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Thu, 15 Jan 2026 08:55:07 -0500 Subject: [PATCH 01/30] feat: Charmhub module for upgrades --- terraform/charmhub/README.md | 122 +++++++++++++++++++++++++++++++++++ terraform/charmhub/main.tf | 72 +++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 terraform/charmhub/README.md create mode 100644 terraform/charmhub/main.tf diff --git a/terraform/charmhub/README.md b/terraform/charmhub/README.md new file mode 100644 index 00000000..5eb901fa --- /dev/null +++ b/terraform/charmhub/README.md @@ -0,0 +1,122 @@ +# Terraform module for the COS solution + +This Terraform module computes a charm’s latest revision (from a channel and base) using the CharmHub API. + + +## Providers + +| Name | Version | +|------|---------| +| [juju](#provider\_juju) | ~> 1.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [alertmanager](#module\_alertmanager) | git:: | n/a | +| [catalogue](#module\_catalogue) | git:: | n/a | +| [grafana](#module\_grafana) | git:: | n/a | +| [loki](#module\_loki) | git:: | n/a | +| [mimir](#module\_mimir) | git:: | n/a | +| [opentelemetry\_collector](#module\_opentelemetry\_collector) | git:: | n/a | +| [ssc](#module\_ssc) | git:: | n/a | +| [tempo](#module\_tempo) | git:: | n/a | +| [traefik](#module\_traefik) | git:: | n/a | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [alertmanager](#input\_alertmanager) | Application configuration for Alertmanager. For more details: |
object({
app_name = optional(string, "alertmanager")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | +| [anti\_affinity](#input\_anti\_affinity) | Enable anti-affinity constraints across all HA modules (Mimir, Loki, Tempo) | `bool` | `true` | no | +| [catalogue](#input\_catalogue) | Application configuration for Catalogue. For more details: |
object({
app_name = optional(string, "catalogue")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | +| [channel](#input\_channel) | Channel that the applications are (unless overwritten by external\_channels) deployed from | `string` | n/a | yes | +| [cloud](#input\_cloud) | Kubernetes cloud or environment where this COS module will be deployed (e.g self-managed, aws) | `string` | `"self-managed"` | no | +| [external\_ca\_cert\_offer\_url](#input\_external\_ca\_cert\_offer\_url) | A Juju offer URL (e.g. admin/external-ca.send-ca-cert) of a CA providing the 'certificate\_transfer' integration for applications to trust ingress via Traefik. | `string` | `null` | no | +| [external\_certificates\_offer\_url](#input\_external\_certificates\_offer\_url) | A Juju offer URL of a CA providing the 'tls\_certificates' integration for Traefik to supply it with server certificates | `string` | `null` | no | +| [grafana](#input\_grafana) | Application configuration for Grafana. For more details: |
object({
app_name = optional(string, "grafana")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | +| [internal\_tls](#input\_internal\_tls) | Specify whether to use TLS or not for internal COS communication. By default, TLS is enabled using self-signed-certificates | `bool` | `true` | no | +| [loki\_bucket](#input\_loki\_bucket) | Loki bucket name | `string` | `"loki"` | no | +| [loki\_coordinator](#input\_loki\_coordinator) | Application configuration for Loki Coordinator. For more details: |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | +| [loki\_worker](#input\_loki\_worker) | Application configuration for all Loki Workers. For more details: |
object({
backend_config = optional(map(string), {})
read_config = optional(map(string), {})
write_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
backend_units = optional(number, 3)
read_units = optional(number, 3)
write_units = optional(number, 3)
})
| `{}` | no | +| [mimir\_bucket](#input\_mimir\_bucket) | Mimir bucket name | `string` | `"mimir"` | no | +| [mimir\_coordinator](#input\_mimir\_coordinator) | Application configuration for Mimir Coordinator. For more details: |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | +| [mimir\_worker](#input\_mimir\_worker) | Application configuration for all Mimir Workers. For more details: |
object({
backend_config = optional(map(string), {})
read_config = optional(map(string), {})
write_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
backend_units = optional(number, 3)
read_units = optional(number, 3)
write_units = optional(number, 3)
})
| `{}` | no | +| [model\_uuid](#input\_model\_uuid) | Reference to an existing model resource or data source for the model to deploy to | `string` | n/a | yes | +| [opentelemetry\_collector](#input\_opentelemetry\_collector) | Application configuration for OpenTelemetry Collector. For more details: |
object({
app_name = optional(string, "otelcol")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | +| [s3\_access\_key](#input\_s3\_access\_key) | S3 access-key credential | `string` | n/a | yes | +| [s3\_endpoint](#input\_s3\_endpoint) | S3 endpoint | `string` | n/a | yes | +| [s3\_integrator](#input\_s3\_integrator) | Application configuration for all S3-integrators in coordinated workers. For more details: |
object({
channel = optional(string, "2/edge")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | +| [s3\_secret\_key](#input\_s3\_secret\_key) | S3 secret-key credential | `string` | n/a | yes | +| [ssc](#input\_ssc) | Application configuration for Self-signed-certificates. For more details: |
object({
app_name = optional(string, "ca")
channel = optional(string, "1/stable")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | +| [tempo\_bucket](#input\_tempo\_bucket) | Tempo bucket name | `string` | `"tempo"` | no | +| [tempo\_coordinator](#input\_tempo\_coordinator) | Application configuration for Tempo Coordinator. For more details: |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | +| [tempo\_worker](#input\_tempo\_worker) | Application configuration for all Tempo workers. For more details: |
object({
querier_config = optional(map(string), {})
query_frontend_config = optional(map(string), {})
ingester_config = optional(map(string), {})
distributor_config = optional(map(string), {})
compactor_config = optional(map(string), {})
metrics_generator_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
compactor_units = optional(number, 3)
distributor_units = optional(number, 3)
ingester_units = optional(number, 3)
metrics_generator_units = optional(number, 3)
querier_units = optional(number, 3)
query_frontend_units = optional(number, 3)
})
| `{}` | no | +| [traefik](#input\_traefik) | Application configuration for Traefik. For more details: |
object({
app_name = optional(string, "traefik")
channel = optional(string, "latest/stable")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [components](#output\_components) | All Terraform charm modules which make up this product module | +| [offers](#output\_offers) | All Juju offers which are exposed by this product module | + + +## Usage + +This example defines and provides multiple charm names to the `charmhubs` module. This module then +computes the latest revision in the specified channel e.g., `2/stable`. Finally, it creates +`juju_application.apps` with the computed revisions. + +```hcl +terraform { + required_providers { + juju = { + source = "juju/juju" + } + http = { + source = "hashicorp/http" + version = "~> 3.0" + } + } +} + +locals { + channel = "2/stable" + base = "ubuntu@24.04" + + charms = { + alertmanager = "alertmanager-k8s" + prometheus = "prometheus-k8s" + grafana = "grafana-k8s" + } +} + +module "charmhubs" { + source = "../charmhub" + for_each = local.charms + + charm = each.value + channel = local.channel + base = local.base + architecture = "amd64" +} + +resource "juju_model" "development" { + name = "development" +} + +resource "juju_application" "apps" { + for_each = local.charms + + model_uuid = juju_model.development.uuid + trust = true + + charm { + name = each.value + channel = local.channel + revision = module.charmhubs[each.key].charm_revision + base = local.base + } +} +``` diff --git a/terraform/charmhub/main.tf b/terraform/charmhub/main.tf new file mode 100644 index 00000000..99aa8f3f --- /dev/null +++ b/terraform/charmhub/main.tf @@ -0,0 +1,72 @@ +terraform { + required_providers { + http = { + source = "hashicorp/http" + version = "~> 3.0" + } + } +} + +variable "charm" { + description = "Name of the charm (e.g., postgresql)" + type = string +} + +variable "channel" { + description = "Channel name (e.g., 14/stable, 16/edge)" + type = string +} + +variable "base" { + description = "Base Ubuntu (e.g., ubuntu@22.04, ubuntu@24.04)" + type = string +} + +variable "architecture" { + 
description = "Architecture (e.g., amd64, arm64)" + type = string + default = "amd64" +} + +data "http" "charmhub_info" { + url = "https://api.charmhub.io/v2/charms/info/${var.charm}?fields=channel-map.revision.revision" + + request_headers = { + Accept = "application/json" + } + + lifecycle { + postcondition { + condition = self.status_code == 200 + error_message = "Failed to fetch charm info from Charmhub API" + } + } +} + +locals { + charmhub_response = jsondecode(data.http.charmhub_info.response_body) + base_version = split("@", var.base)[1] + + matching_channels = [ + for entry in local.charmhub_response["channel-map"] : + entry if( + entry.channel.name == var.channel && + entry.channel.base.channel == local.base_version && + entry.channel.base.architecture == var.architecture + ) + ] + + revision = length(local.matching_channels) > 0 ? local.matching_channels[0].revision.revision : null +} + +check "revision_found" { + assert { + condition = local.revision != null + error_message = "No matching revision found for charm '${var.charm}' with channel '${var.channel}', base '${var.base}', and architecture '${var.architecture}'. Please verify the combination exists in Charmhub." 
+ } +} + +output "charm_revision" { + description = "The revision number for the specified charm channel and base" + value = local.revision +} From 6a5e1922c9cc772f0325356a7ee7a1c59b96f616 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Thu, 15 Jan 2026 10:26:24 -0500 Subject: [PATCH 02/30] chore: README --- terraform/charmhub/README.md | 47 ++++++------------------------------ 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/terraform/charmhub/README.md b/terraform/charmhub/README.md index 5eb901fa..e5aa6946 100644 --- a/terraform/charmhub/README.md +++ b/terraform/charmhub/README.md @@ -7,59 +7,26 @@ This Terraform module computes a charm’s latest revision (from a channel and b | Name | Version | |------|---------| -| [juju](#provider\_juju) | ~> 1.0 | +| [http](#provider\_http) | ~> 3.0 | ## Modules -| Name | Source | Version | -|------|--------|---------| -| [alertmanager](#module\_alertmanager) | git:: | n/a | -| [catalogue](#module\_catalogue) | git:: | n/a | -| [grafana](#module\_grafana) | git:: | n/a | -| [loki](#module\_loki) | git:: | n/a | -| [mimir](#module\_mimir) | git:: | n/a | -| [opentelemetry\_collector](#module\_opentelemetry\_collector) | git:: | n/a | -| [ssc](#module\_ssc) | git:: | n/a | -| [tempo](#module\_tempo) | git:: | n/a | -| [traefik](#module\_traefik) | git:: | n/a | +No modules. ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [alertmanager](#input\_alertmanager) | Application configuration for Alertmanager. For more details: |
object({
app_name = optional(string, "alertmanager")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | -| [anti\_affinity](#input\_anti\_affinity) | Enable anti-affinity constraints across all HA modules (Mimir, Loki, Tempo) | `bool` | `true` | no | -| [catalogue](#input\_catalogue) | Application configuration for Catalogue. For more details: |
object({
app_name = optional(string, "catalogue")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | -| [channel](#input\_channel) | Channel that the applications are (unless overwritten by external\_channels) deployed from | `string` | n/a | yes | -| [cloud](#input\_cloud) | Kubernetes cloud or environment where this COS module will be deployed (e.g self-managed, aws) | `string` | `"self-managed"` | no | -| [external\_ca\_cert\_offer\_url](#input\_external\_ca\_cert\_offer\_url) | A Juju offer URL (e.g. admin/external-ca.send-ca-cert) of a CA providing the 'certificate\_transfer' integration for applications to trust ingress via Traefik. | `string` | `null` | no | -| [external\_certificates\_offer\_url](#input\_external\_certificates\_offer\_url) | A Juju offer URL of a CA providing the 'tls\_certificates' integration for Traefik to supply it with server certificates | `string` | `null` | no | -| [grafana](#input\_grafana) | Application configuration for Grafana. For more details: |
object({
app_name = optional(string, "grafana")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | -| [internal\_tls](#input\_internal\_tls) | Specify whether to use TLS or not for internal COS communication. By default, TLS is enabled using self-signed-certificates | `bool` | `true` | no | -| [loki\_bucket](#input\_loki\_bucket) | Loki bucket name | `string` | `"loki"` | no | -| [loki\_coordinator](#input\_loki\_coordinator) | Application configuration for Loki Coordinator. For more details: |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | -| [loki\_worker](#input\_loki\_worker) | Application configuration for all Loki Workers. For more details: |
object({
backend_config = optional(map(string), {})
read_config = optional(map(string), {})
write_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
backend_units = optional(number, 3)
read_units = optional(number, 3)
write_units = optional(number, 3)
})
| `{}` | no | -| [mimir\_bucket](#input\_mimir\_bucket) | Mimir bucket name | `string` | `"mimir"` | no | -| [mimir\_coordinator](#input\_mimir\_coordinator) | Application configuration for Mimir Coordinator. For more details: |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | -| [mimir\_worker](#input\_mimir\_worker) | Application configuration for all Mimir Workers. For more details: |
object({
backend_config = optional(map(string), {})
read_config = optional(map(string), {})
write_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
backend_units = optional(number, 3)
read_units = optional(number, 3)
write_units = optional(number, 3)
})
| `{}` | no | -| [model\_uuid](#input\_model\_uuid) | Reference to an existing model resource or data source for the model to deploy to | `string` | n/a | yes | -| [opentelemetry\_collector](#input\_opentelemetry\_collector) | Application configuration for OpenTelemetry Collector. For more details: |
object({
app_name = optional(string, "otelcol")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | -| [s3\_access\_key](#input\_s3\_access\_key) | S3 access-key credential | `string` | n/a | yes | -| [s3\_endpoint](#input\_s3\_endpoint) | S3 endpoint | `string` | n/a | yes | -| [s3\_integrator](#input\_s3\_integrator) | Application configuration for all S3-integrators in coordinated workers. For more details: |
object({
channel = optional(string, "2/edge")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | -| [s3\_secret\_key](#input\_s3\_secret\_key) | S3 secret-key credential | `string` | n/a | yes | -| [ssc](#input\_ssc) | Application configuration for Self-signed-certificates. For more details: |
object({
app_name = optional(string, "ca")
channel = optional(string, "1/stable")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | -| [tempo\_bucket](#input\_tempo\_bucket) | Tempo bucket name | `string` | `"tempo"` | no | -| [tempo\_coordinator](#input\_tempo\_coordinator) | Application configuration for Tempo Coordinator. For more details: |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | -| [tempo\_worker](#input\_tempo\_worker) | Application configuration for all Tempo workers. For more details: |
object({
querier_config = optional(map(string), {})
query_frontend_config = optional(map(string), {})
ingester_config = optional(map(string), {})
distributor_config = optional(map(string), {})
compactor_config = optional(map(string), {})
metrics_generator_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
compactor_units = optional(number, 3)
distributor_units = optional(number, 3)
ingester_units = optional(number, 3)
metrics_generator_units = optional(number, 3)
querier_units = optional(number, 3)
query_frontend_units = optional(number, 3)
})
| `{}` | no | -| [traefik](#input\_traefik) | Application configuration for Traefik. For more details: |
object({
app_name = optional(string, "traefik")
channel = optional(string, "latest/stable")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | +| [architecture](#input\_architecture) | Architecture (e.g., amd64, arm64) | `string` | `"amd64"` | no | +| [base](#input\_base) | Base Ubuntu (e.g., ubuntu@22.04, ubuntu@24.04) | `string` | n/a | yes | +| [channel](#input\_channel) | Channel name (e.g., 14/stable, 16/edge) | `string` | n/a | yes | +| [charm](#input\_charm) | Name of the charm (e.g., postgresql) | `string` | n/a | yes | ## Outputs | Name | Description | |------|-------------| -| [components](#output\_components) | All Terraform charm modules which make up this product module | -| [offers](#output\_offers) | All Juju offers which are exposed by this product module | +| [charm\_revision](#output\_charm\_revision) | The revision number for the specified charm channel and base | ## Usage From d8b0fbddbba5d505675e65dbf6b75c88cd179bcc Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Thu, 15 Jan 2026 13:35:04 -0500 Subject: [PATCH 03/30] feat: add an upgrade doc --- docs/tutorial/index.rst | 16 ++- .../installation/cos-lite-microk8s-sandbox.md | 2 + docs/tutorial/upgrade-product-module.md | 127 ++++++++++++++++++ 3 files changed, 142 insertions(+), 3 deletions(-) create mode 100644 docs/tutorial/upgrade-product-module.md diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index ba75c2fb..a151de9b 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -19,6 +19,16 @@ or COS Lite, Juju-based observability stacks running on Kubernetes. 1. Deploy the observability stack +Upgrade +============= + +In this part of the tutorial you will learn how to upgrade COS( Lite) to a new channel using Terraform. + +.. toctree:: + :maxdepth: 1 + + 2. Upgrade COS to a new channel + Configuration ============= @@ -29,7 +39,7 @@ charm. .. toctree:: :maxdepth: 1 - 2. Sync alert rules from Git + 3. Sync alert rules from Git Instrumentation =============== @@ -41,7 +51,7 @@ application using the Grafana Agent machine charm. .. toctree:: :maxdepth: 1 - 3. Instrument machine charms + 4. 
Instrument machine charms Redaction @@ -52,4 +62,4 @@ By implementing a solid redaction strategy you can mitigate the risk of unwanted .. toctree:: :maxdepth: 1 - 4. Redact sensitive data + 5. Redact sensitive data diff --git a/docs/tutorial/installation/cos-lite-microk8s-sandbox.md b/docs/tutorial/installation/cos-lite-microk8s-sandbox.md index e93b23df..51952855 100644 --- a/docs/tutorial/installation/cos-lite-microk8s-sandbox.md +++ b/docs/tutorial/installation/cos-lite-microk8s-sandbox.md @@ -173,6 +173,8 @@ $ juju deploy cos-lite \ --overlay ./storage-small-overlay.yaml ``` +(deploy-cos-ref)= + ## Deploy COS Lite using Terraform Create a `cos-lite-microk8s-sandbox.tf` file with the following Terraform module, or include it in your Terraform plan: diff --git a/docs/tutorial/upgrade-product-module.md b/docs/tutorial/upgrade-product-module.md new file mode 100644 index 00000000..39ab0d81 --- /dev/null +++ b/docs/tutorial/upgrade-product-module.md @@ -0,0 +1,127 @@ +# Upgrade COS to a new channel + +In this example, you will learn how to deploy COS Lite and upgrade from channel `2/stable` to `2/edge`. To do this, we can deploy COS Lite via Terraform in the same way as [in the tutorial](https://documentation.ubuntu.com/observability/track-2/tutorial/installation/cos-lite-microk8s-sandbox). + +## Prerequisites + +This tutorial assumes that you already have the following: + +- Deployed {ref}`COS Lite with Terraform <deploy-cos-ref>` + +## Introduction + +Imagine you have COS Lite (or COS) deployed on a specific channel like `2/stable` and want to +upgrade to a different channel or track e.g., `2/edge`. To do so, an admin would have to manually +`juju refresh` each COS charm, or specify the correct revision in the Terraform module and apply. + +This is simplified with the `charmhubs` module, which allows the juju admin to specify a list of +COS charms to upgrade within the specified `track/channel`. The rest is handled by Terraform. 
+ +## Update the COS Lite Terraform module + +Once deployed, we can add the `locals` definition and a `charmhubs` module: + +```{note} +Copy all the Terraform blocks into one file and remove the `+` symbols. +They are only used to highlight the changes to the COS Lite Terraform module. +``` + +```{literalinclude} /tutorial/installation/cos-lite-microk8s-sandbox.tf +:lines: 1-7 +``` + +```diff ++ http = { ++ source = "hashicorp/http" ++ version = "~> 3.0" ++ } +``` + +```{literalinclude} /tutorial/installation/cos-lite-microk8s-sandbox.tf +:lines: 8-18, 20-21 +``` + +```diff ++ channel = local.channel ++ alertmanager = { revision = module.charmhubs["alertmanager"].charm_revision } ++ catalogue = { revision = module.charmhubs["catalogue"].charm_revision } ++ grafana = { revision = module.charmhubs["grafana"].charm_revision } ++ loki = { revision = module.charmhubs["loki"].charm_revision } ++ prometheus = { revision = module.charmhubs["prometheus"].charm_revision } +} + ++locals { ++ channel = "2/edge" ++ base = "ubuntu@24.04" ++ ++ charms = { ++ alertmanager = "alertmanager-k8s" ++ catalogue = "catalogue-k8s" ++ grafana = "grafana-k8s" ++ loki = "loki-k8s" ++ prometheus = "prometheus-k8s" ++ } ++} + ++module "charmhubs" { ++ source = "../charmhub" ++ for_each = local.charms ++ ++ charm = each.value ++ channel = local.channel ++ base = local.base ++ architecture = "amd64" ++} +``` + +and apply these changes on top of the previous state with: + +```shell +terraform -chdir= apply +``` + +you will notice that Terraform updates each charm to the latest revision in the `2/edge` channel: + +```shell +Terraform used the selected providers to generate the following +execution plan. 
Resource actions are indicated with the following +symbols: + + create + ~ update in-place + +Terraform will perform the following actions: + + # module.cos.module.alertmanager.juju_application.alertmanager will be updated in-place + ~ resource "juju_application" "alertmanager" { + id = "23dae45b-db71-405b-8035-1bc57a6e6285:alertmanager" + ~ machines = [] -> (known after apply) + name = "alertmanager" + ~ storage = [ + - { + - count = 1 -> null + - label = "data-5" -> null + - pool = "kubernetes" -> null + - size = "1G" -> null + }, + ] -> (known after apply) + # (7 unchanged attributes hidden) + + ~ charm { + ~ channel = "2/stable" -> "2/edge" + name = "alertmanager-k8s" + ~ revision = 191 -> 192 + # (1 unchanged attribute hidden) + } + } + +# snip ... + +Plan: 0 to add, 5 to change, 0 to destroy. +``` + +## Upgrade information + +You can consult the follow release documentation for upgrade compatibility: + +- [release-policy](/reference/release-policy/) +- [release-notes](/reference/release-notes/) From 0634ec8e76c7f88d7086ef5b739791299d47db57 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Fri, 16 Jan 2026 09:05:33 -0500 Subject: [PATCH 04/30] chore: doc improvements --- .../installation/cos-lite-microk8s-sandbox.tf | 5 +- docs/tutorial/upgrade-product-module.md | 101 +++++++++++------- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf b/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf index c3d1a1b0..9432b5c9 100644 --- a/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf +++ b/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf @@ -8,6 +8,8 @@ terraform { } } +# before-cos + resource "juju_model" "cos" { name = "cos" config = { logging-config = "=WARNING; unit=DEBUG" } @@ -16,7 +18,8 @@ resource "juju_model" "cos" { module "cos-lite" { source = "git::https://github.com/canonical/observability-stack//terraform/cos-lite?ref=track/2" model_uuid = juju_model.cos.uuid - channel 
= "2/stable" ssc = { channel = "1/stable" } traefik = { channel = "latest/edge" } + # before-channel + channel = "2/stable" } diff --git a/docs/tutorial/upgrade-product-module.md b/docs/tutorial/upgrade-product-module.md index 39ab0d81..2ceedf50 100644 --- a/docs/tutorial/upgrade-product-module.md +++ b/docs/tutorial/upgrade-product-module.md @@ -11,34 +11,32 @@ This tutorial assumes that you already have the following: ## Introduction Imagine you have COS Lite (or COS) deployed on a specific channel like `2/stable` and want to -upgrade to a different channel or track e.g., `2/edge`. To do so, an admin would have to manually -`juju refresh` each COS charm, or specify the correct revision in the Terraform module and apply. +upgrade to a different channel (or track) e.g., `2/edge`. To do so, an admin would have to manually +`juju refresh` each COS charm. Or they can determine the correct charm revisions, update the Terraform module, and apply. This is simplified with the `charmhubs` module, which allows the juju admin to specify a list of COS charms to upgrade within the specified `track/channel`. The rest is handled by Terraform. ## Update the COS Lite Terraform module -Once deployed, we can add the `locals` definition and a `charmhubs` module: +Once deployed, we can: + +1. update the `cos-lite` module +2. determine which charms to upgrade +3. add the `locals` and `charmhubs` modules ```{note} -Copy all the Terraform blocks into one file and remove the `+` symbols. -They are only used to highlight the changes to the COS Lite Terraform module. +This tutorial assumed you have deployed COS Lite from a root module located at `./main.tf`. 
``` -```{literalinclude} /tutorial/installation/cos-lite-microk8s-sandbox.tf -:lines: 1-7 -``` - -```diff -+ http = { -+ source = "hashicorp/http" -+ version = "~> 3.0" -+ } -``` +First, update your `cos-lite` module, in the existing `./main.tf` file, with the updated content: ```{literalinclude} /tutorial/installation/cos-lite-microk8s-sandbox.tf -:lines: 8-18, 20-21 +--- +language: hcl +start-after: "# before-cos" +end-before: "# before-channel" +--- ``` ```diff @@ -49,38 +47,56 @@ They are only used to highlight the changes to the COS Lite Terraform module. + loki = { revision = module.charmhubs["loki"].charm_revision } + prometheus = { revision = module.charmhubs["prometheus"].charm_revision } } +``` -+locals { -+ channel = "2/edge" -+ base = "ubuntu@24.04" -+ -+ charms = { -+ alertmanager = "alertmanager-k8s" -+ catalogue = "catalogue-k8s" -+ grafana = "grafana-k8s" -+ loki = "loki-k8s" -+ prometheus = "prometheus-k8s" -+ } -+} - -+module "charmhubs" { -+ source = "../charmhub" -+ for_each = local.charms -+ -+ charm = each.value -+ channel = local.channel -+ base = local.base -+ architecture = "amd64" -+} +Then remove the `+` symbols; they are only used to highlight the changes to the `cos-lite` module. 
+Finally, add the feature components (required for upgrading the product) into the same `./main.tf` file: + +```hcl +terraform { + required_providers { + juju = { + source = "juju/juju" + version = "~> 1.0" + } + http = { + source = "hashicorp/http" + version = "~> 3.0" + } + } +} + +locals { + channel = "2/edge" + base = "ubuntu@24.04" + + charms = { + alertmanager = "alertmanager-k8s" + catalogue = "catalogue-k8s" + grafana = "grafana-k8s" + loki = "loki-k8s" + prometheus = "prometheus-k8s" + } +} + +module "charmhubs" { + source = "../charmhub" + for_each = local.charms + + charm = each.value + channel = local.channel + base = local.base + architecture = "amd64" +} ``` -and apply these changes on top of the previous state with: +At this point, you will have one `main.tf` file. Now you can plan these changes with: ```shell -terraform -chdir= apply +terraform plan ``` -you will notice that Terraform updates each charm to the latest revision in the `2/edge` channel: +You will notice that Terraform plans to update each charm to the latest revision in the `2/edge` channel: ```shell Terraform used the selected providers to generate the following @@ -121,6 +137,9 @@ Plan: 0 to add, 5 to change, 0 to destroy. ## Upgrade information +This tutorial only considers upgrading COS Lite. However, the `charmhubs` module is product-agnostic +and can be used to upgrade individual charms and other products, e.g., COS. 
+ You can consult the follow release documentation for upgrade compatibility: - [release-policy](/reference/release-policy/) From 2c634d1ec6b2ea664edd8bfe433aeeca8012606a Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Mon, 19 Jan 2026 08:54:29 -0500 Subject: [PATCH 05/30] feat: charmhub inside COS Lite --- docs/tutorial/index.rst | 6 +- ...ct-module.md => refresh-product-module.md} | 62 +++++++++---------- terraform/cos-lite/applications.tf | 18 +++--- terraform/cos-lite/charmhub.tf | 30 +++++++++ terraform/cos-lite/integrations.tf | 9 ++- terraform/cos-lite/variables.tf | 7 +++ 6 files changed, 88 insertions(+), 44 deletions(-) rename docs/tutorial/{upgrade-product-module.md => refresh-product-module.md} (64%) create mode 100644 terraform/cos-lite/charmhub.tf diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index a151de9b..fd1fd2fd 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -19,15 +19,15 @@ or COS Lite, Juju-based observability stacks running on Kubernetes. 1. Deploy the observability stack -Upgrade +Refresh ============= -In this part of the tutorial you will learn how to upgrade COS( Lite) to a new channel using Terraform. +In this part of the tutorial you will learn how to refresh COS (or COS Lite) to a new channel using Terraform. .. toctree:: :maxdepth: 1 - 2. Upgrade COS to a new channel + 2. Refresh COS to a new channel Configuration ============= diff --git a/docs/tutorial/upgrade-product-module.md b/docs/tutorial/refresh-product-module.md similarity index 64% rename from docs/tutorial/upgrade-product-module.md rename to docs/tutorial/refresh-product-module.md index 2ceedf50..a9550dad 100644 --- a/docs/tutorial/upgrade-product-module.md +++ b/docs/tutorial/refresh-product-module.md @@ -1,6 +1,6 @@ -# Upgrade COS to a new channel +# Refresh COS to a new channel -In this example, you will learn how to deploy COS Lite and upgrade from channel `2/stable` to `2/edge`. 
To do this, we can deploy COS Lite via Terraform in the same way as [in the tutorial](https://documentation.ubuntu.com/observability/track-2/tutorial/installation/cos-lite-microk8s-sandbox). +In this example, you will learn how to deploy COS Lite and refresh from channel `2/stable` to `2/edge`. To do this, we can deploy COS Lite via Terraform in the same way as [in the tutorial](https://documentation.ubuntu.com/observability/track-2/tutorial/installation/cos-lite-microk8s-sandbox). ## Prerequisites @@ -11,19 +11,19 @@ This tutorial assumes that you already have the following: ## Introduction Imagine you have COS Lite (or COS) deployed on a specific channel like `2/stable` and want to -upgrade to a different channel (or track) e.g., `2/edge`. To do so, an admin would have to manually +refresh to a different channel (or track) e.g., `2/edge`. To do so, an admin would have to manually `juju refresh` each COS charm. Or they can determine the correct charm revisions, update the Terraform module, and apply. -This is simplified with the `charmhubs` module, which allows the juju admin to specify a list of -COS charms to upgrade within the specified `track/channel`. The rest is handled by Terraform. +This is simplified with the `charmhub` module, which allows the juju admin to specify a list of +COS charms to refresh within the specified `track/channel`. The rest is handled by Terraform. ## Update the COS Lite Terraform module Once deployed, we can: 1. update the `cos-lite` module -2. determine which charms to upgrade -3. add the `locals` and `charmhubs` modules +2. determine which charms to refresh +3. add the `locals` and `charmhub` modules ```{note} This tutorial assumed you have deployed COS Lite from a root module located at `./main.tf`. 
@@ -41,11 +41,11 @@ end-before: "# before-channel" ```diff + channel = local.channel -+ alertmanager = { revision = module.charmhubs["alertmanager"].charm_revision } -+ catalogue = { revision = module.charmhubs["catalogue"].charm_revision } -+ grafana = { revision = module.charmhubs["grafana"].charm_revision } -+ loki = { revision = module.charmhubs["loki"].charm_revision } -+ prometheus = { revision = module.charmhubs["prometheus"].charm_revision } ++ alertmanager = { revision = module.charmhub["alertmanager"].charm_revision } ++ catalogue = { revision = module.charmhub["catalogue"].charm_revision } ++ grafana = { revision = module.charmhub["grafana"].charm_revision } ++ loki = { revision = module.charmhub["loki"].charm_revision } ++ prometheus = { revision = module.charmhub["prometheus"].charm_revision } } ``` @@ -79,7 +79,7 @@ locals { } } -module "charmhubs" { +module "charmhub" { source = "../charmhub" for_each = local.charms @@ -109,18 +109,8 @@ Terraform will perform the following actions: # module.cos.module.alertmanager.juju_application.alertmanager will be updated in-place ~ resource "juju_application" "alertmanager" { - id = "23dae45b-db71-405b-8035-1bc57a6e6285:alertmanager" - ~ machines = [] -> (known after apply) - name = "alertmanager" - ~ storage = [ - - { - - count = 1 -> null - - label = "data-5" -> null - - pool = "kubernetes" -> null - - size = "1G" -> null - }, - ] -> (known after apply) - # (7 unchanged attributes hidden) + +# snip ... ~ charm { ~ channel = "2/stable" -> "2/edge" @@ -128,19 +118,27 @@ Terraform will perform the following actions: ~ revision = 191 -> 192 # (1 unchanged attribute hidden) } - } # snip ... Plan: 0 to add, 5 to change, 0 to destroy. ``` -## Upgrade information +and finally apply the changes with: + +```shell +terraform apply +``` + +At this point, you will have successfully upgraded COS Lite from `2/stable` to `2/edge`! + +## Refresh information -This tutorial only considers upgrading COS Lite. 
However, the `charmhubs` module is product-agnostic -and can be used to upgrade charms, and other products e.g., COS. +This tutorial only considers upgrading COS Lite. However, the `charmhub` module is product-agnostic +and can be used to refresh charms, and other products e.g., COS. -You can consult the follow release documentation for upgrade compatibility: +You can consult the follow release documentation for refresh compatibility: -- [release-policy](/reference/release-policy/) -- [release-notes](/reference/release-notes/) +- [how-to cross-track upgrade](/how-to/upgrade/) +- [release policy](/reference/release-policy/) +- [release notes](/reference/release-notes/) diff --git a/terraform/cos-lite/applications.tf b/terraform/cos-lite/applications.tf index dd1bd5ab..c752794a 100644 --- a/terraform/cos-lite/applications.tf +++ b/terraform/cos-lite/applications.tf @@ -1,23 +1,27 @@ module "alertmanager" { - source = "git::https://github.com/canonical/alertmanager-k8s-operator//terraform" + # source = "git::https://github.com/canonical/alertmanager-k8s-operator//terraform" + source = "../../../alertmanager-k8s-operator/terraform" app_name = var.alertmanager.app_name channel = var.channel config = var.alertmanager.config constraints = var.alertmanager.constraints model_uuid = var.model_uuid - revision = var.alertmanager.revision + revision = local.alertmanager_revision storage_directives = var.alertmanager.storage_directives units = var.alertmanager.units + + # TODO: Add validation or wrap this in a local } module "catalogue" { - source = "git::https://github.com/canonical/catalogue-k8s-operator//terraform" + # source = "git::https://github.com/canonical/catalogue-k8s-operator//terraform" + source = "../../../catalogue-k8s-operator/terraform" app_name = var.catalogue.app_name channel = var.channel config = var.catalogue.config constraints = var.catalogue.constraints model_uuid = var.model_uuid - revision = var.catalogue.revision + revision = local.catalogue_revision 
storage_directives = var.catalogue.storage_directives units = var.catalogue.units } @@ -29,7 +33,7 @@ module "grafana" { config = var.grafana.config constraints = var.grafana.constraints model_uuid = var.model_uuid - revision = var.grafana.revision + revision = local.grafana_revision storage_directives = var.grafana.storage_directives units = var.grafana.units } @@ -42,7 +46,7 @@ module "loki" { constraints = var.loki.constraints model_uuid = var.model_uuid storage_directives = var.loki.storage_directives - revision = var.loki.revision + revision = local.loki_revision units = var.loki.units } @@ -54,7 +58,7 @@ module "prometheus" { constraints = var.prometheus.constraints model_uuid = var.model_uuid storage_directives = var.prometheus.storage_directives - revision = var.prometheus.revision + revision = local.prometheus_revision units = var.prometheus.units } diff --git a/terraform/cos-lite/charmhub.tf b/terraform/cos-lite/charmhub.tf new file mode 100644 index 00000000..73470692 --- /dev/null +++ b/terraform/cos-lite/charmhub.tf @@ -0,0 +1,30 @@ +locals { + # User input takes priority + alertmanager_revision = var.alertmanager.revision != null ? var.alertmanager.revision : module.charmhub["alertmanager"].charm_revision + catalogue_revision = var.catalogue.revision != null ? var.catalogue.revision : module.charmhub["catalogue"].charm_revision + grafana_revision = var.grafana.revision != null ? var.grafana.revision : module.charmhub["grafana"].charm_revision + loki_revision = var.loki.revision != null ? var.loki.revision : module.charmhub["loki"].charm_revision + prometheus_revision = var.prometheus.revision != null ? var.prometheus.revision : module.charmhub["prometheus"].charm_revision +} + +variable "charms_to_refresh" { + description = "A map of charm names to query from Charmhub." 
+ type = map(string) + default = { + alertmanager = "alertmanager-k8s" + catalogue = "catalogue-k8s" + grafana = "grafana-k8s" + loki = "loki-k8s" + prometheus = "prometheus-k8s" + } +} + +module "charmhub" { + source = "../charmhub" + for_each = var.charms_to_refresh + + charm = each.value + channel = var.channel + base = var.base + architecture = "amd64" +} diff --git a/terraform/cos-lite/integrations.tf b/terraform/cos-lite/integrations.tf index b976f953..20943b10 100644 --- a/terraform/cos-lite/integrations.tf +++ b/terraform/cos-lite/integrations.tf @@ -234,12 +234,17 @@ resource "juju_integration" "catalogue_ingress" { } } +# TODO: Can we make this conditional based on the computed upgrade between Grafana + +# │ Unable to update application resource, got error: updating charm config: cannot upgrade application "grafana" to charm +# │ "ch:amd64/grafana-k8s-172": would break relation "grafana:ingress traefik:ingress" + resource "juju_integration" "grafana_ingress" { model_uuid = var.model_uuid application { name = module.traefik.app_name - endpoint = module.traefik.endpoints.ingress + endpoint = tonumber(local.alertmanager_revision) >= 175 ? module.traefik.endpoints.ingress : module.traefik.endpoints.traefik_route } application { @@ -413,7 +418,7 @@ resource "juju_integration" "external_grafana_ca_cert" { } resource "juju_integration" "external_prom_ca_cert" { - count = local.tls_termination ? 1 : 0 + count = local.tls_termination && tonumber(local.prometheus_revision) >= 276 ? 1 : 0 model_uuid = var.model_uuid application { diff --git a/terraform/cos-lite/variables.tf b/terraform/cos-lite/variables.tf index 49b379e2..3bca2be3 100644 --- a/terraform/cos-lite/variables.tf +++ b/terraform/cos-lite/variables.tf @@ -10,6 +10,13 @@ locals { tls_termination = var.external_certificates_offer_url != null ? true : false } +# TODO: Discuss how this was missed bc we don't have any base terraform tests. 
TF plan would catch this error +variable "base" { + description = "The operating system on which to deploy. E.g. ubuntu@22.04. Changing this value for machine charms will trigger a replace by terraform." + default = "ubuntu@24.04" + type = string +} + variable "channel" { description = "Channel that the applications are (unless overwritten by external_channels) deployed from" type = string From 424447acf2da467d3710a19af629a5444f125882 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Mon, 19 Jan 2026 09:49:38 -0500 Subject: [PATCH 06/30] chore: update tutorial --- docs/tutorial/refresh-product-module.md | 27 ++----------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/docs/tutorial/refresh-product-module.md b/docs/tutorial/refresh-product-module.md index a9550dad..996ef572 100644 --- a/docs/tutorial/refresh-product-module.md +++ b/docs/tutorial/refresh-product-module.md @@ -40,16 +40,11 @@ end-before: "# before-channel" ``` ```diff -+ channel = local.channel -+ alertmanager = { revision = module.charmhub["alertmanager"].charm_revision } -+ catalogue = { revision = module.charmhub["catalogue"].charm_revision } -+ grafana = { revision = module.charmhub["grafana"].charm_revision } -+ loki = { revision = module.charmhub["loki"].charm_revision } -+ prometheus = { revision = module.charmhub["prometheus"].charm_revision } ++ channel = local.channel } ``` -Then remove the `+` symbols; they are only used to highlight the changes to the `cos-lite` module. +Then remove the `+` symbol; it is only used to highlight the changes to the `cos-lite` module. 
Finally, add the feature components (required for upgrading the product) into the same `./main.tf` file: ```hcl @@ -69,24 +64,6 @@ terraform { locals { channel = "2/edge" base = "ubuntu@24.04" - - charms = { - alertmanager = "alertmanager-k8s" - catalogue = "catalogue-k8s" - grafana = "grafana-k8s" - loki = "loki-k8s" - prometheus = "prometheus-k8s" - } -} - -module "charmhub" { - source = "../charmhub" - for_each = local.charms - - charm = each.value - channel = local.channel - base = local.base - architecture = "amd64" } ``` From b3b6492157209f07e781128bb51fe1502cc6b89c Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Mon, 19 Jan 2026 10:05:04 -0500 Subject: [PATCH 07/30] chore: revert apps to remote source --- terraform/cos-lite/applications.tf | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/terraform/cos-lite/applications.tf b/terraform/cos-lite/applications.tf index c752794a..f1cd5203 100644 --- a/terraform/cos-lite/applications.tf +++ b/terraform/cos-lite/applications.tf @@ -1,6 +1,5 @@ module "alertmanager" { - # source = "git::https://github.com/canonical/alertmanager-k8s-operator//terraform" - source = "../../../alertmanager-k8s-operator/terraform" + source = "git::https://github.com/canonical/alertmanager-k8s-operator//terraform" app_name = var.alertmanager.app_name channel = var.channel config = var.alertmanager.config @@ -14,8 +13,7 @@ module "alertmanager" { } module "catalogue" { - # source = "git::https://github.com/canonical/catalogue-k8s-operator//terraform" - source = "../../../catalogue-k8s-operator/terraform" + source = "git::https://github.com/canonical/catalogue-k8s-operator//terraform" app_name = var.catalogue.app_name channel = var.channel config = var.catalogue.config From 91dcf5f2eec916e7ee7882700b08303e156be559 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Wed, 21 Jan 2026 10:11:40 -0500 Subject: [PATCH 08/30] chore --- .../installation/cos-lite-microk8s-sandbox.tf | 3 +- 
docs/tutorial/refresh-product-module.md | 38 +++++++------------ terraform/cos-lite/variables.tf | 2 +- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf b/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf index 9432b5c9..8e30b15e 100644 --- a/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf +++ b/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf @@ -20,6 +20,5 @@ module "cos-lite" { model_uuid = juju_model.cos.uuid ssc = { channel = "1/stable" } traefik = { channel = "latest/edge" } - # before-channel - channel = "2/stable" + channel = "2/stable" } diff --git a/docs/tutorial/refresh-product-module.md b/docs/tutorial/refresh-product-module.md index 996ef572..c9f94323 100644 --- a/docs/tutorial/refresh-product-module.md +++ b/docs/tutorial/refresh-product-module.md @@ -4,48 +4,43 @@ In this example, you will learn how to deploy COS Lite and refresh from channel ## Prerequisites -This tutorial assumes that you already have the following: +This tutorial assumes that you already: -- Deployed {ref}`COS Lite with Terraform ` +- Know how to deploy {ref}`COS Lite with Terraform ` ## Introduction Imagine you have COS Lite (or COS) deployed on a specific channel like `2/stable` and want to refresh to a different channel (or track) e.g., `2/edge`. To do so, an admin would have to manually -`juju refresh` each COS charm. Or they can determine the correct charm revisions, update the Terraform module, and apply. +`juju refresh` each COS charm and address any refresh errors. Alternatively, they can determine the +correct charm `channel` and `revision`(s), update the Terraform module, and apply. -This is simplified with the `charmhub` module, which allows the juju admin to specify a list of -COS charms to refresh within the specified `track/channel`. The rest is handled by Terraform. 
+This is simplified within COS (and COS Lite) by mimicking the `juju refresh` behavior on a product +level, allowing the juju admin to specify a list of charms to refresh within the specified +`track/channel`. The rest is handled by Terraform. ## Update the COS Lite Terraform module -Once deployed, we can: - -1. update the `cos-lite` module -2. determine which charms to refresh -3. add the `locals` and `charmhub` modules +Once deployed, we can determine which charms to refresh with the `charms_to_refresh` input variable, detailed in the [README](https://github.com/canonical/observability-stack/tree/main/terraform/cos-lite). This defaults to: all charms owned by the `observability-team`. ```{note} This tutorial assumed you have deployed COS Lite from a root module located at `./main.tf`. ``` -First, update your `cos-lite` module, in the existing `./main.tf` file, with the updated content: +Then, replace `2/stable` with `2/edge` in your `cos-lite` module within the existing `./main.tf` file: ```{literalinclude} /tutorial/installation/cos-lite-microk8s-sandbox.tf --- language: hcl start-after: "# before-cos" -end-before: "# before-channel" --- ``` -```diff -+ channel = local.channel -} +```{note} +The `base` input variable for the `cos-lite` module is important if the `track/channel` deploys charms to a different base than the default, detailed in the [README](https://github.com/canonical/observability-stack/tree/main/terraform/cos-lite). ``` -Then remove the `+` symbol; it is only used to highlight the changes to the `cos-lite` module. -Finally, add the feature components (required for upgrading the product) into the same `./main.tf` file: +Finally, add the provider definitions into the same `./main.tf` file: ```hcl terraform { @@ -60,20 +55,15 @@ terraform { } } } - -locals { - channel = "2/edge" - base = "ubuntu@24.04" -} ``` -At this point, you will have one `main.tf` file. 
Now you can plan these changes with: +At this point, you will have one `main.tf` file ready for deployment. Now you can plan these changes with: ```shell terraform plan ``` -you will notice that Terraform plans to update each charm to the latest revision in the `2/edge` channel: +and Terraform plans to update each charm to the latest revision in the `2/edge` channel: ```shell Terraform used the selected providers to generate the following diff --git a/terraform/cos-lite/variables.tf b/terraform/cos-lite/variables.tf index 3bca2be3..2d517a3f 100644 --- a/terraform/cos-lite/variables.tf +++ b/terraform/cos-lite/variables.tf @@ -12,7 +12,7 @@ locals { # TODO: Discuss how this was missed bc we don't have any base terraform tests. TF plan would catch this error variable "base" { - description = "The operating system on which to deploy. E.g. ubuntu@22.04. Changing this value for machine charms will trigger a replace by terraform." + description = "The operating system on which to deploy. E.g. ubuntu@22.04. Changing this value for machine charms will trigger a replace by terraform. Check Charmhub for per-charm base support." 
default = "ubuntu@24.04" type = string } From 188e4f57d1d1caefd8005cc249b7311e5282983e Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Mon, 9 Feb 2026 08:03:33 -0500 Subject: [PATCH 09/30] chore: dump ideas --- .../installation/cos-lite-microk8s-sandbox.tf | 2 +- terraform/charmhub/main.tf | 16 +++++++++++++--- terraform/cos-lite/charmhub.tf | 6 ++++++ terraform/cos-lite/versions.tf | 1 + 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf b/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf index 8e30b15e..630acc71 100644 --- a/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf +++ b/docs/tutorial/installation/cos-lite-microk8s-sandbox.tf @@ -18,7 +18,7 @@ resource "juju_model" "cos" { module "cos-lite" { source = "git::https://github.com/canonical/observability-stack//terraform/cos-lite?ref=track/2" model_uuid = juju_model.cos.uuid + channel = "2/stable" ssc = { channel = "1/stable" } traefik = { channel = "latest/edge" } - channel = "2/stable" } diff --git a/terraform/charmhub/main.tf b/terraform/charmhub/main.tf index 99aa8f3f..956aa9c0 100644 --- a/terraform/charmhub/main.tf +++ b/terraform/charmhub/main.tf @@ -45,13 +45,21 @@ data "http" "charmhub_info" { locals { charmhub_response = jsondecode(data.http.charmhub_info.response_body) - base_version = split("@", var.base)[1] + # base_version = split("@", var.base)[1] matching_channels = [ for entry in local.charmhub_response["channel-map"] : entry if( entry.channel.name == var.channel && - entry.channel.base.channel == local.base_version && + + # TODO: I think we can ignore this base input if we assume that 24.04 is always dev/and track/2 + # TODO: Capture all matching JSON bodies for channel & architecture. Then validate that it's only one. If not, the user should be warned that the base needs to be specified. + # E.g. you specify channel as 1/stable, but then base defaults to 24.04. 
This would fail bc 22.04 is for 1/stable + + # TODO: Test that this works with the product to charm channel mapping like the revisions override I have + # curl "https://api.charmhub.io/v2/charms/info/alertmanager-k8s?fields=channel-map.revision.revision" | jq -r '.["channel-map"] + + # entry.channel.base.channel == local.base_version && entry.channel.base.architecture == var.architecture ) ] @@ -62,7 +70,9 @@ locals { check "revision_found" { assert { condition = local.revision != null - error_message = "No matching revision found for charm '${var.charm}' with channel '${var.channel}', base '${var.base}', and architecture '${var.architecture}'. Please verify the combination exists in Charmhub." + # TODO: Undo + # error_message = "No matching revision found for charm '${var.charm}' with channel '${var.channel}', base '${var.base}', and architecture '${var.architecture}'. Please verify the combination exists in Charmhub." + error_message = "No matching revision found for charm '${var.charm}' with channel '${var.channel}', and architecture '${var.architecture}'. Please verify the combination exists in Charmhub." 
} } diff --git a/terraform/cos-lite/charmhub.tf b/terraform/cos-lite/charmhub.tf index 73470692..f839cc62 100644 --- a/terraform/cos-lite/charmhub.tf +++ b/terraform/cos-lite/charmhub.tf @@ -28,3 +28,9 @@ module "charmhub" { base = var.base architecture = "amd64" } + +# TODO: Remove +output "charm_revisions" { + description = "The revision number for the specified charm channel and base" + value = { for k, v in module.charmhub : k => v.charm_revision } +} \ No newline at end of file diff --git a/terraform/cos-lite/versions.tf b/terraform/cos-lite/versions.tf index f65a3495..bac56e08 100644 --- a/terraform/cos-lite/versions.tf +++ b/terraform/cos-lite/versions.tf @@ -5,5 +5,6 @@ terraform { source = "juju/juju" version = "~> 1.0" } + # TODO: Add the http provider here } } \ No newline at end of file From 97f585a827944cda17d629d183e52927cfa26547 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Fri, 20 Mar 2026 10:59:05 -0400 Subject: [PATCH 10/30] chore: remove charmhub module --- terraform/charmhub/README.md | 89 ------------------- terraform/charmhub/main.tf | 82 ----------------- .../cos-lite/{charmhub.tf => upgrades.tf} | 61 +++++++++++++ 3 files changed, 61 insertions(+), 171 deletions(-) delete mode 100644 terraform/charmhub/README.md delete mode 100644 terraform/charmhub/main.tf rename terraform/cos-lite/{charmhub.tf => upgrades.tf} (55%) diff --git a/terraform/charmhub/README.md b/terraform/charmhub/README.md deleted file mode 100644 index e5aa6946..00000000 --- a/terraform/charmhub/README.md +++ /dev/null @@ -1,89 +0,0 @@ -# Terraform module for the COS solution - -This Terraform module computes a charm’s latest revision (from a channel and base) using the CharmHub API. - - -## Providers - -| Name | Version | -|------|---------| -| [http](#provider\_http) | ~> 3.0 | - -## Modules - -No modules. 
- -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [architecture](#input\_architecture) | Architecture (e.g., amd64, arm64) | `string` | `"amd64"` | no | -| [base](#input\_base) | Base Ubuntu (e.g., ubuntu@22.04, ubuntu@24.04) | `string` | n/a | yes | -| [channel](#input\_channel) | Channel name (e.g., 14/stable, 16/edge) | `string` | n/a | yes | -| [charm](#input\_charm) | Name of the charm (e.g., postgresql) | `string` | n/a | yes | - -## Outputs - -| Name | Description | -|------|-------------| -| [charm\_revision](#output\_charm\_revision) | The revision number for the specified charm channel and base | - - -## Usage - -This example defines and provides multiple charm names to the `charmhubs` module. This module then -computes the latest revision in the specified channel e.g., `2/stable`. Finally, it creates -`juju_application.apps` with the computed revisions. - -```hcl -terraform { - required_providers { - juju = { - source = "juju/juju" - } - http = { - source = "hashicorp/http" - version = "~> 3.0" - } - } -} - -locals { - channel = "2/stable" - base = "ubuntu@24.04" - - charms = { - alertmanager = "alertmanager-k8s" - prometheus = "prometheus-k8s" - grafana = "grafana-k8s" - } -} - -module "charmhubs" { - source = "../charmhub" - for_each = local.charms - - charm = each.value - channel = local.channel - base = local.base - architecture = "amd64" -} - -resource "juju_model" "development" { - name = "development" -} - -resource "juju_application" "apps" { - for_each = local.charms - - model_uuid = juju_model.development.uuid - trust = true - - charm { - name = each.value - channel = local.channel - revision = module.charmhubs[each.key].charm_revision - base = local.base - } -} -``` diff --git a/terraform/charmhub/main.tf b/terraform/charmhub/main.tf deleted file mode 100644 index 956aa9c0..00000000 --- a/terraform/charmhub/main.tf +++ /dev/null @@ -1,82 +0,0 @@ -terraform { - 
required_providers { - http = { - source = "hashicorp/http" - version = "~> 3.0" - } - } -} - -variable "charm" { - description = "Name of the charm (e.g., postgresql)" - type = string -} - -variable "channel" { - description = "Channel name (e.g., 14/stable, 16/edge)" - type = string -} - -variable "base" { - description = "Base Ubuntu (e.g., ubuntu@22.04, ubuntu@24.04)" - type = string -} - -variable "architecture" { - description = "Architecture (e.g., amd64, arm64)" - type = string - default = "amd64" -} - -data "http" "charmhub_info" { - url = "https://api.charmhub.io/v2/charms/info/${var.charm}?fields=channel-map.revision.revision" - - request_headers = { - Accept = "application/json" - } - - lifecycle { - postcondition { - condition = self.status_code == 200 - error_message = "Failed to fetch charm info from Charmhub API" - } - } -} - -locals { - charmhub_response = jsondecode(data.http.charmhub_info.response_body) - # base_version = split("@", var.base)[1] - - matching_channels = [ - for entry in local.charmhub_response["channel-map"] : - entry if( - entry.channel.name == var.channel && - - # TODO: I think we can ignore this base input if we assume that 24.04 is always dev/and track/2 - # TODO: Capture all matching JSON bodies for channel & architecture. Then validate that it's only one. If not, the user should be warned that the base needs to be specified. - # E.g. you specify channel as 1/stable, but then base defaults to 24.04. This would fail bc 22.04 is for 1/stable - - # TODO: Test that this works with the product to charm channel mapping like the revisions override I have - # curl "https://api.charmhub.io/v2/charms/info/alertmanager-k8s?fields=channel-map.revision.revision" | jq -r '.["channel-map"] - - # entry.channel.base.channel == local.base_version && - entry.channel.base.architecture == var.architecture - ) - ] - - revision = length(local.matching_channels) > 0 ? 
local.matching_channels[0].revision.revision : null -} - -check "revision_found" { - assert { - condition = local.revision != null - # TODO: Undo - # error_message = "No matching revision found for charm '${var.charm}' with channel '${var.channel}', base '${var.base}', and architecture '${var.architecture}'. Please verify the combination exists in Charmhub." - error_message = "No matching revision found for charm '${var.charm}' with channel '${var.channel}', and architecture '${var.architecture}'. Please verify the combination exists in Charmhub." - } -} - -output "charm_revision" { - description = "The revision number for the specified charm channel and base" - value = local.revision -} diff --git a/terraform/cos-lite/charmhub.tf b/terraform/cos-lite/upgrades.tf similarity index 55% rename from terraform/cos-lite/charmhub.tf rename to terraform/cos-lite/upgrades.tf index f839cc62..46529ccc 100644 --- a/terraform/cos-lite/charmhub.tf +++ b/terraform/cos-lite/upgrades.tf @@ -33,4 +33,65 @@ module "charmhub" { output "charm_revisions" { description = "The revision number for the specified charm channel and base" value = { for k, v in module.charmhub : k => v.charm_revision } +} + + +# -------------- Upgrade logic -------------- + +locals { + channel = "dev/edge" +} + +data "juju_charm" "graphana_info" { + charm = "grafana-k8s" + channel = local.channel + base = "ubuntu@24.04" +} + +resource "juju_application" "grafana" { + model_uuid = juju_model.test.uuid + trust = true + + charm { + name = "grafana-k8s" + channel = local.channel + revision = data.juju_charm.graphana_info.revision + } +} + +resource "juju_model" "test" { + name = "test-2131231" +} + +resource "juju_application" "traefik" { + model_uuid = juju_model.test.uuid + trust = true + + charm { + name = "traefik-k8s" + channel = "latest/stable" + } +} + +resource "terraform_data" "interface" { + input = data.juju_charm.graphana_info.requires["ingress"] +} + +resource "juju_integration" "ingress" { + 
model_uuid = juju_model.test.uuid + + application { + name = juju_application.traefik.name + } + + application { + name = juju_application.grafana.name + endpoint = "ingress" + } + + lifecycle { + replace_triggered_by = [ + terraform_data.interface + ] + } } \ No newline at end of file From 440dbee77739307bcfeaa0ea1e94f0c222df2394 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Sun, 22 Mar 2026 19:29:52 -0400 Subject: [PATCH 11/30] chore: implement juju_data --- terraform/cos-lite/applications.tf | 20 +++--- terraform/cos-lite/integrations.tf | 16 ++--- terraform/cos-lite/upgrades.tf | 97 +++--------------------------- terraform/cos-lite/variables.tf | 7 +++ terraform/cos-lite/versions.tf | 1 - 5 files changed, 33 insertions(+), 108 deletions(-) diff --git a/terraform/cos-lite/applications.tf b/terraform/cos-lite/applications.tf index f1cd5203..a040aa84 100644 --- a/terraform/cos-lite/applications.tf +++ b/terraform/cos-lite/applications.tf @@ -5,21 +5,19 @@ module "alertmanager" { config = var.alertmanager.config constraints = var.alertmanager.constraints model_uuid = var.model_uuid - revision = local.alertmanager_revision + revision = var.alertmanager.revision storage_directives = var.alertmanager.storage_directives units = var.alertmanager.units - - # TODO: Add validation or wrap this in a local } module "catalogue" { source = "git::https://github.com/canonical/catalogue-k8s-operator//terraform" app_name = var.catalogue.app_name - channel = var.channel + channel = var.catalogue.channel config = var.catalogue.config constraints = var.catalogue.constraints model_uuid = var.model_uuid - revision = local.catalogue_revision + revision = var.catalogue.revision storage_directives = var.catalogue.storage_directives units = var.catalogue.units } @@ -27,11 +25,11 @@ module "catalogue" { module "grafana" { source = "git::https://github.com/canonical/grafana-k8s-operator//terraform" app_name = var.grafana.app_name - channel = var.channel + channel = var.grafana.channel 
config = var.grafana.config constraints = var.grafana.constraints model_uuid = var.model_uuid - revision = local.grafana_revision + revision = data.juju_charm.grafana_info.revision storage_directives = var.grafana.storage_directives units = var.grafana.units } @@ -39,24 +37,24 @@ module "grafana" { module "loki" { source = "git::https://github.com/canonical/loki-k8s-operator//terraform" app_name = var.loki.app_name - channel = var.channel + channel = var.loki.channel config = var.loki.config constraints = var.loki.constraints model_uuid = var.model_uuid storage_directives = var.loki.storage_directives - revision = local.loki_revision + revision = var.loki.revision units = var.loki.units } module "prometheus" { source = "git::https://github.com/canonical/prometheus-k8s-operator//terraform" app_name = var.prometheus.app_name - channel = var.channel + channel = var.prometheus.channel config = var.prometheus.config constraints = var.prometheus.constraints model_uuid = var.model_uuid storage_directives = var.prometheus.storage_directives - revision = local.prometheus_revision + revision = var.prometheus.revision units = var.prometheus.units } diff --git a/terraform/cos-lite/integrations.tf b/terraform/cos-lite/integrations.tf index 20943b10..48e6e588 100644 --- a/terraform/cos-lite/integrations.tf +++ b/terraform/cos-lite/integrations.tf @@ -234,23 +234,26 @@ resource "juju_integration" "catalogue_ingress" { } } -# TODO: Can we make this conditional based on the computed upgrade between Grafana - -# │ Unable to update application resource, got error: updating charm config: cannot upgrade application "grafana" to charm -# │ "ch:amd64/grafana-k8s-172": would break relation "grafana:ingress traefik:ingress" - resource "juju_integration" "grafana_ingress" { model_uuid = var.model_uuid application { name = module.traefik.app_name - endpoint = tonumber(local.alertmanager_revision) >= 175 ? 
module.traefik.endpoints.ingress : module.traefik.endpoints.traefik_route } application { name = module.grafana.app_name endpoint = module.grafana.endpoints.ingress } + + # NOTE: grafana.revision >= 175 the ingress endpoint changes from traefik_route to + # ingress_per_app so we need a lifecycle to trigger integration replacement, otherwise + # the upgrade will fail + lifecycle { + replace_triggered_by = [ + terraform_data.interface + ] + } } resource "juju_integration" "prometheus_ingress" { @@ -418,7 +421,6 @@ resource "juju_integration" "external_grafana_ca_cert" { } resource "juju_integration" "external_prom_ca_cert" { - count = local.tls_termination && tonumber(local.prometheus_revision) >= 276 ? 1 : 0 model_uuid = var.model_uuid application { diff --git a/terraform/cos-lite/upgrades.tf b/terraform/cos-lite/upgrades.tf index 46529ccc..a9624766 100644 --- a/terraform/cos-lite/upgrades.tf +++ b/terraform/cos-lite/upgrades.tf @@ -1,97 +1,16 @@ -locals { - # User input takes priority - alertmanager_revision = var.alertmanager.revision != null ? var.alertmanager.revision : module.charmhub["alertmanager"].charm_revision - catalogue_revision = var.catalogue.revision != null ? var.catalogue.revision : module.charmhub["catalogue"].charm_revision - grafana_revision = var.grafana.revision != null ? var.grafana.revision : module.charmhub["grafana"].charm_revision - loki_revision = var.loki.revision != null ? var.loki.revision : module.charmhub["loki"].charm_revision - prometheus_revision = var.prometheus.revision != null ? var.prometheus.revision : module.charmhub["prometheus"].charm_revision -} - -variable "charms_to_refresh" { - description = "A map of charm names to query from Charmhub." 
- type = map(string) - default = { - alertmanager = "alertmanager-k8s" - catalogue = "catalogue-k8s" - grafana = "grafana-k8s" - loki = "loki-k8s" - prometheus = "prometheus-k8s" - } -} - -module "charmhub" { - source = "../charmhub" - for_each = var.charms_to_refresh - - charm = each.value - channel = var.channel - base = var.base - architecture = "amd64" -} - -# TODO: Remove -output "charm_revisions" { - description = "The revision number for the specified charm channel and base" - value = { for k, v in module.charmhub : k => v.charm_revision } -} - - # -------------- Upgrade logic -------------- -locals { - channel = "dev/edge" -} - -data "juju_charm" "graphana_info" { +## -------- grafana.revision >= 175 ---------- +# the ingress endpoint changes from traefik_route to ingress_per_app so we need a lifecycle to +# trigger integration replacement, otherwise the upgrade will fail +data "juju_charm" "grafana_info" { charm = "grafana-k8s" - channel = local.channel - base = "ubuntu@24.04" -} - -resource "juju_application" "grafana" { - model_uuid = juju_model.test.uuid - trust = true - - charm { - name = "grafana-k8s" - channel = local.channel - revision = data.juju_charm.graphana_info.revision - } -} - -resource "juju_model" "test" { - name = "test-2131231" -} - -resource "juju_application" "traefik" { - model_uuid = juju_model.test.uuid - trust = true - - charm { - name = "traefik-k8s" - channel = "latest/stable" - } + channel = var.grafana.channel + base = var.base } resource "terraform_data" "interface" { - input = data.juju_charm.graphana_info.requires["ingress"] + input = data.juju_charm.grafana_info.requires["ingress"] } -resource "juju_integration" "ingress" { - model_uuid = juju_model.test.uuid - - application { - name = juju_application.traefik.name - } - - application { - name = juju_application.grafana.name - endpoint = "ingress" - } - - lifecycle { - replace_triggered_by = [ - terraform_data.interface - ] - } -} \ No newline at end of file +# 
-------------- End upgrade logic -------------- \ No newline at end of file diff --git a/terraform/cos-lite/variables.tf b/terraform/cos-lite/variables.tf index 2d517a3f..b3666a47 100644 --- a/terraform/cos-lite/variables.tf +++ b/terraform/cos-lite/variables.tf @@ -58,6 +58,7 @@ variable "external_ca_cert_offer_url" { variable "alertmanager" { type = object({ app_name = optional(string, "alertmanager") + channel = optional(string, "dev/edge") config = optional(map(string), {}) constraints = optional(string, "arch=amd64") revision = optional(number, null) @@ -71,6 +72,7 @@ variable "alertmanager" { variable "catalogue" { type = object({ app_name = optional(string, "catalogue") + channel = optional(string, "dev/edge") config = optional(map(string), {}) constraints = optional(string, "arch=amd64") revision = optional(number, null) @@ -81,9 +83,12 @@ variable "catalogue" { description = "Application configuration for Catalogue. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application" } +# TODO: Update all charms to surface their channel input, since we will have fine-grained channels per charm +# TODO: Update the channel defaults to be the track variable "grafana" { type = object({ app_name = optional(string, "grafana") + channel = optional(string, "dev/edge") config = optional(map(string), {}) constraints = optional(string, "arch=amd64") revision = optional(number, null) @@ -97,6 +102,7 @@ variable "grafana" { variable "loki" { type = object({ app_name = optional(string, "loki") + channel = optional(string, "dev/edge") config = optional(map(string), {}) constraints = optional(string, "arch=amd64") revision = optional(number, null) @@ -110,6 +116,7 @@ variable "loki" { variable "prometheus" { type = object({ app_name = optional(string, "prometheus") + channel = optional(string, "dev/edge") config = optional(map(string), {}) constraints = optional(string, "arch=amd64") revision = optional(number, null) diff --git 
a/terraform/cos-lite/versions.tf b/terraform/cos-lite/versions.tf index bac56e08..f65a3495 100644 --- a/terraform/cos-lite/versions.tf +++ b/terraform/cos-lite/versions.tf @@ -5,6 +5,5 @@ terraform { source = "juju/juju" version = "~> 1.0" } - # TODO: Add the http provider here } } \ No newline at end of file From f4ffc389c7af8d3ebf0d3a2fb3f2c35207b27c1b Mon Sep 17 00:00:00 2001 From: Sina P <55766091+sinapah@users.noreply.github.com> Date: Wed, 4 Feb 2026 09:02:29 -0500 Subject: [PATCH 12/30] fix: remove ubuntu/litestream from list of oci images (#177) --- docs/reference/rock-oci-images.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/reference/rock-oci-images.md b/docs/reference/rock-oci-images.md index 6127bb4e..30c894f9 100644 --- a/docs/reference/rock-oci-images.md +++ b/docs/reference/rock-oci-images.md @@ -8,7 +8,6 @@ | `ubuntu/grafana-agent` | [Image](https://hub.docker.com/r/ubuntu/grafana-agent) | [Source](https://github.com/canonical/grafana-agent-rock) | [Issues](https://github.com/canonical/grafana-agent-rock/issues) | | `ubuntu/grafana` | [Image](https://hub.docker.com/r/ubuntu/grafana) | [Source](https://github.com/canonical/grafana-rock) | [Issues](https://github.com/canonical/grafana-rock/issues) | | `ubuntu/karma` | [Image](https://hub.docker.com/r/ubuntu/karma) | [Source](https://github.com/canonical/karma-rock) | [Issues](https://github.com/canonical/karma-rock/issues) | -| `ubuntu/litestream` | [Image](https://hub.docker.com/r/ubuntu/litestream) | [Source](https://github.com/canonical/litestream-rock) | [Issues](https://github.com/canonical/litestream-rock/issues) | | `ubuntu/loki` | [Image](https://hub.docker.com/r/ubuntu/loki) | [Source](https://github.com/canonical/loki-rock) | [Issues](https://github.com/canonical/loki-rock/issues) | | `ubuntu/mimir` | [Image](https://hub.docker.com/r/ubuntu/mimir) | [Source](https://github.com/canonical/mimir-rock) | [Issues](https://github.com/canonical/mimir-rock/issues) | | 
`ubuntu/nginx-prometheus-exporter` | [Image](https://hub.docker.com/r/ubuntu/nginx-prometheus-exporter) | [Source](https://github.com/canonical/nginx-prometheus-exporter-rock) | [Issues](https://github.com/canonical/nginx-prometheus-exporter-rock/issues) | From 5c7bf3aa1049f128bb38f006f923c815b56a448d Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Wed, 4 Feb 2026 09:09:16 -0500 Subject: [PATCH 13/30] fix: Outdated docs (#178) --- .../integrating-cos-lite-with-uncharmed-applications.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/how-to/integrating-cos-lite-with-uncharmed-applications.md b/docs/how-to/integrating-cos-lite-with-uncharmed-applications.md index 73db6bbc..ac98f562 100644 --- a/docs/how-to/integrating-cos-lite-with-uncharmed-applications.md +++ b/docs/how-to/integrating-cos-lite-with-uncharmed-applications.md @@ -99,9 +99,7 @@ See [this guide](https://github.com/canonical/cos-configuration-k8s-operator#dep ### Using TLS -To enable secure communications with (and within) COS Lite, deploy COS Lite with the -[TLS overlay](https://github.com/canonical/cos-lite-bundle/pull/80). -You can follow [this guide](https://charmhub.io/traefik-k8s/docs/tls-termination) to enable TLS in Traefik and COS Lite. +You can follow [this guide](./configure-tls-encryption.md) to enable TLS in COS and COS Lite. ### Grafana Agent snap as a client As a client (e.g. 
scraping `/metrics` endpoint), Grafana Agent must trust the CA that signed the COS charms (or the COS From 055a627677c83dd9bfcf35e1bade8430b28e9122 Mon Sep 17 00:00:00 2001 From: Sina P <55766091+sinapah@users.noreply.github.com> Date: Wed, 4 Feb 2026 09:33:15 -0500 Subject: [PATCH 14/30] chore: add blackbox exporter machine to list of charms in docs (#176) --- docs/reference/charms.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/reference/charms.md b/docs/reference/charms.md index 27af7e7d..f5d70c30 100644 --- a/docs/reference/charms.md +++ b/docs/reference/charms.md @@ -33,6 +33,7 @@ | Project | Substrate | Charmhub | Source Code | Bug Report | |--------------------------|-----------|--------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------| | Blackbox Exporter | K8s | [Charmhub](https://charmhub.io/blackbox-exporter-k8s) | [Source](https://github.com/canonical/blackbox-exporter-k8s-operator) | [Issues](https://github.com/canonical/blackbox-exporter-k8s-operator/issues) | +| Blackbox Exporter | Machine | [Charmhub](https://charmhub.io/blackbox-exporter) | [Source](https://github.com/canonical/blackbox-exporter-operator) | [Issues](https://github.com/canonical/blackbox-exporter-operator/issues) | | COS Configuration | K8s | [Charmhub](https://charmhub.io/cos-configuration-k8s) | [Source](https://github.com/canonical/cos-configuration-k8s-operator) | [Issues](https://github.com/canonical/cos-configuration-k8s-operator/issues) | | COS Proxy | Machines | [Charmhub](https://charmhub.io/cos-proxy) | [Source](https://github.com/canonical/cos-proxy-operator) | [Issues](https://github.com/canonical/cos-proxy-operator/issues) | | Grafana Agent | K8s | [Charmhub](https://charmhub.io/grafana-agent-k8s) | [Source](https://github.com/canonical/grafana-agent-k8s-operator) | 
[Issues](https://github.com/canonical/grafana-agent-k8s-operator/issues) | From 42999c2affe5970e4e01f1e1917423b682d56817 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 16:04:47 -0500 Subject: [PATCH 15/30] docs: fix extra double quote in cloud-init script (#181) --- docs/tutorial/installation/cos-lite-canonical-k8s-sandbox.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/installation/cos-lite-canonical-k8s-sandbox.conf b/docs/tutorial/installation/cos-lite-canonical-k8s-sandbox.conf index 03246a69..2856bc86 100644 --- a/docs/tutorial/installation/cos-lite-canonical-k8s-sandbox.conf +++ b/docs/tutorial/installation/cos-lite-canonical-k8s-sandbox.conf @@ -11,7 +11,7 @@ snap: runcmd: - | - echo "Setting up K8s..."" + echo "Setting up K8s..." k8s bootstrap k8s status --wait-ready k8s enable local-storage From f2b3517b0748a38f6f80169f11bd4ea20e78a363 Mon Sep 17 00:00:00 2001 From: Bartlomiej Gmerek <42570669+Gmerold@users.noreply.github.com> Date: Thu, 12 Feb 2026 08:33:13 +0100 Subject: [PATCH 16/30] Separates the storage directives for different worker roles (#182) * Separates the storage directives for different worker roles Signed-off-by: Bartlomiej Gmerek * chore: TODOs for tests * Separates storage directives for Tempo workers Signed-off-by: Bartlomiej Gmerek * Separates storage directives for Tempo workers Signed-off-by: Bartlomiej Gmerek * Cleans up after testing Signed-off-by: Bartlomiej Gmerek --------- Signed-off-by: Bartlomiej Gmerek Co-authored-by: Michael Thamm --- terraform/cos/README.md | 4 +- terraform/cos/applications.tf | 159 ++++++++++++++++++---------------- terraform/cos/variables.tf | 75 +++++++++------- terraform/loki/README.md | 4 +- terraform/loki/main.tf | 6 +- terraform/loki/variables.tf | 16 +++- terraform/mimir/README.md | 4 +- terraform/mimir/main.tf | 6 +- terraform/mimir/variables.tf | 16 +++- 9 files changed, 168 insertions(+), 122 
deletions(-) diff --git a/terraform/cos/README.md b/terraform/cos/README.md index 4a951003..86f4953a 100644 --- a/terraform/cos/README.md +++ b/terraform/cos/README.md @@ -41,10 +41,10 @@ This is a Terraform module facilitating the deployment of the COS solution, usin | [internal\_tls](#input\_internal\_tls) | Specify whether to use TLS or not for internal COS communication. By default, TLS is enabled using self-signed-certificates | `bool` | `true` | no | | [loki\_bucket](#input\_loki\_bucket) | Loki bucket name | `string` | `"loki"` | no | | [loki\_coordinator](#input\_loki\_coordinator) | Application configuration for Loki Coordinator. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | -| [loki\_worker](#input\_loki\_worker) | Application configuration for all Loki Workers. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
backend_config = optional(map(string), {})
read_config = optional(map(string), {})
write_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
backend_units = optional(number, 3)
read_units = optional(number, 3)
write_units = optional(number, 3)
})
| `{}` | no | +| [loki\_worker](#input\_loki\_worker) | Application configuration for all Loki Workers. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
backend_config = optional(map(string), {})
read_config = optional(map(string), {})
write_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
backend_storage_directives = optional(map(string), {})
read_storage_directives = optional(map(string), {})
write_storage_directives = optional(map(string), {})
backend_units = optional(number, 3)
read_units = optional(number, 3)
write_units = optional(number, 3)
})
| `{}` | no | | [mimir\_bucket](#input\_mimir\_bucket) | Mimir bucket name | `string` | `"mimir"` | no | | [mimir\_coordinator](#input\_mimir\_coordinator) | Application configuration for Mimir Coordinator. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | -| [mimir\_worker](#input\_mimir\_worker) | Application configuration for all Mimir Workers. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
backend_config = optional(map(string), {})
read_config = optional(map(string), {})
write_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
backend_units = optional(number, 3)
read_units = optional(number, 3)
write_units = optional(number, 3)
})
| `{}` | no | +| [mimir\_worker](#input\_mimir\_worker) | Application configuration for all Mimir Workers. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
backend_config = optional(map(string), {})
read_config = optional(map(string), {})
write_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
backend_storage_directives = optional(map(string), {})
read_storage_directives = optional(map(string), {})
write_storage_directives = optional(map(string), {})
backend_units = optional(number, 3)
read_units = optional(number, 3)
write_units = optional(number, 3)
})
| `{}` | no | | [model\_uuid](#input\_model\_uuid) | Reference to an existing model resource or data source for the model to deploy to | `string` | n/a | yes | | [opentelemetry\_collector](#input\_opentelemetry\_collector) | Application configuration for OpenTelemetry Collector. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
app_name = optional(string, "otelcol")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | | [s3\_access\_key](#input\_s3\_access\_key) | S3 access-key credential | `string` | n/a | yes | diff --git a/terraform/cos/applications.tf b/terraform/cos/applications.tf index 1c56535a..bd59c7f0 100644 --- a/terraform/cos/applications.tf +++ b/terraform/cos/applications.tf @@ -35,34 +35,36 @@ module "grafana" { } module "loki" { - source = "git::https://github.com/canonical/observability-stack//terraform/loki" - anti_affinity = var.anti_affinity - channel = var.channel - model_uuid = var.model_uuid - s3_endpoint = var.s3_endpoint - s3_secret_key = var.s3_secret_key - s3_access_key = var.s3_access_key - s3_bucket = var.loki_bucket - s3_integrator_channel = var.s3_integrator.channel - s3_integrator_config = var.s3_integrator.config - s3_integrator_constraints = var.s3_integrator.constraints - s3_integrator_revision = var.s3_integrator.revision - s3_integrator_storage_directives = var.s3_integrator.storage_directives - s3_integrator_units = var.s3_integrator.units - coordinator_config = var.loki_coordinator.config - coordinator_constraints = var.loki_coordinator.constraints - coordinator_revision = var.loki_coordinator.revision - coordinator_storage_directives = var.loki_coordinator.storage_directives - coordinator_units = var.loki_coordinator.units - backend_config = var.loki_worker.backend_config - read_config = var.loki_worker.read_config - write_config = var.loki_worker.write_config - worker_constraints = var.loki_worker.constraints - worker_revision = var.loki_worker.revision - worker_storage_directives = var.loki_worker.storage_directives - backend_units = var.loki_worker.backend_units - read_units = var.loki_worker.read_units - write_units = var.loki_worker.write_units + source = "git::https://github.com/canonical/observability-stack//terraform/loki" + anti_affinity = var.anti_affinity + channel = var.channel + model_uuid = var.model_uuid + s3_endpoint = var.s3_endpoint + s3_secret_key = var.s3_secret_key + s3_access_key = var.s3_access_key + 
s3_bucket = var.loki_bucket + s3_integrator_channel = var.s3_integrator.channel + s3_integrator_config = var.s3_integrator.config + s3_integrator_constraints = var.s3_integrator.constraints + s3_integrator_revision = var.s3_integrator.revision + s3_integrator_storage_directives = var.s3_integrator.storage_directives + s3_integrator_units = var.s3_integrator.units + coordinator_config = var.loki_coordinator.config + coordinator_constraints = var.loki_coordinator.constraints + coordinator_revision = var.loki_coordinator.revision + coordinator_storage_directives = var.loki_coordinator.storage_directives + coordinator_units = var.loki_coordinator.units + backend_config = var.loki_worker.backend_config + read_config = var.loki_worker.read_config + write_config = var.loki_worker.write_config + worker_constraints = var.loki_worker.constraints + worker_revision = var.loki_worker.revision + backend_worker_storage_directives = var.loki_worker.backend_storage_directives + read_worker_storage_directives = var.loki_worker.read_storage_directives + write_worker_storage_directives = var.loki_worker.write_storage_directives + backend_units = var.loki_worker.backend_units + read_units = var.loki_worker.read_units + write_units = var.loki_worker.write_units } module "mimir" { @@ -89,19 +91,21 @@ module "mimir" { "max_global_exemplars_per_user" = "100000" } ) - coordinator_constraints = var.mimir_coordinator.constraints - coordinator_revision = var.mimir_coordinator.revision - coordinator_storage_directives = var.mimir_coordinator.storage_directives - coordinator_units = var.mimir_coordinator.units - backend_config = var.mimir_worker.backend_config - read_config = var.mimir_worker.read_config - write_config = var.mimir_worker.write_config - worker_constraints = var.mimir_worker.constraints - worker_revision = var.mimir_worker.revision - worker_storage_directives = var.mimir_worker.storage_directives - backend_units = var.mimir_worker.backend_units - read_units = 
var.mimir_worker.read_units - write_units = var.mimir_worker.write_units + coordinator_constraints = var.mimir_coordinator.constraints + coordinator_revision = var.mimir_coordinator.revision + coordinator_storage_directives = var.mimir_coordinator.storage_directives + coordinator_units = var.mimir_coordinator.units + backend_config = var.mimir_worker.backend_config + read_config = var.mimir_worker.read_config + write_config = var.mimir_worker.write_config + worker_constraints = var.mimir_worker.constraints + worker_revision = var.mimir_worker.revision + backend_worker_storage_directives = var.mimir_worker.backend_storage_directives + read_worker_storage_directives = var.mimir_worker.read_storage_directives + write_worker_storage_directives = var.mimir_worker.write_storage_directives + backend_units = var.mimir_worker.backend_units + read_units = var.mimir_worker.read_units + write_units = var.mimir_worker.write_units } module "opentelemetry_collector" { @@ -129,40 +133,45 @@ module "ssc" { } module "tempo" { - source = "git::https://github.com/canonical/tempo-operators//terraform" - anti_affinity = var.anti_affinity - channel = var.channel - model_uuid = var.model_uuid - s3_endpoint = var.s3_endpoint - s3_access_key = var.s3_access_key - s3_secret_key = var.s3_secret_key - s3_bucket = var.tempo_bucket - s3_integrator_channel = var.s3_integrator.channel - s3_integrator_config = var.s3_integrator.config - s3_integrator_constraints = var.s3_integrator.constraints - s3_integrator_revision = var.s3_integrator.revision - s3_integrator_storage_directives = var.s3_integrator.storage_directives - s3_integrator_units = var.s3_integrator.units - coordinator_config = var.tempo_coordinator.config - coordinator_constraints = var.tempo_coordinator.constraints - coordinator_revision = var.tempo_coordinator.revision - coordinator_storage_directives = var.tempo_coordinator.storage_directives - coordinator_units = var.tempo_coordinator.units - querier_config = 
var.tempo_worker.querier_config - query_frontend_config = var.tempo_worker.query_frontend_config - ingester_config = var.tempo_worker.ingester_config - distributor_config = var.tempo_worker.distributor_config - compactor_config = var.tempo_worker.compactor_config - metrics_generator_config = var.tempo_worker.metrics_generator_config - worker_constraints = var.tempo_worker.constraints - worker_revision = var.tempo_worker.revision - worker_storage_directives = var.tempo_worker.storage_directives - compactor_units = var.tempo_worker.compactor_units - distributor_units = var.tempo_worker.distributor_units - ingester_units = var.tempo_worker.ingester_units - metrics_generator_units = var.tempo_worker.metrics_generator_units - querier_units = var.tempo_worker.querier_units - query_frontend_units = var.tempo_worker.query_frontend_units + source = "git::https://github.com/canonical/tempo-operators//terraform" + anti_affinity = var.anti_affinity + channel = var.channel + model_uuid = var.model_uuid + s3_endpoint = var.s3_endpoint + s3_access_key = var.s3_access_key + s3_secret_key = var.s3_secret_key + s3_bucket = var.tempo_bucket + s3_integrator_channel = var.s3_integrator.channel + s3_integrator_config = var.s3_integrator.config + s3_integrator_constraints = var.s3_integrator.constraints + s3_integrator_revision = var.s3_integrator.revision + s3_integrator_storage_directives = var.s3_integrator.storage_directives + s3_integrator_units = var.s3_integrator.units + coordinator_config = var.tempo_coordinator.config + coordinator_constraints = var.tempo_coordinator.constraints + coordinator_revision = var.tempo_coordinator.revision + coordinator_storage_directives = var.tempo_coordinator.storage_directives + coordinator_units = var.tempo_coordinator.units + querier_config = var.tempo_worker.querier_config + query_frontend_config = var.tempo_worker.query_frontend_config + ingester_config = var.tempo_worker.ingester_config + distributor_config = 
var.tempo_worker.distributor_config + compactor_config = var.tempo_worker.compactor_config + metrics_generator_config = var.tempo_worker.metrics_generator_config + worker_constraints = var.tempo_worker.constraints + worker_revision = var.tempo_worker.revision + compactor_worker_storage_directives = var.tempo_worker.compactor_worker_storage_directives + distributor_worker_storage_directives = var.tempo_worker.distributor_worker_storage_directives + ingester_worker_storage_directives = var.tempo_worker.ingester_worker_storage_directives + metrics_generator_worker_storage_directives = var.tempo_worker.metrics_generator_worker_storage_directives + querier_worker_storage_directives = var.tempo_worker.querier_worker_storage_directives + query_frontend_worker_storage_directives = var.tempo_worker.query_frontend_worker_storage_directives + compactor_units = var.tempo_worker.compactor_units + distributor_units = var.tempo_worker.distributor_units + ingester_units = var.tempo_worker.ingester_units + metrics_generator_units = var.tempo_worker.metrics_generator_units + querier_units = var.tempo_worker.querier_units + query_frontend_units = var.tempo_worker.query_frontend_units } module "traefik" { diff --git a/terraform/cos/variables.tf b/terraform/cos/variables.tf index e8372714..46150c31 100644 --- a/terraform/cos/variables.tf +++ b/terraform/cos/variables.tf @@ -157,15 +157,17 @@ variable "loki_coordinator" { variable "loki_worker" { type = object({ - backend_config = optional(map(string), {}) - read_config = optional(map(string), {}) - write_config = optional(map(string), {}) - constraints = optional(string, "arch=amd64") - revision = optional(number, null) - storage_directives = optional(map(string), {}) - backend_units = optional(number, 3) - read_units = optional(number, 3) - write_units = optional(number, 3) + backend_config = optional(map(string), {}) + read_config = optional(map(string), {}) + write_config = optional(map(string), {}) + constraints = optional(string, 
"arch=amd64") + revision = optional(number, null) + backend_storage_directives = optional(map(string), {}) + read_storage_directives = optional(map(string), {}) + write_storage_directives = optional(map(string), {}) + backend_units = optional(number, 3) + read_units = optional(number, 3) + write_units = optional(number, 3) }) default = {} description = "Application configuration for all Loki Workers. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application" @@ -185,15 +187,17 @@ variable "mimir_coordinator" { variable "mimir_worker" { type = object({ - backend_config = optional(map(string), {}) - read_config = optional(map(string), {}) - write_config = optional(map(string), {}) - constraints = optional(string, "arch=amd64") - revision = optional(number, null) - storage_directives = optional(map(string), {}) - backend_units = optional(number, 3) - read_units = optional(number, 3) - write_units = optional(number, 3) + backend_config = optional(map(string), {}) + read_config = optional(map(string), {}) + write_config = optional(map(string), {}) + constraints = optional(string, "arch=amd64") + revision = optional(number, null) + backend_storage_directives = optional(map(string), {}) + read_storage_directives = optional(map(string), {}) + write_storage_directives = optional(map(string), {}) + backend_units = optional(number, 3) + read_units = optional(number, 3) + write_units = optional(number, 3) }) default = {} description = "Application configuration for all Mimir Workers. 
For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application" @@ -254,21 +258,26 @@ variable "tempo_coordinator" { variable "tempo_worker" { type = object({ - querier_config = optional(map(string), {}) - query_frontend_config = optional(map(string), {}) - ingester_config = optional(map(string), {}) - distributor_config = optional(map(string), {}) - compactor_config = optional(map(string), {}) - metrics_generator_config = optional(map(string), {}) - constraints = optional(string, "arch=amd64") - revision = optional(number, null) - storage_directives = optional(map(string), {}) - compactor_units = optional(number, 3) - distributor_units = optional(number, 3) - ingester_units = optional(number, 3) - metrics_generator_units = optional(number, 3) - querier_units = optional(number, 3) - query_frontend_units = optional(number, 3) + querier_config = optional(map(string), {}) + query_frontend_config = optional(map(string), {}) + ingester_config = optional(map(string), {}) + distributor_config = optional(map(string), {}) + compactor_config = optional(map(string), {}) + metrics_generator_config = optional(map(string), {}) + constraints = optional(string, "arch=amd64") + revision = optional(number, null) + compactor_worker_storage_directives = optional(map(string), {}) + distributor_worker_storage_directives = optional(map(string), {}) + ingester_worker_storage_directives = optional(map(string), {}) + metrics_generator_worker_storage_directives = optional(map(string), {}) + querier_worker_storage_directives = optional(map(string), {}) + query_frontend_worker_storage_directives = optional(map(string), {}) + compactor_units = optional(number, 3) + distributor_units = optional(number, 3) + ingester_units = optional(number, 3) + metrics_generator_units = optional(number, 3) + querier_units = optional(number, 3) + query_frontend_units = optional(number, 3) }) default = {} description = "Application configuration for all Tempo workers. 
For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application" diff --git a/terraform/loki/README.md b/terraform/loki/README.md index c3fa27c0..f826c447 100644 --- a/terraform/loki/README.md +++ b/terraform/loki/README.md @@ -29,6 +29,7 @@ This is a Terraform module facilitating the deployment of Loki solution, using t | [backend\_config](#input\_backend\_config) | Map of the backend worker configuration options | `map(string)` | `{}` | no | | [backend\_name](#input\_backend\_name) | Name of the Loki app with the backend role | `string` | `"loki-backend"` | no | | [backend\_units](#input\_backend\_units) | Number of Loki worker units with the backend role | `number` | `1` | no | +| [backend\_worker\_storage\_directives](#input\_backend\_worker\_storage\_directives) | Map of storage used by the backend worker application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no | | [channel](#input\_channel) | Channel that the applications are deployed from | `string` | n/a | yes | | [coordinator\_config](#input\_coordinator\_config) | Map of the coordinator configuration options | `map(string)` | `{}` | no | | [coordinator\_constraints](#input\_coordinator\_constraints) | String listing constraints for the coordinator application | `string` | `"arch=amd64"` | no | @@ -39,6 +40,7 @@ This is a Terraform module facilitating the deployment of Loki solution, using t | [read\_config](#input\_read\_config) | Map of the read worker configuration options | `map(string)` | `{}` | no | | [read\_name](#input\_read\_name) | Name of the Loki app with the read role | `string` | `"loki-read"` | no | | [read\_units](#input\_read\_units) | Number of Loki worker units with the read role | `number` | `1` | no | +| [read\_worker\_storage\_directives](#input\_read\_worker\_storage\_directives) | Map of storage used by the read worker application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no | | 
[s3\_access\_key](#input\_s3\_access\_key) | S3 access-key credential | `string` | n/a | yes | | [s3\_bucket](#input\_s3\_bucket) | Bucket name | `string` | `"loki"` | no | | [s3\_endpoint](#input\_s3\_endpoint) | S3 endpoint | `string` | n/a | yes | @@ -52,10 +54,10 @@ This is a Terraform module facilitating the deployment of Loki solution, using t | [s3\_secret\_key](#input\_s3\_secret\_key) | S3 secret-key credential | `string` | n/a | yes | | [worker\_constraints](#input\_worker\_constraints) | String listing constraints for the worker application | `string` | `"arch=amd64"` | no | | [worker\_revision](#input\_worker\_revision) | Revision number of the worker application | `number` | `null` | no | -| [worker\_storage\_directives](#input\_worker\_storage\_directives) | Map of storage used by the worker application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no | | [write\_config](#input\_write\_config) | Map of the write worker configuration options | `map(string)` | `{}` | no | | [write\_name](#input\_write\_name) | Name of the Loki app with the write role | `string` | `"loki-write"` | no | | [write\_units](#input\_write\_units) | Number of Loki worker units with the write role | `number` | `1` | no | +| [write\_worker\_storage\_directives](#input\_write\_worker\_storage\_directives) | Map of storage used by the write worker application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no | ## Outputs diff --git a/terraform/loki/main.tf b/terraform/loki/main.tf index 138d26bb..bd4465e9 100644 --- a/terraform/loki/main.tf +++ b/terraform/loki/main.tf @@ -60,7 +60,7 @@ module "loki_backend" { }, var.backend_config) model_uuid = var.model_uuid revision = var.worker_revision - storage_directives = var.worker_storage_directives + storage_directives = var.backend_worker_storage_directives units = var.backend_units } @@ -76,7 +76,7 @@ module "loki_read" { }, var.read_config) model_uuid = var.model_uuid revision = 
var.worker_revision - storage_directives = var.worker_storage_directives + storage_directives = var.read_worker_storage_directives units = var.read_units } @@ -92,7 +92,7 @@ module "loki_write" { }, var.write_config) model_uuid = var.model_uuid revision = var.worker_revision - storage_directives = var.worker_storage_directives + storage_directives = var.write_worker_storage_directives units = var.write_units } diff --git a/terraform/loki/variables.tf b/terraform/loki/variables.tf index c3676c6f..46f5bbd8 100644 --- a/terraform/loki/variables.tf +++ b/terraform/loki/variables.tf @@ -172,8 +172,20 @@ variable "coordinator_storage_directives" { default = {} } -variable "worker_storage_directives" { - description = "Map of storage used by the worker application, which defaults to 1 GB, allocated by Juju" +variable "backend_worker_storage_directives" { + description = "Map of storage used by the backend worker application, which defaults to 1 GB, allocated by Juju" + type = map(string) + default = {} +} + +variable "read_worker_storage_directives" { + description = "Map of storage used by the read worker application, which defaults to 1 GB, allocated by Juju" + type = map(string) + default = {} +} + +variable "write_worker_storage_directives" { + description = "Map of storage used by the write worker application, which defaults to 1 GB, allocated by Juju" type = map(string) default = {} } diff --git a/terraform/mimir/README.md b/terraform/mimir/README.md index db9b0698..2b04d652 100644 --- a/terraform/mimir/README.md +++ b/terraform/mimir/README.md @@ -29,6 +29,7 @@ This is a Terraform module facilitating the deployment of Mimir solution, using | [backend\_config](#input\_backend\_config) | Map of the backend worker configuration options | `map(string)` | `{}` | no | | [backend\_name](#input\_backend\_name) | Name of the Mimir backend (meta role) app | `string` | `"mimir-backend"` | no | | [backend\_units](#input\_backend\_units) | Number of Mimir worker units with the 
backend meta role | `number` | `1` | no | +| [backend\_worker\_storage\_directives](#input\_backend\_worker\_storage\_directives) | Map of storage used by the backend worker application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no | | [channel](#input\_channel) | Channel that the applications are deployed from | `string` | n/a | yes | | [coordinator\_config](#input\_coordinator\_config) | Map of the coordinator configuration options | `map(string)` | `{}` | no | | [coordinator\_constraints](#input\_coordinator\_constraints) | String listing constraints for the coordinator application | `string` | `"arch=amd64"` | no | @@ -39,6 +40,7 @@ This is a Terraform module facilitating the deployment of Mimir solution, using | [read\_config](#input\_read\_config) | Map of the read worker configuration options | `map(string)` | `{}` | no | | [read\_name](#input\_read\_name) | Name of the Mimir read (meta role) app | `string` | `"mimir-read"` | no | | [read\_units](#input\_read\_units) | Number of Mimir worker units with the read meta role | `number` | `1` | no | +| [read\_worker\_storage\_directives](#input\_read\_worker\_storage\_directives) | Map of storage used by the read worker application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no | | [s3\_access\_key](#input\_s3\_access\_key) | S3 access-key credential | `string` | n/a | yes | | [s3\_bucket](#input\_s3\_bucket) | Bucket name | `string` | `"mimir"` | no | | [s3\_endpoint](#input\_s3\_endpoint) | S3 endpoint | `string` | n/a | yes | @@ -52,10 +54,10 @@ This is a Terraform module facilitating the deployment of Mimir solution, using | [s3\_secret\_key](#input\_s3\_secret\_key) | S3 secret-key credential | `string` | n/a | yes | | [worker\_constraints](#input\_worker\_constraints) | String listing constraints for the worker application | `string` | `"arch=amd64"` | no | | [worker\_revision](#input\_worker\_revision) | Revision number of the worker application | `number` | 
`null` | no | -| [worker\_storage\_directives](#input\_worker\_storage\_directives) | Map of storage used by the worker application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no | | [write\_config](#input\_write\_config) | Map of the write worker configuration options | `map(string)` | `{}` | no | | [write\_name](#input\_write\_name) | Name of the Mimir write (meta role) app | `string` | `"mimir-write"` | no | | [write\_units](#input\_write\_units) | Number of Mimir worker units with the write meta role | `number` | `1` | no | +| [write\_worker\_storage\_directives](#input\_write\_worker\_storage\_directives) | Map of storage used by the write worker application, which defaults to 1 GB, allocated by Juju | `map(string)` | `{}` | no | ## Outputs diff --git a/terraform/mimir/main.tf b/terraform/mimir/main.tf index 92b030b8..98ee580b 100644 --- a/terraform/mimir/main.tf +++ b/terraform/mimir/main.tf @@ -61,7 +61,7 @@ module "mimir_backend" { }, var.backend_config) model_uuid = var.model_uuid revision = var.worker_revision - storage_directives = var.worker_storage_directives + storage_directives = var.backend_worker_storage_directives units = var.backend_units } @@ -77,7 +77,7 @@ module "mimir_read" { constraints = var.anti_affinity ? "arch=amd64 tags=anti-pod.app.kubernetes.io/name=${var.read_name},anti-pod.topology-key=kubernetes.io/hostname" : var.worker_constraints model_uuid = var.model_uuid revision = var.worker_revision - storage_directives = var.worker_storage_directives + storage_directives = var.read_worker_storage_directives units = var.read_units } @@ -93,7 +93,7 @@ module "mimir_write" { constraints = var.anti_affinity ? 
"arch=amd64 tags=anti-pod.app.kubernetes.io/name=${var.write_name},anti-pod.topology-key=kubernetes.io/hostname" : var.worker_constraints model_uuid = var.model_uuid revision = var.worker_revision - storage_directives = var.worker_storage_directives + storage_directives = var.write_worker_storage_directives units = var.write_units } diff --git a/terraform/mimir/variables.tf b/terraform/mimir/variables.tf index fa908ce8..f4f717fd 100644 --- a/terraform/mimir/variables.tf +++ b/terraform/mimir/variables.tf @@ -172,8 +172,20 @@ variable "coordinator_storage_directives" { default = {} } -variable "worker_storage_directives" { - description = "Map of storage used by the worker application, which defaults to 1 GB, allocated by Juju" +variable "backend_worker_storage_directives" { + description = "Map of storage used by the backend worker application, which defaults to 1 GB, allocated by Juju" + type = map(string) + default = {} +} + +variable "read_worker_storage_directives" { + description = "Map of storage used by the read worker application, which defaults to 1 GB, allocated by Juju" + type = map(string) + default = {} +} + +variable "write_worker_storage_directives" { + description = "Map of storage used by the write worker application, which defaults to 1 GB, allocated by Juju" type = map(string) default = {} } From 3f6cffc854a3e82a2e91f2ea78c7cad92775a8ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20C=2E=20Mass=C3=B3n?= <939888+Abuelodelanada@users.noreply.github.com> Date: Tue, 17 Feb 2026 16:19:27 -0300 Subject: [PATCH 17/30] Update source URL for cos-lite module (#190) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR fixes https://github.com/canonical/observability-stack/issues/188 Signed-off-by: Jose C. 
Massón <939888+Abuelodelanada@users.noreply.github.com> --- terraform/cos-lite/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/cos-lite/README.md b/terraform/cos-lite/README.md index 5fec6150..e3684961 100644 --- a/terraform/cos-lite/README.md +++ b/terraform/cos-lite/README.md @@ -78,7 +78,7 @@ resource "juju_model" "cos" { } module "cos-lite" { - source = "git::https://github.com/canonical/observability-stack//terraform/cos-lite" + source = "git::https://github.com/canonical/observability-stack//terraform/cos-lite?ref=track/2" model_uuid = juju_model.cos.uuid channel = "2/edge" } From 7078c6e45bc05f68d9c8c2d821b3d09dc8506153 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Tue, 17 Feb 2026 14:39:07 -0500 Subject: [PATCH 18/30] fix: READMEs (#191) --- terraform/cos-lite/README.md | 2 +- terraform/cos/README.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/terraform/cos-lite/README.md b/terraform/cos-lite/README.md index e3684961..3e3a8a69 100644 --- a/terraform/cos-lite/README.md +++ b/terraform/cos-lite/README.md @@ -80,7 +80,7 @@ resource "juju_model" "cos" { module "cos-lite" { source = "git::https://github.com/canonical/observability-stack//terraform/cos-lite?ref=track/2" model_uuid = juju_model.cos.uuid - channel = "2/edge" + channel = "2/stable" } ``` diff --git a/terraform/cos/README.md b/terraform/cos/README.md index 86f4953a..d8033329 100644 --- a/terraform/cos/README.md +++ b/terraform/cos/README.md @@ -99,9 +99,9 @@ resource "juju_model" "cos" { } module "cos" { - source = "git::https://github.com/canonical/observability-stack//terraform/cos" + source = "git::https://github.com/canonical/observability-stack//terraform/cos?ref=track/2" model_uuid = juju_model.cos.uuid - channel = "2/edge" + channel = "2/stable" s3_endpoint = "http://S3_HOST_IP:8080" s3_secret_key = "secret-key" @@ -146,9 +146,9 @@ In order to deploy COS on AWS, update the `cloud` input of the `cos` module to ` ```hcl 
module "cos" { - source = "git::https://github.com/canonical/observability-stack//terraform/cos" + source = "git::https://github.com/canonical/observability-stack//terraform/cos?ref=track/2" model_uuid = juju_model.cos.uuid - channel = "2/edge" + channel = "2/stable" cloud = "aws" s3_endpoint = "http://S3_HOST_IP:8080" From 2170e0218b4f200157f4d59c698000ff07d22a3f Mon Sep 17 00:00:00 2001 From: Leon <82407168+sed-i@users.noreply.github.com> Date: Mon, 23 Feb 2026 13:22:20 -0500 Subject: [PATCH 19/30] feat(doc): troubleshoot grafana admin password (#194) --- docs/how-to/troubleshooting/index.rst | 1 + .../troubleshoot-grafana-admin-password.md | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 docs/how-to/troubleshooting/troubleshoot-grafana-admin-password.md diff --git a/docs/how-to/troubleshooting/index.rst b/docs/how-to/troubleshooting/index.rst index ad04db8e..16efe88e 100644 --- a/docs/how-to/troubleshooting/index.rst +++ b/docs/how-to/troubleshooting/index.rst @@ -12,3 +12,4 @@ Troubleshooting Troubleshoot integrations Troubleshoot "no data" in Grafana panels Troubleshoot firing alert rules + Troubleshoot grafana admin password diff --git a/docs/how-to/troubleshooting/troubleshoot-grafana-admin-password.md b/docs/how-to/troubleshooting/troubleshoot-grafana-admin-password.md new file mode 100644 index 00000000..11b6e1d1 --- /dev/null +++ b/docs/how-to/troubleshooting/troubleshoot-grafana-admin-password.md @@ -0,0 +1,14 @@ +# Troubleshoot grafana admin password + +Compare the output of: + +- Charm action: `juju run graf/0 get-admin-password` +- Pebble plan: `juju ssh --container grafana graf/0 /charm/bin/pebble plan | grep GF_SECURITY_ADMIN_PASSWORD` +- Secret content: Obtain secret id from `juju secrets` and then `juju show-secret d6buvufmp25c7am9qqtg --reveal` + +All 3 should be identical. If they are not identical, + +1. 
Manually [reset the admin password](https://grafana.com/docs/grafana/latest/administration/cli/#reset-admin-password), + `juju ssh --container grafana graf/0 grafana cli --config /etc/grafana/grafana-config.ini admin reset-admin-password pa55w0rd` +2. Update the secret with the same: `juju update-secret d6buvufmp25c7am9qqtg password=pa55w0rd` +3. Run the action so the charm updates the pebble service environment variable: `juju run graf/0 get-admin-password` From 61f716f294226253ecac79832ff4dda86b2d2b67 Mon Sep 17 00:00:00 2001 From: Leon <82407168+sed-i@users.noreply.github.com> Date: Thu, 5 Mar 2026 00:06:13 -0500 Subject: [PATCH 20/30] feat(docs): update otelcol docs (#199) --- docs/.custom_wordlist.txt | 66 +++++++++++++--- docs/how-to/index.rst | 3 +- ... => selectively-drop-telemetry-otelcol.md} | 77 ++++--------------- ...electively-drop-telemetry-scrape-config.md | 63 +++++++++++++++ docs/how-to/tiered-otelcols.md | 10 ++- docs/how-to/troubleshooting/index.rst | 1 + .../troubleshooting/troubleshoot-otelcol.md | 29 +++++++ docs/tutorial/redact-sensitive-data.md | 5 +- 8 files changed, 175 insertions(+), 79 deletions(-) rename docs/how-to/{selectively-drop-telemetry.md => selectively-drop-telemetry-otelcol.md} (70%) create mode 100644 docs/how-to/selectively-drop-telemetry-scrape-config.md create mode 100644 docs/how-to/troubleshooting/troubleshoot-otelcol.md diff --git a/docs/.custom_wordlist.txt b/docs/.custom_wordlist.txt index fa074f24..c4642a34 100644 --- a/docs/.custom_wordlist.txt +++ b/docs/.custom_wordlist.txt @@ -1,8 +1,6 @@ # Leave a blank line at the end of this file to support concatenation aggregator -Aggregator alerter -Alerter alertmanager backend backend's @@ -11,6 +9,7 @@ BCrypt bhat blackbox boolean +Canonical('s)? catalogue catalogue's Ceph @@ -24,16 +23,15 @@ configs contextualize cpu cryptographic -Cryptographic cryptographically CSI CSRs customizable datasource -Datasource datasources Dev DNS +docstrings? 
dvipng extracurriculars Filebeat @@ -45,9 +43,11 @@ FQDN freefont frontend frontends +Furo gb gh Gi +GitHub github GitOps GPG @@ -55,20 +55,26 @@ grafana Grafana's gRPCs gyre +HostHealth hostname hostPath +html http https +HTTP HTTPS -HostHealth ingester instantiation integrations +Intersphinx io IPs IPv juju +landscape lang +lastmod +LaTeX latexmk lifecycle linkcheck @@ -76,6 +82,7 @@ LMA LogProxyConsumer LogQL loki +Makefile matchers MetalLB MetricsEndpointProvider @@ -86,16 +93,19 @@ microservice microservices Mimir Minio +Multipass +MyST Nginx NRPE +Numpy OCI +Open Graph +openapi OpenStack -opentelemetry OpenTelemetry +opentelemetry OpentelemetryCollector OSD -OSD -OSDs OSDs OTEL otelcol @@ -103,60 +113,92 @@ otf OTLP OTTL PagerDuty +PDF PII plantuml +PNG postgresql +PR programmatically prometheus promlabs -promlens PromLens +promlens PromQL Promtail provisioner proxied PVC PVCs +Pygments +pymarkdown +QEMU querier RadosGW reachability +Read the Docs +readthedocs +redeclare relabeler -Relabeler +reStructuredText +retrigger(ing)? rf robustperception +Rockcraft routable +rst scalable +SCons SDK +sequenceDiagram SHA +sitemapindex SLAs Snapcraft SNMP +Sphinx +Spread +spread_test_example SSL StatefulSets storages +subproject +subprojects +SVG tanmay Terraform tex texlive Tiering -tls TLS +tls TOC +toctree Traefik Traefik's +txt ubuntu UI UIs uncharmed +uncomment(ing)? unencrypted -Unencrypted +URL utils +uv venv visualizes +VMs VPN WAL WCAG +whitespace +whitespaces +wordlist www xetex xindy +xml +yaml +YouTube ZooKeeper diff --git a/docs/how-to/index.rst b/docs/how-to/index.rst index cc9576f4..e76801e3 100644 --- a/docs/how-to/index.rst +++ b/docs/how-to/index.rst @@ -52,7 +52,8 @@ with COS to actually observe them. 
Disable built-in charm alert rules Testing with Minio Configure TLS encryption - Selectively drop telemetry + Selectively drop telemetry using scrape config + Selectively drop telemetry using opentelemetry-collector Tier OpenTelemetry Collector with different pipelines per data stream Troubleshooting diff --git a/docs/how-to/selectively-drop-telemetry.md b/docs/how-to/selectively-drop-telemetry-otelcol.md similarity index 70% rename from docs/how-to/selectively-drop-telemetry.md rename to docs/how-to/selectively-drop-telemetry-otelcol.md index b159aaa4..26a7c5fb 100644 --- a/docs/how-to/selectively-drop-telemetry.md +++ b/docs/how-to/selectively-drop-telemetry-otelcol.md @@ -1,61 +1,10 @@ -# Selectively drop telemetry +# Selectively drop telemetry using opentelemetry-collector Sometimes, from a resource perspective, applications are instrumented with more telemetry than we want to afford. In such cases, we can choose to selectively drop some before they are ingested. -## Scrape config - -Metrics can be dropped by using the `drop` action in several different places: -- Under [``](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) section ([``](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) subsection). For example: all the self-monitoring scrape jobs that e.g. COS Lite has in place. -- Under [``](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section (`` subsection). For example: prometheus can be told to drop metrics before pushing them to another prometheus over remote-write API. This use case is not addressed in this guide. - -### MetricsEndpointProvider -Charms that integrate with prometheus or otelcol, provide a "scrape config" to `MetricsEndpointProvider` (imported from [`charms.prometheus_k8s.v0.prometheus_scrape`](https://charmhub.io/prometheus-k8s/libraries/prometheus_scrape)). 
- -Let's take for example the alertmanager self-metrics that prometheus scrapes. If we do not want prometheus or otelcol to ingest any `scrape_samples_*` metrics from alertmanager, then we need to adjust the scrape job specified in the alertmanager charm: - -```diff -diff --git a/src/charm.py b/src/charm.py -index fa3678c..f0e943b 100755 ---- a/src/charm.py -+++ b/src/charm.py -@@ -250,6 +250,13 @@ class AlertmanagerCharm(CharmBase): - "scheme": metrics_endpoint.scheme, - "metrics_path": metrics_path, - "static_configs": [{"targets": [target]}], -+ "metric_relabel_configs": [ -+ { -+ "source_labels": ["__name__"], -+ "regex": "scrape_samples_.+", -+ "action": "drop", -+ } -+ ] - } - - return [config] -``` - -### scrape-config charm -In a typical scrape-config deployment such as: - -```{mermaid} -graph LR - some-external-target --- scrape-target --- scrape-config --- prometheus -``` - -We can specify the `drop` action via a config option for the [scrape-config charm](https://charmhub.io/prometheus-scrape-config-k8s): +## Filter processor -```shell -$ juju config sc metric_relabel_configs="$(cat < ResourceLogs -> ScopeLogs -> LogRecord @@ -89,6 +38,7 @@ This allows us to understand the structure of the signal's resources and attribu ### Understanding processors Before reaching an exporter, a signal is first processed by a processor and any modification to signals are propagated throughout the remainder of the pipeline. The [filter](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/processor/filterprocessor/README.md) processor supports the [OpenTelemetry Transformation Language (OTTL)](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/README.md). This allows us to define: + 1. A function that transforms (or drops) telemetry 2. Optionally, a condition that determines whether the function is executed. @@ -100,13 +50,15 @@ Incorrectly modifying or dropping telemetry can result in data loss! 
To gain insight on how effective the filter processor is, curl the metrics endpoint for the `otelcol_processor_filter_datapoints_filtered` metric with: ```shell -juju ssh --container otelcol opentelemetry-collector/0 "curl http://localhost:8888/metrics" | grep otelcol_processor_filter_datapoints_filtered` +juju ssh --container otelcol opentelemetry-collector/0 "curl http://localhost:8888/metrics" | grep otelcol_processor_filter_datapoints_filtered ``` ### Drop metrics + By default, otelcol self-scrapes its metrics and sends it into the configured pipeline, which is useful for operational diagnostics. In some use cases, this self-scraping telemetry is not desired and can be dropped. A metric signal flowing through the pipeline will look similar to: + ```shell ResourceMetrics #0 service.name=otelcol server.address=how-to_7b30903e_otelcol_otelcol/0 service.instance.id=299818a5-2dab-43e2-a6a5-015bab12cc75 server.port= url.scheme=http juju_application=otelcol juju_charm=opentelemetry-collector-k8s juju_model=how-to juju_model_uuid=7b30903e-8941-4a40-864c-0cbbf277c57f juju_unit=otelcol/0 service.version=0.130.1 ScopeMetrics #0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver@0.130.1 @@ -122,7 +74,9 @@ processors: metric_names: - "scrape_samples_.+" ``` + Alternatively, you can use an OTTL expression for the entire `otelcol` service: + ```yaml processors: filter/exclude: @@ -132,14 +86,17 @@ processors: ``` ### Drop logs + The log bodies may contain successful (`2xx`) status codes. In some use cases, this telemetry is not desired and can be dropped using the filter processor. 
A log signal flowing through the pipeline will look similar to: + ```shell ResourceLog #0 loki.format=raw ScopeLog #0 {"level":"WARNING", "host":"161.168.71.228", "user-identifier":"-", "datetime":"19/Aug/2025:15:33:08 +0000", "method": "PATCH", "request": "/portals/utilize", "protocol":"HTTP/1.1", "status":205, "bytes":9281, "referer": "http://www.leadportals.info/extensible/world-class/supply-chains", "message": "molestias et impedit ... fugiat error di"} job=juju_test-1-1_5599bed2_flog juju_application=flog juju_charm=flog-k8s juju_model=test-1-1 juju_model_uuid=5599bed2-5711-4573-8dbd-95f76fa60f3e juju_unit=flog/0 container=workload filename=/bin/fake.log loki.attribute.labels=container, job, filename, juju_application, juju_charm, juju_model, juju_model_uuid, juju_unit, snap_name, path ``` + **Note**: the log body is enclosed in curly braces. ```yaml @@ -153,9 +110,11 @@ processors: ``` ### Drop traces + When an application is scaled, we receive traces for multiple units. In some use cases, this telemetry is not desired and can be dropped using the filter processor. 
A trace signal flowing through the pipeline will look similar to: + ```shell ResourceTraces #0 juju_application=graf juju_charm=grafana-k8s juju_model=how-to juju_model_uuid=7b30903e-8941-4a40-864c-0cbbf277c57f juju_unit=graf/1 process.runtime.description=go version go1.19.13 linux/amd64 service.name=grafana service.version=3.5.5 telemetry.sdk.language=go telemetry.sdk.name=opentelemetry telemetry.sdk.version=1.14.0 ScopeTraces #0 component-main @@ -175,12 +134,6 @@ processors: ``` ## References + - Official docs: [collector configuration](https://opentelemetry.io/docs/collector/configuration/) - The [OTLP data model](https://betterstack.com/community/guides/observability/otlp/#the-otlp-data-model) -- Official docs: [``](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) -- [Dropping metrics at scrape time with Prometheus](https://www.robustperception.io/dropping-metrics-at-scrape-time-with-prometheus/) (robustperception, 2015) -- [How relabeling in Prometheus works](https://grafana.com/blog/2022/03/21/how-relabeling-in-prometheus-works/) (grafana.com, 2022) -- [How to drop and delete metrics in Prometheus](https://tanmay-bhat.github.io/posts/how-to-drop-and-delete-metrics-in-prometheus/) (gh:tanmay-bhat, 2022) -- Playgrounds: - - https://demo.promlens.com/ - - https://relabeler.promlabs.com/ \ No newline at end of file diff --git a/docs/how-to/selectively-drop-telemetry-scrape-config.md b/docs/how-to/selectively-drop-telemetry-scrape-config.md new file mode 100644 index 00000000..7bbda902 --- /dev/null +++ b/docs/how-to/selectively-drop-telemetry-scrape-config.md @@ -0,0 +1,63 @@ +# Selectively drop telemetry using scrape config + +Sometimes, from a resource perspective, applications are instrumented with more telemetry than we want to afford. In such cases, we can choose to selectively drop some before they are ingested. 
+ +## Scrape config + +Metrics can be dropped by using the `drop` action in several different places: +- Under the [`<scrape_config>`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) section ([`<metric_relabel_configs>`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) subsection). For example: all the self-monitoring scrape jobs that, e.g., COS Lite has in place. +- Under the [`<remote_write>`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) section (`<write_relabel_configs>` subsection). For example: prometheus can be told to drop metrics before pushing them to another prometheus over the remote-write API. This use case is not addressed in this guide. + +### MetricsEndpointProvider +Charms that integrate with prometheus or otelcol provide a "scrape config" to `MetricsEndpointProvider` (imported from [`charms.prometheus_k8s.v0.prometheus_scrape`](https://charmhub.io/prometheus-k8s/libraries/prometheus_scrape)). + +Let's take for example the alertmanager self-metrics that prometheus scrapes.
If we do not want prometheus or otelcol to ingest any `scrape_samples_*` metrics from alertmanager, then we need to adjust the scrape job specified in the alertmanager charm: + +```diff +diff --git a/src/charm.py b/src/charm.py +index fa3678c..f0e943b 100755 +--- a/src/charm.py ++++ b/src/charm.py +@@ -250,6 +250,13 @@ class AlertmanagerCharm(CharmBase): + "scheme": metrics_endpoint.scheme, + "metrics_path": metrics_path, + "static_configs": [{"targets": [target]}], ++ "metric_relabel_configs": [ ++ { ++ "source_labels": ["__name__"], ++ "regex": "scrape_samples_.+", ++ "action": "drop", ++ } ++ ] + } + + return [config] +``` + +### scrape-config charm +In a typical scrape-config deployment such as: + +```{mermaid} +graph LR + some-external-target --- scrape-target --- scrape-config --- prometheus +``` + +We can specify the `drop` action via a config option for the [scrape-config charm](https://charmhub.io/prometheus-scrape-config-k8s): + +```shell +$ juju config sc metric_relabel_configs="$(cat < Troubleshoot firing alert rules Troubleshoot grafana admin password + Troubleshoot OpenTelemetry Collector diff --git a/docs/how-to/troubleshooting/troubleshoot-otelcol.md b/docs/how-to/troubleshooting/troubleshoot-otelcol.md new file mode 100644 index 00000000..c8c1eb45 --- /dev/null +++ b/docs/how-to/troubleshooting/troubleshoot-otelcol.md @@ -0,0 +1,29 @@ +# Troubleshoot OpenTelemetry Collector + +## High resource usage + +### Attempting to scrape too many logs? + +Inspect the list of files opened by otelcol and their size. 
+ +```bash +juju ssh ubuntu/0 'sudo lsof -nP -p $(pgrep otelcol)' +``` + +You should see entries such as: + +``` +COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME +otelcol 45246 root 46r REG 8,1 11980753 3206003 /var/log/syslog +otelcol 45246 root 12r REG 8,1 292292 3205748 /var/log/lastlog +otelcol 45246 root 30r REG 8,1 157412 3161673 /var/log/auth.log +otelcol 45246 root 16r REG 8,1 96678 3195546 /var/log/juju/machine-lock.log +otelcol 45246 root 45r REG 8,1 77200 3205894 /var/log/cloud-init.log +otelcol 45246 root 35r REG 8,1 61211 3205745 /var/log/dpkg.log +otelcol 45246 root 25r REG 8,1 29037 3205893 /var/log/cloud-init-output.log +otelcol 45246 root 18r REG 8,1 6121 3205741 /var/log/apt/history.log +otelcol 45246 root 15r REG 8,1 1941 3206035 /var/log/unattended-upgrades/unattended-upgrades.log +otelcol 45246 root 17r REG 8,1 474 3183206 /var/log/alternatives.log +``` + +Compare the total size of logs to the available memory. diff --git a/docs/tutorial/redact-sensitive-data.md b/docs/tutorial/redact-sensitive-data.md index 002d72c4..8194216f 100644 --- a/docs/tutorial/redact-sensitive-data.md +++ b/docs/tutorial/redact-sensitive-data.md @@ -3,8 +3,7 @@ ## Introduction For applications that deal with sensitive, private data, the ability to mask certain information in their telemetry is important, in some cases required. -As OpentelemetryCollector charm serves as the entry point for telemetry data, it can mask logs, traces and metrics before they are sent to Loki, Tempo, Mimir or another destinations. - +As the OpenTelemetry Collector charm serves as the entry point for telemetry data, it can mask logs, traces and metrics before they are sent to Loki, Tempo, Mimir or other destinations.
## Deployment scenario @@ -114,4 +113,4 @@ Once this config is applied, we can verify in Grafana, that our concrete log: has been redacted: -![image](assets/grafaana-redacted-log.png) +![Grafana screenshot showing the log entry with the referer field redacted](assets/grafaana-redacted-log.png) From 1d27584288fefc90ba60678942954207e361fa1e Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Tue, 10 Mar 2026 13:20:26 -0400 Subject: [PATCH 21/30] feat: Tests for product module `channel` input (#202) --- .github/workflows/terraform.yml | 12 ++++++ justfile | 15 ++++++- terraform/cos-lite/README.md | 2 +- .../tests/channel_validation.tftest.hcl | 38 +++++++++++++++++ terraform/cos-lite/variables.tf | 9 +++- terraform/cos/README.md | 4 +- .../cos/tests/channel_validation.tftest.hcl | 41 +++++++++++++++++++ terraform/cos/variables.tf | 9 +++- .../test_upgrade_cos_lite_tls_external.py | 2 +- .../test_upgrade_cos_lite_tls_full.py | 2 +- .../test_upgrade_cos_lite_tls_internal.py | 2 +- .../test_upgrade_cos_lite_tls_none.py | 2 +- 12 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 terraform/cos-lite/tests/channel_validation.tftest.hcl create mode 100644 terraform/cos/tests/channel_validation.tftest.hcl diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml index 52ba8f66..73ae12c8 100644 --- a/.github/workflows/terraform.yml +++ b/.github/workflows/terraform.yml @@ -44,6 +44,18 @@ jobs: sudo snap install just --classic - name: Validate the Terraform modules run: just validate-terraform + test-unit: + name: Terraform unit tests + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install dependencies + run: | + sudo snap install terraform --classic + sudo snap install just --classic + - name: Unit test the Terraform modules + run: just unit test-integration-cos-lite: name: COS Lite Terraform integration uses: canonical/observability-stack/.github/workflows/_integration.yml@main diff --git 
a/justfile b/justfile index 066c6794..463de799 100644 --- a/justfile +++ b/justfile @@ -20,6 +20,10 @@ lint: lint-workflows lint-terraform lint-terraform-docs [group("Format")] fmt: format-terraform format-terraform-docs +# Run unit tests +[group("Unit")] +unit: (unit-test "cos") (unit-test "cos-lite") + # Lint the Github workflows [group("Lint")] lint-workflows: @@ -35,7 +39,7 @@ lint-terraform: # Lint the Terraform documentation [group("Lint")] lint-terraform-docs: - terraform-docs --config .tfdocs-config.yml . + terraform-docs --config .tfdocs-config.yml --output-check . # Format the Terraform modules [group("Format")] @@ -50,12 +54,21 @@ format-terraform-docs: terraform-docs --config .tfdocs-config.yml . # Validate the Terraform modules +[group("Static")] [working-directory("./terraform")] validate-terraform: if [ -z "${terraform}" ]; then echo "ERROR: please install terraform or opentofu"; exit 1; fi set -e; for repo in */; do (cd "$repo" && echo "Processing ${repo%/}..." && $terraform init -upgrade && $terraform validate) || exit 1; done +# Run a unit test +[group("Unit")] +[working-directory("./terraform")] +unit-test module: + if [ -z "${terraform}" ]; then echo "ERROR: please install terraform or opentofu"; exit 1; fi + $terraform -chdir={{module}} init -upgrade && $terraform -chdir={{module}} test + # Run integration tests +[group("Integration")] [working-directory("./tests/integration")] integration *args='': uv run ${uv_flags} pytest -vv --capture=no --exitfirst "${args}" diff --git a/terraform/cos-lite/README.md b/terraform/cos-lite/README.md index 3e3a8a69..b9ac46c7 100644 --- a/terraform/cos-lite/README.md +++ b/terraform/cos-lite/README.md @@ -27,7 +27,7 @@ This is a Terraform module facilitating the deployment of the COS Lite solution, |------|-------------|------|---------|:--------:| | [alertmanager](#input\_alertmanager) | Application configuration for Alertmanager. 
For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
app_name = optional(string, "alertmanager")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | | [catalogue](#input\_catalogue) | Application configuration for Catalogue. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
app_name = optional(string, "catalogue")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | -| [channel](#input\_channel) | Channel that the applications are (unless overwritten by external\_channels) deployed from | `string` | n/a | yes | +| [channel](#input\_channel) | Channel that the applications are (unless overwritten by individual channels) deployed from | `string` | `"dev/edge"` | no | | [external\_ca\_cert\_offer\_url](#input\_external\_ca\_cert\_offer\_url) | A Juju offer URL (e.g. admin/external-ca.send-ca-cert) of a CA providing the 'certificate\_transfer' integration for applications to trust ingress via Traefik. | `string` | `null` | no | | [external\_certificates\_offer\_url](#input\_external\_certificates\_offer\_url) | A Juju offer URL (e.g. admin/external-ca.certificates) of a CA providing the 'tls\_certificates' integration for Traefik to supply it with server certificates. | `string` | `null` | no | | [grafana](#input\_grafana) | Application configuration for Grafana. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
app_name = optional(string, "grafana")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | diff --git a/terraform/cos-lite/tests/channel_validation.tftest.hcl b/terraform/cos-lite/tests/channel_validation.tftest.hcl new file mode 100644 index 00000000..5f38c6a9 --- /dev/null +++ b/terraform/cos-lite/tests/channel_validation.tftest.hcl @@ -0,0 +1,38 @@ +mock_provider "juju" {} + +variables { + model_uuid = "00000000-0000-0000-0000-000000000000" +} + +# ---Happy path--- + +run "valid_channel_stable" { + command = plan + variables { channel = "dev/stable" } +} + +run "valid_channel_candidate" { + command = plan + variables { channel = "dev/candidate" } +} + +run "valid_channel_beta" { + command = plan + variables { channel = "dev/beta" } +} + +run "valid_channel_edge" { + command = plan + variables { channel = "dev/edge" } +} + +# ---Failure path--- +# NOTE: Invalid risks (e.g. "dev/risk") are validated by the Juju provider at the +# resource level inside child modules. Terraform test's expect_failures cannot +# reference resources inside child modules, so we cannot assert on that here. + +run "invalid_channel_track_2" { + command = plan + variables { channel = "2/stable" } + expect_failures = [var.channel] +} diff --git a/terraform/cos-lite/variables.tf b/terraform/cos-lite/variables.tf index b3666a47..8e6915b5 100644 --- a/terraform/cos-lite/variables.tf +++ b/terraform/cos-lite/variables.tf @@ -18,8 +18,15 @@ variable "base" { } variable "channel" { - description = "Channel that the applications are (unless overwritten by external_channels) deployed from" + description = "Channel that the applications are (unless overwritten by individual channels) deployed from" type = string + default = "dev/edge" + + validation { + # the TF Juju provider correctly identifies invalid risks; no need to validate it + condition = startswith(var.channel, "dev/") + error_message = "The track of the channel must be 'dev/'. e.g. 'dev/edge'." 
+ } } variable "model_uuid" { diff --git a/terraform/cos/README.md b/terraform/cos/README.md index d8033329..bae37f57 100644 --- a/terraform/cos/README.md +++ b/terraform/cos/README.md @@ -33,7 +33,7 @@ This is a Terraform module facilitating the deployment of the COS solution, usin | [alertmanager](#input\_alertmanager) | Application configuration for Alertmanager. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
app_name = optional(string, "alertmanager")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | | [anti\_affinity](#input\_anti\_affinity) | Enable anti-affinity constraints across all HA modules (Mimir, Loki, Tempo) | `bool` | `true` | no | | [catalogue](#input\_catalogue) | Application configuration for Catalogue. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
app_name = optional(string, "catalogue")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | -| [channel](#input\_channel) | Channel that the applications are (unless overwritten by external\_channels) deployed from | `string` | n/a | yes | +| [channel](#input\_channel) | Channel that the applications are (unless overwritten by individual channels) deployed from | `string` | `"dev/edge"` | no | | [cloud](#input\_cloud) | Kubernetes cloud or environment where this COS module will be deployed (e.g self-managed, aws) | `string` | `"self-managed"` | no | | [external\_ca\_cert\_offer\_url](#input\_external\_ca\_cert\_offer\_url) | A Juju offer URL (e.g. admin/external-ca.send-ca-cert) of a CA providing the 'certificate\_transfer' integration for applications to trust ingress via Traefik. | `string` | `null` | no | | [external\_certificates\_offer\_url](#input\_external\_certificates\_offer\_url) | A Juju offer URL of a CA providing the 'tls\_certificates' integration for Traefik to supply it with server certificates | `string` | `null` | no | @@ -54,7 +54,7 @@ This is a Terraform module facilitating the deployment of the COS solution, usin | [ssc](#input\_ssc) | Application configuration for Self-signed-certificates. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
app_name = optional(string, "ca")
channel = optional(string, "1/stable")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | | [tempo\_bucket](#input\_tempo\_bucket) | Tempo bucket name | `string` | `"tempo"` | no | | [tempo\_coordinator](#input\_tempo\_coordinator) | Application configuration for Tempo Coordinator. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 3)
})
| `{}` | no | -| [tempo\_worker](#input\_tempo\_worker) | Application configuration for all Tempo workers. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
querier_config = optional(map(string), {})
query_frontend_config = optional(map(string), {})
ingester_config = optional(map(string), {})
distributor_config = optional(map(string), {})
compactor_config = optional(map(string), {})
metrics_generator_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
compactor_units = optional(number, 3)
distributor_units = optional(number, 3)
ingester_units = optional(number, 3)
metrics_generator_units = optional(number, 3)
querier_units = optional(number, 3)
query_frontend_units = optional(number, 3)
})
| `{}` | no | +| [tempo\_worker](#input\_tempo\_worker) | Application configuration for all Tempo workers. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
querier_config = optional(map(string), {})
query_frontend_config = optional(map(string), {})
ingester_config = optional(map(string), {})
distributor_config = optional(map(string), {})
compactor_config = optional(map(string), {})
metrics_generator_config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
compactor_worker_storage_directives = optional(map(string), {})
distributor_worker_storage_directives = optional(map(string), {})
ingester_worker_storage_directives = optional(map(string), {})
metrics_generator_worker_storage_directives = optional(map(string), {})
querier_worker_storage_directives = optional(map(string), {})
query_frontend_worker_storage_directives = optional(map(string), {})
compactor_units = optional(number, 3)
distributor_units = optional(number, 3)
ingester_units = optional(number, 3)
metrics_generator_units = optional(number, 3)
querier_units = optional(number, 3)
query_frontend_units = optional(number, 3)
})
| `{}` | no | | [traefik](#input\_traefik) | Application configuration for Traefik. For more details: https://registry.terraform.io/providers/juju/juju/latest/docs/resources/application |
object({
app_name = optional(string, "traefik")
channel = optional(string, "latest/stable")
config = optional(map(string), {})
constraints = optional(string, "arch=amd64")
revision = optional(number, null)
storage_directives = optional(map(string), {})
units = optional(number, 1)
})
| `{}` | no | ## Outputs diff --git a/terraform/cos/tests/channel_validation.tftest.hcl b/terraform/cos/tests/channel_validation.tftest.hcl new file mode 100644 index 00000000..c2fdf469 --- /dev/null +++ b/terraform/cos/tests/channel_validation.tftest.hcl @@ -0,0 +1,41 @@ +mock_provider "juju" {} + +variables { + model_uuid = "00000000-0000-0000-0000-000000000000" + s3_endpoint = "foo" + s3_access_key = "foo" + s3_secret_key = "foo" +} + +# ---Happy path--- + +run "valid_channel_stable" { + command = plan + variables { channel = "dev/stable" } +} + +run "valid_channel_candidate" { + command = plan + variables { channel = "dev/candidate" } +} + +run "valid_channel_beta" { + command = plan + variables { channel = "dev/beta" } +} + +run "valid_channel_edge" { + command = plan + variables { channel = "dev/edge" } +} + +# ---Failure path--- +# NOTE: Invalid risks (e.g. "dev/risk") are validated by the Juju provider at the +# resource level inside child modules. Terraform test's expect_failures cannot +# reference resources inside child modules, so we cannot assert on that here. + +run "invalid_channel_track_2" { + command = plan + variables { channel = "2/stable" } + expect_failures = [var.channel] +} diff --git a/terraform/cos/variables.tf b/terraform/cos/variables.tf index 46150c31..7e770558 100644 --- a/terraform/cos/variables.tf +++ b/terraform/cos/variables.tf @@ -11,8 +11,15 @@ locals { } variable "channel" { - description = "Channel that the applications are (unless overwritten by external_channels) deployed from" + description = "Channel that the applications are (unless overwritten by individual channels) deployed from" type = string + default = "dev/edge" + + validation { + # the TF Juju provider correctly identifies invalid risks; no need to validate it + condition = startswith(var.channel, "dev/") + error_message = "The track of the channel must be 'dev/'. e.g. 'dev/edge'." 
+ } } variable "model_uuid" { diff --git a/tests/integration/cos_lite/tls_external/test_upgrade_cos_lite_tls_external.py b/tests/integration/cos_lite/tls_external/test_upgrade_cos_lite_tls_external.py index 9e028b0e..62a7555b 100644 --- a/tests/integration/cos_lite/tls_external/test_upgrade_cos_lite_tls_external.py +++ b/tests/integration/cos_lite/tls_external/test_upgrade_cos_lite_tls_external.py @@ -25,7 +25,7 @@ def test_deploy_from_track( # GIVEN a module deployed from track n-1 tf_manager.init(TRACK_2_TF_FILE) tf_manager.apply(ca_model=ca_model.model, cos_model=cos_model.model) - wait_for_active_idle_without_error([ca_model, cos_model]) + wait_for_active_idle_without_error([ca_model, cos_model], timeout=60*60) tls_ctx = get_tls_context(tmp_path, ca_model, "self-signed-certificates") catalogue_apps_are_reachable(cos_model, tls_ctx) diff --git a/tests/integration/cos_lite/tls_full/test_upgrade_cos_lite_tls_full.py b/tests/integration/cos_lite/tls_full/test_upgrade_cos_lite_tls_full.py index 340d81b5..d73b8bc2 100644 --- a/tests/integration/cos_lite/tls_full/test_upgrade_cos_lite_tls_full.py +++ b/tests/integration/cos_lite/tls_full/test_upgrade_cos_lite_tls_full.py @@ -25,7 +25,7 @@ def test_deploy_from_track( # GIVEN a module deployed from track n-1 tf_manager.init(TRACK_2_TF_FILE) tf_manager.apply(ca_model=ca_model.model, cos_model=cos_model.model) - wait_for_active_idle_without_error([ca_model, cos_model]) + wait_for_active_idle_without_error([ca_model, cos_model], timeout=60*60) tls_ctx = get_tls_context(tmp_path, ca_model, "self-signed-certificates") catalogue_apps_are_reachable(cos_model, tls_ctx) diff --git a/tests/integration/cos_lite/tls_internal/test_upgrade_cos_lite_tls_internal.py b/tests/integration/cos_lite/tls_internal/test_upgrade_cos_lite_tls_internal.py index 76a132ec..125a8019 100644 --- a/tests/integration/cos_lite/tls_internal/test_upgrade_cos_lite_tls_internal.py +++ 
b/tests/integration/cos_lite/tls_internal/test_upgrade_cos_lite_tls_internal.py @@ -22,7 +22,7 @@ def test_deploy_from_track(tf_manager, cos_model: jubilant.Juju): # GIVEN a module deployed from track n-1 tf_manager.init(TRACK_2_TF_FILE) tf_manager.apply(model=cos_model.model) - wait_for_active_idle_without_error([cos_model]) + wait_for_active_idle_without_error([cos_model], timeout=60*60) catalogue_apps_are_reachable(cos_model) diff --git a/tests/integration/cos_lite/tls_none/test_upgrade_cos_lite_tls_none.py b/tests/integration/cos_lite/tls_none/test_upgrade_cos_lite_tls_none.py index 1a8352c5..3150c1d3 100644 --- a/tests/integration/cos_lite/tls_none/test_upgrade_cos_lite_tls_none.py +++ b/tests/integration/cos_lite/tls_none/test_upgrade_cos_lite_tls_none.py @@ -22,7 +22,7 @@ def test_deploy_from_track(tf_manager, cos_model: jubilant.Juju): # GIVEN a module deployed from track n-1 tf_manager.init(TRACK_2_TF_FILE) tf_manager.apply(model=cos_model.model) - wait_for_active_idle_without_error([cos_model]) + wait_for_active_idle_without_error([cos_model], timeout=60*60) catalogue_apps_are_reachable(cos_model) From dba3ceff23652e3cb04e58d7179cf47fc95e77b2 Mon Sep 17 00:00:00 2001 From: Sina P <55766091+sinapah@users.noreply.github.com> Date: Wed, 11 Mar 2026 09:34:02 -0400 Subject: [PATCH 22/30] docs: point to correct file for cos-lite variables.tf (#205) --- docs/tutorial/installation/cos-lite-microk8s-sandbox.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/installation/cos-lite-microk8s-sandbox.md b/docs/tutorial/installation/cos-lite-microk8s-sandbox.md index 51952855..b18bd1fa 100644 --- a/docs/tutorial/installation/cos-lite-microk8s-sandbox.md +++ b/docs/tutorial/installation/cos-lite-microk8s-sandbox.md @@ -184,7 +184,7 @@ Create a `cos-lite-microk8s-sandbox.tf` file with the following Terraform module -**Note**: You can customize further the revisions of each charm and other aspects of COS Lite: have a look at the 
[`variables.tf`](../../../terraform/cos/variables.tf) file of the COS Lite Terraform module for the complete documentation. +**Note**: You can further customize the revisions of each charm and other aspects of COS Lite: have a look at the [`variables.tf`](../../../terraform/cos-lite/variables.tf) file of the COS Lite Terraform module for the complete documentation. From 51dd4b7e0b1cac9cb80fc1415f12bfc24b91e9cd Mon Sep 17 00:00:00 2001 From: Sina P <55766091+sinapah@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:42:10 -0400 Subject: [PATCH 23/30] docs: deprecation notice in tutorial for migrating from GA to otelcol (#209) --- docs/how-to/migrate-gagent-to-otelcol.md | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/how-to/migrate-gagent-to-otelcol.md b/docs/how-to/migrate-gagent-to-otelcol.md index feee7eba..65d9dd18 100644 --- a/docs/how-to/migrate-gagent-to-otelcol.md +++ b/docs/how-to/migrate-gagent-to-otelcol.md @@ -1,14 +1,18 @@ # Migrate from Grafana Agent to OpenTelemetry Collector +> Grafana Agent reached End-of-Life (EOL) on November 1, 2025. +Grafana Agent is no longer receiving support, security updates, or bug fixes from the vendor. Since it is part of COS, the charmed operators for Grafana Agent will continue to receive bug fixes until July 2026. You should plan to migrate from charmed Grafana Agent to charmed OpenTelemetry Collector before that date. These are the steps to follow: -1. deploy the collector next to the agent charm -2. look at the relations for grafana-agent, and replicate them for the collector - - note that some relation endpoints have slightly different names, for clarity: - - `logging-consumer` is now `send-loki-logs` - - `grafana-cloud-config` is now `cloud-config` -3. verify that data is appearing in the backends (Mimir, Prometheus, Loki, etc.) -4. remove grafana-agent from your deployment +1. Ensure you are using Juju 3.6. +1. Deploy the collector next to the agent charm +1. 
Look at the relations for grafana-agent, and replicate them for the collector + - Note that some relation endpoints have slightly different names, for clarity: + - `logging-consumer` is now `send-loki-logs` + - `grafana-cloud-config` is now `cloud-config` +1. Verify that data is appearing in the backends (Mimir, Prometheus, Loki, etc.) +1. Remove grafana-agent from your deployment + ## Known Issues From db994cea09bb50c83ce26d97521b25d277cb0e28 Mon Sep 17 00:00:00 2001 From: Michael Thamm Date: Mon, 16 Mar 2026 16:33:26 -0400 Subject: [PATCH 24/30] fix: Tiering HowTo doc with OTLP endpoints (#211) --- docs/how-to/tiered-otelcols.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/how-to/tiered-otelcols.md b/docs/how-to/tiered-otelcols.md index 03ddcfc7..ebe4f66d 100644 --- a/docs/how-to/tiered-otelcols.md +++ b/docs/how-to/tiered-otelcols.md @@ -16,8 +16,8 @@ flowchart TB flog[flog] --> fan-out fan-out["opentelemetry-collector
(redact & batch)"] -fan-out --> warn -fan-out --> info +fan-out --send-otlp--> warn +fan-out --send-otlp--> info warn["opentelemetry-collector
(cold filter)"] --> loki-cold info["opentelemetry-collector
(hot filter)"] --> loki-hot loki-hot["loki
(hot storage)"] @@ -55,10 +55,10 @@ Another imaginable scenario is classifying log streams prior to ingestion into a flowchart TB flog-dev["flog
(dev)"] --> dev -dev["opentelemetry-collector
(dev attributes)"] --> fan-in +dev["opentelemetry-collector
(dev attributes)"] --send-otlp--> fan-in flog-prod["flog
(prod)"] --> prod -prod["opentelemetry-collector
(prod attributes)"] --> fan-in +prod["opentelemetry-collector
(prod attributes)"] --send-otlp--> fan-in fan-in["opentelemetry-collector
(redact & batch)"] --> loki[loki] From c7cfe085886a3a003368ecf0e4caaf41a49edae6 Mon Sep 17 00:00:00 2001 From: Yanisa Haley Scherber Date: Wed, 18 Mar 2026 14:07:04 -0500 Subject: [PATCH 25/30] add cookie banner and Google Analytics tags (#213) --- docs/.sphinx/_static/bundle.js | 1 + docs/.sphinx/_static/cookie-banner.css | 3672 ++++++++++++++++++++++++ docs/.sphinx/_templates/footer.html | 92 + docs/.sphinx/_templates/header.html | 72 + docs/conf.py | 16 +- 5 files changed, 3849 insertions(+), 4 deletions(-) create mode 100644 docs/.sphinx/_static/bundle.js create mode 100644 docs/.sphinx/_static/cookie-banner.css create mode 100644 docs/.sphinx/_templates/footer.html create mode 100644 docs/.sphinx/_templates/header.html diff --git a/docs/.sphinx/_static/bundle.js b/docs/.sphinx/_static/bundle.js new file mode 100644 index 00000000..f722f50a --- /dev/null +++ b/docs/.sphinx/_static/bundle.js @@ -0,0 +1 @@ +(()=>{"use strict";function e(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function t(e,t){for(var n=0;ncookie policy.',buttonManage:"Manage your tracker settings",buttonAcceptAll:"Accept all"},manager:{title:"Tracking choices",body1:"We use cookies to recognize visitors and remember your preferences. They enhance user experience, personalize content and ads, provide social media features, measure campaign effectiveness, and analyze traffic on our websites.",body2:'Select the types of trackers you consent to, both by us, and third parties. 
Learn more at data privacy: cookie policy - you can change your choices at any time from the footer of the site.',acceptAll:"Accept all",SavePreferences:"Save preferences"}},zh:{notification:{title:"您的追踪器设置",body1:'我们使用 cookie 和相似的方法来识别访问者和记住访问者的偏好设置,并用来衡量活动的效果和分析 Canonical 旗下所有网站的流量。选择”接受“,您同意我们和受信的第三方来使用这些资料。更多细节或者随时变更您的选择,请阅读我们的 cookie 策略。',buttonManage:"管理您的追踪器设置",buttonAcceptAll:"接受"},manager:{title:"追踪选项",body1:"我们使用cookie来识别访问者和记住您的偏好设置 它们增强用户体验,使内容和广告个性化,提供社交媒体功能,衡量活动效果和网站流量分析。",body2:'选择您同意授予我们和受信的第三方的追踪类型。点击数据隐私:cookie策略了解更多,您可以在网站底部随时更改您的选择。',acceptAll:"接受全部",SavePreferences:"保存偏好设置"}},ja:{notification:{title:"トラッキング機能の設定",body1:'当社は、訪問者を識別し、設定を記憶するためにクッキーおよび類似の手法を使用しています。また、キャンペーンの効果測定や当社ウェブサイト上のトラフィックの分析にもクッキーを利用します。',body2:"「同意」を選択すると、当社および信頼できる第三者によるこれらの手法の利用に同意したものとみなされます。詳細や同意設定の変更は、いつでも当社のクッキーポリシーでご確認いただけます。",buttonManage:"トラッキング機能の設定の管理",buttonAcceptAll:"同意する"},manager:{title:"トラッキング機能の選択",body1:"当社は、訪問者を識別し、設定を記憶するためにクッキーおよび類似の手法を使用しています。これらの手法は、ユーザー体験の向上、パーソナライズされたコンテンツや広告の表示、ソーシャルメディア機能の提供、キャンペーン効果の測定、そして当社ウェブサイト上のトラフィック分析にも役立ちます。",body2:'当社および信頼できる第三者による、どの種類のトラッキング機能を利用することに同意されるかを選択してください。詳細は データプライバシー:クッキーポリシー をご覧ください。設定は、当社ウェブサイトのフッターからいつでも変更できます。',acceptAll:"すべて同意",SavePreferences:"設定を保存"}}},d={ad_storage:"denied",ad_user_data:"denied",ad_personalization:"denied",analytics_storage:"denied",functionality_storage:"denied",personalization_storage:"denied",security_storage:"denied"},u=["security_storage"],p=["ad_storage","ad_user_data","ad_personalization","analytics_storage"],h=["functionality_storage","personalization_storage"],f=["ad_storage","ad_user_data","ad_personalization","analytics_storage","functionality_storage","personalization_storage"],y=function(e){var t=new Date;t.setTime(t.getTime()+31536e6);var n="expires="+t.toUTCString();document.cookie="_cookies_accepted="+e+"; "+n+"; samesite=lax;path=/;",S(e)&&w()},g=function(e){for(var t=document.cookie.split(";"),n="",o="",i=0;i\n