From bae5f887807c5c1972ed108a47d22231d2b6c8c7 Mon Sep 17 00:00:00 2001 From: Suresh Mandalapu Date: Thu, 23 Apr 2026 21:24:26 +0100 Subject: [PATCH 1/5] azure.compute.disk.unattached --- .../azure/rules/unattached_managed_disks.py | 541 +++++++++-- docs/rules/azure.md | 8 +- docs/specs/azure/unattached_managed_disks.md | 428 +++++++++ .../azure/test_azure_sql_database_idle.py | 22 +- .../test_azure_unattached_managed_disks.py | 838 +++++++++++++++++- 5 files changed, 1732 insertions(+), 105 deletions(-) create mode 100644 docs/specs/azure/unattached_managed_disks.md diff --git a/cleancloud/providers/azure/rules/unattached_managed_disks.py b/cleancloud/providers/azure/rules/unattached_managed_disks.py index 9308572..7d9b2d7 100644 --- a/cleancloud/providers/azure/rules/unattached_managed_disks.py +++ b/cleancloud/providers/azure/rules/unattached_managed_disks.py @@ -1,5 +1,45 @@ +""" +Rule: azure.compute.disk.unattached + +Intent: + Detect Azure managed disks that are truly unattached and have remained + unattached long enough to be conservative cleanup review candidates. + + This is a deliberately low-noise review-candidate rule only. It does not + prove that a disk is safe to delete, that no cluster / migration / DR + workflow exists, or that a specific monthly saving will be realized. + +Exclusions: + - id absent or empty + - name absent or empty + - outside optional region filter (exact lowercase match) + - provisioning state does not resolve to exactly "Succeeded" + - disk state does not resolve to exactly "Unattached" + - any attachment surface present, unknown, or conflicting + - shared-disk exclusion triggered (max_shares > 1 or unresolvable) + - frequent-attach exclusion triggered or unresolvable + - unattached age unknown, invalid, in the future, or less than 7 days + +Detection: + - provisioning state is "Succeeded" + - disk state is "Unattached" + - managed_by confirmed absent + - managed_by_extended confirmed absent / empty + - max_shares is known and not greater than 1 + - optimized_for_frequent_attach is False + - unattached age >= 7 days (last_ownership_update_time primary; time_created fallback) + +Cost model (spec 10): + estimated_monthly_cost_usd = None (always) + Azure managed disk pricing varies by disk type, size tier, redundancy, and + for some SKUs performance configuration and shared-disk behavior. + +APIs: + - Microsoft.Compute/disks/read (compute_client.disks.list) +""" + from datetime import datetime, timezone -from typing import List +from typing import List, Optional, Tuple from azure.mgmt.compute import ComputeManagementClient @@ -8,99 +48,484 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel -MIN_AGE_DAYS_HIGH = 14 -MIN_AGE_DAYS_MEDIUM = 7 +_RULE_ID = "azure.compute.disk.unattached" +_RESOURCE_TYPE = "azure.compute.disk" +_MIN_UNATTACHED_DAYS = 7 +# Sentinel: field cannot be determined reliably (absent from payload OR conflicting +# across control-plane surfaces). Callers must skip when they receive this. +_UNRESOLVABLE = object() -def _age_in_days(created_at: datetime) -> int: - now = datetime.now(timezone.utc) - return (now - created_at).days + +def _norm_location(s: str) -> str: + """Lowercase only -- exact lowercase match per spec 7.""" + return s.lower() if s else "" + + +# --------------------------------------------------------------------------- +# SDK-first / nested-fallback resolvers with conflict detection (spec 9.1-9.6) +# --------------------------------------------------------------------------- + + +def _resolve_provisioning_state(disk) -> Optional[str]: + """ + Resolve provisioning state per spec 9.1. + Reads both SDK projection and nested fallback. Returns None (skip) on + conflict (both non-None but different values) or when both absent. + + Only "Succeeded" is eligible for evaluation; caller skips on anything else. + """ + sdk_val = getattr(disk, "provisioning_state", None) + + props = getattr(disk, "properties", None) + nested_val = None + if props is not None: + nested_val = getattr(props, "provisioning_state", None) + if nested_val is None: + nested_val = getattr(props, "provisioningState", None) + + # Conflict: both surfaces have a value and they disagree -> unknown -> skip + if sdk_val is not None and nested_val is not None and sdk_val != nested_val: + return None + + return sdk_val or nested_val + + +def _resolve_disk_state(disk) -> Optional[str]: + """ + Resolve disk state per spec 9.2. + Reads both SDK projection and nested fallback. Returns None (skip) on + conflict (both non-None but different values) or when both absent. + + Only "Unattached" is eligible for emission; caller skips on anything else. + """ + sdk_val = getattr(disk, "disk_state", None) + + props = getattr(disk, "properties", None) + nested_val = None + if props is not None: + nested_val = getattr(props, "disk_state", None) + if nested_val is None: + nested_val = getattr(props, "diskState", None) + + # Conflict: both surfaces have a value and they disagree -> unknown -> skip + if sdk_val is not None and nested_val is not None and sdk_val != nested_val: + return None + + return sdk_val or nested_val + + +def _resolve_managed_by(disk): + """ + Resolve managed_by per spec 9.3. Uses getattr with _UNRESOLVABLE sentinel + to distinguish "field present with None value" (confirmed absent = not + attached) from "field missing entirely" (unknown). + + Returns: + - None: confirmed absent (disk not attached via managed_by) + - str: confirmed attached (non-empty VM resource ID) + - _UNRESOLVABLE: field not found in any source, or SDK and nested conflict + + Callers must skip when they receive _UNRESOLVABLE. + """ + sdk_val = getattr(disk, "managed_by", _UNRESOLVABLE) + + props = getattr(disk, "properties", None) + nested_val = _UNRESOLVABLE + if props is not None: + nested_val = getattr(props, "managed_by", _UNRESOLVABLE) + if nested_val is _UNRESOLVABLE: + nested_val = getattr(props, "managedBy", _UNRESOLVABLE) + + sdk_found = sdk_val is not _UNRESOLVABLE + nested_found = nested_val is not _UNRESOLVABLE + + if sdk_found and nested_found: + # Normalize empty string to None for comparison + sdk_eff = sdk_val or None + nested_eff = nested_val or None + if sdk_eff != nested_eff: + return _UNRESOLVABLE # surfaces disagree -> cannot resolve reliably + return sdk_eff + + if sdk_found: + return sdk_val or None + + if nested_found: + return nested_val or None + + return _UNRESOLVABLE # field absent from all sources -> unknown + + +def _resolve_managed_by_extended(disk): + """ + Resolve managed_by_extended per spec 9.3. Uses _UNRESOLVABLE sentinel to + distinguish confirmed-empty from unresolvable. + + Returns: + - [] (empty list): confirmed absent or empty (no shared-attachment) + - list (non-empty): confirmed shared-attached + - _UNRESOLVABLE: field missing from all sources, uncoercible, or conflicting + + Callers must skip when they receive _UNRESOLVABLE. + """ + + def _to_list(raw): + """Coerce raw field value to a list. Returns _UNRESOLVABLE if not iterable.""" + if raw is None: + return [] + if raw is _UNRESOLVABLE: + return _UNRESOLVABLE + try: + return list(raw) + except TypeError: + return _UNRESOLVABLE + + sdk_raw = getattr(disk, "managed_by_extended", _UNRESOLVABLE) + + props = getattr(disk, "properties", None) + nested_raw = _UNRESOLVABLE + if props is not None: + nested_raw = getattr(props, "managed_by_extended", _UNRESOLVABLE) + if nested_raw is _UNRESOLVABLE: + nested_raw = getattr(props, "managedByExtended", _UNRESOLVABLE) + + sdk_found = sdk_raw is not _UNRESOLVABLE + nested_found = nested_raw is not _UNRESOLVABLE + + if not sdk_found and not nested_found: + return _UNRESOLVABLE # field absent from all sources + + sdk_list = _to_list(sdk_raw) if sdk_found else _UNRESOLVABLE + nested_list = _to_list(nested_raw) if nested_found else _UNRESOLVABLE + + # If any *present* source is uncoercible, state is unreliable -> skip. + # sdk_list/_nested_list is _UNRESOLVABLE either because the source was absent + # (not found) or because coercion failed; only the latter warrants a skip. + if (sdk_found and sdk_list is _UNRESOLVABLE) or (nested_found and nested_list is _UNRESOLVABLE): + return _UNRESOLVABLE + + if sdk_found and nested_found: + # Conflict: one says empty, other says non-empty + if bool(sdk_list) != bool(nested_list): + return _UNRESOLVABLE + return sdk_list # both agree; SDK is authoritative + + if sdk_found: + return sdk_list + + return nested_list + + +def _resolve_max_shares(disk) -> Optional[int]: + """ + Resolve max_shares per spec 9.4. + Reads both SDK and nested sources. Returns None (unknown -> caller skips) when + the field is absent from all sources, when coercion fails on a present value + (malformed -> unresolvable), or when the two surfaces disagree. + Unknown must not be treated as equivalent to max_shares == 1. + """ + + def _to_int(v): + try: + return int(v) + except (TypeError, ValueError): + return None # malformed -> caller treats source as unresolvable + + sdk_raw = getattr(disk, "max_shares", None) + if sdk_raw is not None: + sdk_int = _to_int(sdk_raw) + if sdk_int is None: + return None # present but uncoercible -> unresolvable -> skip + else: + sdk_int = None + + props = getattr(disk, "properties", None) + nested_raw = None + if props is not None: + nested_raw = getattr(props, "max_shares", None) + if nested_raw is None: + nested_raw = getattr(props, "maxShares", None) + if nested_raw is not None: + nested_int = _to_int(nested_raw) + if nested_int is None: + return None # present but uncoercible -> unresolvable -> skip + else: + nested_int = None + + # Conflict: both surfaces have a value and they disagree -> unknown -> skip + if sdk_int is not None and nested_int is not None and sdk_int != nested_int: + return None + + return sdk_int if sdk_int is not None else nested_int + + +def _resolve_optimized_for_frequent_attach(disk) -> Optional[bool]: + """ + Resolve optimized_for_frequent_attach per spec 9.5. + Reads both SDK and nested sources. Returns None (unknown -> caller skips) when + the field is absent from all sources, when a present value is not a reliable + boolean (e.g. strings like "false" or unexpected types), or when surfaces disagree. + Unknown must not be treated as False. + """ + + def _to_bool(v): + # Only actual Python booleans are reliable. Strings such as "false" or + # "true" must not be coerced -- bool("false") == True, which is wrong. + if isinstance(v, bool): + return v + return None # anything else -> unresolvable + + sdk_raw = getattr(disk, "optimized_for_frequent_attach", None) + if sdk_raw is not None: + sdk_bool = _to_bool(sdk_raw) + if sdk_bool is None: + return None # present but not a reliable boolean -> unresolvable -> skip + else: + sdk_bool = None + + props = getattr(disk, "properties", None) + nested_raw = None + if props is not None: + nested_raw = getattr(props, "optimized_for_frequent_attach", None) + if nested_raw is None: + nested_raw = getattr(props, "optimizedForFrequentAttach", None) + if nested_raw is not None: + nested_bool = _to_bool(nested_raw) + if nested_bool is None: + return None # present but not a reliable boolean -> unresolvable -> skip + else: + nested_bool = None + + # Conflict: both surfaces have a value and they disagree -> unknown -> skip + if sdk_bool is not None and nested_bool is not None and sdk_bool != nested_bool: + return None + + return sdk_bool if sdk_bool is not None else nested_bool + + +def _coerce_datetime(val) -> Optional[datetime]: + """ + Coerce val to a timezone-aware UTC datetime. + + Accepts: + - datetime objects (with or without tzinfo) + - ISO 8601 strings (Azure REST payloads use "Z" or "+00:00" suffixes) + + Returns None for None, unsupported types, or unparseable strings. + """ + if val is None: + return None + if isinstance(val, datetime): + if val.tzinfo is None: + return val.replace(tzinfo=timezone.utc) + return val + if isinstance(val, str): + try: + # Python 3.7-3.9 fromisoformat does not support "Z"; normalize first + dt = datetime.fromisoformat(val.replace("Z", "+00:00")) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return dt + except (ValueError, AttributeError): + return None + return None + + +def _resolve_age_anchor(disk, now: datetime) -> Optional[Tuple[str, float]]: + """ + Resolve unattached age anchor per spec 9.6. + Returns (anchor_label, age_days) or None (skip). + + Priority: + 1. last_ownership_update_time (primary) + 2. time_created (fallback, only when primary is absent) + + If the primary is present but invalid or in the future -> skip (no fallback). + If neither can be resolved -> skip (age unknown). + """ + + def _get_raw(obj, snake, camel): + val = getattr(obj, snake, None) + if val is None: + val = getattr(obj, camel, None) + return val + + # Primary: last_ownership_update_time + lowt_raw = getattr(disk, "last_ownership_update_time", None) + if lowt_raw is None: + props = getattr(disk, "properties", None) + if props is not None: + lowt_raw = _get_raw(props, "last_ownership_update_time", "lastOwnershipUpdateTime") + + if lowt_raw is not None: + # Primary is present -- use it; if invalid/future -> skip without falling back + lowt = _coerce_datetime(lowt_raw) + if lowt is None or lowt > now: + return None + return ("last_ownership_update_time", (now - lowt).total_seconds() / 86400) + + # Fallback: time_created (only when primary is absent) + tc_raw = getattr(disk, "time_created", None) + if tc_raw is None: + props = getattr(disk, "properties", None) + if props is not None: + tc_raw = _get_raw(props, "time_created", "timeCreated") + + if tc_raw is not None: + tc = _coerce_datetime(tc_raw) + if tc is None or tc > now: + return None + return ("time_created", (now - tc).total_seconds() / 86400) + + return None # both absent -> age unknown -> skip def find_unattached_managed_disks( + *, subscription_id: str, credential, region_filter: str = None, + client: Optional[ComputeManagementClient] = None, ) -> List[Finding]: """ - Find unattached Azure managed disks that are likely orphaned. + Find Azure managed disks that are truly unattached and have remained + unattached for at least 7 days. - Conservative rule (review-only): - - Disk is not attached to any VM - - Age exceeds threshold - - Does NOT infer intended usage + Detection requires: + - provisioning state resolves to exactly "Succeeded" + - disk state resolves to exactly "Unattached" + - managed_by confirmed absent (not unresolvable) + - managed_by_extended confirmed absent / empty (not unresolvable) + - max_shares is known and not greater than 1 + - optimized_for_frequent_attach is False + - unattached age >= 7 days (last_ownership_update_time primary; time_created fallback) IAM permissions: - Microsoft.Compute/disks/read """ - findings: List[Finding] = [] - compute_client = ComputeManagementClient( + compute_client = client or ComputeManagementClient( credential=credential, subscription_id=subscription_id, ) + now = datetime.now(timezone.utc) + for disk in compute_client.disks.list(): - if region_filter and disk.location != region_filter: + # spec 8.1: id must be present and non-empty + disk_id = getattr(disk, "id", None) + if not disk_id: continue - # Primary signal: attachment state - if disk.managed_by is not None: + # spec 8.2: name must be present and non-empty + disk_name = getattr(disk, "name", None) + if not disk_name: continue - # Secondary signal: age - if not disk.time_created: + # spec 8.3: region filter -- exact lowercase match + location = _norm_location(getattr(disk, "location", "") or "") + if region_filter and location != _norm_location(region_filter): continue - disk_age_days = _age_in_days(disk.time_created) - - if disk_age_days >= MIN_AGE_DAYS_MEDIUM: - confidence_value = ConfidenceLevel.MEDIUM # conservative for all ages - else: - continue # too new - - evidence = Evidence( - signals_used=[ - "Disk.managed_by is None (not attached to any VM)", - f"Disk age = {disk_age_days} days", - ], - signals_not_checked=[ - "Planned future VM attachment", - "IaC-managed intent", - "Application-level usage", - "Disaster recovery or backup planning", - ], - time_window=f"{MIN_AGE_DAYS_MEDIUM}-{MIN_AGE_DAYS_HIGH}+ days", - ) + # spec 8.4 / 9.1: provisioning state must resolve to exactly "Succeeded" + # Returns None on conflict or both-absent -> skip + if _resolve_provisioning_state(disk) != "Succeeded": + continue + + # spec 8.5 / 9.2: disk state must resolve to exactly "Unattached" + # Returns None on conflict or both-absent -> skip + disk_state = _resolve_disk_state(disk) + if disk_state != "Unattached": + continue + + # spec 8.6 / 9.3: managed_by must be confirmed absent (not attached, not unknown) + # _UNRESOLVABLE means field absent from all sources or SDK/nested conflict -> skip + managed_by = _resolve_managed_by(disk) + if managed_by is _UNRESOLVABLE or managed_by: + continue + + # spec 8.6 / 9.3: managed_by_extended must be confirmed empty (not unknown) + managed_by_extended = _resolve_managed_by_extended(disk) + if managed_by_extended is _UNRESOLVABLE or managed_by_extended: + continue + + # spec 8.7 / 9.4: max_shares must be known and not > 1 + max_shares = _resolve_max_shares(disk) + if max_shares is None or max_shares > 1: + continue + + # spec 8.8 / 9.5: optimized_for_frequent_attach must be False (not True, not unknown) + optimized_for_frequent_attach = _resolve_optimized_for_frequent_attach(disk) + if optimized_for_frequent_attach is None or optimized_for_frequent_attach: + continue + + # spec 8.9 / 9.6: unattached age must resolve and be >= 7 days + age_result = _resolve_age_anchor(disk, now) + if age_result is None: + continue + age_anchor, age_days = age_result + if age_days < _MIN_UNATTACHED_DAYS: + continue - # ~$0.10/GB-month average across Standard/Premium tiers - disk_size = disk.disk_size_gb or 0 - cost_usd = round(disk_size * 0.10, 2) if disk_size > 0 else None + # --- EMIT --- + sku = getattr(disk, "sku", None) + sku_name = getattr(sku, "name", None) if sku else None + tags = getattr(disk, "tags", None) or {} findings.append( Finding( provider="azure", - rule_id="azure.compute.disk.unattached", - resource_type="azure.compute.disk", - resource_id=disk.id, - region=disk.location, - estimated_monthly_cost_usd=cost_usd, - title="Unattached Azure managed disk", - summary=f"Disk not attached to any VM for {disk_age_days} days", - reason="Disk has no VM attachment and exceeds age threshold", + rule_id=_RULE_ID, + resource_type=_RESOURCE_TYPE, + resource_id=disk_id, + region=location, + estimated_monthly_cost_usd=None, # spec 10: always None + title="Unattached Azure Managed Disk", + summary=( + f"Managed disk '{disk_name}' has been unattached for " f"{age_days:.0f} days" + ), + reason=( + f"Disk state is 'Unattached' with no attachment surfaces and " + f"unattached age {age_days:.0f} days >= {_MIN_UNATTACHED_DAYS} days" + ), risk=RiskLevel.LOW, - confidence=confidence_value, - detected_at=datetime.now(timezone.utc), - evidence=evidence, + confidence=ConfidenceLevel.MEDIUM, + detected_at=now, + evidence=Evidence( + signals_used=[ + "Provisioning state is 'Succeeded'", + "Disk state is 'Unattached'", + "managed_by confirmed absent", + "managed_by_extended confirmed absent / empty", + f"Shared-disk exclusion not triggered: max_shares = {max_shares} (not > 1)", + "Frequent-attach exclusion not triggered: optimized_for_frequent_attach = False", + f"Unattached age anchor: {age_anchor}", + f"Unattached age: {age_days:.1f} days (>= {_MIN_UNATTACHED_DAYS} days)", + ], + signals_not_checked=[ + "Planned future VM attachment", + "Undeclared migration or restore intent", + "DR / backup planning intent", + "Exact Azure billing amount for this disk", + "Resource locks or delete-protection context", + ], + time_window=None, + ), details={ - "resource_name": disk.name, + "resource_name": disk_name, "subscription_id": subscription_id, - "managed_by": None, - "age_days": disk_age_days, - "sku": disk.sku.name if disk.sku else None, - "size_gb": disk.disk_size_gb, - "tags": disk.tags, + "disk_state": disk_state, + "managed_by": managed_by, + "managed_by_extended": managed_by_extended, + "max_shares": max_shares, + "optimized_for_frequent_attach": optimized_for_frequent_attach, + "age_anchor": age_anchor, + "age_days": round(age_days, 1), + "sku": sku_name, + "size_gb": getattr(disk, "disk_size_gb", None), + "tags": tags, }, ) ) diff --git a/docs/rules/azure.md b/docs/rules/azure.md index c63f044..5eca111 100644 --- a/docs/rules/azure.md +++ b/docs/rules/azure.md @@ -7,7 +7,7 @@ | Rule ID | Cost Surface | What It Detects | |---|---|---| | `azure.vm.stopped_not_deallocated` | Compute | Stopped but not deallocated VMs (full charges) | -| `azure.compute.disk.unattached` | Storage | Managed disks not attached to any VM | +| `azure.compute.disk.unattached` | Storage | Managed disks in `Unattached` state with no attachment surfaces and unattached age >= 7 days | | `azure.compute.snapshot.old` | Storage | Old managed snapshots as conservative review candidates | | `azure.network.public_ip.unused` | Network | Public IPs unattached across all four control-plane linkage surfaces | | `azure.load_balancer.no_backends` | Network | Standard LBs with billable rules but no backend members | @@ -46,7 +46,7 @@ ## Storage #### `azure.compute.disk.unattached` -**Detects:** Managed disks with `managed_by is None` for 7+ days +**Detects:** Managed disks in `Unattached` disk state with no attachment surfaces confirmed absent and unattached age >= 7 days (age anchored to `lastOwnershipUpdateTime` when available, `timeCreated` as fallback) **Confidence / Risk:** MEDIUM (deterministic state; attachment intent unknown) / LOW @@ -54,9 +54,9 @@ **Params:** none -**Exclusions:** disks younger than 7 days +**Exclusions:** `provisioning_state != "Succeeded"`; `disk_state != "Unattached"`; `managed_by` or `managed_by_extended` present or unresolvable; `max_shares > 1` or unresolvable (shared-disk capable); `optimized_for_frequent_attach == True` or unresolvable; unattached age < 7 days or age anchor unresolvable; conflicting control-plane signals across SDK and raw surfaces -**Spec:** — +**Spec:** [specs/azure/unattached_managed_disks.md](../specs/azure/unattached_managed_disks.md) #### `azure.compute.snapshot.old` **Detects:** Managed snapshots older than 30 days as conservative review candidates; confidence escalates with age relative to `max_age_days` diff --git a/docs/specs/azure/unattached_managed_disks.md b/docs/specs/azure/unattached_managed_disks.md new file mode 100644 index 0000000..a95615c --- /dev/null +++ b/docs/specs/azure/unattached_managed_disks.md @@ -0,0 +1,428 @@ +# Azure Rule Spec — `azure.compute.disk.unattached` + +## 1. Rule Identity + +- **Rule ID:** `azure.compute.disk.unattached` +- **Provider:** Azure +- **ARM resource type:** `Microsoft.Compute/disks` +- **Finding resource_type:** `azure.compute.disk` + +--- + +## 2. Intent + +Detect **Azure managed disks that are truly unattached and have remained unattached long enough** to be conservative cleanup review candidates. + +This rule is deliberately **low-noise**. It is a **review-candidate** rule only, not proof that a disk is safe to delete, not proof that no cluster / migration / DR workflow exists, and not proof of a specific monthly saving. + +--- + +## 3. Azure Documentation Grounding + +### 3.1 Unattached disks keep billing after VM deletion + +Microsoft documents that when you delete a VM, attached disks are not deleted by default, and you **continue to pay for unattached disks**. + +Source: *Find and delete unattached Azure managed and unmanaged disks* +URL: https://learn.microsoft.com/en-us/azure/virtual-machines/disks-find-unattached-portal + +Rule consequence: + +1. Unattached managed disks are a valid hygiene surface. +2. The rule should focus on **review candidates**, not automatic delete claims. + +### 3.2 Canonical disk state semantics + +Microsoft REST documentation for `Microsoft.Compute/disks` defines `diskState`, including: + +- `Unattached` +- `Attached` +- `Reserved` +- `Frozen` +- `ActiveSAS` +- `ActiveSASFrozen` +- `ReadyToUpload` +- `ActiveUpload` + +Source: *Disks - Get (REST API)* +URL: https://learn.microsoft.com/en-us/rest/api/compute/disks/get?view=rest-compute-2024-11-04 + +Rule consequence: + +1. `managedBy is None` alone is not sufficient proof of ordinary orphaned state. +2. Only `diskState == "Unattached"` is eligible for emission. +3. Upload/export/SAS/hibernation/deallocated states must be skipped. + +### 3.3 Ownership-change timing + +Microsoft REST documentation defines `LastOwnershipUpdateTime` as the UTC time when disk ownership last changed, such as when the disk was attached or detached, or when the VM it was attached to was deallocated or started. + +Source: *Disks - Get (REST API)* +URL: https://learn.microsoft.com/en-us/rest/api/compute/disks/get?view=rest-compute-2024-11-04 + +Rule consequence: + +1. Aging by **creation time alone** is too noisy. +2. When available, `lastOwnershipUpdateTime` is the correct conservative aging signal for unattached duration. + +### 3.4 Shared-disk semantics + +Microsoft documents that Azure shared disks can be attached to **multiple VMs simultaneously** and are explicitly modeled using `maxShares > 1`. Shared disks are valid clustered-application infrastructure. + +Source: *Share an Azure managed disk* +URL: https://learn.microsoft.com/en-us/azure/virtual-machines/disks-shared + +Rule consequence: + +1. Shared-disk-capable resources are operationally special. +2. To reduce false positives, disks with shared-disk signals should be skipped rather than emitted as ordinary orphaned disks. + +### 3.5 Frequent-attach intent + +Microsoft ARM / Bicep documentation defines `optimizedForFrequentAttach` as a property for data disks that are frequently detached from one VM and attached to another. + +Source: *Microsoft.Compute/disks ARM / Bicep reference* +URL: https://learn.microsoft.com/en-us/azure/templates/microsoft.compute/disks + +Rule consequence: + +A disk explicitly optimized for frequent attach is an intentional operational pattern and should be skipped to avoid false positives. + +### 3.6 Pricing semantics + +Microsoft pricing documentation states that managed disk pricing varies by: + +- disk type / SKU +- size tier +- redundancy +- for some SKUs, performance configuration +- shared-disk semantics + +Source: *Azure Managed Disks pricing* +URL: https://azure.microsoft.com/en-us/pricing/details/managed-disks/ + +Rule consequence: + +1. The rule must **not** use a flat estimate such as `$0.10/GB-month`. +2. `estimated_monthly_cost_usd` should remain `None` unless a future implementation uses a documented, SKU-aware pricing source. + +--- + +## 4. Detection Goal + +Emit a finding only when **all** of the following are true: + +1. `disk.id` is present and non-empty +2. `disk.name` is present and non-empty +3. the optional region filter matches the normalized location +4. `provisioning_state` resolves to exactly `"Succeeded"` +5. `disk_state` resolves to exactly `"Unattached"` +6. no attachment surfaces are present under the attachment contract +7. the shared-disk exclusion contract is resolved and **not** triggered +8. the frequent-attach exclusion contract is resolved and **not** triggered +9. unattached age resolves reliably and is at least `min_unattached_days` + +If any required signal cannot be established reliably, skip rather than emit. + +--- + +## 5. Non-Goals + +This rule does **not** attempt to prove: + +- that deleting the disk is safe +- that no restore / migration / upload workflow is in progress +- that the disk has no disaster recovery purpose +- that the disk is not intentionally retained for reuse +- that the disk produces a specific monthly saving + +--- + +## 6. Canonical Inputs + +### 6.1 Required control-plane surfaces + +The implementation may use: + +- `compute_client.disks.list()` + +No guest inspection, VM-level guest metrics, or Azure Monitor metrics are required for this rule. + +### 6.2 Unattached-age threshold + +- Configurable parameter: none +- Fixed threshold: `min_unattached_days = 7` + +Reason: + +- This rule is intentionally conservative. +- A short aging buffer reduces false positives from recent VM deletes, detach operations, maintenance, and migration workflows. + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `location` | Lowercase ARM location string; compare by exact lowercase string equality only. Do not remove spaces, hyphens, or digits. | +| `provisioning_state` | Compare case-sensitively to canonical Azure value `"Succeeded"` after SDK/raw resolution. | +| `disk_state` | Compare case-sensitively to canonical Azure values such as `"Unattached"` after SDK/raw resolution. | +| `managed_by` | Treat non-empty value as attached. | +| `managed_by_extended` | Treat any non-empty collection / payload as attached. | +| `max_shares` | Value greater than `1` indicates shared-disk capability. | +| `optimized_for_frequent_attach` | `True` indicates intentional frequent detach / attach workflow. Unknown or unresolved must not be treated as `False`. | +| `last_ownership_update_time` | Parse as UTC instant; use as the primary unattached-age signal when present. | +| `time_created` | Parse as UTC instant; use only as fallback age signal if ownership-update time is unavailable. | +| `tags` | `disk.tags or {}` — never `None` in output. | + +--- + +## 8. Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | `id` absent, `None`, or empty | Skip | +| 8.2 | `name` absent, `None`, or empty | Skip | +| 8.3 | Region filter set and normalized location does not match | Skip | +| 8.4 | `provisioning_state` does not resolve to `"Succeeded"` | Skip | +| 8.5 | `disk_state` is unknown, missing, or does not resolve to `"Unattached"` | Skip | +| 8.6 | Any attachment surface is present under the attachment contract | Skip | +| 8.7 | Shared-disk exclusion contract is triggered | Skip | +| 8.8 | Frequent-attach exclusion contract is triggered or cannot be resolved reliably | Skip | +| 8.9 | Unattached age is unknown, invalid, in the future, or less than `7` days | Skip | +| 8.10 | All required signals resolve and unattached age is at least `7` days | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Provisioning-state contract + +Resolve provisioning state in this order: + +1. SDK projection such as `disk.provisioning_state` +2. nested/raw properties projection if present +3. otherwise unknown + +Only `"Succeeded"` is eligible for evaluation. Unknown or any other value must skip. + +### 9.2 Disk-state contract + +Resolve disk state in this order: + +1. SDK projection such as `disk.disk_state` +2. nested/raw properties projection such as `properties.diskState` +3. otherwise unknown + +Only `"Unattached"` is eligible for emission. + +If `disk_state` is unknown, missing, conflicting across control-plane surfaces, or otherwise cannot be resolved reliably, the disk must skip. + +Mandatory skips include any other disk state, including: + +- `"Attached"` +- `"Reserved"` +- `"Frozen"` +- `"ActiveSAS"` +- `"ActiveSASFrozen"` +- `"ReadyToUpload"` +- `"ActiveUpload"` + +### 9.3 Attachment contract + +Treat the disk as **attached** when any of the following are true: + +1. `managed_by` resolves to a non-empty value +2. `managed_by_extended` resolves to any non-empty collection / payload + +Required behavior: + +1. Prefer SDK projections first. +2. Fall back to nested/raw properties fields only if needed. +3. Confirmed empty attachment surfaces are eligible to continue; unknown or unresolved attachment surfaces are not equivalent to empty. +4. If attachment surfaces conflict with `disk_state == "Unattached"`, skip rather than emit. +5. If attachment surfaces cannot be resolved reliably, skip rather than emit. + +### 9.4 Shared-disk exclusion contract + +Skip when reliable control-plane signals indicate the disk is configured for shared-disk use, including: + +1. `max_shares > 1` +2. equivalent shared-disk control-plane context is present + +Required behavior: + +1. Prefer SDK projections first. +2. Fall back to nested/raw properties fields only if needed. +3. If shared-disk context cannot be resolved reliably, skip rather than emit. +4. `max_shares` unknown must not be treated as equivalent to `max_shares == 1`. + +### 9.5 Frequent-attach exclusion contract + +Skip when `optimized_for_frequent_attach == True`. + +This is required because Microsoft explicitly documents it as an intentional frequent detach / attach workflow. + +If `optimized_for_frequent_attach` cannot be resolved reliably, the disk must skip rather than emit. + +### 9.6 Unattached-age contract + +Resolve the primary age signal in this order: + +1. `last_ownership_update_time` +2. `time_created` +3. otherwise unknown + +Interpretation: + +1. If `last_ownership_update_time` is present and valid, use it as the primary conservative attachment-state-change anchor for aging +2. Else if `time_created` is present and valid, use it as a fallback +3. Else age is unknown and the disk must skip +4. If the selected age anchor is invalid, unparseable, or in the future relative to `now`, the disk must skip + +Emit only when: + +- `now - age_anchor >= 7 days` + +Rationale: + +`lastOwnershipUpdateTime` is the conservative signal for how recently the disk's ownership state changed, even though it is not a perfect exact measure of unattached duration. This avoids over-flagging old disks that were only recently detached. + +### 9.7 Eventual-consistency note + +Azure control-plane state can lag briefly around detach, deallocate, SAS, or upload transitions. + +Rule consequence: + +1. Conflicting control-plane signals must skip rather than emit. +2. The 7-day unattached-age buffer exists partly to reduce transient false positives from detach / state propagation lag. + +### 9.8 Context-only fields + +The following may appear in details/evidence but must not create or suppress findings directly: + +- `sku` +- `disk_size_gb` +- `tier` +- `os_type` +- `zones` +- `network_access_policy` + +--- + +## 10. Cost Model + +`estimated_monthly_cost_usd = None` + +Mandatory rules: + +1. Do **not** use a flat `$ / GB` estimate +2. Do **not** infer cost from disk size alone +3. Do **not** infer cost from SKU alone +4. Document that Azure managed disk pricing varies by disk type, size tier, redundancy, and for some SKUs performance configuration and shared-disk behavior + +--- + +## 11. Finding Shape + +### 11.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"azure"` | +| `rule_id` | `"azure.compute.disk.unattached"` | +| `resource_type` | `"azure.compute.disk"` | +| `resource_id` | Original ARM id from `disk.id` | +| `region` | Normalized location | +| `risk` | `LOW` | +| `confidence` | `MEDIUM` | +| `estimated_monthly_cost_usd` | `None` | + +### 11.2 Required evidence + +`signals_used` must clearly disclose: + +1. provisioning state is `"Succeeded"` +2. disk state is `"Unattached"` +3. `managed_by` is absent +4. `managed_by_extended` is absent / empty +5. shared-disk exclusion contract is resolved and not triggered (`max_shares` known and not greater than `1`, with no equivalent shared-disk signal) +6. frequent-attach exclusion contract is resolved and not triggered (`optimized_for_frequent_attach == False`) +7. unattached age anchor type (`last_ownership_update_time` or `time_created`) +8. unattached age in days + +`signals_not_checked` should include remaining blind spots such as: + +1. planned future VM attachment +2. undeclared migration or restore intent +3. DR / backup planning intent +4. exact Azure billing amount for this disk +5. resource locks or delete-protection context + +### 11.3 Required details + +Details should include at least: + +- `resource_name` +- `subscription_id` +- `disk_state` +- `managed_by` +- `managed_by_extended` +- `max_shares` +- `optimized_for_frequent_attach` +- `age_anchor` +- `age_days` +- `sku` +- `size_gb` +- `tags` + +--- + +## 12. Failure Behavior + +- If the disk list call raises, let the exception propagate +- If an individual disk record is malformed or missing required fields, skip that disk +- If disk state, attachment surfaces, or age signals cannot be resolved reliably, skip that disk +- Do not silently emit on partial control-plane state + +--- + +## 13. Acceptance Examples + +### 13.1 Must emit + +1. A disk with `provisioning_state == "Succeeded"`, `disk_state == "Unattached"`, no `managed_by`, empty `managed_by_extended`, `max_shares == 1`, `optimized_for_frequent_attach == False`, and `last_ownership_update_time` 10 days ago -> **EMIT** + +### 13.2 Must skip + +1. `managed_by` present -> **SKIP** +2. `managed_by_extended` non-empty -> **SKIP** +3. `disk_state == "Reserved"` -> **SKIP** +4. `disk_state == "ActiveSAS"` -> **SKIP** +5. `disk_state == "ReadyToUpload"` -> **SKIP** +6. `max_shares > 1` -> **SKIP** +7. `optimized_for_frequent_attach == True` -> **SKIP** +8. `last_ownership_update_time` 2 days ago even if `time_created` is 200 days old -> **SKIP** +9. `provisioning_state != "Succeeded"` -> **SKIP** + +--- + +## 14. Anti-Goals + +Implementations must **not**: + +1. treat `managed_by is None` as sufficient proof of ordinary unattached state by itself +2. age disks solely from `time_created` when `last_ownership_update_time` is available +3. emit for shared-disk-capable resources +4. emit for upload/export/SAS disk states +5. use flat managed-disk cost estimates + +--- + +## 15. Rule Summary + +Rule: `azure.compute.disk.unattached` + +- **Signal:** managed disk in `Unattached` state with no attachment surfaces and unattached age >= 7 days +- **Primary exclusions:** non-`Succeeded`, non-`Unattached`, shared disks, frequent-attach disks, recent detachments, upload/export/SAS states +- **Cost model:** `estimated_monthly_cost_usd = None` diff --git a/tests/cleancloud/providers/azure/test_azure_sql_database_idle.py b/tests/cleancloud/providers/azure/test_azure_sql_database_idle.py index f877d1b..d88f759 100644 --- a/tests/cleancloud/providers/azure/test_azure_sql_database_idle.py +++ b/tests/cleancloud/providers/azure/test_azure_sql_database_idle.py @@ -199,7 +199,7 @@ def _run( # =========================================================================== -# TestMustEmit — spec §13.1 +# TestMustEmit — spec 13.1 # =========================================================================== @@ -219,7 +219,7 @@ def test_multiple_qualifying_databases_all_emit(self): # =========================================================================== -# TestMustSkip — spec §13.2 +# TestMustSkip — spec 13.2 # =========================================================================== @@ -265,7 +265,7 @@ def test_metric_query_fails_skips(self): # =========================================================================== -# TestOnlineStateContract — spec §9.1 +# TestOnlineStateContract — spec 9.1 # =========================================================================== @@ -301,7 +301,7 @@ def test_nested_snake_case_status_offline_skips(self): # =========================================================================== -# TestAgeContract — spec §9.2 +# TestAgeContract — spec 9.2 # =========================================================================== @@ -348,7 +348,7 @@ def test_nested_snake_case_creation_date_emits(self): # =========================================================================== -# TestElasticPoolContract — spec §9.3 +# TestElasticPoolContract — spec 9.3 # =========================================================================== @@ -377,7 +377,7 @@ def test_nested_snake_case_elastic_pool_id_skips(self): # =========================================================================== -# TestReplicaSecondaryContract — spec §9.4 +# TestReplicaSecondaryContract — spec 9.4 # =========================================================================== @@ -421,7 +421,7 @@ def test_nested_source_database_id_alone_does_not_skip(self): # =========================================================================== -# TestPausedStateContract — spec §9.5 +# TestPausedStateContract — spec 9.5 # =========================================================================== @@ -462,7 +462,7 @@ def test_nested_camel_case_paused_resumed_pair_does_not_skip(self): # =========================================================================== -# TestMetricsContract — spec §9.6 +# TestMetricsContract — spec 9.6 # =========================================================================== @@ -534,7 +534,7 @@ def test_second_metric_fails_skips_db(self): # =========================================================================== -# TestFindingShape — spec §11 +# TestFindingShape — spec 11 # =========================================================================== @@ -641,7 +641,7 @@ def test_evidence_signals_include_all_five_metrics(self): # =========================================================================== -# TestRegionFilter — spec §8.3 +# TestRegionFilter — spec 8.3 # =========================================================================== @@ -672,7 +672,7 @@ def test_server_level_prefilter_skips_all_dbs_on_mismatched_server(self): # =========================================================================== -# TestFailureBehavior — spec §12 +# TestFailureBehavior — spec 12 # =========================================================================== diff --git a/tests/cleancloud/providers/azure/test_azure_unattached_managed_disks.py b/tests/cleancloud/providers/azure/test_azure_unattached_managed_disks.py index 6014c4f..5262a65 100644 --- a/tests/cleancloud/providers/azure/test_azure_unattached_managed_disks.py +++ b/tests/cleancloud/providers/azure/test_azure_unattached_managed_disks.py @@ -1,47 +1,821 @@ +""" +Tests for azure.compute.disk.unattached -- spec-aligned. + +Covers: must-emit, must-skip, id/name guards, region filter, + provisioning-state contract, disk-state contract, + attachment contract (managed_by, managed_by_extended), + shared-disk contract (max_shares), frequent-attach contract, + age-anchor contract (primary/fallback/ISO strings/no-fallback), + finding shape, failure behavior, SDK/camelCase fallbacks, + conflict detection, resolver unit tests. +""" + from datetime import datetime, timedelta, timezone +from types import SimpleNamespace from unittest.mock import MagicMock import pytest from cleancloud.providers.azure.rules.unattached_managed_disks import ( + _UNRESOLVABLE, + _coerce_datetime, + _resolve_age_anchor, + _resolve_managed_by, + _resolve_managed_by_extended, + _resolve_max_shares, + _resolve_optimized_for_frequent_attach, find_unattached_managed_disks, ) +# --------------------------------------------------------------------------- +# Shared constants +# --------------------------------------------------------------------------- + +_SUB = "sub-123" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + -@pytest.fixture -def mock_compute_client(monkeypatch): - class Disk: - def __init__(self, disk_id, name, managed_by, created_days_ago, location): - self.id = disk_id - self.name = name - self.managed_by = managed_by - self.time_created = datetime.now(timezone.utc) - timedelta(days=created_days_ago) - self.location = location - self.sku = MagicMock(name="Standard_LRS") - self.tags = None - self.disk_size_gb = 128 - - mock_client = MagicMock() - mock_client.disks.list.return_value = [ - Disk("disk-1", "disk-1", None, 20, "eastus"), # should be flagged - Disk("disk-2", "disk-2", "vm-1", 20, "eastus"), # should NOT be flagged - Disk("disk-3", "disk-3", None, 2, "eastus"), # too new, NOT flagged - ] - monkeypatch.setattr( - "cleancloud.providers.azure.rules.unattached_managed_disks.ComputeManagementClient", - lambda credential, subscription_id: mock_client, +def _ago(days: float) -> datetime: + """Return a UTC datetime `days` ago (negative = future).""" + return datetime.now(timezone.utc) - timedelta(days=days) + + +def _make_disk( + disk_id="disk-1", + name="my-disk", + location="eastus", + provisioning_state="Succeeded", + disk_state="Unattached", + managed_by=None, + managed_by_extended=None, + max_shares=1, + optimized_for_frequent_attach=False, + last_ownership_update_time=None, + time_created=None, + sku_name="Standard_LRS", + disk_size_gb=128, + tags=None, + properties=None, +) -> SimpleNamespace: + """ + Build a fully qualifying disk SimpleNamespace. + Defaults to time_created=10 days ago when no age anchor is supplied, + so the disk passes the 7-day age check out of the box. + """ + if last_ownership_update_time is None and time_created is None: + time_created = _ago(10) + return SimpleNamespace( + id=disk_id, + name=name, + location=location, + provisioning_state=provisioning_state, + disk_state=disk_state, + managed_by=managed_by, + managed_by_extended=managed_by_extended, + max_shares=max_shares, + optimized_for_frequent_attach=optimized_for_frequent_attach, + last_ownership_update_time=last_ownership_update_time, + time_created=time_created, + sku=SimpleNamespace(name=sku_name) if sku_name else None, + disk_size_gb=disk_size_gb, + tags=tags, + properties=properties, + ) + + +def _run(disks, region_filter=None): + client = MagicMock() + client.disks.list.return_value = disks + return find_unattached_managed_disks( + subscription_id=_SUB, + credential=None, + region_filter=region_filter, + client=client, ) - return mock_client -def test_find_unattached_managed_disks(mock_compute_client): - findings = find_unattached_managed_disks(subscription_id="sub-1", credential=MagicMock()) - resource_ids = [f.resource_id for f in findings] +def _one(disks, region_filter=None): + results = _run(disks, region_filter) + assert len(results) == 1 + return results[0] + + +# =========================================================================== +# TestMustEmit -- spec 13.1 +# =========================================================================== + + +class TestMustEmit: + def test_qualifying_disk_emits(self): + assert len(_run([_make_disk()])) == 1 + + def test_spec_example_lowt_10_days_emits(self): + # spec 13.1.1: all conditions met, last_ownership_update_time 10 days ago + disk = _make_disk( + provisioning_state="Succeeded", + disk_state="Unattached", + managed_by=None, + managed_by_extended=None, + max_shares=1, + optimized_for_frequent_attach=False, + last_ownership_update_time=_ago(10), + ) + assert len(_run([disk])) == 1 + + def test_multiple_qualifying_disks_all_emit(self): + disks = [_make_disk(disk_id=f"disk-{i}", name=f"d{i}") for i in range(3)] + assert len(_run(disks)) == 3 + + +# =========================================================================== +# TestMustSkip -- spec 13.2 +# =========================================================================== + + +class TestMustSkip: + def test_managed_by_present_skips(self): + assert _run([_make_disk(managed_by="/subscriptions/s/vms/vm1")]) == [] + + def test_managed_by_extended_nonempty_skips(self): + assert _run([_make_disk(managed_by_extended=["/subscriptions/s/vms/vm1"])]) == [] + + def test_disk_state_reserved_skips(self): + assert _run([_make_disk(disk_state="Reserved")]) == [] + + def test_disk_state_active_sas_skips(self): + assert _run([_make_disk(disk_state="ActiveSAS")]) == [] + + def test_disk_state_ready_to_upload_skips(self): + assert _run([_make_disk(disk_state="ReadyToUpload")]) == [] + + def test_max_shares_gt_1_skips(self): + assert _run([_make_disk(max_shares=2)]) == [] + + def test_optimized_for_frequent_attach_true_skips(self): + assert _run([_make_disk(optimized_for_frequent_attach=True)]) == [] + + def test_primary_anchor_recent_skips_even_if_time_created_old(self): + # spec 13.2.8: last_ownership_update_time 2 days ago, time_created 200 days ago + # -> primary is used, no fallback, skip + disk = _make_disk( + last_ownership_update_time=_ago(2), + time_created=_ago(200), + ) + assert _run([disk]) == [] + + def test_provisioning_state_not_succeeded_skips(self): + assert _run([_make_disk(provisioning_state="Failed")]) == [] + + +# =========================================================================== +# TestIdNameGuards -- spec 8.1, 8.2 +# =========================================================================== + + +class TestIdNameGuards: + def test_id_absent_skips(self): + disk = _make_disk() + del disk.id + assert _run([disk]) == [] + + def test_id_empty_skips(self): + assert _run([_make_disk(disk_id="")]) == [] + + def test_name_absent_skips(self): + disk = _make_disk() + del disk.name + assert _run([disk]) == [] + + def test_name_empty_skips(self): + assert _run([_make_disk(name="")]) == [] + + +# =========================================================================== +# TestRegionFilter -- spec 8.3 +# =========================================================================== + + +class TestRegionFilter: + def test_no_filter_emits(self): + assert len(_run([_make_disk()])) == 1 + + def test_matching_filter_emits(self): + assert len(_run([_make_disk(location="eastus")], region_filter="eastus")) == 1 + + def test_filter_is_case_insensitive(self): + # Both sides are lowercased before comparison + assert len(_run([_make_disk(location="eastus")], region_filter="EastUS")) == 1 + + def test_mismatched_filter_skips(self): + assert _run([_make_disk(location="eastus")], region_filter="westus") == [] + + +# =========================================================================== +# TestProvisioningStateContract -- spec 9.1 +# =========================================================================== + + +class TestProvisioningStateContract: + def test_failed_skips(self): + assert _run([_make_disk(provisioning_state="Failed")]) == [] + + def test_none_skips(self): + assert _run([_make_disk(provisioning_state=None)]) == [] + + def test_sdk_nested_conflict_skips(self): + # SDK "Succeeded", nested camelCase "Failed" -> conflict -> skip + disk = _make_disk(provisioning_state="Succeeded") + disk.properties = SimpleNamespace(provisioningState="Failed") + assert _run([disk]) == [] + + def test_nested_snake_fallback_emits(self): + # SDK None, nested snake "Succeeded" -> use nested -> emit + disk = _make_disk(provisioning_state=None) + disk.properties = SimpleNamespace( + provisioning_state="Succeeded", + provisioningState=None, + ) + assert len(_run([disk])) == 1 + + def test_nested_camelcase_fallback_emits(self): + # SDK None, nested camelCase "Succeeded" -> use nested -> emit + disk = _make_disk(provisioning_state=None) + disk.properties = SimpleNamespace(provisioningState="Succeeded") + assert len(_run([disk])) == 1 + + +# =========================================================================== +# TestDiskStateContract -- spec 9.2 +# =========================================================================== + + +class TestDiskStateContract: + def test_attached_skips(self): + assert _run([_make_disk(disk_state="Attached")]) == [] + + def test_frozen_skips(self): + assert _run([_make_disk(disk_state="Frozen")]) == [] + + def test_none_skips(self): + assert _run([_make_disk(disk_state=None)]) == [] + + def test_sdk_nested_conflict_skips(self): + # SDK "Unattached", nested "Attached" -> conflict -> skip + disk = _make_disk(disk_state="Unattached") + disk.properties = SimpleNamespace(diskState="Attached") + assert _run([disk]) == [] + + def test_nested_camelcase_fallback_emits(self): + # SDK None, nested camelCase "Unattached" -> use nested -> emit + disk = _make_disk(disk_state=None) + disk.properties = SimpleNamespace(diskState="Unattached") + assert len(_run([disk])) == 1 + + +# =========================================================================== +# TestAttachmentContract -- spec 9.3 +# =========================================================================== + + +class TestAttachmentContract: + def test_managed_by_confirmed_absent_emits(self): + # Attribute present on disk with value None -> confirmed absent + disk = _make_disk(managed_by=None) + assert len(_run([disk])) == 1 + + def test_managed_by_absent_from_all_sources_skips(self): + # Attribute deleted from disk, no properties -> _UNRESOLVABLE -> skip + disk = _make_disk() + del disk.managed_by + assert _run([disk]) == [] + + def test_managed_by_sdk_none_nested_nonempty_conflict_skips(self): + # SDK None (absent), nested camelCase non-empty (attached) -> conflict -> skip + disk = _make_disk(managed_by=None) + disk.properties = SimpleNamespace(managedBy="/subscriptions/s/vms/vm-conflict") + assert _run([disk]) == [] + + def test_managed_by_extended_confirmed_empty_emits(self): + disk = _make_disk(managed_by_extended=None) # None -> [] confirmed empty + assert len(_run([disk])) == 1 + + def test_managed_by_extended_absent_from_all_sources_skips(self): + disk = _make_disk() + del disk.managed_by_extended + # properties is None -> neither source found -> _UNRESOLVABLE -> skip + assert _run([disk]) == [] + + def test_managed_by_extended_present_source_uncoercible_skips(self): + # SDK confirmed empty (None -> []), nested present but non-iterable -> skip + disk = _make_disk(managed_by_extended=None) + # Only camelCase on props so snake_case returns _UNRESOLVABLE, then camelCase=42 + disk.properties = SimpleNamespace(managedByExtended=42) + assert _run([disk]) == [] + + def test_managed_by_extended_conflict_empty_vs_nonempty_skips(self): + # SDK empty ([]), nested non-empty -> conflict -> skip + disk = _make_disk(managed_by_extended=None) # sdk: [] + disk.properties = SimpleNamespace(managedByExtended=["/subscriptions/s/vms/vm1"]) + assert _run([disk]) == [] + + def test_managed_by_extended_nested_camelcase_empty_emits(self): + # SDK absent, nested camelCase empty list -> confirmed empty -> emit + disk = _make_disk() + del disk.managed_by_extended + disk.properties = SimpleNamespace(managedByExtended=[]) + assert len(_run([disk])) == 1 + + +# =========================================================================== +# TestSharedDiskContract -- spec 9.4 +# =========================================================================== + + +class TestSharedDiskContract: + def test_max_shares_1_emits(self): + assert len(_run([_make_disk(max_shares=1)])) == 1 + + def test_max_shares_2_skips(self): + assert _run([_make_disk(max_shares=2)]) == [] + + def test_max_shares_unknown_skips(self): + # None -> unknown -> must not be treated as 1 -> skip + assert _run([_make_disk(max_shares=None)]) == [] + + def test_max_shares_malformed_string_skips(self): + # Non-numeric -> coercion fails -> unresolvable -> skip + assert _run([_make_disk(max_shares="abc")]) == [] + + def test_max_shares_conflict_skips(self): + # SDK 1, nested camelCase 2 -> conflict -> skip + disk = _make_disk(max_shares=1) + disk.properties = SimpleNamespace(maxShares=2) + assert _run([disk]) == [] + + def test_max_shares_nested_camelcase_fallback_emits(self): + # SDK None, nested camelCase 1 -> use nested -> emit + disk = _make_disk(max_shares=None) + disk.properties = SimpleNamespace(maxShares=1) + assert len(_run([disk])) == 1 + + +# =========================================================================== +# TestFrequentAttachContract -- spec 9.5 +# =========================================================================== + + +class TestFrequentAttachContract: + def test_false_emits(self): + assert len(_run([_make_disk(optimized_for_frequent_attach=False)])) == 1 + + def test_true_skips(self): + assert _run([_make_disk(optimized_for_frequent_attach=True)]) == [] + + def test_unknown_skips(self): + # None -> unknown -> must not be treated as False -> skip + assert _run([_make_disk(optimized_for_frequent_attach=None)]) == [] + + def test_string_false_skips(self): + # "false" is not a reliable boolean (bool("false") == True is wrong) -> skip + assert _run([_make_disk(optimized_for_frequent_attach="false")]) == [] + + def test_string_true_skips(self): + assert _run([_make_disk(optimized_for_frequent_attach="true")]) == [] + + def test_conflict_skips(self): + # SDK False, nested camelCase True -> conflict -> skip + disk = _make_disk(optimized_for_frequent_attach=False) + disk.properties = SimpleNamespace(optimizedForFrequentAttach=True) + assert _run([disk]) == [] + + def test_nested_camelcase_false_fallback_emits(self): + # SDK None, nested camelCase False -> use nested -> emit + disk = _make_disk(optimized_for_frequent_attach=None) + disk.properties = SimpleNamespace(optimizedForFrequentAttach=False) + assert len(_run([disk])) == 1 + + +# =========================================================================== +# TestAgeAnchorContract -- spec 9.6 +# =========================================================================== + + +class TestAgeAnchorContract: + def test_last_ownership_update_time_used_as_primary(self): + disk = _make_disk(last_ownership_update_time=_ago(10)) + f = _one([disk]) + assert f.details["age_anchor"] == "last_ownership_update_time" + + def test_time_created_used_as_fallback_when_primary_absent(self): + disk = _make_disk(time_created=_ago(10)) + # last_ownership_update_time defaults to None in _make_disk when time_created given + f = _one([disk]) + assert f.details["age_anchor"] == "time_created" + + def test_primary_present_recent_skips_no_fallback_to_time_created(self): + # spec 13.2.8 + disk = _make_disk(last_ownership_update_time=_ago(2), time_created=_ago(200)) + assert _run([disk]) == [] + + def test_primary_present_invalid_skips_no_fallback(self): + # Primary exists but unparseable -> skip; time_created not consulted + disk = _make_disk(time_created=_ago(200)) + disk.last_ownership_update_time = "not-a-datetime" + assert _run([disk]) == [] + + def test_primary_future_skips(self): + disk = _make_disk(last_ownership_update_time=_ago(-1)) + assert _run([disk]) == [] + + def test_time_created_future_skips(self): + disk = _make_disk(last_ownership_update_time=None, time_created=_ago(-1)) + assert _run([disk]) == [] + + def test_both_absent_skips(self): + disk = _make_disk() + disk.last_ownership_update_time = None + disk.time_created = None + assert _run([disk]) == [] + + def test_age_exactly_7_days_emits(self): + disk = _make_disk(last_ownership_update_time=_ago(7)) + assert len(_run([disk])) == 1 + + def test_age_6_days_skips(self): + disk = _make_disk(last_ownership_update_time=_ago(6)) + assert _run([disk]) == [] + + def test_iso_string_datetime_parsed_for_time_created(self): + # Raw ARM REST payload may return ISO strings with "Z" suffix + iso_str = (_ago(10)).strftime("%Y-%m-%dT%H:%M:%SZ") + disk = _make_disk(last_ownership_update_time=None, time_created=None) + disk.time_created = iso_str + assert len(_run([disk])) == 1 + + def test_iso_string_datetime_parsed_for_last_ownership_update_time(self): + iso_str = (_ago(10)).strftime("%Y-%m-%dT%H:%M:%SZ") + disk = _make_disk(last_ownership_update_time=None, time_created=None) + disk.last_ownership_update_time = iso_str + assert len(_run([disk])) == 1 + + +# =========================================================================== +# TestFindingShape -- spec 11 +# =========================================================================== + + +class TestFindingShape: + def setup_method(self): + self.f = _one([_make_disk(disk_id="disk-abc", name="my-disk", tags={"env": "prod"})]) + + def test_provider(self): + assert self.f.provider == "azure" + + def test_rule_id(self): + assert self.f.rule_id == "azure.compute.disk.unattached" + + def test_resource_type(self): + assert self.f.resource_type == "azure.compute.disk" + + def test_resource_id(self): + assert self.f.resource_id == "disk-abc" + + def test_region(self): + assert self.f.region == "eastus" + + def test_risk_low(self): + from cleancloud.core.risk import RiskLevel + + assert self.f.risk == RiskLevel.LOW + + def test_confidence_medium(self): + from cleancloud.core.confidence import ConfidenceLevel + + assert self.f.confidence == ConfidenceLevel.MEDIUM + + def test_estimated_monthly_cost_always_none(self): + assert self.f.estimated_monthly_cost_usd is None + + def test_signals_used_count(self): + assert len(self.f.evidence.signals_used) == 8 + + def test_required_detail_keys(self): + d = self.f.details + for key in ( + "resource_name", + "subscription_id", + "disk_state", + "managed_by", + "managed_by_extended", + "max_shares", + "optimized_for_frequent_attach", + "age_anchor", + "age_days", + "sku", + "size_gb", + "tags", + ): + assert key in d, f"missing detail key: {key}" + + def test_tags_present_when_disk_has_tags(self): + assert self.f.details["tags"] == {"env": "prod"} + + def test_tags_empty_dict_when_disk_has_none_tags(self): + f = _one([_make_disk(tags=None)]) + assert f.details["tags"] == {} + + def test_age_anchor_label_in_details(self): + assert self.f.details["age_anchor"] in ("last_ownership_update_time", "time_created") + + def test_age_days_is_numeric(self): + assert isinstance(self.f.details["age_days"], float) + + +# =========================================================================== +# TestFailureBehavior -- spec 12 +# =========================================================================== + + +class TestFailureBehavior: + def test_disk_list_raises_propagates(self): + client = MagicMock() + client.disks.list.side_effect = RuntimeError("network error") + with pytest.raises(RuntimeError): + find_unattached_managed_disks(subscription_id=_SUB, credential=None, client=client) + + def test_malformed_disk_skipped_good_disk_still_emits(self): + bad = _make_disk(disk_id="") # fails id guard + good = _make_disk(disk_id="good", name="good") + assert len(_run([bad, good])) == 1 + + def test_no_findings_when_all_disks_skip(self): + disks = [ + _make_disk(managed_by="/vm/1"), + _make_disk(disk_state="Reserved"), + _make_disk(max_shares=2), + ] + assert _run(disks) == [] + + +# =========================================================================== +# TestResolveManagedBy -- unit +# =========================================================================== + + +class TestResolveManagedBy: + def test_attribute_none_is_confirmed_absent(self): + disk = SimpleNamespace(managed_by=None, properties=None) + assert _resolve_managed_by(disk) is None + + def test_attribute_nonempty_is_confirmed_attached(self): + disk = SimpleNamespace(managed_by="/subscriptions/s/vms/vm1", properties=None) + assert _resolve_managed_by(disk) == "/subscriptions/s/vms/vm1" + + def test_attribute_missing_returns_unresolvable(self): + disk = SimpleNamespace(properties=None) + assert _resolve_managed_by(disk) is _UNRESOLVABLE + + def test_sdk_none_nested_nonempty_is_conflict(self): + disk = SimpleNamespace( + managed_by=None, + properties=SimpleNamespace(managedBy="/subscriptions/s/vms/vm2"), + ) + assert _resolve_managed_by(disk) is _UNRESOLVABLE + + def test_both_absent_returns_unresolvable(self): + disk = SimpleNamespace(properties=SimpleNamespace()) + assert _resolve_managed_by(disk) is _UNRESOLVABLE + + def test_nested_snake_fallback_none_is_absent(self): + disk = SimpleNamespace(properties=SimpleNamespace(managed_by=None, managedBy=None)) + assert _resolve_managed_by(disk) is None + + +# =========================================================================== +# TestResolveManagedByExtended -- unit +# =========================================================================== + + +class TestResolveManagedByExtended: + def test_none_is_confirmed_empty(self): + disk = SimpleNamespace(managed_by_extended=None, properties=None) + assert _resolve_managed_by_extended(disk) == [] + + def test_nonempty_list_is_attached(self): + disk = SimpleNamespace(managed_by_extended=["/vm/1"], properties=None) + assert _resolve_managed_by_extended(disk) == ["/vm/1"] + + def test_absent_from_all_sources_returns_unresolvable(self): + disk = SimpleNamespace(properties=None) + assert _resolve_managed_by_extended(disk) is _UNRESOLVABLE + + def test_present_source_uncoercible_returns_unresolvable(self): + # SDK confirmed empty, nested present but non-iterable + disk = SimpleNamespace( + managed_by_extended=None, + properties=SimpleNamespace(managedByExtended=42), + ) + assert _resolve_managed_by_extended(disk) is _UNRESOLVABLE + + def test_conflict_empty_vs_nonempty_returns_unresolvable(self): + disk = SimpleNamespace( + managed_by_extended=None, + properties=SimpleNamespace(managedByExtended=["/vm/1"]), + ) + assert _resolve_managed_by_extended(disk) is _UNRESOLVABLE + + def test_both_empty_agree_returns_empty_list(self): + disk = SimpleNamespace( + managed_by_extended=None, + properties=SimpleNamespace(managedByExtended=[]), + ) + assert _resolve_managed_by_extended(disk) == [] + + +# =========================================================================== +# TestResolveMaxShares -- unit +# =========================================================================== + + +class TestResolveMaxShares: + def test_1_returns_1(self): + disk = SimpleNamespace(max_shares=1, properties=None) + assert _resolve_max_shares(disk) == 1 + + def test_2_returns_2(self): + disk = SimpleNamespace(max_shares=2, properties=None) + assert _resolve_max_shares(disk) == 2 + + def test_none_returns_none(self): + disk = SimpleNamespace(max_shares=None, properties=None) + assert _resolve_max_shares(disk) is None + + def test_malformed_string_returns_none(self): + disk = SimpleNamespace(max_shares="abc", properties=None) + assert _resolve_max_shares(disk) is None + + def test_conflict_returns_none(self): + disk = SimpleNamespace( + max_shares=1, + properties=SimpleNamespace(max_shares=None, maxShares=2), + ) + assert _resolve_max_shares(disk) is None + + def test_nested_camelcase_fallback(self): + disk = SimpleNamespace( + max_shares=None, + properties=SimpleNamespace(max_shares=None, maxShares=1), + ) + assert _resolve_max_shares(disk) == 1 + + +# =========================================================================== +# TestResolveOptimizedForFrequentAttach -- unit +# =========================================================================== + + +class TestResolveOptimizedForFrequentAttach: + def test_false_returns_false(self): + disk = SimpleNamespace(optimized_for_frequent_attach=False, properties=None) + assert _resolve_optimized_for_frequent_attach(disk) is False + + def test_true_returns_true(self): + disk = SimpleNamespace(optimized_for_frequent_attach=True, properties=None) + assert _resolve_optimized_for_frequent_attach(disk) is True + + def test_none_returns_none(self): + disk = SimpleNamespace(optimized_for_frequent_attach=None, properties=None) + assert _resolve_optimized_for_frequent_attach(disk) is None + + def test_string_false_returns_none(self): + disk = SimpleNamespace(optimized_for_frequent_attach="false", properties=None) + assert _resolve_optimized_for_frequent_attach(disk) is None + + def test_string_true_returns_none(self): + disk = SimpleNamespace(optimized_for_frequent_attach="true", properties=None) + assert _resolve_optimized_for_frequent_attach(disk) is None + + def test_integer_1_returns_none(self): + # int is not a reliable boolean + disk = SimpleNamespace(optimized_for_frequent_attach=1, properties=None) + assert _resolve_optimized_for_frequent_attach(disk) is None + + def test_conflict_returns_none(self): + disk = SimpleNamespace( + optimized_for_frequent_attach=False, + properties=SimpleNamespace( + optimized_for_frequent_attach=None, + optimizedForFrequentAttach=True, + ), + ) + assert _resolve_optimized_for_frequent_attach(disk) is None + + def test_nested_camelcase_false_fallback(self): + disk = SimpleNamespace( + optimized_for_frequent_attach=None, + properties=SimpleNamespace( + optimized_for_frequent_attach=None, + optimizedForFrequentAttach=False, + ), + ) + assert _resolve_optimized_for_frequent_attach(disk) is False + + +# =========================================================================== +# TestCoerceDatetime -- unit +# =========================================================================== + + +class TestCoerceDatetime: + def test_none_returns_none(self): + assert _coerce_datetime(None) is None + + def test_aware_datetime_returned_unchanged(self): + dt = datetime(2024, 1, 15, 10, 0, tzinfo=timezone.utc) + assert _coerce_datetime(dt) == dt + + def test_naive_datetime_gets_utc(self): + dt = datetime(2024, 1, 15, 10, 0) + result = _coerce_datetime(dt) + assert result.tzinfo == timezone.utc + + def test_iso_string_z_suffix(self): + result = _coerce_datetime("2024-01-15T10:30:00Z") + assert result is not None + assert result.tzinfo is not None + assert result.year == 2024 and result.month == 1 and result.day == 15 + + def test_iso_string_plus_offset(self): + result = _coerce_datetime("2024-01-15T10:30:00+00:00") + assert result is not None + assert result.tzinfo is not None + + def test_unparseable_string_returns_none(self): + assert _coerce_datetime("not-a-date") is None + + def test_integer_returns_none(self): + assert _coerce_datetime(12345) is None + + +# =========================================================================== +# TestResolveAgeAnchor -- unit +# =========================================================================== + + +class TestResolveAgeAnchor: + def _now(self): + return datetime.now(timezone.utc) + + def test_last_ownership_update_time_returned_as_primary(self): + now = self._now() + lowt = now - timedelta(days=10) + disk = SimpleNamespace( + last_ownership_update_time=lowt, + time_created=now - timedelta(days=200), + properties=None, + ) + label, days = _resolve_age_anchor(disk, now) + assert label == "last_ownership_update_time" + assert 9.9 < days < 10.1 + + def test_time_created_used_when_primary_absent(self): + now = self._now() + disk = SimpleNamespace( + last_ownership_update_time=None, + time_created=now - timedelta(days=10), + properties=None, + ) + label, days = _resolve_age_anchor(disk, now) + assert label == "time_created" + assert 9.9 < days < 10.1 + + def test_primary_invalid_returns_none_no_fallback(self): + now = self._now() + disk = SimpleNamespace( + last_ownership_update_time="bad-value", + time_created=now - timedelta(days=200), + properties=None, + ) + assert _resolve_age_anchor(disk, now) is None - assert "disk-1" in resource_ids - assert "disk-2" not in resource_ids - assert "disk-3" not in resource_ids + def test_primary_future_returns_none(self): + now = self._now() + disk = SimpleNamespace( + last_ownership_update_time=now + timedelta(days=1), + time_created=now - timedelta(days=200), + properties=None, + ) + assert _resolve_age_anchor(disk, now) is None - # Verify cost estimate (128 GB * $0.10/GB) - finding = [f for f in findings if f.resource_id == "disk-1"][0] - assert finding.estimated_monthly_cost_usd == 12.8 + def test_both_absent_returns_none(self): + now = self._now() + disk = SimpleNamespace( + last_ownership_update_time=None, + time_created=None, + properties=None, + ) + assert _resolve_age_anchor(disk, now) is None From afc97a339dc50dd7cba167303780c94de22e8b4d Mon Sep 17 00:00:00 2001 From: Suresh Mandalapu Date: Thu, 23 Apr 2026 21:54:05 +0100 Subject: [PATCH 2/5] azure.resource.untagged --- .../azure/rules/untagged_resources.py | 498 +++++++-- docs/rules/azure.md | 8 +- docs/specs/azure/untagged_resources.md | 355 +++++++ .../azure/test_azure_untagged_resources.py | 946 +++++++++++++++++- 4 files changed, 1710 insertions(+), 97 deletions(-) create mode 100644 docs/specs/azure/untagged_resources.md diff --git a/cleancloud/providers/azure/rules/untagged_resources.py b/cleancloud/providers/azure/rules/untagged_resources.py index f1b52cc..26d7967 100644 --- a/cleancloud/providers/azure/rules/untagged_resources.py +++ b/cleancloud/providers/azure/rules/untagged_resources.py @@ -1,3 +1,39 @@ +""" +Rule: azure.resource.untagged + +Intent: + Detect Azure managed disks and snapshots that currently have zero direct + resource tags and have remained in that state long enough to be conservative + governance review candidates. + + This is a read-only hygiene rule. It is not a waste rule, not proof that a + resource violates a mandatory tagging policy, and not proof that a resource + is safe to delete. + +Exclusions: + - id absent or empty + - name absent or empty + - outside optional region filter (exact lowercase match) + - provisioning state does not resolve to exactly "Succeeded" + - direct tag state is unknown or cannot be resolved reliably + - direct current tag count > 0 (resource is tagged) + - resource age is unknown, invalid, in the future, or less than 7 days + +Detection: + - provisioning state is "Succeeded" + - direct resource tags resolve to zero (None or {}) + - resource age >= 7 days (time_created; SDK-first with nested fallback) + +Cost model (spec 10): + estimated_monthly_cost_usd = None (always) + Missing tags are a governance / allocability metadata issue, not a canonical + Azure price signal. + +APIs: + - Microsoft.Compute/disks/read (compute_client.disks.list) + - Microsoft.Compute/snapshots/read (compute_client.snapshots.list) +""" + from datetime import datetime, timezone from typing import List, Optional @@ -8,31 +44,279 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel -MIN_SNAPSHOT_AGE_DAYS = 7 +_RULE_ID = "azure.resource.untagged" +_MIN_UNTAGGED_AGE_DAYS = 7 + +# Sentinel: field cannot be determined reliably (absent from payload OR +# conflicting across control-plane surfaces). Callers must skip. +_UNRESOLVABLE = object() + + +def _norm_location(s: str) -> str: + """Lowercase only -- exact lowercase match per spec 7.""" + return s.lower() if s else "" + + +# --------------------------------------------------------------------------- +# SDK-first / nested-fallback resolvers with conflict detection +# --------------------------------------------------------------------------- + + +def _resolve_provisioning_state(resource) -> Optional[str]: + """ + Resolve provisioning state per spec 9.2. + + Reads both SDK projection and nested fallback. Returns None on conflict + (both non-None but different values) or when both absent. + + Only "Succeeded" is eligible for evaluation; caller skips on anything else. + """ + sdk_val = getattr(resource, "provisioning_state", None) + + props = getattr(resource, "properties", None) + nested_val = None + if props is not None: + nested_val = getattr(props, "provisioning_state", None) + if nested_val is None: + nested_val = getattr(props, "provisioningState", None) + + if sdk_val is not None and nested_val is not None and sdk_val != nested_val: + return None # conflict -> skip + + return sdk_val or nested_val + + +def _resolve_tags(resource): + """ + Resolve direct resource tag state per spec 9.3. + + Returns: + - dict (may be empty {}): tag state resolved; caller checks len() + - _UNRESOLVABLE: field missing entirely or non-mapping non-None value -> skip + + Required behavior: + - None value -> {} (zero direct tags) + - {} empty mapping -> {} (zero direct tags) + - non-empty mapping -> return it (tagged -> caller skips) + - field missing -> _UNRESOLVABLE (unknown -> skip) + - non-mapping, non-None -> _UNRESOLVABLE (unresolved -> skip) + """ + raw = getattr(resource, "tags", _UNRESOLVABLE) + if raw is _UNRESOLVABLE: + return _UNRESOLVABLE # field absent entirely -> unknown + if raw is None: + return {} # confirmed: no tags + if isinstance(raw, dict): + return raw # may be empty or non-empty; caller decides + return _UNRESOLVABLE # non-mapping non-None -> unresolved -> skip + + +def _coerce_datetime(val) -> Optional[datetime]: + """ + Coerce val to a timezone-aware UTC datetime. + + Accepts datetime objects and ISO 8601 strings (Azure REST payloads + use "Z" or "+00:00" suffixes). Returns None for None, unsupported + types, or unparseable strings. + """ + if val is None: + return None + if isinstance(val, datetime): + if val.tzinfo is None: + return val.replace(tzinfo=timezone.utc) + return val + if isinstance(val, str): + try: + # Python 3.7-3.9 fromisoformat does not support "Z"; normalize first + dt = datetime.fromisoformat(val.replace("Z", "+00:00")) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return dt + except (ValueError, AttributeError): + return None + return None + + +def _resolve_time_created(resource) -> Optional[datetime]: + """ + Resolve creation time per spec 9.4. + + Priority: + 1. SDK projection: resource.time_created + 2. nested/raw properties: resource.properties.timeCreated + + Fail-closed: if a surface is present but unparseable or invalid, return + None immediately -- do not fall back to the other surface. + If both present and valid but conflict materially (> 60s difference) -> None (skip). + If absent from all sources -> None (skip). + """ + + def _get_nested(props, snake, camel): + val = getattr(props, snake, None) + if val is None: + val = getattr(props, camel, None) + return val + + sdk_raw = getattr(resource, "time_created", None) + + props = getattr(resource, "properties", None) + nested_raw = None + if props is not None: + nested_raw = _get_nested(props, "time_created", "timeCreated") + # If a surface is present, it must parse cleanly -- no cross-surface fallback + if sdk_raw is not None: + sdk_dt = _coerce_datetime(sdk_raw) + if sdk_dt is None: + return None # present but invalid/unparseable -> skip + else: + sdk_dt = None -def _age_in_days(created_at: datetime) -> int: - return (datetime.now(timezone.utc) - created_at).days + if nested_raw is not None: + nested_dt = _coerce_datetime(nested_raw) + if nested_dt is None: + return None # present but invalid/unparseable -> skip + else: + nested_dt = None + + # Conflict: both present and valid but disagree materially + if sdk_dt is not None and nested_dt is not None: + if abs((sdk_dt - nested_dt).total_seconds()) > 60: + return None # conflict -> skip + + return sdk_dt if sdk_dt is not None else nested_dt + + +def _resolve_disk_attachment_context(disk) -> str: + """ + Resolve disk attachment context for confidence determination per spec 9.5. + + This is confidence-only context; it never gates emission. + + Returns: + - "unattached": disk_state == "Unattached", managed_by confirmed absent, + managed_by_extended confirmed empty -> MEDIUM confidence + - "attached": any attachment surface confirmed present + - "special_state": disk_state present but not "Unattached" or "Attached" + - "unresolved": any required surface is unknown or conflicting + + Confidence is MEDIUM only when this returns "unattached". + """ + props = getattr(disk, "properties", None) + + # --- disk_state --- + sdk_ds = getattr(disk, "disk_state", None) + nested_ds = None + if props is not None: + nested_ds = getattr(props, "disk_state", None) + if nested_ds is None: + nested_ds = getattr(props, "diskState", None) + if sdk_ds is not None and nested_ds is not None and sdk_ds != nested_ds: + disk_state = None # conflict + else: + disk_state = sdk_ds or nested_ds + + # --- managed_by --- + sdk_mb = getattr(disk, "managed_by", _UNRESOLVABLE) + nested_mb = _UNRESOLVABLE + if props is not None: + nested_mb = getattr(props, "managed_by", _UNRESOLVABLE) + if nested_mb is _UNRESOLVABLE: + nested_mb = getattr(props, "managedBy", _UNRESOLVABLE) + sdk_mb_found = sdk_mb is not _UNRESOLVABLE + nested_mb_found = nested_mb is not _UNRESOLVABLE + + if sdk_mb_found and nested_mb_found: + sdk_eff = sdk_mb or None + nested_eff = nested_mb or None + if sdk_eff != nested_eff: + return "unresolved" # conflict + managed_by = sdk_eff + elif sdk_mb_found: + managed_by = sdk_mb or None + elif nested_mb_found: + managed_by = nested_mb or None + else: + managed_by = _UNRESOLVABLE + + if managed_by is _UNRESOLVABLE: + return "unresolved" + if managed_by: + return "attached" + + # --- managed_by_extended --- + def _to_list(raw): + if raw is None: + return [] + if raw is _UNRESOLVABLE: + return _UNRESOLVABLE + try: + return list(raw) + except TypeError: + return _UNRESOLVABLE + + sdk_mbe_raw = getattr(disk, "managed_by_extended", _UNRESOLVABLE) + nested_mbe_raw = _UNRESOLVABLE + if props is not None: + nested_mbe_raw = getattr(props, "managed_by_extended", _UNRESOLVABLE) + if nested_mbe_raw is _UNRESOLVABLE: + nested_mbe_raw = getattr(props, "managedByExtended", _UNRESOLVABLE) + sdk_mbe_found = sdk_mbe_raw is not _UNRESOLVABLE + nested_mbe_found = nested_mbe_raw is not _UNRESOLVABLE + + if not sdk_mbe_found and not nested_mbe_found: + mbe = _UNRESOLVABLE + else: + sdk_mbe = _to_list(sdk_mbe_raw) if sdk_mbe_found else _UNRESOLVABLE + nested_mbe = _to_list(nested_mbe_raw) if nested_mbe_found else _UNRESOLVABLE + if (sdk_mbe_found and sdk_mbe is _UNRESOLVABLE) or ( + nested_mbe_found and nested_mbe is _UNRESOLVABLE + ): + mbe = _UNRESOLVABLE + elif sdk_mbe_found and nested_mbe_found: + if bool(sdk_mbe) != bool(nested_mbe): + mbe = _UNRESOLVABLE + else: + mbe = sdk_mbe + elif sdk_mbe_found: + mbe = sdk_mbe + else: + mbe = nested_mbe + + if mbe is _UNRESOLVABLE: + return "unresolved" + if mbe: + return "attached" + + # --- disk_state final check --- + if disk_state is None: + return "unresolved" + if disk_state == "Unattached": + return "unattached" + if disk_state in ("Attached", "Reserved"): + return "attached" + return "special_state" def find_untagged_resources( + *, subscription_id: str, credential, region_filter: str = None, client: Optional[ComputeManagementClient] = None, ) -> List[Finding]: """ - Find untagged Azure resources (disks, snapshots). + Find Azure managed disks and snapshots with zero direct resource tags. - Conservative rule (review-only): - - Only checks presence of tags - - Does NOT infer intended usage or IaC ownership + Detection requires (for each resource): + - provisioning state resolves to exactly "Succeeded" + - direct resource tag state resolves reliably and is zero + - resource age >= 7 days (time_created; SDK-first with nested fallback) IAM permissions: - Microsoft.Compute/disks/read - Microsoft.Compute/snapshots/read """ - findings: List[Finding] = [] compute_client = client or ComputeManagementClient( @@ -40,50 +324,112 @@ def find_untagged_resources( subscription_id=subscription_id, ) + now = datetime.now(timezone.utc) + # ====================== # Managed Disks # ====================== for disk in compute_client.disks.list(): - if region_filter and (disk.location or "").lower() != region_filter.lower(): + # spec 8.2: id must be present and non-empty + disk_id = getattr(disk, "id", None) + if not disk_id: continue - if disk.tags: + # spec 8.3: name must be present and non-empty + disk_name = getattr(disk, "name", None) + if not disk_name: continue - confidence_value = ( - ConfidenceLevel.MEDIUM if disk.managed_by is None else ConfidenceLevel.LOW - ) + # spec 8.4: region filter -- exact lowercase match + location = _norm_location(getattr(disk, "location", "") or "") + if region_filter and location != _norm_location(region_filter): + continue + + # spec 8.5 / 9.2: provisioning state must resolve to exactly "Succeeded" + prov_state = _resolve_provisioning_state(disk) + if prov_state != "Succeeded": + continue + + # spec 8.6 / 9.3: direct tag state must resolve reliably + tags = _resolve_tags(disk) + if tags is _UNRESOLVABLE: + continue + + # spec 8.7: skip if tagged + if tags: + continue + + # spec 8.8 / 9.4: resource age must resolve and be >= 7 days + time_created = _resolve_time_created(disk) + if time_created is None or time_created > now: + continue + age_days = (now - time_created).total_seconds() / 86400 + if age_days < _MIN_UNTAGGED_AGE_DAYS: + continue - evidence = Evidence( - signals_used=["No tags found on disk"], - signals_not_checked=[ - "Planned VM attachment", - "IaC-managed intent", - "Application-level usage", - "Disaster recovery or backup planning", - ], - time_window=None, + # spec 9.5: disk attachment context for confidence (never gates emission) + attachment_context = _resolve_disk_attachment_context(disk) + confidence = ( + ConfidenceLevel.MEDIUM if attachment_context == "unattached" else ConfidenceLevel.LOW ) + if attachment_context == "unattached": + confidence_note = ( + "Attachment context appears ordinarily unattached (strengthened to MEDIUM)" + ) + else: + confidence_note = f"Attachment context: {attachment_context} (confidence remains LOW)" + + # Best-effort context values for details (spec 11.4) + disk_state_detail = getattr(disk, "disk_state", None) + managed_by_detail = getattr(disk, "managed_by", None) + managed_by_extended_detail = getattr(disk, "managed_by_extended", None) findings.append( Finding( provider="azure", - rule_id="azure.resource.untagged", + rule_id=_RULE_ID, resource_type="azure.compute.disk", - resource_id=disk.id, - region=disk.location, - title="Untagged Azure managed disk", - summary="Disk has no tags", - reason="No tags found on resource", + resource_id=disk_id, + region=location, + estimated_monthly_cost_usd=None, # spec 10: always None + title="Untagged Azure Managed Disk", + summary=( + f"Managed disk '{disk_name}' has zero direct tags " + f"and is {age_days:.0f} days old" + ), + reason="No direct tags found on managed disk resource", risk=RiskLevel.LOW, - confidence=confidence_value, - detected_at=datetime.now(timezone.utc), - evidence=evidence, + confidence=confidence, + detected_at=now, + evidence=Evidence( + signals_used=[ + "Supported resource family: managed_disk", + "Provisioning state is 'Succeeded'", + "Direct resource tags resolved to zero current tags", + f"Resource age: {age_days:.1f} days (>= {_MIN_UNTAGGED_AGE_DAYS} days)", + confidence_note, + ], + signals_not_checked=[ + "Required tag-key or tag-value policy compliance", + "Azure Policy remediation status", + "Resource-group or subscription tag intent", + "IaC-managed ownership intent", + "Future planned usage or DR / backup purpose", + ], + time_window=None, + ), details={ - "resource_name": disk.name, + "resource_name": disk_name, "subscription_id": subscription_id, + "resource_family": "managed_disk", "tags_present": False, - "managed_by": disk.managed_by, + "current_tag_count": 0, + "age_days": round(age_days, 1), + "provisioning_state": prov_state, + "tags": tags, + "disk_state": disk_state_detail, + "managed_by": managed_by_detail, + "managed_by_extended": managed_by_extended_detail, }, ) ) @@ -92,49 +438,85 @@ def find_untagged_resources( # Snapshots # ====================== for snap in compute_client.snapshots.list(): - if region_filter and (snap.location or "").lower() != region_filter.lower(): + # spec 8.2: id must be present and non-empty + snap_id = getattr(snap, "id", None) + if not snap_id: continue - if snap.tags: + # spec 8.3: name must be present and non-empty + snap_name = getattr(snap, "name", None) + if not snap_name: continue - if not snap.time_created: + # spec 8.4: region filter -- exact lowercase match + location = _norm_location(getattr(snap, "location", "") or "") + if region_filter and location != _norm_location(region_filter): continue - age_days = _age_in_days(snap.time_created) - if age_days < MIN_SNAPSHOT_AGE_DAYS: + # spec 8.5 / 9.2: provisioning state must resolve to exactly "Succeeded" + prov_state = _resolve_provisioning_state(snap) + if prov_state != "Succeeded": continue - evidence = Evidence( - signals_used=[f"No tags found on snapshot, age {age_days} days"], - signals_not_checked=[ - "Disk usage by applications", - "IaC-managed ownership", - "Disaster recovery or backup planning", - "Future planned usage", - ], - time_window=f">={MIN_SNAPSHOT_AGE_DAYS} days", - ) + # spec 8.6 / 9.3: direct tag state must resolve reliably + tags = _resolve_tags(snap) + if tags is _UNRESOLVABLE: + continue + + # spec 8.7: skip if tagged + if tags: + continue + + # spec 8.8 / 9.4: resource age must resolve and be >= 7 days + time_created = _resolve_time_created(snap) + if time_created is None or time_created > now: + continue + age_days = (now - time_created).total_seconds() / 86400 + if age_days < _MIN_UNTAGGED_AGE_DAYS: + continue findings.append( Finding( provider="azure", - rule_id="azure.resource.untagged", + rule_id=_RULE_ID, resource_type="azure.compute.snapshot", - resource_id=snap.id, - region=snap.location, - title="Untagged Azure snapshot", - summary="Snapshot has no tags", - reason="No tags found on resource", + resource_id=snap_id, + region=location, + estimated_monthly_cost_usd=None, # spec 10: always None + title="Untagged Azure Snapshot", + summary=( + f"Snapshot '{snap_name}' has zero direct tags " + f"and is {age_days:.0f} days old" + ), + reason="No direct tags found on snapshot resource", risk=RiskLevel.LOW, confidence=ConfidenceLevel.LOW, - detected_at=datetime.now(timezone.utc), - evidence=evidence, + detected_at=now, + evidence=Evidence( + signals_used=[ + "Supported resource family: snapshot", + "Provisioning state is 'Succeeded'", + "Direct resource tags resolved to zero current tags", + f"Resource age: {age_days:.1f} days (>= {_MIN_UNTAGGED_AGE_DAYS} days)", + ], + signals_not_checked=[ + "Required tag-key or tag-value policy compliance", + "Azure Policy remediation status", + "Resource-group or subscription tag intent", + "IaC-managed ownership intent", + "Future planned usage or DR / backup purpose", + ], + time_window=f">={_MIN_UNTAGGED_AGE_DAYS} days", + ), details={ - "resource_name": snap.name, + "resource_name": snap_name, "subscription_id": subscription_id, + "resource_family": "snapshot", "tags_present": False, - "age_days": age_days, + "current_tag_count": 0, + "age_days": round(age_days, 1), + "provisioning_state": prov_state, + "tags": tags, }, ) ) diff --git a/docs/rules/azure.md b/docs/rules/azure.md index 5eca111..ab11c39 100644 --- a/docs/rules/azure.md +++ b/docs/rules/azure.md @@ -188,17 +188,17 @@ ## Governance #### `azure.resource.untagged` -**Detects:** Managed disks and snapshots with zero tags +**Detects:** Managed disks and snapshots with zero direct resource tags and resource age >= 7 days -**Confidence / Risk:** MEDIUM (untagged + unattached disk); LOW (untagged snapshot or attached disk) / LOW +**Confidence / Risk:** MEDIUM (untagged disk whose attachment context resolves conservatively as ordinarily unattached); LOW (untagged snapshot or disk with attached/unresolved attachment context) / LOW **Permissions:** `Microsoft.Compute/disks/read`, `Microsoft.Compute/snapshots/read` **Params:** none -**Exclusions:** disks younger than 7 days +**Exclusions:** `provisioning_state != "Succeeded"`; direct tag state unresolvable (field missing or non-mapping non-None value); resource has at least one direct tag; resource age < 7 days or `time_created` unresolvable, invalid, or in the future; conflicting SDK vs nested provisioning-state or time-created signals -**Spec:** — +**Spec:** [specs/azure/untagged_resources.md](../specs/azure/untagged_resources.md) --- diff --git a/docs/specs/azure/untagged_resources.md b/docs/specs/azure/untagged_resources.md new file mode 100644 index 0000000..72eb0f3 --- /dev/null +++ b/docs/specs/azure/untagged_resources.md @@ -0,0 +1,355 @@ +# Azure Rule Spec — `azure.resource.untagged` + +## 1. Rule Identity + +- **Rule ID:** `azure.resource.untagged` +- **Provider:** Azure +- **Supported ARM resource types:** `Microsoft.Compute/disks`, `Microsoft.Compute/snapshots` +- **Finding resource_type values:** `azure.compute.disk`, `azure.compute.snapshot` + +--- + +## 2. Intent + +Detect supported Azure compute resources that currently have **zero direct resource tags** and have remained in that state long enough to be conservative governance review candidates. + +This is a **read-only hygiene rule**. It is **not** a waste rule, **not** proof that a resource violates a mandatory tagging policy, and **not** proof that a resource is safe to delete. + +--- + +## 3. Azure Documentation Grounding + +### 3.1 Azure tag semantics + +Microsoft documents that Azure tags are **key-value metadata** applied to resources, resource groups, and subscriptions. + +Microsoft also documents that: + +- tag names are case-insensitive for operations +- tag values are case-sensitive +- tags are stored as plain text +- resources do **not** inherit tags from their resource group or subscription unless separate policy/remediation mechanisms are used + +Source: *Use tags to organize your Azure resources and management hierarchy* +URL: https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/tag-resources + +Rule consequence: + +1. This rule must evaluate only whether the **resource itself** currently has direct tags. +2. Resource-group or subscription tags must **not** be treated as equivalent to resource tags. +3. The rule must not evaluate required tag keys, tag values, or tag-policy compliance. + +### 3.2 Tag support and policy remediation + +Microsoft documents that: + +- not all Azure resource types support tags +- Azure Policy can add, replace, or inherit tags during create/update +- remediation tasks can apply tags to existing resources later + +Sources: + +- *Tag support for Azure resources* +- *Tutorial: Govern tag compliance using Azure Policy* + +URLs: + +- https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/tag-support +- https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/tag-policies + +Rule consequence: + +1. This rule must stay limited to resource families with documented tag support and inventory contracts. +2. A conservative age buffer is appropriate to reduce transient post-deployment or pending-remediation noise. + +### 3.3 Managed disk inventory shape + +Microsoft Compute REST and SDK documentation for `Microsoft.Compute/disks` shows list/inventory objects include fields such as: + +- `id` +- `name` +- `location` +- `tags` +- `provisioningState` +- `timeCreated` +- `managedBy` +- `managedByExtended` +- `diskState` + +Sources: + +- *Disks - List (REST API)* +- *azure.mgmt.compute.models.Disk* + +URLs: + +- https://learn.microsoft.com/en-us/rest/api/compute/disks/list?view=rest-compute-2025-04-01 +- https://learn.microsoft.com/en-us/python/api/azure-mgmt-compute/azure.mgmt.compute.models.disk?view=azure-python + +Rule consequence: + +1. Direct disk tag state can be evaluated from the disk resource payload itself. +2. Disk attachment state may be used as **context** for confidence, but it must not change whether a reliably untagged disk is emitted. + +### 3.4 Snapshot inventory shape + +Microsoft Compute REST and SDK documentation for `Microsoft.Compute/snapshots` shows list/inventory objects include fields such as: + +- `id` +- `name` +- `location` +- `tags` +- `provisioningState` +- `timeCreated` + +Sources: + +- *Snapshots - List (REST API)* +- *azure.mgmt.compute.models.Snapshot* + +URLs: + +- https://learn.microsoft.com/en-us/rest/api/compute/snapshots/list?view=rest-compute-2025-04-01 +- https://learn.microsoft.com/en-us/python/api/azure-mgmt-compute/azure.mgmt.compute.models.snapshot?view=azure-python + +Rule consequence: + +Direct snapshot tag state can be evaluated from the snapshot resource payload itself. + +--- + +## 4. Detection Goal + +Emit a finding only when **all** of the following are true: + +1. the resource belongs to a supported family: managed disk or snapshot +2. `resource.id` is present and non-empty +3. `resource.name` is present and non-empty +4. the optional region filter matches the normalized location +5. `provisioning_state` resolves to exactly `"Succeeded"` +6. direct resource tag state resolves reliably +7. direct current tag count is `0` +8. resource age resolves reliably and is at least `7` days + +If any required signal cannot be established reliably, skip rather than emit. + +--- + +## 5. Non-Goals + +This rule does **not** attempt to prove: + +- that the resource is unused or wasteful +- that deleting the resource is safe +- that the resource violates a required-tag policy +- that inherited resource-group or subscription tags are absent +- that Azure Policy remediation is or is not pending +- that a specific monthly saving exists + +--- + +## 6. Canonical Inputs + +### 6.1 Required control-plane surfaces + +The implementation may use: + +- `compute_client.disks.list()` +- `compute_client.snapshots.list()` + +It must not require: + +- Azure Resource Graph +- Resource Groups Tagging APIs +- Azure Policy compliance APIs +- resource group or subscription tag lookups + +### 6.2 Untagged-age threshold + +- Configurable parameter: none +- Fixed threshold: `min_untagged_age_days = 7` + +Reason: + +- Azure Policy remediation and deployment workflows can apply tags after initial creation/update. +- A short fixed buffer reduces transient governance noise without changing the factual tag contract. + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `resource_family` | One of `managed_disk` or `snapshot`; any other family is out of scope. | +| `location` | Lowercase ARM location string; compare by exact lowercase string equality only. Do not remove spaces, hyphens, or digits. | +| `provisioning_state` | Compare case-sensitively to canonical Azure value `"Succeeded"` after SDK/raw resolution. Values such as `"SucceededWithWarnings"` or `"succeeded"` are not equivalent. | +| `tags` | Direct resource tags only. `None` or empty mapping means zero direct tags. Non-empty mapping means tagged. Missing tag field or non-mapping non-`None` values are unresolved and must skip. | +| `time_created` | Parse as UTC instant. If absent, invalid, or in the future, age is unknown. | +| `managed_by` | Disk-only context. Non-empty value indicates direct VM attachment context. | +| `managed_by_extended` | Disk-only context. Any non-empty collection / payload indicates shared-attachment context. | +| `disk_state` | Disk-only context. Compare case-sensitively to Azure values such as `"Unattached"` after SDK/raw resolution when used for confidence context. | + +--- + +## 8. Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | Unsupported family | Skip | +| 8.2 | `id` absent, `None`, or empty | Skip | +| 8.3 | `name` absent, `None`, or empty | Skip | +| 8.4 | Region filter set and normalized location does not match | Skip | +| 8.5 | `provisioning_state` does not resolve to `"Succeeded"` | Skip | +| 8.6 | Direct tag state is unknown or cannot be resolved reliably | Skip | +| 8.7 | Direct current tag count is greater than `0` | Skip | +| 8.8 | Resource age is unknown, invalid, in the future, or less than `7` days | Skip | +| 8.9 | All required signals resolve and direct current tag count is `0` for a supported resource | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Supported-scope contract + +This rule is intentionally limited to: + +1. Azure managed disks +2. Azure snapshots + +It must not claim coverage of all Azure resources or all taggable Azure resource types. + +### 9.2 Provisioning-state contract + +Resolve provisioning state in this order: + +1. SDK projection such as `resource.provisioning_state` +2. nested/raw properties projection such as `resource.properties.provisioningState` +3. otherwise unknown + +Required behavior: + +1. Only `"Succeeded"` is eligible for evaluation. +2. Values such as `"SucceededWithWarnings"` or `"succeeded"` are not equivalent to `"Succeeded"` and must skip. +3. If SDK and nested/raw values both exist and conflict, the resource must skip. +4. Unknown or any other value must skip. + +### 9.3 Direct-tag contract + +The authoritative tag source for this rule is the **direct resource `tags` payload** on the disk or snapshot inventory object. + +Required behavior: + +1. Prefer the SDK/resource field such as `resource.tags`. +2. Treat `None` as zero direct tags. +3. Treat an empty mapping such as `{}` as zero direct tags. +4. Treat a non-empty mapping as tagged. +5. If the tag field is missing entirely or is a non-mapping non-`None` value, tag state is unresolved and the resource must skip. +6. Do **not** synthesize tag state from resource-group tags, subscription tags, Azure Policy assignments, or inheritance assumptions. + +### 9.4 Age contract + +Resolve creation time in this order: + +1. SDK projection such as `resource.time_created` +2. nested/raw properties projection such as `resource.properties.timeCreated` +3. otherwise unknown + +Required behavior: + +1. If SDK and nested/raw creation times both exist and conflict materially, the resource should skip rather than guess. +2. If creation time is absent, invalid, unparseable, or in the future, the resource must skip. +3. Emit only when `now - creation_time >= 7 days`. + +### 9.5 Disk attachment context contract + +Disk attachment context is **confidence-only context**, not a baseline eligibility requirement. + +Use disk attachment context only to distinguish: + +1. a directly untagged disk that also appears ordinarily unattached +2. a directly untagged disk whose attachment context is attached, special-state, or unresolved + +Required behavior: + +1. A reliably untagged disk that satisfies the baseline rule must still emit even if attachment context is attached or unresolved. +2. Higher confidence is allowed only when unattached context resolves conservatively. +3. If disk attachment context indicates the disk is attached, special-state, or unresolved, confidence must remain `LOW`. +4. Snapshot findings do not use disk attachment context. + +--- + +## 10. Cost Model + +`estimated_monthly_cost_usd = None` + +Missing tags are a governance / allocability metadata issue, not a canonical Azure price signal. + +--- + +## 11. Finding Shape + +### 11.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"azure"` | +| `rule_id` | `"azure.resource.untagged"` | +| `resource_type` | `azure.compute.disk` or `azure.compute.snapshot` | +| `resource_id` | Original ARM id from the resource object | +| `region` | Normalized location | +| `risk` | `LOW` | +| `estimated_monthly_cost_usd` | `None` | + +### 11.2 Confidence contract + +- **Managed disk:** `MEDIUM` only when the resource is reliably untagged **and** ordinary unattached-disk context resolves conservatively +- **Managed disk:** otherwise `LOW` +- **Snapshot:** `LOW` + +### 11.3 Required evidence + +`signals_used` must clearly disclose: + +1. supported resource family +2. provisioning state is `"Succeeded"` +3. direct resource tags resolved to zero current tags +4. resource age in days and the `>= 7 days` threshold +5. for disks, whether attachment context appears ordinarily unattached, attached, special-state, or unresolved +6. for disks, whether attachment context strengthened confidence or remained context-only + +`signals_not_checked` should include remaining blind spots such as: + +1. required tag-key or tag-value policy compliance +2. Azure Policy remediation status +3. resource-group or subscription tag intent +4. IaC-managed ownership intent +5. future planned usage or DR / backup purpose + +### 11.4 Required details + +Details should include at least: + +- `resource_name` +- `subscription_id` +- `resource_family` +- `tags_present` +- `current_tag_count` +- `age_days` +- `provisioning_state` +- `tags` + +For disks, details may also include: + +- `disk_state` +- `managed_by` +- `managed_by_extended` + +--- + +## 12. Failure Behavior + +- If `disks.list()` raises, let the exception propagate for the disk pass +- If `snapshots.list()` raises, let the exception propagate for the snapshot pass +- If an individual resource record is malformed or missing required fields, skip that resource +- Do not emit on partial or unresolved direct-tag state +- Do not infer tag presence from parent scopes diff --git a/tests/cleancloud/providers/azure/test_azure_untagged_resources.py b/tests/cleancloud/providers/azure/test_azure_untagged_resources.py index 9671b0b..b654c33 100644 --- a/tests/cleancloud/providers/azure/test_azure_untagged_resources.py +++ b/tests/cleancloud/providers/azure/test_azure_untagged_resources.py @@ -1,48 +1,924 @@ -from datetime import datetime, timezone +""" +Tests for azure.resource.untagged -- spec-aligned. + +Covers: must-emit (disk + snapshot), must-skip, id/name guards, region filter, + provisioning-state contract, tag contract, age contract (SDK/nested/conflict/ + invalid fail-closed), disk confidence contract (attachment context), + snapshot confidence, finding shape, failure behavior, + resolver unit tests (_resolve_provisioning_state, _resolve_tags, + _resolve_time_created, _coerce_datetime, _resolve_disk_attachment_context). +""" + +from datetime import datetime, timedelta, timezone from types import SimpleNamespace +from unittest.mock import MagicMock import pytest -from cleancloud.providers.azure.rules.untagged_resources import find_untagged_resources +from cleancloud.providers.azure.rules.untagged_resources import ( + _UNRESOLVABLE, + _coerce_datetime, + _resolve_disk_attachment_context, + _resolve_provisioning_state, + _resolve_tags, + _resolve_time_created, + find_untagged_resources, +) +# --------------------------------------------------------------------------- +# Shared constants +# --------------------------------------------------------------------------- -@pytest.fixture -def mock_compute_client(mocker): - # Mock disks.list() - disk_old = SimpleNamespace( - id="disk-1", - name="disk-old", - location="eastus", - managed_by=None, - time_created=datetime.now(timezone.utc), - sku=SimpleNamespace(name="Standard_LRS"), - tags=None, - ) - disk_attached = SimpleNamespace( - id="disk-2", - name="disk-attached", - location="eastus", - managed_by="/subscriptions/xxx/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/vm1", - time_created=datetime.now(timezone.utc), - sku=None, - tags={"env": "prod"}, +_SUB = "sub-123" +_MIN_AGE = 7 # days + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _ago(days: float) -> datetime: + """Return a UTC datetime `days` ago (negative = future).""" + return datetime.now(timezone.utc) - timedelta(days=days) + + +def _make_disk( + disk_id="disk-1", + name="my-disk", + location="eastus", + provisioning_state="Succeeded", + disk_state="Unattached", + managed_by=None, + managed_by_extended=None, + time_created=None, + tags=None, + properties=None, +) -> SimpleNamespace: + """ + Build a fully qualifying disk SimpleNamespace. + Defaults to time_created=10 days ago, so the disk passes the 7-day + age check out of the box. + """ + if time_created is None: + time_created = _ago(10) + return SimpleNamespace( + id=disk_id, + name=name, + location=location, + provisioning_state=provisioning_state, + disk_state=disk_state, + managed_by=managed_by, + managed_by_extended=managed_by_extended, + time_created=time_created, + tags=tags, + properties=properties, ) - disks_client = mocker.MagicMock() - disks_client.list.return_value = [disk_old, disk_attached] - client = mocker.MagicMock() - client.disks = disks_client - return client +def _make_snap( + snap_id="snap-1", + name="my-snap", + location="eastus", + provisioning_state="Succeeded", + time_created=None, + tags=None, + properties=None, +) -> SimpleNamespace: + """ + Build a fully qualifying snapshot SimpleNamespace. + Defaults to time_created=10 days ago. + """ + if time_created is None: + time_created = _ago(10) + return SimpleNamespace( + id=snap_id, + name=name, + location=location, + provisioning_state=provisioning_state, + time_created=time_created, + tags=tags, + properties=properties, + ) -def test_find_untagged_resources(mock_compute_client): - findings = find_untagged_resources( - subscription_id="sub-123", +def _run(disks=None, snaps=None, region_filter=None): + client = MagicMock() + client.disks.list.return_value = disks or [] + client.snapshots.list.return_value = snaps or [] + return find_untagged_resources( + subscription_id=_SUB, credential=None, - region_filter="eastus", - client=mock_compute_client, + region_filter=region_filter, + client=client, ) - resource_ids = [f.resource_id for f in findings] - assert "disk-1" in resource_ids - assert "disk-2" not in resource_ids + + +def _one_disk(disks, region_filter=None): + results = _run(disks=disks, region_filter=region_filter) + disk_findings = [f for f in results if f.resource_type == "azure.compute.disk"] + assert len(disk_findings) == 1 + return disk_findings[0] + + +def _one_snap(snaps, region_filter=None): + results = _run(snaps=snaps, region_filter=region_filter) + snap_findings = [f for f in results if f.resource_type == "azure.compute.snapshot"] + assert len(snap_findings) == 1 + return snap_findings[0] + + +# =========================================================================== +# TestMustEmit -- both resource families +# =========================================================================== + + +class TestMustEmit: + def test_qualifying_disk_emits(self): + assert len(_run(disks=[_make_disk()])) == 1 + + def test_qualifying_snapshot_emits(self): + assert len(_run(snaps=[_make_snap()])) == 1 + + def test_multiple_qualifying_disks_all_emit(self): + disks = [_make_disk(disk_id=f"disk-{i}", name=f"d{i}") for i in range(3)] + assert len(_run(disks=disks)) == 3 + + def test_multiple_qualifying_snaps_all_emit(self): + snaps = [_make_snap(snap_id=f"snap-{i}", name=f"s{i}") for i in range(3)] + assert len(_run(snaps=snaps)) == 3 + + def test_disk_and_snap_both_emit(self): + results = _run(disks=[_make_disk()], snaps=[_make_snap()]) + assert len(results) == 2 + types = {f.resource_type for f in results} + assert types == {"azure.compute.disk", "azure.compute.snapshot"} + + def test_disk_tags_none_emits(self): + # None -> zero direct tags -> emit + assert len(_run(disks=[_make_disk(tags=None)])) == 1 + + def test_disk_tags_empty_dict_emits(self): + # {} -> zero direct tags -> emit + assert len(_run(disks=[_make_disk(tags={})])) == 1 + + def test_snap_tags_none_emits(self): + assert len(_run(snaps=[_make_snap(tags=None)])) == 1 + + def test_snap_tags_empty_dict_emits(self): + assert len(_run(snaps=[_make_snap(tags={})])) == 1 + + +# =========================================================================== +# TestMustSkip -- disks +# =========================================================================== + + +class TestMustSkipDisks: + def test_tagged_disk_skips(self): + assert _run(disks=[_make_disk(tags={"env": "prod"})]) == [] + + def test_provisioning_state_failed_skips(self): + assert _run(disks=[_make_disk(provisioning_state="Failed")]) == [] + + def test_provisioning_state_creating_skips(self): + assert _run(disks=[_make_disk(provisioning_state="Creating")]) == [] + + def test_provisioning_state_succeeded_with_warnings_skips(self): + # Spec: only exact "Succeeded" qualifies + assert _run(disks=[_make_disk(provisioning_state="SucceededWithWarnings")]) == [] + + def test_provisioning_state_lowercase_succeeded_skips(self): + assert _run(disks=[_make_disk(provisioning_state="succeeded")]) == [] + + def test_disk_age_6_days_skips(self): + assert _run(disks=[_make_disk(time_created=_ago(6))]) == [] + + def test_disk_age_exactly_7_days_emits(self): + assert len(_run(disks=[_make_disk(time_created=_ago(7))])) == 1 + + def test_disk_time_created_future_skips(self): + assert _run(disks=[_make_disk(time_created=_ago(-1))]) == [] + + def test_disk_time_created_absent_skips(self): + disk = _make_disk() + disk.time_created = None + assert _run(disks=[disk]) == [] + + def test_disk_time_created_invalid_string_skips(self): + disk = _make_disk() + disk.time_created = "not-a-date" + assert _run(disks=[disk]) == [] + + def test_disk_tags_missing_field_skips(self): + disk = _make_disk() + del disk.tags + assert _run(disks=[disk]) == [] + + def test_disk_tags_non_mapping_non_none_skips(self): + disk = _make_disk(tags="tagged") + assert _run(disks=[disk]) == [] + + def test_disk_tags_list_skips(self): + disk = _make_disk(tags=["env", "prod"]) + assert _run(disks=[disk]) == [] + + +# =========================================================================== +# TestMustSkip -- snapshots +# =========================================================================== + + +class TestMustSkipSnaps: + def test_tagged_snap_skips(self): + assert _run(snaps=[_make_snap(tags={"team": "infra"})]) == [] + + def test_snap_age_6_days_skips(self): + assert _run(snaps=[_make_snap(time_created=_ago(6))]) == [] + + def test_snap_age_exactly_7_days_emits(self): + assert len(_run(snaps=[_make_snap(time_created=_ago(7))])) == 1 + + def test_snap_time_created_future_skips(self): + assert _run(snaps=[_make_snap(time_created=_ago(-1))]) == [] + + def test_snap_time_created_absent_skips(self): + snap = _make_snap() + snap.time_created = None + assert _run(snaps=[snap]) == [] + + def test_snap_provisioning_state_not_succeeded_skips(self): + assert _run(snaps=[_make_snap(provisioning_state="Failed")]) == [] + + def test_snap_tags_missing_field_skips(self): + snap = _make_snap() + del snap.tags + assert _run(snaps=[snap]) == [] + + def test_snap_tags_non_mapping_skips(self): + snap = _make_snap(tags=42) + assert _run(snaps=[snap]) == [] + + +# =========================================================================== +# TestIdNameGuards -- spec 8.2, 8.3 +# =========================================================================== + + +class TestIdNameGuards: + def test_disk_id_absent_skips(self): + disk = _make_disk() + del disk.id + assert _run(disks=[disk]) == [] + + def test_disk_id_none_skips(self): + assert _run(disks=[_make_disk(disk_id=None)]) == [] + + def test_disk_id_empty_skips(self): + assert _run(disks=[_make_disk(disk_id="")]) == [] + + def test_disk_name_absent_skips(self): + disk = _make_disk() + del disk.name + assert _run(disks=[disk]) == [] + + def test_disk_name_none_skips(self): + assert _run(disks=[_make_disk(name=None)]) == [] + + def test_disk_name_empty_skips(self): + assert _run(disks=[_make_disk(name="")]) == [] + + def test_snap_id_absent_skips(self): + snap = _make_snap() + del snap.id + assert _run(snaps=[snap]) == [] + + def test_snap_name_absent_skips(self): + snap = _make_snap() + del snap.name + assert _run(snaps=[snap]) == [] + + def test_snap_id_empty_skips(self): + assert _run(snaps=[_make_snap(snap_id="")]) == [] + + def test_snap_name_empty_skips(self): + assert _run(snaps=[_make_snap(name="")]) == [] + + +# =========================================================================== +# TestRegionFilter -- spec 8.4 +# =========================================================================== + + +class TestRegionFilter: + def test_matching_region_emits(self): + assert len(_run(disks=[_make_disk(location="eastus")], region_filter="eastus")) == 1 + + def test_non_matching_region_skips(self): + assert _run(disks=[_make_disk(location="westus")], region_filter="eastus") == [] + + def test_region_filter_is_case_insensitive(self): + assert len(_run(disks=[_make_disk(location="eastus")], region_filter="EastUS")) == 1 + + def test_no_region_filter_emits_all(self): + disks = [ + _make_disk(disk_id="d1", name="a", location="eastus"), + _make_disk(disk_id="d2", name="b", location="westus"), + ] + assert len(_run(disks=disks)) == 2 + + def test_snap_region_filter_respected(self): + assert _run(snaps=[_make_snap(location="westus")], region_filter="eastus") == [] + + +# =========================================================================== +# TestProvisioningStateContract -- spec 9.2 +# =========================================================================== + + +class TestProvisioningStateContract: + def test_sdk_succeeded_emits(self): + assert len(_run(disks=[_make_disk(provisioning_state="Succeeded")])) == 1 + + def test_nested_provisioning_state_used_when_sdk_absent(self): + disk = _make_disk() + disk.provisioning_state = None + disk.properties = SimpleNamespace(provisioning_state="Succeeded", provisioningState=None) + assert len(_run(disks=[disk])) == 1 + + def test_nested_camel_case_used(self): + disk = _make_disk() + disk.provisioning_state = None + disk.properties = SimpleNamespace(provisioning_state=None, provisioningState="Succeeded") + assert len(_run(disks=[disk])) == 1 + + def test_sdk_nested_conflict_skips(self): + disk = _make_disk() + disk.provisioning_state = "Succeeded" + disk.properties = SimpleNamespace(provisioning_state="Failed", provisioningState=None) + assert _run(disks=[disk]) == [] + + def test_both_absent_skips(self): + disk = _make_disk(provisioning_state=None) + disk.properties = None + assert _run(disks=[disk]) == [] + + +# =========================================================================== +# TestTagContract -- spec 9.3 +# =========================================================================== + + +class TestTagContract: + def test_tags_none_zero_count(self): + assert len(_run(disks=[_make_disk(tags=None)])) == 1 + + def test_tags_empty_dict_zero_count(self): + assert len(_run(disks=[_make_disk(tags={})])) == 1 + + def test_tags_non_empty_skips(self): + assert _run(disks=[_make_disk(tags={"env": "prod"})]) == [] + + def test_tags_field_missing_skips(self): + disk = _make_disk() + del disk.tags + assert _run(disks=[disk]) == [] + + def test_tags_string_value_skips(self): + disk = _make_disk(tags="yes-tagged") + assert _run(disks=[disk]) == [] + + def test_tags_int_value_skips(self): + disk = _make_disk(tags=1) + assert _run(disks=[disk]) == [] + + def test_tags_list_value_skips(self): + disk = _make_disk(tags=["env", "prod"]) + assert _run(disks=[disk]) == [] + + def test_snap_tags_none_zero_count(self): + assert len(_run(snaps=[_make_snap(tags=None)])) == 1 + + def test_snap_tags_non_empty_skips(self): + assert _run(snaps=[_make_snap(tags={"x": "y"})]) == [] + + def test_snap_tags_missing_field_skips(self): + snap = _make_snap() + del snap.tags + assert _run(snaps=[snap]) == [] + + +# =========================================================================== +# TestAgeContract -- spec 9.4 +# =========================================================================== + + +class TestAgeContract: + def test_sdk_time_created_used(self): + disk = _make_disk(time_created=_ago(10)) + assert len(_run(disks=[disk])) == 1 + + def test_nested_time_created_used_when_sdk_absent(self): + disk = _make_disk() + disk.time_created = None + disk.properties = SimpleNamespace(time_created=_ago(10), timeCreated=None) + assert len(_run(disks=[disk])) == 1 + + def test_nested_camel_time_created_used(self): + disk = _make_disk() + disk.time_created = None + disk.properties = SimpleNamespace(time_created=None, timeCreated=_ago(10)) + assert len(_run(disks=[disk])) == 1 + + def test_sdk_present_but_invalid_skips_no_fallback(self): + # SDK surface present but unparseable -- must skip, not fall back to nested + disk = _make_disk() + disk.time_created = "not-a-date" + disk.properties = SimpleNamespace(time_created=None, timeCreated=_ago(10)) + assert _run(disks=[disk]) == [] + + def test_nested_present_but_invalid_skips(self): + disk = _make_disk() + disk.time_created = None + disk.properties = SimpleNamespace(time_created="bad", timeCreated=None) + assert _run(disks=[disk]) == [] + + def test_sdk_nested_conflict_skips(self): + disk = _make_disk() + disk.time_created = _ago(10) + disk.properties = SimpleNamespace(time_created=_ago(50), timeCreated=None) + assert _run(disks=[disk]) == [] + + def test_sdk_nested_agree_within_60s_emits(self): + base = _ago(10) + disk = _make_disk() + disk.time_created = base + disk.properties = SimpleNamespace( + time_created=base + timedelta(seconds=30), timeCreated=None + ) + assert len(_run(disks=[disk])) == 1 + + def test_time_created_iso_string_accepted(self): + disk = _make_disk() + disk.time_created = _ago(10).isoformat().replace("+00:00", "Z") + assert len(_run(disks=[disk])) == 1 + + def test_time_created_future_skips(self): + assert _run(disks=[_make_disk(time_created=_ago(-1))]) == [] + + def test_both_absent_skips(self): + disk = _make_disk() + disk.time_created = None + disk.properties = None + assert _run(disks=[disk]) == [] + + def test_snap_age_contract(self): + assert _run(snaps=[_make_snap(time_created=_ago(6))]) == [] + assert len(_run(snaps=[_make_snap(time_created=_ago(8))])) == 1 + + def test_snap_sdk_invalid_skips_no_fallback(self): + snap = _make_snap() + snap.time_created = "garbage" + snap.properties = SimpleNamespace(time_created=None, timeCreated=_ago(10)) + assert _run(snaps=[snap]) == [] + + +# =========================================================================== +# TestDiskConfidenceContract -- spec 9.5 / 11.2 +# =========================================================================== + + +class TestDiskConfidenceContract: + def test_unattached_disk_medium_confidence(self): + disk = _make_disk( + disk_state="Unattached", + managed_by=None, + managed_by_extended=None, + ) + f = _one_disk([disk]) + assert f.confidence.value == "medium" + + def test_attached_disk_low_confidence(self): + disk = _make_disk( + disk_state="Attached", + managed_by="/subscriptions/s/vms/vm1", + managed_by_extended=None, + ) + # managed_by is present but tags are None -> would emit as untagged + # however managed_by is set, so disk attachment context is "attached" + f = _one_disk([disk]) + assert f.confidence.value == "low" + + def test_managed_by_extended_nonempty_low_confidence(self): + disk = _make_disk( + disk_state="Unattached", + managed_by=None, + managed_by_extended=["/sub/vms/vm2"], + ) + f = _one_disk([disk]) + assert f.confidence.value == "low" + + def test_disk_state_not_unattached_low_confidence(self): + disk = _make_disk( + disk_state="Reserved", + managed_by=None, + managed_by_extended=None, + ) + f = _one_disk([disk]) + assert f.confidence.value == "low" + + def test_managed_by_extended_unresolvable_low_confidence(self): + # Field absent from all sources -> unresolved -> LOW + disk = _make_disk( + disk_state="Unattached", + managed_by=None, + ) + del disk.managed_by_extended + f = _one_disk([disk]) + assert f.confidence.value == "low" + + def test_managed_by_field_absent_low_confidence(self): + disk = _make_disk(disk_state="Unattached", managed_by_extended=None) + del disk.managed_by + f = _one_disk([disk]) + assert f.confidence.value == "low" + + def test_snapshot_always_low_confidence(self): + f = _one_snap([_make_snap()]) + assert f.confidence.value == "low" + + +# =========================================================================== +# TestFindingShape -- spec 11 +# =========================================================================== + + +class TestFindingShape: + def test_disk_finding_required_fields(self): + f = _one_disk([_make_disk()]) + assert f.provider == "azure" + assert f.rule_id == "azure.resource.untagged" + assert f.resource_type == "azure.compute.disk" + assert f.resource_id == "disk-1" + assert f.region == "eastus" + assert f.risk.value == "low" + assert f.estimated_monthly_cost_usd is None + + def test_disk_details_required_keys(self): + f = _one_disk([_make_disk()]) + d = f.details + assert d["resource_name"] == "my-disk" + assert d["subscription_id"] == _SUB + assert d["resource_family"] == "managed_disk" + assert d["tags_present"] is False + assert d["current_tag_count"] == 0 + assert isinstance(d["age_days"], float) + assert d["provisioning_state"] == "Succeeded" + assert isinstance(d["tags"], dict) + + def test_disk_details_disk_context_keys(self): + # disk_state, managed_by, managed_by_extended present in details + f = _one_disk([_make_disk(disk_state="Unattached", managed_by=None)]) + d = f.details + assert "disk_state" in d + assert "managed_by" in d + assert "managed_by_extended" in d + + def test_disk_age_days_rounded(self): + f = _one_disk([_make_disk(time_created=_ago(10))]) + assert f.details["age_days"] == round(f.details["age_days"], 1) + + def test_disk_estimated_cost_always_none(self): + f = _one_disk([_make_disk()]) + assert f.estimated_monthly_cost_usd is None + + def test_snap_finding_required_fields(self): + f = _one_snap([_make_snap()]) + assert f.provider == "azure" + assert f.rule_id == "azure.resource.untagged" + assert f.resource_type == "azure.compute.snapshot" + assert f.resource_id == "snap-1" + assert f.region == "eastus" + assert f.risk.value == "low" + assert f.estimated_monthly_cost_usd is None + + def test_snap_details_required_keys(self): + f = _one_snap([_make_snap()]) + d = f.details + assert d["resource_name"] == "my-snap" + assert d["subscription_id"] == _SUB + assert d["resource_family"] == "snapshot" + assert d["tags_present"] is False + assert d["current_tag_count"] == 0 + assert isinstance(d["age_days"], float) + assert d["provisioning_state"] == "Succeeded" + assert isinstance(d["tags"], dict) + + def test_disk_evidence_has_required_signals(self): + f = _one_disk([_make_disk()]) + used = " ".join(f.evidence.signals_used) + assert "managed_disk" in used + assert "Succeeded" in used + assert "zero current tags" in used + assert str(_MIN_AGE) in used + + def test_snap_evidence_has_required_signals(self): + f = _one_snap([_make_snap()]) + used = " ".join(f.evidence.signals_used) + assert "snapshot" in used + assert "Succeeded" in used + assert "zero current tags" in used + + def test_disk_evidence_signals_not_checked(self): + f = _one_disk([_make_disk()]) + combined = " ".join(f.evidence.signals_not_checked).lower() + assert "policy" in combined + assert "tag" in combined + + def test_snap_evidence_time_window(self): + f = _one_snap([_make_snap()]) + assert f.evidence.time_window is not None + assert str(_MIN_AGE) in f.evidence.time_window + + +# =========================================================================== +# TestFailureBehavior -- spec 12 +# =========================================================================== + + +class TestFailureBehavior: + def test_disks_list_raises_propagates(self): + client = MagicMock() + client.disks.list.side_effect = RuntimeError("API error") + client.snapshots.list.return_value = [] + with pytest.raises(RuntimeError): + find_untagged_resources(subscription_id=_SUB, credential=None, client=client) + + def test_snapshots_list_raises_propagates(self): + client = MagicMock() + client.disks.list.return_value = [] + client.snapshots.list.side_effect = RuntimeError("API error") + with pytest.raises(RuntimeError): + find_untagged_resources(subscription_id=_SUB, credential=None, client=client) + + def test_malformed_disk_skipped_other_emits(self): + # Disk with no id is skipped; the valid disk still emits + bad = SimpleNamespace( + id=None, + name="bad", + location="eastus", + provisioning_state="Succeeded", + time_created=_ago(10), + disk_state="Unattached", + managed_by=None, + managed_by_extended=None, + tags=None, + properties=None, + ) + good = _make_disk(disk_id="disk-good", name="good") + results = _run(disks=[bad, good]) + ids = [f.resource_id for f in results] + assert "disk-good" in ids + assert None not in ids + + def test_inject_client_used(self): + client = MagicMock() + client.disks.list.return_value = [_make_disk()] + client.snapshots.list.return_value = [] + results = find_untagged_resources(subscription_id=_SUB, credential=None, client=client) + client.disks.list.assert_called_once() + assert len(results) == 1 + + +# =========================================================================== +# TestResolveProvisioningState -- unit tests for the resolver +# =========================================================================== + + +class TestResolveProvisioningState: + def _ns(self, sdk=None, props=None): + return SimpleNamespace(provisioning_state=sdk, properties=props) + + def test_sdk_value_returned(self): + assert _resolve_provisioning_state(self._ns(sdk="Succeeded")) == "Succeeded" + + def test_nested_snake_case_used(self): + obj = self._ns( + sdk=None, props=SimpleNamespace(provisioning_state="Succeeded", provisioningState=None) + ) + assert _resolve_provisioning_state(obj) == "Succeeded" + + def test_nested_camel_case_used(self): + obj = self._ns( + sdk=None, props=SimpleNamespace(provisioning_state=None, provisioningState="Succeeded") + ) + assert _resolve_provisioning_state(obj) == "Succeeded" + + def test_conflict_returns_none(self): + obj = self._ns( + sdk="Succeeded", + props=SimpleNamespace(provisioning_state="Failed", provisioningState=None), + ) + assert _resolve_provisioning_state(obj) is None + + def test_both_absent_returns_none(self): + obj = self._ns(sdk=None, props=None) + assert _resolve_provisioning_state(obj) is None + + +# =========================================================================== +# TestResolveTags -- unit tests for the resolver +# =========================================================================== + + +class TestResolveTags: + def _ns(self, tags): + return SimpleNamespace(tags=tags) + + def test_none_returns_empty_dict(self): + assert _resolve_tags(self._ns(None)) == {} + + def test_empty_dict_returns_empty_dict(self): + assert _resolve_tags(self._ns({})) == {} + + def test_non_empty_dict_returned(self): + result = _resolve_tags(self._ns({"env": "prod"})) + assert result == {"env": "prod"} + + def test_missing_field_returns_unresolvable(self): + obj = SimpleNamespace() + assert _resolve_tags(obj) is _UNRESOLVABLE + + def test_string_value_returns_unresolvable(self): + assert _resolve_tags(self._ns("tagged")) is _UNRESOLVABLE + + def test_int_value_returns_unresolvable(self): + assert _resolve_tags(self._ns(42)) is _UNRESOLVABLE + + def test_list_value_returns_unresolvable(self): + assert _resolve_tags(self._ns(["a", "b"])) is _UNRESOLVABLE + + +# =========================================================================== +# TestResolveTimeCreated -- unit tests for the resolver +# =========================================================================== + + +class TestResolveTimeCreated: + def _ns(self, sdk=None, props=None): + return SimpleNamespace(time_created=sdk, properties=props) + + def test_sdk_datetime_returned(self): + dt = _ago(10) + result = _resolve_time_created(self._ns(sdk=dt)) + assert result == dt + + def test_nested_snake_case_used(self): + dt = _ago(10) + obj = self._ns(sdk=None, props=SimpleNamespace(time_created=dt, timeCreated=None)) + assert _resolve_time_created(obj) == dt + + def test_nested_camel_used(self): + dt = _ago(10) + obj = self._ns(sdk=None, props=SimpleNamespace(time_created=None, timeCreated=dt)) + assert _resolve_time_created(obj) == dt + + def test_sdk_invalid_no_fallback(self): + # SDK is present-but-invalid; must skip, not use nested + dt = _ago(10) + obj = self._ns(sdk="bad", props=SimpleNamespace(time_created=dt, timeCreated=None)) + assert _resolve_time_created(obj) is None + + def test_nested_invalid_skips(self): + obj = self._ns(sdk=None, props=SimpleNamespace(time_created="bad", timeCreated=None)) + assert _resolve_time_created(obj) is None + + def test_material_conflict_skips(self): + obj = self._ns( + sdk=_ago(10), + props=SimpleNamespace(time_created=_ago(50), timeCreated=None), + ) + assert _resolve_time_created(obj) is None + + def test_agree_within_60s_returns_sdk(self): + base = _ago(10) + obj = self._ns( + sdk=base, + props=SimpleNamespace(time_created=base + timedelta(seconds=30), timeCreated=None), + ) + result = _resolve_time_created(obj) + assert result == base + + def test_both_absent_returns_none(self): + assert _resolve_time_created(self._ns(sdk=None, props=None)) is None + + def test_iso_string_accepted(self): + dt = _ago(10) + iso = dt.isoformat().replace("+00:00", "Z") + obj = self._ns(sdk=iso) + result = _resolve_time_created(obj) + assert result is not None + assert abs((result - dt).total_seconds()) < 1 + + +# =========================================================================== +# TestCoerceDatetime -- unit tests +# =========================================================================== + + +class TestCoerceDatetime: + def test_none_returns_none(self): + assert _coerce_datetime(None) is None + + def test_aware_datetime_returned_unchanged(self): + dt = _ago(5) + assert _coerce_datetime(dt) == dt + + def test_naive_datetime_gets_utc(self): + naive = datetime(2024, 1, 15, 12, 0, 0) + result = _coerce_datetime(naive) + assert result.tzinfo is not None + assert result.tzinfo == timezone.utc + + def test_iso_z_suffix_accepted(self): + result = _coerce_datetime("2024-01-15T12:00:00Z") + assert result is not None + assert result.year == 2024 + + def test_iso_plus_utc_offset_accepted(self): + result = _coerce_datetime("2024-01-15T12:00:00+00:00") + assert result is not None + + def test_invalid_string_returns_none(self): + assert _coerce_datetime("not-a-date") is None + + def test_unsupported_type_returns_none(self): + assert _coerce_datetime(12345) is None + + +# =========================================================================== +# TestResolveDiskAttachmentContext -- unit tests for confidence helper +# =========================================================================== + + +class TestResolveDiskAttachmentContext: + def _ns(self, disk_state="Unattached", managed_by=None, managed_by_extended=None, props=None): + return SimpleNamespace( + disk_state=disk_state, + managed_by=managed_by, + managed_by_extended=managed_by_extended, + properties=props, + ) + + def test_fully_unattached_returns_unattached(self): + obj = self._ns(disk_state="Unattached", managed_by=None, managed_by_extended=None) + assert _resolve_disk_attachment_context(obj) == "unattached" + + def test_managed_by_set_returns_attached(self): + obj = self._ns(disk_state="Unattached", managed_by="/sub/vms/vm1") + assert _resolve_disk_attachment_context(obj) == "attached" + + def test_managed_by_extended_nonempty_returns_attached(self): + obj = self._ns( + disk_state="Unattached", managed_by=None, managed_by_extended=["/sub/vms/vm2"] + ) + assert _resolve_disk_attachment_context(obj) == "attached" + + def test_disk_state_reserved_returns_attached(self): + # "Reserved" is a pre-attachment state; treated as "attached" semantically + obj = self._ns(disk_state="Reserved", managed_by=None, managed_by_extended=None) + assert _resolve_disk_attachment_context(obj) == "attached" + + def test_disk_state_active_sas_returns_special_state(self): + obj = self._ns(disk_state="ActiveSAS", managed_by=None, managed_by_extended=None) + assert _resolve_disk_attachment_context(obj) == "special_state" + + def test_disk_state_attached_returns_attached(self): + obj = self._ns(disk_state="Attached", managed_by=None, managed_by_extended=None) + assert _resolve_disk_attachment_context(obj) == "attached" + + def test_managed_by_field_missing_returns_unresolved(self): + obj = SimpleNamespace(disk_state="Unattached", managed_by_extended=None, properties=None) + assert _resolve_disk_attachment_context(obj) == "unresolved" + + def test_managed_by_extended_field_missing_returns_unresolved(self): + obj = SimpleNamespace(disk_state="Unattached", managed_by=None, properties=None) + assert _resolve_disk_attachment_context(obj) == "unresolved" + + def test_disk_state_conflict_returns_unresolved(self): + obj = self._ns( + disk_state="Unattached", + managed_by=None, + managed_by_extended=None, + props=SimpleNamespace(disk_state="Attached", diskState=None), + ) + assert _resolve_disk_attachment_context(obj) == "unresolved" + + def test_managed_by_extended_uncoercible_returns_unresolved(self): + obj = SimpleNamespace( + disk_state="Unattached", + managed_by=None, + managed_by_extended=42, # not iterable + properties=None, + ) + assert _resolve_disk_attachment_context(obj) == "unresolved" From 1e3a5c9b488956e709785267563e11af2eb78b53 Mon Sep 17 00:00:00 2001 From: Suresh Mandalapu Date: Thu, 23 Apr 2026 22:18:17 +0100 Subject: [PATCH 3/5] azure.vm.stopped_not_deallocated --- .../azure/rules/vm_stopped_not_deallocated.py | 241 ++++-- docs/rules/azure.md | 10 +- .../specs/azure/vm_stopped_not_deallocated.md | 341 +++++++++ .../providers/azure/test_azure_vm_stopped.py | 689 ++++++++++++++---- 4 files changed, 1084 insertions(+), 197 deletions(-) create mode 100644 docs/specs/azure/vm_stopped_not_deallocated.md diff --git a/cleancloud/providers/azure/rules/vm_stopped_not_deallocated.py b/cleancloud/providers/azure/rules/vm_stopped_not_deallocated.py index 5d7d93a..f69a377 100644 --- a/cleancloud/providers/azure/rules/vm_stopped_not_deallocated.py +++ b/cleancloud/providers/azure/rules/vm_stopped_not_deallocated.py @@ -1,6 +1,43 @@ +""" +Rule: azure.vm.stopped_not_deallocated + +Intent: + Detect Azure virtual machines in the billed stopped-allocated state + (PowerState/stopped) that continue to incur compute charges even though + they appear off. + + This is a review-candidate rule only. It does not prove the stop was + accidental, that the VM should be deleted, or that a specific monthly + saving exists. + +Exclusions: + - id absent or empty + - name absent or empty + - outside optional region filter (exact lowercase match) + - provisioning state does not resolve to exactly "Succeeded" + - per-VM instance_view retrieval fails (skip that VM) + - runtime power state unresolvable, absent, or not exactly PowerState/stopped + +Detection: + - provisioning state is "Succeeded" (SDK-first with nested fallback) + - runtime power state resolves to exactly "PowerState/stopped" from + instance_view statuses (one unambiguous code) + +Cost model (spec 10): + estimated_monthly_cost_usd = None (always) + The rule may state that compute charges continue, but must not emit a + numeric estimate without a SKU-aware pricing model. + +APIs: + - Microsoft.Compute/virtualMachines/read (virtual_machines.list_all) + - Microsoft.Compute/virtualMachines/instanceView/action + (virtual_machines.instance_view per VM) +""" + from datetime import datetime, timezone from typing import List, Optional +from azure.core.exceptions import HttpResponseError, ServiceRequestError, ServiceResponseError from azure.mgmt.compute import ComputeManagementClient from cleancloud.core.confidence import ConfidenceLevel @@ -8,9 +45,18 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel +_RULE_ID = "azure.vm.stopped_not_deallocated" +_RESOURCE_TYPE = "azure.virtual_machine" +_TARGET_POWER_STATE = "PowerState/stopped" + + +def _norm_location(s: str) -> str: + """Lowercase only -- exact lowercase match per spec 7.""" + return s.lower() if s else "" + def _extract_resource_group(vm_id: str) -> str: - """Extract resource group name from a VM resource ID.""" + """Extract resource group name from a VM ARM resource ID.""" parts = vm_id.split("/") for i, part in enumerate(parts): if part.lower() == "resourcegroups" and i + 1 < len(parts): @@ -18,12 +64,62 @@ def _extract_resource_group(vm_id: str) -> str: raise ValueError(f"Cannot extract resource group from VM ID: {vm_id}") -def _get_power_state(instance_view) -> Optional[str]: - """Extract power state from instance view statuses.""" - for status in instance_view.statuses or []: - if status.code and status.code.startswith("PowerState/"): - return status.code - return None +# --------------------------------------------------------------------------- +# Resolvers +# --------------------------------------------------------------------------- + + +def _resolve_provisioning_state(vm) -> Optional[str]: + """ + Resolve provisioning state from the VM model/control-plane payload per spec 9.1. + + Priority: + 1. SDK projection: vm.provisioning_state + 2. nested/raw properties: vm.properties.provisioningState + + Returns None (skip) on conflict or when both absent. + Only "Succeeded" is eligible; caller skips on anything else. + """ + sdk_val = getattr(vm, "provisioning_state", None) + + props = getattr(vm, "properties", None) + nested_val = None + if props is not None: + nested_val = getattr(props, "provisioning_state", None) + if nested_val is None: + nested_val = getattr(props, "provisioningState", None) + + if sdk_val is not None and nested_val is not None and sdk_val != nested_val: + return None # conflict -> skip + + return sdk_val or nested_val + + +def _resolve_power_state(instance_view) -> Optional[str]: + """ + Extract the runtime power state from instance view statuses per spec 9.2. + + Required behavior: + - Collect all codes starting with "PowerState/" + - Zero codes -> unknown -> return None (skip) + - Exactly one unique code -> return it + - Multiple conflicting codes -> ambiguous -> return None (skip) + + Caller skips unless the returned value is exactly "PowerState/stopped". + """ + statuses = getattr(instance_view, "statuses", None) or [] + ps_codes = [ + s.code for s in statuses if getattr(s, "code", None) and s.code.startswith("PowerState/") + ] + + if not ps_codes: + return None # no power state present -> unknown -> skip + + unique = set(ps_codes) + if len(unique) > 1: + return None # conflicting codes -> ambiguous -> skip + + return ps_codes[0] def find_stopped_not_deallocated_vms( @@ -34,14 +130,14 @@ def find_stopped_not_deallocated_vms( client: Optional[ComputeManagementClient] = None, ) -> List[Finding]: """ - Find Azure VMs that are stopped but not deallocated. + Find Azure VMs that are stopped but not deallocated (PowerState/stopped). - VMs in 'Stopped' state (OS-level shutdown) still incur full compute charges. - Only 'Deallocated' VMs stop incurring compute costs. This is a common Azure - cost trap where users think their VM is off but are paying full price. + VMs in the stopped-allocated state still incur full compute charges. + Only deallocated VMs stop incurring compute costs. IAM permissions: - Microsoft.Compute/virtualMachines/read + - Microsoft.Compute/virtualMachines/instanceView/action """ findings: List[Finding] = [] @@ -50,70 +146,111 @@ def find_stopped_not_deallocated_vms( subscription_id=subscription_id, ) + now = datetime.now(timezone.utc) + for vm in compute_client.virtual_machines.list_all(): - if region_filter and (vm.location or "").lower() != region_filter.lower(): + # spec 8.1: id must be present and non-empty + vm_id = getattr(vm, "id", None) + if not vm_id: + continue + + # spec 8.2: name must be present and non-empty + vm_name = getattr(vm, "name", None) + if not vm_name: + continue + + # spec 8.3: region filter -- exact lowercase match + location = _norm_location(getattr(vm, "location", "") or "") + if region_filter and location != _norm_location(region_filter): continue + # spec 8.4 / 9.1: provisioning state must resolve to exactly "Succeeded" + # Avoids transient Stopped observations during VM creation / start + if _resolve_provisioning_state(vm) != "Succeeded": + continue + + # spec 8.5 / 9.2: per-VM instance_view to get authoritative runtime power state. + # Expected per-VM retrieval failures -> skip this VM (spec 12). + # ValueError: malformed ARM id from _extract_resource_group. + # HttpResponseError: HTTP-level failure (404, 403, 429, 5xx). + # ServiceRequestError: transport failure before a response (connection reset, + # DNS, network timeout). + # ServiceResponseError: transport failure while reading the response + # (incomplete read, stream closed). + # SerializationError, DeserializationError, and all other exceptions propagate. try: - resource_group = _extract_resource_group(vm.id) + resource_group = _extract_resource_group(vm_id) instance_view = compute_client.virtual_machines.instance_view( resource_group_name=resource_group, - vm_name=vm.name, + vm_name=vm_name, ) - except Exception: + except (ValueError, HttpResponseError, ServiceRequestError, ServiceResponseError): continue - power_state = _get_power_state(instance_view) + # spec 8.5 / 9.2: resolve power state; None = unknown -> skip + power_state = _resolve_power_state(instance_view) + if power_state is None: + continue - if power_state != "PowerState/stopped": + # spec 8.6: emit only for exact PowerState/stopped + if power_state != _TARGET_POWER_STATE: continue - evidence = Evidence( - signals_used=[ - "VM power state is 'Stopped' (not 'Deallocated')", - "Stopped VMs incur full compute charges", - ], - signals_not_checked=[ - "Whether stop was intentional or accidental", - "Planned future usage", - "IaC-managed intent", - ], - time_window=None, - ) + # --- Context fields (best-effort; never gate emission) --- + hw = getattr(vm, "hardware_profile", None) + vm_size = getattr(hw, "vm_size", None) if hw else None + + sp = getattr(vm, "storage_profile", None) + os_disk = getattr(sp, "os_disk", None) if sp else None + os_type = getattr(os_disk, "os_type", None) if os_disk else None + tags = getattr(vm, "tags", None) or {} # spec 7: never None in output + + # --- EMIT --- findings.append( Finding( provider="azure", - rule_id="azure.vm.stopped_not_deallocated", - resource_type="azure.virtual_machine", - resource_id=vm.id, - region=vm.location, + rule_id=_RULE_ID, + resource_type=_RESOURCE_TYPE, + resource_id=vm_id, + region=location, + estimated_monthly_cost_usd=None, # spec 10: always None title="Azure VM Stopped but Not Deallocated", summary=( - f"VM '{vm.name}' is stopped but not deallocated — " - "still incurring full compute charges" + f"VM '{vm_name}' is stopped but not deallocated — " "compute charges continue" + ), + reason=( + "Runtime power state is 'PowerState/stopped'; " + "stopped-allocated VMs continue to incur compute charges" ), - reason="VM power state is 'Stopped' (not 'Deallocated')", risk=RiskLevel.HIGH, confidence=ConfidenceLevel.HIGH, - detected_at=datetime.now(timezone.utc), - evidence=evidence, + detected_at=now, + evidence=Evidence( + signals_used=[ + "Provisioning state is 'Succeeded'", + f"Runtime power state is exact '{_TARGET_POWER_STATE}' " + "from instance_view statuses", + "Stopped-allocated VMs continue to incur compute charges; " + "only 'Deallocated' stops compute billing", + ], + signals_not_checked=[ + "Whether the stop was intentional or accidental", + "Planned restart or future usage", + "IaC-managed or schedule-managed intent", + "Reservation, savings plan, or licensing context", + "Attached disk, networking, or license costs", + ], + time_window=None, + ), details={ - "vm_name": vm.name, - "vm_size": ( - getattr(vm.hardware_profile, "vm_size", None) - if vm.hardware_profile - else None - ), - "os_type": ( - getattr(vm.storage_profile.os_disk, "os_type", None) - if vm.storage_profile and vm.storage_profile.os_disk - else None - ), - "location": vm.location, - "power_state": power_state, + "vm_name": vm_name, "subscription_id": subscription_id, - "tags": vm.tags, + "power_state": power_state, + "provisioning_state": "Succeeded", + "vm_size": vm_size, + "os_type": os_type, + "tags": tags, }, ) ) diff --git a/docs/rules/azure.md b/docs/rules/azure.md index ab11c39..eefc205 100644 --- a/docs/rules/azure.md +++ b/docs/rules/azure.md @@ -29,17 +29,17 @@ ## Compute #### `azure.vm.stopped_not_deallocated` -**Detects:** VMs in `PowerState/stopped` state (full compute charges continue; only `deallocated` stops billing) +**Detects:** VMs whose runtime power state resolves to exactly `PowerState/stopped` from per-VM `instance_view` statuses, with `provisioning_state == "Succeeded"` confirmed from the control-plane model payload -**Confidence / Risk:** HIGH (deterministic power state) / HIGH +**Confidence / Risk:** HIGH (deterministic power state from instance_view) / HIGH -**Permissions:** `Microsoft.Compute/virtualMachines/read` +**Permissions:** `Microsoft.Compute/virtualMachines/read`, `Microsoft.Compute/virtualMachines/instanceView/action` **Params:** none -**Exclusions:** `PowerState/deallocated`, transitional states (starting, stopping, deallocating) +**Exclusions:** `id` or `name` absent/empty; `provisioning_state != "Succeeded"` (SDK+nested, conflict -> skip); per-VM `instance_view` retrieval fails (skip that VM); no `PowerState/` code in statuses; multiple conflicting `PowerState/` codes; any power state other than exact `PowerState/stopped` -**Spec:** — +**Spec:** [specs/azure/vm_stopped_not_deallocated.md](../specs/azure/vm_stopped_not_deallocated.md) --- diff --git a/docs/specs/azure/vm_stopped_not_deallocated.md b/docs/specs/azure/vm_stopped_not_deallocated.md new file mode 100644 index 0000000..749a97e --- /dev/null +++ b/docs/specs/azure/vm_stopped_not_deallocated.md @@ -0,0 +1,341 @@ +# Azure Rule Spec — `azure.vm.stopped_not_deallocated` + +## 1. Rule Identity + +- **Rule ID:** `azure.vm.stopped_not_deallocated` +- **Provider:** Azure +- **ARM resource type:** `Microsoft.Compute/virtualMachines` +- **Finding resource_type:** `azure.virtual_machine` + +--- + +## 2. Intent + +Detect Azure virtual machines that are in the **billed stopped-allocated state** — Azure power state `PowerState/stopped` — and therefore still incur compute charges even though they appear off. + +This rule is deliberately **low-noise**. It is a **review-candidate** rule only, not proof that the VM should be deleted, not proof that the stop was accidental, and not proof of a specific monthly saving. + +--- + +## 3. Azure Documentation Grounding + +### 3.1 Azure VM power states and billing + +Microsoft documents that Azure virtual machines expose power states and that billing differs by power state: + +- `Starting` — billed +- `Running` — billed +- `Stopping` — billed +- `Stopped` / PoweredOff / Stopped (Allocated) — billed +- `Deallocating` — not billed +- `Deallocated` / Stopped (Deallocated) — not billed + +Microsoft also documents that some other resources such as disks and networking can continue to incur charges even when compute is not billed. + +Source: *States and billing status of Azure Virtual Machines* +URL: https://learn.microsoft.com/en-us/azure/virtual-machines/states-billing + +Rule consequence: + +1. `PowerState/stopped` is a valid billed compute hygiene surface. +2. `PowerState/deallocated` and deallocation-related states must not emit. +3. The rule concerns **compute billing semantics**, not total VM-adjacent resource billing. + +### 3.2 Power state retrieval surfaces + +Microsoft documents that VM runtime state is available from **Instance View**, including a `statuses` array containing entries such as: + +- `ProvisioningState/` +- `PowerState/` + +Sources: + +- *States and billing status of Azure Virtual Machines* +- *Virtual Machines - Instance View (REST API)* +- *azure.mgmt.compute.models.VirtualMachineInstanceView* + +URLs: + +- https://learn.microsoft.com/en-us/azure/virtual-machines/states-billing +- https://learn.microsoft.com/en-us/rest/api/compute/virtual-machines/instance-view?view=rest-compute-2025-04-01 +- https://learn.microsoft.com/en-us/python/api/azure-mgmt-compute/azure.mgmt.compute.models.virtualmachineinstanceview?view=azure-python + +Rule consequence: + +1. Runtime power state must come from an authoritative Instance View–style `statuses` surface. +2. If runtime power state cannot be resolved reliably for a VM, that VM must be skipped rather than guessed. + +### 3.3 Subscription-wide power-state retrieval + +Microsoft documents that subscription-wide VM power states can be retrieved with `Virtual Machines - List All` using `statusOnly=true`. + +Microsoft also documents that in rare situations the power state may be unavailable because of intermittent retrieval issues and recommends retrying or checking Azure Resource Health. + +Sources: + +- *States and billing status of Azure Virtual Machines* +- *Virtual Machines - List All (REST API)* + +URLs: + +- https://learn.microsoft.com/en-us/azure/virtual-machines/states-billing +- https://learn.microsoft.com/en-us/rest/api/compute/virtual-machines/list-all?view=rest-compute-2025-04-01 + +Rule consequence: + +1. A future implementation may use subscription-wide status retrieval or per-VM instance-view reads. +2. Missing runtime power state must be treated as **unknown -> skip**, not as stopped. + +### 3.4 Stopped can be transient during create/start + +Microsoft documents that the `Stopped` state can also be observed briefly: + +- during VM creation +- while starting a VM from `Stopped (Deallocated)` + +Source: *States and billing status of Azure Virtual Machines* +URL: https://learn.microsoft.com/en-us/azure/virtual-machines/states-billing + +Rule consequence: + +A conservative enterprise-quality rule should not rely on raw `PowerState/stopped` alone. It must also require stable control-plane context so transient create/start observations do not emit. + +### 3.5 PowerOff vs Deallocate billing semantics + +Microsoft SDK documentation states: + +- `begin_power_off` stops a VM while preserving provisioned resources, and **you are still charged** +- `begin_deallocate` releases compute resources, and **you are not billed for compute resources** + +Source: *azure.mgmt.compute.operations.VirtualMachinesOperations* +URL: https://learn.microsoft.com/en-us/python/api/azure-mgmt-compute/azure.mgmt.compute.operations.virtualmachinesoperations?view=azure-python + +Rule consequence: + +The canonical detection target is **powered off but still allocated**, not any generic “off” state. + +--- + +## 4. Detection Goal + +Emit a finding only when **all** of the following are true: + +1. `vm.id` is present and non-empty +2. `vm.name` is present and non-empty +3. the optional region filter matches the normalized location +4. stable control-plane provisioning state resolves to exactly `"Succeeded"` +5. runtime power state resolves reliably from an authoritative status surface +6. runtime power state resolves to exactly `PowerState/stopped` + +If any required signal cannot be established reliably, skip rather than emit. + +This rule intentionally prioritizes **precision over recall** and skips unresolved runtime-state cases by design. + +--- + +## 5. Non-Goals + +This rule does **not** attempt to prove: + +- that the VM is unused +- that the VM should be deleted +- that the stop was accidental +- that a future restart is not planned +- that attached disks, networking, licenses, or reservations are removable +- that a specific monthly saving exists + +--- + +## 6. Canonical Inputs + +### 6.1 Required control-plane surfaces + +The implementation may use either of these authoritative runtime-state approaches: + +1. `compute_client.virtual_machines.list_all()` plus per-VM `instance_view(...)` +2. `Virtual Machines - List All` with `statusOnly=true` when runtime power state is available per VM + +It may also use the VM model payload for: + +- `id` +- `name` +- `location` +- `tags` +- `hardware_profile.vm_size` +- `storage_profile.os_disk.os_type` +- `provisioning_state` + +### 6.2 Required runtime-state source + +Runtime power state must come from a `statuses` surface that yields codes such as: + +- `PowerState/running` +- `PowerState/stopped` +- `PowerState/deallocated` + +It must not be inferred from: + +- provisioning state alone +- presence or absence of NICs/disks +- guest metrics +- operator assumptions + +If an implementation supports more than one runtime-state source, it must define a deterministic precedence order and apply that order consistently. A recommended precedence is: + +1. per-VM `instance_view(...)` +2. subscription-wide `listAll(statusOnly=true)` status payload + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `location` | Lowercase ARM location string; compare by exact lowercase string equality only. Do not remove spaces, hyphens, or digits. | +| `provisioning_state` | Compare case-sensitively to canonical Azure value `"Succeeded"` from the model/control-plane resource shape. | +| `power_state_code` | Extract exactly one runtime status code that starts with `PowerState/`. If none exist or multiple conflicting values exist, power state is unknown. | +| `tags` | `vm.tags or {}` — never `None` in output. Tags are contextual only for this rule. | +| `vm_size` | Context only from `hardware_profile.vm_size` when available. | +| `os_type` | Context only from `storage_profile.os_disk.os_type` when available. | + +--- + +## 8. Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | `id` absent, `None`, or empty | Skip | +| 8.2 | `name` absent, `None`, or empty | Skip | +| 8.3 | Region filter set and normalized location does not match | Skip | +| 8.4 | `provisioning_state` does not resolve to `"Succeeded"` | Skip | +| 8.5 | runtime power state cannot be retrieved or resolved reliably | Skip | +| 8.6 | runtime power state is anything other than exact `PowerState/stopped` | Skip | +| 8.7 | all required signals resolve and runtime power state is exact `PowerState/stopped` | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Provisioning-state contract + +Resolve provisioning state from the VM model/control-plane payload: + +1. SDK projection such as `vm.provisioning_state` +2. nested/raw properties projection such as `vm.properties.provisioningState` +3. otherwise unknown + +Required behavior: + +1. Only exact `"Succeeded"` is eligible. +2. Unknown or any other value must skip. +3. If SDK and nested/raw values both exist and conflict, the VM must skip. + +Rationale: + +This helps avoid transient false positives because Microsoft documents that `Stopped` can appear briefly during creation or while starting from deallocated. + +### 9.2 Runtime power-state contract + +Resolve runtime power state from an authoritative `statuses` collection. + +Required behavior: + +1. If more than one runtime-state source is available, use the implementation's defined deterministic precedence order consistently. +2. Select only codes that start with `PowerState/`. +3. If no power-state code is present, the VM must skip. +4. If multiple conflicting power-state codes are present, the VM must skip. +5. Emit only for exact `PowerState/stopped`. + +Mandatory skips include: + +- `PowerState/running` +- `PowerState/starting` +- `PowerState/stopping` +- `PowerState/deallocating` +- `PowerState/deallocated` +- any unknown, malformed, or conflicting power-state code + +### 9.3 Transitional-state contract + +This rule must be conservative around transient state. + +Required behavior: + +1. If runtime power state is unavailable because status retrieval failed or returned incomplete data, skip. +2. If provisioning state is not stable-success, skip. +3. Do not emit for “probably stopped” or last-known stale power state without reliable current status. + +Coverage note: + +This rule intentionally trades some recall for reliability. Missing or incomplete runtime power state may cause otherwise valid stopped-allocated VMs to be skipped under partial API-health conditions, and that is acceptable by design. + +### 9.4 Cost semantics contract + +This rule may state that **compute charges continue** for stopped-allocated VMs because Microsoft documents that behavior. + +However: + +1. `estimated_monthly_cost_usd` must remain `None` +2. the rule must not claim exact total VM cost +3. the rule must not claim that all VM-adjacent charges disappear after deallocation, because disks and networking may still incur charges + +--- + +## 10. Cost Model + +`estimated_monthly_cost_usd = None` + +The rule may describe billed compute semantics qualitatively, but must not emit a numeric monthly estimate without a documented SKU-aware pricing model. + +--- + +## 11. Finding Shape + +### 11.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"azure"` | +| `rule_id` | `"azure.vm.stopped_not_deallocated"` | +| `resource_type` | `"azure.virtual_machine"` | +| `resource_id` | original ARM id from `vm.id` | +| `region` | normalized location | +| `risk` | `HIGH` | +| `confidence` | `HIGH` | +| `estimated_monthly_cost_usd` | `None` | + +### 11.2 Required evidence + +`signals_used` must clearly disclose: + +1. provisioning state is `"Succeeded"` +2. runtime power state is exact `PowerState/stopped` +3. stopped-allocated VMs continue to incur compute charges + +`signals_not_checked` should include remaining blind spots such as: + +1. whether the stop was intentional +2. planned restart or future usage +3. IaC-managed or schedule-managed intent +4. reservation, savings plan, or licensing context + +### 11.3 Required details + +Details should include at least: + +- `vm_name` +- `subscription_id` +- `power_state` +- `provisioning_state` +- `vm_size` +- `os_type` +- `tags` + +--- + +## 12. Failure Behavior + +- If subscription-wide VM inventory fails, let the exception propagate +- If per-VM runtime-status retrieval fails for a specific VM, skip that VM +- If a VM record is malformed or missing required fields, skip that VM +- Do not emit on missing, incomplete, or conflicting runtime power state diff --git a/tests/cleancloud/providers/azure/test_azure_vm_stopped.py b/tests/cleancloud/providers/azure/test_azure_vm_stopped.py index 71962e7..efba998 100644 --- a/tests/cleancloud/providers/azure/test_azure_vm_stopped.py +++ b/tests/cleancloud/providers/azure/test_azure_vm_stopped.py @@ -1,183 +1,592 @@ +""" +Tests for azure.vm.stopped_not_deallocated -- spec-aligned. + +Covers: must-emit, must-skip, id/name guards, region filter, + provisioning-state contract (SDK/nested/conflict), + power-state contract (exact match, transitional, conflicting codes, + no code, None statuses), + instance_view failure behavior, finding shape, + resolver unit tests (_resolve_provisioning_state, _resolve_power_state). +""" + from types import SimpleNamespace +from unittest.mock import MagicMock import pytest +from azure.core.exceptions import ( + HttpResponseError, + ServiceRequestError, + ServiceResponseError, +) from cleancloud.providers.azure.rules.vm_stopped_not_deallocated import ( + _resolve_power_state, + _resolve_provisioning_state, find_stopped_not_deallocated_vms, ) +# --------------------------------------------------------------------------- +# Shared constants +# --------------------------------------------------------------------------- + +_SUB = "sub-123" +_RG = "rg-test" +_VM_ID_TMPL = ( + f"/subscriptions/{_SUB}/resourceGroups/{_RG}" + "/providers/Microsoft.Compute/virtualMachines/{name}" +) + -def _make_vm(name, location="eastus", vm_size="Standard_D2s_v3", os_type="Linux", tags=None): +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_vm( + name="my-vm", + location="eastus", + provisioning_state="Succeeded", + vm_size="Standard_D2s_v3", + os_type="Linux", + tags=None, + vm_id=None, + properties=None, +) -> SimpleNamespace: + """ + Build a fully qualifying VM SimpleNamespace. + Includes provisioning_state="Succeeded" by default. + """ return SimpleNamespace( - id=f"/subscriptions/sub-123/resourceGroups/rg-test/providers/Microsoft.Compute/virtualMachines/{name}", + id=vm_id or _VM_ID_TMPL.format(name=name), name=name, location=location, + provisioning_state=provisioning_state, hardware_profile=SimpleNamespace(vm_size=vm_size), storage_profile=SimpleNamespace(os_disk=SimpleNamespace(os_type=os_type)), tags=tags, + properties=properties, ) -def _make_instance_view(power_state_code): - return SimpleNamespace( - statuses=[ - SimpleNamespace(code="ProvisioningState/succeeded"), - SimpleNamespace(code=power_state_code), - ], +def _make_iv(power_state_code, extra_codes=None) -> SimpleNamespace: + """ + Build an instance_view SimpleNamespace with the given power state code. + Optionally inject extra status codes (for conflict tests). + """ + statuses = [SimpleNamespace(code="ProvisioningState/succeeded")] + if power_state_code is not None: + statuses.append(SimpleNamespace(code=power_state_code)) + for code in extra_codes or []: + statuses.append(SimpleNamespace(code=code)) + return SimpleNamespace(statuses=statuses) + + +def _run(vms, iv_map=None, iv_default=None, region_filter=None): + """ + Run find_stopped_not_deallocated_vms with an injected mock client. + + iv_map: dict of vm_name -> instance_view (for per-VM control) + iv_default: single instance_view returned for all VMs + """ + client = MagicMock() + client.virtual_machines.list_all.return_value = vms + + if iv_map is not None: + client.virtual_machines.instance_view.side_effect = ( + lambda resource_group_name, vm_name: iv_map[vm_name] + ) + elif iv_default is not None: + client.virtual_machines.instance_view.return_value = iv_default + else: + client.virtual_machines.instance_view.return_value = _make_iv("PowerState/stopped") + + return find_stopped_not_deallocated_vms( + subscription_id=_SUB, + credential=None, + region_filter=region_filter, + client=client, ) -@pytest.fixture -def mock_compute_client(mocker): - client = mocker.MagicMock() - return client +def _one(vms, **kwargs): + results = _run(vms, **kwargs) + assert len(results) == 1 + return results[0] -def test_stopped_vm_detected(mock_compute_client): - """PowerState/stopped should be flagged.""" - vm = _make_vm("vm-stopped") - mock_compute_client.virtual_machines.list_all.return_value = [vm] - mock_compute_client.virtual_machines.instance_view.return_value = _make_instance_view( - "PowerState/stopped" - ) +# =========================================================================== +# TestMustEmit +# =========================================================================== - findings = find_stopped_not_deallocated_vms( - subscription_id="sub-123", - credential=None, - client=mock_compute_client, - ) - assert len(findings) == 1 - finding = findings[0] - assert finding.provider == "azure" - assert finding.rule_id == "azure.vm.stopped_not_deallocated" - assert finding.resource_type == "azure.virtual_machine" - assert finding.confidence.value == "high" - assert finding.risk.value == "high" - assert finding.details["vm_name"] == "vm-stopped" - assert finding.details["power_state"] == "PowerState/stopped" - assert finding.details["vm_size"] == "Standard_D2s_v3" - - -def test_deallocated_vm_skipped(mock_compute_client): - """PowerState/deallocated should NOT be flagged.""" - vm = _make_vm("vm-deallocated") - mock_compute_client.virtual_machines.list_all.return_value = [vm] - mock_compute_client.virtual_machines.instance_view.return_value = _make_instance_view( - "PowerState/deallocated" - ) +class TestMustEmit: + def test_qualifying_vm_emits(self): + assert len(_run([_make_vm()])) == 1 - findings = find_stopped_not_deallocated_vms( - subscription_id="sub-123", - credential=None, - client=mock_compute_client, - ) + def test_multiple_stopped_vms_all_emit(self): + vms = [_make_vm(name=f"vm-{i}") for i in range(3)] + assert len(_run(vms)) == 3 - assert len(findings) == 0 + def test_emit_regardless_of_tags(self): + assert len(_run([_make_vm(tags={"env": "prod"})])) == 1 + def test_emit_when_tags_none(self): + assert len(_run([_make_vm(tags=None)])) == 1 -def test_running_vm_skipped(mock_compute_client): - """PowerState/running should NOT be flagged.""" - vm = _make_vm("vm-running") - mock_compute_client.virtual_machines.list_all.return_value = [vm] - mock_compute_client.virtual_machines.instance_view.return_value = _make_instance_view( - "PowerState/running" - ) - findings = find_stopped_not_deallocated_vms( - subscription_id="sub-123", - credential=None, - client=mock_compute_client, - ) +# =========================================================================== +# TestMustSkip +# =========================================================================== - assert len(findings) == 0 - - -def test_transitional_states_skipped(mock_compute_client): - """Transitional power states (starting, stopping, deallocating) should NOT be flagged.""" - vms = [ - _make_vm("vm-starting"), - _make_vm("vm-stopping"), - _make_vm("vm-deallocating"), - ] - mock_compute_client.virtual_machines.list_all.return_value = vms - - instance_views = { - "vm-starting": _make_instance_view("PowerState/starting"), - "vm-stopping": _make_instance_view("PowerState/stopping"), - "vm-deallocating": _make_instance_view("PowerState/deallocating"), - } - mock_compute_client.virtual_machines.instance_view.side_effect = ( - lambda resource_group_name, vm_name: instance_views[vm_name] - ) - findings = find_stopped_not_deallocated_vms( - subscription_id="sub-123", - credential=None, - client=mock_compute_client, - ) +class TestMustSkip: + def test_deallocated_skips(self): + assert _run([_make_vm()], iv_default=_make_iv("PowerState/deallocated")) == [] - assert len(findings) == 0 + def test_running_skips(self): + assert _run([_make_vm()], iv_default=_make_iv("PowerState/running")) == [] + def test_starting_skips(self): + assert _run([_make_vm()], iv_default=_make_iv("PowerState/starting")) == [] -def test_region_filter(mock_compute_client): - """Only VMs in the filtered region should be checked.""" - vms = [ - _make_vm("vm-east", location="eastus"), - _make_vm("vm-west", location="westus"), - ] - mock_compute_client.virtual_machines.list_all.return_value = vms - mock_compute_client.virtual_machines.instance_view.return_value = _make_instance_view( - "PowerState/stopped" - ) + def test_stopping_skips(self): + assert _run([_make_vm()], iv_default=_make_iv("PowerState/stopping")) == [] - findings = find_stopped_not_deallocated_vms( - subscription_id="sub-123", - credential=None, - region_filter="eastus", - client=mock_compute_client, - ) + def test_deallocating_skips(self): + assert _run([_make_vm()], iv_default=_make_iv("PowerState/deallocating")) == [] - assert len(findings) == 1 - assert findings[0].details["vm_name"] == "vm-east" - - -def test_mixed_vms(mock_compute_client): - """Only stopped (not deallocated) VMs should be flagged from a mix.""" - vms = [ - _make_vm("vm-running"), - _make_vm("vm-stopped"), - _make_vm("vm-deallocated"), - ] - mock_compute_client.virtual_machines.list_all.return_value = vms - - instance_views = { - "vm-running": _make_instance_view("PowerState/running"), - "vm-stopped": _make_instance_view("PowerState/stopped"), - "vm-deallocated": _make_instance_view("PowerState/deallocated"), - } - mock_compute_client.virtual_machines.instance_view.side_effect = ( - lambda resource_group_name, vm_name: instance_views[vm_name] - ) + def test_provisioning_state_failed_skips(self): + assert _run([_make_vm(provisioning_state="Failed")]) == [] - findings = find_stopped_not_deallocated_vms( - subscription_id="sub-123", - credential=None, - client=mock_compute_client, - ) + def test_provisioning_state_creating_skips(self): + assert _run([_make_vm(provisioning_state="Creating")]) == [] - assert len(findings) == 1 - assert findings[0].details["vm_name"] == "vm-stopped" + def test_provisioning_state_updating_skips(self): + assert _run([_make_vm(provisioning_state="Updating")]) == [] + def test_provisioning_state_lowercase_skips(self): + # Case-sensitive: only exact "Succeeded" qualifies + assert _run([_make_vm(provisioning_state="succeeded")]) == [] -def test_empty_subscription(mock_compute_client): - """No VMs should return empty findings.""" - mock_compute_client.virtual_machines.list_all.return_value = [] + def test_no_power_state_code_skips(self): + # Instance view has no PowerState/ code + assert _run([_make_vm()], iv_default=_make_iv(None)) == [] - findings = find_stopped_not_deallocated_vms( - subscription_id="sub-123", - credential=None, - client=mock_compute_client, - ) + def test_conflicting_power_state_codes_skips(self): + # Multiple different PowerState/ codes -> ambiguous -> skip + iv = _make_iv("PowerState/stopped", extra_codes=["PowerState/running"]) + assert _run([_make_vm()], iv_default=iv) == [] + + def test_mixed_vms_only_stopped_emits(self): + vms = [ + _make_vm("vm-running"), + _make_vm("vm-stopped"), + _make_vm("vm-deallocated"), + ] + iv_map = { + "vm-running": _make_iv("PowerState/running"), + "vm-stopped": _make_iv("PowerState/stopped"), + "vm-deallocated": _make_iv("PowerState/deallocated"), + } + results = _run(vms, iv_map=iv_map) + assert len(results) == 1 + assert results[0].details["vm_name"] == "vm-stopped" + + +# =========================================================================== +# TestIdNameGuards -- spec 8.1, 8.2 +# =========================================================================== + + +class TestIdNameGuards: + def test_id_absent_skips(self): + vm = _make_vm() + del vm.id + assert _run([vm]) == [] + + def test_id_none_skips(self): + vm = _make_vm() + vm.id = None + assert _run([vm]) == [] + + def test_id_empty_skips(self): + vm = _make_vm() + vm.id = "" + assert _run([vm]) == [] + + def test_name_absent_skips(self): + vm = _make_vm() + del vm.name + assert _run([vm]) == [] + + def test_name_none_skips(self): + vm = _make_vm() + vm.name = None + assert _run([vm]) == [] + + def test_name_empty_skips(self): + vm = _make_vm() + vm.name = "" + assert _run([vm]) == [] + + +# =========================================================================== +# TestRegionFilter -- spec 8.3 +# =========================================================================== + + +class TestRegionFilter: + def test_matching_region_emits(self): + results = _run([_make_vm(location="eastus")], region_filter="eastus") + assert len(results) == 1 + + def test_non_matching_region_skips(self): + assert _run([_make_vm(location="westus")], region_filter="eastus") == [] + + def test_region_filter_case_insensitive(self): + assert len(_run([_make_vm(location="eastus")], region_filter="EastUS")) == 1 + + def test_no_region_filter_emits_all(self): + vms = [ + _make_vm(name="vm-east", location="eastus"), + _make_vm(name="vm-west", location="westus"), + ] + assert len(_run(vms)) == 2 + + def test_region_filter_only_eastus(self): + vms = [ + _make_vm(name="vm-east", location="eastus"), + _make_vm(name="vm-west", location="westus"), + ] + results = _run(vms, region_filter="eastus") + assert len(results) == 1 + assert results[0].details["vm_name"] == "vm-east" + + +# =========================================================================== +# TestProvisioningStateContract -- spec 9.1 +# =========================================================================== + + +class TestProvisioningStateContract: + def test_sdk_succeeded_emits(self): + assert len(_run([_make_vm(provisioning_state="Succeeded")])) == 1 + + def test_nested_snake_case_used(self): + vm = _make_vm(provisioning_state=None) + vm.properties = SimpleNamespace(provisioning_state="Succeeded", provisioningState=None) + assert len(_run([vm])) == 1 + + def test_nested_camel_case_used(self): + vm = _make_vm(provisioning_state=None) + vm.properties = SimpleNamespace(provisioning_state=None, provisioningState="Succeeded") + assert len(_run([vm])) == 1 + + def test_sdk_nested_conflict_skips(self): + vm = _make_vm(provisioning_state="Succeeded") + vm.properties = SimpleNamespace(provisioning_state="Failed", provisioningState=None) + assert _run([vm]) == [] + + def test_both_absent_skips(self): + vm = _make_vm(provisioning_state=None) + vm.properties = None + assert _run([vm]) == [] + + def test_sdk_and_nested_agree_emits(self): + vm = _make_vm(provisioning_state="Succeeded") + vm.properties = SimpleNamespace(provisioning_state="Succeeded", provisioningState=None) + assert len(_run([vm])) == 1 + + +# =========================================================================== +# TestPowerStateContract -- spec 9.2 +# =========================================================================== + + +class TestPowerStateContract: + def test_exact_stopped_emits(self): + assert len(_run([_make_vm()], iv_default=_make_iv("PowerState/stopped"))) == 1 + + def test_all_non_stopped_codes_skip(self): + non_stopped = [ + "PowerState/running", + "PowerState/starting", + "PowerState/stopping", + "PowerState/deallocating", + "PowerState/deallocated", + ] + for code in non_stopped: + assert _run([_make_vm()], iv_default=_make_iv(code)) == [], f"Expected skip for {code}" + + def test_no_power_state_code_skips(self): + # statuses has only ProvisioningState/ -- no PowerState/ + assert _run([_make_vm()], iv_default=_make_iv(None)) == [] + + def test_duplicate_same_code_emits(self): + # Two identical PowerState/stopped codes -> not conflicting -> emit + iv = _make_iv("PowerState/stopped", extra_codes=["PowerState/stopped"]) + assert len(_run([_make_vm()], iv_default=iv)) == 1 + + def test_conflicting_power_state_codes_skip(self): + iv = _make_iv("PowerState/stopped", extra_codes=["PowerState/running"]) + assert _run([_make_vm()], iv_default=iv) == [] + + def test_statuses_none_skips(self): + iv = SimpleNamespace(statuses=None) + assert _run([_make_vm()], iv_default=iv) == [] + + def test_statuses_empty_skips(self): + iv = SimpleNamespace(statuses=[]) + assert _run([_make_vm()], iv_default=iv) == [] + + def test_statuses_code_none_skips(self): + # Status entry with code=None should be ignored; no PowerState/ -> skip + iv = SimpleNamespace(statuses=[SimpleNamespace(code=None)]) + assert _run([_make_vm()], iv_default=iv) == [] + + +# =========================================================================== +# TestInstanceViewFailure -- spec 12 +# =========================================================================== + + +class TestInstanceViewFailure: + def test_instance_view_http_error_skips_vm(self): + # HttpResponseError (404, 403, 429, 5xx) -> skip that VM + client = MagicMock() + client.virtual_machines.list_all.return_value = [_make_vm()] + client.virtual_machines.instance_view.side_effect = HttpResponseError(message="Not Found") + assert ( + find_stopped_not_deallocated_vms(subscription_id=_SUB, credential=None, client=client) + == [] + ) + + def test_instance_view_service_request_error_skips_vm(self): + # ServiceRequestError (connection reset, DNS, network timeout) -> skip that VM + client = MagicMock() + client.virtual_machines.list_all.return_value = [_make_vm()] + client.virtual_machines.instance_view.side_effect = ServiceRequestError( + message="Connection reset" + ) + assert ( + find_stopped_not_deallocated_vms(subscription_id=_SUB, credential=None, client=client) + == [] + ) + + def test_instance_view_service_response_error_skips_vm(self): + # ServiceResponseError (incomplete read, stream closed) -> skip that VM + client = MagicMock() + client.virtual_machines.list_all.return_value = [_make_vm()] + client.virtual_machines.instance_view.side_effect = ServiceResponseError( + message="Incomplete read" + ) + assert ( + find_stopped_not_deallocated_vms(subscription_id=_SUB, credential=None, client=client) + == [] + ) + + def test_instance_view_azure_base_error_propagates(self): + # AzureError (root base class, e.g. serialization bug) is NOT caught + from azure.core.exceptions import AzureError + + client = MagicMock() + client.virtual_machines.list_all.return_value = [_make_vm()] + client.virtual_machines.instance_view.side_effect = AzureError("internal SDK error") + with pytest.raises(AzureError): + find_stopped_not_deallocated_vms(subscription_id=_SUB, credential=None, client=client) + + def test_instance_view_unrelated_error_propagates(self): + # Non-Azure errors (coding bugs) must NOT be swallowed + client = MagicMock() + client.virtual_machines.list_all.return_value = [_make_vm()] + client.virtual_machines.instance_view.side_effect = RuntimeError("bug") + with pytest.raises(RuntimeError): + find_stopped_not_deallocated_vms(subscription_id=_SUB, credential=None, client=client) + + def test_instance_view_fails_one_emits_other(self): + vm_ok = _make_vm(name="vm-ok") + vm_bad = _make_vm(name="vm-bad") + + client = MagicMock() + client.virtual_machines.list_all.return_value = [vm_bad, vm_ok] + + def _iv(resource_group_name, vm_name): + if vm_name == "vm-bad": + raise ServiceRequestError(message="timeout") + return _make_iv("PowerState/stopped") + + client.virtual_machines.instance_view.side_effect = _iv + results = find_stopped_not_deallocated_vms( + subscription_id=_SUB, credential=None, client=client + ) + assert len(results) == 1 + assert results[0].details["vm_name"] == "vm-ok" + + def test_list_all_exception_propagates(self): + client = MagicMock() + client.virtual_machines.list_all.side_effect = RuntimeError("subscription error") + with pytest.raises(RuntimeError): + find_stopped_not_deallocated_vms(subscription_id=_SUB, credential=None, client=client) + + def test_malformed_id_skips_vm(self): + # ID with no resourceGroups segment -> _extract_resource_group raises -> skip + vm = _make_vm(vm_id="/malformed/id") + assert _run([vm]) == [] + + def test_inject_client_used(self): + client = MagicMock() + client.virtual_machines.list_all.return_value = [_make_vm()] + client.virtual_machines.instance_view.return_value = _make_iv("PowerState/stopped") + results = find_stopped_not_deallocated_vms( + subscription_id=_SUB, credential=None, client=client + ) + client.virtual_machines.list_all.assert_called_once() + assert len(results) == 1 + + +# =========================================================================== +# TestFindingShape -- spec 11 +# =========================================================================== + + +class TestFindingShape: + def test_required_fields(self): + f = _one([_make_vm()]) + assert f.provider == "azure" + assert f.rule_id == "azure.vm.stopped_not_deallocated" + assert f.resource_type == "azure.virtual_machine" + assert f.resource_id == _VM_ID_TMPL.format(name="my-vm") + assert f.region == "eastus" + assert f.risk.value == "high" + assert f.confidence.value == "high" + assert f.estimated_monthly_cost_usd is None + + def test_required_detail_keys(self): + f = _one([_make_vm(vm_size="Standard_D4s_v3", os_type="Windows")]) + d = f.details + assert d["vm_name"] == "my-vm" + assert d["subscription_id"] == _SUB + assert d["power_state"] == "PowerState/stopped" + assert d["provisioning_state"] == "Succeeded" + assert d["vm_size"] == "Standard_D4s_v3" + assert d["os_type"] == "Windows" + assert isinstance(d["tags"], dict) + + def test_tags_normalized_to_empty_dict_when_none(self): + f = _one([_make_vm(tags=None)]) + assert f.details["tags"] == {} + + def test_tags_preserved_when_set(self): + f = _one([_make_vm(tags={"env": "prod", "team": "infra"})]) + assert f.details["tags"] == {"env": "prod", "team": "infra"} + + def test_evidence_signals_used(self): + f = _one([_make_vm()]) + used = " ".join(f.evidence.signals_used) + assert "Succeeded" in used + assert "PowerState/stopped" in used + assert "compute" in used.lower() + + def test_evidence_signals_not_checked(self): + f = _one([_make_vm()]) + combined = " ".join(f.evidence.signals_not_checked).lower() + assert "intentional" in combined or "accidental" in combined + assert "reservation" in combined or "licensing" in combined + + def test_estimated_cost_always_none(self): + f = _one([_make_vm()]) + assert f.estimated_monthly_cost_usd is None + + +# =========================================================================== +# TestResolveProvisioningState -- unit tests +# =========================================================================== + + +class TestResolveProvisioningState: + def _ns(self, sdk=None, props=None): + return SimpleNamespace(provisioning_state=sdk, properties=props) + + def test_sdk_value_returned(self): + assert _resolve_provisioning_state(self._ns(sdk="Succeeded")) == "Succeeded" + + def test_nested_snake_used(self): + obj = self._ns( + sdk=None, + props=SimpleNamespace(provisioning_state="Succeeded", provisioningState=None), + ) + assert _resolve_provisioning_state(obj) == "Succeeded" + + def test_nested_camel_used(self): + obj = self._ns( + sdk=None, + props=SimpleNamespace(provisioning_state=None, provisioningState="Succeeded"), + ) + assert _resolve_provisioning_state(obj) == "Succeeded" + + def test_conflict_returns_none(self): + obj = self._ns( + sdk="Succeeded", + props=SimpleNamespace(provisioning_state="Failed", provisioningState=None), + ) + assert _resolve_provisioning_state(obj) is None + + def test_both_absent_returns_none(self): + assert _resolve_provisioning_state(self._ns()) is None + + def test_no_props_attribute_uses_sdk(self): + obj = SimpleNamespace(provisioning_state="Succeeded") + assert _resolve_provisioning_state(obj) == "Succeeded" + + +# =========================================================================== +# TestResolvePowerState -- unit tests +# =========================================================================== + + +class TestResolvePowerState: + def _iv(self, codes): + return SimpleNamespace(statuses=[SimpleNamespace(code=c) for c in codes]) + + def test_single_stopped_code(self): + assert _resolve_power_state(self._iv(["PowerState/stopped"])) == "PowerState/stopped" + + def test_single_running_code(self): + assert _resolve_power_state(self._iv(["PowerState/running"])) == "PowerState/running" + + def test_provisioning_code_ignored(self): + # ProvisioningState/ is not a PowerState/ code + assert ( + _resolve_power_state(self._iv(["ProvisioningState/succeeded", "PowerState/stopped"])) + == "PowerState/stopped" + ) + + def test_no_power_state_code_returns_none(self): + assert _resolve_power_state(self._iv(["ProvisioningState/succeeded"])) is None + + def test_empty_statuses_returns_none(self): + assert _resolve_power_state(self._iv([])) is None + + def test_statuses_none_returns_none(self): + iv = SimpleNamespace(statuses=None) + assert _resolve_power_state(iv) is None + + def test_conflicting_codes_returns_none(self): + assert _resolve_power_state(self._iv(["PowerState/stopped", "PowerState/running"])) is None + + def test_duplicate_same_code_returns_code(self): + # Two identical codes -> not conflicting + assert ( + _resolve_power_state(self._iv(["PowerState/stopped", "PowerState/stopped"])) + == "PowerState/stopped" + ) + + def test_code_none_entry_ignored(self): + iv = SimpleNamespace( + statuses=[SimpleNamespace(code=None), SimpleNamespace(code="PowerState/stopped")] + ) + assert _resolve_power_state(iv) == "PowerState/stopped" - assert findings == [] + def test_no_statuses_attribute_returns_none(self): + iv = SimpleNamespace() + assert _resolve_power_state(iv) is None From 4a685e92f92debf694f2c1cd85caa664db332951 Mon Sep 17 00:00:00 2001 From: Suresh Mandalapu Date: Thu, 23 Apr 2026 23:20:28 +0100 Subject: [PATCH 4/5] azure.virtual_network_gateway.idle --- .../azure/rules/vnet_gateway_idle.py | 832 +++++++-- docs/rules/azure.md | 12 +- docs/specs/azure/vnet_gateway_idle.md | 517 ++++++ .../azure/test_azure_vnet_gateway_idle.py | 1597 ++++++++++++++--- 4 files changed, 2530 insertions(+), 428 deletions(-) create mode 100644 docs/specs/azure/vnet_gateway_idle.md diff --git a/cleancloud/providers/azure/rules/vnet_gateway_idle.py b/cleancloud/providers/azure/rules/vnet_gateway_idle.py index 39993c0..d356d72 100644 --- a/cleancloud/providers/azure/rules/vnet_gateway_idle.py +++ b/cleancloud/providers/azure/rules/vnet_gateway_idle.py @@ -1,6 +1,57 @@ -from datetime import datetime, timezone -from typing import List, Optional - +""" +Rule: azure.virtual_network_gateway.idle + +Intent: + Detect Azure virtual network gateways (VPN and ExpressRoute) that have no + configured in-scope connection resources and no applicable gateway activity + over a 30-day observation window, making them review candidates for cleanup + or rightsizing. + + This is a review-candidate rule only. It does not prove the gateway should + be deleted, that a standby topology is unnecessary, or that a specific + monthly saving exists. + +Exclusions: + - id absent or empty + - name absent or empty + - outside optional region filter (exact lowercase match) + - provisioning state does not resolve to exactly "Succeeded" + - gateway type does not resolve to exactly "Vpn" or "ExpressRoute" + - allowVirtualWanTraffic resolves to True + - ExpressRoute: adminState is "Disabled" or any connection has bypass enabled + - VPN: any P2S field group resolves to a present, non-empty configured value + - any configured in-scope connection resource present (IPsec, Vnet2Vnet, ExpressRoute) + - any connection has unknown or conflicting type resolution + - connection listing fails, is partial, or unresolvable + - any applicable idle metric is unknown, below coverage threshold, or non-zero + - per-gateway get/list_connections/metric retrieval raises an expected SDK error + +Detection: + - provisioning state is "Succeeded" (SDK-first, nested fallback, conflict -> skip) + - gateway type is "Vpn" or "ExpressRoute" + - allowVirtualWanTraffic is not True + - no ExpressRoute standby/bypass signals present + - no P2S configuration for VPN gateways + - no configured in-scope connection resources + - all applicable idle metrics zero over 30 days with >= 80% daily bucket coverage + +Cost model (spec 10): + estimated_monthly_cost_usd = None (always) + Gateway billing varies by SKU, type, and transfer topology. No flat estimate. + +APIs: + - Microsoft.Resources/resources/read (resources.list, subscription-wide inventory) + - Microsoft.Network/virtualNetworkGateways/read (get, list_connections) + - Microsoft.Insights/metrics/read (metrics.list) +""" + +import math +from datetime import datetime, timedelta, timezone +from enum import Enum +from typing import List, Optional, Tuple + +from azure.core.exceptions import HttpResponseError, ServiceRequestError, ServiceResponseError +from azure.mgmt.monitor import MonitorManagementClient from azure.mgmt.network import NetworkManagementClient from azure.mgmt.resource import ResourceManagementClient @@ -9,64 +60,448 @@ from cleancloud.core.finding import Finding from cleancloud.core.risk import RiskLevel -# VNet Gateway SKU monthly costs (approximate) -SKU_COSTS = { - # VPN Gateway SKUs - "Basic": "$27/month", - "VpnGw1": "$140/month", - "VpnGw1AZ": "$195/month", - "VpnGw2": "$360/month", - "VpnGw2AZ": "$505/month", - "VpnGw3": "$930/month", - "VpnGw3AZ": "$1,115/month", - "VpnGw4": "$1,680/month", - "VpnGw4AZ": "$1,845/month", - "VpnGw5": "$3,360/month", - "VpnGw5AZ": "$3,525/month", - # ExpressRoute Gateway SKUs - "Standard": "$125/month", - "HighPerformance": "$335/month", - "UltraPerformance": "$670/month", - "ErGw1AZ": "$195/month", - "ErGw2AZ": "$505/month", - "ErGw3AZ": "$1,115/month", - "ErGwScale": "Usage-based", -} - -# Numeric costs for aggregation -_SKU_COST_USD = { - "Basic": 27, - "VpnGw1": 140, - "VpnGw1AZ": 195, - "VpnGw2": 360, - "VpnGw2AZ": 505, - "VpnGw3": 930, - "VpnGw3AZ": 1115, - "VpnGw4": 1680, - "VpnGw4AZ": 1845, - "VpnGw5": 3360, - "VpnGw5AZ": 3525, - "Standard": 125, - "HighPerformance": 335, - "UltraPerformance": 670, - "ErGw1AZ": 195, - "ErGw2AZ": 505, - "ErGw3AZ": 1115, -} +_RULE_ID = "azure.virtual_network_gateway.idle" +_RESOURCE_TYPE = "azure.virtual_network_gateway" +_IDLE_WINDOW_DAYS = 30 +_MIN_COVERAGE = 0.80 + +# In-scope connection types per spec 7 / 9.2 +_IN_SCOPE_CONNECTION_TYPES = frozenset({"IPsec", "Vnet2Vnet", "ExpressRoute"}) + +# Idle metrics per gateway type / SKU family (spec 9.6) +_VPN_METRICS: Tuple[str, ...] = ( + "AverageBandwidth", + "InboundFlowsCount", + "OutboundFlowsCount", +) +_ER_STANDARD_METRICS: Tuple[str, ...] = ( + "ExpressRouteGatewayBitsPerSecond", + "ExpressRouteGatewayPacketsPerSecond", + "ExpressRouteGatewayActiveFlows", +) +_ER_SCALABLE_METRICS: Tuple[str, ...] = ( + "ScalableExpressRouteGatewayBitsPerSecond", + "ScalableExpressRouteGatewayPacketsPerSecond", + "ScalableExpressRouteGatewayActiveFlows", +) + +# ExpressRoute SKU tiers that use the scalable metric family +_ER_SCALABLE_TIERS = frozenset({"ErGwScale"}) + +# P2S field groups to check: (snake_case, camelCase) per spec 9.3 +_P2S_FIELD_PAIRS = ( + ("vpn_client_address_pool", "vpnClientAddressPool"), + ("vng_client_connection_configurations", "vngClientConnectionConfigurations"), + ("vpn_client_root_certificates", "vpnClientRootCertificates"), + ("vpn_client_revoked_certificates", "vpnClientRevokedCertificates"), + ("vpn_authentication_types", "vpnAuthenticationTypes"), + ("vpn_client_protocols", "vpnClientProtocols"), + ("aad_tenant", "aadTenant"), + ("aad_audience", "aadAudience"), + ("aad_issuer", "aadIssuer"), +) + +_SENTINEL = object() + + +class _MetricResult(Enum): + ACTIVE = "ACTIVE" + ZERO = "ZERO" + UNKNOWN = "UNKNOWN" + + +# --------------------------------------------------------------------------- +# Utilities +# --------------------------------------------------------------------------- + + +def _norm_location(s: str) -> str: + """Lowercase only -- exact lowercase match per spec 7.""" + return s.lower() if s else "" def _extract_resource_group(resource_id: str) -> Optional[str]: - """Extract resource group name from Azure resource ID.""" + """Extract resource group name from Azure ARM resource ID.""" if not resource_id: return None parts = resource_id.split("/") try: - rg_idx = parts.index("resourceGroups") - return parts[rg_idx + 1] + idx = parts.index("resourceGroups") + return parts[idx + 1] except (ValueError, IndexError): return None +def _is_field_nonempty(val) -> bool: + """True if val represents a present, non-empty configured value.""" + if val is None: + return False + if isinstance(val, str): + return bool(val.strip()) + try: + return len(val) > 0 + except TypeError: + return bool(val) + + +# --------------------------------------------------------------------------- +# Gateway-state resolvers (spec 9.1) +# --------------------------------------------------------------------------- + + +def _resolve_provisioning_state(gateway) -> Optional[str]: + """ + SDK-first / nested fallback. Returns None on conflict or both absent. + Only "Succeeded" is eligible; caller skips on anything else. + """ + sdk_val = getattr(gateway, "provisioning_state", None) + props = getattr(gateway, "properties", None) + nested_val = None + if props is not None: + nested_val = getattr(props, "provisioning_state", None) + if nested_val is None: + nested_val = getattr(props, "provisioningState", None) + if sdk_val is not None and nested_val is not None and sdk_val != nested_val: + return None # conflict -> skip + return sdk_val or nested_val + + +def _resolve_gateway_type(gateway) -> Optional[str]: + """ + SDK-first / nested fallback. Returns None on conflict or both absent. + Only "Vpn" and "ExpressRoute" are in scope; caller skips on anything else. + """ + sdk_val = getattr(gateway, "gateway_type", None) + props = getattr(gateway, "properties", None) + nested_val = None + if props is not None: + nested_val = getattr(props, "gateway_type", None) + if nested_val is None: + nested_val = getattr(props, "gatewayType", None) + if sdk_val is not None and nested_val is not None and sdk_val != nested_val: + return None # conflict -> skip + return sdk_val or nested_val + + +# --------------------------------------------------------------------------- +# Connection-surface resolver (spec 9.2) +# --------------------------------------------------------------------------- + + +def _resolve_connection_type(connection) -> Optional[str]: + """ + Resolve connectionType from a connection resource. + Returns None if absent from all sources or conflicting. + """ + sdk_val = getattr(connection, "connection_type", None) + props = getattr(connection, "properties", None) + nested_val = None + if props is not None: + nested_val = getattr(props, "connection_type", None) + if nested_val is None: + nested_val = getattr(props, "connectionType", None) + if sdk_val is not None and nested_val is not None and sdk_val != nested_val: + return None # conflict -> unresolvable + return sdk_val or nested_val + + +def _connections_gate( + net_client: NetworkManagementClient, + resource_group: str, + gw_name: str, + gateway_type: str, +) -> bool: + """ + Enumerate connections and apply the connection-surface contract (spec 9.2). + + Returns True (gate passed: no in-scope connections and no bypass issues). + Returns False (skip: in-scope connection present, unresolvable type, or bypass enabled). + Raises on list_connections() failure -- propagates to caller's per-gateway try-except. + """ + for conn in net_client.virtual_network_gateways.list_connections(resource_group, gw_name): + conn_type = _resolve_connection_type(conn) + if conn_type is None: + return False # spec 9.2.5: unknown/conflicting type -> skip gateway + + if conn_type in _IN_SCOPE_CONNECTION_TYPES: + return False # spec 9.2.7: in-scope connection present -> skip + + # spec 9.4: ExpressRoute bypass / FastPath check + if gateway_type == "ExpressRoute": + bypass = _resolve_express_route_bypass(conn) + if bypass is None or bypass is True: + return False # unresolvable or bypass enabled -> skip + + return True # no in-scope connections and no bypass issues + + +def _resolve_express_route_bypass(connection) -> Optional[bool]: + """ + Resolve expressRouteGatewayBypass from a connection resource. + Returns True (bypass enabled), False (confirmed not enabled), None (unresolvable). + """ + sdk_raw = getattr(connection, "express_route_gateway_bypass", _SENTINEL) + props = getattr(connection, "properties", None) + nested_raw = _SENTINEL + if props is not None: + nested_raw = getattr(props, "express_route_gateway_bypass", _SENTINEL) + if nested_raw is _SENTINEL: + nested_raw = getattr(props, "expressRouteGatewayBypass", _SENTINEL) + + sdk_found = sdk_raw is not _SENTINEL + nested_found = nested_raw is not _SENTINEL + + def _to_bool(v): + return v if isinstance(v, bool) else None + + sdk_b = _to_bool(sdk_raw) if sdk_found else None + nested_b = _to_bool(nested_raw) if nested_found else None + + # Present but not a bool -> unresolvable + if sdk_found and sdk_raw is not _SENTINEL and sdk_b is None: + return None + if nested_found and nested_raw is not _SENTINEL and nested_b is None: + return None + + if sdk_b is not None and nested_b is not None and sdk_b != nested_b: + return None # conflict -> unresolvable + + result = sdk_b if sdk_b is not None else nested_b + return result # None if absent = field absent = not bypassed + + +# --------------------------------------------------------------------------- +# P2S configuration resolver (spec 9.3) +# --------------------------------------------------------------------------- + + +def _resolve_p2s_configured(gateway) -> Optional[bool]: + """ + Resolve P2S configuration presence for VPN gateways per spec 9.3. + + Returns: + - True: P2S is configured (any field group non-empty) + - False: confirmed not configured (all field groups empty / vpnClientConfiguration absent) + - None: unresolvable (conflicting vpnClientConfiguration presence, or conflicting + field group values) -> caller must skip + """ + sdk_vcc = getattr(gateway, "vpn_client_configuration", _SENTINEL) + props = getattr(gateway, "properties", None) + nested_vcc = _SENTINEL + if props is not None: + nested_vcc = getattr(props, "vpn_client_configuration", _SENTINEL) + if nested_vcc is _SENTINEL: + nested_vcc = getattr(props, "vpnClientConfiguration", _SENTINEL) + + sdk_found = sdk_vcc is not _SENTINEL + nested_found = nested_vcc is not _SENTINEL + + if sdk_found and nested_found: + sdk_none = sdk_vcc is None + nested_none = nested_vcc is None + if sdk_none != nested_none: + return None # one says present, other says absent -> conflict -> skip + if sdk_none: + return False # both confirm absent + vcc = sdk_vcc # both present; use SDK as primary + elif sdk_found: + if sdk_vcc is None: + return False # SDK explicitly confirms absent (null) + vcc = sdk_vcc + elif nested_found: + if nested_vcc is None: + return False # nested explicitly confirms absent (null) + vcc = nested_vcc + else: + return None # field absent from all sources -> P2S state unresolvable -> skip + + # vcc is a non-None object -- check each of the 9 field groups + for snake, camel in _P2S_FIELD_PAIRS: + sdk_f = getattr(vcc, snake, None) + camel_f = getattr(vcc, camel, None) + sdk_ne = _is_field_nonempty(sdk_f) + camel_ne = _is_field_nonempty(camel_f) + # Conflict: one says non-empty, the other says empty (and both are present) + if sdk_f is not None and camel_f is not None and sdk_ne != camel_ne: + return None # conflicting field group -> unresolvable -> skip + if sdk_ne or camel_ne: + return True # P2S configured + + return False # all field groups empty -> not configured + + +# --------------------------------------------------------------------------- +# Unsupported topology / bypass signals (spec 9.4, 9.5) +# --------------------------------------------------------------------------- + + +def _gateway_has_virtual_wan_traffic(gateway) -> bool: + """Returns True only when allowVirtualWanTraffic is confirmed True (spec 9.5).""" + for attr in ("allow_virtual_wan_traffic", "allowVirtualWanTraffic"): + if getattr(gateway, attr, None) is True: + return True + props = getattr(gateway, "properties", None) + if props is not None: + for attr in ("allow_virtual_wan_traffic", "allowVirtualWanTraffic"): + if getattr(props, attr, None) is True: + return True + return False + + +def _gateway_admin_state_disabled(gateway) -> Optional[bool]: + """ + Resolve adminState for ExpressRoute gateways per spec 9.4. + + Returns: + - True: adminState is confirmed "Disabled" -> caller must skip + - False: adminState is a non-empty string other than "Disabled" -> proceed + - None: absent from all sources, conflict, None value, or unrecognized type + -> state unresolvable -> caller must skip (fail-closed) + """ + + def _find(obj) -> object: + for attr in ("admin_state", "adminState"): + v = getattr(obj, attr, _SENTINEL) + if v is not _SENTINEL: + return v + return _SENTINEL + + sdk_val = _find(gateway) + props = getattr(gateway, "properties", None) + nested_val = _find(props) if props is not None else _SENTINEL + + sdk_found = sdk_val is not _SENTINEL + nested_found = nested_val is not _SENTINEL + + if not sdk_found and not nested_found: + return None # absent from all sources -> unresolvable -> skip + + if sdk_found and nested_found and sdk_val != nested_val: + return None # conflict -> unresolvable -> skip + + val = sdk_val if sdk_found else nested_val + + if val == "Disabled": + return True + if isinstance(val, str) and val: + return False # confirmed non-Disabled (e.g., "Enabled") + return None # None value, non-string, or empty string -> unresolvable -> skip + + +# --------------------------------------------------------------------------- +# ExpressRoute metric family selection (spec 9.6.2) +# --------------------------------------------------------------------------- + + +def _er_metric_family(sku_tier: Optional[str]) -> Optional[Tuple[str, ...]]: + """ + Return the applicable ExpressRoute metric tuple for the given SKU tier. + Returns None if sku_tier is absent (unresolvable -> caller skips). + """ + if sku_tier is None: + return None # unknown -> unresolvable -> skip + if sku_tier in _ER_SCALABLE_TIERS: + return _ER_SCALABLE_METRICS + return _ER_STANDARD_METRICS # Standard/HighPerformance/UltraPerformance/AZ family + + +# --------------------------------------------------------------------------- +# Metric evaluation (spec 9.6) +# --------------------------------------------------------------------------- + + +def _evaluate_metric( + monitor_client: MonitorManagementClient, + resource_uri: str, + metric_name: str, + window_start: datetime, + window_end: datetime, +) -> _MetricResult: + """ + Evaluate a single Azure Monitor metric over the 30-day window per spec 9.6. + + Uses daily (P1D) buckets with a >= 80% coverage requirement. + Returns ACTIVE, ZERO, or UNKNOWN. + UNKNOWN covers: query failure, unusable response shape, no valid series, + insufficient bucket coverage, or any unparseable datapoint (fail-closed). + + Fail-closed on unparseable datapoints (spec 9.6.1): if any datapoint within + the response has an absent or non-datetime timestamp, the entire metric + evaluation returns UNKNOWN rather than silently discarding the datapoint and + continuing with the remaining series. + + Datapoints with no populated aggregation value (total, average, maximum all + None) are treated as sparse/missing -- they do not contribute to observed + bucket coverage, which drives the result toward UNKNOWN through the coverage + threshold rather than triggering an immediate UNKNOWN. + + Datapoints that fall outside the requested window are legitimate edge returns + from the Azure Monitor API and are silently filtered out. + """ + fmt = "%Y-%m-%dT%H:%M:%SZ" + timespan = f"{window_start.strftime(fmt)}/{window_end.strftime(fmt)}" + + first_bucket = window_start.replace(hour=0, minute=0, second=0, microsecond=0) + expected_buckets = math.ceil((window_end - first_bucket).total_seconds() / 86400) + if expected_buckets == 0: + return _MetricResult.UNKNOWN + + try: + response = monitor_client.metrics.list( + resource_uri, + metricnames=metric_name, + timespan=timespan, + interval="P1D", + aggregation="Total,Average,Maximum", + ) + except Exception: + return _MetricResult.UNKNOWN + + if not hasattr(response, "value") or response.value is None: + return _MetricResult.UNKNOWN + + # Per-bucket maximum across all populated aggregation types (Total, Average, Maximum). + # Using max rather than sum avoids double-counting when multiple aggregation fields + # are populated for the same datapoint. + bucket_max: dict = {} + for metric in response.value: + for ts in metric.timeseries or []: + for data in ts.data or []: + if data.timestamp is None: + return _MetricResult.UNKNOWN # unparseable datapoint -> fail-closed + ts_dt = data.timestamp + if not isinstance(ts_dt, datetime): + return _MetricResult.UNKNOWN # unparseable timestamp type -> fail-closed + # Accept any of Total / Average / Maximum -- take the max of what is present. + agg_val = None + for agg_attr in ("total", "average", "maximum"): + v = getattr(data, agg_attr, None) + if v is not None: + agg_val = max(agg_val, v) if agg_val is not None else v + if agg_val is None: + continue # sparse/missing aggregation -> reduces coverage, does not fail-close + ts_utc = ts_dt if ts_dt.tzinfo is not None else ts_dt.replace(tzinfo=timezone.utc) + if not (window_start <= ts_utc < window_end): + continue + key = ts_utc.strftime("%Y-%m-%dT00:00:00Z") + existing = bucket_max.get(key) + bucket_max[key] = max(existing, agg_val) if existing is not None else agg_val + + observed = len(bucket_max) + if observed == 0: + return _MetricResult.UNKNOWN + if observed / expected_buckets < _MIN_COVERAGE: + return _MetricResult.UNKNOWN + + signal = sum(bucket_max.values()) + return _MetricResult.ACTIVE if signal > 0 else _MetricResult.ZERO + + +# --------------------------------------------------------------------------- +# Main rule function +# --------------------------------------------------------------------------- + + def find_idle_vnet_gateways( *, subscription_id: str, @@ -74,33 +509,33 @@ def find_idle_vnet_gateways( region_filter: str = None, client: Optional[NetworkManagementClient] = None, resource_client: Optional[ResourceManagementClient] = None, + monitor_client: Optional[MonitorManagementClient] = None, ) -> List[Finding]: """ - Find Azure VNet Gateways (VPN/ExpressRoute) with no active connections. - - Conservative rule (review-only): - - Flags VPN gateways with zero Site-to-Site or Point-to-Site connections - - Flags ExpressRoute gateways with no circuits - - Skips non-Succeeded provisioning state - - Includes SKU cost estimates ($140-3500+/month) + Find Azure VNet Gateways (VPN or ExpressRoute) with no configured in-scope + connections and no applicable gateway activity over 30 days. IAM permissions: + - Microsoft.Resources/resources/read - Microsoft.Network/virtualNetworkGateways/read - - Microsoft.Network/connections/read + - Microsoft.Insights/metrics/read """ findings: List[Finding] = [] net_client = client or NetworkManagementClient( - credential=credential, - subscription_id=subscription_id, + credential=credential, subscription_id=subscription_id ) - res_client = resource_client or ResourceManagementClient( - credential=credential, - subscription_id=subscription_id, + credential=credential, subscription_id=subscription_id ) + mon_client = monitor_client or MonitorManagementClient( + credential=credential, subscription_id=subscription_id + ) + + now = datetime.now(timezone.utc) + window_start = now - timedelta(days=_IDLE_WINDOW_DAYS) - # List all VNet gateways across the subscription using resource filter + # Subscription-wide gateway inventory (spec 12: propagate if this fails) gateway_resources = list( res_client.resources.list( filter="resourceType eq 'Microsoft.Network/virtualNetworkGateways'" @@ -108,139 +543,160 @@ def find_idle_vnet_gateways( ) for gw_resource in gateway_resources: - resource_group = _extract_resource_group(gw_resource.id) - if not resource_group: + # spec 8.1: id guard + gw_id = getattr(gw_resource, "id", None) + if not gw_id: continue - # Get full gateway details - try: - gw = net_client.virtual_network_gateways.get(resource_group, gw_resource.name) - except Exception: - continue - - if region_filter and gw.location != region_filter: + # spec 8.2: name guard + gw_name = getattr(gw_resource, "name", None) + if not gw_name: continue - # Skip non-Succeeded provisioning state - if getattr(gw, "provisioning_state", None) not in (None, "Succeeded"): - continue - - gateway_type = getattr(gw, "gateway_type", None) - - # Only handle known gateway types - if gateway_type not in ("Vpn", "ExpressRoute"): + # Extract resource group before the per-gateway try block (no API call) + resource_group = _extract_resource_group(gw_id) + if not resource_group: continue - sku_name = gw.sku.name if gw.sku else "unknown" - sku_tier = gw.sku.tier if gw.sku else "unknown" - - # Get connections for this gateway + # Per-gateway: get full details, list connections, evaluate metrics. + # Expected SDK retrieval failures -> skip this gateway (spec 12). + # HttpResponseError: HTTP-level failure (404, 403, 429, 5xx). + # ServiceRequestError: transport failure before a response. + # ServiceResponseError: transport failure while reading the response. try: - connections = list( - net_client.virtual_network_gateways.list_connections(resource_group, gw.name) + gw = net_client.virtual_network_gateways.get(resource_group, gw_name) + + # spec 8.3: region filter -- exact lowercase match + location = _norm_location(getattr(gw, "location", "") or "") + if region_filter and location != _norm_location(region_filter): + continue + + # spec 8.4 / 9.1: provisioning state must resolve to exactly "Succeeded" + if _resolve_provisioning_state(gw) != "Succeeded": + continue + + # spec 8.5 / 9.1: gateway type must resolve to "Vpn" or "ExpressRoute" + gateway_type = _resolve_gateway_type(gw) + if gateway_type not in ("Vpn", "ExpressRoute"): + continue + + sku = getattr(gw, "sku", None) + sku_name = getattr(sku, "name", None) if sku else None + sku_tier = getattr(sku, "tier", None) if sku else None + + # spec 8.9 / 9.5: Virtual WAN / hub-managed topology signal + if _gateway_has_virtual_wan_traffic(gw): + continue + + # spec 8.10 / 9.4: ExpressRoute standby / bypass -- adminState + # None (unresolvable) is also a skip: fail-closed per spec 9.4.4 + if gateway_type == "ExpressRoute": + admin_disabled = _gateway_admin_state_disabled(gw) + if admin_disabled is None or admin_disabled: + continue + + # spec 8.8 / 9.3: P2S configuration (VPN gateways only) + if gateway_type == "Vpn": + p2s_result = _resolve_p2s_configured(gw) + if p2s_result is None or p2s_result: + continue # unresolvable or P2S configured -> skip + + # spec 8.7 / 9.2: connection-surface contract + # list_connections() failure propagates to the outer except and skips this gateway + if not _connections_gate(net_client, resource_group, gw_name, gateway_type): + continue + + # spec 8.11-8.13 / 9.6: idle metrics + if gateway_type == "Vpn": + metrics_to_check = _VPN_METRICS + else: + metrics_to_check = _er_metric_family(sku_tier) + if metrics_to_check is None: + continue # unknown ER SKU tier -> unresolvable -> skip + + resource_uri = gw.id or gw_id + all_zero = True + for metric_name in metrics_to_check: + result = _evaluate_metric(mon_client, resource_uri, metric_name, window_start, now) + if result != _MetricResult.ZERO: + all_zero = False + break + + if not all_zero: + continue + + # --- EMIT --- + tags = getattr(gw, "tags", None) or {} # spec 7: never None in output + + type_label = "VPN" if gateway_type == "Vpn" else "ExpressRoute" + signals_used = [ + "Provisioning state is 'Succeeded'", + f"Gateway type is '{gateway_type}'", + "No configured in-scope connection resources " + "(IPsec, Vnet2Vnet, ExpressRoute) -- list_connections() exhausted", + ] + if gateway_type == "Vpn": + signals_used.append("No P2S (point-to-site) configuration present") + else: + signals_used.append( + "No ExpressRoute standby/bypass topology detected " + "(adminState not Disabled, no bypass-enabled connections)" + ) + signals_used.append("allowVirtualWanTraffic signal is not True") + signals_used.append( + f"All applicable idle metrics ({', '.join(metrics_to_check)}) " + f"resolved to zero over {_IDLE_WINDOW_DAYS} days " + f"with >= {int(_MIN_COVERAGE * 100)}% daily bucket coverage" ) - except Exception: - connections = [] - - # Check for active connections. - # The list_connections() API returns summary entities where connection_status - # is often None even for live connections. Treat None/unknown status as - # potentially active (conservative). Only treat as inactive when explicitly - # NotConnected or Disconnected. - _inactive_statuses = {"NotConnected", "Disconnected"} - active_connections = [] - for c in connections: - status = getattr(c, "connection_status", None) - # Treat unknown status as active (conservative assumption) - if status not in _inactive_statuses: - active_connections.append(c) - - # For VPN gateways, also check VPN client configuration (P2S) - vpn_client_config = getattr(gw, "vpn_client_configuration", None) - has_p2s_config = ( - vpn_client_config is not None - and getattr(vpn_client_config, "vpn_client_address_pool", None) is not None - ) - - # Determine if gateway is idle - is_idle = False - signals = [] - - if gateway_type == "Vpn": - if len(active_connections) == 0 and not has_p2s_config: - is_idle = True - signals.append("No active Site-to-Site connections detected") - signals.append("No Point-to-Site configuration found") - elif len(active_connections) == 0 and has_p2s_config: - # Has P2S config but we can't check active P2S clients without - # additional API calls, so we'll note it as a signal not checked - pass - elif gateway_type == "ExpressRoute": - if len(active_connections) == 0: - is_idle = True - signals.append("No active ExpressRoute circuit connections detected") - - if not is_idle: - continue - - signals.append(f"Gateway SKU is {sku_name} ({SKU_COSTS.get(sku_name, 'significant cost')})") - - evidence = Evidence( - signals_used=signals, - signals_not_checked=[ - "Active Point-to-Site VPN clients", - "Connection status not always populated by Azure list API (connections with unknown status treated as active)", - "Planned connection setup", - "IaC-managed intent", - "Migration or teardown in progress", - "Disaster recovery or failover standby", - "Test/dev environment gateway", - ], - time_window=None, - ) - - cost_estimate = SKU_COSTS.get(sku_name, "Unknown") - cost_usd = float(_SKU_COST_USD[sku_name]) if sku_name in _SKU_COST_USD else None - - title = ( - "VPN Gateway Has No Active Connections" - if gateway_type == "Vpn" - else "ExpressRoute Gateway Has No Active Connections" - ) - findings.append( - Finding( - provider="azure", - rule_id="azure.virtual_network_gateway.idle", - resource_type="azure.virtual_network_gateway", - resource_id=gw.id, - region=gw.location, - estimated_monthly_cost_usd=cost_usd, - title=title, - summary=( - f"{gateway_type} Gateway '{gw.name}' appears to have no active connections " - f"and may be incurring significant charges ({cost_estimate}) without serving traffic." - ), - reason=f"No active connections on {gateway_type} Gateway", - risk=RiskLevel.HIGH, - confidence=ConfidenceLevel.MEDIUM, - detected_at=datetime.now(timezone.utc), - evidence=evidence, - details={ - "resource_name": gw.name, - "resource_group": resource_group, - "subscription_id": subscription_id, - "gateway_type": gateway_type, - "sku_name": sku_name, - "sku_tier": sku_tier, - "sku_capacity": gw.sku.capacity if gw.sku else None, - "total_connections": len(connections), - "active_connections": len(active_connections), - "has_p2s_config": has_p2s_config, - "cost_estimate": cost_estimate, - "tags": gw.tags, - }, + findings.append( + Finding( + provider="azure", + rule_id=_RULE_ID, + resource_type=_RESOURCE_TYPE, + resource_id=resource_uri, + region=location, + estimated_monthly_cost_usd=None, # spec 10: always None + title=f"Idle Azure {type_label} Gateway", + summary=( + f"{gateway_type} gateway '{gw_name}' has no configured " + f"connections and zero applicable gateway activity " + f"over {_IDLE_WINDOW_DAYS} days" + ), + reason=( + f"No in-scope connections, no applicable bypass topology, " + f"and all idle metrics zero over {_IDLE_WINDOW_DAYS} days" + ), + risk=RiskLevel.HIGH, + confidence=ConfidenceLevel.HIGH, + detected_at=now, + evidence=Evidence( + signals_used=signals_used, + signals_not_checked=[ + "Planned future connectivity rollout", + "DR / failover intent not visible in Azure control plane", + "Organizational ownership or IaC intent", + "Exact monthly billing amount", + ], + time_window=f"{_IDLE_WINDOW_DAYS} days", + ), + details={ + "resource_name": gw_name, + "resource_group": resource_group, + "subscription_id": subscription_id, + "gateway_type": gateway_type, + "provisioning_state": "Succeeded", + "sku_name": sku_name, + "sku_tier": sku_tier, + "tags": tags, + "p2s_configured": False if gateway_type == "Vpn" else None, + "idle_window_days": _IDLE_WINDOW_DAYS, + "metrics_used": list(metrics_to_check), + }, + ) ) - ) + + except (HttpResponseError, ServiceRequestError, ServiceResponseError): + continue # per-gateway retrieval failure -> skip this gateway (spec 12) return findings diff --git a/docs/rules/azure.md b/docs/rules/azure.md index eefc205..08b7bb1 100644 --- a/docs/rules/azure.md +++ b/docs/rules/azure.md @@ -115,17 +115,17 @@ **Spec:** [specs/azure/app_gateway_no_backends.md](../specs/azure/app_gateway_no_backends.md) #### `azure.virtual_network_gateway.idle` -**Detects:** VPN or ExpressRoute Gateways with no active S2S/ExpressRoute connections +**Detects:** VPN or ExpressRoute Gateways with no configured in-scope connection resources (IPsec, Vnet2Vnet, ExpressRoute) and zero applicable gateway metrics over a 30-day window -**Confidence / Risk:** MEDIUM (no active connections; P2S client count not checked) / HIGH +**Confidence / Risk:** HIGH (all connection, P2S, bypass, and metric signals resolved deterministically) / HIGH -**Permissions:** `Microsoft.Network/virtualNetworkGateways/read`, `Microsoft.Network/connections/read` +**Permissions:** `Microsoft.Resources/resources/read`, `Microsoft.Network/virtualNetworkGateways/read`, `Microsoft.Insights/metrics/read` -**Params:** none +**Params:** none (30-day fixed idle window) -**Exclusions:** gateways with P2S configuration present and no active connections are still flagged if no other connections exist +**Exclusions:** `id` or `name` absent/empty; malformed ARM id (resource group unextractable); `provisioning_state != "Succeeded"` (SDK+nested, conflict -> skip); `gateway_type` not `"Vpn"` or `"ExpressRoute"` (conflict -> skip); `allowVirtualWanTraffic == True`; ExpressRoute: `adminState == "Disabled"` or any connection with `expressRouteGatewayBypass == True` or unresolvable; VPN: any P2S field group (`vpnClientConfiguration`, address pool, root certs, AAD/Entra tenant, etc.) non-empty or unresolvable; any configured in-scope connection resource present; any connection type unresolvable or conflicting; `list_connections()` fails; ExpressRoute SKU tier absent (metric family unresolvable); any applicable metric unknown, below 80% daily-bucket coverage, or non-zero; per-gateway SDK retrieval errors (HttpResponseError, ServiceRequestError, ServiceResponseError) -**Spec:** — +**Spec:** [specs/azure/vnet_gateway_idle.md](../specs/azure/vnet_gateway_idle.md) --- diff --git a/docs/specs/azure/vnet_gateway_idle.md b/docs/specs/azure/vnet_gateway_idle.md new file mode 100644 index 0000000..a24cd2d --- /dev/null +++ b/docs/specs/azure/vnet_gateway_idle.md @@ -0,0 +1,517 @@ +# Azure Rule Spec — `azure.virtual_network_gateway.idle` + +## 1. Rule Identity + +- **Rule ID:** `azure.virtual_network_gateway.idle` +- **Provider:** Azure +- **ARM resource type:** `Microsoft.Network/virtualNetworkGateways` +- **Finding resource_type:** `azure.virtual_network_gateway` + +--- + +## 2. Intent + +Detect Azure virtual network gateways that appear to have **no configured or active connectivity surfaces** and **no applicable gateway activity** over a conservative observation window, making them review candidates for cleanup or rightsizing. + +This rule is deliberately **low-noise**. It is a **review-candidate** rule only, not proof that the gateway should be deleted, not proof that a standby topology is unnecessary, and not proof of a specific monthly saving. + +--- + +## 3. Azure Documentation Grounding + +### 3.1 VPN Gateway supports multiple connection modes and has ongoing compute cost + +Microsoft documents that a VPN gateway can support: + +- site-to-site connections +- VNet-to-VNet connections +- point-to-site connections +- coexisting ExpressRoute + VPN topologies + +Microsoft also documents that multiple connections can exist on the same VPN gateway and that all VPN tunnels share the available gateway bandwidth. + +Microsoft further documents that VPN gateways have hourly compute cost in addition to applicable data-transfer charges. + +Source: *About VPN Gateway* +URL: https://learn.microsoft.com/en-us/azure/vpn-gateway/vpn-gateway-about-vpngateways + +Rule consequence: + +1. A VPN gateway with configured connection surfaces is not equivalent to an unconfigured orphaned resource. +2. Idle evaluation must consider both S2S/VNet-to-VNet and P2S connectivity surfaces. +3. Exact monthly savings must not be inferred from SKU alone. + +### 3.2 ExpressRoute gateways exchange routes and can bypass the gateway data path + +Microsoft documents that an ExpressRoute virtual network gateway serves two key purposes: + +1. exchanging IP routes between networks +2. routing network traffic between them + +Microsoft also documents that **FastPath** allows on-premises traffic to bypass the virtual network gateway for improved performance. + +Source: *About ExpressRoute virtual network gateways* +URL: https://learn.microsoft.com/en-us/azure/expressroute/expressroute-about-virtual-network-gateways + +Rule consequence: + +1. Zero gateway traffic alone is **not** sufficient proof of ExpressRoute idleness when bypass/FastPath is enabled. +2. A conservative rule should skip ExpressRoute topologies that can bypass the gateway data path. +3. ExpressRoute gateways may still have operational value through route exchange even when traffic is low. + +### 3.3 Gateway control-plane resource shape + +Microsoft ARM/Bicep and SDK documentation for `Microsoft.Network/virtualNetworkGateways` exposes fields including: + +- `gatewayType` +- `provisioningState` +- `sku` +- `vpnClientConfiguration` +- `bgpSettings` +- `adminState` +- `allowRemoteVnetTraffic` +- `allowVirtualWanTraffic` +- `tags` + +Sources: + +- *Microsoft.Network/virtualNetworkGateways ARM / Bicep reference* +- *azure.mgmt.network.models.VirtualNetworkGateway* + +URLs: + +- https://learn.microsoft.com/en-us/azure/templates/microsoft.network/virtualnetworkgateways +- https://learn.microsoft.com/en-us/python/api/azure-mgmt-network/azure.mgmt.network.models.virtualnetworkgateway?view=azure-python + +Rule consequence: + +These fields provide the canonical control-plane inputs for gateway type, provisioning stability, P2S configuration presence, ExpressRoute standby/bypass-related context, and unsupported Virtual WAN / hybrid-topology signals. + +### 3.4 Connection resource shape + +Microsoft ARM/Bicep and SDK documentation for `Microsoft.Network/connections` exposes fields including: + +- `connectionType` +- `connectionStatus` +- `provisioningState` +- `ingressBytesTransferred` +- `egressBytesTransferred` +- `expressRouteGatewayBypass` + +Microsoft documents `connectionStatus` known values as: + +- `Unknown` +- `Connecting` +- `Connected` +- `NotConnected` + +Sources: + +- *Microsoft.Network/connections ARM / Bicep reference* +- *azure.mgmt.network.models.VirtualNetworkGatewayConnection* + +URLs: + +- https://learn.microsoft.com/en-us/azure/templates/microsoft.network/connections +- https://learn.microsoft.com/en-us/python/api/azure-mgmt-network/azure.mgmt.network.models.virtualnetworkgatewayconnection?view=azure-python + +Rule consequence: + +1. Connection presence is a meaningful operational signal. +2. `Unknown`, `Connecting`, or missing connection status are not safe to treat as idle. +3. ExpressRoute gateway bypass / FastPath on any connection must be treated conservatively. + +### 3.5 Azure Monitor exposes authoritative gateway and connection activity metrics + +Microsoft documents Azure Monitor metrics for: + +- `Microsoft.Network/virtualNetworkGateways` +- `Microsoft.Network/connections` +- VPN gateway monitoring reference metrics for P2S and tunnel activity + +Documented metrics include: + +- `AverageBandwidth` +- `InboundFlowsCount` +- `OutboundFlowsCount` +- `P2SConnectionCount` +- `P2SBandwidth` +- `ExpressRouteGatewayBitsPerSecond` +- `ExpressRouteGatewayPacketsPerSecond` +- `ExpressRouteGatewayActiveFlows` +- `ScalableExpressRouteGatewayBitsPerSecond` +- `ScalableExpressRouteGatewayPacketsPerSecond` +- `ScalableExpressRouteGatewayActiveFlows` +- `BitsInPerSecond` +- `BitsOutPerSecond` + +Sources: + +- *Supported metrics for microsoft.network/virtualnetworkgateways* +- *Monitor VPN Gateway reference* +- *Supported metrics for Microsoft.Network/connections* + +URLs: + +- https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics +- https://learn.microsoft.com/en-us/azure/vpn-gateway/monitor-vpn-gateway-reference +- https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-connections-metrics + +Rule consequence: + +1. Idle evaluation should use documented Azure Monitor platform metrics rather than only ad-hoc control-plane heuristics. +2. P2S activity must be evaluated with P2S-specific metrics when P2S configuration exists. +3. Connection-level traffic metrics can be used as context or reinforcing evidence, but unresolved metrics should never be guessed. + +--- + +## 4. Detection Goal + +Emit a finding only when **all** of the following are true: + +1. `gateway.id` is present and non-empty +2. `gateway.name` is present and non-empty +3. the optional region filter matches the normalized location +4. `gateway.provisioning_state` resolves to exactly `"Succeeded"` +5. `gateway.gateway_type` resolves to exactly `"Vpn"` or `"ExpressRoute"` +6. `virtual_network_gateways.list_connections(...)` resolves reliably +7. no configured in-scope connection resource with type `IPsec`, `Vnet2Vnet`, or `ExpressRoute` is present +8. documented `allowVirtualWanTraffic` is not `True` +9. all applicable idle metrics resolve reliably for the same observation window +10. all applicable idle metrics satisfy the minimum coverage threshold +11. all applicable idle metrics are zero for that window + +If any required signal cannot be established reliably, skip rather than emit. + +This rule intentionally prioritizes **precision over recall** and skips unresolved or bypassed gateway topologies by design. + +--- + +## 5. Non-Goals + +This rule does **not** attempt to prove: + +- that deleting the gateway is safe +- that the topology has no disaster-recovery or failover purpose +- that no future branch/on-premises/VNet user will connect +- that a gateway with configured but quiet connections is unnecessary +- that a specific monthly saving exists + +--- + +## 6. Canonical Inputs + +### 6.1 Required control-plane surfaces + +The implementation may use: + +- subscription-wide gateway inventory +- per-gateway `get(...)` +- `virtual_network_gateways.list_connections(...)` +- Azure Monitor platform metrics on gateway and, if needed, connection resource IDs + +It must not require: + +- packet capture +- guest inspection +- external CMDB / ticketing data + +### 6.2 Idle window + +- Configurable parameter: none +- Fixed idle window: `30 days` + +Reason: + +- network gateways are high-impact infrastructure +- gateways may be intentionally quiet for DR or infrequent connectivity +- a longer window reduces false positives from short-lived outages, maintenance, or infrequent administrative access + +--- + +## 7. Normalization Contract + +| Field | Normalization | +|---|---| +| `location` | Lowercase ARM location string; compare by exact lowercase string equality only. Do not remove spaces, hyphens, or digits. | +| `provisioning_state` | Compare case-sensitively to exact `"Succeeded"` after SDK/raw resolution. | +| `gateway_type` | Compare case-sensitively to exact `"Vpn"` or `"ExpressRoute"` after SDK/raw resolution. | +| `connection_status` | Compare case-sensitively to documented values such as `"Connected"`, `"Connecting"`, `"Unknown"`, and `"NotConnected"`. Unknown or missing is not equivalent to inactive. | +| `in_scope_connection_resource` | A `Microsoft.Network/connections` resource returned by `virtual_network_gateways.list_connections(...)` whose resolved `connectionType` is exactly `IPsec`, `Vnet2Vnet`, or `ExpressRoute`. | +| `p2s_configured` | `True` when the resolved `vpnClientConfiguration` object contains any supported P2S field defined in section 9.3; otherwise `False` only when all listed P2S fields are confirmed absent or empty. | +| `express_route_gateway_bypass` | Treat `True` as FastPath/bypass enabled for the connection. | +| `virtual_wan_signal` | `True` only when the documented gateway field `allowVirtualWanTraffic` resolves to `True`; `False` when it resolves to `False`; otherwise unknown. This specification does not infer Virtual WAN or hub-managed topology from undocumented fields. | +| `metric_coverage_ratio` | `observed_parseable_buckets / expected_buckets` for the defined 30-day daily-bucket query. | +| `no_configured_connection_resources` | `True` only when `list_connections(...)` completes successfully, pagination is exhausted, and zero in-scope connection resources remain after type resolution. | +| `tags` | `gateway.tags or {}` — never `None` in output. Tags are contextual only for this rule. | + +--- + +## 8. Unified Decision Rule + +| # | Condition | Action | +|---|---|---| +| 8.1 | `id` absent, `None`, or empty | Skip | +| 8.2 | `name` absent, `None`, or empty | Skip | +| 8.3 | Region filter set and normalized location does not match | Skip | +| 8.4 | `provisioning_state` does not resolve to `"Succeeded"` | Skip | +| 8.5 | `gateway_type` does not resolve to `"Vpn"` or `"ExpressRoute"` | Skip | +| 8.6 | Any required control-plane surface is missing, conflicting, or unresolvable | Skip | +| 8.7 | Any configured `in_scope_connection_resource` is present | Skip | +| 8.8 | VPN gateway has confirmed P2S configuration present | Skip | +| 8.9 | Documented Virtual WAN signal is present (`allowVirtualWanTraffic == True`) | Skip | +| 8.10 | ExpressRoute standby/bypass topology is present (`adminState == "Disabled"` or any bypass-enabled connection) | Skip | +| 8.11 | Any applicable idle metric cannot be resolved reliably for the idle window | Skip | +| 8.12 | Any applicable idle metric fails the minimum coverage threshold for the idle window | Skip | +| 8.13 | Any applicable idle metric is non-zero over the idle window | Skip | +| 8.14 | All required signals resolve, no configured `in_scope_connection_resource` remains, no P2S configuration remains, documented Virtual WAN signal is absent, and applicable idle metrics are zero with sufficient coverage for 30 days | **EMIT** | + +--- + +## 9. Canonical Evaluation Contracts + +### 9.1 Gateway-state contract + +Resolve gateway provisioning state in this order: + +1. SDK projection such as `gateway.provisioning_state` +2. nested/raw properties projection such as `properties.provisioningState` +3. otherwise unknown + +Resolve gateway type in this order: + +1. SDK projection such as `gateway.gateway_type` +2. nested/raw properties projection such as `properties.gatewayType` +3. otherwise unknown + +Required behavior: + +1. Only exact `"Succeeded"` is eligible. +2. Only exact `"Vpn"` and `"ExpressRoute"` are in scope. +3. Unknown, conflicting, or any other values must skip. + +### 9.2 Connection-surface contract + +Resolve configured gateway connections using the gateway’s authoritative `virtual_network_gateways.list_connections(...)` surface. + +Canonical definition: + +- **In-scope connection resource:** a `Microsoft.Network/connections` resource returned by `virtual_network_gateways.list_connections(...)` whose resolved `connectionType` is exactly `IPsec`, `Vnet2Vnet`, or `ExpressRoute` + +Required behavior: + +1. Resolve connection type in this order: + - SDK projection such as `connection.connection_type` + - nested/raw properties projection such as `properties.connectionType` + - otherwise unknown +2. Treat any returned `in_scope_connection_resource` as configured connection presence. +3. Do **not** treat `connection_status == "NotConnected"` as equivalent to “no configured connection”. +4. An empty connection result is authoritative only when enumeration completes successfully, pagination is exhausted, and no partial-response ambiguity remains. +5. If any returned connection has unknown or conflicting type resolution, skip. +6. If connection listing fails, is partial, or cannot be resolved reliably, skip. +7. If any configured `in_scope_connection_resource` is present, the gateway must skip. + +Scope boundary: + +- This specification treats `virtual_network_gateways.list_connections(...)` as the sole authoritative ExpressRoute linkage surface. +- It does **not** infer ExpressRoute linkage from separate circuit inventory, peering inventory, route tables, or undocumented cross-resource associations outside that connection-listing surface. + +Rationale: + +Configured gateway connections represent intentional network topology. A conservative enterprise-quality rule should not emit merely because a configured connection is currently quiet or disconnected. + +### 9.3 P2S configuration contract + +For VPN gateways: + +1. Resolve `vpnClientConfiguration` in this order: + - SDK projection such as `gateway.vpn_client_configuration` + - nested/raw properties projection such as `properties.vpnClientConfiguration` + - otherwise unknown +2. For every supported P2S field group, resolve values using the same precedence rule: + - SDK projection on the resolved `vpnClientConfiguration` object + - raw camelCase property on the resolved `vpnClientConfiguration` object + - raw snake_case property on the resolved `vpnClientConfiguration` object + - otherwise unknown +3. Supported P2S field groups are: + - address pool: `vpn_client_address_pool` / `vpnClientAddressPool` + - client connection configurations: `vng_client_connection_configurations` / `vngClientConnectionConfigurations` + - root certificates: `vpn_client_root_certificates` / `vpnClientRootCertificates` + - revoked certificates: `vpn_client_revoked_certificates` / `vpnClientRevokedCertificates` + - authentication types: `vpn_authentication_types` / `vpnAuthenticationTypes` + - client protocols: `vpn_client_protocols` / `vpnClientProtocols`, including documented protocol selections such as `OpenVPN` and `IkeV2` + - AAD / Entra auth tenant: `aad_tenant` / `aadTenant` + - AAD / Entra auth audience: `aad_audience` / `aadAudience` + - AAD / Entra auth issuer: `aad_issuer` / `aadIssuer` +4. If any field group resolves to a present, non-empty configured value, treat P2S as configured. +5. If any field group has conflicting resolved values across the precedence chain, skip. +6. If P2S configuration presence cannot be resolved reliably, skip. +7. If any P2S configuration is present, skip rather than infer idleness from current client count alone. + +Rationale: + +Configured P2S gateways can be intentionally retained for remote access even when current client count is zero. + +### 9.4 ExpressRoute standby / bypass contract + +For ExpressRoute gateways: + +1. If `adminState == "Disabled"`, skip. +2. If any connection exposes `expressRouteGatewayBypass == True`, skip. +3. If `list_connections(...)` returns any `in_scope_connection_resource` whose resolved `connectionType` is exactly `ExpressRoute`, skip even when all observed traffic metrics are zero. +4. If bypass/standby-related state cannot be resolved reliably, skip. + +Rationale: + +Microsoft documents that ExpressRoute FastPath can bypass gateway data forwarding, so zero gateway traffic does not prove idleness in bypassed topologies. + +### 9.5 Unsupported Virtual WAN / hybrid-topology contract + +This specification does **not** define idle semantics for Virtual WAN / hub-managed / hybrid attachment patterns beyond the documented gateway field `allowVirtualWanTraffic`. + +Required behavior: + +1. Resolve the documented field `allowVirtualWanTraffic` from SDK/raw gateway payload. +2. If `allowVirtualWanTraffic == True`, skip. +3. If `allowVirtualWanTraffic` is absent or unresolved, do **not** infer Virtual WAN or hub-managed topology from undocumented fields. + +Rationale: + +Microsoft documents `allowVirtualWanTraffic` on the gateway resource shape, but this specification does not claim broader authoritative Virtual WAN linkage detection beyond that documented field. + +### 9.6 Idle-metrics contract + +All applicable metrics must be queried over the same window: + +- `window_end = now` +- `window_start = now - 30 days` +- `time_grain = 1 day` + +Required behavior: + +1. Metric absence, retrieval failure, empty/unusable series, or unparseable datapoints -> skip +2. Use platform metrics only; do not infer zero from missing data +3. For each required metric, compute coverage using daily buckets across the 30-day window +4. Minimum required coverage per metric: `>= 80%` of expected daily buckets +5. If any required metric falls below the coverage threshold, skip +6. Emit only when every parseable datapoint in every required metric is exactly zero over the window +7. Sparse or partially populated time series are not equivalent to zero usage + +#### 9.6.1 VPN gateways + +For VPN gateways, require all of the following gateway metrics to resolve and be zero: + +- `AverageBandwidth` +- `InboundFlowsCount` +- `OutboundFlowsCount` + +If the gateway family exposes P2S metrics despite no configured P2S surface, they may be included as reinforcing context only; they must not override the control-plane contract. + +#### 9.6.2 ExpressRoute gateways + +For ExpressRoute gateways, use the applicable metric family based on SKU / scale model: + +1. Standard / HighPerformance / UltraPerformance / AZ gateway family: + - `ExpressRouteGatewayBitsPerSecond` + - `ExpressRouteGatewayPacketsPerSecond` + - `ExpressRouteGatewayActiveFlows` +2. Scalable gateway family: + - `ScalableExpressRouteGatewayBitsPerSecond` + - `ScalableExpressRouteGatewayPacketsPerSecond` + - `ScalableExpressRouteGatewayActiveFlows` + +All applicable metrics must resolve and be zero for the window. + +### 9.7 Connection metrics as reinforcing diagnostics + +If connection resources are inspected before exclusion, `Microsoft.Network/connections` metrics such as: + +- `BitsInPerSecond` +- `BitsOutPerSecond` + +may be included as reinforcing diagnostics. + +However: + +1. connection metrics must not override the no-configured-connections contract +2. a configured connection with zero traffic is still a skip, not an emit + +--- + +## 10. Cost Model + +`estimated_monthly_cost_usd = None` + +Mandatory rules: + +1. Do **not** use flat hardcoded monthly SKU estimates +2. Do **not** infer cost from SKU alone +3. Do **not** infer cost from current traffic alone +4. State only that gateways have meaningful ongoing gateway compute cost and may also incur transfer-related charges depending on service and topology + +--- + +## 11. Finding Shape + +### 11.1 Required fields + +| Field | Value | +|---|---| +| `provider` | `"azure"` | +| `rule_id` | `"azure.virtual_network_gateway.idle"` | +| `resource_type` | `"azure.virtual_network_gateway"` | +| `resource_id` | original ARM id from `gateway.id` | +| `region` | normalized location | +| `risk` | `HIGH` | +| `confidence` | `HIGH` | +| `estimated_monthly_cost_usd` | `None` | + +### 11.2 Required evidence + +`signals_used` must clearly disclose: + +1. provisioning state is `"Succeeded"` +2. gateway type is `"Vpn"` or `"ExpressRoute"` +3. no configured `in_scope_connection_resource` was present +4. no P2S configuration was present for VPN gateways +5. no standby/bypass topology was present for ExpressRoute gateways +6. documented `allowVirtualWanTraffic` signal was not `True` +7. applicable idle metrics resolved to zero over 30 days with sufficient coverage + +`signals_not_checked` should include remaining blind spots such as: + +1. planned future connectivity rollout +2. DR / failover intent not visible in Azure control plane +3. organizational ownership / IaC intent +4. exact monthly billing amount + +### 11.3 Required details + +Details should include at least: + +- `resource_name` +- `resource_group` +- `subscription_id` +- `gateway_type` +- `provisioning_state` +- `sku_name` +- `sku_tier` +- `tags` + +May also include: + +- `p2s_configured` +- `configured_connection_count` +- `admin_state` +- `idle_window_days` +- `metric_coverage_ratio_by_metric` +- names of the gateway metrics used + +--- + +## 12. Failure Behavior + +- If subscription-wide gateway inventory fails, let the exception propagate +- If per-gateway `get(...)`, `list_connections(...)`, unsupported-topology resolution, or metric retrieval fails for a specific gateway, skip that gateway +- If a gateway record is malformed or missing required fields, skip that gateway +- Do not emit on partial or unresolved connection, P2S, bypass, unsupported-topology, or metric state diff --git a/tests/cleancloud/providers/azure/test_azure_vnet_gateway_idle.py b/tests/cleancloud/providers/azure/test_azure_vnet_gateway_idle.py index 0acca30..26ddf8f 100644 --- a/tests/cleancloud/providers/azure/test_azure_vnet_gateway_idle.py +++ b/tests/cleancloud/providers/azure/test_azure_vnet_gateway_idle.py @@ -1,312 +1,1441 @@ +""" +Tests for azure.virtual_network_gateway.idle -- spec-aligned. + +Covers: must-emit (VPN, ER standard, ER scalable), must-skip for all + exclusion conditions, failure behavior, finding shape, and unit + tests for all resolver and helper functions. +""" + +from datetime import datetime, timedelta, timezone from types import SimpleNamespace +from unittest.mock import MagicMock import pytest +from azure.core.exceptions import HttpResponseError, ServiceRequestError, ServiceResponseError from cleancloud.providers.azure.rules.vnet_gateway_idle import ( + _connections_gate, + _er_metric_family, + _evaluate_metric, + _gateway_admin_state_disabled, + _gateway_has_virtual_wan_traffic, + _MetricResult, + _resolve_connection_type, + _resolve_express_route_bypass, + _resolve_gateway_type, + _resolve_p2s_configured, + _resolve_provisioning_state, find_idle_vnet_gateways, ) +# --------------------------------------------------------------------------- +# Shared constants +# --------------------------------------------------------------------------- -def _make_gateway_resource(name, resource_group="rg-test"): - return SimpleNamespace( - id=f"/subscriptions/sub-123/resourceGroups/{resource_group}/providers/Microsoft.Network/virtualNetworkGateways/{name}", - name=name, - ) +_SUB = "sub-123" +_RG = "rg-test" +_GW_NAME = "my-vpn-gw" +_GW_ID = ( + f"/subscriptions/{_SUB}/resourceGroups/{_RG}" + f"/providers/Microsoft.Network/virtualNetworkGateways/{_GW_NAME}" +) +_ER_GW_NAME = "my-er-gw" +_ER_GW_ID = ( + f"/subscriptions/{_SUB}/resourceGroups/{_RG}" + f"/providers/Microsoft.Network/virtualNetworkGateways/{_ER_GW_NAME}" +) + + +# --------------------------------------------------------------------------- +# Fixture helpers +# --------------------------------------------------------------------------- -def _make_gateway( - name, +def _make_gw_resource(name=_GW_NAME, gw_id=_GW_ID): + """Stub returned by res_client.resources.list().""" + return SimpleNamespace(id=gw_id, name=name) + + +def _make_gw( + name=_GW_NAME, + gw_id=_GW_ID, + location="eastus", + provisioning_state="Succeeded", gateway_type="Vpn", sku_name="VpnGw1", sku_tier="VpnGw1", - location="eastus", tags=None, - provisioning_state="Succeeded", vpn_client_configuration=None, + allow_virtual_wan_traffic=None, + admin_state=None, + properties=None, ): + """Full gateway stub returned by net_client.virtual_network_gateways.get().""" return SimpleNamespace( - id=f"/subscriptions/sub-123/resourceGroups/rg-test/providers/Microsoft.Network/virtualNetworkGateways/{name}", + id=gw_id, name=name, location=location, - gateway_type=gateway_type, - sku=SimpleNamespace(name=sku_name, tier=sku_tier, capacity=2), provisioning_state=provisioning_state, - vpn_client_configuration=vpn_client_configuration, + gateway_type=gateway_type, + sku=SimpleNamespace(name=sku_name, tier=sku_tier), tags=tags, + vpn_client_configuration=vpn_client_configuration, + allow_virtual_wan_traffic=allow_virtual_wan_traffic, + admin_state=admin_state, + properties=properties, ) -def _make_connection(name, status="Connected"): - return SimpleNamespace( +def _make_er_gw( + name=_ER_GW_NAME, + gw_id=_ER_GW_ID, + sku_tier="HighPerformance", + admin_state="Enabled", # confirmed non-Disabled so ER gateways pass the fail-closed check + **kwargs, +): + """Convenience builder for an ExpressRoute gateway.""" + return _make_gw( name=name, - connection_status=status, + gw_id=gw_id, + gateway_type="ExpressRoute", + sku_name=sku_tier, + sku_tier=sku_tier, + admin_state=admin_state, + **kwargs, + ) + + +def _make_connection(connection_type="IPsec", express_route_gateway_bypass=None): + """Connection stub for list_connections().""" + return SimpleNamespace( + connection_type=connection_type, + express_route_gateway_bypass=express_route_gateway_bypass, + properties=None, ) -@pytest.fixture -def mock_clients(mocker): - """Create mock network and resource clients.""" - # Gateway resources (from ResourceManagementClient) - gateway_resources = [ - _make_gateway_resource("vpn-idle"), - _make_gateway_resource("vpn-active"), - _make_gateway_resource("er-idle"), - _make_gateway_resource("vpn-provisioning"), - _make_gateway_resource("vpn-with-p2s"), +def _zero_metric_response(num_buckets=30): + """Monitor metric response: 30 daily zero-total buckets, sufficient coverage.""" + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=0.0, + ) + for i in range(num_buckets) ] + ts = SimpleNamespace(data=data) + metric = SimpleNamespace(timeseries=[ts]) + return SimpleNamespace(value=[metric]) - # Full gateway details (from NetworkManagementClient.get) - gateways = { - "vpn-idle": _make_gateway("vpn-idle", gateway_type="Vpn"), - "vpn-active": _make_gateway("vpn-active", gateway_type="Vpn"), - "er-idle": _make_gateway( - "er-idle", - gateway_type="ExpressRoute", - sku_name="Standard", - sku_tier="Standard", - ), - "vpn-provisioning": _make_gateway( - "vpn-provisioning", - gateway_type="Vpn", - provisioning_state="Updating", - ), - "vpn-with-p2s": _make_gateway( - "vpn-with-p2s", - gateway_type="Vpn", - vpn_client_configuration=SimpleNamespace( - vpn_client_address_pool=SimpleNamespace(address_prefixes=["10.0.0.0/24"]) - ), - ), - } - - # Connections per gateway - connections = { - "vpn-idle": [], # No connections - should be flagged - "vpn-active": [_make_connection("conn1", "Connected")], # Active - skip - "er-idle": [], # No connections - should be flagged - "vpn-provisioning": [], # Provisioning - skip - "vpn-with-p2s": [], # Has P2S config - skip - } - - # Mock resource client - resource_client = mocker.MagicMock() - resource_client.resources.list.return_value = gateway_resources - - # Mock network client - network_client = mocker.MagicMock() - network_client.virtual_network_gateways.get.side_effect = lambda rg, name: gateways[name] - network_client.virtual_network_gateways.list_connections.side_effect = ( - lambda rg, name: connections[name] - ) - return network_client, resource_client +def _active_metric_response(): + """Monitor metric response: non-zero totals.""" + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=float(i + 1), + ) + for i in range(30) + ] + ts = SimpleNamespace(data=data) + metric = SimpleNamespace(timeseries=[ts]) + return SimpleNamespace(value=[metric]) -def test_find_idle_vnet_gateways(mock_clients): - network_client, resource_client = mock_clients +def _sparse_metric_response(): + """Monitor metric response: only 5 buckets -- below 80% coverage threshold.""" + return _zero_metric_response(num_buckets=5) - findings = find_idle_vnet_gateways( - subscription_id="sub-123", - credential=None, - region_filter="eastus", - client=network_client, - resource_client=resource_client, - ) - names = [f.details["resource_name"] for f in findings] - # Should flag idle gateways - assert len(findings) == 2 - assert "vpn-idle" in names - assert "er-idle" in names +def _make_clients( + gw=None, + connections=None, + metric_response=None, + gw_resource=None, +): + """ + Build (net_client, res_client, mon_client) mock triple. + Defaults to a VPN happy-path: Succeeded, no connections, zero metrics. + """ + if gw is None: + gw = _make_gw() + if connections is None: + connections = [] + if metric_response is None: + metric_response = _zero_metric_response() + if gw_resource is None: + gw_resource = _make_gw_resource() - # Not flagged - assert "vpn-active" not in names - assert "vpn-provisioning" not in names - assert "vpn-with-p2s" not in names + net = MagicMock() + net.virtual_network_gateways.get.return_value = gw + net.virtual_network_gateways.list_connections.return_value = iter(connections) - # Verify finding fields - for f in findings: - assert f.provider == "azure" - assert f.rule_id == "azure.virtual_network_gateway.idle" - assert f.confidence.value == "medium" - assert f.risk.value == "high" + res = MagicMock() + res.resources.list.return_value = [gw_resource] + mon = MagicMock() + mon.metrics.list.return_value = metric_response -def test_find_idle_vnet_gateways_empty_subscription(mocker): - resource_client = mocker.MagicMock() - resource_client.resources.list.return_value = [] + return net, res, mon - network_client = mocker.MagicMock() - findings = find_idle_vnet_gateways( - subscription_id="sub-123", +def _run(net, res, mon, region_filter=None): + return find_idle_vnet_gateways( + subscription_id=_SUB, credential=None, - client=network_client, - resource_client=resource_client, + region_filter=region_filter, + client=net, + resource_client=res, + monitor_client=mon, ) - assert findings == [] -def test_find_idle_vnet_gateways_region_filter(mocker): - gateway_resources = [ - _make_gateway_resource("vpn-east"), - _make_gateway_resource("vpn-west"), - ] +# --------------------------------------------------------------------------- +# TestMustEmit +# --------------------------------------------------------------------------- - gateways = { - "vpn-east": _make_gateway("vpn-east", location="eastus"), - "vpn-west": _make_gateway("vpn-west", location="westus"), - } - resource_client = mocker.MagicMock() - resource_client.resources.list.return_value = gateway_resources +class TestMustEmit: + def test_vpn_gateway_no_connections_zero_metrics(self): + net, res, mon = _make_clients() + findings = _run(net, res, mon) + assert len(findings) == 1 + assert findings[0].details["resource_name"] == _GW_NAME - network_client = mocker.MagicMock() - network_client.virtual_network_gateways.get.side_effect = lambda rg, name: gateways[name] - network_client.virtual_network_gateways.list_connections.return_value = [] + def test_er_standard_gateway_emits(self): + gw = _make_er_gw() + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + findings = _run(net, res, mon) + assert len(findings) == 1 + assert findings[0].details["gateway_type"] == "ExpressRoute" - findings = find_idle_vnet_gateways( - subscription_id="sub-123", - credential=None, - region_filter="eastus", - client=network_client, - resource_client=resource_client, - ) + def test_er_scalable_gateway_emits(self): + gw = _make_er_gw(sku_tier="ErGwScale") + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + findings = _run(net, res, mon) + assert len(findings) == 1 - assert len(findings) == 1 - assert "vpn-east" in findings[0].resource_id + def test_tags_passed_through(self): + gw = _make_gw(tags={"env": "test"}) + net, res, mon = _make_clients(gw=gw) + findings = _run(net, res, mon) + assert findings[0].details["tags"] == {"env": "test"} + def test_tags_none_becomes_empty_dict(self): + net, res, mon = _make_clients(gw=_make_gw(tags=None)) + findings = _run(net, res, mon) + assert findings[0].details["tags"] == {} -def test_find_idle_vnet_gateways_expressroute_with_connection(mocker): - """ExpressRoute gateway with active connection should NOT be flagged.""" - gateway_resources = [_make_gateway_resource("er-active")] + def test_empty_subscription_returns_no_findings(self): + net, res, mon = _make_clients() + res.resources.list.return_value = [] + findings = _run(net, res, mon) + assert findings == [] - gateways = { - "er-active": _make_gateway( - "er-active", - gateway_type="ExpressRoute", - sku_name="HighPerformance", - ), - } + def test_multiple_vpn_gateways_both_idle(self): + gw1 = _make_gw(name="gw1", gw_id=f"{_GW_ID}-1") + gw2 = _make_gw(name="gw2", gw_id=f"{_GW_ID}-2") - resource_client = mocker.MagicMock() - resource_client.resources.list.return_value = gateway_resources + r1 = _make_gw_resource(name="gw1", gw_id=f"{_GW_ID}-1") + r2 = _make_gw_resource(name="gw2", gw_id=f"{_GW_ID}-2") - network_client = mocker.MagicMock() - network_client.virtual_network_gateways.get.side_effect = lambda rg, name: gateways[name] - network_client.virtual_network_gateways.list_connections.return_value = [ - _make_connection("circuit1", "Connected") - ] + net = MagicMock() + net.virtual_network_gateways.get.side_effect = lambda rg, n: {"gw1": gw1, "gw2": gw2}[n] + net.virtual_network_gateways.list_connections.return_value = iter([]) - findings = find_idle_vnet_gateways( - subscription_id="sub-123", - credential=None, - client=network_client, - resource_client=resource_client, - ) + res = MagicMock() + res.resources.list.return_value = [r1, r2] - assert len(findings) == 0 + mon = MagicMock() + mon.metrics.list.return_value = _zero_metric_response() + findings = find_idle_vnet_gateways( + subscription_id=_SUB, + credential=None, + client=net, + resource_client=res, + monitor_client=mon, + ) + assert len(findings) == 2 -def test_find_idle_vnet_gateways_cost_estimates(mocker): - """Verify cost estimates are included for different SKUs.""" - gateway_resources = [ - _make_gateway_resource("vpn-basic"), - _make_gateway_resource("vpn-gw3"), - _make_gateway_resource("er-ultra"), - ] - gateways = { - "vpn-basic": _make_gateway("vpn-basic", sku_name="Basic", sku_tier="Basic"), - "vpn-gw3": _make_gateway("vpn-gw3", sku_name="VpnGw3", sku_tier="VpnGw3"), - "er-ultra": _make_gateway( - "er-ultra", - gateway_type="ExpressRoute", - sku_name="UltraPerformance", - sku_tier="UltraPerformance", - ), - } - - resource_client = mocker.MagicMock() - resource_client.resources.list.return_value = gateway_resources - - network_client = mocker.MagicMock() - network_client.virtual_network_gateways.get.side_effect = lambda rg, name: gateways[name] - network_client.virtual_network_gateways.list_connections.return_value = [] - - findings = find_idle_vnet_gateways( - subscription_id="sub-123", - credential=None, - client=network_client, - resource_client=resource_client, - ) +# --------------------------------------------------------------------------- +# TestIdNameGuards +# --------------------------------------------------------------------------- - assert len(findings) == 3 - by_name = {f.details["resource_name"]: f for f in findings} - assert "$27" in by_name["vpn-basic"].details["cost_estimate"] - assert "$930" in by_name["vpn-gw3"].details["cost_estimate"] - assert "$670" in by_name["er-ultra"].details["cost_estimate"] +class TestIdNameGuards: + def test_id_none_skips(self): + r = _make_gw_resource() + r.id = None + net, res, mon = _make_clients(gw_resource=r) + assert _run(net, res, mon) == [] - # Verify numeric cost fields - assert by_name["vpn-basic"].estimated_monthly_cost_usd == 27.0 - assert by_name["vpn-gw3"].estimated_monthly_cost_usd == 930.0 - assert by_name["er-ultra"].estimated_monthly_cost_usd == 670.0 + def test_id_empty_skips(self): + r = _make_gw_resource() + r.id = "" + net, res, mon = _make_clients(gw_resource=r) + assert _run(net, res, mon) == [] + def test_name_none_skips(self): + r = _make_gw_resource() + r.name = None + net, res, mon = _make_clients(gw_resource=r) + assert _run(net, res, mon) == [] -def test_find_idle_vnet_gateways_connection_status_none_not_flagged(mocker): - """Gateway with connections where status is None (unpopulated by list API) should NOT be flagged.""" - gateway_resources = [_make_gateway_resource("vpn-status-unknown")] + def test_name_empty_skips(self): + r = _make_gw_resource() + r.name = "" + net, res, mon = _make_clients(gw_resource=r) + assert _run(net, res, mon) == [] - gateways = { - "vpn-status-unknown": _make_gateway("vpn-status-unknown"), - } + def test_malformed_id_no_resource_group_skips(self): + r = _make_gw_resource(gw_id="/invalid/path/no/resourceGroups") + net, res, mon = _make_clients(gw_resource=r) + assert _run(net, res, mon) == [] - resource_client = mocker.MagicMock() - resource_client.resources.list.return_value = gateway_resources - network_client = mocker.MagicMock() - network_client.virtual_network_gateways.get.side_effect = lambda rg, name: gateways[name] - # Azure list_connections() often returns None status even for live connections - network_client.virtual_network_gateways.list_connections.return_value = [ - _make_connection("conn1", None) - ] +# --------------------------------------------------------------------------- +# TestRegionFilter +# --------------------------------------------------------------------------- - findings = find_idle_vnet_gateways( - subscription_id="sub-123", - credential=None, - client=network_client, - resource_client=resource_client, - ) - assert len(findings) == 0 +class TestRegionFilter: + def test_no_filter_emits(self): + net, res, mon = _make_clients(gw=_make_gw(location="westus")) + assert len(_run(net, res, mon)) == 1 + def test_matching_region_emits(self): + net, res, mon = _make_clients(gw=_make_gw(location="eastus")) + assert len(_run(net, res, mon, region_filter="eastus")) == 1 -def test_find_idle_vnet_gateways_disconnected_connection(mocker): - """Gateway with disconnected connection should be flagged.""" - gateway_resources = [_make_gateway_resource("vpn-disconnected")] + def test_matching_region_case_insensitive(self): + net, res, mon = _make_clients(gw=_make_gw(location="eastus")) + assert len(_run(net, res, mon, region_filter="EastUS")) == 1 - gateways = { - "vpn-disconnected": _make_gateway("vpn-disconnected"), - } + def test_mismatched_region_skips(self): + net, res, mon = _make_clients(gw=_make_gw(location="westus")) + assert _run(net, res, mon, region_filter="eastus") == [] - resource_client = mocker.MagicMock() - resource_client.resources.list.return_value = gateway_resources - network_client = mocker.MagicMock() - network_client.virtual_network_gateways.get.side_effect = lambda rg, name: gateways[name] - # Connection exists but is disconnected - network_client.virtual_network_gateways.list_connections.return_value = [ - _make_connection("conn1", "Disconnected") - ] +# --------------------------------------------------------------------------- +# TestProvisioningStateContract +# --------------------------------------------------------------------------- - findings = find_idle_vnet_gateways( - subscription_id="sub-123", - credential=None, - client=network_client, - resource_client=resource_client, - ) - assert len(findings) == 1 - assert findings[0].details["total_connections"] == 1 - assert findings[0].details["active_connections"] == 0 +class TestProvisioningStateContract: + def test_succeeded_emits(self): + net, res, mon = _make_clients(gw=_make_gw(provisioning_state="Succeeded")) + assert len(_run(net, res, mon)) == 1 + + def test_creating_skips(self): + net, res, mon = _make_clients(gw=_make_gw(provisioning_state="Creating")) + assert _run(net, res, mon) == [] + + def test_updating_skips(self): + net, res, mon = _make_clients(gw=_make_gw(provisioning_state="Updating")) + assert _run(net, res, mon) == [] + + def test_failed_skips(self): + net, res, mon = _make_clients(gw=_make_gw(provisioning_state="Failed")) + assert _run(net, res, mon) == [] + + def test_none_state_skips(self): + net, res, mon = _make_clients(gw=_make_gw(provisioning_state=None)) + assert _run(net, res, mon) == [] + + def test_conflict_state_skips(self): + gw = _make_gw(provisioning_state="Succeeded") + gw.properties = SimpleNamespace(provisioningState="Failed") + net, res, mon = _make_clients(gw=gw) + assert _run(net, res, mon) == [] + + def test_nested_only_succeeded_emits(self): + gw = _make_gw(provisioning_state=None) + gw.properties = SimpleNamespace(provisioningState="Succeeded") + net, res, mon = _make_clients(gw=gw) + assert len(_run(net, res, mon)) == 1 + + +# --------------------------------------------------------------------------- +# TestGatewayTypeContract +# --------------------------------------------------------------------------- + + +class TestGatewayTypeContract: + def test_vpn_type_emits(self): + net, res, mon = _make_clients(gw=_make_gw(gateway_type="Vpn")) + assert len(_run(net, res, mon)) == 1 + + def test_expressroute_type_emits(self): + gw = _make_er_gw() + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + assert len(_run(net, res, mon)) == 1 + + def test_local_network_gateway_type_skips(self): + net, res, mon = _make_clients(gw=_make_gw(gateway_type="LocalNetworkGateway")) + assert _run(net, res, mon) == [] + + def test_none_gateway_type_skips(self): + net, res, mon = _make_clients(gw=_make_gw(gateway_type=None)) + assert _run(net, res, mon) == [] + + def test_conflict_gateway_type_skips(self): + gw = _make_gw(gateway_type="Vpn") + gw.properties = SimpleNamespace(gatewayType="ExpressRoute") + net, res, mon = _make_clients(gw=gw) + assert _run(net, res, mon) == [] + + def test_nested_only_vpn_emits(self): + gw = _make_gw(gateway_type=None) + gw.properties = SimpleNamespace(gatewayType="Vpn") + net, res, mon = _make_clients(gw=gw) + assert len(_run(net, res, mon)) == 1 + + +# --------------------------------------------------------------------------- +# TestVirtualWanContract +# --------------------------------------------------------------------------- + + +class TestVirtualWanContract: + def test_allow_virtual_wan_true_skips(self): + net, res, mon = _make_clients(gw=_make_gw(allow_virtual_wan_traffic=True)) + assert _run(net, res, mon) == [] + + def test_allow_virtual_wan_false_emits(self): + net, res, mon = _make_clients(gw=_make_gw(allow_virtual_wan_traffic=False)) + assert len(_run(net, res, mon)) == 1 + + def test_allow_virtual_wan_none_emits(self): + net, res, mon = _make_clients(gw=_make_gw(allow_virtual_wan_traffic=None)) + assert len(_run(net, res, mon)) == 1 + + def test_allow_virtual_wan_camel_case_true_skips(self): + gw = _make_gw(allow_virtual_wan_traffic=None) + # Inject via camelCase attribute directly + gw.allowVirtualWanTraffic = True + net, res, mon = _make_clients(gw=gw) + assert _run(net, res, mon) == [] + + +# --------------------------------------------------------------------------- +# TestExpressRouteAdminStateContract +# --------------------------------------------------------------------------- + + +class TestExpressRouteAdminStateContract: + def test_er_admin_state_disabled_skips(self): + gw = _make_er_gw(admin_state="Disabled") + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + assert _run(net, res, mon) == [] + + def test_er_admin_state_enabled_emits(self): + gw = _make_er_gw(admin_state="Enabled") + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + assert len(_run(net, res, mon)) == 1 + + def test_er_admin_state_none_value_skips(self): + # admin_state attribute present but value is None -> unresolvable -> skip + gw = _make_er_gw(admin_state=None) + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + assert _run(net, res, mon) == [] + + def test_er_admin_state_unexpected_value_skips(self): + # Non-string value -> unresolvable -> skip (fail-closed) + gw = _make_er_gw(admin_state=0) + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + assert _run(net, res, mon) == [] + + def test_vpn_admin_state_disabled_does_not_skip(self): + # adminState only gates ER gateways + net, res, mon = _make_clients(gw=_make_gw(admin_state="Disabled")) + assert len(_run(net, res, mon)) == 1 + + +# --------------------------------------------------------------------------- +# TestConnectionsContract +# --------------------------------------------------------------------------- + + +class TestConnectionsContract: + def test_ipsec_connection_skips(self): + net, res, mon = _make_clients(connections=[_make_connection("IPsec")]) + assert _run(net, res, mon) == [] + + def test_vnet2vnet_connection_skips(self): + net, res, mon = _make_clients(connections=[_make_connection("Vnet2Vnet")]) + assert _run(net, res, mon) == [] + + def test_expressroute_connection_skips(self): + net, res, mon = _make_clients(connections=[_make_connection("ExpressRoute")]) + assert _run(net, res, mon) == [] + + def test_connection_type_none_unresolvable_skips(self): + # Connection with no connection_type field -> _resolve_connection_type returns None -> skip + conn = SimpleNamespace(properties=None) # no connection_type attr + net, res, mon = _make_clients(connections=[conn]) + assert _run(net, res, mon) == [] + + def test_connection_type_conflict_skips(self): + conn = SimpleNamespace( + connection_type="IPsec", + properties=SimpleNamespace(connectionType="ExpressRoute"), + ) + net, res, mon = _make_clients(connections=[conn]) + assert _run(net, res, mon) == [] + + def test_out_of_scope_connection_type_emits(self): + # "MicrosoftPeering" is not in IN_SCOPE, and on VPN gateway bypass check is skipped + conn = _make_connection("MicrosoftPeering") + net, res, mon = _make_clients(connections=[conn]) + assert len(_run(net, res, mon)) == 1 + + def test_er_out_of_scope_connection_with_bypass_true_skips(self): + # On ER gateway: non-in-scope connection with bypass=True -> skip + conn = SimpleNamespace( + connection_type="MicrosoftPeering", + express_route_gateway_bypass=True, + properties=None, + ) + gw = _make_er_gw() + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, connections=[conn], gw_resource=gw_resource) + assert _run(net, res, mon) == [] + + def test_er_out_of_scope_connection_with_bypass_false_emits(self): + conn = SimpleNamespace( + connection_type="MicrosoftPeering", + express_route_gateway_bypass=False, + properties=None, + ) + gw = _make_er_gw() + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, connections=[conn], gw_resource=gw_resource) + assert len(_run(net, res, mon)) == 1 + + def test_notconnected_ipsec_connection_still_skips(self): + # spec 9.2.3: NotConnected status is still a configured connection + conn = SimpleNamespace( + connection_type="IPsec", + express_route_gateway_bypass=None, + properties=None, + ) + net, res, mon = _make_clients(connections=[conn]) + assert _run(net, res, mon) == [] + + def test_list_connections_http_error_skips_gateway(self): + net, res, mon = _make_clients() + net.virtual_network_gateways.list_connections.side_effect = HttpResponseError() + assert _run(net, res, mon) == [] + + def test_list_connections_service_request_error_skips_gateway(self): + net, res, mon = _make_clients() + net.virtual_network_gateways.list_connections.side_effect = ServiceRequestError("err") + assert _run(net, res, mon) == [] + + +# --------------------------------------------------------------------------- +# TestP2SContract +# --------------------------------------------------------------------------- + + +class TestP2SContract: + def test_vpn_vcc_explicit_null_emits(self): + # vpn_client_configuration attribute IS present, value is explicitly None -> confirmed no P2S + net, res, mon = _make_clients(gw=_make_gw(vpn_client_configuration=None)) + assert len(_run(net, res, mon)) == 1 + + def test_vpn_vcc_absent_from_all_sources_skips(self): + # vpn_client_configuration attribute absent entirely -> unresolvable -> skip + gw = SimpleNamespace( + id=_GW_ID, + name=_GW_NAME, + location="eastus", + provisioning_state="Succeeded", + gateway_type="Vpn", + sku=SimpleNamespace(name="VpnGw1", tier="VpnGw1"), + tags=None, + allow_virtual_wan_traffic=None, + admin_state=None, + properties=None, + # deliberately no vpn_client_configuration attribute + ) + net, res, mon = _make_clients(gw=gw) + assert _run(net, res, mon) == [] + + def test_vpn_p2s_address_pool_configured_skips(self): + vcc = SimpleNamespace(vpn_client_address_pool=["10.0.0.0/24"]) + net, res, mon = _make_clients(gw=_make_gw(vpn_client_configuration=vcc)) + assert _run(net, res, mon) == [] + + def test_vpn_p2s_aad_tenant_configured_skips(self): + vcc = SimpleNamespace(aad_tenant="https://login.microsoftonline.com/tenant-id") + net, res, mon = _make_clients(gw=_make_gw(vpn_client_configuration=vcc)) + assert _run(net, res, mon) == [] + + def test_vpn_p2s_protocols_configured_skips(self): + vcc = SimpleNamespace(vpn_client_protocols=["OpenVPN"]) + net, res, mon = _make_clients(gw=_make_gw(vpn_client_configuration=vcc)) + assert _run(net, res, mon) == [] + + def test_vpn_p2s_conflict_unresolvable_skips(self): + # sdk says non-empty, camelCase says empty -> conflict -> None -> skip + vcc = SimpleNamespace( + vpn_client_address_pool=["10.0.0.0/24"], + vpnClientAddressPool=[], # camelCase disagrees + ) + net, res, mon = _make_clients(gw=_make_gw(vpn_client_configuration=vcc)) + assert _run(net, res, mon) == [] + + def test_er_gateway_p2s_config_does_not_skip(self): + # P2S check is VPN-only; ER gateways ignore vpn_client_configuration + vcc = SimpleNamespace(vpn_client_address_pool=["10.0.0.0/24"]) + gw = _make_er_gw() + gw.vpn_client_configuration = vcc + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + assert len(_run(net, res, mon)) == 1 + + +# --------------------------------------------------------------------------- +# TestMetricContract +# --------------------------------------------------------------------------- + + +class TestMetricContract: + def test_vpn_all_metrics_zero_emits(self): + net, res, mon = _make_clients(metric_response=_zero_metric_response()) + assert len(_run(net, res, mon)) == 1 + + def test_vpn_any_metric_active_skips(self): + net, res, mon = _make_clients(metric_response=_active_metric_response()) + assert _run(net, res, mon) == [] + + def test_vpn_metric_sparse_unknown_skips(self): + # < 80% coverage -> UNKNOWN -> not ZERO -> skip + net, res, mon = _make_clients(metric_response=_sparse_metric_response()) + assert _run(net, res, mon) == [] + + def test_vpn_metric_exception_unknown_skips(self): + net, res, mon = _make_clients() + mon.metrics.list.side_effect = HttpResponseError() + assert _run(net, res, mon) == [] + + def test_er_standard_uses_er_metrics(self): + gw = _make_er_gw(sku_tier="HighPerformance") + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + findings = _run(net, res, mon) + assert len(findings) == 1 + used = findings[0].details["metrics_used"] + assert "ExpressRouteGatewayBitsPerSecond" in used + + def test_er_scalable_uses_scalable_metrics(self): + gw = _make_er_gw(sku_tier="ErGwScale") + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + findings = _run(net, res, mon) + assert len(findings) == 1 + used = findings[0].details["metrics_used"] + assert "ScalableExpressRouteGatewayBitsPerSecond" in used + + def test_er_unknown_sku_tier_skips(self): + gw = _make_er_gw(sku_tier=None) + gw.sku = SimpleNamespace(name=None, tier=None) + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + assert _run(net, res, mon) == [] + + def test_vpn_calls_three_metrics(self): + net, res, mon = _make_clients() + _run(net, res, mon) + assert mon.metrics.list.call_count == 3 + + def test_er_standard_calls_three_metrics(self): + gw = _make_er_gw(sku_tier="Standard") + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + _run(net, res, mon) + assert mon.metrics.list.call_count == 3 + + def test_average_only_active_metric_skips(self): + # total is None for all datapoints; only average is populated and non-zero + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=None, + average=float(i + 1), + maximum=None, + ) + for i in range(30) + ] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + net, res, mon = _make_clients(metric_response=response) + assert _run(net, res, mon) == [] + + def test_average_only_zero_metric_emits(self): + # total absent; average=0.0 for all -> ZERO -> emits + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=None, + average=0.0, + maximum=None, + ) + for i in range(30) + ] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + net, res, mon = _make_clients(metric_response=response) + assert len(_run(net, res, mon)) == 1 + + def test_maximum_only_active_metric_skips(self): + # Only maximum is populated and non-zero + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=None, + average=None, + maximum=1000.0, + ) + for i in range(30) + ] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + net, res, mon = _make_clients(metric_response=response) + assert _run(net, res, mon) == [] + + def test_metric_with_timestamp_none_datapoint_skips(self): + # 29 valid zero datapoints + 1 with timestamp=None -> fail-closed -> skip + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace(timestamp=window_start + timedelta(days=i, hours=12), total=0.0) + for i in range(29) + ] + [SimpleNamespace(timestamp=None, total=0.0)] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + net, res, mon = _make_clients(metric_response=response) + assert _run(net, res, mon) == [] + + def test_metric_with_non_datetime_timestamp_skips(self): + # One datapoint has a string timestamp instead of datetime -> fail-closed -> skip + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace(timestamp=window_start + timedelta(days=i, hours=12), total=0.0) + for i in range(29) + ] + [SimpleNamespace(timestamp="2024-01-15T00:00:00Z", total=0.0)] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + net, res, mon = _make_clients(metric_response=response) + assert _run(net, res, mon) == [] + + def test_all_aggregations_none_skips(self): + # total, average, maximum all None -> no observed buckets -> UNKNOWN -> skip + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=None, + average=None, + maximum=None, + ) + for i in range(30) + ] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + net, res, mon = _make_clients(metric_response=response) + assert _run(net, res, mon) == [] + + +# --------------------------------------------------------------------------- +# TestFailureBehavior +# --------------------------------------------------------------------------- + + +class TestFailureBehavior: + def test_http_error_on_get_skips_gateway(self): + net, res, mon = _make_clients() + net.virtual_network_gateways.get.side_effect = HttpResponseError() + assert _run(net, res, mon) == [] + + def test_service_request_error_on_get_skips_gateway(self): + net, res, mon = _make_clients() + net.virtual_network_gateways.get.side_effect = ServiceRequestError("transport err") + assert _run(net, res, mon) == [] + + def test_service_response_error_on_get_skips_gateway(self): + net, res, mon = _make_clients() + net.virtual_network_gateways.get.side_effect = ServiceResponseError("stream closed") + assert _run(net, res, mon) == [] + + def test_runtime_error_on_get_propagates(self): + net, res, mon = _make_clients() + net.virtual_network_gateways.get.side_effect = RuntimeError("unexpected") + with pytest.raises(RuntimeError): + _run(net, res, mon) + + def test_http_error_on_list_connections_skips_gateway(self): + net, res, mon = _make_clients() + net.virtual_network_gateways.list_connections.side_effect = HttpResponseError() + assert _run(net, res, mon) == [] + + def test_service_request_error_on_list_connections_skips_gateway(self): + net, res, mon = _make_clients() + net.virtual_network_gateways.list_connections.side_effect = ServiceRequestError("err") + assert _run(net, res, mon) == [] + + def test_one_gateway_http_error_other_still_emits(self): + gw2 = _make_gw(name="gw2", gw_id=f"{_GW_ID}-2") + r1 = _make_gw_resource(name="gw1", gw_id=f"{_GW_ID}-1") + r2 = _make_gw_resource(name="gw2", gw_id=f"{_GW_ID}-2") + + def _get(rg, name): + if name == "gw1": + raise HttpResponseError() + return gw2 + + net = MagicMock() + net.virtual_network_gateways.get.side_effect = _get + net.virtual_network_gateways.list_connections.return_value = iter([]) + + res = MagicMock() + res.resources.list.return_value = [r1, r2] + + mon = MagicMock() + mon.metrics.list.return_value = _zero_metric_response() + + findings = find_idle_vnet_gateways( + subscription_id=_SUB, + credential=None, + client=net, + resource_client=res, + monitor_client=mon, + ) + assert len(findings) == 1 + assert findings[0].details["resource_name"] == "gw2" + + +# --------------------------------------------------------------------------- +# TestFindingShape +# --------------------------------------------------------------------------- + + +class TestFindingShape: + def setup_method(self): + net, res, mon = _make_clients() + findings = _run(net, res, mon) + assert findings + self.f = findings[0] + + def test_provider(self): + assert self.f.provider == "azure" + + def test_rule_id(self): + assert self.f.rule_id == "azure.virtual_network_gateway.idle" + + def test_resource_type(self): + assert self.f.resource_type == "azure.virtual_network_gateway" + + def test_resource_id_is_arm_id(self): + assert self.f.resource_id == _GW_ID + + def test_region_normalized(self): + assert self.f.region == "eastus" + + def test_confidence_high(self): + assert self.f.confidence.value == "high" + + def test_risk_high(self): + assert self.f.risk.value == "high" + + def test_estimated_cost_none(self): + assert self.f.estimated_monthly_cost_usd is None + + def test_details_resource_name(self): + assert self.f.details["resource_name"] == _GW_NAME + + def test_details_resource_group(self): + assert self.f.details["resource_group"] == _RG + + def test_details_subscription_id(self): + assert self.f.details["subscription_id"] == _SUB + + def test_details_gateway_type(self): + assert self.f.details["gateway_type"] == "Vpn" + + def test_details_provisioning_state(self): + assert self.f.details["provisioning_state"] == "Succeeded" + + def test_details_sku_name(self): + assert self.f.details["sku_name"] == "VpnGw1" + + def test_details_sku_tier(self): + assert self.f.details["sku_tier"] == "VpnGw1" + + def test_details_tags_empty_dict(self): + assert self.f.details["tags"] == {} + + def test_details_p2s_configured_false_for_vpn(self): + assert self.f.details["p2s_configured"] is False + + def test_details_idle_window_days(self): + assert self.f.details["idle_window_days"] == 30 + + def test_details_metrics_used_vpn(self): + used = self.f.details["metrics_used"] + assert "AverageBandwidth" in used + assert "InboundFlowsCount" in used + assert "OutboundFlowsCount" in used + + def test_evidence_has_signals_used(self): + assert self.f.evidence.signals_used + + def test_evidence_has_signals_not_checked(self): + assert self.f.evidence.signals_not_checked + + def test_er_p2s_configured_is_none(self): + gw = _make_er_gw() + gw_resource = _make_gw_resource(name=_ER_GW_NAME, gw_id=_ER_GW_ID) + net, res, mon = _make_clients(gw=gw, gw_resource=gw_resource) + findings = _run(net, res, mon) + assert findings[0].details["p2s_configured"] is None + + +# --------------------------------------------------------------------------- +# TestResolveProvisioningState (unit) +# --------------------------------------------------------------------------- + + +class TestResolveProvisioningState: + def test_sdk_only(self): + gw = SimpleNamespace(provisioning_state="Succeeded") + assert _resolve_provisioning_state(gw) == "Succeeded" + + def test_nested_only(self): + gw = SimpleNamespace( + provisioning_state=None, + properties=SimpleNamespace(provisioningState="Succeeded"), + ) + assert _resolve_provisioning_state(gw) == "Succeeded" + + def test_nested_snake_case(self): + gw = SimpleNamespace( + provisioning_state=None, + properties=SimpleNamespace(provisioning_state="Succeeded", provisioningState=None), + ) + assert _resolve_provisioning_state(gw) == "Succeeded" + + def test_both_match(self): + gw = SimpleNamespace( + provisioning_state="Succeeded", + properties=SimpleNamespace(provisioningState="Succeeded"), + ) + assert _resolve_provisioning_state(gw) == "Succeeded" + + def test_conflict_returns_none(self): + gw = SimpleNamespace( + provisioning_state="Succeeded", + properties=SimpleNamespace(provisioningState="Failed"), + ) + assert _resolve_provisioning_state(gw) is None + + def test_both_absent_returns_none(self): + gw = SimpleNamespace(provisioning_state=None, properties=None) + assert _resolve_provisioning_state(gw) is None + + +# --------------------------------------------------------------------------- +# TestResolveGatewayType (unit) +# --------------------------------------------------------------------------- + + +class TestResolveGatewayType: + def test_sdk_vpn(self): + gw = SimpleNamespace(gateway_type="Vpn") + assert _resolve_gateway_type(gw) == "Vpn" + + def test_nested_expressroute(self): + gw = SimpleNamespace( + gateway_type=None, + properties=SimpleNamespace(gatewayType="ExpressRoute"), + ) + assert _resolve_gateway_type(gw) == "ExpressRoute" + + def test_conflict_returns_none(self): + gw = SimpleNamespace( + gateway_type="Vpn", + properties=SimpleNamespace(gatewayType="ExpressRoute"), + ) + assert _resolve_gateway_type(gw) is None + + def test_both_absent_returns_none(self): + gw = SimpleNamespace(gateway_type=None, properties=None) + assert _resolve_gateway_type(gw) is None + + +# --------------------------------------------------------------------------- +# TestResolveConnectionType (unit) +# --------------------------------------------------------------------------- + + +class TestResolveConnectionType: + def test_sdk_ipsec(self): + conn = SimpleNamespace(connection_type="IPsec", properties=None) + assert _resolve_connection_type(conn) == "IPsec" + + def test_nested_vnet2vnet(self): + conn = SimpleNamespace( + connection_type=None, + properties=SimpleNamespace(connectionType="Vnet2Vnet"), + ) + assert _resolve_connection_type(conn) == "Vnet2Vnet" + + def test_conflict_returns_none(self): + conn = SimpleNamespace( + connection_type="IPsec", + properties=SimpleNamespace(connectionType="Vnet2Vnet"), + ) + assert _resolve_connection_type(conn) is None + + def test_both_absent_returns_none(self): + conn = SimpleNamespace(connection_type=None, properties=None) + assert _resolve_connection_type(conn) is None + + +# --------------------------------------------------------------------------- +# TestResolveExpressRouteBypass (unit) +# --------------------------------------------------------------------------- + + +class TestResolveExpressRouteBypass: + def test_bypass_true(self): + conn = SimpleNamespace(express_route_gateway_bypass=True, properties=None) + assert _resolve_express_route_bypass(conn) is True + + def test_bypass_false(self): + conn = SimpleNamespace(express_route_gateway_bypass=False, properties=None) + assert _resolve_express_route_bypass(conn) is False + + def test_bypass_non_bool_returns_none(self): + conn = SimpleNamespace(express_route_gateway_bypass="true", properties=None) + assert _resolve_express_route_bypass(conn) is None + + def test_sdk_nested_conflict_returns_none(self): + conn = SimpleNamespace( + express_route_gateway_bypass=True, + properties=SimpleNamespace(expressRouteGatewayBypass=False), + ) + assert _resolve_express_route_bypass(conn) is None + + def test_nested_true(self): + conn = SimpleNamespace(properties=SimpleNamespace(expressRouteGatewayBypass=True)) + # No sdk attr (getattr returns _SENTINEL) + assert _resolve_express_route_bypass(conn) is True + + +# --------------------------------------------------------------------------- +# TestResolveP2SConfigured (unit) +# --------------------------------------------------------------------------- + + +class TestResolveP2SConfigured: + def test_vcc_absent_from_all_sources_returns_none(self): + # Attribute not present anywhere -> P2S state unresolvable -> None + gw = SimpleNamespace() # no vpn_client_configuration attr, no properties + assert _resolve_p2s_configured(gw) is None + + def test_vpn_client_configuration_explicit_null_returns_false(self): + # SDK explicitly returns null for the field -> confirmed no P2S + gw = SimpleNamespace(vpn_client_configuration=None) + assert _resolve_p2s_configured(gw) is False + + def test_address_pool_nonempty_returns_true(self): + vcc = SimpleNamespace(vpn_client_address_pool=["10.0.0.0/24"]) + gw = SimpleNamespace(vpn_client_configuration=vcc) + assert _resolve_p2s_configured(gw) is True + + def test_all_fields_empty_returns_false(self): + vcc = SimpleNamespace( + vpn_client_address_pool=[], + vng_client_connection_configurations=[], + vpn_client_root_certificates=[], + vpn_client_revoked_certificates=[], + vpn_authentication_types=[], + vpn_client_protocols=[], + aad_tenant=None, + aad_audience=None, + aad_issuer=None, + ) + gw = SimpleNamespace(vpn_client_configuration=vcc) + assert _resolve_p2s_configured(gw) is False + + def test_aad_tenant_nonempty_returns_true(self): + vcc = SimpleNamespace(aad_tenant="https://login.microsoftonline.com/tenant") + gw = SimpleNamespace(vpn_client_configuration=vcc) + assert _resolve_p2s_configured(gw) is True + + def test_field_conflict_returns_none(self): + vcc = SimpleNamespace( + vpn_client_address_pool=["10.0.0.0/24"], + vpnClientAddressPool=[], # camelCase says empty + ) + gw = SimpleNamespace(vpn_client_configuration=vcc) + assert _resolve_p2s_configured(gw) is None + + def test_vcc_conflict_sdk_present_nested_absent_returns_none(self): + # SDK says vcc is non-None, nested says None -> conflict + gw = SimpleNamespace( + vpn_client_configuration=SimpleNamespace(aad_tenant=None), + properties=SimpleNamespace(vpnClientConfiguration=None), + ) + assert _resolve_p2s_configured(gw) is None + + +# --------------------------------------------------------------------------- +# TestErMetricFamily (unit) +# --------------------------------------------------------------------------- + + +class TestErMetricFamily: + def test_none_tier_returns_none(self): + assert _er_metric_family(None) is None + + def test_er_gw_scale_returns_scalable(self): + result = _er_metric_family("ErGwScale") + assert "ScalableExpressRouteGatewayBitsPerSecond" in result + + def test_standard_returns_standard(self): + result = _er_metric_family("Standard") + assert "ExpressRouteGatewayBitsPerSecond" in result + + def test_high_performance_returns_standard(self): + result = _er_metric_family("HighPerformance") + assert "ExpressRouteGatewayBitsPerSecond" in result + + def test_ultra_performance_returns_standard(self): + result = _er_metric_family("UltraPerformance") + assert "ExpressRouteGatewayBitsPerSecond" in result + + def test_er_gw1_az_returns_standard(self): + result = _er_metric_family("ErGw1AZ") + assert "ExpressRouteGatewayBitsPerSecond" in result + + +# --------------------------------------------------------------------------- +# TestGatewayAdminStateDisabled (unit) +# --------------------------------------------------------------------------- + + +class TestGatewayAdminStateDisabled: + def test_disabled_snake_case(self): + gw = SimpleNamespace(admin_state="Disabled") + assert _gateway_admin_state_disabled(gw) is True + + def test_enabled_snake_case(self): + gw = SimpleNamespace(admin_state="Enabled") + assert _gateway_admin_state_disabled(gw) is False + + def test_absent_from_all_sources_returns_none(self): + # Field not present anywhere -> unresolvable -> None (fail-closed) + gw = SimpleNamespace() + assert _gateway_admin_state_disabled(gw) is None + + def test_none_value_returns_none(self): + # Attribute present but value is None -> unresolvable + gw = SimpleNamespace(admin_state=None) + assert _gateway_admin_state_disabled(gw) is None + + def test_non_string_value_returns_none(self): + gw = SimpleNamespace(admin_state=42) + assert _gateway_admin_state_disabled(gw) is None + + def test_conflict_returns_none(self): + gw = SimpleNamespace( + admin_state="Enabled", + properties=SimpleNamespace(adminState="Disabled"), + ) + assert _gateway_admin_state_disabled(gw) is None + + def test_disabled_camel_case(self): + gw = SimpleNamespace(adminState="Disabled") + assert _gateway_admin_state_disabled(gw) is True + + def test_disabled_in_properties(self): + gw = SimpleNamespace(properties=SimpleNamespace(admin_state="Disabled")) + assert _gateway_admin_state_disabled(gw) is True + + def test_enabled_in_properties(self): + gw = SimpleNamespace(properties=SimpleNamespace(adminState="Enabled")) + assert _gateway_admin_state_disabled(gw) is False + + +# --------------------------------------------------------------------------- +# TestGatewayHasVirtualWanTraffic (unit) +# --------------------------------------------------------------------------- + + +class TestGatewayHasVirtualWanTraffic: + def test_true_snake_case(self): + gw = SimpleNamespace(allow_virtual_wan_traffic=True) + assert _gateway_has_virtual_wan_traffic(gw) is True + + def test_false_returns_false(self): + gw = SimpleNamespace(allow_virtual_wan_traffic=False) + assert _gateway_has_virtual_wan_traffic(gw) is False + + def test_none_returns_false(self): + gw = SimpleNamespace() + assert _gateway_has_virtual_wan_traffic(gw) is False + + def test_true_camel_case(self): + gw = SimpleNamespace(allowVirtualWanTraffic=True) + assert _gateway_has_virtual_wan_traffic(gw) is True + + def test_true_in_properties(self): + gw = SimpleNamespace(properties=SimpleNamespace(allow_virtual_wan_traffic=True)) + assert _gateway_has_virtual_wan_traffic(gw) is True + + +# --------------------------------------------------------------------------- +# TestConnectionsGate (unit) +# --------------------------------------------------------------------------- + + +class TestConnectionsGate: + def _net(self, connections): + net = MagicMock() + net.virtual_network_gateways.list_connections.return_value = iter(connections) + return net + + def test_no_connections_returns_true(self): + result = _connections_gate(self._net([]), "rg", "gw", "Vpn") + assert result is True + + def test_ipsec_connection_returns_false(self): + result = _connections_gate(self._net([_make_connection("IPsec")]), "rg", "gw", "Vpn") + assert result is False + + def test_vnet2vnet_returns_false(self): + result = _connections_gate(self._net([_make_connection("Vnet2Vnet")]), "rg", "gw", "Vpn") + assert result is False + + def test_expressroute_returns_false(self): + result = _connections_gate( + self._net([_make_connection("ExpressRoute")]), "rg", "gw", "ExpressRoute" + ) + assert result is False + + def test_unknown_type_returns_false(self): + conn = SimpleNamespace(properties=None) # no connection_type + result = _connections_gate(self._net([conn]), "rg", "gw", "Vpn") + assert result is False + + def test_out_of_scope_type_vpn_returns_true(self): + # "MicrosoftPeering" is not in-scope; no bypass check for VPN + result = _connections_gate( + self._net([_make_connection("MicrosoftPeering")]), "rg", "gw", "Vpn" + ) + assert result is True + + def test_out_of_scope_er_bypass_true_returns_false(self): + conn = SimpleNamespace( + connection_type="MicrosoftPeering", + express_route_gateway_bypass=True, + properties=None, + ) + result = _connections_gate(self._net([conn]), "rg", "gw", "ExpressRoute") + assert result is False + + def test_out_of_scope_er_bypass_false_returns_true(self): + conn = SimpleNamespace( + connection_type="MicrosoftPeering", + express_route_gateway_bypass=False, + properties=None, + ) + result = _connections_gate(self._net([conn]), "rg", "gw", "ExpressRoute") + assert result is True + + +# --------------------------------------------------------------------------- +# TestEvaluateMetric (unit) +# --------------------------------------------------------------------------- + + +class TestEvaluateMetric: + def _mon(self, response=None, side_effect=None): + mon = MagicMock() + if side_effect is not None: + mon.metrics.list.side_effect = side_effect + else: + mon.metrics.list.return_value = response + return mon + + def _window(self): + now = datetime.now(timezone.utc) + return now - timedelta(days=30), now + + def test_zero_totals_returns_zero(self): + start, end = self._window() + result = _evaluate_metric( + self._mon(_zero_metric_response()), "resource_id", "Metric", start, end + ) + assert result == _MetricResult.ZERO + + def test_nonzero_totals_returns_active(self): + start, end = self._window() + result = _evaluate_metric( + self._mon(_active_metric_response()), "resource_id", "Metric", start, end + ) + assert result == _MetricResult.ACTIVE + + def test_sparse_data_returns_unknown(self): + start, end = self._window() + result = _evaluate_metric( + self._mon(_sparse_metric_response()), "resource_id", "Metric", start, end + ) + assert result == _MetricResult.UNKNOWN + + def test_exception_returns_unknown(self): + start, end = self._window() + result = _evaluate_metric( + self._mon(side_effect=HttpResponseError()), + "resource_id", + "Metric", + start, + end, + ) + assert result == _MetricResult.UNKNOWN + + def test_empty_value_list_returns_unknown(self): + start, end = self._window() + mon = self._mon(SimpleNamespace(value=[])) + result = _evaluate_metric(mon, "resource_id", "Metric", start, end) + assert result == _MetricResult.UNKNOWN + + def test_no_value_attr_returns_unknown(self): + start, end = self._window() + mon = self._mon(SimpleNamespace()) # no .value attribute + result = _evaluate_metric(mon, "resource_id", "Metric", start, end) + assert result == _MetricResult.UNKNOWN + + def test_average_only_nonzero_returns_active(self): + start, end = self._window() + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=None, + average=42.0, + maximum=None, + ) + for i in range(30) + ] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + result = _evaluate_metric(self._mon(response), "res", "Metric", start, end) + assert result == _MetricResult.ACTIVE + + def test_average_only_zero_returns_zero(self): + start, end = self._window() + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=None, + average=0.0, + maximum=None, + ) + for i in range(30) + ] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + result = _evaluate_metric(self._mon(response), "res", "Metric", start, end) + assert result == _MetricResult.ZERO + + def test_all_aggregations_none_returns_unknown(self): + start, end = self._window() + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=None, + average=None, + maximum=None, + ) + for i in range(30) + ] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + result = _evaluate_metric(self._mon(response), "res", "Metric", start, end) + assert result == _MetricResult.UNKNOWN + + def test_max_takes_highest_aggregation(self): + # average=0 but maximum=5 -> ACTIVE (maximum wins) + start, end = self._window() + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace( + timestamp=window_start + timedelta(days=i, hours=12), + total=0.0, + average=0.0, + maximum=5.0, + ) + for i in range(30) + ] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + result = _evaluate_metric(self._mon(response), "res", "Metric", start, end) + assert result == _MetricResult.ACTIVE + + def test_timestamp_none_returns_unknown(self): + # Even 29 valid zero datapoints + 1 with timestamp=None -> fail-closed -> UNKNOWN + start, end = self._window() + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace(timestamp=window_start + timedelta(days=i, hours=12), total=0.0) + for i in range(29) + ] + [SimpleNamespace(timestamp=None, total=0.0)] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + result = _evaluate_metric(self._mon(response), "res", "Metric", start, end) + assert result == _MetricResult.UNKNOWN + + def test_timestamp_not_datetime_returns_unknown(self): + # String timestamp instead of datetime object -> fail-closed -> UNKNOWN + start, end = self._window() + now_utc = datetime.now(timezone.utc) + window_start = now_utc - timedelta(days=30) + data = [ + SimpleNamespace(timestamp=window_start + timedelta(days=i, hours=12), total=0.0) + for i in range(29) + ] + [SimpleNamespace(timestamp="2024-01-15T00:00:00Z", total=0.0)] + response = SimpleNamespace(value=[SimpleNamespace(timeseries=[SimpleNamespace(data=data)])]) + result = _evaluate_metric(self._mon(response), "res", "Metric", start, end) + assert result == _MetricResult.UNKNOWN + + def test_datapoints_outside_window_ignored(self): + start, end = self._window() + # All datapoints before the window + data = [SimpleNamespace(timestamp=start - timedelta(days=1), total=100.0)] + ts = SimpleNamespace(data=data) + metric = SimpleNamespace(timeseries=[ts]) + response = SimpleNamespace(value=[metric]) + # Coverage: 0/31 buckets -> UNKNOWN + result = _evaluate_metric(self._mon(response), "resource_id", "Metric", start, end) + assert result == _MetricResult.UNKNOWN From ba2792736a818b11f2d27991ddc41551403d37d0 Mon Sep 17 00:00:00 2001 From: Suresh Mandalapu Date: Thu, 23 Apr 2026 23:26:12 +0100 Subject: [PATCH 5/5] pr review --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0d10470..98c0db2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "cleancloud" -version = "1.24.0" +version = "1.25.0" description = "Read-only cloud hygiene for AWS, Azure, and GCP. Multi-account org scanning, CI/CD enforcement, and deterministic cost modeling. No agents, no telemetry." readme = "README.md" requires-python = ">=3.10"