Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
dafb3ee
Azure: detect and report missing custom data when IMDS expects it
cadejacobson Feb 26, 2026
b5a68cb
azure: test missing customdata path
cadejacobson Feb 26, 2026
044f1eb
Move location of experimental flag
cadejacobson Mar 2, 2026
6c3d1f3
Remove unused feature flag
cadejacobson Mar 2, 2026
9418021
Linting
cadejacobson Mar 2, 2026
a042b66
Merge main into branch azure-missing-customdata
cadejacobson Mar 17, 2026
a598ca1
Linting
cadejacobson Mar 17, 2026
3b3b08f
Create ReportableErrorCustomData
cadejacobson Mar 30, 2026
819e0e8
Structure comments like sentences
cadejacobson Mar 30, 2026
917c498
Parametrize the TestHasCustomDataFromImds
cadejacobson Mar 30, 2026
7bcc33a
Update userdata reference to custom data
cadejacobson Mar 30, 2026
2761645
Correct linting
cadejacobson Mar 30, 2026
3abbad3
Black formatter
cadejacobson Mar 30, 2026
2b58a18
Improve reportableError string
cadejacobson Mar 30, 2026
414a9ae
Support PPS != None versions
cadejacobson Mar 30, 2026
b454a07
Remove diagnostic message about IMDS
cadejacobson Apr 1, 2026
d576fd7
Add testing coverage to TestProvisioning
cadejacobson Apr 1, 2026
4fe61ff
Formatting
cadejacobson Apr 1, 2026
362e15d
Remove unclean tests
cadejacobson Apr 2, 2026
835512e
Parameterize reporting tests
cadejacobson Apr 2, 2026
0e7556f
Parametrize no reporting tests
cadejacobson Apr 2, 2026
29bbb41
Merge all tests into one with another matrix
cadejacobson Apr 3, 2026
323f9e7
Fix linting
cadejacobson Apr 3, 2026
8e9c10a
Merge branch 'main' into azure-missing-customdata
cadejacobson Apr 3, 2026
c1d6af2
Merge branch 'main' into azure-missing-customdata
cadejacobson Apr 3, 2026
bf27fd7
Add error message if flag is not enabled
cadejacobson Apr 14, 2026
85a5d65
Merge branch 'main' into azure-missing-customdata
cadejacobson Apr 14, 2026
b05bacf
Linting
cadejacobson Apr 14, 2026
fa26b2d
Improve error message with IMDS response
cadejacobson Apr 14, 2026
9647d27
Remove a call to _hascustomdata_from_imds
cadejacobson Apr 14, 2026
cbb26fc
Ruff formatting
cadejacobson Apr 14, 2026
96f9cf0
Improve docstring and hascustomdata branching
cadejacobson Apr 15, 2026
4a5ec4c
Add type hint to hascustomdata_from_imds
cadejacobson May 1, 2026
30eba62
Update imds to IMDS in comment
cadejacobson May 1, 2026
8264055
Standardize on self.seed instead of ovf_source
cadejacobson May 4, 2026
734b5dd
Merge branch 'main' into azure-missing-customdata
cadejacobson May 4, 2026
bc6954e
Merge branch 'main' into azure-missing-customdata
cadejacobson May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 41 additions & 9 deletions cloudinit/sources/DataSourceAzure.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ def get_resource_disk_on_freebsd(port_id) -> Optional[str]:
"disk_aliases": {"ephemeral0": RESOURCE_DISK_PATH},
"apply_network_config": True, # Use IMDS published network configuration
"apply_network_config_for_secondary_ips": True, # Configure secondary ips
"experimental_fail_on_missing_customdata": False,
Comment thread
cadejacobson marked this conversation as resolved.
"experimental_skip_ready_report": False, # Skip final ready report
}

Expand Down Expand Up @@ -662,7 +663,6 @@ def crawl_metadata(self):
# it determines the value of ret. More specifically, the first one in
# the candidate list determines the path to take in order to get the
# metadata we need.
ovf_source = None
md = {"local-hostname": ""}
cfg = {"system_info": {"default_user": {"name": ""}}}
userdata_raw = ""
Expand All @@ -684,9 +684,9 @@ def crawl_metadata(self):
else:
md, userdata_raw, cfg, files = load_azure_ds_dir(src)

ovf_source = src
self.seed = src
report_diagnostic_event(
"Found provisioning metadata in %s" % ovf_source,
"Found provisioning metadata in %s" % self.seed,
logger_func=LOG.debug,
)
break
Expand Down Expand Up @@ -714,7 +714,7 @@ def crawl_metadata(self):
# not have UDF support. In either case, require IMDS metadata.
# If we require IMDS metadata, try harder to obtain networking, waiting
# for at least 20 minutes. Otherwise only wait 5 minutes.
requires_imds_metadata = bool(self._iso_dev) or ovf_source is None
requires_imds_metadata = bool(self._iso_dev) or self.seed is None
timeout_minutes = 20 if requires_imds_metadata else 5
try:
self._setup_ephemeral_networking(timeout_minutes=timeout_minutes)
Expand All @@ -730,14 +730,18 @@ def crawl_metadata(self):

imds_md = self.get_metadata_from_imds(report_failure=True)

if not imds_md and ovf_source is None:
if not imds_md and self.seed is None:
msg = "No OVF or IMDS available"
report_diagnostic_event(msg)
raise sources.InvalidMetaDataException(msg)

self.seed = self.seed or "IMDS"

# Refresh PPS type using metadata.
pps_type = self._determine_pps_type(cfg, imds_md)
if pps_type != PPSType.NONE:
self.seed = "IMDS"

if util.is_FreeBSD():
msg = "Free BSD is not supported for PPS VMs"
report_diagnostic_event(msg, logger_func=LOG.error)
Expand Down Expand Up @@ -777,7 +781,6 @@ def crawl_metadata(self):
# Report errors if IMDS network configuration is missing data.
self.validate_imds_network_metadata(imds_md=imds_md)

self.seed = ovf_source or "IMDS"
crawled_data.update(
{
"cfg": cfg,
Expand Down Expand Up @@ -816,9 +819,31 @@ def crawl_metadata(self):
logger_func=LOG.debug,
)

# only use userdata from imds if OVF did not provide custom data
# userdata provided by IMDS is always base64 encoded
# Only use userdata from IMDS if OVF did not provide custom data.
# Userdata provided by IMDS is always base64 encoded.
if not userdata_raw:
# First, check to see if the OVF was supposed to provide custom
# data. If it was supposed to and did not, we report failure.
has_custom_data = _hascustomdata_from_imds(imds_md)
if has_custom_data:
if self.ds_cfg.get("experimental_fail_on_missing_customdata"):
self._report_failure(
errors.ReportableErrorMissingCustomData(
pps_type=pps_type.value,
provisioning_media=self.seed,
)
)
else:
report_diagnostic_event(
"Did not find custom data in %s, IMDS returned"
" extended.compute.hasCustomData=%r"
% (
self.seed,
has_custom_data,
),
logger_func=LOG.error,
)

imds_userdata = _userdata_from_imds(imds_md)
if imds_userdata:
LOG.debug("Retrieved userdata from IMDS")
Expand All @@ -831,7 +856,7 @@ def crawl_metadata(self):
"Bad userdata in IMDS", logger_func=LOG.warning
)

if ovf_source == ddir:
if self.seed == ddir:
report_diagnostic_event(
"using files cached in %s" % ddir, logger_func=LOG.debug
)
Expand Down Expand Up @@ -1724,6 +1749,13 @@ def _userdata_from_imds(imds_data):
return None


def _hascustomdata_from_imds(imds_data: Dict) -> Optional[bool]:
try:
return imds_data["extended"]["compute"]["hasCustomData"]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible for this key to be present but with a value not in (True, False)? I can't seem to find Azure docs on this field, but I can state that our instance data represents what appears to be a strict bool value ```
"extended": {
"compute": {
"hasCustomData": false,

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are correct. If present, this field should only be in the values of (True, False). There is a very very small chance that IMDS returns malformed JSON, at which point the field could be either incorrectly populated or not present, but this is an unexpected case.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have been using None to indicate the difference between False and not-present. Programmatically None and False are treated the same with the if statement, the but reported error has more clarity in case there's a situation where IMDS unexpectedly is missing data.

The alternative I suppose is to specifically raise ReportableErrorImdsMissingHasCustomData or something. The thing is that if IMDS metadata was fetched using the fallback api-version, we might not see it and force an error where there is none.

except KeyError:
return None
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return None
return False



def _hostname_from_imds(imds_data):
try:
return imds_data["compute"]["osProfile"]["computerName"]
Expand Down
13 changes: 13 additions & 0 deletions cloudinit/sources/azure/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,19 @@ def __init__(self, *, key: str, value: Any) -> None:
self.supporting_data["type"] = type(value).__name__


class ReportableErrorMissingCustomData(ReportableError):
def __init__(
self,
*,
pps_type: str,
provisioning_media: str,
) -> None:
super().__init__("failure to read customData while hasCustomData=true")

self.supporting_data["pps_type"] = pps_type
self.supporting_data["provisioning_media"] = provisioning_media


class ReportableErrorImdsMetadataParsingException(ReportableError):
def __init__(self, *, exception: ValueError) -> None:
super().__init__("error parsing IMDS metadata")
Expand Down
95 changes: 95 additions & 0 deletions tests/unittests/sources/test_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -5481,6 +5481,84 @@ def test_imds_failure_results_in_provisioning_failure(self):
assert len(self.mock_kvp_report_via_kvp.mock_calls) == 1
assert not self.mock_kvp_report_success_to_host.mock_calls

@pytest.mark.parametrize(
"flag_enabled",
[False, True],
)
@pytest.mark.parametrize(
"has_custom_data,custom_data",
[
(True, None),
(True, "myCustomData"),
(True, ""),
(False, None),
Comment on lines +5491 to +5494
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears that hasCustomData in IMDS compute are boolean values. Why is this test setting None, non-empty and empty strings for this value?are we testing.

I believe this test should probably only be checking hasCustomData in (True, False). Do I have an incorrect read on this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this test, we specify the hasCustomData on the left param and the customData field on the right. The hasCustomData does only stay in the group (True, False), but for the CustomData, we do specify None as well as empty and non-empty strings to verify that we catch various styles of empty CustomData that could be found in the OVF source.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah the right is the actual customData found on provisioning media (ovf-env.xml), this is just enumerating the cases the possible values seen there (expected or not)

],
)
def test_missing_customdata_reporting(
self,
caplog,
flag_enabled,
has_custom_data,
custom_data,
):
"""Test failure reporting behavior based on custom data fields.

Failure is reported only when
experimental_fail_on_missing_customdata is True,
IMDS reports hasCustomData=True, and OVF has no custom data.
When the flag is not enabled but IMDS reports custom data
should be present, a diagnostic event is logged.
"""
self.azure_ds.ds_cfg["experimental_fail_on_missing_customdata"] = (
flag_enabled
)

imds_md = copy.deepcopy(self.imds_md)
imds_md["extended"]["compute"]["hasCustomData"] = has_custom_data

ovf = construct_ovf_env(
custom_data=custom_data,
provision_guest_proxy_agent=False,
)
md, ud, cfg = dsaz.read_azure_ovf(ovf)
self.mock_util_mount_cb.return_value = (md, ud, cfg, {})
self.mock_readurl.side_effect = [
mock.MagicMock(contents=json.dumps(imds_md).encode()),
]
self.mock_azure_get_metadata_from_fabric.return_value = []

self.azure_ds._check_and_get_data()

expect_failure = flag_enabled and has_custom_data and not custom_data
if expect_failure:
assert len(self.mock_kvp_report_via_kvp.mock_calls) == 1
assert (
len(self.mock_azure_report_failure_to_fabric.mock_calls) == 1
)
assert not self.mock_kvp_report_success_to_host.mock_calls
else:
assert not self.mock_kvp_report_via_kvp.mock_calls
assert not self.mock_azure_report_failure_to_fabric.mock_calls
assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1

if custom_data:
assert self.azure_ds.userdata_raw == custom_data.encode("utf-8")
else:
assert self.azure_ds.userdata_raw == ""

# Verify diagnostic event for missing custom data when
# the experimental flag is not enabled.
expect_diagnostic = (
not flag_enabled and has_custom_data and not custom_data
)
if expect_diagnostic:
assert (
"Did not find custom data in /dev/sr0, IMDS returned"
" extended.compute.hasCustomData=True"
) in caplog.text
else:
assert "Did not find custom data in" not in caplog.text


class TestCheckAzureProxyAgent:
@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -5920,6 +5998,23 @@ def test_query_vm_id_vm_id_conversion_failure(
mock_convert_uuid.assert_called_once_with("test-system-uuid")


class TestHasCustomDataFromImds:
"""Unit tests for the _hascustomdata_from_imds helper."""

@pytest.mark.parametrize(
"imds_data,expected",
[
({"extended": {"compute": {"hasCustomData": True}}}, True),
({"extended": {"compute": {"hasCustomData": False}}}, False),
({}, None),
({"extended": {}}, None),
({"extended": {"compute": {}}}, None),
],
)
def test_hascustomdata_from_imds(self, imds_data, expected):
assert dsaz._hascustomdata_from_imds(imds_data) is expected


class TestHashPassword:
"""Tests for the hash_password function."""

Expand Down
Loading