Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 73 additions & 7 deletions snuba/web/rpc/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,12 @@ def _trace_item_filter_key_expression(
BUCKET_COUNT = 40


def transform_array_value(value: dict[str, str]) -> Any:
def transform_array_value(value: Any) -> Any:
Comment thread
manessaraj marked this conversation as resolved.
"""Decode one array element: with a String, Int, Double, or Bool tag."""
if not isinstance(value, dict):
raise BadSnubaRPCRequestException(
f"array element must be an object with a String/Int/Double/Bool tag, got {type(value).__name__}"
)
for t, v in value.items():
if t == "Int":
return int(v)
Expand All @@ -87,15 +92,76 @@ def transform_array_value(value: dict[str, str]) -> Any:
if t == "Bool":
return str(v).lower() == "true"
if t == "String":
return v
raise BadSnubaRPCRequestException(f"array value type unknown: {type(v)}")
return str(v)
raise BadSnubaRPCRequestException(
f"array value has no recognized tag, keys={list(value.keys())}"
)


def _flatten_attributes_array_json(node: dict[str, Any], prefix: str = "") -> dict[str, list[Any]]:
"""Flatten nested dicts into dotted keys (name -> list).
ClickHouse stores all JSON objects as flat and interprets dotted paths in a special way to support nesting
Docs: https://clickhouse.com/docs/sql-reference/data-types/newjson#handling-json-keys-with-dots
"""
out: dict[str, list[Any]] = {}
for k, v in node.items():
full_key = f"{prefix}.{k}" if prefix else k
if isinstance(v, list):
if full_key in out:
raise BadSnubaRPCRequestException(
f"duplicate attributes_array key after normalization: {full_key!r}"
)
out[full_key] = v
elif isinstance(v, dict):
nested = _flatten_attributes_array_json(v, full_key)
for nk, nv in nested.items():
if nk in out:
raise BadSnubaRPCRequestException(
f"duplicate attributes_array key after normalization: {nk!r}"
)
out[nk] = nv
else:
raise BadSnubaRPCRequestException(
f"attributes_array value at {full_key!r} must be a list or object, got {type(v).__name__}"
)
return out


def process_arrays(raw: str) -> dict[str, list[Any]]:
parsed = json.loads(raw) or {}
arrays = {}
for key, values in parsed.items():
arrays[key] = [transform_array_value(v) for v in values]
"""Parse attributes_array JSON into attribute name -> list of decoded values."""
if raw is None or (isinstance(raw, str) and raw.strip() == ""):
parsed: Any = {}
else:
try:
parsed = json.loads(raw)
except json.JSONDecodeError as e:
raise BadSnubaRPCRequestException(f"attributes_array is not valid JSON: {e}") from e

if parsed is None:
parsed = {}
if not isinstance(parsed, dict):
raise BadSnubaRPCRequestException(
f"attributes_array JSON must be an object, got {type(parsed).__name__}"
)

collected = _flatten_attributes_array_json(parsed)
arrays: dict[str, list[Any]] = {}
for key, values in collected.items():
if not isinstance(values, list):
raise BadSnubaRPCRequestException(
f"attributes_array key {key!r} must map to a list, got {type(values).__name__}"
)
parsed_elements: list[Any] = []
for i, elem in enumerate(values):
try:
parsed_elements.append(transform_array_value(elem))
except BadSnubaRPCRequestException:
raise
except Exception as e:
raise BadSnubaRPCRequestException(
f"invalid attributes_array element at {key!r}[{i}]: {e}"
) from e
arrays[key] = parsed_elements
return arrays


Expand Down
33 changes: 33 additions & 0 deletions tests/web/rpc/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
attribute_key_to_expression,
next_monday,
prev_monday,
process_arrays,
trace_item_filters_to_expression,
use_sampling_factor,
)
Expand All @@ -51,6 +52,38 @@
from tests.web.rpc.v1.test_utils import gen_item_message


class TestProcessArrays:
def test_flat_json(self) -> None:
raw = '{"tags":[{"String":"a"},{"String":"b"}]}'
assert process_arrays(raw) == {"tags": ["a", "b"]}

def test_nested_json_from_dotted_key_roundtrip(self) -> None:
"""Simulates ClickHouse toJSONString when dotted keys become nested objects."""
nested = '{"resource":{"process":{"command_args":[{"String":"node"},{"String":"--enable-source-maps"}]}}}'
assert process_arrays(nested) == {
"resource.process.command_args": ["node", "--enable-source-maps"],
}

def test_mixed_flat_and_nested(self) -> None:
raw = '{"tags":[{"String":"x"}],"resource":{"process":{"command_args":[{"Int":"1"}]}}}'
assert process_arrays(raw) == {
"tags": ["x"],
"resource.process.command_args": [1],
}

def test_invalid_json_raises(self) -> None:
with pytest.raises(BadSnubaRPCRequestException, match="not valid JSON"):
process_arrays("not json")

def test_non_object_root_raises(self) -> None:
with pytest.raises(BadSnubaRPCRequestException, match="must be an object"):
process_arrays("[1]")

def test_scalar_leaf_raises(self) -> None:
with pytest.raises(BadSnubaRPCRequestException, match="must be a list or object"):
process_arrays('{"a":{"b":"bad"}}')


class TestCommon:
def test_timestamp_rounding(self) -> None:
start = datetime(2025, 3, 10)
Expand Down
79 changes: 79 additions & 0 deletions tests/web/rpc/v1/test_endpoint_trace_item_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,85 @@ def test_endpoint_returns_array_attribute(self, eap: None, redis_db: None) -> No
assert cols_attr.value.WhichOneof("value") == "val_array"
assert [e.val_int for e in cols_attr.value.val_array.values] == [1, 3]

def test_dotted_key_array_attribute_parsed_properly(self, eap: None, redis_db: None) -> None:
trace_id = uuid.uuid4().hex
span_ts = BASE_TIME - timedelta(minutes=1)
storage = get_storage(StorageKey("eap_items"))
write_raw_unprocessed_events(
storage, # type: ignore
[
gen_item_message(
span_ts,
trace_id=trace_id,
remove_default_attributes=True,
attributes={
"resource.process.command_args": _str_tags_array(
"node", "--enable-source-maps"
),
},
),
],
)
start = Timestamp()
end = Timestamp()
start.FromDatetime(BASE_TIME - timedelta(hours=4))
end.GetCurrentTime()

spans = (
EndpointTraceItemTable()
.execute(
TraceItemTableRequest(
meta=RequestMeta(
project_ids=[1],
organization_id=1,
cogs_category="something",
referrer="something",
start_timestamp=start,
end_timestamp=end,
request_id=_REQUEST_ID,
trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN,
),
columns=[
Column(
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.item_id")
),
Column(
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.trace_id")
),
],
)
)
.column_values
)
idx = next(
i
for i, tr in enumerate(spans[1].results)
if tr.val_str.replace("-", "").lower() == trace_id.replace("-", "").lower()
)
item_id = spans[0].results[idx].val_str
trace_id_for_details = spans[1].results[idx].val_str

res = EndpointTraceItemDetails().execute(
TraceItemDetailsRequest(
meta=RequestMeta(
project_ids=[1],
organization_id=1,
cogs_category="something",
referrer="something",
start_timestamp=start,
end_timestamp=end,
request_id=_REQUEST_ID,
trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN,
),
item_id=item_id,
trace_id=trace_id_for_details,
)
)
attr = next((a for a in res.attributes if a.name == "resource.process.command_args"), None)
assert attr is not None
assert attr.value.WhichOneof("value") == "val_array"
assert [e.val_str for e in attr.value.val_array.values] == ["node", "--enable-source-maps"]

def test_endpoint_on_spans(self, setup_spans_in_db: Any) -> None:
end = Timestamp()
start = Timestamp()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3949,6 +3949,14 @@ def test_select_array_column_returns_val_array(self) -> None:
"cols": AnyValue(
array_value=ArrayValue(values=[AnyValue(int_value=v) for v in [1, 3]])
),
"resource.process.command_args": AnyValue(
array_value=ArrayValue(
values=[
AnyValue(string_value="node"),
AnyValue(string_value="--enable-source-maps"),
]
)
),
},
),
],
Expand All @@ -3967,6 +3975,11 @@ def test_select_array_column_returns_val_array(self) -> None:
Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.item_id")),
Column(key=AttributeKey(type=AttributeKey.TYPE_ARRAY, name="tags")),
Column(key=AttributeKey(type=AttributeKey.TYPE_ARRAY, name="cols")),
Column(
key=AttributeKey(
type=AttributeKey.TYPE_ARRAY, name="resource.process.command_args"
)
),
],
)
response = EndpointTraceItemTable().execute(message)
Expand All @@ -3978,6 +3991,12 @@ def test_select_array_column_returns_val_array(self) -> None:
]
assert by_name["cols"].results[0].WhichOneof("value") == "val_array"
assert [e.val_int for e in by_name["cols"].results[0].val_array.values] == [1, 3]
assert (
by_name["resource.process.command_args"].results[0].WhichOneof("value") == "val_array"
)
assert [
e.val_str for e in by_name["resource.process.command_args"].results[0].val_array.values
] == ["node", "--enable-source-maps"]


class TestUtils:
Expand Down
Loading