From 0ae22a158ee38f79461ef3a793c0a0e98b136ab6 Mon Sep 17 00:00:00 2001 From: Harsh Deshwal Date: Mon, 20 Apr 2026 17:00:36 +0530 Subject: [PATCH 1/2] tools/lib/route: dedupe segment file mapping logic --- tools/lib/route.py | 113 ++++++++++---------------- tools/lib/tests/test_route_library.py | 98 +++++++++++++++++++++- 2 files changed, 139 insertions(+), 72 deletions(-) diff --git a/tools/lib/route.py b/tools/lib/route.py index 98334a06c86a6d..bd2680409de8c1 100644 --- a/tools/lib/route.py +++ b/tools/lib/route.py @@ -22,6 +22,15 @@ class FileName: class Route: + _SEGMENT_PATH_FIELDS = ( + (FileName.RLOG, "log_path"), + (FileName.QLOG, "qlog_path"), + (FileName.FCAMERA, "camera_path"), + (FileName.DCAMERA, "dcamera_path"), + (FileName.ECAMERA, "ecamera_path"), + (FileName.QCAMERA, "qcamera_path"), + ) + def __init__(self, name, data_dir=None): self._name = RouteName(name) self.files = None @@ -39,62 +48,58 @@ def name(self): def segments(self): return self._segments + def _paths_for_attr(self, attr_name): + path_by_seg_num = {s.name.segment_num: getattr(s, attr_name) for s in self._segments} + return [path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] + + @classmethod + def _segment_paths_from_files(cls, files, mode="first"): + segment_paths = {} + source = files if mode == "first" else reversed(files) + for path, filename in source: + for file_names, attr in cls._SEGMENT_PATH_FIELDS: + if filename in file_names and attr not in segment_paths: + segment_paths[attr] = path + break + + return tuple(segment_paths.get(attr) for _, attr in cls._SEGMENT_PATH_FIELDS) + + @classmethod + def _segment_from_files(cls, segment_name, files, mode="first"): + return Segment(segment_name, *cls._segment_paths_from_files(files, mode=mode)) + def log_paths(self): - log_path_by_seg_num = {s.name.segment_num: s.log_path for s in self._segments} - return [log_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] + return self._paths_for_attr("log_path") def qlog_paths(self): - qlog_path_by_seg_num = {s.name.segment_num: s.qlog_path for s in self._segments} - return [qlog_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] + return self._paths_for_attr("qlog_path") def camera_paths(self): - camera_path_by_seg_num = {s.name.segment_num: s.camera_path for s in self._segments} - return [camera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] + return self._paths_for_attr("camera_path") def dcamera_paths(self): - dcamera_path_by_seg_num = {s.name.segment_num: s.dcamera_path for s in self._segments} - return [dcamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] + return self._paths_for_attr("dcamera_path") def ecamera_paths(self): - ecamera_path_by_seg_num = {s.name.segment_num: s.ecamera_path for s in self._segments} - return [ecamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] + return self._paths_for_attr("ecamera_path") def qcamera_paths(self): - qcamera_path_by_seg_num = {s.name.segment_num: s.qcamera_path for s in self._segments} - return [qcamera_path_by_seg_num.get(i, None) for i in range(self.max_seg_number + 1)] + return self._paths_for_attr("qcamera_path") - # TODO: refactor this, it's super repetitive def _get_segments_remote(self): api = CommaApi(get_token()) route_files = api.get('v1/route/' + self.name.canonical_name + '/files') self.files = list(chain.from_iterable(route_files.values())) - segments = {} + segment_files = defaultdict(list) for url in self.files: _, dongle_id, time_str, segment_num, fn = urlparse(url).path.rsplit('/', maxsplit=4) segment_name = f'{dongle_id}|{time_str}--{segment_num}' - if segments.get(segment_name): - segments[segment_name] = Segment( - segment_name, - url if fn in FileName.RLOG else segments[segment_name].log_path, - url if fn in FileName.QLOG else segments[segment_name].qlog_path, - url if fn in FileName.FCAMERA else segments[segment_name].camera_path, - url if fn in FileName.DCAMERA else segments[segment_name].dcamera_path, - url if fn in FileName.ECAMERA else segments[segment_name].ecamera_path, - url if fn in FileName.QCAMERA else segments[segment_name].qcamera_path, - ) - else: - segments[segment_name] = Segment( - segment_name, - url if fn in FileName.RLOG else None, - url if fn in FileName.QLOG else None, - url if fn in FileName.FCAMERA else None, - url if fn in FileName.DCAMERA else None, - url if fn in FileName.ECAMERA else None, - url if fn in FileName.QCAMERA else None, - ) - - return sorted(segments.values(), key=lambda seg: seg.name.segment_num) + segment_files[segment_name].append((url, fn)) + + segments = [self._segment_from_files(segment_name, files, mode="last") + for segment_name, files in segment_files.items()] + return sorted(segments, key=lambda seg: seg.name.segment_num) def _get_segments_local(self, data_dir): files = os.listdir(data_dir) @@ -124,40 +129,8 @@ def _get_segments_local(self, data_dir): for seg_f in os.listdir(os.path.join(fullpath, seg_num)): segment_files[segment_name].append((os.path.join(fullpath, seg_num, seg_f), seg_f)) - segments = [] - for segment, files in segment_files.items(): - - try: - log_path = next(path for path, filename in files if filename in FileName.RLOG) - except StopIteration: - log_path = None - - try: - qlog_path = next(path for path, filename in files if filename in FileName.QLOG) - except StopIteration: - qlog_path = None - - try: - camera_path = next(path for path, filename in files if filename in FileName.FCAMERA) - except StopIteration: - camera_path = None - - try: - dcamera_path = next(path for path, filename in files if filename in FileName.DCAMERA) - except StopIteration: - dcamera_path = None - - try: - ecamera_path = next(path for path, filename in files if filename in FileName.ECAMERA) - except StopIteration: - ecamera_path = None - - try: - qcamera_path = next(path for path, filename in files if filename in FileName.QCAMERA) - except StopIteration: - qcamera_path = None - - segments.append(Segment(segment, log_path, qlog_path, camera_path, dcamera_path, ecamera_path, qcamera_path)) + segments = [self._segment_from_files(segment, files) + for segment, files in segment_files.items()] if len(segments) == 0: raise ValueError(f'Could not find segments for route {self.name.canonical_name} in data directory {data_dir}') diff --git a/tools/lib/tests/test_route_library.py b/tools/lib/tests/test_route_library.py index 491bb81327ccdf..29b222a9bb3474 100644 --- a/tools/lib/tests/test_route_library.py +++ b/tools/lib/tests/test_route_library.py @@ -1,9 +1,20 @@ from collections import namedtuple +import importlib +import sys +from types import ModuleType -from openpilot.tools.lib.route import SegmentName + +def _import_route_module(): + if "openpilot.tools.lib.auth_config" not in sys.modules: + auth_config = ModuleType("openpilot.tools.lib.auth_config") + auth_config.get_token = lambda: "test-token" + sys.modules["openpilot.tools.lib.auth_config"] = auth_config + return importlib.import_module("openpilot.tools.lib.route") class TestRouteLibrary: def test_segment_name_formats(self): + route_lib = _import_route_module() + Case = namedtuple('Case', ['input', 'expected_route', 'expected_segment_num', 'expected_data_dir']) cases = [ Case("a2a0ccea32023010|2023-07-27--13-01-19", "a2a0ccea32023010|2023-07-27--13-01-19", -1, None), @@ -17,7 +28,7 @@ def test_segment_name_formats(self): def _validate(case): route_or_segment_name = case.input - s = SegmentName(route_or_segment_name, allow_route_name=True) + s = route_lib.SegmentName(route_or_segment_name, allow_route_name=True) assert str(s.route_name) == case.expected_route assert s.segment_num == case.expected_segment_num @@ -25,3 +36,86 @@ def _validate(case): for case in cases: _validate(case) + + def test_local_segment_discovery_across_layouts(self, tmp_path): + route_lib = _import_route_module() + + route_name = "a2a0ccea32023010|2023-07-27--13-01-19" + + explorer_rlog = tmp_path / f"{route_name}--0--rlog.zst" + explorer_rlog.write_text("") + + op_segment_dir = tmp_path / f"{route_name}--1" + op_segment_dir.mkdir() + op_qlog = op_segment_dir / "qlog.bz2" + op_qlog.write_text("") + op_fcamera = op_segment_dir / "fcamera.hevc" + op_fcamera.write_text("") + + canonical_segment_dir = tmp_path / route_name / "2" + canonical_segment_dir.mkdir(parents=True) + canonical_dcamera = canonical_segment_dir / "dcamera.hevc" + canonical_dcamera.write_text("") + canonical_ecamera = canonical_segment_dir / "ecamera.hevc" + canonical_ecamera.write_text("") + + route = route_lib.Route(route_name, data_dir=str(tmp_path)) + + assert route.log_paths() == [str(explorer_rlog), None, None] + assert route.qlog_paths() == [None, str(op_qlog), None] + assert route.camera_paths() == [None, str(op_fcamera), None] + assert route.dcamera_paths() == [None, None, str(canonical_dcamera)] + assert route.ecamera_paths() == [None, None, str(canonical_ecamera)] + assert route.qcamera_paths() == [None, None, None] + + def test_remote_file_mapping_preserves_precedence(self, monkeypatch): + route_lib = _import_route_module() + + route_name = "a2a0ccea32023010|2023-07-27--13-01-19" + seg0_rlog_bz2 = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/0/rlog.bz2" + seg0_rlog_zst = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/0/rlog.zst" + seg1_qlog = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/1/qlog.bz2" + seg2_qcamera = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/2/qcamera.ts" + + route_files = { + "logs": [seg0_rlog_bz2, seg0_rlog_zst, seg1_qlog], + "cameras": [seg2_qcamera], + } + + class FakeApi: + def __init__(self, token): + assert token == "fake-token" + + def get(self, endpoint): + assert endpoint == f"v1/route/{route_name}/files" + return route_files + + monkeypatch.setattr(route_lib, "get_token", lambda: "fake-token") + monkeypatch.setattr(route_lib, "CommaApi", FakeApi) + + route = route_lib.Route(route_name) + + assert route.log_paths() == [seg0_rlog_zst, None, None] + assert route.qlog_paths() == [None, seg1_qlog, None] + assert route.qcamera_paths() == [None, None, seg2_qcamera] + + def test_sparse_segments_keep_none_holes(self, monkeypatch): + route_lib = _import_route_module() + + route_name = "a2a0ccea32023010|2023-07-27--13-01-19" + seg0_rlog = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/0/rlog.zst" + seg2_rlog = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/2/rlog.zst" + + class FakeApi: + def __init__(self, token): + assert token == "fake-token" + + def get(self, endpoint): + assert endpoint == f"v1/route/{route_name}/files" + return {"logs": [seg0_rlog, seg2_rlog]} + + monkeypatch.setattr(route_lib, "get_token", lambda: "fake-token") + monkeypatch.setattr(route_lib, "CommaApi", FakeApi) + + route = route_lib.Route(route_name) + assert route.log_paths() == [seg0_rlog, None, seg2_rlog] From e45b2218665ea2a9dc8a0011cd180aba7c0079bc Mon Sep 17 00:00:00 2001 From: Adeeb Shihadeh Date: Thu, 23 Apr 2026 11:50:20 -0700 Subject: [PATCH 2/2] too verbose --- tools/lib/tests/test_route_library.py | 98 +-------------------------- 1 file changed, 2 insertions(+), 96 deletions(-) diff --git a/tools/lib/tests/test_route_library.py b/tools/lib/tests/test_route_library.py index 29b222a9bb3474..491bb81327ccdf 100644 --- a/tools/lib/tests/test_route_library.py +++ b/tools/lib/tests/test_route_library.py @@ -1,20 +1,9 @@ from collections import namedtuple -import importlib -import sys -from types import ModuleType - -def _import_route_module(): - if "openpilot.tools.lib.auth_config" not in sys.modules: - auth_config = ModuleType("openpilot.tools.lib.auth_config") - auth_config.get_token = lambda: "test-token" - sys.modules["openpilot.tools.lib.auth_config"] = auth_config - return importlib.import_module("openpilot.tools.lib.route") +from openpilot.tools.lib.route import SegmentName class TestRouteLibrary: def test_segment_name_formats(self): - route_lib = _import_route_module() - Case = namedtuple('Case', ['input', 'expected_route', 'expected_segment_num', 'expected_data_dir']) cases = [ Case("a2a0ccea32023010|2023-07-27--13-01-19", "a2a0ccea32023010|2023-07-27--13-01-19", -1, None), @@ -28,7 +17,7 @@ def test_segment_name_formats(self): def _validate(case): route_or_segment_name = case.input - s = route_lib.SegmentName(route_or_segment_name, allow_route_name=True) + s = SegmentName(route_or_segment_name, allow_route_name=True) assert str(s.route_name) == case.expected_route assert s.segment_num == case.expected_segment_num @@ -36,86 +25,3 @@ def _validate(case): for case in cases: _validate(case) - - def test_local_segment_discovery_across_layouts(self, tmp_path): - route_lib = _import_route_module() - - route_name = "a2a0ccea32023010|2023-07-27--13-01-19" - - explorer_rlog = tmp_path / f"{route_name}--0--rlog.zst" - explorer_rlog.write_text("") - - op_segment_dir = tmp_path / f"{route_name}--1" - op_segment_dir.mkdir() - op_qlog = op_segment_dir / "qlog.bz2" - op_qlog.write_text("") - op_fcamera = op_segment_dir / "fcamera.hevc" - op_fcamera.write_text("") - - canonical_segment_dir = tmp_path / route_name / "2" - canonical_segment_dir.mkdir(parents=True) - canonical_dcamera = canonical_segment_dir / "dcamera.hevc" - canonical_dcamera.write_text("") - canonical_ecamera = canonical_segment_dir / "ecamera.hevc" - canonical_ecamera.write_text("") - - route = route_lib.Route(route_name, data_dir=str(tmp_path)) - - assert route.log_paths() == [str(explorer_rlog), None, None] - assert route.qlog_paths() == [None, str(op_qlog), None] - assert route.camera_paths() == [None, str(op_fcamera), None] - assert route.dcamera_paths() == [None, None, str(canonical_dcamera)] - assert route.ecamera_paths() == [None, None, str(canonical_ecamera)] - assert route.qcamera_paths() == [None, None, None] - - def test_remote_file_mapping_preserves_precedence(self, monkeypatch): - route_lib = _import_route_module() - - route_name = "a2a0ccea32023010|2023-07-27--13-01-19" - seg0_rlog_bz2 = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/0/rlog.bz2" - seg0_rlog_zst = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/0/rlog.zst" - seg1_qlog = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/1/qlog.bz2" - seg2_qcamera = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/2/qcamera.ts" - - route_files = { - "logs": [seg0_rlog_bz2, seg0_rlog_zst, seg1_qlog], - "cameras": [seg2_qcamera], - } - - class FakeApi: - def __init__(self, token): - assert token == "fake-token" - - def get(self, endpoint): - assert endpoint == f"v1/route/{route_name}/files" - return route_files - - monkeypatch.setattr(route_lib, "get_token", lambda: "fake-token") - monkeypatch.setattr(route_lib, "CommaApi", FakeApi) - - route = route_lib.Route(route_name) - - assert route.log_paths() == [seg0_rlog_zst, None, None] - assert route.qlog_paths() == [None, seg1_qlog, None] - assert route.qcamera_paths() == [None, None, seg2_qcamera] - - def test_sparse_segments_keep_none_holes(self, monkeypatch): - route_lib = _import_route_module() - - route_name = "a2a0ccea32023010|2023-07-27--13-01-19" - seg0_rlog = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/0/rlog.zst" - seg2_rlog = "https://example.com/a2a0ccea32023010/2023-07-27--13-01-19/2/rlog.zst" - - class FakeApi: - def __init__(self, token): - assert token == "fake-token" - - def get(self, endpoint): - assert endpoint == f"v1/route/{route_name}/files" - return {"logs": [seg0_rlog, seg2_rlog]} - - monkeypatch.setattr(route_lib, "get_token", lambda: "fake-token") - monkeypatch.setattr(route_lib, "CommaApi", FakeApi) - - route = route_lib.Route(route_name) - assert route.log_paths() == [seg0_rlog, None, seg2_rlog]