From f82647c2c94b6e194820962de6cd784792a7cae5 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Thu, 9 Apr 2026 20:15:19 -0700 Subject: [PATCH 01/24] cherry picked changes related to teleop from good-remote-body --- .gitmodules | 1 + cereal/log.capnp | 7 + cereal/services.py | 2 + common/params_keys.h | 1 + selfdrive/pandad/pandad.cc | 15 +- selfdrive/selfdrived/selfdrived.py | 4 +- selfdrive/ui/soundd.py | 12 +- system/athena/athenad.py | 33 +- system/loggerd/encoderd.cc | 72 ++++- system/loggerd/loggerd.h | 6 +- system/manager/process_config.py | 10 +- system/webrtc/device/audio.py | 188 +++++++++++ system/webrtc/device/video.py | 88 +++++- system/webrtc/tests/test_audio.py | 104 ++++++ system/webrtc/tests/test_stream_session.py | 1 - system/webrtc/webrtcd.py | 350 +++++++++++++++++++-- teleoprtc_repo | 2 +- 17 files changed, 838 insertions(+), 58 deletions(-) create mode 100644 system/webrtc/device/audio.py create mode 100644 system/webrtc/tests/test_audio.py diff --git a/.gitmodules b/.gitmodules index ad6530de9ac910..b01ab88806ef28 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,6 +13,7 @@ [submodule "teleoprtc_repo"] path = teleoprtc_repo url = ../../commaai/teleoprtc + branch = fix/datachannel-double-counting [submodule "tinygrad"] path = tinygrad_repo url = https://github.com/tinygrad/tinygrad.git diff --git a/cereal/log.capnp b/cereal/log.capnp index 0f8b5470a44d1a..17e9b1a8646752 100644 --- a/cereal/log.capnp +++ b/cereal/log.capnp @@ -2356,6 +2356,10 @@ struct AudioFeedback { blockNum @1 :UInt16; } +struct SoundRequest { + sound @0 :Car.CarControl.HUDControl.AudibleAlert; +} + struct Touch { sec @0 :Int64; usec @1 :Int64; @@ -2474,6 +2478,9 @@ struct Event { livestreamWideRoadEncodeData @121 :EncodeData; livestreamDriverEncodeData @122 :EncodeData; + soundRequest @151 :SoundRequest; + webrtcAudioData @152 :AudioData; + # *********** Custom: reserved for forks *********** # DO change the name of the field diff --git 
a/cereal/services.py b/cereal/services.py index c8525c0dd3034e..257cafc98ac456 100755 --- a/cereal/services.py +++ b/cereal/services.py @@ -85,6 +85,8 @@ def __init__(self, should_log: bool, frequency: float, decimation: Optional[int] "rawAudioData": (False, 20.), "bookmarkButton": (True, 0., 1), "audioFeedback": (True, 0., 1), + "soundRequest": (False, 0.), + "webrtcAudioData": (False, 0.), "roadEncodeData": (False, 20., None, QueueSize.BIG), "driverEncodeData": (False, 20., None, QueueSize.BIG), "wideRoadEncodeData": (False, 20., None, QueueSize.BIG), diff --git a/common/params_keys.h b/common/params_keys.h index b81a373d0876ef..883950d0752374 100644 --- a/common/params_keys.h +++ b/common/params_keys.h @@ -78,6 +78,7 @@ inline static std::unordered_map keys = { {"LastUpdateTime", {PERSISTENT, TIME}}, {"LastUpdateUptimeOnroad", {PERSISTENT, FLOAT, "0.0"}}, {"LiveDelay", {PERSISTENT, BYTES}}, + {"LivestreamCamera", {CLEAR_ON_MANAGER_START, STRING, "driver"}}, {"LiveParameters", {PERSISTENT, JSON}}, {"LiveParametersV2", {PERSISTENT, BYTES}}, {"LiveTorqueParameters", {PERSISTENT | DONT_LOG, BYTES}}, diff --git a/selfdrive/pandad/pandad.cc b/selfdrive/pandad/pandad.cc index 28d459f458aed3..f0be7565162cac 100644 --- a/selfdrive/pandad/pandad.cc +++ b/selfdrive/pandad/pandad.cc @@ -340,10 +340,21 @@ void process_peripheral_state(Panda *panda, PubMaster *pm, bool no_fan_control) ir_pwr = 0; } + // turn off IR leds if body + std::string cp_bytes = params.get("CarParams"); + if (cp_bytes.size() > 0) { + AlignedBuffer aligned_buf; + capnp::FlatArrayMessageReader cmsg(aligned_buf.align(cp_bytes.data(), cp_bytes.size())); + cereal::CarParams::Reader CP = cmsg.getRoot(); + if (CP.getNotCar()) { + ir_pwr = 0; + } + } + if (ir_pwr != prev_ir_pwr || sm.frame % 100 == 0) { - int16_t ir_panda = util::map_val(ir_pwr, 0, 100, 0, MAX_IR_PANDA_VAL); + int16_t ir_panda = util::map_val(ir_pwr, 0, 100, 0, MAX_IR_PANDA_VAL); panda->set_ir_pwr(ir_panda); - Hardware::set_ir_power(ir_pwr); + 
Hardware::set_ir_power(ir_pwr); prev_ir_pwr = ir_pwr; } } diff --git a/selfdrive/selfdrived/selfdrived.py b/selfdrive/selfdrived/selfdrived.py index 6a294ca8d82786..046f54744a9f0b 100755 --- a/selfdrive/selfdrived/selfdrived.py +++ b/selfdrive/selfdrived/selfdrived.py @@ -75,6 +75,8 @@ def __init__(self, CP=None): self.car_state_sock = messaging.sub_sock('carState', timeout=20) ignore = self.sensor_packets + self.gps_packets + ['alertDebug', 'lateralManeuverPlan'] + if self.CP.notCar: + ignore += ['driverMonitoringState'] if SIMULATION: ignore += ['driverCameraState', 'managerState'] if REPLAY: @@ -192,7 +194,7 @@ def update_events(self, CS): if self.CP.notCar: # wait for everything to init first - if self.sm.frame > int(5. / DT_CTRL) and self.initialized: + if self.sm.frame > int(2. / DT_CTRL) and self.initialized: # body always wants to enable self.events.add(EventName.pcmEnable) diff --git a/selfdrive/ui/soundd.py b/selfdrive/ui/soundd.py index 8225efabf9af5a..b2783efebd25c6 100644 --- a/selfdrive/ui/soundd.py +++ b/selfdrive/ui/soundd.py @@ -3,7 +3,6 @@ import time import wave - from cereal import car, messaging from openpilot.common.basedir import BASEDIR from openpilot.common.filter_simple import FirstOrderFilter @@ -116,7 +115,9 @@ def get_sound_data(self, frames): # get "frames" worth of data from the current def callback(self, data_out: np.ndarray, frames: int, time, status) -> None: if status: cloudlog.warning(f"soundd stream over/underflow: {status}") - data_out[:frames, 0] = self.get_sound_data(frames) + sound = self.get_sound_data(frames) + np.clip(sound, -1.0, 1.0, out=sound) + data_out[:frames, 0] = sound def update_alert(self, new_alert): current_alert_played_once = self.current_alert == AudibleAlert.none or self.current_sound_frame > len(self.loaded_sounds[self.current_alert]) @@ -128,6 +129,11 @@ def update_alert(self, new_alert): self.current_sound_frame = 0 def get_audible_alert(self, sm): + if sm.updated['soundRequest']: + new_alert = 
sm['soundRequest'].sound.raw + if new_alert != AudibleAlert.none: + self.update_alert(new_alert) + if sm.updated['selfdriveState']: new_alert = sm['selfdriveState'].alertSound.raw self.update_alert(new_alert) @@ -153,7 +159,7 @@ def soundd_thread(self): # sounddevice must be imported after forking processes import sounddevice as sd - sm = messaging.SubMaster(['selfdriveState', 'soundPressure']) + sm = messaging.SubMaster(['selfdriveState', 'soundPressure', 'soundRequest']) with self.get_stream(sd) as stream: rk = Ratekeeper(20) diff --git a/system/athena/athenad.py b/system/athena/athenad.py index b52ef21ba63702..6b925fc2d24df3 100755 --- a/system/athena/athenad.py +++ b/system/athena/athenad.py @@ -28,7 +28,7 @@ create_connection) import cereal.messaging as messaging -from cereal import log +from cereal import car, log from cereal.services import SERVICE_LIST from openpilot.common.api import Api, get_key_pair from openpilot.common.utils import CallbackReader, get_upload_stream @@ -44,6 +44,7 @@ ATHENA_HOST = os.getenv('ATHENA_HOST', 'wss://athena.comma.ai') HANDLER_THREADS = int(os.getenv('HANDLER_THREADS', "4")) LOCAL_PORT_WHITELIST = {22, } # SSH +WEBRTCD_PORT = 5001 LOG_ATTR_NAME = 'user.upload' LOG_ATTR_VALUE_MAX_UNIX_TIME = int.to_bytes(2147483647, 4, sys.byteorder) @@ -536,6 +537,16 @@ def getSshAuthorizedKeys() -> str: def getGithubUsername() -> str: return cast(str, Params().get("GithubUsername") or "") + +@dispatcher.add_method +def getNotCar() -> bool: + cp_bytes = Params().get("CarParamsPersistent") + if cp_bytes is not None: + with car.CarParams.from_bytes(cp_bytes) as CP: + return CP.notCar + return False + + @dispatcher.add_method def getSimInfo(): return HARDWARE.get_sim_info() @@ -557,6 +568,26 @@ def getNetworks(): return HARDWARE.get_networks() +@dispatcher.add_method +def startJoystickStream(sdp: str) -> dict: + from openpilot.system.webrtc.webrtcd import StreamRequestBody + body = StreamRequestBody(sdp, ["driver"], ["testJoystick"], 
["carState"]) + try: + resp = requests.post(f"http://localhost:{WEBRTCD_PORT}/stream", + json=asdict(body), timeout=10) + if not resp.ok: + try: + error_body = resp.json() + raise Exception(error_body.get("message", f"webrtcd returned {resp.status_code}")) + except ValueError: + resp.raise_for_status() + return resp.json() + except requests.ConnectTimeout: + raise Exception("webrtc took too long to respond. is it on?") from None + except requests.ConnectionError: + raise Exception("webrtc is not running. turn on comma body ignition.") from None + + @dispatcher.add_method def takeSnapshot() -> str | dict[str, str] | None: from openpilot.system.camerad.snapshot import jpeg_write, snapshot diff --git a/system/loggerd/encoderd.cc b/system/loggerd/encoderd.cc index 9d4b81a3f90230..656934f4a3d269 100644 --- a/system/loggerd/encoderd.cc +++ b/system/loggerd/encoderd.cc @@ -151,6 +151,76 @@ void encoderd_thread(const LogCameraInfo (&cameras)[N]) { } } +// Map param value to stream camera config +const LogCameraInfo *find_stream_camera(const std::string &name) { + if (name == "driver") return &stream_driver_camera_info; + return &stream_wide_road_camera_info; // default +} + +void stream_encoderd_thread() { + Params params; + + // Wait for cameras to be available + std::set available_streams; + while (!do_exit) { + available_streams = VisionIpcClient::getAvailableStreams("camerad", false); + if (!available_streams.empty()) break; + util::sleep_for(100); + } + + std::string active_camera = params.get("LivestreamCamera"); + if (active_camera.empty()) active_camera = "driver"; + + while (!do_exit) { + const LogCameraInfo *cam_info = find_stream_camera(active_camera); + + // Check that the requested camera stream is available + if (available_streams.find(cam_info->stream_type) == available_streams.end()) { + LOGE("stream encoder: camera %s not available, falling back", active_camera.c_str()); + active_camera = "wideRoad"; + cam_info = find_stream_camera(active_camera); + } + + 
VisionIpcClient vipc_client("camerad", cam_info->stream_type, false); + if (!vipc_client.connect(false)) { + util::sleep_for(5); + continue; + } + + const VisionBuf &buf_info = vipc_client.buffers[0]; + LOGW("stream encoder init %s %zux%zu", active_camera.c_str(), buf_info.width, buf_info.height); + assert(buf_info.width > 0 && buf_info.height > 0); + + const auto &encoder_info = cam_info->encoder_infos[0]; + auto encoder = std::make_unique(encoder_info, buf_info.width, buf_info.height); + encoder->encoder_open(); + + while (!do_exit) { + // Check for camera switch request + std::string requested = params.get("LivestreamCamera"); + if (!requested.empty() && requested != active_camera) { + LOGW("stream encoder switching from %s to %s", active_camera.c_str(), requested.c_str()); + active_camera = requested; + break; // break to reinit encoder with new camera + } + + VisionIpcBufExtra extra; + VisionBuf *buf = vipc_client.recv(&extra); + if (buf == nullptr) continue; + + // detect loop around and drop the frames + if (buf->get_frame_id() != extra.frame_id) continue; + + int out_id = encoder->encode_frame(buf, &extra); + if (out_id == -1) { + LOGE("stream encoder: failed to encode frame. frame_id: %d", extra.frame_id); + } + } + + encoder->encoder_close(); + } +} + int main(int argc, char* argv[]) { if (!Hardware::PC()) { int ret; @@ -162,7 +232,7 @@ int main(int argc, char* argv[]) { if (argc > 1) { std::string arg1(argv[1]); if (arg1 == "--stream") { - encoderd_thread(stream_cameras_logged); + stream_encoderd_thread(); } else { LOGE("Argument '%s' is not supported", arg1.c_str()); } diff --git a/system/loggerd/loggerd.h b/system/loggerd/loggerd.h index 6aa0c8be40b96f..22ad2f88b8abf9 100644 --- a/system/loggerd/loggerd.h +++ b/system/loggerd/loggerd.h @@ -47,8 +47,8 @@ struct EncoderSettings { } static EncoderSettings StreamEncoderSettings() { - int _stream_bitrate = getenv("STREAM_BITRATE") ? 
atoi(getenv("STREAM_BITRATE")) : 1'000'000; - return EncoderSettings{.encode_type = cereal::EncodeIndex::Type::QCAMERA_H264, .bitrate = _stream_bitrate , .gop_size = 15}; + int _stream_bitrate = getenv("STREAM_BITRATE") ? atoi(getenv("STREAM_BITRATE")) : 4'000'000; + return EncoderSettings{.encode_type = cereal::EncodeIndex::Type::QCAMERA_H264, .bitrate = _stream_bitrate , .gop_size = 5}; } }; @@ -169,4 +169,4 @@ const LogCameraInfo stream_driver_camera_info{ }; const LogCameraInfo cameras_logged[] = {road_camera_info, wide_road_camera_info, driver_camera_info}; -const LogCameraInfo stream_cameras_logged[] = {stream_road_camera_info, stream_wide_road_camera_info, stream_driver_camera_info}; +const LogCameraInfo stream_cameras_logged[] = {stream_wide_road_camera_info, stream_driver_camera_info}; diff --git a/system/manager/process_config.py b/system/manager/process_config.py index 7e96b7776a4f5f..1ffa22be3f0906 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -76,11 +76,11 @@ def and_(*fns): PythonProcess("webcamerad", "tools.webcam.camerad", driverview, enabled=WEBCAM), PythonProcess("proclogd", "system.proclogd", only_onroad, enabled=platform.system() != "Darwin"), PythonProcess("journald", "system.journald", only_onroad, platform.system() != "Darwin"), - PythonProcess("micd", "system.micd", iscar), + PythonProcess("micd", "system.micd", only_onroad), PythonProcess("timed", "system.timed", always_run, enabled=not PC), PythonProcess("modeld", "selfdrive.modeld.modeld", only_onroad), - PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(WEBCAM or not PC)), + PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", and_(driverview, iscar), enabled=(WEBCAM or not PC)), PythonProcess("sensord", "system.sensord.sensord", only_onroad, enabled=not PC), PythonProcess("ui", "selfdrive.ui.ui", always_run, restart_if_crash=True), @@ -94,7 +94,7 @@ def and_(*fns): 
PythonProcess("selfdrived", "selfdrive.selfdrived.selfdrived", only_onroad), PythonProcess("card", "selfdrive.car.card", only_onroad), PythonProcess("deleter", "system.loggerd.deleter", always_run), - PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", driverview, enabled=(WEBCAM or not PC)), + PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", and_(driverview, iscar), enabled=(WEBCAM or not PC)), PythonProcess("qcomgpsd", "system.qcomgpsd.qcomgpsd", qcomgps, enabled=TICI), PythonProcess("pandad", "selfdrive.pandad.pandad", always_run), PythonProcess("paramsd", "selfdrive.locationd.paramsd", only_onroad), @@ -112,10 +112,10 @@ def and_(*fns): PythonProcess("statsd", "system.statsd", always_run), PythonProcess("feedbackd", "selfdrive.ui.feedback.feedbackd", only_onroad), + PythonProcess("webrtcd", "system.webrtc.webrtcd", notcar), + # debug procs NativeProcess("bridge", "cereal/messaging", ["./bridge"], notcar), - PythonProcess("webrtcd", "system.webrtc.webrtcd", notcar), - PythonProcess("webjoystick", "tools.bodyteleop.web", notcar), PythonProcess("joystick", "tools.joystick.joystick_control", and_(joystick, iscar)), ] diff --git a/system/webrtc/device/audio.py b/system/webrtc/device/audio.py new file mode 100644 index 00000000000000..440a5f4af6953a --- /dev/null +++ b/system/webrtc/device/audio.py @@ -0,0 +1,188 @@ +import asyncio +import fractions +import logging +import threading +import time +from collections import deque + +import numpy as np +from av import AudioFrame, AudioResampler +from aiortc.mediastreams import AudioStreamTrack, MediaStreamError + +from cereal import car, messaging + +AUDIO_PTIME = 0.020 +MIC_SAMPLE_RATE = 16000 +SPEAKER_SAMPLE_RATE = 48000 +LOGGER = logging.getLogger("webrtcd") + +AudibleAlert = car.CarControl.HUDControl.AudibleAlert +BODY_SOUND_ALERTS = { + "engage": AudibleAlert.engage, + "disengage": AudibleAlert.disengage, + "prompt": AudibleAlert.prompt, + "warning": AudibleAlert.warningImmediate, +} 
+BODY_SOUND_NAMES = frozenset(BODY_SOUND_ALERTS) + + +class PcmBuffer: + def __init__(self, dtype=np.int16): + self._chunks: deque[np.ndarray] = deque() + self._offset = 0 + self._size = 0 + self._dtype = dtype + + def push(self, samples: np.ndarray): + if samples.size == 0: + return + chunk = np.ascontiguousarray(samples, dtype=self._dtype) + self._chunks.append(chunk) + self._size += chunk.size + + def available(self) -> int: + return self._size + + def pop(self, size: int) -> np.ndarray: + out = np.zeros(size, dtype=self._dtype) + written = 0 + + while written < size and self._chunks: + chunk = self._chunks[0] + remaining = chunk.size - self._offset + take = min(size - written, remaining) + out[written:written + take] = chunk[self._offset:self._offset + take] + written += take + self._offset += take + + if self._offset >= chunk.size: + self._chunks.popleft() + self._offset = 0 + + self._size -= written + return out + + +class DeviceToWebAudioTrack(AudioStreamTrack): + def __init__(self): + super().__init__() + self._loop = asyncio.get_running_loop() + self._buffer = PcmBuffer() + self._buffer_event = asyncio.Event() + self._sample_rate = MIC_SAMPLE_RATE + self._samples_per_frame = int(self._sample_rate * AUDIO_PTIME) + self._time_base = fractions.Fraction(1, self._sample_rate) + self._running = True + self._thread = threading.Thread(target=self._poll_cereal, daemon=True) + self._thread.start() + + def _push_samples(self, samples: np.ndarray): + self._buffer.push(samples) + self._buffer_event.set() + + def _poll_cereal(self): + sm = messaging.SubMaster(['rawAudioData']) + while self._running: + sm.update(20) + if not sm.updated['rawAudioData']: + continue + + raw_bytes = sm['rawAudioData'].data + if not raw_bytes: + continue + + # .copy() required: frombuffer is a view over the cereal message buffer, invalidated by next sm.update() + samples = np.frombuffer(raw_bytes, dtype=np.int16).copy() + self._loop.call_soon_threadsafe(self._push_samples, samples) + + async 
def _next_frame_samples(self) -> np.ndarray: + while self.readyState == "live": + if self._buffer.available() >= self._samples_per_frame: + return self._buffer.pop(self._samples_per_frame) + + await self._buffer_event.wait() + self._buffer_event.clear() + + raise MediaStreamError + + async def _next_timestamp(self) -> int: + if not hasattr(self, "_timestamp"): + self._start = time.monotonic() + self._timestamp = 0 + return self._timestamp + + self._timestamp += self._samples_per_frame + wait = self._start + (self._timestamp / self._sample_rate) - time.monotonic() + if wait > 0: + await asyncio.sleep(wait) + return self._timestamp + + async def recv(self): + if self.readyState != "live": + raise MediaStreamError + + frame_samples = await self._next_frame_samples() + timestamp = await self._next_timestamp() + + frame = AudioFrame(format="s16", layout="mono", samples=self._samples_per_frame) + frame.planes[0].update(frame_samples.tobytes()) + frame.pts = timestamp + frame.sample_rate = self._sample_rate + frame.time_base = self._time_base + return frame + + def stop(self): + super().stop() + self._running = False + try: + self._loop.call_soon_threadsafe(self._buffer_event.set) + except RuntimeError: + self._buffer_event.set() + + +class WebToDeviceAudioTrack: + def __init__(self): + self._pm = messaging.PubMaster(['soundRequest', 'webrtcAudioData']) + self._task: asyncio.Task | None = None + + def play_sound(self, sound_name: str): + msg = messaging.new_message('soundRequest') + msg.soundRequest.sound = BODY_SOUND_ALERTS[sound_name] + self._pm.send('soundRequest', msg) + + def start_track(self, track): + self._task = asyncio.create_task(self._consume_track(track)) + + def _send_audio_data(self, data: bytes): + msg = messaging.new_message('webrtcAudioData') + msg.webrtcAudioData.data = data + msg.webrtcAudioData.sampleRate = SPEAKER_SAMPLE_RATE + self._pm.send('webrtcAudioData', msg) + + async def _consume_track(self, track): + resampler = AudioResampler(format='s16', 
layout='mono', rate=SPEAKER_SAMPLE_RATE) + try: + while True: + frame = await track.recv() + for resampled in resampler.resample(frame): + self._send_audio_data(resampled.planes[0].to_bytes()) + except MediaStreamError: + LOGGER.info("Incoming browser audio track ended") + except asyncio.CancelledError: + raise + except Exception: + LOGGER.exception("BodySpeaker track consumption error") + + async def stop(self): + if self._task is not None and not self._task.done(): + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + self._task = None + + +# Backwards-compatible aliases while older call sites are updated. +BodyMicAudioTrack = DeviceToWebAudioTrack +BodySpeaker = WebToDeviceAudioTrack diff --git a/system/webrtc/device/video.py b/system/webrtc/device/video.py index 50feab4f4a910d..d1a5e60885c287 100644 --- a/system/webrtc/device/video.py +++ b/system/webrtc/device/video.py @@ -1,4 +1,5 @@ import asyncio +import struct import time import av @@ -7,38 +8,95 @@ from cereal import messaging from openpilot.common.realtime import DT_MDL, DT_DMON +# arbitrary 16-byte UUID identifying openpilot frame-timing SEI messages +TIMING_SEI_UUID = bytes([ + 0xa5, 0xe0, 0xc4, 0xa4, 0x5b, 0x6e, 0x4e, 0x1e, + 0x9c, 0x7e, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, +]) + + +def _escape_rbsp(data: bytes) -> bytearray: + """ + Prevents frame bytes that might escape into NAL start code. + Adds 0x03 after two consecutive 0x00 0x00 to escape this. 
+ """ + out = bytearray() + zeros = 0 + for b in data: + if zeros >= 2 and b <= 3: + out.append(3) + zeros = 0 + zeros = zeros + 1 if b == 0 else 0 + out.append(b) + return out + + +def create_timing_sei(capture_ms: float, encode_ms: float, send_delay_ms: float, send_wall_ms: float) -> bytes: + """Build an H.264 SEI NAL (user_data_unregistered) carrying frame timing.""" + ts_data = struct.pack('>4d', capture_ms, encode_ms, send_delay_ms, send_wall_ms) + sei_payload = TIMING_SEI_UUID + ts_data # 16 + 32 = 48 bytes + + # payload_type=5, payload_size=48, then RBSP stop bit + rbsp = bytes([5, len(sei_payload)]) + sei_payload + bytes([0x80]) + escaped = _escape_rbsp(rbsp) + + # start-code (4 bytes) + NAL header (forbidden=0, ref_idc=0, type=6 SEI) + return b'\x00\x00\x00\x01\x06' + bytes(escaped) + class LiveStreamVideoStreamTrack(TiciVideoStreamTrack): - camera_to_sock_mapping = { - "driver": "livestreamDriverEncodeData", - "wideRoad": "livestreamWideRoadEncodeData", - "road": "livestreamRoadEncodeData", + camera_config = { + "driver": (DT_DMON, "livestreamDriverEncodeData"), + "wideRoad": (DT_MDL, "livestreamWideRoadEncodeData"), } def __init__(self, camera_type: str): - dt = DT_DMON if camera_type == "driver" else DT_MDL + dt, _ = self.camera_config[camera_type] super().__init__(camera_type, dt) - self._sock = messaging.sub_sock(self.camera_to_sock_mapping[camera_type], conflate=True) - self._pts = 0 + self._camera_type = "" + self._sock = None + self._set_camera(camera_type) self._t0_ns = time.monotonic_ns() + self.timing_sei_enabled = False - async def recv(self): + def _set_camera(self, camera_type: str): + self._camera_type = camera_type + _, sock_name = self.camera_config[camera_type] + self._sock = messaging.sub_sock(sock_name, conflate=True) + + def switch_camera(self, camera_type: str): + if camera_type not in self.camera_config or camera_type == self._camera_type: + return + self._set_camera(camera_type) + + async def _recv_message(self): while True: msg = 
messaging.recv_one_or_none(self._sock) if msg is not None: - break + return msg await asyncio.sleep(0.005) - evta = getattr(msg, msg.which()) + def _build_frame_data(self, msg) -> bytes: + encode_data = getattr(msg, msg.which()) + if not self.timing_sei_enabled: + return encode_data.header + encode_data.data - packet = av.Packet(evta.header + evta.data) - packet.time_base = self._time_base + capture_ms = (encode_data.idx.timestampEof - encode_data.idx.timestampSof) / 1e6 + encode_ms = (msg.logMonoTime - encode_data.idx.timestampEof) / 1e6 + send_delay_ms = (time.monotonic_ns() - msg.logMonoTime) / 1e6 + send_wall_ms = time.time() * 1000 # noqa: TID251 + sei_nal = create_timing_sei(capture_ms, encode_ms, send_delay_ms, send_wall_ms) + return encode_data.header + sei_nal + encode_data.data - self._pts = ((time.monotonic_ns() - self._t0_ns) * self._clock_rate) // 1_000_000_000 - packet.pts = self._pts - self.log_debug("track sending frame %d", self._pts) + async def recv(self): + msg = await self._recv_message() + packet = av.Packet(self._build_frame_data(msg)) + packet.time_base = self._time_base + pts = ((time.monotonic_ns() - self._t0_ns) * self._clock_rate) // 1_000_000_000 + packet.pts = pts + self.log_debug("track sending frame %d", pts) return packet def codec_preference(self) -> str | None: diff --git a/system/webrtc/tests/test_audio.py b/system/webrtc/tests/test_audio.py new file mode 100644 index 00000000000000..760c02929af12e --- /dev/null +++ b/system/webrtc/tests/test_audio.py @@ -0,0 +1,104 @@ +import asyncio +import math +import time +from types import SimpleNamespace + +import numpy as np +import pytest +from aiortc.mediastreams import VideoStreamTrack + +from openpilot.system.webrtc.device import audio as audio_module +from openpilot.system.webrtc.webrtcd import StreamSession + + +AUDIO_RECVONLY_OFFER_SDP = """v=0 +o=- 3910210904 3910210904 IN IP4 0.0.0.0 +s=- +t=0 0 +a=group:BUNDLE 0 +a=msid-semantic:WMS * +m=audio 9 UDP/TLS/RTP/SAVPF 96 0 8 +c=IN 
IP4 0.0.0.0 +a=recvonly +a=extmap:1 urn:ietf:params:rtp-hdrext:sdes:mid +a=extmap:2 urn:ietf:params:rtp-hdrext:ssrc-audio-level +a=mid:0 +a=msid:eb1d3f1a-569a-465f-b419-319477bfded6 e44eecb2-1a04-4547-97d8-481389f50d5b +a=rtcp:9 IN IP4 0.0.0.0 +a=rtcp-mux +a=ssrc:1233332626 cname:ca4dede8-4994-4a6d-9ae3-923b28177ca5 +a=rtpmap:96 opus/48000/2 +a=rtpmap:0 PCMU/8000 +a=rtpmap:8 PCMA/8000 +a=ice-ufrag:1234 +a=ice-pwd:1234 +a=fingerprint:sha-256 40:4B:14:CF:70:B8:67:E1:B1:FF:7E:F9:22:6E:60:7D:73:B5:1E:38:4B:10:20:9C:CD:1C:47:02:52:ED:45:25 +a=setup:actpass""" + + +def tone_chunk(samples: int = 800, sample_rate: int = audio_module.MIC_SAMPLE_RATE) -> bytes: + t = np.arange(samples, dtype=np.float32) / sample_rate + pcm = (0.4 * np.sin(2 * math.pi * 440.0 * t) * 32767).astype(np.int16) + return pcm.tobytes() + + +class FakeSubMaster: + def __init__(self, payload: bytes): + self.updated = {'rawAudioData': False} + self._payload = payload + self._msg = SimpleNamespace(data=b'', sampleRate=audio_module.MIC_SAMPLE_RATE) + + def update(self, timeout: int): + time.sleep(0.005) + self.updated['rawAudioData'] = True + self._msg.data = self._payload + + def __getitem__(self, key: str): + assert key == 'rawAudioData' + return self._msg + + +async def wait_for_buffer(track: audio_module.DeviceToWebAudioTrack, timeout: float = 1.0): + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + if track._buffer.available() >= track._samples_per_frame: + return + await asyncio.sleep(0.01) + raise TimeoutError("audio track did not buffer a full frame") + + +@pytest.mark.asyncio +async def test_device_to_web_audio_track_reads_raw_audio(monkeypatch): + payload = tone_chunk() + monkeypatch.setattr(audio_module.messaging, "SubMaster", lambda services: FakeSubMaster(payload)) + + track = audio_module.DeviceToWebAudioTrack() + try: + await wait_for_buffer(track) + frame = await asyncio.wait_for(track.recv(), timeout=1) + finally: + track.stop() + track._thread.join(timeout=1) 
+ + pcm = frame.to_ndarray() + assert frame.sample_rate == audio_module.MIC_SAMPLE_RATE + assert frame.samples == int(audio_module.MIC_SAMPLE_RATE * audio_module.AUDIO_PTIME) + assert pcm.shape[-1] == frame.samples + assert np.abs(pcm).sum() > 0 + + +@pytest.mark.asyncio +async def test_stream_session_uses_device_to_web_audio_track(monkeypatch): + payload = tone_chunk() + monkeypatch.setattr(audio_module.messaging, "SubMaster", lambda services: FakeSubMaster(payload)) + monkeypatch.setattr("openpilot.system.webrtc.device.video.LiveStreamVideoStreamTrack", lambda camera_type: VideoStreamTrack()) + monkeypatch.setattr("openpilot.system.webrtc.webrtcd.Params", lambda: SimpleNamespace(get=lambda key: None)) + + session = StreamSession(AUDIO_RECVONLY_OFFER_SDP, [], [], [], audio_output=None, debug_mode=False) + try: + assert isinstance(session.outgoing_audio_track, audio_module.DeviceToWebAudioTrack) + finally: + if session.outgoing_audio_track is not None: + session.outgoing_audio_track.stop() + session.outgoing_audio_track._thread.join(timeout=1) + await session.stream.stop() diff --git a/system/webrtc/tests/test_stream_session.py b/system/webrtc/tests/test_stream_session.py index f44d217d58ced6..21122855a6ba67 100644 --- a/system/webrtc/tests/test_stream_session.py +++ b/system/webrtc/tests/test_stream_session.py @@ -84,4 +84,3 @@ def test_livestream_track(self, mocker): start_pts = packet.pts assert abs(i + packet.pts - (start_pts + (((time.monotonic_ns() - start_ns) * VIDEO_CLOCK_RATE) // 1_000_000_000))) < 450 #5ms assert packet.size == 0 - diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index d2c90cafb5b2e6..5b45588f88e4dc 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -3,8 +3,11 @@ import argparse import asyncio import json -import uuid import logging +import os +import ssl +import subprocess +import uuid from dataclasses import dataclass, field from typing import Any, TYPE_CHECKING @@ -20,6 +23,7 @@ from 
openpilot.system.webrtc.schema import generate_field from cereal import messaging, log +from openpilot.common.params import Params class CerealOutgoingMessageProxy: @@ -115,21 +119,55 @@ async def add_services_if_needed(self, services): self.sock[service] = messaging.pub_sock(service) +def validate_body_sound_name(sound_name: Any) -> str: + from openpilot.system.webrtc.device.audio import BODY_SOUND_NAMES + + if sound_name not in BODY_SOUND_NAMES: + raise ValueError(f"unsupported body sound: {sound_name}") + return sound_name + + class StreamSession: shared_pub_master = DynamicPubMaster([]) - def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], outgoing_services: list[str], debug_mode: bool = False): - from aiortc.mediastreams import VideoStreamTrack + def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], outgoing_services: list[str], + audio_output=None, debug_mode: bool = False): + from aiortc.mediastreams import AudioStreamTrack, VideoStreamTrack + from openpilot.system.webrtc.device.audio import DeviceToWebAudioTrack, WebToDeviceAudioTrack from openpilot.system.webrtc.device.video import LiveStreamVideoStreamTrack from teleoprtc import WebRTCAnswerBuilder from teleoprtc.info import parse_info_from_offer + self.logger = logging.getLogger("webrtcd") config = parse_info_from_offer(sdp) builder = WebRTCAnswerBuilder(sdp) - assert len(cameras) == config.n_expected_camera_tracks, "Incoming stream has misconfigured number of video tracks" - for cam in cameras: - builder.add_video_stream(cam, LiveStreamVideoStreamTrack(cam) if not debug_mode else VideoStreamTrack()) + # Use the camera the encoder is currently active on, so reconnects don't get a blank stream + active_camera = Params().get("LivestreamCamera") or "driver" + if active_camera not in ("driver", "wideRoad"): + active_camera = "driver" + self.video_track = LiveStreamVideoStreamTrack(active_camera) if not debug_mode else VideoStreamTrack() + 
builder.add_video_stream(active_camera, self.video_track) + + self.outgoing_audio_track: DeviceToWebAudioTrack | None = None + if config.expected_audio_track: + try: + if debug_mode: + builder.add_audio_stream(AudioStreamTrack()) + else: + self.outgoing_audio_track = DeviceToWebAudioTrack() + builder.add_audio_stream(self.outgoing_audio_track) + except Exception: + self.logger.exception("Failed to initialize body microphone track") + + self.audio_output: WebToDeviceAudioTrack | None = audio_output if (config.incoming_audio_track or config.incoming_datachannel) else None + if self.audio_output is None and (config.incoming_audio_track or config.incoming_datachannel): + try: + self.audio_output = WebToDeviceAudioTrack() + except Exception: + self.logger.exception("Failed to initialize body speaker output") + if config.incoming_audio_track: + builder.offer_to_receive_audio_stream() self.stream = builder.stream() self.identifier = str(uuid.uuid4()) @@ -145,54 +183,125 @@ def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], o self.outgoing_bridge_runner = CerealProxyRunner(self.outgoing_bridge) self.run_task: asyncio.Task | None = None - self.logger = logging.getLogger("webrtcd") - self.logger.info("New stream session (%s), cameras %s, incoming services %s, outgoing services %s", - self.identifier, cameras, incoming_services, outgoing_services) + self.cleaned_up = False + self.logger.info( + "New stream session (%s), cameras %s, incoming services %s, outgoing services %s, send audio %s, receive audio %s", + self.identifier, cameras, incoming_services, outgoing_services, config.expected_audio_track, config.incoming_audio_track, + ) def start(self): self.run_task = asyncio.create_task(self.run()) def stop(self): - if self.run_task.done(): + if self.run_task is None or self.run_task.done(): return self.run_task.cancel() self.run_task = None - asyncio.run(self.post_run_cleanup()) + try: + loop = asyncio.get_running_loop() + except RuntimeError: + 
asyncio.run(self.post_run_cleanup()) + else: + loop.create_task(self.post_run_cleanup()) async def get_answer(self): return await self.stream.start() + def _handle_clock_sync(self, payload: dict): + import time as _time + data = payload.get("data", {}) + if data.get("action") != "ping": + return + pong = json.dumps({ + "type": "clockSync", + "data": { + "action": "pong", + "browserSendTime": data.get("browserSendTime"), + "deviceTime": _time.time() * 1000, # noqa: TID251 + } + }) + try: + if self.stream.has_messaging_channel(): + self.stream.get_messaging_channel().send(pong) + except Exception: + self.logger.warning("Failed to send clockSync pong") + + def _handle_switch_camera(self, payload: dict): + data = payload.get("data") + if not isinstance(data, dict): + return + camera = data.get("camera") + if camera in ("driver", "wideRoad"): + try: + Params().put("LivestreamCamera", camera) + except Exception: + self.logger.warning("Failed to write LivestreamCamera param") + if hasattr(self, 'video_track') and hasattr(self.video_track, 'switch_camera'): + self.video_track.switch_camera(camera) + self.logger.info("Switched livestream camera to %s", camera) + async def message_handler(self, message: bytes): - assert self.incoming_bridge is not None try: - self.incoming_bridge.send(message) + payload = json.loads(message) if isinstance(message, (bytes, str)) else None + if isinstance(payload, dict) and payload.get("type") == "switchCamera": + self._handle_switch_camera(payload) + return + if isinstance(payload, dict) and payload.get("type") == "clockSync": + self._handle_clock_sync(payload) + return + if isinstance(payload, dict) and payload.get("type") == "enableTimingSei": + enabled = bool(payload.get("data", {}).get("enabled")) + if hasattr(self.video_track, 'timing_sei_enabled'): + self.video_track.timing_sei_enabled = enabled + self.logger.info("Timing SEI %s", "enabled" if enabled else "disabled") + return + if self.incoming_bridge is not None: + 
self.incoming_bridge.send(message) except Exception: self.logger.exception("Cereal incoming proxy failure") async def run(self): try: await self.stream.wait_for_connection() + if self.audio_output is not None and self.stream.has_incoming_audio_track(): + self.audio_output.start_track(self.stream.get_incoming_audio_track()) if self.stream.has_messaging_channel(): if self.incoming_bridge is not None: await self.shared_pub_master.add_services_if_needed(self.incoming_bridge_services) - self.stream.set_message_handler(self.message_handler) + self.stream.set_message_handler(self.message_handler) if self.outgoing_bridge_runner is not None: channel = self.stream.get_messaging_channel() self.outgoing_bridge_runner.proxy.add_channel(channel) self.outgoing_bridge_runner.start() + # Tell the client which camera is currently active + if self.stream.has_messaging_channel(): + try: + active = getattr(self.video_track, '_camera_type', 'driver') + self.stream.get_messaging_channel().send(json.dumps({"type": "activeCamera", "data": {"camera": active}})) + except Exception: + pass self.logger.info("Stream session (%s) connected", self.identifier) await self.stream.wait_for_disconnection() - await self.post_run_cleanup() self.logger.info("Stream session (%s) ended", self.identifier) except Exception: self.logger.exception("Stream session failure") + finally: + await self.post_run_cleanup() async def post_run_cleanup(self): + if self.cleaned_up: + return + self.cleaned_up = True await self.stream.stop() if self.outgoing_bridge is not None: self.outgoing_bridge_runner.stop() + if self.outgoing_audio_track is not None: + self.outgoing_audio_track.stop() + if self.audio_output is not None: + await self.audio_output.stop() + Params().put_bool("JoystickDebugMode", False) @dataclass @@ -203,18 +312,120 @@ class StreamRequestBody: bridge_services_out: list[str] = field(default_factory=list) +@dataclass +class SoundRequestBody: + sound: str + +def _add_cors_headers(request: 'web.Request', 
response: 'web.Response'): + response.headers["Access-Control-Allow-Origin"] = "*" + response.headers["Access-Control-Allow-Headers"] = "Content-Type" + response.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS" + response.headers["Access-Control-Allow-Private-Network"] = "true" + + +@web.middleware +async def cors_middleware(request: 'web.Request', handler): + try: + response = await handler(request) + except web.HTTPException as ex: + _add_cors_headers(request, ex) + raise + _add_cors_headers(request, response) + return response + + +async def stream_options(request: 'web.Request'): + response = web.Response() + _add_cors_headers(request, response) + return response + + +REQUIRED_VIDEO_CODEC = "H264" + +def _validate_sdp_video_codecs(sdp: str): + import aiortc.sdp + desc = aiortc.sdp.SessionDescription.parse(sdp) + required_mime = f"video/{REQUIRED_VIDEO_CODEC}" + for m in desc.media: + if m.kind != "video": + continue + offered_mimes = {c.mimeType for c in m.rtp.codecs} + if required_mime not in offered_mimes: + raise web.HTTPBadRequest( + text=json.dumps({"error": "unsupported_codec", "message": f"Frontend must offer {REQUIRED_VIDEO_CODEC} via setCodecPreferences()"}), + content_type="application/json", + ) + + +def _cleanup_stale_streams(stream_dict: dict): + stale = [sid for sid, s in stream_dict.items() if s.run_task is None or s.run_task.done()] + for sid in stale: + del stream_dict[sid] + + +def _get_active_streams(stream_dict: dict) -> list[str]: + return [sid for sid, s in stream_dict.items() if s.run_task is not None and not s.run_task.done()] + + async def get_stream(request: 'web.Request'): - stream_dict, debug_mode = request.app['streams'], request.app['debug'] - raw_body = await request.json() - body = StreamRequestBody(**raw_body) + logger = logging.getLogger("webrtcd") + try: + stream_dict, debug_mode = request.app['streams'], request.app['debug'] + + _cleanup_stale_streams(stream_dict) + + active_streams = _get_active_streams(stream_dict) 
+ if active_streams: + raise web.HTTPConflict( + text=json.dumps({"error": "already_connected", "message": "Another device is already connected to the stream"}), + content_type="application/json", + ) + + raw_body = await request.json() + body = StreamRequestBody(**raw_body) + _validate_sdp_video_codecs(body.sdp) + + session = StreamSession(body.sdp, body.cameras, body.bridge_services_in, body.bridge_services_out, + request.app['body_audio_output'], debug_mode) + answer = await session.get_answer() + session.start() + Params().put_bool("JoystickDebugMode", True) + + stream_dict[session.identifier] = session - session = StreamSession(body.sdp, body.cameras, body.bridge_services_in, body.bridge_services_out, debug_mode) - answer = await session.get_answer() - session.start() + active_camera = getattr(session.video_track, '_camera_type', 'driver') + response = web.json_response({"sdp": answer.sdp, "type": answer.type, "activeCamera": active_camera}) + _add_cors_headers(request, response) + return response + except web.HTTPException: + raise + except Exception: + logger.exception("Error in /stream handler") + raise - stream_dict[session.identifier] = session - return web.json_response({"sdp": answer.sdp, "type": answer.type}) +async def post_sound(request: 'web.Request'): + try: + raw_body = await request.json() + body = SoundRequestBody(**raw_body) + sound_name = validate_body_sound_name(body.sound) + except web.HTTPException: + raise + except (TypeError, ValueError, json.JSONDecodeError) as err: + raise web.HTTPBadRequest( + text=json.dumps({"error": "invalid_sound", "message": str(err)}), + content_type="application/json", + ) from err + + audio_output = request.app['body_audio_output'] + if audio_output is None: + raise web.HTTPServiceUnavailable( + text=json.dumps({"error": "audio_unavailable", "message": "Body audio output is unavailable"}), + content_type="application/json", + ) + + audio_output.play_sound(sound_name) + return web.Response(status=200, text="OK") 
async def get_schema(request: 'web.Request'): @@ -224,6 +435,32 @@ async def get_schema(request: 'web.Request'): schema_dict = {s: generate_field(log.Event.schema.fields[s]) for s in services} return web.json_response(schema_dict) +TRUST_HTML = """ +comma body + +
+

SSL Certificate Accepted

+

You can close this tab and return to the connect app.

+ +
""" + + +async def get_trust(request: 'web.Request'): + return web.Response(content_type="text/html", text=TRUST_HTML) + + async def post_notify(request: 'web.Request'): try: payload = await request.json() @@ -242,25 +479,88 @@ async def post_notify(request: 'web.Request'): async def on_shutdown(app: 'web.Application'): for session in app['streams'].values(): session.stop() + if app.get('body_audio_output') is not None: + await app['body_audio_output'].stop() del app['streams'] +CERT_PATH = "/data/webrtc_cert.pem" +KEY_PATH = "/data/webrtc_key.pem" + + +def create_ssl_cert(): + logger = logging.getLogger("webrtcd") + try: + proc = subprocess.run( + f'openssl req -x509 -newkey rsa:4096 -nodes -out {CERT_PATH} -keyout {KEY_PATH} ' # noqa: ISC002 + f'-days 365 -subj "/C=US/ST=California/O=commaai/OU=comma body"', + capture_output=True, shell=True, + ) + proc.check_returncode() + except subprocess.CalledProcessError as ex: + raise ValueError(f"Error creating SSL certificate:\n[stdout]\n{proc.stdout.decode()}\n[stderr]\n{proc.stderr.decode()}") from ex + logger.info("SSL certificate created") + + +def create_ssl_context(): + logger = logging.getLogger("webrtcd") + if not os.path.exists(CERT_PATH) or not os.path.exists(KEY_PATH): + logger.info("Creating SSL certificate...") + create_ssl_cert() + else: + logger.info("SSL certificate exists") + ssl_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + ssl_ctx.load_cert_chain(CERT_PATH, KEY_PATH) + return ssl_ctx + + def webrtcd_thread(host: str, port: int, debug: bool): + from openpilot.system.webrtc.device.audio import WebToDeviceAudioTrack + logging.basicConfig(level=logging.CRITICAL, handlers=[logging.StreamHandler()]) logging_level = logging.DEBUG if debug else logging.INFO logging.getLogger("WebRTCStream").setLevel(logging_level) logging.getLogger("webrtcd").setLevel(logging_level) - app = web.Application() + logger = logging.getLogger("webrtcd") + app = web.Application(middlewares=[cors_middleware]) app['streams'] = dict() 
app['debug'] = debug + try: + app['body_audio_output'] = WebToDeviceAudioTrack() + except Exception: + logger.exception("Failed to initialize shared body audio output") + app['body_audio_output'] = None app.on_shutdown.append(on_shutdown) + app.router.add_route("OPTIONS", "/stream", stream_options) + app.router.add_route("OPTIONS", "/sound", stream_options) app.router.add_post("/stream", get_stream) + app.router.add_post("/sound", post_sound) app.router.add_post("/notify", post_notify) app.router.add_get("/schema", get_schema) + app.router.add_get("/trust", get_trust) + + https_port = port + 1 + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + runner = web.AppRunner(app) + + async def start(): + await runner.setup() + + http_site = web.TCPSite(runner, host, port) + await http_site.start() + logger.info("HTTP server running on %s:%d", host, port) + + https_site = web.TCPSite(runner, host, https_port, ssl_context=create_ssl_context()) + await https_site.start() + logger.info("HTTPS server running on %s:%d", host, https_port) - web.run_app(app, host=host, port=port) + loop.run_until_complete(start()) + loop.run_forever() def main(): diff --git a/teleoprtc_repo b/teleoprtc_repo index 389815b8ca5302..2a8e152bc077e5 160000 --- a/teleoprtc_repo +++ b/teleoprtc_repo @@ -1 +1 @@ -Subproject commit 389815b8ca5302ce7c1504b7841d4eb61a8cd51b +Subproject commit 2a8e152bc077e5ba4bfa78144efc5f2eae3e581c From 0c9b62d0dbbbcf60b456d9b6d961a0a1417c688f Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Fri, 10 Apr 2026 10:49:53 -0700 Subject: [PATCH 02/24] remove changes that were split into new pr --- selfdrive/pandad/pandad.cc | 11 ----------- selfdrive/selfdrived/selfdrived.py | 4 +--- system/manager/process_config.py | 8 ++++---- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/selfdrive/pandad/pandad.cc b/selfdrive/pandad/pandad.cc index f0be7565162cac..6fcd6219dedfc5 100644 --- a/selfdrive/pandad/pandad.cc +++ 
b/selfdrive/pandad/pandad.cc @@ -340,17 +340,6 @@ void process_peripheral_state(Panda *panda, PubMaster *pm, bool no_fan_control) ir_pwr = 0; } - // turn off IR leds if body - std::string cp_bytes = params.get("CarParams"); - if (cp_bytes.size() > 0) { - AlignedBuffer aligned_buf; - capnp::FlatArrayMessageReader cmsg(aligned_buf.align(cp_bytes.data(), cp_bytes.size())); - cereal::CarParams::Reader CP = cmsg.getRoot(); - if (CP.getNotCar()) { - ir_pwr = 0; - } - } - if (ir_pwr != prev_ir_pwr || sm.frame % 100 == 0) { int16_t ir_panda = util::map_val(ir_pwr, 0, 100, 0, MAX_IR_PANDA_VAL); panda->set_ir_pwr(ir_panda); diff --git a/selfdrive/selfdrived/selfdrived.py b/selfdrive/selfdrived/selfdrived.py index 046f54744a9f0b..6a294ca8d82786 100755 --- a/selfdrive/selfdrived/selfdrived.py +++ b/selfdrive/selfdrived/selfdrived.py @@ -75,8 +75,6 @@ def __init__(self, CP=None): self.car_state_sock = messaging.sub_sock('carState', timeout=20) ignore = self.sensor_packets + self.gps_packets + ['alertDebug', 'lateralManeuverPlan'] - if self.CP.notCar: - ignore += ['driverMonitoringState'] if SIMULATION: ignore += ['driverCameraState', 'managerState'] if REPLAY: @@ -194,7 +192,7 @@ def update_events(self, CS): if self.CP.notCar: # wait for everything to init first - if self.sm.frame > int(2. / DT_CTRL) and self.initialized: + if self.sm.frame > int(5. 
/ DT_CTRL) and self.initialized: # body always wants to enable self.events.add(EventName.pcmEnable) diff --git a/system/manager/process_config.py b/system/manager/process_config.py index 1ffa22be3f0906..574432333ad708 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -80,7 +80,7 @@ def and_(*fns): PythonProcess("timed", "system.timed", always_run, enabled=not PC), PythonProcess("modeld", "selfdrive.modeld.modeld", only_onroad), - PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", and_(driverview, iscar), enabled=(WEBCAM or not PC)), + PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(WEBCAM or not PC)), PythonProcess("sensord", "system.sensord.sensord", only_onroad, enabled=not PC), PythonProcess("ui", "selfdrive.ui.ui", always_run, restart_if_crash=True), @@ -94,7 +94,7 @@ def and_(*fns): PythonProcess("selfdrived", "selfdrive.selfdrived.selfdrived", only_onroad), PythonProcess("card", "selfdrive.car.card", only_onroad), PythonProcess("deleter", "system.loggerd.deleter", always_run), - PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", and_(driverview, iscar), enabled=(WEBCAM or not PC)), + PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", driverview, enabled=(WEBCAM or not PC)), PythonProcess("qcomgpsd", "system.qcomgpsd.qcomgpsd", qcomgps, enabled=TICI), PythonProcess("pandad", "selfdrive.pandad.pandad", always_run), PythonProcess("paramsd", "selfdrive.locationd.paramsd", only_onroad), @@ -112,10 +112,10 @@ def and_(*fns): PythonProcess("statsd", "system.statsd", always_run), PythonProcess("feedbackd", "selfdrive.ui.feedback.feedbackd", only_onroad), - PythonProcess("webrtcd", "system.webrtc.webrtcd", notcar), - # debug procs NativeProcess("bridge", "cereal/messaging", ["./bridge"], notcar), + PythonProcess("webrtcd", "system.webrtc.webrtcd", notcar), + PythonProcess("webjoystick", "tools.bodyteleop.web", notcar), 
PythonProcess("joystick", "tools.joystick.joystick_control", and_(joystick, iscar)), ] From d79d6a9e6bac125e2f28cc7b7d7e5b8b68fdd9a7 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Fri, 10 Apr 2026 13:06:17 -0700 Subject: [PATCH 03/24] sound bites over datachannel instead of /sound --- system/webrtc/webrtcd.py | 73 ++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index 5b45588f88e4dc..d3e147b8db7270 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -240,6 +240,27 @@ def _handle_switch_camera(self, payload: dict): self.video_track.switch_camera(camera) self.logger.info("Switched livestream camera to %s", camera) + def _handle_play_sound(self, payload: dict): + data = payload.get("data") + if not isinstance(data, dict): + self.logger.warning("Ignoring malformed playSound request") + return + + try: + sound_name = validate_body_sound_name(data.get("sound")) + except ValueError: + self.logger.warning("Ignoring invalid playSound request: %s", data.get("sound")) + return + + if self.audio_output is None: + self.logger.warning("Ignoring playSound request; body audio output unavailable") + return + + try: + self.audio_output.play_sound(sound_name) + except Exception: + self.logger.exception("Failed to play body sound") + async def message_handler(self, message: bytes): try: payload = json.loads(message) if isinstance(message, (bytes, str)) else None @@ -255,6 +276,9 @@ async def message_handler(self, message: bytes): self.video_track.timing_sei_enabled = enabled self.logger.info("Timing SEI %s", "enabled" if enabled else "disabled") return + if isinstance(payload, dict) and payload.get("type") == "playSound": + self._handle_play_sound(payload) + return if self.incoming_bridge is not None: self.incoming_bridge.send(message) except Exception: @@ -311,11 +335,6 @@ class StreamRequestBody: bridge_services_in: 
list[str] = field(default_factory=list) bridge_services_out: list[str] = field(default_factory=list) - -@dataclass -class SoundRequestBody: - sound: str - def _add_cors_headers(request: 'web.Request', response: 'web.Response'): response.headers["Access-Control-Allow-Origin"] = "*" response.headers["Access-Control-Allow-Headers"] = "Content-Type" @@ -404,30 +423,6 @@ async def get_stream(request: 'web.Request'): raise -async def post_sound(request: 'web.Request'): - try: - raw_body = await request.json() - body = SoundRequestBody(**raw_body) - sound_name = validate_body_sound_name(body.sound) - except web.HTTPException: - raise - except (TypeError, ValueError, json.JSONDecodeError) as err: - raise web.HTTPBadRequest( - text=json.dumps({"error": "invalid_sound", "message": str(err)}), - content_type="application/json", - ) from err - - audio_output = request.app['body_audio_output'] - if audio_output is None: - raise web.HTTPServiceUnavailable( - text=json.dumps({"error": "audio_unavailable", "message": "Body audio output is unavailable"}), - content_type="application/json", - ) - - audio_output.play_sound(sound_name) - return web.Response(status=200, text="OK") - - async def get_schema(request: 'web.Request'): services = request.query["services"].split(",") services = [s for s in services if s] @@ -514,14 +509,9 @@ def create_ssl_context(): return ssl_ctx -def webrtcd_thread(host: str, port: int, debug: bool): +def create_app(debug: bool) -> web.Application: from openpilot.system.webrtc.device.audio import WebToDeviceAudioTrack - logging.basicConfig(level=logging.CRITICAL, handlers=[logging.StreamHandler()]) - logging_level = logging.DEBUG if debug else logging.INFO - logging.getLogger("WebRTCStream").setLevel(logging_level) - logging.getLogger("webrtcd").setLevel(logging_level) - logger = logging.getLogger("webrtcd") app = web.Application(middlewares=[cors_middleware]) @@ -534,12 +524,21 @@ def webrtcd_thread(host: str, port: int, debug: bool): 
app['body_audio_output'] = None app.on_shutdown.append(on_shutdown) app.router.add_route("OPTIONS", "/stream", stream_options) - app.router.add_route("OPTIONS", "/sound", stream_options) app.router.add_post("/stream", get_stream) - app.router.add_post("/sound", post_sound) app.router.add_post("/notify", post_notify) app.router.add_get("/schema", get_schema) app.router.add_get("/trust", get_trust) + return app + + +def webrtcd_thread(host: str, port: int, debug: bool): + logging.basicConfig(level=logging.CRITICAL, handlers=[logging.StreamHandler()]) + logging_level = logging.DEBUG if debug else logging.INFO + logging.getLogger("WebRTCStream").setLevel(logging_level) + logging.getLogger("webrtcd").setLevel(logging_level) + logger = logging.getLogger("webrtcd") + + app = create_app(debug) https_port = port + 1 From 101a73a3c161b33178e51df00c3e9352efd9c1cc Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Fri, 10 Apr 2026 19:06:52 -0700 Subject: [PATCH 04/24] move ssl stuff to different branch --- system/webrtc/webrtcd.py | 83 +--------------------------------------- 1 file changed, 1 insertion(+), 82 deletions(-) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index d3e147b8db7270..1078d0076b227c 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -4,9 +4,6 @@ import asyncio import json import logging -import os -import ssl -import subprocess import uuid from dataclasses import dataclass, field from typing import Any, TYPE_CHECKING @@ -430,31 +427,6 @@ async def get_schema(request: 'web.Request'): schema_dict = {s: generate_field(log.Event.schema.fields[s]) for s in services} return web.json_response(schema_dict) -TRUST_HTML = """ -comma body - -
-

SSL Certificate Accepted

-

You can close this tab and return to the connect app.

- -
""" - - -async def get_trust(request: 'web.Request'): - return web.Response(content_type="text/html", text=TRUST_HTML) - async def post_notify(request: 'web.Request'): try: @@ -479,36 +451,6 @@ async def on_shutdown(app: 'web.Application'): del app['streams'] -CERT_PATH = "/data/webrtc_cert.pem" -KEY_PATH = "/data/webrtc_key.pem" - - -def create_ssl_cert(): - logger = logging.getLogger("webrtcd") - try: - proc = subprocess.run( - f'openssl req -x509 -newkey rsa:4096 -nodes -out {CERT_PATH} -keyout {KEY_PATH} ' # noqa: ISC002 - f'-days 365 -subj "/C=US/ST=California/O=commaai/OU=comma body"', - capture_output=True, shell=True, - ) - proc.check_returncode() - except subprocess.CalledProcessError as ex: - raise ValueError(f"Error creating SSL certificate:\n[stdout]\n{proc.stdout.decode()}\n[stderr]\n{proc.stderr.decode()}") from ex - logger.info("SSL certificate created") - - -def create_ssl_context(): - logger = logging.getLogger("webrtcd") - if not os.path.exists(CERT_PATH) or not os.path.exists(KEY_PATH): - logger.info("Creating SSL certificate...") - create_ssl_cert() - else: - logger.info("SSL certificate exists") - ssl_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) - ssl_ctx.load_cert_chain(CERT_PATH, KEY_PATH) - return ssl_ctx - - def create_app(debug: bool) -> web.Application: from openpilot.system.webrtc.device.audio import WebToDeviceAudioTrack @@ -527,7 +469,6 @@ def create_app(debug: bool) -> web.Application: app.router.add_post("/stream", get_stream) app.router.add_post("/notify", post_notify) app.router.add_get("/schema", get_schema) - app.router.add_get("/trust", get_trust) return app @@ -536,30 +477,8 @@ def webrtcd_thread(host: str, port: int, debug: bool): logging_level = logging.DEBUG if debug else logging.INFO logging.getLogger("WebRTCStream").setLevel(logging_level) logging.getLogger("webrtcd").setLevel(logging_level) - logger = logging.getLogger("webrtcd") - app = create_app(debug) - - https_port = port + 1 - - loop = asyncio.new_event_loop() - 
asyncio.set_event_loop(loop) - - runner = web.AppRunner(app) - - async def start(): - await runner.setup() - - http_site = web.TCPSite(runner, host, port) - await http_site.start() - logger.info("HTTP server running on %s:%d", host, port) - - https_site = web.TCPSite(runner, host, https_port, ssl_context=create_ssl_context()) - await https_site.start() - logger.info("HTTPS server running on %s:%d", host, https_port) - - loop.run_until_complete(start()) - loop.run_forever() + web.run_app(app, host=host, port=port) def main(): From 15ef6600ad8d0537d403ad53b3a914162cefaa36 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Fri, 10 Apr 2026 20:25:45 -0700 Subject: [PATCH 05/24] clean up webrtcd --- system/webrtc/webrtcd.py | 155 ++++++++++++--------------------------- 1 file changed, 45 insertions(+), 110 deletions(-) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index 1078d0076b227c..e299f5fad44cfe 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -19,10 +19,14 @@ from aiortc.rtcdatachannel import RTCDataChannel from openpilot.system.webrtc.schema import generate_field +from openpilot.system.webrtc.utils import clock_sync_build_json, validate_sound_name from cereal import messaging, log from openpilot.common.params import Params +from openpilot.system.webrtc.device.audio import BODY_SOUND_NAMES +REQUIRED_VIDEO_CODEC = "H264" + class CerealOutgoingMessageProxy: def __init__(self, sm: messaging.SubMaster): self.sm = sm @@ -116,14 +120,6 @@ async def add_services_if_needed(self, services): self.sock[service] = messaging.pub_sock(service) -def validate_body_sound_name(sound_name: Any) -> str: - from openpilot.system.webrtc.device.audio import BODY_SOUND_NAMES - - if sound_name not in BODY_SOUND_NAMES: - raise ValueError(f"unsupported body sound: {sound_name}") - return sound_name - - class StreamSession: shared_pub_master = DynamicPubMaster([]) @@ -204,80 +200,42 @@ def stop(self): async def 
get_answer(self): return await self.stream.start() - def _handle_clock_sync(self, payload: dict): - import time as _time - data = payload.get("data", {}) - if data.get("action") != "ping": - return - pong = json.dumps({ - "type": "clockSync", - "data": { - "action": "pong", - "browserSendTime": data.get("browserSendTime"), - "deviceTime": _time.time() * 1000, # noqa: TID251 - } - }) - try: - if self.stream.has_messaging_channel(): - self.stream.get_messaging_channel().send(pong) - except Exception: - self.logger.warning("Failed to send clockSync pong") - - def _handle_switch_camera(self, payload: dict): - data = payload.get("data") - if not isinstance(data, dict): - return - camera = data.get("camera") - if camera in ("driver", "wideRoad"): - try: - Params().put("LivestreamCamera", camera) - except Exception: - self.logger.warning("Failed to write LivestreamCamera param") - if hasattr(self, 'video_track') and hasattr(self.video_track, 'switch_camera'): - self.video_track.switch_camera(camera) - self.logger.info("Switched livestream camera to %s", camera) - - def _handle_play_sound(self, payload: dict): - data = payload.get("data") - if not isinstance(data, dict): - self.logger.warning("Ignoring malformed playSound request") - return - - try: - sound_name = validate_body_sound_name(data.get("sound")) - except ValueError: - self.logger.warning("Ignoring invalid playSound request: %s", data.get("sound")) - return - - if self.audio_output is None: - self.logger.warning("Ignoring playSound request; body audio output unavailable") - return - - try: - self.audio_output.play_sound(sound_name) - except Exception: - self.logger.exception("Failed to play body sound") - async def message_handler(self, message: bytes): try: payload = json.loads(message) if isinstance(message, (bytes, str)) else None - if isinstance(payload, dict) and payload.get("type") == "switchCamera": - self._handle_switch_camera(payload) - return - if isinstance(payload, dict) and payload.get("type") == 
"clockSync": - self._handle_clock_sync(payload) - return - if isinstance(payload, dict) and payload.get("type") == "enableTimingSei": - enabled = bool(payload.get("data", {}).get("enabled")) - if hasattr(self.video_track, 'timing_sei_enabled'): - self.video_track.timing_sei_enabled = enabled - self.logger.info("Timing SEI %s", "enabled" if enabled else "disabled") - return - if isinstance(payload, dict) and payload.get("type") == "playSound": - self._handle_play_sound(payload) - return - if self.incoming_bridge is not None: + if isinstance(payload, dict): + data = payload.get("data") + if not isinstance(data, dict): + raise ValueError + + if payload.get("type") == "switchCamera": + camera = data.get("camera") + if camera in ("driver", "wideRoad"): + Params().put("LivestreamCamera", camera) + if hasattr(self, 'video_track') and hasattr(self.video_track, 'switch_camera'): + self.video_track.switch_camera(camera) + self.logger.info("Switched livestream camera to %s", camera) + else: raise ValueError + + elif payload.get("type") == "clockSync": + pong = clock_sync_build_json(payload) + self.stream.get_messaging_channel().send(pong) + + elif payload.get("type") == "enableTimingSei": + enabled = bool(payload.get("data", {}).get("enabled")) + if hasattr(self.video_track, 'timing_sei_enabled'): + self.video_track.timing_sei_enabled = enabled + + elif payload.get("type") == "playSound": + sound = payload.get("sound") + if sound in BODY_SOUND_NAMES: + self.audio_output.play_sound(sound) + + elif self.incoming_bridge is not None: self.incoming_bridge.send(message) + + except ValueError: + self.logger.warning("Ignoring malformed request: %s", payload) except Exception: self.logger.exception("Cereal incoming proxy failure") @@ -294,13 +252,10 @@ async def run(self): channel = self.stream.get_messaging_channel() self.outgoing_bridge_runner.proxy.add_channel(channel) self.outgoing_bridge_runner.start() - # Tell the client which camera is currently active - if 
self.stream.has_messaging_channel(): - try: - active = getattr(self.video_track, '_camera_type', 'driver') - self.stream.get_messaging_channel().send(json.dumps({"type": "activeCamera", "data": {"camera": active}})) - except Exception: - pass + # tell the client which camera is currently active + active = getattr(self.video_track, '_camera_type', 'driver') + self.stream.get_messaging_channel().send(json.dumps({"type": "activeCamera", "data": {"camera": active}})) + self.logger.info("Stream session (%s) connected", self.identifier) await self.stream.wait_for_disconnection() @@ -332,7 +287,7 @@ class StreamRequestBody: bridge_services_in: list[str] = field(default_factory=list) bridge_services_out: list[str] = field(default_factory=list) -def _add_cors_headers(request: 'web.Request', response: 'web.Response'): +def _add_cors_headers(_, response: 'web.Response'): response.headers["Access-Control-Allow-Origin"] = "*" response.headers["Access-Control-Allow-Headers"] = "Content-Type" response.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS" @@ -356,8 +311,6 @@ async def stream_options(request: 'web.Request'): return response -REQUIRED_VIDEO_CODEC = "H264" - def _validate_sdp_video_codecs(sdp: str): import aiortc.sdp desc = aiortc.sdp.SessionDescription.parse(sdp) @@ -373,25 +326,16 @@ def _validate_sdp_video_codecs(sdp: str): ) -def _cleanup_stale_streams(stream_dict: dict): - stale = [sid for sid, s in stream_dict.items() if s.run_task is None or s.run_task.done()] - for sid in stale: - del stream_dict[sid] - - -def _get_active_streams(stream_dict: dict) -> list[str]: - return [sid for sid, s in stream_dict.items() if s.run_task is not None and not s.run_task.done()] - - async def get_stream(request: 'web.Request'): logger = logging.getLogger("webrtcd") try: stream_dict, debug_mode = request.app['streams'], request.app['debug'] - _cleanup_stale_streams(stream_dict) + # cleanup old streams + for sid in [sid for sid, s in stream_dict.items() if s.run_task is None 
or s.run_task.done()]: + del stream_dict[sid] - active_streams = _get_active_streams(stream_dict) - if active_streams: + if stream_dict: raise web.HTTPConflict( text=json.dumps({"error": "already_connected", "message": "Another device is already connected to the stream"}), content_type="application/json", @@ -452,18 +396,9 @@ async def on_shutdown(app: 'web.Application'): def create_app(debug: bool) -> web.Application: - from openpilot.system.webrtc.device.audio import WebToDeviceAudioTrack - - logger = logging.getLogger("webrtcd") - app = web.Application(middlewares=[cors_middleware]) app['streams'] = dict() app['debug'] = debug - try: - app['body_audio_output'] = WebToDeviceAudioTrack() - except Exception: - logger.exception("Failed to initialize shared body audio output") - app['body_audio_output'] = None app.on_shutdown.append(on_shutdown) app.router.add_route("OPTIONS", "/stream", stream_options) app.router.add_post("/stream", get_stream) From ba6f8dfdb8dcc65d682eb0cec3093f9d6b6e3bbc Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Sun, 12 Apr 2026 16:08:56 -0700 Subject: [PATCH 06/24] cleanup --- selfdrive/ui/soundd.py | 88 ++++++++++++++++++++++++- system/webrtc/device/audio.py | 65 ++++++++++--------- system/webrtc/tests/test_audio.py | 104 ------------------------------ system/webrtc/utils.py | 15 +++++ 4 files changed, 136 insertions(+), 136 deletions(-) delete mode 100644 system/webrtc/tests/test_audio.py create mode 100644 system/webrtc/utils.py diff --git a/selfdrive/ui/soundd.py b/selfdrive/ui/soundd.py index b2783efebd25c6..68a3997e05625f 100644 --- a/selfdrive/ui/soundd.py +++ b/selfdrive/ui/soundd.py @@ -1,5 +1,7 @@ +from collections import deque import math import numpy as np +import threading import time import wave @@ -15,6 +17,8 @@ SAMPLE_RATE = 48000 SAMPLE_BUFFER = 4096 # (approx 100ms) +MAX_WEBRTC_BUFFER_SAMPLES = SAMPLE_RATE +WEBRTC_START_BUFFER_SAMPLES = SAMPLE_BUFFER + (SAMPLE_RATE // 50) # keep 
headroom over 20ms WebRTC chunks MAX_VOLUME = 1.0 MIN_VOLUME = 0.1 ALERT_RAMP_TIME = 4 # seconds to ramp to max volume for warningImmediate @@ -75,6 +79,11 @@ def __init__(self): self.selfdrive_timeout_alert = False self.spl_filter_weighted = FirstOrderFilter(0, 2.5, FILTER_DT, initialized=False) + self.webrtc_buffer: deque[np.ndarray] = deque() + self.webrtc_buffer_offset = 0 + self.webrtc_buffer_size = 0 + self.webrtc_playing = False + self.webrtc_lock = threading.Lock() def load_sounds(self): self.loaded_sounds: dict[int, np.ndarray] = {} @@ -112,10 +121,83 @@ def get_sound_data(self, frames): # get "frames" worth of data from the current return ret * self.current_volume + def _trim_webrtc_buffer(self): + overflow = self.webrtc_buffer_size - MAX_WEBRTC_BUFFER_SAMPLES + while overflow > 0 and self.webrtc_buffer: + chunk = self.webrtc_buffer[0] + available = chunk.size - self.webrtc_buffer_offset + drop = min(overflow, available) + self.webrtc_buffer_offset += drop + self.webrtc_buffer_size -= drop + overflow -= drop + + if self.webrtc_buffer_offset >= chunk.size: + self.webrtc_buffer.popleft() + self.webrtc_buffer_offset = 0 + + def _clear_webrtc_buffer_locked(self): + self.webrtc_buffer.clear() + self.webrtc_buffer_offset = 0 + self.webrtc_buffer_size = 0 + + def add_webrtc_audio(self, audio_data: bytes, sample_rate: int): + if sample_rate != SAMPLE_RATE: + cloudlog.warning(f"soundd dropping webrtc audio with unexpected sample rate: {sample_rate}") + return + if not audio_data: + return + + samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / (2**15) + if samples.size == 0: + return + + with self.webrtc_lock: + self.webrtc_buffer.append(samples) + self.webrtc_buffer_size += samples.size + self._trim_webrtc_buffer() + + def get_webrtc_audio(self, frames: int) -> np.ndarray: + out = np.zeros(frames, dtype=np.float32) + + with self.webrtc_lock: + if not self.webrtc_playing: + if self.webrtc_buffer_size < max(frames, WEBRTC_START_BUFFER_SAMPLES): 
+ return out + self.webrtc_playing = True + + if self.webrtc_buffer_size < frames: + self._clear_webrtc_buffer_locked() + self.webrtc_playing = False + return out + + written = 0 + while written < frames and self.webrtc_buffer: + chunk = self.webrtc_buffer[0] + available = chunk.size - self.webrtc_buffer_offset + take = min(frames - written, available) + out[written:written + take] = chunk[self.webrtc_buffer_offset:self.webrtc_buffer_offset + take] + written += take + self.webrtc_buffer_offset += take + + if self.webrtc_buffer_offset >= chunk.size: + self.webrtc_buffer.popleft() + self.webrtc_buffer_offset = 0 + + self.webrtc_buffer_size -= written + + return out + + def webrtc_audio_thread(self, sock) -> None: + while True: + for msg in messaging.drain_sock(sock, wait_for_one=True): + audio = msg.webrtcAudioData + self.add_webrtc_audio(audio.data, audio.sampleRate) + def callback(self, data_out: np.ndarray, frames: int, time, status) -> None: if status: cloudlog.warning(f"soundd stream over/underflow: {status}") sound = self.get_sound_data(frames) + sound += self.get_webrtc_audio(frames) np.clip(sound, -1.0, 1.0, out=sound) data_out[:frames, 0] = sound @@ -129,12 +211,14 @@ def update_alert(self, new_alert): self.current_sound_frame = 0 def get_audible_alert(self, sm): + sound_request_updated = False if sm.updated['soundRequest']: new_alert = sm['soundRequest'].sound.raw if new_alert != AudibleAlert.none: self.update_alert(new_alert) + sound_request_updated = True - if sm.updated['selfdriveState']: + if sm.updated['selfdriveState'] and not sound_request_updated: new_alert = sm['selfdriveState'].alertSound.raw self.update_alert(new_alert) elif check_selfdrive_timeout_alert(sm): @@ -160,6 +244,8 @@ def soundd_thread(self): import sounddevice as sd sm = messaging.SubMaster(['selfdriveState', 'soundPressure', 'soundRequest']) + webrtc_audio_sock = messaging.sub_sock('webrtcAudioData', conflate=False) + threading.Thread(target=self.webrtc_audio_thread, 
args=(webrtc_audio_sock,), daemon=True).start() with self.get_stream(sd) as stream: rk = Ratekeeper(20) diff --git a/system/webrtc/device/audio.py b/system/webrtc/device/audio.py index 440a5f4af6953a..1dae547d21c0da 100644 --- a/system/webrtc/device/audio.py +++ b/system/webrtc/device/audio.py @@ -26,41 +26,49 @@ BODY_SOUND_NAMES = frozenset(BODY_SOUND_ALERTS) +def audio_plane_to_bytes(plane) -> bytes: + to_bytes = getattr(plane, "to_bytes", None) + if callable(to_bytes): + return to_bytes() + + # PyAV 16 dropped AudioPlane.to_bytes(), but still exposes the plane buffer. + return memoryview(plane).tobytes() + + +def audio_frame_to_bytes(frame: AudioFrame) -> bytes: + return b"".join(audio_plane_to_bytes(plane) for plane in frame.planes) + + class PcmBuffer: def __init__(self, dtype=np.int16): self._chunks: deque[np.ndarray] = deque() - self._offset = 0 self._size = 0 self._dtype = dtype - def push(self, samples: np.ndarray): - if samples.size == 0: - return - chunk = np.ascontiguousarray(samples, dtype=self._dtype) - self._chunks.append(chunk) - self._size += chunk.size - - def available(self) -> int: + def __len__(self) -> int: return self._size - def pop(self, size: int) -> np.ndarray: - out = np.zeros(size, dtype=self._dtype) - written = 0 + def push(self, samples: np.ndarray): + if samples.size: + chunk = np.ascontiguousarray(samples, dtype=self._dtype) + self._chunks.append(chunk) + self._size += chunk.size - while written < size and self._chunks: - chunk = self._chunks[0] - remaining = chunk.size - self._offset - take = min(size - written, remaining) - out[written:written + take] = chunk[self._offset:self._offset + take] - written += take - self._offset += take + def pop(self, size: int) -> np.ndarray: + if size > len(self): + raise ValueError(f"requested {size} samples, only {len(self)} available") - if self._offset >= chunk.size: - self._chunks.popleft() - self._offset = 0 + self._size -= size + parts: list[np.ndarray] = [] + while size: + chunk = 
self._chunks.popleft() + take = min(size, chunk.size) + parts.append(chunk[:take]) + if take < chunk.size: + self._chunks.appendleft(chunk[take:]) + size -= take - self._size -= written - return out + return np.concatenate(parts) if parts else np.empty(0, dtype=self._dtype) class DeviceToWebAudioTrack(AudioStreamTrack): @@ -97,7 +105,7 @@ def _poll_cereal(self): async def _next_frame_samples(self) -> np.ndarray: while self.readyState == "live": - if self._buffer.available() >= self._samples_per_frame: + if len(self._buffer) >= self._samples_per_frame: return self._buffer.pop(self._samples_per_frame) await self._buffer_event.wait() @@ -165,7 +173,7 @@ async def _consume_track(self, track): while True: frame = await track.recv() for resampled in resampler.resample(frame): - self._send_audio_data(resampled.planes[0].to_bytes()) + self._send_audio_data(audio_frame_to_bytes(resampled)) except MediaStreamError: LOGGER.info("Incoming browser audio track ended") except asyncio.CancelledError: @@ -181,8 +189,3 @@ async def stop(self): except asyncio.CancelledError: pass self._task = None - - -# Backwards-compatible aliases while older call sites are updated. 
-BodyMicAudioTrack = DeviceToWebAudioTrack -BodySpeaker = WebToDeviceAudioTrack diff --git a/system/webrtc/tests/test_audio.py b/system/webrtc/tests/test_audio.py deleted file mode 100644 index 760c02929af12e..00000000000000 --- a/system/webrtc/tests/test_audio.py +++ /dev/null @@ -1,104 +0,0 @@ -import asyncio -import math -import time -from types import SimpleNamespace - -import numpy as np -import pytest -from aiortc.mediastreams import VideoStreamTrack - -from openpilot.system.webrtc.device import audio as audio_module -from openpilot.system.webrtc.webrtcd import StreamSession - - -AUDIO_RECVONLY_OFFER_SDP = """v=0 -o=- 3910210904 3910210904 IN IP4 0.0.0.0 -s=- -t=0 0 -a=group:BUNDLE 0 -a=msid-semantic:WMS * -m=audio 9 UDP/TLS/RTP/SAVPF 96 0 8 -c=IN IP4 0.0.0.0 -a=recvonly -a=extmap:1 urn:ietf:params:rtp-hdrext:sdes:mid -a=extmap:2 urn:ietf:params:rtp-hdrext:ssrc-audio-level -a=mid:0 -a=msid:eb1d3f1a-569a-465f-b419-319477bfded6 e44eecb2-1a04-4547-97d8-481389f50d5b -a=rtcp:9 IN IP4 0.0.0.0 -a=rtcp-mux -a=ssrc:1233332626 cname:ca4dede8-4994-4a6d-9ae3-923b28177ca5 -a=rtpmap:96 opus/48000/2 -a=rtpmap:0 PCMU/8000 -a=rtpmap:8 PCMA/8000 -a=ice-ufrag:1234 -a=ice-pwd:1234 -a=fingerprint:sha-256 40:4B:14:CF:70:B8:67:E1:B1:FF:7E:F9:22:6E:60:7D:73:B5:1E:38:4B:10:20:9C:CD:1C:47:02:52:ED:45:25 -a=setup:actpass""" - - -def tone_chunk(samples: int = 800, sample_rate: int = audio_module.MIC_SAMPLE_RATE) -> bytes: - t = np.arange(samples, dtype=np.float32) / sample_rate - pcm = (0.4 * np.sin(2 * math.pi * 440.0 * t) * 32767).astype(np.int16) - return pcm.tobytes() - - -class FakeSubMaster: - def __init__(self, payload: bytes): - self.updated = {'rawAudioData': False} - self._payload = payload - self._msg = SimpleNamespace(data=b'', sampleRate=audio_module.MIC_SAMPLE_RATE) - - def update(self, timeout: int): - time.sleep(0.005) - self.updated['rawAudioData'] = True - self._msg.data = self._payload - - def __getitem__(self, key: str): - assert key == 'rawAudioData' - return 
self._msg - - -async def wait_for_buffer(track: audio_module.DeviceToWebAudioTrack, timeout: float = 1.0): - deadline = time.monotonic() + timeout - while time.monotonic() < deadline: - if track._buffer.available() >= track._samples_per_frame: - return - await asyncio.sleep(0.01) - raise TimeoutError("audio track did not buffer a full frame") - - -@pytest.mark.asyncio -async def test_device_to_web_audio_track_reads_raw_audio(monkeypatch): - payload = tone_chunk() - monkeypatch.setattr(audio_module.messaging, "SubMaster", lambda services: FakeSubMaster(payload)) - - track = audio_module.DeviceToWebAudioTrack() - try: - await wait_for_buffer(track) - frame = await asyncio.wait_for(track.recv(), timeout=1) - finally: - track.stop() - track._thread.join(timeout=1) - - pcm = frame.to_ndarray() - assert frame.sample_rate == audio_module.MIC_SAMPLE_RATE - assert frame.samples == int(audio_module.MIC_SAMPLE_RATE * audio_module.AUDIO_PTIME) - assert pcm.shape[-1] == frame.samples - assert np.abs(pcm).sum() > 0 - - -@pytest.mark.asyncio -async def test_stream_session_uses_device_to_web_audio_track(monkeypatch): - payload = tone_chunk() - monkeypatch.setattr(audio_module.messaging, "SubMaster", lambda services: FakeSubMaster(payload)) - monkeypatch.setattr("openpilot.system.webrtc.device.video.LiveStreamVideoStreamTrack", lambda camera_type: VideoStreamTrack()) - monkeypatch.setattr("openpilot.system.webrtc.webrtcd.Params", lambda: SimpleNamespace(get=lambda key: None)) - - session = StreamSession(AUDIO_RECVONLY_OFFER_SDP, [], [], [], audio_output=None, debug_mode=False) - try: - assert isinstance(session.outgoing_audio_track, audio_module.DeviceToWebAudioTrack) - finally: - if session.outgoing_audio_track is not None: - session.outgoing_audio_track.stop() - session.outgoing_audio_track._thread.join(timeout=1) - await session.stream.stop() diff --git a/system/webrtc/utils.py b/system/webrtc/utils.py new file mode 100644 index 00000000000000..afee39e77c3ce7 --- /dev/null +++ 
b/system/webrtc/utils.py @@ -0,0 +1,15 @@ +import json + +def clock_sync_build_json(payload: dict) -> str | None: + import time as _time + data = payload.get("data", {}) + if data.get("action") != "ping": + raise ValueError + return json.dumps({ + "type": "clockSync", + "data": { + "action": "pong", + "browserSendTime": data.get("browserSendTime"), + "deviceTime": _time.time() * 1000, # noqa: TID251 + } + }) From c096646cd23ca3f5e14234ca8123b27d142cb6d8 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:46:49 -0700 Subject: [PATCH 07/24] clean: remove 2-way audio, use existing cereal bridge better in webrtcd --- cereal/log.capnp | 11 +- cereal/services.py | 1 + selfdrive/ui/soundd.py | 101 ++---------------- system/athena/athenad.py | 2 +- system/loggerd/encoderd.cc | 33 +++--- system/webrtc/device/audio.py | 191 ---------------------------------- system/webrtc/webrtcd.py | 129 ++++++++--------------- 7 files changed, 78 insertions(+), 390 deletions(-) delete mode 100644 system/webrtc/device/audio.py diff --git a/cereal/log.capnp b/cereal/log.capnp index 17e9b1a8646752..047f36427e9c30 100644 --- a/cereal/log.capnp +++ b/cereal/log.capnp @@ -2360,6 +2360,15 @@ struct SoundRequest { sound @0 :Car.CarControl.HUDControl.AudibleAlert; } +struct LiveStreamCamera { + camera @0 :CameraType; + + enum CameraType { + driver @0; + wideRoad @1; + } +} + struct Touch { sec @0 :Int64; usec @1 :Int64; @@ -2479,7 +2488,7 @@ struct Event { livestreamDriverEncodeData @122 :EncodeData; soundRequest @151 :SoundRequest; - webrtcAudioData @152 :AudioData; + liveStreamCamera @152 :LiveStreamCamera; # *********** Custom: reserved for forks *********** diff --git a/cereal/services.py b/cereal/services.py index 257cafc98ac456..8376198064c264 100755 --- a/cereal/services.py +++ b/cereal/services.py @@ -86,6 +86,7 @@ def __init__(self, should_log: bool, frequency: float, decimation: Optional[int] "bookmarkButton": (True, 0., 1), 
"audioFeedback": (True, 0., 1), "soundRequest": (False, 0.), + "liveStreamCamera": (False, 0.), "webrtcAudioData": (False, 0.), "roadEncodeData": (False, 20., None, QueueSize.BIG), "driverEncodeData": (False, 20., None, QueueSize.BIG), diff --git a/selfdrive/ui/soundd.py b/selfdrive/ui/soundd.py index 68a3997e05625f..09d8935806fdde 100644 --- a/selfdrive/ui/soundd.py +++ b/selfdrive/ui/soundd.py @@ -1,7 +1,5 @@ -from collections import deque import math import numpy as np -import threading import time import wave @@ -17,8 +15,6 @@ SAMPLE_RATE = 48000 SAMPLE_BUFFER = 4096 # (approx 100ms) -MAX_WEBRTC_BUFFER_SAMPLES = SAMPLE_RATE -WEBRTC_START_BUFFER_SAMPLES = SAMPLE_BUFFER + (SAMPLE_RATE // 50) # keep headroom over 20ms WebRTC chunks MAX_VOLUME = 1.0 MIN_VOLUME = 0.1 ALERT_RAMP_TIME = 4 # seconds to ramp to max volume for warningImmediate @@ -79,11 +75,6 @@ def __init__(self): self.selfdrive_timeout_alert = False self.spl_filter_weighted = FirstOrderFilter(0, 2.5, FILTER_DT, initialized=False) - self.webrtc_buffer: deque[np.ndarray] = deque() - self.webrtc_buffer_offset = 0 - self.webrtc_buffer_size = 0 - self.webrtc_playing = False - self.webrtc_lock = threading.Lock() def load_sounds(self): self.loaded_sounds: dict[int, np.ndarray] = {} @@ -121,85 +112,10 @@ def get_sound_data(self, frames): # get "frames" worth of data from the current return ret * self.current_volume - def _trim_webrtc_buffer(self): - overflow = self.webrtc_buffer_size - MAX_WEBRTC_BUFFER_SAMPLES - while overflow > 0 and self.webrtc_buffer: - chunk = self.webrtc_buffer[0] - available = chunk.size - self.webrtc_buffer_offset - drop = min(overflow, available) - self.webrtc_buffer_offset += drop - self.webrtc_buffer_size -= drop - overflow -= drop - - if self.webrtc_buffer_offset >= chunk.size: - self.webrtc_buffer.popleft() - self.webrtc_buffer_offset = 0 - - def _clear_webrtc_buffer_locked(self): - self.webrtc_buffer.clear() - self.webrtc_buffer_offset = 0 - self.webrtc_buffer_size = 0 - - def 
add_webrtc_audio(self, audio_data: bytes, sample_rate: int): - if sample_rate != SAMPLE_RATE: - cloudlog.warning(f"soundd dropping webrtc audio with unexpected sample rate: {sample_rate}") - return - if not audio_data: - return - - samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / (2**15) - if samples.size == 0: - return - - with self.webrtc_lock: - self.webrtc_buffer.append(samples) - self.webrtc_buffer_size += samples.size - self._trim_webrtc_buffer() - - def get_webrtc_audio(self, frames: int) -> np.ndarray: - out = np.zeros(frames, dtype=np.float32) - - with self.webrtc_lock: - if not self.webrtc_playing: - if self.webrtc_buffer_size < max(frames, WEBRTC_START_BUFFER_SAMPLES): - return out - self.webrtc_playing = True - - if self.webrtc_buffer_size < frames: - self._clear_webrtc_buffer_locked() - self.webrtc_playing = False - return out - - written = 0 - while written < frames and self.webrtc_buffer: - chunk = self.webrtc_buffer[0] - available = chunk.size - self.webrtc_buffer_offset - take = min(frames - written, available) - out[written:written + take] = chunk[self.webrtc_buffer_offset:self.webrtc_buffer_offset + take] - written += take - self.webrtc_buffer_offset += take - - if self.webrtc_buffer_offset >= chunk.size: - self.webrtc_buffer.popleft() - self.webrtc_buffer_offset = 0 - - self.webrtc_buffer_size -= written - - return out - - def webrtc_audio_thread(self, sock) -> None: - while True: - for msg in messaging.drain_sock(sock, wait_for_one=True): - audio = msg.webrtcAudioData - self.add_webrtc_audio(audio.data, audio.sampleRate) - def callback(self, data_out: np.ndarray, frames: int, time, status) -> None: if status: cloudlog.warning(f"soundd stream over/underflow: {status}") - sound = self.get_sound_data(frames) - sound += self.get_webrtc_audio(frames) - np.clip(sound, -1.0, 1.0, out=sound) - data_out[:frames, 0] = sound + data_out[:frames, 0] = self.get_sound_data(frames) def update_alert(self, new_alert): 
current_alert_played_once = self.current_alert == AudibleAlert.none or self.current_sound_frame > len(self.loaded_sounds[self.current_alert]) @@ -211,14 +127,7 @@ def update_alert(self, new_alert): self.current_sound_frame = 0 def get_audible_alert(self, sm): - sound_request_updated = False - if sm.updated['soundRequest']: - new_alert = sm['soundRequest'].sound.raw - if new_alert != AudibleAlert.none: - self.update_alert(new_alert) - sound_request_updated = True - - if sm.updated['selfdriveState'] and not sound_request_updated: + if sm.updated['selfdriveState']: new_alert = sm['selfdriveState'].alertSound.raw self.update_alert(new_alert) elif check_selfdrive_timeout_alert(sm): @@ -227,6 +136,10 @@ def get_audible_alert(self, sm): elif self.selfdrive_timeout_alert: self.update_alert(AudibleAlert.none) self.selfdrive_timeout_alert = False + elif sm.updated['soundRequest']: + new_alert = sm['soundRequest'].sound.raw + if new_alert != AudibleAlert.none: + self.update_alert(new_alert) def calculate_volume(self, weighted_db): volume = ((weighted_db - AMBIENT_DB) / DB_SCALE) * (MAX_VOLUME - MIN_VOLUME) + MIN_VOLUME @@ -244,8 +157,6 @@ def soundd_thread(self): import sounddevice as sd sm = messaging.SubMaster(['selfdriveState', 'soundPressure', 'soundRequest']) - webrtc_audio_sock = messaging.sub_sock('webrtcAudioData', conflate=False) - threading.Thread(target=self.webrtc_audio_thread, args=(webrtc_audio_sock,), daemon=True).start() with self.get_stream(sd) as stream: rk = Ratekeeper(20) diff --git a/system/athena/athenad.py b/system/athena/athenad.py index 6b925fc2d24df3..86053a64ac9845 100755 --- a/system/athena/athenad.py +++ b/system/athena/athenad.py @@ -571,7 +571,7 @@ def getNetworks(): @dispatcher.add_method def startJoystickStream(sdp: str) -> dict: from openpilot.system.webrtc.webrtcd import StreamRequestBody - body = StreamRequestBody(sdp, ["driver"], ["testJoystick"], ["carState"]) + body = StreamRequestBody(sdp, ["driver"], ["testJoystick", "soundRequest", 
"liveStreamCamera"], ["carState"]) try: resp = requests.post(f"http://localhost:{WEBRTCD_PORT}/stream", json=asdict(body), timeout=10) diff --git a/system/loggerd/encoderd.cc b/system/loggerd/encoderd.cc index 656934f4a3d269..10657d32c3d603 100644 --- a/system/loggerd/encoderd.cc +++ b/system/loggerd/encoderd.cc @@ -151,15 +151,12 @@ void encoderd_thread(const LogCameraInfo (&cameras)[N]) { } } -// Map param value to stream camera config -const LogCameraInfo *find_stream_camera(const std::string &name) { - if (name == "driver") return &stream_driver_camera_info; - return &stream_wide_road_camera_info; // default +const LogCameraInfo *find_stream_camera(cereal::LiveStreamCamera::CameraType type) { + if (type == cereal::LiveStreamCamera::CameraType::DRIVER) return &stream_driver_camera_info; + return &stream_wide_road_camera_info; } void stream_encoderd_thread() { - Params params; - // Wait for cameras to be available std::set available_streams; while (!do_exit) { @@ -168,16 +165,17 @@ void stream_encoderd_thread() { util::sleep_for(100); } - std::string active_camera = params.get("LivestreamCamera"); - if (active_camera.empty()) active_camera = "driver"; + SubMaster sm({"liveStreamCamera"}); + + auto active_camera = cereal::LiveStreamCamera::CameraType::DRIVER; while (!do_exit) { const LogCameraInfo *cam_info = find_stream_camera(active_camera); // Check that the requested camera stream is available if (available_streams.find(cam_info->stream_type) == available_streams.end()) { - LOGE("stream encoder: camera %s not available, falling back", active_camera.c_str()); - active_camera = "wideRoad"; + LOGE("stream encoder: requested camera not available, falling back to wideRoad"); + active_camera = cereal::LiveStreamCamera::CameraType::WIDE_ROAD; cam_info = find_stream_camera(active_camera); } @@ -188,7 +186,7 @@ void stream_encoderd_thread() { } const VisionBuf &buf_info = vipc_client.buffers[0]; - LOGW("stream encoder init %s %zux%zu", active_camera.c_str(), 
buf_info.width, buf_info.height); + LOGW("stream encoder init %zux%zu", buf_info.width, buf_info.height); assert(buf_info.width > 0 && buf_info.height > 0); const auto &encoder_info = cam_info->encoder_infos[0]; @@ -197,11 +195,14 @@ void stream_encoderd_thread() { while (!do_exit) { // Check for camera switch request - std::string requested = params.get("LivestreamCamera"); - if (!requested.empty() && requested != active_camera) { - LOGW("stream encoder switching from %s to %s", active_camera.c_str(), requested.c_str()); - active_camera = requested; - break; // break to reinit encoder with new camera + sm.update(0); + if (sm.updated("liveStreamCamera")) { + auto requested = sm["liveStreamCamera"].getLiveStreamCamera().getCamera(); + if (requested != active_camera) { + LOGW("stream encoder switching camera"); + active_camera = requested; + break; // break to reinit encoder with new camera + } } VisionIpcBufExtra extra; diff --git a/system/webrtc/device/audio.py b/system/webrtc/device/audio.py deleted file mode 100644 index 1dae547d21c0da..00000000000000 --- a/system/webrtc/device/audio.py +++ /dev/null @@ -1,191 +0,0 @@ -import asyncio -import fractions -import logging -import threading -import time -from collections import deque - -import numpy as np -from av import AudioFrame, AudioResampler -from aiortc.mediastreams import AudioStreamTrack, MediaStreamError - -from cereal import car, messaging - -AUDIO_PTIME = 0.020 -MIC_SAMPLE_RATE = 16000 -SPEAKER_SAMPLE_RATE = 48000 -LOGGER = logging.getLogger("webrtcd") - -AudibleAlert = car.CarControl.HUDControl.AudibleAlert -BODY_SOUND_ALERTS = { - "engage": AudibleAlert.engage, - "disengage": AudibleAlert.disengage, - "prompt": AudibleAlert.prompt, - "warning": AudibleAlert.warningImmediate, -} -BODY_SOUND_NAMES = frozenset(BODY_SOUND_ALERTS) - - -def audio_plane_to_bytes(plane) -> bytes: - to_bytes = getattr(plane, "to_bytes", None) - if callable(to_bytes): - return to_bytes() - - # PyAV 16 dropped AudioPlane.to_bytes(), 
but still exposes the plane buffer. - return memoryview(plane).tobytes() - - -def audio_frame_to_bytes(frame: AudioFrame) -> bytes: - return b"".join(audio_plane_to_bytes(plane) for plane in frame.planes) - - -class PcmBuffer: - def __init__(self, dtype=np.int16): - self._chunks: deque[np.ndarray] = deque() - self._size = 0 - self._dtype = dtype - - def __len__(self) -> int: - return self._size - - def push(self, samples: np.ndarray): - if samples.size: - chunk = np.ascontiguousarray(samples, dtype=self._dtype) - self._chunks.append(chunk) - self._size += chunk.size - - def pop(self, size: int) -> np.ndarray: - if size > len(self): - raise ValueError(f"requested {size} samples, only {len(self)} available") - - self._size -= size - parts: list[np.ndarray] = [] - while size: - chunk = self._chunks.popleft() - take = min(size, chunk.size) - parts.append(chunk[:take]) - if take < chunk.size: - self._chunks.appendleft(chunk[take:]) - size -= take - - return np.concatenate(parts) if parts else np.empty(0, dtype=self._dtype) - - -class DeviceToWebAudioTrack(AudioStreamTrack): - def __init__(self): - super().__init__() - self._loop = asyncio.get_running_loop() - self._buffer = PcmBuffer() - self._buffer_event = asyncio.Event() - self._sample_rate = MIC_SAMPLE_RATE - self._samples_per_frame = int(self._sample_rate * AUDIO_PTIME) - self._time_base = fractions.Fraction(1, self._sample_rate) - self._running = True - self._thread = threading.Thread(target=self._poll_cereal, daemon=True) - self._thread.start() - - def _push_samples(self, samples: np.ndarray): - self._buffer.push(samples) - self._buffer_event.set() - - def _poll_cereal(self): - sm = messaging.SubMaster(['rawAudioData']) - while self._running: - sm.update(20) - if not sm.updated['rawAudioData']: - continue - - raw_bytes = sm['rawAudioData'].data - if not raw_bytes: - continue - - # .copy() required: frombuffer is a view over the cereal message buffer, invalidated by next sm.update() - samples = 
np.frombuffer(raw_bytes, dtype=np.int16).copy() - self._loop.call_soon_threadsafe(self._push_samples, samples) - - async def _next_frame_samples(self) -> np.ndarray: - while self.readyState == "live": - if len(self._buffer) >= self._samples_per_frame: - return self._buffer.pop(self._samples_per_frame) - - await self._buffer_event.wait() - self._buffer_event.clear() - - raise MediaStreamError - - async def _next_timestamp(self) -> int: - if not hasattr(self, "_timestamp"): - self._start = time.monotonic() - self._timestamp = 0 - return self._timestamp - - self._timestamp += self._samples_per_frame - wait = self._start + (self._timestamp / self._sample_rate) - time.monotonic() - if wait > 0: - await asyncio.sleep(wait) - return self._timestamp - - async def recv(self): - if self.readyState != "live": - raise MediaStreamError - - frame_samples = await self._next_frame_samples() - timestamp = await self._next_timestamp() - - frame = AudioFrame(format="s16", layout="mono", samples=self._samples_per_frame) - frame.planes[0].update(frame_samples.tobytes()) - frame.pts = timestamp - frame.sample_rate = self._sample_rate - frame.time_base = self._time_base - return frame - - def stop(self): - super().stop() - self._running = False - try: - self._loop.call_soon_threadsafe(self._buffer_event.set) - except RuntimeError: - self._buffer_event.set() - - -class WebToDeviceAudioTrack: - def __init__(self): - self._pm = messaging.PubMaster(['soundRequest', 'webrtcAudioData']) - self._task: asyncio.Task | None = None - - def play_sound(self, sound_name: str): - msg = messaging.new_message('soundRequest') - msg.soundRequest.sound = BODY_SOUND_ALERTS[sound_name] - self._pm.send('soundRequest', msg) - - def start_track(self, track): - self._task = asyncio.create_task(self._consume_track(track)) - - def _send_audio_data(self, data: bytes): - msg = messaging.new_message('webrtcAudioData') - msg.webrtcAudioData.data = data - msg.webrtcAudioData.sampleRate = SPEAKER_SAMPLE_RATE - 
self._pm.send('webrtcAudioData', msg) - - async def _consume_track(self, track): - resampler = AudioResampler(format='s16', layout='mono', rate=SPEAKER_SAMPLE_RATE) - try: - while True: - frame = await track.recv() - for resampled in resampler.resample(frame): - self._send_audio_data(audio_frame_to_bytes(resampled)) - except MediaStreamError: - LOGGER.info("Incoming browser audio track ended") - except asyncio.CancelledError: - raise - except Exception: - LOGGER.exception("BodySpeaker track consumption error") - - async def stop(self): - if self._task is not None and not self._task.done(): - self._task.cancel() - try: - await self._task - except asyncio.CancelledError: - pass - self._task = None diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index e299f5fad44cfe..d76010338e708f 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -19,12 +19,11 @@ from aiortc.rtcdatachannel import RTCDataChannel from openpilot.system.webrtc.schema import generate_field -from openpilot.system.webrtc.utils import clock_sync_build_json, validate_sound_name +from openpilot.system.webrtc.utils import clock_sync_build_json from cereal import messaging, log from openpilot.common.params import Params -from openpilot.system.webrtc.device.audio import BODY_SOUND_NAMES - +INITIAL_CAMERA = "driver" REQUIRED_VIDEO_CODEC = "H264" class CerealOutgoingMessageProxy: @@ -123,44 +122,16 @@ async def add_services_if_needed(self, services): class StreamSession: shared_pub_master = DynamicPubMaster([]) - def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], outgoing_services: list[str], - audio_output=None, debug_mode: bool = False): - from aiortc.mediastreams import AudioStreamTrack, VideoStreamTrack - from openpilot.system.webrtc.device.audio import DeviceToWebAudioTrack, WebToDeviceAudioTrack + def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], outgoing_services: list[str], debug_mode: bool = False): + from 
aiortc.mediastreams import VideoStreamTrack from openpilot.system.webrtc.device.video import LiveStreamVideoStreamTrack from teleoprtc import WebRTCAnswerBuilder - from teleoprtc.info import parse_info_from_offer self.logger = logging.getLogger("webrtcd") - config = parse_info_from_offer(sdp) builder = WebRTCAnswerBuilder(sdp) - # Use the camera the encoder is currently active on, so reconnects don't get a blank stream - active_camera = Params().get("LivestreamCamera") or "driver" - if active_camera not in ("driver", "wideRoad"): - active_camera = "driver" - self.video_track = LiveStreamVideoStreamTrack(active_camera) if not debug_mode else VideoStreamTrack() - builder.add_video_stream(active_camera, self.video_track) - - self.outgoing_audio_track: DeviceToWebAudioTrack | None = None - if config.expected_audio_track: - try: - if debug_mode: - builder.add_audio_stream(AudioStreamTrack()) - else: - self.outgoing_audio_track = DeviceToWebAudioTrack() - builder.add_audio_stream(self.outgoing_audio_track) - except Exception: - self.logger.exception("Failed to initialize body microphone track") - - self.audio_output: WebToDeviceAudioTrack | None = audio_output if (config.incoming_audio_track or config.incoming_datachannel) else None - if self.audio_output is None and (config.incoming_audio_track or config.incoming_datachannel): - try: - self.audio_output = WebToDeviceAudioTrack() - except Exception: - self.logger.exception("Failed to initialize body speaker output") - if config.incoming_audio_track: - builder.offer_to_receive_audio_stream() + self.video_track = LiveStreamVideoStreamTrack(INITIAL_CAMERA) if not debug_mode else VideoStreamTrack() + builder.add_video_stream(INITIAL_CAMERA, self.video_track) self.stream = builder.stream() self.identifier = str(uuid.uuid4()) @@ -178,8 +149,8 @@ def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], o self.run_task: asyncio.Task | None = None self.cleaned_up = False self.logger.info( - "New stream 
session (%s), cameras %s, incoming services %s, outgoing services %s, send audio %s, receive audio %s", - self.identifier, cameras, incoming_services, outgoing_services, config.expected_audio_track, config.incoming_audio_track, + "New stream session (%s), cameras %s, incoming services %s, outgoing services %s", + self.identifier, cameras, incoming_services, outgoing_services, ) def start(self): @@ -200,40 +171,33 @@ def stop(self): async def get_answer(self): return await self.stream.start() - async def message_handler(self, message: bytes): + def message_handler(self, message: bytes): try: payload = json.loads(message) if isinstance(message, (bytes, str)) else None - if isinstance(payload, dict): - data = payload.get("data") - if not isinstance(data, dict): - raise ValueError - - if payload.get("type") == "switchCamera": - camera = data.get("camera") + if not isinstance(payload, dict): + raise ValueError + msg_type = payload.get("type") + + if msg_type == "clockSync": + pong = clock_sync_build_json(payload) + self.stream.get_messaging_channel().send(pong) + return + + if msg_type == "enableTimingSei": + enabled = bool(payload.get("data", {}).get("enabled")) + if hasattr(self.video_track, 'timing_sei_enabled'): + self.video_track.timing_sei_enabled = enabled + return + + if self.incoming_bridge is not None: + if msg_type == "liveStreamCamera": + camera = payload.get("data").get("camera") if camera in ("driver", "wideRoad"): - Params().put("LivestreamCamera", camera) if hasattr(self, 'video_track') and hasattr(self.video_track, 'switch_camera'): self.video_track.switch_camera(camera) - self.logger.info("Switched livestream camera to %s", camera) - else: raise ValueError - - elif payload.get("type") == "clockSync": - pong = clock_sync_build_json(payload) - self.stream.get_messaging_channel().send(pong) - - elif payload.get("type") == "enableTimingSei": - enabled = bool(payload.get("data", {}).get("enabled")) - if hasattr(self.video_track, 'timing_sei_enabled'): - 
self.video_track.timing_sei_enabled = enabled - - elif payload.get("type") == "playSound": - sound = payload.get("sound") - if sound in BODY_SOUND_NAMES: - self.audio_output.play_sound(sound) - - elif self.incoming_bridge is not None: + elif msg_type == "soundRequest": + return self.incoming_bridge.send(message) - except ValueError: self.logger.warning("Ignoring malformed request: %s", payload) except Exception: @@ -242,19 +206,16 @@ async def message_handler(self, message: bytes): async def run(self): try: await self.stream.wait_for_connection() - if self.audio_output is not None and self.stream.has_incoming_audio_track(): - self.audio_output.start_track(self.stream.get_incoming_audio_track()) if self.stream.has_messaging_channel(): if self.incoming_bridge is not None: await self.shared_pub_master.add_services_if_needed(self.incoming_bridge_services) + # set camera to default + self.incoming_bridge.send(json.dumps({"type": "liveStreamCamera", "data": {"camera": INITIAL_CAMERA}}).encode()) self.stream.set_message_handler(self.message_handler) if self.outgoing_bridge_runner is not None: channel = self.stream.get_messaging_channel() self.outgoing_bridge_runner.proxy.add_channel(channel) self.outgoing_bridge_runner.start() - # tell the client which camera is currently active - active = getattr(self.video_track, '_camera_type', 'driver') - self.stream.get_messaging_channel().send(json.dumps({"type": "activeCamera", "data": {"camera": active}})) self.logger.info("Stream session (%s) connected", self.identifier) @@ -273,10 +234,6 @@ async def post_run_cleanup(self): await self.stream.stop() if self.outgoing_bridge is not None: self.outgoing_bridge_runner.stop() - if self.outgoing_audio_track is not None: - self.outgoing_audio_track.stop() - if self.audio_output is not None: - await self.audio_output.stop() Params().put_bool("JoystickDebugMode", False) @@ -331,30 +288,29 @@ async def get_stream(request: 'web.Request'): try: stream_dict, debug_mode = 
request.app['streams'], request.app['debug'] - # cleanup old streams - for sid in [sid for sid, s in stream_dict.items() if s.run_task is None or s.run_task.done()]: + # disconnect any other active stream + for sid, s in list(stream_dict.items()): + if s.run_task and not s.run_task.done(): + try: + ch = s.stream.get_messaging_channel() + ch.send(json.dumps({"type": "connectionReplaced", "data": "Another device has connected, closing this session."})) + except Exception: + pass + s.stop() del stream_dict[sid] - if stream_dict: - raise web.HTTPConflict( - text=json.dumps({"error": "already_connected", "message": "Another device is already connected to the stream"}), - content_type="application/json", - ) - raw_body = await request.json() body = StreamRequestBody(**raw_body) _validate_sdp_video_codecs(body.sdp) - session = StreamSession(body.sdp, body.cameras, body.bridge_services_in, body.bridge_services_out, - request.app['body_audio_output'], debug_mode) + session = StreamSession(body.sdp, body.cameras, body.bridge_services_in, body.bridge_services_out, debug_mode) answer = await session.get_answer() session.start() Params().put_bool("JoystickDebugMode", True) stream_dict[session.identifier] = session - active_camera = getattr(session.video_track, '_camera_type', 'driver') - response = web.json_response({"sdp": answer.sdp, "type": answer.type, "activeCamera": active_camera}) + response = web.json_response({"sdp": answer.sdp, "type": answer.type}) _add_cors_headers(request, response) return response except web.HTTPException: @@ -387,6 +343,7 @@ async def post_notify(request: 'web.Request'): return web.Response(status=200, text="OK") + async def on_shutdown(app: 'web.Application'): for session in app['streams'].values(): session.stop() From 4e183e83a5b8c8e759ccdf58fa52d09aa1e7a76a Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:13:59 -0700 Subject: [PATCH 08/24] clean up webrtcd --- system/webrtc/webrtcd.py | 
26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index d76010338e708f..17153e13b5087c 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -126,10 +126,13 @@ def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], o from aiortc.mediastreams import VideoStreamTrack from openpilot.system.webrtc.device.video import LiveStreamVideoStreamTrack from teleoprtc import WebRTCAnswerBuilder + from teleoprtc.info import parse_info_from_offer - self.logger = logging.getLogger("webrtcd") + config = parse_info_from_offer(sdp) builder = WebRTCAnswerBuilder(sdp) + assert len(cameras) == config.n_expected_camera_tracks, "Incoming stream has misconfigured number of video tracks" + self.video_track = LiveStreamVideoStreamTrack(INITIAL_CAMERA) if not debug_mode else VideoStreamTrack() builder.add_video_stream(INITIAL_CAMERA, self.video_track) @@ -147,7 +150,7 @@ def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], o self.outgoing_bridge_runner = CerealProxyRunner(self.outgoing_bridge) self.run_task: asyncio.Task | None = None - self.cleaned_up = False + self.logger = logging.getLogger("webrtcd") self.logger.info( "New stream session (%s), cameras %s, incoming services %s, outgoing services %s", self.identifier, cameras, incoming_services, outgoing_services, @@ -161,12 +164,7 @@ def stop(self): return self.run_task.cancel() self.run_task = None - try: - loop = asyncio.get_running_loop() - except RuntimeError: - asyncio.run(self.post_run_cleanup()) - else: - loop.create_task(self.post_run_cleanup()) + asyncio.get_running_loop().create_task(self.post_run_cleanup()) async def get_answer(self): return await self.stream.start() @@ -228,9 +226,6 @@ async def run(self): await self.post_run_cleanup() async def post_run_cleanup(self): - if self.cleaned_up: - return - self.cleaned_up = True await self.stream.stop() if 
self.outgoing_bridge is not None: self.outgoing_bridge_runner.stop() @@ -262,12 +257,6 @@ async def cors_middleware(request: 'web.Request', handler): return response -async def stream_options(request: 'web.Request'): - response = web.Response() - _add_cors_headers(request, response) - return response - - def _validate_sdp_video_codecs(sdp: str): import aiortc.sdp desc = aiortc.sdp.SessionDescription.parse(sdp) @@ -347,8 +336,6 @@ async def post_notify(request: 'web.Request'): async def on_shutdown(app: 'web.Application'): for session in app['streams'].values(): session.stop() - if app.get('body_audio_output') is not None: - await app['body_audio_output'].stop() del app['streams'] @@ -357,7 +344,6 @@ def create_app(debug: bool) -> web.Application: app['streams'] = dict() app['debug'] = debug app.on_shutdown.append(on_shutdown) - app.router.add_route("OPTIONS", "/stream", stream_options) app.router.add_post("/stream", get_stream) app.router.add_post("/notify", post_notify) app.router.add_get("/schema", get_schema) From b0faa2ebe08f3a67fec73be9cf276630186948be Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:17:36 -0700 Subject: [PATCH 09/24] clean diff --- common/params_keys.h | 1 - system/manager/process_config.py | 2 +- system/webrtc/tests/test_stream_session.py | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/params_keys.h b/common/params_keys.h index 883950d0752374..b81a373d0876ef 100644 --- a/common/params_keys.h +++ b/common/params_keys.h @@ -78,7 +78,6 @@ inline static std::unordered_map keys = { {"LastUpdateTime", {PERSISTENT, TIME}}, {"LastUpdateUptimeOnroad", {PERSISTENT, FLOAT, "0.0"}}, {"LiveDelay", {PERSISTENT, BYTES}}, - {"LivestreamCamera", {CLEAR_ON_MANAGER_START, STRING, "driver"}}, {"LiveParameters", {PERSISTENT, JSON}}, {"LiveParametersV2", {PERSISTENT, BYTES}}, {"LiveTorqueParameters", {PERSISTENT | DONT_LOG, BYTES}}, diff --git a/system/manager/process_config.py 
b/system/manager/process_config.py index 574432333ad708..7e96b7776a4f5f 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -76,7 +76,7 @@ def and_(*fns): PythonProcess("webcamerad", "tools.webcam.camerad", driverview, enabled=WEBCAM), PythonProcess("proclogd", "system.proclogd", only_onroad, enabled=platform.system() != "Darwin"), PythonProcess("journald", "system.journald", only_onroad, platform.system() != "Darwin"), - PythonProcess("micd", "system.micd", only_onroad), + PythonProcess("micd", "system.micd", iscar), PythonProcess("timed", "system.timed", always_run, enabled=not PC), PythonProcess("modeld", "selfdrive.modeld.modeld", only_onroad), diff --git a/system/webrtc/tests/test_stream_session.py b/system/webrtc/tests/test_stream_session.py index 21122855a6ba67..f44d217d58ced6 100644 --- a/system/webrtc/tests/test_stream_session.py +++ b/system/webrtc/tests/test_stream_session.py @@ -84,3 +84,4 @@ def test_livestream_track(self, mocker): start_pts = packet.pts assert abs(i + packet.pts - (start_pts + (((time.monotonic_ns() - start_ns) * VIDEO_CLOCK_RATE) // 1_000_000_000))) < 450 #5ms assert packet.size == 0 + From 04112bc0a01238a8ec7db66a2f7557a10ca81ae9 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:33:49 -0700 Subject: [PATCH 10/24] fix sound bites --- selfdrive/ui/soundd.py | 10 ++++++---- system/webrtc/webrtcd.py | 2 -- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/selfdrive/ui/soundd.py b/selfdrive/ui/soundd.py index 09d8935806fdde..17ef3209adeff9 100644 --- a/selfdrive/ui/soundd.py +++ b/selfdrive/ui/soundd.py @@ -127,6 +127,12 @@ def update_alert(self, new_alert): self.current_sound_frame = 0 def get_audible_alert(self, sm): + if sm.updated['soundRequest']: + new_alert = sm['soundRequest'].sound.raw + if new_alert != AudibleAlert.none: + self.update_alert(new_alert) + return + if sm.updated['selfdriveState']: new_alert = 
sm['selfdriveState'].alertSound.raw self.update_alert(new_alert) @@ -136,10 +142,6 @@ def get_audible_alert(self, sm): elif self.selfdrive_timeout_alert: self.update_alert(AudibleAlert.none) self.selfdrive_timeout_alert = False - elif sm.updated['soundRequest']: - new_alert = sm['soundRequest'].sound.raw - if new_alert != AudibleAlert.none: - self.update_alert(new_alert) def calculate_volume(self, weighted_db): volume = ((weighted_db - AMBIENT_DB) / DB_SCALE) * (MAX_VOLUME - MIN_VOLUME) + MIN_VOLUME diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index 17153e13b5087c..17d1898b854c94 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -193,8 +193,6 @@ def message_handler(self, message: bytes): if camera in ("driver", "wideRoad"): if hasattr(self, 'video_track') and hasattr(self.video_track, 'switch_camera'): self.video_track.switch_camera(camera) - elif msg_type == "soundRequest": - return self.incoming_bridge.send(message) except ValueError: self.logger.warning("Ignoring malformed request: %s", payload) From 42b1e3d7739e1f801bc5d4810a51cb3d4b052955 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:35:46 -0700 Subject: [PATCH 11/24] teleoprtc pin to master commit --- teleoprtc_repo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/teleoprtc_repo b/teleoprtc_repo index 2a8e152bc077e5..389815b8ca5302 160000 --- a/teleoprtc_repo +++ b/teleoprtc_repo @@ -1 +1 @@ -Subproject commit 2a8e152bc077e5ba4bfa78144efc5f2eae3e581c +Subproject commit 389815b8ca5302ce7c1504b7841d4eb61a8cd51b From b1333cf62acb0314ffa31011be502e5321973751 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:44:32 -0700 Subject: [PATCH 12/24] remove webrtcAudio --- cereal/services.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cereal/services.py b/cereal/services.py index 8376198064c264..23a536375a68a2 100755 --- a/cereal/services.py 
+++ b/cereal/services.py @@ -87,7 +87,6 @@ def __init__(self, should_log: bool, frequency: float, decimation: Optional[int] "audioFeedback": (True, 0., 1), "soundRequest": (False, 0.), "liveStreamCamera": (False, 0.), - "webrtcAudioData": (False, 0.), "roadEncodeData": (False, 20., None, QueueSize.BIG), "driverEncodeData": (False, 20., None, QueueSize.BIG), "wideRoadEncodeData": (False, 20., None, QueueSize.BIG), From 3440fd4ac9829556c8b5abce71fbcdde0f5685da Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 13:55:58 -0700 Subject: [PATCH 13/24] 1 stream encode packet, simplify video stream --- cereal/log.capnp | 6 ++- cereal/services.py | 3 +- selfdrive/ui/soundd.py | 10 ++--- system/athena/athenad.py | 2 +- system/loggerd/encoderd.cc | 55 ++++++++++--------------- system/loggerd/loggerd.h | 27 +++--------- system/webrtc/device/video.py | 77 ++++++++--------------------------- system/webrtc/webrtcd.py | 11 +---- 8 files changed, 54 insertions(+), 137 deletions(-) diff --git a/cereal/log.capnp b/cereal/log.capnp index 047f36427e9c30..091f55e029e2c1 100644 --- a/cereal/log.capnp +++ b/cereal/log.capnp @@ -2487,8 +2487,10 @@ struct Event { livestreamWideRoadEncodeData @121 :EncodeData; livestreamDriverEncodeData @122 :EncodeData; - soundRequest @151 :SoundRequest; - liveStreamCamera @152 :LiveStreamCamera; + livestreamCameraEncodeData @151 :EncodeData; + livestreamCameraSwitch @152 :LiveStreamCamera; + + soundRequest @153 :SoundRequest; # *********** Custom: reserved for forks *********** diff --git a/cereal/services.py b/cereal/services.py index 23a536375a68a2..cdfa1d6db59a09 100755 --- a/cereal/services.py +++ b/cereal/services.py @@ -86,7 +86,7 @@ def __init__(self, should_log: bool, frequency: float, decimation: Optional[int] "bookmarkButton": (True, 0., 1), "audioFeedback": (True, 0., 1), "soundRequest": (False, 0.), - "liveStreamCamera": (False, 0.), + "livestreamCameraSwitch": (False, 0.), "roadEncodeData": 
(False, 20., None, QueueSize.BIG), "driverEncodeData": (False, 20., None, QueueSize.BIG), "wideRoadEncodeData": (False, 20., None, QueueSize.BIG), @@ -102,6 +102,7 @@ def __init__(self, should_log: bool, frequency: float, decimation: Optional[int] "livestreamWideRoadEncodeData": (False, 20., None, QueueSize.MEDIUM), "livestreamRoadEncodeData": (False, 20., None, QueueSize.MEDIUM), "livestreamDriverEncodeData": (False, 20., None, QueueSize.MEDIUM), + "livestreamCameraEncodeData": (False, 20., None, QueueSize.MEDIUM), "customReservedRawData0": (True, 0.), "customReservedRawData1": (True, 0.), "customReservedRawData2": (True, 0.), diff --git a/selfdrive/ui/soundd.py b/selfdrive/ui/soundd.py index 17ef3209adeff9..86c36498ae155b 100644 --- a/selfdrive/ui/soundd.py +++ b/selfdrive/ui/soundd.py @@ -127,13 +127,9 @@ def update_alert(self, new_alert): self.current_sound_frame = 0 def get_audible_alert(self, sm): - if sm.updated['soundRequest']: - new_alert = sm['soundRequest'].sound.raw - if new_alert != AudibleAlert.none: - self.update_alert(new_alert) - return - - if sm.updated['selfdriveState']: + if sm.updated['soundRequest'] and sm['soundRequest'].sound.raw != AudibleAlert.none: + self.update_alert(sm['soundRequest'].sound.raw) + elif sm.updated['selfdriveState']: new_alert = sm['selfdriveState'].alertSound.raw self.update_alert(new_alert) elif check_selfdrive_timeout_alert(sm): diff --git a/system/athena/athenad.py b/system/athena/athenad.py index 86053a64ac9845..7dc7e8c24967cc 100755 --- a/system/athena/athenad.py +++ b/system/athena/athenad.py @@ -571,7 +571,7 @@ def getNetworks(): @dispatcher.add_method def startJoystickStream(sdp: str) -> dict: from openpilot.system.webrtc.webrtcd import StreamRequestBody - body = StreamRequestBody(sdp, ["driver"], ["testJoystick", "soundRequest", "liveStreamCamera"], ["carState"]) + body = StreamRequestBody(sdp, ["driver"], ["testJoystick", "soundRequest", "livestreamCameraSwitch"], ["carState"]) try: resp = 
requests.post(f"http://localhost:{WEBRTCD_PORT}/stream", json=asdict(body), timeout=10) diff --git a/system/loggerd/encoderd.cc b/system/loggerd/encoderd.cc index 10657d32c3d603..93b9494848db41 100644 --- a/system/loggerd/encoderd.cc +++ b/system/loggerd/encoderd.cc @@ -151,35 +151,20 @@ void encoderd_thread(const LogCameraInfo (&cameras)[N]) { } } -const LogCameraInfo *find_stream_camera(cereal::LiveStreamCamera::CameraType type) { - if (type == cereal::LiveStreamCamera::CameraType::DRIVER) return &stream_driver_camera_info; - return &stream_wide_road_camera_info; -} - -void stream_encoderd_thread() { - // Wait for cameras to be available - std::set available_streams; +template +void stream_encoderd_thread(const LogCameraInfo (&cameras)[N]) { + // Wait for camerad to publish at least one stream while (!do_exit) { - available_streams = VisionIpcClient::getAvailableStreams("camerad", false); - if (!available_streams.empty()) break; + if (!VisionIpcClient::getAvailableStreams("camerad", false).empty()) break; util::sleep_for(100); } - SubMaster sm({"liveStreamCamera"}); + SubMaster sm({"livestreamCameraSwitch"}); - auto active_camera = cereal::LiveStreamCamera::CameraType::DRIVER; + const LogCameraInfo *active_cam = &cameras[0]; while (!do_exit) { - const LogCameraInfo *cam_info = find_stream_camera(active_camera); - - // Check that the requested camera stream is available - if (available_streams.find(cam_info->stream_type) == available_streams.end()) { - LOGE("stream encoder: requested camera not available, falling back to wideRoad"); - active_camera = cereal::LiveStreamCamera::CameraType::WIDE_ROAD; - cam_info = find_stream_camera(active_camera); - } - - VisionIpcClient vipc_client("camerad", cam_info->stream_type, false); + VisionIpcClient vipc_client("camerad", active_cam->stream_type, false); if (!vipc_client.connect(false)) { util::sleep_for(5); continue; @@ -189,19 +174,23 @@ void stream_encoderd_thread() { LOGW("stream encoder init %zux%zu", buf_info.width, 
buf_info.height); assert(buf_info.width > 0 && buf_info.height > 0); - const auto &encoder_info = cam_info->encoder_infos[0]; - auto encoder = std::make_unique(encoder_info, buf_info.width, buf_info.height); + // Each stream camera has exactly one encoder + auto encoder = std::make_unique(active_cam->encoder_infos[0], buf_info.width, buf_info.height); encoder->encoder_open(); while (!do_exit) { - // Check for camera switch request sm.update(0); - if (sm.updated("liveStreamCamera")) { - auto requested = sm["liveStreamCamera"].getLiveStreamCamera().getCamera(); - if (requested != active_camera) { + if (sm.updated("livestreamCameraSwitch")) { + auto requested = sm["livestreamCameraSwitch"].getLivestreamCameraSwitch().getCamera(); + VisionStreamType requested_stream = requested == cereal::LiveStreamCamera::CameraType::DRIVER + ? VISION_STREAM_DRIVER : VISION_STREAM_WIDE_ROAD; + // Switch camera if the request differs from the current one + if (requested_stream != active_cam->stream_type) { LOGW("stream encoder switching camera"); - active_camera = requested; - break; // break to reinit encoder with new camera + auto it = std::find_if(std::begin(cameras), std::end(cameras), + [requested_stream](const auto &cam) { return cam.stream_type == requested_stream; }); + if (it != std::end(cameras)) active_cam = &(*it); + break; // Reinit encoder with the new camera } } @@ -209,11 +198,9 @@ void stream_encoderd_thread() { VisionBuf *buf = vipc_client.recv(&extra); if (buf == nullptr) continue; - // detect loop around and drop the frames if (buf->get_frame_id() != extra.frame_id) continue; - int out_id = encoder->encode_frame(buf, &extra); - if (out_id == -1) { + if (encoder->encode_frame(buf, &extra) == -1) { LOGE("stream encoder: failed to encode frame. 
frame_id: %d", extra.frame_id); } } @@ -233,7 +220,7 @@ int main(int argc, char* argv[]) { if (argc > 1) { std::string arg1(argv[1]); if (arg1 == "--stream") { - stream_encoderd_thread(); + stream_encoderd_thread(stream_cameras_logged); } else { LOGE("Argument '%s' is not supported", arg1.c_str()); } diff --git a/system/loggerd/loggerd.h b/system/loggerd/loggerd.h index 22ad2f88b8abf9..804550d80c772c 100644 --- a/system/loggerd/loggerd.h +++ b/system/loggerd/loggerd.h @@ -100,28 +100,13 @@ const EncoderInfo main_driver_encoder_info = { INIT_ENCODE_FUNCTIONS(DriverEncode), }; -const EncoderInfo stream_road_encoder_info = { - .publish_name = "livestreamRoadEncodeData", - //.thumbnail_name = "thumbnail", - .record = false, - .get_settings = [](int){return EncoderSettings::StreamEncoderSettings();}, - INIT_ENCODE_FUNCTIONS(LivestreamRoadEncode), -}; - -const EncoderInfo stream_wide_road_encoder_info = { - .publish_name = "livestreamWideRoadEncodeData", +const EncoderInfo stream_encoder_info = { + .publish_name = "livestreamCameraEncodeData", .record = false, .get_settings = [](int){return EncoderSettings::StreamEncoderSettings();}, INIT_ENCODE_FUNCTIONS(LivestreamWideRoadEncode), }; -const EncoderInfo stream_driver_encoder_info = { - .publish_name = "livestreamDriverEncodeData", - .record = false, - .get_settings = [](int){return EncoderSettings::StreamEncoderSettings();}, - INIT_ENCODE_FUNCTIONS(LivestreamDriverEncode), -}; - const EncoderInfo qcam_encoder_info = { .publish_name = "qRoadEncodeData", .filename = "qcamera.ts", @@ -153,20 +138,20 @@ const LogCameraInfo driver_camera_info{ const LogCameraInfo stream_road_camera_info{ .thread_name = "road_cam_encoder", .stream_type = VISION_STREAM_ROAD, - .encoder_infos = {stream_road_encoder_info} + .encoder_infos = {stream_encoder_info} }; const LogCameraInfo stream_wide_road_camera_info{ .thread_name = "wide_road_cam_encoder", .stream_type = VISION_STREAM_WIDE_ROAD, - .encoder_infos = {stream_wide_road_encoder_info} + 
.encoder_infos = {stream_encoder_info} }; const LogCameraInfo stream_driver_camera_info{ .thread_name = "driver_cam_encoder", .stream_type = VISION_STREAM_DRIVER, - .encoder_infos = {stream_driver_encoder_info} + .encoder_infos = {stream_encoder_info} }; const LogCameraInfo cameras_logged[] = {road_camera_info, wide_road_camera_info, driver_camera_info}; -const LogCameraInfo stream_cameras_logged[] = {stream_wide_road_camera_info, stream_driver_camera_info}; +const LogCameraInfo stream_cameras_logged[] = {stream_driver_camera_info, stream_wide_road_camera_info}; diff --git a/system/webrtc/device/video.py b/system/webrtc/device/video.py index d1a5e60885c287..85509cf7f25b0f 100644 --- a/system/webrtc/device/video.py +++ b/system/webrtc/device/video.py @@ -13,84 +13,39 @@ 0xa5, 0xe0, 0xc4, 0xa4, 0x5b, 0x6e, 0x4e, 0x1e, 0x9c, 0x7e, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, ]) - - -def _escape_rbsp(data: bytes) -> bytearray: - """ - Prevents frame bytes that might escape into NAL start code. - Adds 0x03 after two consecutive 0x00 0x00 to escape this. 
- """ - out = bytearray() - zeros = 0 - for b in data: - if zeros >= 2 and b <= 3: - out.append(3) - zeros = 0 - zeros = zeros + 1 if b == 0 else 0 - out.append(b) - return out - - -def create_timing_sei(capture_ms: float, encode_ms: float, send_delay_ms: float, send_wall_ms: float) -> bytes: - """Build an H.264 SEI NAL (user_data_unregistered) carrying frame timing.""" - ts_data = struct.pack('>4d', capture_ms, encode_ms, send_delay_ms, send_wall_ms) - sei_payload = TIMING_SEI_UUID + ts_data # 16 + 32 = 48 bytes - - # payload_type=5, payload_size=48, then RBSP stop bit - rbsp = bytes([5, len(sei_payload)]) + sei_payload + bytes([0x80]) - escaped = _escape_rbsp(rbsp) - - # start-code (4 bytes) + NAL header (forbidden=0, ref_idc=0, type=6 SEI) - return b'\x00\x00\x00\x01\x06' + bytes(escaped) +_SEI_PREFIX = b'\x00\x00\x00\x01\x06\x05\x30' + TIMING_SEI_UUID class LiveStreamVideoStreamTrack(TiciVideoStreamTrack): - camera_config = { - "driver": (DT_DMON, "livestreamDriverEncodeData"), - "wideRoad": (DT_MDL, "livestreamWideRoadEncodeData"), - } - def __init__(self, camera_type: str): - dt, _ = self.camera_config[camera_type] + dt = DT_DMON if camera_type == "driver" else DT_MDL super().__init__(camera_type, dt) - self._camera_type = "" - self._sock = None - self._set_camera(camera_type) + self._sock = messaging.sub_sock("livestreamCameraEncodeData", conflate=True) self._t0_ns = time.monotonic_ns() self.timing_sei_enabled = False - def _set_camera(self, camera_type: str): - self._camera_type = camera_type - _, sock_name = self.camera_config[camera_type] - self._sock = messaging.sub_sock(sock_name, conflate=True) - - def switch_camera(self, camera_type: str): - if camera_type not in self.camera_config or camera_type == self._camera_type: - return - self._set_camera(camera_type) - - async def _recv_message(self): - while True: - msg = messaging.recv_one_or_none(self._sock) - if msg is not None: - return msg - await asyncio.sleep(0.005) - def _build_frame_data(self, msg) 
-> bytes: encode_data = getattr(msg, msg.which()) if not self.timing_sei_enabled: return encode_data.header + encode_data.data - capture_ms = (encode_data.idx.timestampEof - encode_data.idx.timestampSof) / 1e6 - encode_ms = (msg.logMonoTime - encode_data.idx.timestampEof) / 1e6 - send_delay_ms = (time.monotonic_ns() - msg.logMonoTime) / 1e6 - send_wall_ms = time.time() * 1000 # noqa: TID251 - sei_nal = create_timing_sei(capture_ms, encode_ms, send_delay_ms, send_wall_ms) + idx = encode_data.idx + sei_nal = _SEI_PREFIX + struct.pack('>4d', + (idx.timestampEof - idx.timestampSof) / 1e6, + (msg.logMonoTime - idx.timestampEof) / 1e6, + (time.monotonic_ns() - msg.logMonoTime) / 1e6, + time.time() * 1000, # noqa: TID251 + ) + b'\x80' return encode_data.header + sei_nal + encode_data.data async def recv(self): - msg = await self._recv_message() + while True: + msg = messaging.recv_one_or_none(self._sock) + if msg is not None: + break + await asyncio.sleep(0.005) + packet = av.Packet(self._build_frame_data(msg)) packet.time_base = self._time_base diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index 17d1898b854c94..97f5056dad3df6 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -126,13 +126,9 @@ def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], o from aiortc.mediastreams import VideoStreamTrack from openpilot.system.webrtc.device.video import LiveStreamVideoStreamTrack from teleoprtc import WebRTCAnswerBuilder - from teleoprtc.info import parse_info_from_offer - config = parse_info_from_offer(sdp) builder = WebRTCAnswerBuilder(sdp) - assert len(cameras) == config.n_expected_camera_tracks, "Incoming stream has misconfigured number of video tracks" - self.video_track = LiveStreamVideoStreamTrack(INITIAL_CAMERA) if not debug_mode else VideoStreamTrack() builder.add_video_stream(INITIAL_CAMERA, self.video_track) @@ -188,11 +184,6 @@ def message_handler(self, message: bytes): return if self.incoming_bridge is 
not None: - if msg_type == "liveStreamCamera": - camera = payload.get("data").get("camera") - if camera in ("driver", "wideRoad"): - if hasattr(self, 'video_track') and hasattr(self.video_track, 'switch_camera'): - self.video_track.switch_camera(camera) self.incoming_bridge.send(message) except ValueError: self.logger.warning("Ignoring malformed request: %s", payload) @@ -206,7 +197,7 @@ async def run(self): if self.incoming_bridge is not None: await self.shared_pub_master.add_services_if_needed(self.incoming_bridge_services) # set camera to default - self.incoming_bridge.send(json.dumps({"type": "liveStreamCamera", "data": {"camera": INITIAL_CAMERA}}).encode()) + self.incoming_bridge.send(json.dumps({"type": "livestreamCameraSwitch", "data": {"camera": INITIAL_CAMERA}}).encode()) self.stream.set_message_handler(self.message_handler) if self.outgoing_bridge_runner is not None: channel = self.stream.get_messaging_channel() From f07c1a445eda5ca96cfa8fbb4607b3ebfc871ac0 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:11:10 -0700 Subject: [PATCH 14/24] clean encoder stream thread --- system/loggerd/encoderd.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/system/loggerd/encoderd.cc b/system/loggerd/encoderd.cc index 93b9494848db41..b00b7b66aed33a 100644 --- a/system/loggerd/encoderd.cc +++ b/system/loggerd/encoderd.cc @@ -153,14 +153,12 @@ void encoderd_thread(const LogCameraInfo (&cameras)[N]) { template void stream_encoderd_thread(const LogCameraInfo (&cameras)[N]) { - // Wait for camerad to publish at least one stream while (!do_exit) { if (!VisionIpcClient::getAvailableStreams("camerad", false).empty()) break; util::sleep_for(100); } SubMaster sm({"livestreamCameraSwitch"}); - const LogCameraInfo *active_cam = &cameras[0]; while (!do_exit) { @@ -170,36 +168,35 @@ void stream_encoderd_thread(const LogCameraInfo (&cameras)[N]) { continue; } + // init encoder const VisionBuf 
&buf_info = vipc_client.buffers[0]; LOGW("stream encoder init %zux%zu", buf_info.width, buf_info.height); assert(buf_info.width > 0 && buf_info.height > 0); - - // Each stream camera has exactly one encoder auto encoder = std::make_unique(active_cam->encoder_infos[0], buf_info.width, buf_info.height); encoder->encoder_open(); while (!do_exit) { sm.update(0); + + // Switch camera if the request differs from the current one if (sm.updated("livestreamCameraSwitch")) { auto requested = sm["livestreamCameraSwitch"].getLivestreamCameraSwitch().getCamera(); VisionStreamType requested_stream = requested == cereal::LiveStreamCamera::CameraType::DRIVER ? VISION_STREAM_DRIVER : VISION_STREAM_WIDE_ROAD; - // Switch camera if the request differs from the current one if (requested_stream != active_cam->stream_type) { LOGW("stream encoder switching camera"); auto it = std::find_if(std::begin(cameras), std::end(cameras), [requested_stream](const auto &cam) { return cam.stream_type == requested_stream; }); if (it != std::end(cameras)) active_cam = &(*it); - break; // Reinit encoder with the new camera + break; // reinit encoder with new camera selection } } + // encode frame VisionIpcBufExtra extra; VisionBuf *buf = vipc_client.recv(&extra); if (buf == nullptr) continue; - if (buf->get_frame_id() != extra.frame_id) continue; - if (encoder->encode_frame(buf, &extra) == -1) { LOGE("stream encoder: failed to encode frame. 
frame_id: %d", extra.frame_id); } From a2aab550f6b82d14b31dad796d6366a5ad1638a9 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:20:46 -0700 Subject: [PATCH 15/24] inline clock sync util --- system/webrtc/utils.py | 15 --------------- system/webrtc/webrtcd.py | 8 +++++--- 2 files changed, 5 insertions(+), 18 deletions(-) delete mode 100644 system/webrtc/utils.py diff --git a/system/webrtc/utils.py b/system/webrtc/utils.py deleted file mode 100644 index afee39e77c3ce7..00000000000000 --- a/system/webrtc/utils.py +++ /dev/null @@ -1,15 +0,0 @@ -import json - -def clock_sync_build_json(payload: dict) -> str | None: - import time as _time - data = payload.get("data", {}) - if data.get("action") != "ping": - raise ValueError - return json.dumps({ - "type": "clockSync", - "data": { - "action": "pong", - "browserSendTime": data.get("browserSendTime"), - "deviceTime": _time.time() * 1000, # noqa: TID251 - } - }) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index 97f5056dad3df6..bcacf906981f2f 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +import time import argparse import asyncio import json @@ -19,7 +19,6 @@ from aiortc.rtcdatachannel import RTCDataChannel from openpilot.system.webrtc.schema import generate_field -from openpilot.system.webrtc.utils import clock_sync_build_json from cereal import messaging, log from openpilot.common.params import Params @@ -173,7 +172,10 @@ def message_handler(self, message: bytes): msg_type = payload.get("type") if msg_type == "clockSync": - pong = clock_sync_build_json(payload) + data = payload.get("data", {}) + pong = json.dumps({"type": "clockSync", "data": { + "action": "pong", "browserSendTime": data.get("browserSendTime"), "deviceTime": time.time() * 1000, # noqa: TID251 + }}) self.stream.get_messaging_channel().send(pong) return From a18aedba20838b963504a5ba7b846d4d3c75fe30 Mon Sep 17 
00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:23:23 -0700 Subject: [PATCH 16/24] update test --- system/webrtc/device/video.py | 1 - system/webrtc/tests/test_stream_session.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/system/webrtc/device/video.py b/system/webrtc/device/video.py index 85509cf7f25b0f..54f333b9689817 100644 --- a/system/webrtc/device/video.py +++ b/system/webrtc/device/video.py @@ -1,7 +1,6 @@ import asyncio import struct import time - import av from teleoprtc.tracks import TiciVideoStreamTrack diff --git a/system/webrtc/tests/test_stream_session.py b/system/webrtc/tests/test_stream_session.py index f44d217d58ced6..9730f9e16e3d07 100644 --- a/system/webrtc/tests/test_stream_session.py +++ b/system/webrtc/tests/test_stream_session.py @@ -67,7 +67,7 @@ def test_incoming_proxy(self, mocker): mocked_pubmaster.reset_mock() def test_livestream_track(self, mocker): - fake_msg = messaging.new_message("livestreamDriverEncodeData") + fake_msg = messaging.new_message("livestreamCameraEncodeData") config = {"receive.return_value": fake_msg.to_bytes()} mocker.patch("msgq.SubSocket", spec=True, **config) From ff0c2e36a613a82bf3979d875e8b9917e5611786 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:29:30 -0700 Subject: [PATCH 17/24] clean diff --- system/webrtc/webrtcd.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index bcacf906981f2f..60036f9f0efa45 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -233,7 +233,7 @@ class StreamRequestBody: def _add_cors_headers(_, response: 'web.Response'): response.headers["Access-Control-Allow-Origin"] = "*" response.headers["Access-Control-Allow-Headers"] = "Content-Type" - response.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS" + 
response.headers["Access-Control-Allow-Methods"] = "POST" response.headers["Access-Control-Allow-Private-Network"] = "true" @@ -330,23 +330,21 @@ async def on_shutdown(app: 'web.Application'): del app['streams'] -def create_app(debug: bool) -> web.Application: +def webrtcd_thread(host: str, port: int, debug: bool): + logging.basicConfig(level=logging.CRITICAL, handlers=[logging.StreamHandler()]) + logging_level = logging.DEBUG if debug else logging.INFO + logging.getLogger("WebRTCStream").setLevel(logging_level) + logging.getLogger("webrtcd").setLevel(logging_level) + app = web.Application(middlewares=[cors_middleware]) + app['streams'] = dict() app['debug'] = debug app.on_shutdown.append(on_shutdown) app.router.add_post("/stream", get_stream) app.router.add_post("/notify", post_notify) app.router.add_get("/schema", get_schema) - return app - -def webrtcd_thread(host: str, port: int, debug: bool): - logging.basicConfig(level=logging.CRITICAL, handlers=[logging.StreamHandler()]) - logging_level = logging.DEBUG if debug else logging.INFO - logging.getLogger("WebRTCStream").setLevel(logging_level) - logging.getLogger("webrtcd").setLevel(logging_level) - app = create_app(debug) web.run_app(app, host=host, port=port) From 3d35313845b1efd82ac8f60543fb4828f1479ce5 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:30:12 -0700 Subject: [PATCH 18/24] change gitmodules back to master teleoprtc --- .gitmodules | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index b01ab88806ef28..ad6530de9ac910 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,7 +13,6 @@ [submodule "teleoprtc_repo"] path = teleoprtc_repo url = ../../commaai/teleoprtc - branch = fix/datachannel-double-counting [submodule "tinygrad"] path = tinygrad_repo url = https://github.com/tinygrad/tinygrad.git From 43cef76fd08ab399500739bc1531b25d418e04cf Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: 
Tue, 14 Apr 2026 14:46:03 -0700 Subject: [PATCH 19/24] don't start DM on body --- selfdrive/selfdrived/selfdrived.py | 2 ++ system/manager/process_config.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/selfdrive/selfdrived/selfdrived.py b/selfdrive/selfdrived/selfdrived.py index 6a294ca8d82786..21e79d1f7d765b 100755 --- a/selfdrive/selfdrived/selfdrived.py +++ b/selfdrive/selfdrived/selfdrived.py @@ -75,6 +75,8 @@ def __init__(self, CP=None): self.car_state_sock = messaging.sub_sock('carState', timeout=20) ignore = self.sensor_packets + self.gps_packets + ['alertDebug', 'lateralManeuverPlan'] + if self.CP.notCar: + ignore += ['driverMonitoringState'] if SIMULATION: ignore += ['driverCameraState', 'managerState'] if REPLAY: diff --git a/system/manager/process_config.py b/system/manager/process_config.py index 7e96b7776a4f5f..770c4b407fdae7 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -80,7 +80,7 @@ def and_(*fns): PythonProcess("timed", "system.timed", always_run, enabled=not PC), PythonProcess("modeld", "selfdrive.modeld.modeld", only_onroad), - PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(WEBCAM or not PC)), + PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", and_(driverview, iscar), enabled=(WEBCAM or not PC)), PythonProcess("sensord", "system.sensord.sensord", only_onroad, enabled=not PC), PythonProcess("ui", "selfdrive.ui.ui", always_run, restart_if_crash=True), @@ -94,7 +94,7 @@ def and_(*fns): PythonProcess("selfdrived", "selfdrive.selfdrived.selfdrived", only_onroad), PythonProcess("card", "selfdrive.car.card", only_onroad), PythonProcess("deleter", "system.loggerd.deleter", always_run), - PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", driverview, enabled=(WEBCAM or not PC)), + PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", and_(driverview, iscar), enabled=(WEBCAM or not PC)), 
PythonProcess("qcomgpsd", "system.qcomgpsd.qcomgpsd", qcomgps, enabled=TICI), PythonProcess("pandad", "selfdrive.pandad.pandad", always_run), PythonProcess("paramsd", "selfdrive.locationd.paramsd", only_onroad), From cba8bd3da0b178c6b4243472ca60b821bcdf0a8a Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Wed, 15 Apr 2026 16:58:29 -0700 Subject: [PATCH 20/24] direct connection cors --- system/webrtc/webrtcd.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index 60036f9f0efa45..8b1c119191247f 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -220,7 +220,6 @@ async def post_run_cleanup(self): await self.stream.stop() if self.outgoing_bridge is not None: self.outgoing_bridge_runner.stop() - Params().put_bool("JoystickDebugMode", False) @dataclass @@ -233,7 +232,7 @@ class StreamRequestBody: def _add_cors_headers(_, response: 'web.Response'): response.headers["Access-Control-Allow-Origin"] = "*" response.headers["Access-Control-Allow-Headers"] = "Content-Type" - response.headers["Access-Control-Allow-Methods"] = "POST" + response.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS" response.headers["Access-Control-Allow-Private-Network"] = "true" @@ -248,6 +247,12 @@ async def cors_middleware(request: 'web.Request', handler): return response +async def stream_options(request: 'web.Request'): + response = web.Response() + _add_cors_headers(request, response) + return response + + def _validate_sdp_video_codecs(sdp: str): import aiortc.sdp desc = aiortc.sdp.SessionDescription.parse(sdp) @@ -286,7 +291,6 @@ async def get_stream(request: 'web.Request'): session = StreamSession(body.sdp, body.cameras, body.bridge_services_in, body.bridge_services_out, debug_mode) answer = await session.get_answer() session.start() - Params().put_bool("JoystickDebugMode", True) stream_dict[session.identifier] = session @@ -341,6 +345,7 @@ def 
webrtcd_thread(host: str, port: int, debug: bool): app['streams'] = dict() app['debug'] = debug app.on_shutdown.append(on_shutdown) + app.router.add_route("OPTIONS", "/stream", stream_options) app.router.add_post("/stream", get_stream) app.router.add_post("/notify", post_notify) app.router.add_get("/schema", get_schema) From 24033bdd9f1faba81803e94c85f4d6837d064381 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Wed, 15 Apr 2026 17:40:11 -0700 Subject: [PATCH 21/24] remove unused param --- system/webrtc/webrtcd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index 8b1c119191247f..05d6522638a947 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -20,7 +20,6 @@ from openpilot.system.webrtc.schema import generate_field from cereal import messaging, log -from openpilot.common.params import Params INITIAL_CAMERA = "driver" REQUIRED_VIDEO_CODEC = "H264" From c5c6c1fdeddb6f842a0b32b906f7854d0055fcd6 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Sat, 18 Apr 2026 12:31:04 -0700 Subject: [PATCH 22/24] move ignore dmonitoring to separate PR --- selfdrive/selfdrived/selfdrived.py | 2 -- system/manager/process_config.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/selfdrive/selfdrived/selfdrived.py b/selfdrive/selfdrived/selfdrived.py index 046f54744a9f0b..dbc3174af74e6e 100755 --- a/selfdrive/selfdrived/selfdrived.py +++ b/selfdrive/selfdrived/selfdrived.py @@ -75,8 +75,6 @@ def __init__(self, CP=None): self.car_state_sock = messaging.sub_sock('carState', timeout=20) ignore = self.sensor_packets + self.gps_packets + ['alertDebug', 'lateralManeuverPlan'] - if self.CP.notCar: - ignore += ['driverMonitoringState'] if SIMULATION: ignore += ['driverCameraState', 'managerState'] if REPLAY: diff --git a/system/manager/process_config.py b/system/manager/process_config.py index 
770c4b407fdae7..7e96b7776a4f5f 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -80,7 +80,7 @@ def and_(*fns): PythonProcess("timed", "system.timed", always_run, enabled=not PC), PythonProcess("modeld", "selfdrive.modeld.modeld", only_onroad), - PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", and_(driverview, iscar), enabled=(WEBCAM or not PC)), + PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(WEBCAM or not PC)), PythonProcess("sensord", "system.sensord.sensord", only_onroad, enabled=not PC), PythonProcess("ui", "selfdrive.ui.ui", always_run, restart_if_crash=True), @@ -94,7 +94,7 @@ def and_(*fns): PythonProcess("selfdrived", "selfdrive.selfdrived.selfdrived", only_onroad), PythonProcess("card", "selfdrive.car.card", only_onroad), PythonProcess("deleter", "system.loggerd.deleter", always_run), - PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", and_(driverview, iscar), enabled=(WEBCAM or not PC)), + PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", driverview, enabled=(WEBCAM or not PC)), PythonProcess("qcomgpsd", "system.qcomgpsd.qcomgpsd", qcomgps, enabled=TICI), PythonProcess("pandad", "selfdrive.pandad.pandad", always_run), PythonProcess("paramsd", "selfdrive.locationd.paramsd", only_onroad), From 0945c973dd178177d627477a92b7bcc297e442c0 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Sat, 18 Apr 2026 12:35:23 -0700 Subject: [PATCH 23/24] update log.capnp numbers --- cereal/log.capnp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cereal/log.capnp b/cereal/log.capnp index 0aedab18bedf1f..2fbb39c71aaf49 100644 --- a/cereal/log.capnp +++ b/cereal/log.capnp @@ -2556,10 +2556,10 @@ struct Event { livestreamWideRoadEncodeData @121 :EncodeData; livestreamDriverEncodeData @122 :EncodeData; - livestreamCameraEncodeData @151 :EncodeData; - livestreamCameraSwitch 
@152 :LiveStreamCamera; + livestreamCameraEncodeData @152 :EncodeData; + livestreamCameraSwitch @153 :LiveStreamCamera; - soundRequest @153 :SoundRequest; + soundRequest @154 :SoundRequest; # *********** Custom: reserved for forks *********** From 1f12b9402e28913c366ef90f196976aa9a94aba8 Mon Sep 17 00:00:00 2001 From: stefpi <19478336+stefpi@users.noreply.github.com> Date: Mon, 11 May 2026 11:38:26 -0700 Subject: [PATCH 24/24] better stream shutdown + replace to resolve commIssues --- system/webrtc/webrtcd.py | 73 +++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/system/webrtc/webrtcd.py b/system/webrtc/webrtcd.py index 05d6522638a947..0b64d579384858 100755 --- a/system/webrtc/webrtcd.py +++ b/system/webrtc/webrtcd.py @@ -2,6 +2,7 @@ import time import argparse import asyncio +import contextlib import json import logging import uuid @@ -85,11 +86,16 @@ def start(self): assert self.task is None self.task = asyncio.create_task(self.run()) - def stop(self): - if self.task is None or self.task.done(): + async def stop(self): + if self.task is None: return - self.task.cancel() + task = self.task self.task = None + if task.done(): + return + task.cancel() + with contextlib.suppress(asyncio.CancelledError): + await task async def run(self): from aiortc.exceptions import InvalidStateError @@ -144,6 +150,8 @@ def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], o self.outgoing_bridge_runner = CerealProxyRunner(self.outgoing_bridge) self.run_task: asyncio.Task | None = None + self._cleanup_lock = asyncio.Lock() + self._cleanup_done = False self.logger = logging.getLogger("webrtcd") self.logger.info( "New stream session (%s), cameras %s, incoming services %s, outgoing services %s", @@ -153,12 +161,13 @@ def __init__(self, sdp: str, cameras: list[str], incoming_services: list[str], o def start(self): self.run_task = asyncio.create_task(self.run()) - def stop(self): - if self.run_task is None or 
self.run_task.done(): - return - self.run_task.cancel() + async def stop(self): + if self.run_task is not None and not self.run_task.done() and self.run_task is not asyncio.current_task(): + self.run_task.cancel() + with contextlib.suppress(asyncio.CancelledError): + await self.run_task self.run_task = None - asyncio.get_running_loop().create_task(self.post_run_cleanup()) + await self.post_run_cleanup() async def get_answer(self): return await self.stream.start() @@ -216,9 +225,13 @@ async def run(self): await self.post_run_cleanup() async def post_run_cleanup(self): - await self.stream.stop() - if self.outgoing_bridge is not None: - self.outgoing_bridge_runner.stop() + async with self._cleanup_lock: + if self._cleanup_done: + return + self._cleanup_done = True + if self.outgoing_bridge_runner is not None: + await self.outgoing_bridge_runner.stop() + await self.stream.stop() @dataclass @@ -272,26 +285,31 @@ async def get_stream(request: 'web.Request'): try: stream_dict, debug_mode = request.app['streams'], request.app['debug'] - # disconnect any other active stream - for sid, s in list(stream_dict.items()): - if s.run_task and not s.run_task.done(): - try: - ch = s.stream.get_messaging_channel() - ch.send(json.dumps({"type": "connectionReplaced", "data": "Another device has connected, closing this session."})) - except Exception: - pass - s.stop() - del stream_dict[sid] - raw_body = await request.json() body = StreamRequestBody(**raw_body) _validate_sdp_video_codecs(body.sdp) - session = StreamSession(body.sdp, body.cameras, body.bridge_services_in, body.bridge_services_out, debug_mode) - answer = await session.get_answer() - session.start() + async with request.app['stream_lock']: + # Fully disconnect any other active stream before starting the replacement. 
+ for sid, s in list(stream_dict.items()): + if s.run_task and not s.run_task.done(): + try: + ch = s.stream.get_messaging_channel() + ch.send(json.dumps({"type": "connectionReplaced", "data": "Another device has connected, closing this session."})) + except Exception: + pass + await s.stop() + del stream_dict[sid] + + session = StreamSession(body.sdp, body.cameras, body.bridge_services_in, body.bridge_services_out, debug_mode) + try: + answer = await session.get_answer() + except Exception: + await session.stop() + raise + session.start() - stream_dict[session.identifier] = session + stream_dict[session.identifier] = session response = web.json_response({"sdp": answer.sdp, "type": answer.type}) _add_cors_headers(request, response) @@ -329,7 +347,7 @@ async def post_notify(request: 'web.Request'): async def on_shutdown(app: 'web.Application'): for session in app['streams'].values(): - session.stop() + await session.stop() del app['streams'] @@ -342,6 +360,7 @@ def webrtcd_thread(host: str, port: int, debug: bool): app = web.Application(middlewares=[cors_middleware]) app['streams'] = dict() + app['stream_lock'] = asyncio.Lock() app['debug'] = debug app.on_shutdown.append(on_shutdown) app.router.add_route("OPTIONS", "/stream", stream_options)