diff --git a/alioth/src/board/board.rs b/alioth/src/board/board.rs index 99c399ec..40c761e6 100644 --- a/alioth/src/board/board.rs +++ b/alioth/src/board/board.rs @@ -165,7 +165,7 @@ impl CpuConfig { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BoardState { - Created, + Paused, Running, Shutdown, RebootPending, @@ -249,7 +249,7 @@ where vfio_containers: Mutex::new(HashMap::new()), mp_sync: Mutex::new(MpSync { - state: BoardState::Created, + state: BoardState::Paused, count: 0, fatal: false, }), @@ -258,13 +258,17 @@ where } pub fn boot(&self) -> Result<()> { + self.resume() + } + + pub fn resume(&self) -> Result<()> { let mut mp_sync = self.mp_sync.lock(); - if mp_sync.state == BoardState::Created { + if mp_sync.state == BoardState::Paused { mp_sync.state = BoardState::Running; } else { return error::UnexpectedState { state: mp_sync.state, - want: BoardState::Created, + want: BoardState::Paused, } .fail(); } @@ -272,6 +276,21 @@ where Ok(()) } + pub fn pause(&self) -> Result<()> { + let vcpus = self.vcpus.read(); + let mut mp_sync = self.mp_sync.lock(); + if mp_sync.state != BoardState::Running { + return error::UnexpectedState { + state: mp_sync.state, + want: BoardState::Running, + } + .fail(); + } + mp_sync.state = BoardState::Paused; + self.stop_other_vcpus(None, &vcpus)?; + Ok(()) + } + fn load_payload(&self) -> Result { let payload = self.payload.read(); let Some(payload) = payload.as_ref() else { @@ -333,7 +352,7 @@ where Ok(()) } - fn vcpu_loop(&self, vcpu: &mut ::Vcpu, index: u16) -> Result { + fn vcpu_loop(&self, vcpu: &mut ::Vcpu, index: u16) -> Result { let mut vm_entry = VmEntry::None; loop { let vm_exit = vcpu.run(vm_entry).context(error::RunVcpu { index })?; @@ -341,19 +360,16 @@ where #[cfg(target_arch = "x86_64")] VmExit::Io { port, write, size } => self.memory.handle_io(port, write, size)?, VmExit::Mmio { addr, write, size } => self.memory.handle_mmio(addr, write, size)?, - VmExit::Shutdown => { - log::info!("VCPU-{index} requested shutdown"); - break Ok(false); - } - VmExit::Reboot => { - break Ok(true); - } + VmExit::Shutdown => break Ok(BoardState::Shutdown), + VmExit::Reboot => break Ok(BoardState::RebootPending), + VmExit::Paused => break Ok(BoardState::Paused), VmExit::Interrupted => { let mp_sync = self.mp_sync.lock(); match mp_sync.state { BoardState::Shutdown => VmEntry::Shutdown, BoardState::RebootPending => VmEntry::Reboot, - _ => VmEntry::None, + BoardState::Paused => VmEntry::Pause, + BoardState::Running => VmEntry::None, } } VmExit::ConvertMemory { gpa, size, private } => { @@ -393,66 +409,88 @@ where } } - fn run_vcpu_inner(&self, index: u16, event_tx: &Sender) -> Result<(), Error> { - let mut vcpu = self.create_vcpu(index)?; - self.notify_vmm(index, event_tx)?; - self.init_vcpu(index, &mut vcpu)?; - - let mut mp_sync = self.mp_sync.lock(); - while mp_sync.state == BoardState::Created { - self.cond_var.wait(&mut mp_sync); - } - if mp_sync.state != BoardState::Running { - return Ok(()); + fn boot_init_sync(&self, index: u16, vcpu: &mut V::Vcpu) -> Result<()> { + let vcpus = self.vcpus.read(); + self.coco_init(index)?; + if index == 0 { + self.create_ram()?; + for (port, dev) in self.io_devs.read().iter() { + self.memory.add_io_dev(*port, dev.clone())?; + } + #[cfg(target_arch = "aarch64")] + for (addr, dev) in self.mmio_devs.read().iter() { + self.memory.add_region(*addr, dev.clone())?; + } + self.add_pci_devs()?; + let init_state = self.load_payload()?; + self.init_boot_vcpu(vcpu, &init_state)?; + self.create_firmware_data(&init_state)?; } - drop(mp_sync); + self.init_ap(index, vcpu, &vcpus)?; + self.coco_finalize(index, &vcpus)?; + self.sync_vcpus(&vcpus) + } - loop { - let vcpus = self.vcpus.read(); - self.coco_init(index)?; - if index == 0 { - self.create_ram()?; - for (port, dev) in self.io_devs.read().iter() { - self.memory.add_io_dev(*port, dev.clone())?; - } - #[cfg(target_arch = "aarch64")] - for (addr, dev) in self.mmio_devs.read().iter() { - self.memory.add_region(*addr, dev.clone())?; + fn stop_other_vcpus(&self, current: Option, vcpus: &VcpuGuard) -> Result<()> { + for (index, handle) in vcpus.iter().enumerate() { + let index = index as u16; + if let Some(current) = current { + if current == index { + continue; } - self.add_pci_devs()?; - let init_state = self.load_payload()?; - self.init_boot_vcpu(&mut vcpu, &init_state)?; - self.create_firmware_data(&init_state)?; + log::info!("VCPU-{current}: stopping VCPU-{index}"); + } else { + log::info!("Stopping VCPU-{index}"); } - self.init_ap(index, &mut vcpu, &vcpus)?; - self.coco_finalize(index, &vcpus)?; - self.sync_vcpus(&vcpus)?; - drop(vcpus); + let identity = self.encode_cpu_identity(index); + self.vm + .stop_vcpu(identity, handle) + .context(error::StopVcpu { index })?; + } + Ok(()) + } - let maybe_reboot = self.vcpu_loop(&mut vcpu, index); + fn run_vcpu_inner(&self, index: u16, event_tx: &Sender) -> Result<(), Error> { + let mut vcpu = self.create_vcpu(index)?; + self.notify_vmm(index, event_tx)?; + self.init_vcpu(index, &mut vcpu)?; - let vcpus = self.vcpus.read(); + 'reboot: loop { let mut mp_sync = self.mp_sync.lock(); - if mp_sync.state == BoardState::Running { - mp_sync.state = if matches!(maybe_reboot, Ok(true)) { - BoardState::RebootPending - } else { - BoardState::Shutdown - }; - for (another, handle) in vcpus.iter().enumerate() { - if index == another as u16 { - continue; - } - log::info!("VCPU-{index}: stopping VCPU-{another}"); - self.vm - .stop_vcpu(self.encode_cpu_identity(another as u16), handle) - .context(error::StopVcpu { - index: another as u16, - })?; + loop { + match mp_sync.state { + BoardState::Paused => self.cond_var.wait(&mut mp_sync), + BoardState::Running => break, + BoardState::Shutdown => break 'reboot Ok(()), + BoardState::RebootPending => mp_sync.state = BoardState::Running, } } drop(mp_sync); - self.sync_vcpus(&vcpus)?; + + self.boot_init_sync(index, &mut vcpu)?; + + let request = 'pause: loop { + let request = self.vcpu_loop(&mut vcpu, index); + + let vcpus = self.vcpus.read(); + let mut mp_sync = self.mp_sync.lock(); + if mp_sync.state == BoardState::Running { + mp_sync.state = match request { + Ok(BoardState::RebootPending) => BoardState::RebootPending, + Ok(BoardState::Paused) => BoardState::Paused, + _ => BoardState::Shutdown, + }; + log::trace!("VCPU-{index}: change state to {:?}", mp_sync.state); + self.stop_other_vcpus(Some(index), &vcpus)?; + } + loop { + match mp_sync.state { + BoardState::Running => break, + BoardState::Paused => self.cond_var.wait(&mut mp_sync), + BoardState::RebootPending | BoardState::Shutdown => break 'pause request, + } + } + }; if index == 0 { self.pci_bus.segment.reset().context(error::ResetPci)?; @@ -460,15 +498,10 @@ where } self.reset_vcpu(index, &mut vcpu)?; - if let Err(e) = maybe_reboot { - break Err(e); - } + request?; - let mut mp_sync = self.mp_sync.lock(); - if mp_sync.state == BoardState::Shutdown { - break Ok(()); - } - mp_sync.state = BoardState::Running; + let vcpus = self.vcpus.read(); + self.sync_vcpus(&vcpus)?; } } @@ -490,7 +523,7 @@ where return Ok(()); } - log::warn!("VCPU-{index} reported error, unblocking other VCPUs..."); + log::warn!("VCPU-{index} reported error {ret:?}, unblocking other VCPUs..."); let mut mp_sync = self.mp_sync.lock(); mp_sync.fatal = true; if mp_sync.count > 0 { diff --git a/alioth/src/hv/hv.rs b/alioth/src/hv/hv.rs index 104ea1ff..a0d79a4a 100644 --- a/alioth/src/hv/hv.rs +++ b/alioth/src/hv/hv.rs @@ -420,12 +420,14 @@ pub enum VmExit { }, Shutdown, Reboot, + Paused, Interrupted, } #[derive(Debug, Clone, PartialEq, Eq)] pub enum VmEntry { None, + Pause, Shutdown, Reboot, #[cfg(target_arch = "x86_64")] diff --git a/alioth/src/hv/hvf/vcpu/vcpu.rs b/alioth/src/hv/hvf/vcpu/vcpu.rs index 2c36bf7b..a4ef7c61 100644 --- a/alioth/src/hv/hvf/vcpu/vcpu.rs +++ b/alioth/src/hv/hvf/vcpu/vcpu.rs @@ -164,6 +164,7 @@ impl Vcpu for HvfVcpu { VmEntry::Mmio { data } => self.entry_mmio(data)?, VmEntry::Shutdown => return Ok(VmExit::Shutdown), VmEntry::Reboot => return Ok(VmExit::Reboot), + VmEntry::Pause => return Ok(VmExit::Paused), } if !self.power_on.load(Ordering::Relaxed) { diff --git a/alioth/src/hv/kvm/kvm.rs b/alioth/src/hv/kvm/kvm.rs index 97f5db65..55fdb0dc 100644 --- a/alioth/src/hv/kvm/kvm.rs +++ b/alioth/src/hv/kvm/kvm.rs @@ -92,6 +92,8 @@ pub enum KvmError { #[cfg(target_arch = "aarch64")] #[snafu(display("Failed to configure device attributes"))] DeviceAttr { error: std::io::Error }, + #[snafu(display("Failed to configure kvmclock"))] + KvmClockCtrl { error: std::io::Error }, } #[derive(Debug)] diff --git a/alioth/src/hv/kvm/vcpu/vcpu.rs b/alioth/src/hv/kvm/vcpu/vcpu.rs index eacf8c2d..47669904 100644 --- a/alioth/src/hv/kvm/vcpu/vcpu.rs +++ b/alioth/src/hv/kvm/vcpu/vcpu.rs @@ -179,7 +179,7 @@ impl Vcpu for KvmVcpu { } } VmEntry::Mmio { data } => self.entry_mmio(data), - VmEntry::Shutdown | VmEntry::Reboot => self.set_immediate_exit(true), + VmEntry::Shutdown | VmEntry::Reboot | VmEntry::Pause => self.set_immediate_exit(true), }; let ret = unsafe { kvm_run(&self.fd) }; match ret { @@ -193,6 +193,14 @@ impl Vcpu for KvmVcpu { self.set_immediate_exit(false); Ok(VmExit::Reboot) } + (ErrorKind::Interrupted, VmEntry::Pause) => { + #[cfg(target_arch = "x86_64")] + if let Err(e) = self.kvmclock_ctrl() { + log::error!("Failed to control kvmclock: {e:?}"); + } + self.set_immediate_exit(false); + Ok(VmExit::Paused) + } (ErrorKind::Interrupted, _) => Ok(VmExit::Interrupted), _ => Err(e).context(error::RunVcpu), }, diff --git a/alioth/src/hv/kvm/vcpu/vcpu_x86_64.rs b/alioth/src/hv/kvm/vcpu/vcpu_x86_64.rs index c3406aa7..b4bbd3f3 100644 --- a/alioth/src/hv/kvm/vcpu/vcpu_x86_64.rs +++ b/alioth/src/hv/kvm/vcpu/vcpu_x86_64.rs @@ -27,8 +27,8 @@ use crate::hv::kvm::vm::KvmVm; use crate::hv::{Error, Result, error}; use crate::sys::kvm::{ KVM_MAX_CPUID_ENTRIES, KvmCpuid2, KvmCpuid2Flag, KvmCpuidEntry2, KvmMsrEntry, KvmMsrs, KvmRegs, - MAX_IO_MSRS, kvm_create_vcpu, kvm_get_regs, kvm_get_sregs, kvm_get_sregs2, kvm_set_cpuid2, - kvm_set_msrs, kvm_set_regs, kvm_set_sregs, kvm_set_sregs2, + MAX_IO_MSRS, kvm_create_vcpu, kvm_get_regs, kvm_get_sregs, kvm_get_sregs2, kvm_kvmclock_ctrl, + kvm_set_cpuid2, kvm_set_msrs, kvm_set_regs, kvm_set_sregs, kvm_set_sregs2, }; #[derive(Debug)] @@ -158,6 +158,11 @@ impl KvmVcpu { Ok(unsafe { OwnedFd::from_raw_fd(fd) }) } + pub fn kvmclock_ctrl(&mut self) -> Result<()> { + unsafe { kvm_kvmclock_ctrl(&self.fd) }.context(kvm_error::KvmClockCtrl)?; + Ok(()) + } + fn get_kvm_regs(&self) -> Result { let kvm_regs = unsafe { kvm_get_regs(&self.fd) }.context(error::VcpuReg)?; Ok(kvm_regs)