Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 105 additions & 72 deletions alioth/src/board/board.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ impl CpuConfig {

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BoardState {
Created,
Paused,
Running,
Shutdown,
RebootPending,
Expand Down Expand Up @@ -249,7 +249,7 @@ where
vfio_containers: Mutex::new(HashMap::new()),

mp_sync: Mutex::new(MpSync {
state: BoardState::Created,
state: BoardState::Paused,
count: 0,
fatal: false,
}),
Expand All @@ -258,20 +258,39 @@ where
}

/// Boots the board.
///
/// This is a thin wrapper: it forwards directly to [`Self::resume`] and
/// returns that call's result unchanged.
///
/// # Errors
///
/// Propagates any error returned by `resume`.
pub fn boot(&self) -> Result<()> {
self.resume()
}

pub fn resume(&self) -> Result<()> {
let mut mp_sync = self.mp_sync.lock();
if mp_sync.state == BoardState::Created {
if mp_sync.state == BoardState::Paused {
mp_sync.state = BoardState::Running;
} else {
return error::UnexpectedState {
state: mp_sync.state,
want: BoardState::Created,
want: BoardState::Paused,
}
.fail();
}
self.cond_var.notify_all();
Ok(())
}

pub fn pause(&self) -> Result<()> {
let vcpus = self.vcpus.read();
let mut mp_sync = self.mp_sync.lock();
if mp_sync.state != BoardState::Running {
return error::UnexpectedState {
state: mp_sync.state,
want: BoardState::Running,
}
.fail();
}
mp_sync.state = BoardState::Paused;
self.stop_other_vcpus(None, &vcpus)?;
Ok(())
}

fn load_payload(&self) -> Result<InitState, Error> {
let payload = self.payload.read();
let Some(payload) = payload.as_ref() else {
Expand Down Expand Up @@ -333,27 +352,24 @@ where
Ok(())
}

fn vcpu_loop(&self, vcpu: &mut <V as Vm>::Vcpu, index: u16) -> Result<bool, Error> {
fn vcpu_loop(&self, vcpu: &mut <V as Vm>::Vcpu, index: u16) -> Result<BoardState> {
let mut vm_entry = VmEntry::None;
loop {
let vm_exit = vcpu.run(vm_entry).context(error::RunVcpu { index })?;
vm_entry = match vm_exit {
#[cfg(target_arch = "x86_64")]
VmExit::Io { port, write, size } => self.memory.handle_io(port, write, size)?,
VmExit::Mmio { addr, write, size } => self.memory.handle_mmio(addr, write, size)?,
VmExit::Shutdown => {
log::info!("VCPU-{index} requested shutdown");
break Ok(false);
}
VmExit::Reboot => {
break Ok(true);
}
VmExit::Shutdown => break Ok(BoardState::Shutdown),
VmExit::Reboot => break Ok(BoardState::RebootPending),
VmExit::Paused => break Ok(BoardState::Paused),
VmExit::Interrupted => {
let mp_sync = self.mp_sync.lock();
match mp_sync.state {
BoardState::Shutdown => VmEntry::Shutdown,
BoardState::RebootPending => VmEntry::Reboot,
_ => VmEntry::None,
BoardState::Paused => VmEntry::Pause,
BoardState::Running => VmEntry::None,
}
}
VmExit::ConvertMemory { gpa, size, private } => {
Expand Down Expand Up @@ -393,82 +409,99 @@ where
}
}

fn run_vcpu_inner(&self, index: u16, event_tx: &Sender<u16>) -> Result<(), Error> {
let mut vcpu = self.create_vcpu(index)?;
self.notify_vmm(index, event_tx)?;
self.init_vcpu(index, &mut vcpu)?;

let mut mp_sync = self.mp_sync.lock();
while mp_sync.state == BoardState::Created {
self.cond_var.wait(&mut mp_sync);
}
if mp_sync.state != BoardState::Running {
return Ok(());
fn boot_init_sync(&self, index: u16, vcpu: &mut V::Vcpu) -> Result<()> {
let vcpus = self.vcpus.read();
self.coco_init(index)?;
if index == 0 {
self.create_ram()?;
for (port, dev) in self.io_devs.read().iter() {
self.memory.add_io_dev(*port, dev.clone())?;
}
#[cfg(target_arch = "aarch64")]
for (addr, dev) in self.mmio_devs.read().iter() {
self.memory.add_region(*addr, dev.clone())?;
}
self.add_pci_devs()?;
let init_state = self.load_payload()?;
self.init_boot_vcpu(vcpu, &init_state)?;
self.create_firmware_data(&init_state)?;
}
drop(mp_sync);
self.init_ap(index, vcpu, &vcpus)?;
self.coco_finalize(index, &vcpus)?;
self.sync_vcpus(&vcpus)
}

loop {
let vcpus = self.vcpus.read();
self.coco_init(index)?;
if index == 0 {
self.create_ram()?;
for (port, dev) in self.io_devs.read().iter() {
self.memory.add_io_dev(*port, dev.clone())?;
}
#[cfg(target_arch = "aarch64")]
for (addr, dev) in self.mmio_devs.read().iter() {
self.memory.add_region(*addr, dev.clone())?;
fn stop_other_vcpus(&self, current: Option<u16>, vcpus: &VcpuGuard) -> Result<()> {
for (index, handle) in vcpus.iter().enumerate() {
let index = index as u16;
if let Some(current) = current {
if current == index {
continue;
}
self.add_pci_devs()?;
let init_state = self.load_payload()?;
self.init_boot_vcpu(&mut vcpu, &init_state)?;
self.create_firmware_data(&init_state)?;
log::info!("VCPU-{current}: stopping VCPU-{index}");
} else {
log::info!("Stopping VCPU-{index}");
}
self.init_ap(index, &mut vcpu, &vcpus)?;
self.coco_finalize(index, &vcpus)?;
self.sync_vcpus(&vcpus)?;
drop(vcpus);
let identity = self.encode_cpu_identity(index);
self.vm
.stop_vcpu(identity, handle)
.context(error::StopVcpu { index })?;
}
Ok(())
}

let maybe_reboot = self.vcpu_loop(&mut vcpu, index);
fn run_vcpu_inner(&self, index: u16, event_tx: &Sender<u16>) -> Result<(), Error> {
let mut vcpu = self.create_vcpu(index)?;
self.notify_vmm(index, event_tx)?;
self.init_vcpu(index, &mut vcpu)?;

let vcpus = self.vcpus.read();
'reboot: loop {
let mut mp_sync = self.mp_sync.lock();
if mp_sync.state == BoardState::Running {
mp_sync.state = if matches!(maybe_reboot, Ok(true)) {
BoardState::RebootPending
} else {
BoardState::Shutdown
};
for (another, handle) in vcpus.iter().enumerate() {
if index == another as u16 {
continue;
}
log::info!("VCPU-{index}: stopping VCPU-{another}");
self.vm
.stop_vcpu(self.encode_cpu_identity(another as u16), handle)
.context(error::StopVcpu {
index: another as u16,
})?;
loop {
match mp_sync.state {
BoardState::Paused => self.cond_var.wait(&mut mp_sync),
BoardState::Running => break,
BoardState::Shutdown => break 'reboot Ok(()),
BoardState::RebootPending => mp_sync.state = BoardState::Running,
}
}
drop(mp_sync);
self.sync_vcpus(&vcpus)?;

self.boot_init_sync(index, &mut vcpu)?;

let request = 'pause: loop {
let request = self.vcpu_loop(&mut vcpu, index);

let vcpus = self.vcpus.read();
let mut mp_sync = self.mp_sync.lock();
if mp_sync.state == BoardState::Running {
mp_sync.state = match request {
Ok(BoardState::RebootPending) => BoardState::RebootPending,
Ok(BoardState::Paused) => BoardState::Paused,
_ => BoardState::Shutdown,
};
log::trace!("VCPU-{index}: change state to {:?}", mp_sync.state);
self.stop_other_vcpus(Some(index), &vcpus)?;
}
loop {
match mp_sync.state {
BoardState::Running => break,
BoardState::Paused => self.cond_var.wait(&mut mp_sync),
BoardState::RebootPending | BoardState::Shutdown => break 'pause request,
}
}
};

if index == 0 {
self.pci_bus.segment.reset().context(error::ResetPci)?;
self.memory.reset()?;
}
self.reset_vcpu(index, &mut vcpu)?;

if let Err(e) = maybe_reboot {
break Err(e);
}
request?;

let mut mp_sync = self.mp_sync.lock();
if mp_sync.state == BoardState::Shutdown {
break Ok(());
}
mp_sync.state = BoardState::Running;
let vcpus = self.vcpus.read();
self.sync_vcpus(&vcpus)?;
}
}

Expand All @@ -490,7 +523,7 @@ where
return Ok(());
}

log::warn!("VCPU-{index} reported error, unblocking other VCPUs...");
log::warn!("VCPU-{index} reported error {ret:?}, unblocking other VCPUs...");
let mut mp_sync = self.mp_sync.lock();
mp_sync.fatal = true;
if mp_sync.count > 0 {
Expand Down
2 changes: 2 additions & 0 deletions alioth/src/hv/hv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,12 +420,14 @@ pub enum VmExit {
},
Shutdown,
Reboot,
Paused,
Interrupted,
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VmEntry {
None,
Pause,
Shutdown,
Reboot,
#[cfg(target_arch = "x86_64")]
Expand Down
1 change: 1 addition & 0 deletions alioth/src/hv/hvf/vcpu/vcpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ impl Vcpu for HvfVcpu {
VmEntry::Mmio { data } => self.entry_mmio(data)?,
VmEntry::Shutdown => return Ok(VmExit::Shutdown),
VmEntry::Reboot => return Ok(VmExit::Reboot),
VmEntry::Pause => return Ok(VmExit::Paused),
}

if !self.power_on.load(Ordering::Relaxed) {
Expand Down
2 changes: 2 additions & 0 deletions alioth/src/hv/kvm/kvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ pub enum KvmError {
#[cfg(target_arch = "aarch64")]
#[snafu(display("Failed to configure device attributes"))]
DeviceAttr { error: std::io::Error },
#[snafu(display("Failed to configure kvmclock"))]
KvmClockCtrl { error: std::io::Error },
}

#[derive(Debug)]
Expand Down
10 changes: 9 additions & 1 deletion alioth/src/hv/kvm/vcpu/vcpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ impl Vcpu for KvmVcpu {
}
}
VmEntry::Mmio { data } => self.entry_mmio(data),
VmEntry::Shutdown | VmEntry::Reboot => self.set_immediate_exit(true),
VmEntry::Shutdown | VmEntry::Reboot | VmEntry::Pause => self.set_immediate_exit(true),
};
let ret = unsafe { kvm_run(&self.fd) };
match ret {
Expand All @@ -193,6 +193,14 @@ impl Vcpu for KvmVcpu {
self.set_immediate_exit(false);
Ok(VmExit::Reboot)
}
(ErrorKind::Interrupted, VmEntry::Pause) => {
#[cfg(target_arch = "x86_64")]
if let Err(e) = self.kvmclock_ctrl() {
log::error!("Failed to control kvmclock: {e:?}");
}
self.set_immediate_exit(false);
Ok(VmExit::Paused)
}
(ErrorKind::Interrupted, _) => Ok(VmExit::Interrupted),
_ => Err(e).context(error::RunVcpu),
},
Expand Down
9 changes: 7 additions & 2 deletions alioth/src/hv/kvm/vcpu/vcpu_x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ use crate::hv::kvm::vm::KvmVm;
use crate::hv::{Error, Result, error};
use crate::sys::kvm::{
KVM_MAX_CPUID_ENTRIES, KvmCpuid2, KvmCpuid2Flag, KvmCpuidEntry2, KvmMsrEntry, KvmMsrs, KvmRegs,
MAX_IO_MSRS, kvm_create_vcpu, kvm_get_regs, kvm_get_sregs, kvm_get_sregs2, kvm_set_cpuid2,
kvm_set_msrs, kvm_set_regs, kvm_set_sregs, kvm_set_sregs2,
MAX_IO_MSRS, kvm_create_vcpu, kvm_get_regs, kvm_get_sregs, kvm_get_sregs2, kvm_kvmclock_ctrl,
kvm_set_cpuid2, kvm_set_msrs, kvm_set_regs, kvm_set_sregs, kvm_set_sregs2,
};

#[derive(Debug)]
Expand Down Expand Up @@ -158,6 +158,11 @@ impl KvmVcpu {
Ok(unsafe { OwnedFd::from_raw_fd(fd) })
}

/// Issues the kvmclock control ioctl (`kvm_kvmclock_ctrl`) on this VCPU's
/// file descriptor.
///
/// NOTE(review): presumably this maps to `KVM_KVMCLOCK_CTRL`, which tells
/// the guest it was paused by the host — confirm against the KVM API docs.
///
/// # Errors
///
/// Returns an error with `KvmClockCtrl` context if the ioctl fails.
pub fn kvmclock_ctrl(&mut self) -> Result<()> {
    // SAFETY: NOTE(review): assumes `self.fd` is a valid, open KVM VCPU
    // descriptor for as long as `self` is alive — confirm.
    let ioctl_outcome = unsafe { kvm_kvmclock_ctrl(&self.fd) };
    ioctl_outcome.context(kvm_error::KvmClockCtrl)?;
    Ok(())
}

fn get_kvm_regs(&self) -> Result<KvmRegs> {
let kvm_regs = unsafe { kvm_get_regs(&self.fd) }.context(error::VcpuReg)?;
Ok(kvm_regs)
Expand Down