diff --git a/init/init.c b/init/init.c index 2f1e74af5..170d7104c 100644 --- a/init/init.c +++ b/init/init.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #define KRUN_EXIT_CODE_IOCTL 0x7602 #define KRUN_REMOVE_ROOT_DIR_IOCTL 0x7603 +#define KRUN_EXIT_VM_IOCTL 0x7604 #define KRUN_MAGIC "KRUN" #define KRUN_FOOTER_LEN 12 @@ -1126,6 +1128,41 @@ void set_exit_code(int code) close(fd); } +static void request_vm_exit(void) +{ + int fd; + int ret; + int virtiofs_check; + + virtiofs_check = is_virtiofs("/"); + if (virtiofs_check < 0) { + printf("Warning: Could not determine filesystem type for root\n"); + } + + if (virtiofs_check == 0) { + return; + } + + fd = open("/", O_RDONLY); + if (fd < 0) { + perror("Couldn't open root filesystem to request VM exit"); + return; + } + + ret = ioctl(fd, KRUN_EXIT_VM_IOCTL, 0); + if (ret < 0) { + perror("Error using the ioctl to request VM exit"); + } + + close(fd); +} + +static void shutdown_vm(void) +{ + sync(); + request_vm_exit(); +} + #if __linux__ int try_mount(const char *source, const char *target, const char *fstype, unsigned long mountflags, const void *data) @@ -1413,13 +1450,12 @@ int main(int argc, char **argv) // Not the first child, ignore it. }; - // The workload's entrypoint has exited, record its exit code and exit - // ourselves. if (WIFEXITED(status)) { set_exit_code(WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { set_exit_code(WTERMSIG(status) + 128); } + shutdown_vm(); } return 0; diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index bc877bc24..7bf8d7cb7 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -2,7 +2,7 @@ use crossbeam_channel::Sender; use std::cmp; use std::io::Write; -use std::sync::atomic::{AtomicI32, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU64, Ordering}; use std::sync::Arc; use std::thread::JoinHandle; @@ -51,6 +51,8 @@ pub struct Fs { worker_thread: Option>, worker_stopfd: EventFd, exit_code: Arc, + exit_request: Arc, + exit_evt: EventFd, #[cfg(target_os = "macos")] map_sender: Option>, } @@ -60,8 +62,12 @@ impl Fs { fs_id: String, shared_dir: String, exit_code: Arc, + exit_request: Arc, allow_root_dir_delete: bool, + virtio_exit_evt_miss + exit_evt: EventFd, read_only: bool, + main ) -> super::Result { let avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX); @@ -87,6 +93,8 @@ impl Fs { worker_thread: None, worker_stopfd: EventFd::new(EFD_NONBLOCK).map_err(FsError::EventFd)?, exit_code, + exit_request, + exit_evt, #[cfg(target_os = "macos")] map_sender: None, }) @@ -190,6 +198,8 @@ impl VirtioDevice for Fs { self.read_only, self.worker_stopfd.try_clone().unwrap(), self.exit_code.clone(), + self.exit_request.clone(), + self.exit_evt.try_clone().unwrap(), #[cfg(target_os = "macos")] self.map_sender.clone(), ) diff --git a/src/devices/src/virtio/fs/filesystem.rs b/src/devices/src/virtio/fs/filesystem.rs index ef286e312..7de6cbd1e 100644 --- a/src/devices/src/virtio/fs/filesystem.rs +++ b/src/devices/src/virtio/fs/filesystem.rs @@ -13,7 +13,7 @@ use std::ffi::{CStr, CString}; use std::fs::File; use std::io; use std::mem; -use std::sync::atomic::AtomicI32; +use std::sync::atomic::{AtomicBool, AtomicI32}; use std::sync::{Arc, Mutex}; use std::time::Duration; @@ -1170,6 +1170,7 @@ pub trait FileSystem { in_size: u32, out_size: u32, exit_code: &Arc, + exit_request: &Arc, ) -> io::Result> { Err(io::Error::from_raw_os_error(bindings::LINUX_ENOSYS)) } diff --git a/src/devices/src/virtio/fs/linux/passthrough.rs b/src/devices/src/virtio/fs/linux/passthrough.rs index e5ca21a03..6f3da4004 100644 --- a/src/devices/src/virtio/fs/linux/passthrough.rs +++ b/src/devices/src/virtio/fs/linux/passthrough.rs @@ -2179,6 +2179,7 @@ impl FileSystem for PassthroughFs { _in_size: u32, out_size: u32, exit_code: &Arc, + exit_request: &Arc, ) -> io::Result> { const VIRTIO_IOC_MAGIC: u8 = b'v'; @@ -2198,6 +2199,10 @@ impl FileSystem for PassthroughFs { const VIRTIO_IOC_REMOVE_ROOT_DIR_REQ: u32 = request_code_none!(VIRTIO_IOC_MAGIC, VIRTIO_IOC_REMOVE_ROOT_DIR_CODE) as u32; + const VIRTIO_IOC_TYPE_EXIT_REQUEST: u8 = 4; + const VIRTIO_IOC_EXIT_REQUEST_REQ: u32 = + request_code_none!(VIRTIO_IOC_MAGIC, VIRTIO_IOC_TYPE_EXIT_REQUEST) as u32; + match cmd { VIRTIO_IOC_EXPORT_FD_REQ => { if out_size as usize != VIRTIO_IOC_EXPORT_FD_SIZE { @@ -2229,9 +2234,15 @@ impl FileSystem for PassthroughFs { Ok(ret) } VIRTIO_IOC_EXIT_CODE_REQ => { + debug!("virtiofs exit-code ioctl received: {}", arg as i32); exit_code.store(arg as i32, Ordering::SeqCst); Ok(Vec::new()) } + VIRTIO_IOC_EXIT_REQUEST_REQ => { + debug!("virtiofs explicit-exit ioctl received"); + exit_request.store(true, Ordering::SeqCst); + Ok(Vec::new()) + } VIRTIO_IOC_REMOVE_ROOT_DIR_REQ if self.cfg.allow_root_dir_delete => { std::fs::remove_dir_all(&self.cfg.root_dir)?; Ok(Vec::new()) diff --git a/src/devices/src/virtio/fs/macos/passthrough.rs b/src/devices/src/virtio/fs/macos/passthrough.rs index 9bf05734a..bd012665a 100644 --- a/src/devices/src/virtio/fs/macos/passthrough.rs +++ b/src/devices/src/virtio/fs/macos/passthrough.rs @@ -2438,17 +2438,23 @@ impl FileSystem for PassthroughFs { _in_size: u32, _out_size: u32, exit_code: &Arc, + exit_request: &Arc, ) -> io::Result> { // We can't use nix::request_code_none here since it's system-dependent // and we need the value from Linux. const VIRTIO_IOC_EXIT_CODE_REQ: u32 = 0x7602; const VIRTIO_IOC_REMOVE_ROOT_DIR_REQ: u32 = 0x7603; + const VIRTIO_IOC_EXIT_REQUEST_REQ: u32 = 0x7604; match cmd { VIRTIO_IOC_EXIT_CODE_REQ => { exit_code.store(arg as i32, Ordering::SeqCst); Ok(Vec::new()) } + VIRTIO_IOC_EXIT_REQUEST_REQ => { + exit_request.store(true, Ordering::SeqCst); + Ok(Vec::new()) + } VIRTIO_IOC_REMOVE_ROOT_DIR_REQ if self.cfg.allow_root_dir_delete => { std::fs::remove_dir_all(&self.cfg.root_dir)?; Ok(Vec::new()) diff --git a/src/devices/src/virtio/fs/server.rs b/src/devices/src/virtio/fs/server.rs index a6b436a35..028f68268 100644 --- a/src/devices/src/virtio/fs/server.rs +++ b/src/devices/src/virtio/fs/server.rs @@ -12,7 +12,7 @@ use std::ffi::{CStr, CString}; use std::fs::File; use std::io::{self, Read, Write}; use std::mem::size_of; -use std::sync::atomic::{AtomicI32, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU64, Ordering}; use std::sync::Arc; use vm_memory::ByteValued; @@ -85,6 +85,7 @@ impl Server { w: Writer, shm_region: &Option, exit_code: &Arc, + exit_request: &Arc, #[cfg(target_os = "macos")] map_sender: &Option>, ) -> Result { let in_header: InHeader = r.read_obj().map_err(Error::DecodeMessage)?; @@ -134,7 +135,7 @@ impl Server { x if x == Opcode::Interrupt as u32 => self.interrupt(in_header), x if x == Opcode::Bmap as u32 => self.bmap(in_header, r, w), x if x == Opcode::Destroy as u32 => self.destroy(), - x if x == Opcode::Ioctl as u32 => self.ioctl(in_header, r, w, exit_code), + x if x == Opcode::Ioctl as u32 => self.ioctl(in_header, r, w, exit_code, exit_request), x if x == Opcode::Poll as u32 => self.poll(in_header, r, w), x if x == Opcode::NotifyReply as u32 => self.notify_reply(in_header, r, w), x if x == Opcode::BatchForget as u32 => self.batch_forget(in_header, r, w), @@ -1185,6 +1186,7 @@ impl Server { mut r: Reader, w: Writer, exit_code: &Arc, + exit_request: &Arc, ) -> Result { let IoctlIn { fh, @@ -1205,6 +1207,7 @@ impl Server { in_size, out_size, exit_code, + exit_request, ) { Ok(data) => { let out = IoctlOut { diff --git a/src/devices/src/virtio/fs/worker.rs b/src/devices/src/virtio/fs/worker.rs index c6de1944e..f8265303e 100644 --- a/src/devices/src/virtio/fs/worker.rs +++ b/src/devices/src/virtio/fs/worker.rs @@ -5,7 +5,7 @@ use utils::worker_message::WorkerMessage; use std::io; use std::os::fd::AsRawFd; -use std::sync::atomic::AtomicI32; +use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; use std::sync::Arc; use std::thread; @@ -65,6 +65,8 @@ pub struct FsWorker { server: FsServer, stop_fd: EventFd, exit_code: Arc, + exit_request: Arc, + exit_evt: EventFd, #[cfg(target_os = "macos")] map_sender: Option>, } @@ -81,6 +83,8 @@ impl FsWorker { read_only: bool, stop_fd: EventFd, exit_code: Arc, + exit_request: Arc, + exit_evt: EventFd, #[cfg(target_os = "macos")] map_sender: Option>, ) -> Result { let server = if read_only { @@ -97,6 +101,8 @@ impl FsWorker { server, stop_fd, exit_code, + exit_request, + exit_evt, #[cfg(target_os = "macos")] map_sender, }) @@ -203,6 +209,7 @@ impl FsWorker { writer, &self.shm_region, &self.exit_code, + &self.exit_request, #[cfg(target_os = "macos")] &self.map_sender, ) { @@ -216,6 +223,14 @@ impl FsWorker { if queue.needs_notification(&self.mem).unwrap() { self.interrupt.signal_used_queue(); } + + if self.exit_request.swap(false, Ordering::SeqCst) { + debug!("virtiofs explicit exit request received; signaling VMM exit event"); + if let Err(e) = self.exit_evt.write(1) { + error!("failed to signal VMM exit event: {e}"); + } + return; + } } } } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 1c94f8b36..b541ac528 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -15,6 +15,8 @@ use std::io::{self, IsTerminal, Read}; use std::os::fd::AsRawFd; use std::os::fd::{BorrowedFd, FromRawFd}; use std::path::PathBuf; +#[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicI32; use std::sync::{Arc, Mutex}; @@ -950,6 +952,8 @@ pub fn build_microvm( // We use this atomic to record the exit code set by init/init.c in the VM. let exit_code = Arc::new(AtomicI32::new(i32::MAX)); + #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] + let exit_request = Arc::new(AtomicBool::new(false)); let mut vmm = Vmm { guest_memory, @@ -1031,6 +1035,13 @@ pub fn build_microvm( attach_input_devices(&mut vmm, &vm_resources.input_backends, intc.clone())?; } + #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] + let fs_exit_evt = vmm + .exit_evt + .try_clone() + .map_err(Error::EventFd) + .map_err(StartMicrovmError::Internal)?; + #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] attach_fs_devices( &mut vmm, @@ -1040,6 +1051,8 @@ pub fn build_microvm( export_table, intc.clone(), exit_code, + exit_request, + fs_exit_evt, #[cfg(target_os = "macos")] _sender, )?; @@ -1878,6 +1891,7 @@ fn attach_mmio_device( } #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] +#[allow(clippy::too_many_arguments)] fn attach_fs_devices( vmm: &mut Vmm, fs_devs: &[FsDeviceConfig], @@ -1885,6 +1899,8 @@ fn attach_fs_devices( #[cfg(not(feature = "tee"))] export_table: Option, intc: IrqChip, exit_code: Arc, + exit_request: Arc, + exit_evt: EventFd, #[cfg(target_os = "macos")] map_sender: Sender, ) -> std::result::Result<(), StartMicrovmError> { use self::StartMicrovmError::*; @@ -1895,7 +1911,12 @@ fn attach_fs_devices( config.fs_id.clone(), config.shared_dir.clone(), exit_code.clone(), + exit_request.clone(), config.allow_root_dir_delete, + exit_evt + .try_clone() + .map_err(Error::EventFd) + .map_err(Internal)?, config.read_only, ) .unwrap(),