diff --git a/rust-vmm-ci b/rust-vmm-ci index af547082..0b1cb863 160000 --- a/rust-vmm-ci +++ b/rust-vmm-ci @@ -1 +1 @@ -Subproject commit af547082938b44b69b6f1bb44c1af8b405d7650c +Subproject commit 0b1cb86353cc093f2e17d7e9f6820de80a6c274d diff --git a/vhost-user-backend/Cargo.toml b/vhost-user-backend/Cargo.toml index a7c27597..03225dcc 100644 --- a/vhost-user-backend/Cargo.toml +++ b/vhost-user-backend/Cargo.toml @@ -13,8 +13,10 @@ xen = ["vm-memory/xen", "vhost/xen"] postcopy = ["vhost/postcopy", "userfaultfd"] [dependencies] +bitflags = "2.9.1" libc = "0.2.39" log = "0.4.17" +mio = { version = "1.0.4", features = ["os-poll", "os-ext"] } userfaultfd = { version = "0.8.1", optional = true } vhost = { path = "../vhost", version = "0.14.0", features = ["vhost-user-backend"] } virtio-bindings = { workspace = true } diff --git a/vhost-user-backend/src/backend.rs b/vhost-user-backend/src/backend.rs index 42c92f90..840caa85 100644 --- a/vhost-user-backend/src/backend.rs +++ b/vhost-user-backend/src/backend.rs @@ -23,13 +23,13 @@ use std::io::Result; use std::ops::Deref; use std::sync::{Arc, Mutex, RwLock}; +use mio::Interest; use vhost::vhost_user::message::{ VhostTransferStateDirection, VhostTransferStatePhase, VhostUserProtocolFeatures, VhostUserSharedMsg, }; use vhost::vhost_user::Backend; use vm_memory::bitmap::Bitmap; -use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; use vhost::vhost_user::GpuBackend; @@ -144,7 +144,7 @@ pub trait VhostUserBackend: Send + Sync { fn handle_event( &self, device_event: u16, - evset: EventSet, + evset: Interest, vrings: &[Self::Vring], thread_id: usize, ) -> Result<()>; @@ -287,7 +287,7 @@ pub trait VhostUserBackendMut: Send + Sync { fn handle_event( &mut self, device_event: u16, - evset: EventSet, + evset: Interest, vrings: &[Self::Vring], thread_id: usize, ) -> Result<()>; @@ -389,7 +389,7 @@ impl VhostUserBackend for Arc { fn handle_event( &self, device_event: u16, - evset: EventSet, + evset: Interest, vrings: &[Self::Vring], thread_id: usize, ) -> Result<()> { @@ -478,7 +478,7 @@ impl VhostUserBackend for Mutex { fn handle_event( &self, device_event: u16, - evset: EventSet, + evset: Interest, vrings: &[Self::Vring], thread_id: usize, ) -> Result<()> { @@ -570,7 +570,7 @@ impl VhostUserBackend for RwLock { fn handle_event( &self, device_event: u16, - evset: EventSet, + evset: Interest, vrings: &[Self::Vring], thread_id: usize, ) -> Result<()> { @@ -600,6 +600,7 @@ pub mod tests { use super::*; use crate::VringRwLock; use libc::EFD_NONBLOCK; + use mio::Interest; use std::sync::Mutex; use uuid::Uuid; use vm_memory::{GuestAddress, GuestMemoryAtomic, GuestMemoryMmap}; @@ -707,7 +708,7 @@ pub mod tests { fn handle_event( &mut self, _device_event: u16, - _evset: EventSet, + _evset: Interest, _vrings: &[VringRwLock], _thread_id: usize, ) -> Result<()> { @@ -793,7 +794,7 @@ pub mod tests { let vring = VringRwLock::new(mem, 0x1000).unwrap(); backend - .handle_event(0x1, EventSet::IN, &[vring], 0) + .handle_event(0x1, Interest::READABLE, &[vring], 0) .unwrap(); backend.reset_device(); diff --git a/vhost-user-backend/src/event_loop.rs b/vhost-user-backend/src/event_loop.rs index 1b3dfe0a..56c12af2 100644 --- a/vhost-user-backend/src/event_loop.rs +++ b/vhost-user-backend/src/event_loop.rs @@ -3,53 +3,128 @@ // // SPDX-License-Identifier: Apache-2.0 +use std::collections::HashSet; use std::fmt::{Display, Formatter}; use std::io::{self, Result}; use std::marker::PhantomData; use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::Mutex; -use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; +use mio::event::Event; +use mio::unix::SourceFd; +use mio::{Events, Interest, Poll, Registry, Token}; use vmm_sys_util::eventfd::EventFd; use super::backend::VhostUserBackend; use super::vring::VringT; +use bitflags::bitflags; /// Errors related to vring epoll event handling. #[derive(Debug)] -pub enum VringEpollError { +pub enum VringPollError { /// Failed to create epoll file descriptor. - EpollCreateFd(io::Error), + PollerCreate(io::Error), /// Failed while waiting for events. - EpollWait(io::Error), + PollerWait(io::Error), /// Could not register exit event RegisterExitEvent(io::Error), /// Failed to read the event from kick EventFd. HandleEventReadKick(io::Error), /// Failed to handle the event from the backend. HandleEventBackendHandling(io::Error), + /// Failed to clone registry. + RegistryClone(io::Error), } -impl Display for VringEpollError { +impl Display for VringPollError { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { - VringEpollError::EpollCreateFd(e) => write!(f, "cannot create epoll fd: {e}"), - VringEpollError::EpollWait(e) => write!(f, "failed to wait for epoll event: {e}"), - VringEpollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"), - VringEpollError::HandleEventReadKick(e) => { + VringPollError::PollerCreate(e) => write!(f, "cannot create poller: {e}"), + VringPollError::PollerWait(e) => write!(f, "failed to wait for poller event: {e}"), + VringPollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"), + VringPollError::HandleEventReadKick(e) => { write!(f, "cannot read vring kick event: {e}") } - VringEpollError::HandleEventBackendHandling(e) => { - write!(f, "failed to handle epoll event: {e}") + VringPollError::HandleEventBackendHandling(e) => { + write!(f, "failed to handle poll event: {e}") } + VringPollError::RegistryClone(e) => write!(f, "cannot clone poller's registry: {e}"), } } } -impl std::error::Error for VringEpollError {} +impl std::error::Error for VringPollError {} /// Result of vring epoll operations. -pub type VringEpollResult = std::result::Result; +pub type VringEpollResult = std::result::Result; +bitflags! { + #[derive(Debug, PartialEq, PartialOrd)] + pub struct EventSet: u32 { + const READABLE = 1u32; + const WRITABLE = 2u32; + } +} + +impl From for Interest { + fn from(value: EventSet) -> Self { + let mut interest = None; + if value == EventSet::READABLE { + interest = interest + .map(|interest| Interest::READABLE | interest) + .or(Some(Interest::READABLE)); + } + if value == EventSet::WRITABLE { + interest = interest + .map(|interest| Interest::WRITABLE | interest) + .or(Some(Interest::WRITABLE)); + } + return interest.expect("Interest is invalid"); + } +} + +// According https://github.com/tokio-rs/mio/blob/cd972977e7a8234e62c7ba8bd4d4ec1da208e576/src/interest.rs#L27 +// the function is implemented to convert Event into Interest +fn convert_event_to_interest(value: &Event) -> Option { + let mut interest = None; + if value.is_readable() { + interest = interest + .map(|interest| Interest::READABLE | interest) + .or(Some(Interest::READABLE)); + } + if value.is_writable() { + interest = interest + .map(|interest| Interest::WRITABLE | interest) + .or(Some(Interest::WRITABLE)); + } + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + if value.is_aio() { + interest = interest + .map(|interest| Interest::AIO | interest) + .or(Some(Interest::AIO)); + } + #[cfg(target_os = "freebsd")] + if value.is_lio() { + interest = interest + .map(|interest| Interest::LIO | interest) + .or(Some(Interest::LIO)); + } + #[cfg(any(target_os = "linux", target_os = "android"))] + if value.is_priority() { + interest = interest + .map(|interest| Interest::PRIORITY | interest) + .or(Some(Interest::PRIORITY)); + } + interest +} /// Epoll event handler to manage and process epoll events for registered file descriptor. /// /// The `VringEpollHandler` structure provides interfaces to: @@ -57,7 +132,11 @@ pub type VringEpollResult = std::result::Result; /// - remove registered file descriptors from the epoll fd /// - run the event loop to handle pending events on the epoll fd pub struct VringEpollHandler { - epoll: Epoll, + poller: Mutex, + registry: Registry, + // Record the registered fd. + // Because in mio, consecutive calls to register is unspecified behavior. + fd_set: Mutex>, backend: T, vrings: Vec, thread_id: usize, @@ -84,22 +163,32 @@ where vrings: Vec, thread_id: usize, ) -> VringEpollResult { - let epoll = Epoll::new().map_err(VringEpollError::EpollCreateFd)?; + let poller = Poll::new().map_err(VringPollError::PollerCreate)?; let exit_event_fd = backend.exit_event(thread_id); + let fd_set = Mutex::new(HashSet::new()); + let registry = poller + .registry() + .try_clone() + .map_err(VringPollError::RegistryClone)?; if let Some(exit_event_fd) = &exit_event_fd { let id = backend.num_queues(); - epoll - .ctl( - ControlOperation::Add, - exit_event_fd.as_raw_fd(), - EpollEvent::new(EventSet::IN, id as u64), + + registry + .register( + &mut SourceFd(&exit_event_fd.as_raw_fd()), + Token(id as usize), + Interest::READABLE, ) - .map_err(VringEpollError::RegisterExitEvent)?; + .map_err(VringPollError::RegisterExitEvent)?; + + fd_set.lock().unwrap().insert(exit_event_fd.as_raw_fd()); } Ok(VringEpollHandler { - epoll, + poller: Mutex::new(poller), + registry, + fd_set, backend, vrings, thread_id, @@ -135,13 +224,30 @@ where } pub(crate) fn register_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { - self.epoll - .ctl(ControlOperation::Add, fd, EpollEvent::new(ev_type, data)) + let mut fd_set = self.fd_set.lock().unwrap(); + if fd_set.contains(&fd) { + return Err(io::Error::from_raw_os_error(libc::EEXIST)); + } + self.registry + .register(&mut SourceFd(&fd), Token(data as usize), ev_type.into()) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + fd_set.insert(fd); + Ok(()) } - pub(crate) fn unregister_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { - self.epoll - .ctl(ControlOperation::Delete, fd, EpollEvent::new(ev_type, data)) + pub(crate) fn unregister_event(&self, fd: RawFd, _ev_type: EventSet, _data: u64) -> Result<()> { + let mut fd_set = self.fd_set.lock().unwrap(); + if !fd_set.contains(&fd) { + return Err(io::Error::from_raw_os_error(libc::ENOENT)); + } + self.registry.deregister(&mut SourceFd(&fd)).map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("Failed to deregister fd {}: {}", fd, e), + ) + })?; + fd_set.remove(&fd); + Ok(()) } /// Run the event poll loop to handle all pending events on registered fds. @@ -150,41 +256,23 @@ where /// associated with the backend. pub(crate) fn run(&self) -> VringEpollResult<()> { const EPOLL_EVENTS_LEN: usize = 100; - let mut events = vec![EpollEvent::new(EventSet::empty(), 0); EPOLL_EVENTS_LEN]; - - 'epoll: loop { - let num_events = match self.epoll.wait(-1, &mut events[..]) { - Ok(res) => res, - Err(e) => { - if e.kind() == io::ErrorKind::Interrupted { - // It's well defined from the epoll_wait() syscall - // documentation that the epoll loop can be interrupted - // before any of the requested events occurred or the - // timeout expired. In both those cases, epoll_wait() - // returns an error of type EINTR, but this should not - // be considered as a regular error. Instead it is more - // appropriate to retry, by calling into epoll_wait(). - continue; - } - return Err(VringEpollError::EpollWait(e)); - } - }; - - for event in events.iter().take(num_events) { - let evset = match EventSet::from_bits(event.events) { - Some(evset) => evset, - None => { - let evbits = event.events; - println!("epoll: ignoring unknown event set: 0x{evbits:x}"); - continue; - } - }; - let ev_type = event.data() as u16; - - // handle_event() returns true if an event is received from the exit event fd. - if self.handle_event(ev_type, evset)? { - break 'epoll; + let mut events = Events::with_capacity(EPOLL_EVENTS_LEN); + 'poll: loop { + self.poller + .lock() + .unwrap() + .poll(&mut events, None) + .map_err(VringPollError::PollerWait)?; + + for event in events.iter() { + let token = event.token(); + + let interest_event = convert_event_to_interest(event); + if let Some(interest_event) = interest_event { + if self.handle_event(token.0 as u16, interest_event)? { + break 'poll; + } } } } @@ -192,7 +280,7 @@ where Ok(()) } - fn handle_event(&self, device_event: u16, evset: EventSet) -> VringEpollResult { + fn handle_event(&self, device_event: u16, event: Interest) -> VringEpollResult { if self.exit_event_fd.is_some() && device_event as usize == self.backend.num_queues() { return Ok(true); } @@ -201,7 +289,7 @@ where let vring = &self.vrings[device_event as usize]; let enabled = vring .read_kick() - .map_err(VringEpollError::HandleEventReadKick)?; + .map_err(VringPollError::HandleEventReadKick)?; // If the vring is not enabled, it should not be processed. if !enabled { @@ -210,8 +298,8 @@ where } self.backend - .handle_event(device_event, evset, &self.vrings, self.thread_id) - .map_err(VringEpollError::HandleEventBackendHandling)?; + .handle_event(device_event, event, &self.vrings, self.thread_id) + .map_err(VringPollError::HandleEventBackendHandling)?; Ok(false) } @@ -219,7 +307,7 @@ where impl AsRawFd for VringEpollHandler { fn as_raw_fd(&self) -> RawFd { - self.epoll.as_raw_fd() + self.poller.lock().unwrap().as_raw_fd() } } @@ -244,29 +332,32 @@ mod tests { let eventfd = EventFd::new(0).unwrap(); handler - .register_listener(eventfd.as_raw_fd(), EventSet::IN, 3) + .register_listener(eventfd.as_raw_fd(), EventSet::READABLE, 3) .unwrap(); // Register an already registered fd. handler - .register_listener(eventfd.as_raw_fd(), EventSet::IN, 3) + .register_listener(eventfd.as_raw_fd(), EventSet::READABLE, 3) .unwrap_err(); // Register an invalid data. handler - .register_listener(eventfd.as_raw_fd(), EventSet::IN, 1) + .register_listener(eventfd.as_raw_fd(), EventSet::READABLE, 1) .unwrap_err(); handler - .unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 3) + .unregister_listener(eventfd.as_raw_fd(), EventSet::READABLE, 3) .unwrap(); // unregister an already unregistered fd. handler - .unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 3) + .unregister_listener(eventfd.as_raw_fd(), EventSet::READABLE, 3) .unwrap_err(); // unregister an invalid data. handler - .unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 1) + .unregister_listener(eventfd.as_raw_fd(), EventSet::READABLE, 1) .unwrap_err(); // Check we retrieve the correct file descriptor - assert_eq!(handler.as_raw_fd(), handler.epoll.as_raw_fd()); + assert_eq!( + handler.as_raw_fd(), + handler.poller.lock().unwrap().as_raw_fd() + ); } } diff --git a/vhost-user-backend/src/handler.rs b/vhost-user-backend/src/handler.rs index 33008756..29a705ca 100644 --- a/vhost-user-backend/src/handler.rs +++ b/vhost-user-backend/src/handler.rs @@ -14,6 +14,7 @@ use std::sync::Arc; use std::thread; use crate::bitmap::{BitmapReplace, MemRegionBitmap, MmapLogReg}; +use crate::event_loop::EventSet; #[cfg(feature = "postcopy")] use userfaultfd::{Uffd, UffdBuilder}; use vhost::vhost_user::message::{ @@ -31,11 +32,10 @@ use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use virtio_queue::{Error as VirtQueError, QueueT}; use vm_memory::mmap::NewBitmap; use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryMmap, GuestRegionMmap}; -use vmm_sys_util::epoll::EventSet; use super::backend::VhostUserBackend; use super::event_loop::VringEpollHandler; -use super::event_loop::{VringEpollError, VringEpollResult}; +use super::event_loop::{VringEpollResult, VringPollError}; use super::vring::VringT; use super::GM; @@ -50,7 +50,7 @@ pub enum VhostUserHandlerError { /// Failed to create a `Vring`. CreateVring(VirtQueError), /// Failed to create vring worker. - CreateEpollHandler(VringEpollError), + CreateEpollHandler(VringPollError), /// Failed to spawn vring worker. SpawnVringWorker(io::Error), /// Could not find the mapping from memory regions. @@ -212,7 +212,11 @@ where if shifted_queues_mask & 1u64 == 1u64 { let evt_idx = queues_mask.count_ones() - shifted_queues_mask.count_ones(); self.handlers[thread_index] - .register_event(fd.as_raw_fd(), EventSet::IN, u64::from(evt_idx)) + .register_event( + fd.as_raw_fd(), + EventSet::READABLE, + u64::from(evt_idx), + ) .map_err(VhostUserError::ReqHandlerError)?; break; } @@ -436,7 +440,11 @@ where if shifted_queues_mask & 1u64 == 1u64 { let evt_idx = queues_mask.count_ones() - shifted_queues_mask.count_ones(); self.handlers[thread_index] - .unregister_event(fd.as_raw_fd(), EventSet::IN, u64::from(evt_idx)) + .unregister_event( + fd.as_raw_fd(), + EventSet::READABLE, + u64::from(evt_idx), + ) .map_err(VhostUserError::ReqHandlerError)?; break; } diff --git a/vhost-user-backend/src/lib.rs b/vhost-user-backend/src/lib.rs index f0fd3768..3a49db94 100644 --- a/vhost-user-backend/src/lib.rs +++ b/vhost-user-backend/src/lib.rs @@ -23,7 +23,7 @@ mod backend; pub use self::backend::{VhostUserBackend, VhostUserBackendMut}; mod event_loop; -pub use self::event_loop::VringEpollHandler; +pub use self::event_loop::{EventSet, VringEpollHandler}; mod handler; pub use self::handler::VhostUserHandlerError; diff --git a/vhost-user-backend/tests/vhost-user-server.rs b/vhost-user-backend/tests/vhost-user-server.rs index e3712994..f013fccd 100644 --- a/vhost-user-backend/tests/vhost-user-server.rs +++ b/vhost-user-backend/tests/vhost-user-server.rs @@ -6,6 +6,7 @@ use std::path::Path; use std::sync::{Arc, Barrier, Mutex}; use std::thread; +use mio::Interest; use uuid::Uuid; use vhost::vhost_user::message::{ VhostUserConfigFlags, VhostUserHeaderFlag, VhostUserInflight, VhostUserProtocolFeatures, @@ -17,7 +18,6 @@ use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringRwLock}; use vm_memory::{ FileOffset, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, GuestMemoryMmap, }; -use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; struct MockVhostBackend { @@ -114,7 +114,7 @@ impl VhostUserBackendMut for MockVhostBackend { fn handle_event( &mut self, _device_event: u16, - _evset: EventSet, + _evset: Interest, _vrings: &[VringRwLock], _thread_id: usize, ) -> Result<()> {