Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Guide/src/reference/openvmm/management/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,12 @@ For `--virtio-rng` and `--virtio-console`, use their separate PCIe port flags:
--vhost-user /tmp/virtiofsd.sock,type=fs,tag=myfs,pcie_port=rp0
```

**VFIO device assignment** (Linux only): `--vfio`
**VFIO device assignment** (Linux only): `--vfio` (and optional `--iommu`)

```sh
--vfio rp0:0000:01:00.0
# Legacy VFIO group/container path:
Comment thread
jstarks marked this conversation as resolved.
--vfio host=0000:01:00.0,port=rp0

# Modern VFIO cdev + iommufd path (Linux >= 6.6):
--iommu id=iommu0 --vfio host=0000:01:00.0,port=rp0,iommu=iommu0
```
41 changes: 34 additions & 7 deletions Guide/src/user_guide/openvmm/vfio.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ Use the `--vfio` flag to assign the device to a PCIe root port. You also need to
sudo openvmm \
--pcie-root-complex rc0 \
--pcie-root-port rc0:rp0 \
--vfio rp0:0000:01:00.0 \
--vfio host=0000:01:00.0,port=rp0 \
--kernel /path/to/vmlinux \
--initrd /path/to/initrd \
--cmdline "console=ttyS0" \
Expand All @@ -113,20 +113,47 @@ sudo openvmm \
--processors 2
```

The `--vfio` syntax is `<port_name>:<pci_bdf>`:
The `--vfio` value is a comma-separated list of `key=value` pairs:

- `rp0` — the name of the PCIe root port to attach the device to (must match a `--pcie-root-port` name)
- `0000:01:00.0` — the PCI BDF of the VFIO device on the host
- `host=<pci_bdf>` (required) — the PCI BDF of the VFIO device on the host (e.g., `0000:01:00.0`)
- `port=<name>` (required) — the name of the PCIe root port to attach the device to (must match a `--pcie-root-port` name)
- `iommu=<id>` (optional) — reference to an `--iommu` context; see [Using iommufd (cdev path)](#using-iommufd-cdev-path) below

```admonish tip
You can assign multiple devices by adding more root ports and `--vfio` flags:

--pcie-root-port rc0:rp0 \
--pcie-root-port rc0:rp1 \
--vfio rp0:0000:01:00.0 \
--vfio rp1:334c:00:00.0
--vfio host=0000:01:00.0,port=rp0 \
--vfio host=334c:00:00.0,port=rp1
```
Comment thread
jstarks marked this conversation as resolved.

### Using iommufd (cdev path)

By default, `--vfio` uses the legacy VFIO group/container interface with the
Type1v2 IOMMU driver. On hosts with Linux kernel 6.6 or newer, OpenVMM can
instead use the modern VFIO cdev (per-device fd) + iommufd interface. Enable
it by declaring an `--iommu` context and referencing it from each `--vfio`
device with the `iommu=` key:

```bash
sudo openvmm \
--pcie-root-complex rc0 \
--pcie-root-port rc0:rp0 \
--iommu id=iommu0 \
--vfio host=0000:01:00.0,port=rp0,iommu=iommu0 \
...
```

The `--iommu` syntax is `id=<name>`. All `--vfio` devices that reference the
same `id` share a single iommufd IOAS (one set of IOMMU page tables and one
DMA mapper registration). The IOAS is allocated on demand the first time a
device referencing the id is opened.

Devices opened via the cdev path read their device node from
`/sys/bus/pci/devices/<pci_id>/vfio-dev/vfioN` and open
`/dev/vfio/devices/vfioN` instead of `/dev/vfio/<group>`.

## Step 6: Verify in the guest

If the guest boots with PCI support, the assigned device should be visible:
Expand Down Expand Up @@ -164,7 +191,7 @@ Then request hugepage-backed RAM with the `--memory` option:
sudo openvmm \
--pcie-root-complex rc0 \
--pcie-root-port rc0:rp0 \
--vfio rp0:0000:01:00.0 \
--vfio host=0000:01:00.0,port=rp0 \
--kernel /path/to/vmlinux \
--initrd /path/to/initrd \
--cmdline "console=ttyS0" \
Expand Down
31 changes: 27 additions & 4 deletions openvmm/openvmm_core/src/worker/dispatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,9 @@ struct LoadedVmInner {
/// VFIO container manager inspect handle (Linux only).
#[cfg(target_os = "linux")]
vfio_inspect: Option<vfio_assigned_device::manager::VfioManagerClient>,
/// VFIO cdev + iommufd manager inspect handle (Linux only).
#[cfg(target_os = "linux")]
vfio_cdev_inspect: Option<vfio_assigned_device::manager::VfioCdevManagerClient>,

// relay halt messages, intercepting reset if configured.
halt_recv: mesh::Receiver<HaltReason>,
Expand Down Expand Up @@ -1953,10 +1956,11 @@ impl InitializedVm {
// Register the VFIO resolver, which spawns a container manager task
// internally to share containers across assigned devices.
#[cfg(target_os = "linux")]
let vfio_inspect = {
let (vfio_inspect, vfio_cdev_inspect) = {
let dma_mapper_client = memory_manager.dma_mapper_client();
let vfio_resolver = vfio_assigned_device::resolver::VfioDeviceResolver::new(
driver_source.builder().build("vfio-container-mgr"),
memory_manager.dma_mapper_client(),
dma_mapper_client.clone(),
);
let handle = vfio_resolver.inspect_handle();
resolver.add_async_resolver::<
Expand All @@ -1965,7 +1969,23 @@ impl InitializedVm {
vfio_assigned_device_resources::VfioDeviceHandle,
_,
>(vfio_resolver);
Some(handle)

// Register the VFIO cdev + iommufd resolver for devices opened
// via the cdev interface. Spawns a VfioCdevManager task that
// shares IOAS contexts across devices with the same --iommu ID.
let cdev_resolver = vfio_assigned_device::resolver::VfioCdevDeviceResolver::new(
driver_source.builder().build("vfio-cdev-mgr"),
dma_mapper_client,
);
let cdev_handle = cdev_resolver.inspect_handle();
resolver.add_async_resolver::<
vm_resource::kind::PciDeviceHandleKind,
_,
vfio_assigned_device_resources::VfioCdevDeviceHandle,
_,
>(cdev_resolver);

(Some(handle), Some(cdev_handle))
};
Comment thread
jstarks marked this conversation as resolved.

// Resolve PCIe devices concurrently.
Expand Down Expand Up @@ -2545,6 +2565,8 @@ impl InitializedVm {
vmgs_client_inspect_handle,
#[cfg(target_os = "linux")]
vfio_inspect,
#[cfg(target_os = "linux")]
vfio_cdev_inspect,
halt_recv,
client_notify_send,
automatic_guest_reset: cfg.automatic_guest_reset,
Expand Down Expand Up @@ -2937,7 +2959,8 @@ impl LoadedVm {
.field("resolver", &self.inner.resolver)
.field("vmgs", &self.inner.vmgs_client_inspect_handle);
#[cfg(target_os = "linux")]
resp.field("vfio", &self.inner.vfio_inspect);
resp.field("vfio", &self.inner.vfio_inspect)
.field("vfio_cdev", &self.inner.vfio_cdev_inspect);
}),
},
Event::VmRpc(Err(_)) => break,
Expand Down
173 changes: 156 additions & 17 deletions openvmm/openvmm_entry/src/cli_args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -904,18 +904,35 @@ Assign a host PCI device to the guest via Linux VFIO.
The device must be bound to vfio-pci on the host before starting the VM.

Examples:
# Assign NVMe controller to root port rp0
--vfio rp0:0000:01:00.0
--vfio host=0000:01:00.0,port=rp0
--vfio host=0000:01:00.0,port=rp0,iommu=iommu0

Keys:
host=<pci_bdf> (required) PCI address on the host
port=<name> (required) Root port or downstream switch port name
iommu=<id> (optional) Reference to an --iommu object. When present,
uses VFIO cdev + iommufd instead of the legacy group path.
"#)]
Comment thread
jstarks marked this conversation as resolved.
#[cfg(target_os = "linux")]
#[clap(long, conflicts_with("pcat"))]
pub vfio: Vec<VfioDeviceCli>,

Syntax: <port_name>:<pci_bdf>
/// Create an iommufd context for VFIO cdev device assignment
#[clap(long_help = r#"
Declare an iommufd context. Opens /dev/iommu so it can be referenced by
--vfio devices via the iommu=<id> key. The associated IOAS is allocated
the first time a --vfio device referring to this id is opened.

Requires Linux kernel >= 6.6 with iommufd support.

Examples:
--iommu id=iommu0 --vfio host=0000:01:00.0,port=rp0,iommu=iommu0
Comment thread
jstarks marked this conversation as resolved.

port_name Root port or downstream switch port name
pci_bdf PCI domain:bus:device.function of the VFIO device on
the host (use lspci -D to find it)
Syntax: id=<name>
"#)]
#[cfg(target_os = "linux")]
#[clap(long, conflicts_with("pcat"))]
pub vfio: Vec<VfioDeviceCli>,
pub iommu: Vec<IommuCli>,
}

impl Options {
Expand Down Expand Up @@ -2427,40 +2444,101 @@ impl FromStr for PcieRemoteCli {
}

/// CLI configuration for a VFIO-assigned PCI device.
///
/// One value is produced per `--vfio` argument via the `FromStr`
/// implementation for this type.
///
/// Syntax: `host=<bdf>,port=<name>[,iommu=<id>]`
///
/// The value is a comma-separated list of `key=value` pairs. `host` and
/// `port` are required, `iommu` is optional, and the keys may appear in any
/// order. Unknown keys, duplicate keys, and empty values are rejected by the
/// parser.
#[cfg(target_os = "linux")]
#[derive(Clone, Debug)]
pub struct VfioDeviceCli {
/// Name of the PCIe downstream port to attach to.
///
/// Taken from the `port=` key.
pub port_name: String,
/// PCI BDF address of the device on the host (e.g., "0000:01:00.0").
///
/// Taken from the `host=` key. The parser rejects values containing `/`
/// or `..`; it does not otherwise validate the BDF format.
pub pci_id: String,
/// Optional iommufd context ID. When set, uses VFIO cdev + iommufd
/// instead of the legacy group/container path.
///
/// Taken from the `iommu=` key; `None` when the key is absent. The value
/// is meant to name a context declared with `--iommu id=<name>`; the
/// parser itself does not check that such a context exists.
pub iommu: Option<String>,
}

#[cfg(target_os = "linux")]
impl FromStr for VfioDeviceCli {
type Err = anyhow::Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
let (port_name, pci_id) = s
.split_once(':')
.context("expected <port_name>:<pci_bdf> (e.g., rp0:0000:01:00.0)")?;
let mut host: Option<String> = None;
let mut port: Option<String> = None;
let mut iommu: Option<String> = None;

if port_name.is_empty() {
anyhow::bail!("port name cannot be empty");
for kv in s.split(',') {
let (key, value) = kv
.split_once('=')
.context("expected key=value pair (e.g., host=0000:01:00.0,port=rp0)")?;
if value.is_empty() {
anyhow::bail!("--vfio: '{key}=' value cannot be empty");
}
match key {
"host" => {
if host.is_some() {
anyhow::bail!("duplicate --vfio key: 'host'");
}
host = Some(value.to_string());
}
"port" => {
if port.is_some() {
anyhow::bail!("duplicate --vfio key: 'port'");
}
port = Some(value.to_string());
}
"iommu" => {
if iommu.is_some() {
anyhow::bail!("duplicate --vfio key: 'iommu'");
}
iommu = Some(value.to_string());
}
_ => anyhow::bail!("unknown --vfio key: '{key}'"),
}
}

if pci_id.is_empty() {
anyhow::bail!("PCI address cannot be empty");
}
let pci_id = host.context("--vfio: 'host=' is required")?;
let port_name = port.context("--vfio: 'port=' is required")?;

// Reject path separators to prevent sysfs path traversal via Path::join.
if pci_id.contains('/') || pci_id.contains("..") {
anyhow::bail!("PCI address must not contain path separators");
}

Ok(VfioDeviceCli {
port_name: port_name.to_string(),
pci_id: pci_id.to_string(),
port_name,
pci_id,
iommu,
})
}
}

/// CLI configuration for an iommufd context.
///
/// One value is produced per `--iommu` argument via the `FromStr`
/// implementation for this type. `--vfio` devices refer to a context through
/// their `iommu=<id>` key.
///
/// Syntax: `id=<name>`
#[cfg(target_os = "linux")]
#[derive(Clone, Debug)]
pub struct IommuCli {
/// Unique identifier for this iommufd context.
///
/// This is the text following `id=`. The parser only checks that it is
/// non-empty; uniqueness across multiple `--iommu` arguments is not
/// enforced at parse time.
pub id: String,
}

#[cfg(target_os = "linux")]
impl FromStr for IommuCli {
    type Err = anyhow::Error;

    /// Parses an `--iommu` value of the form `id=<name>`.
    ///
    /// # Errors
    ///
    /// Returns an error when:
    /// - the value contains no `=` separator,
    /// - the key is anything other than `id`,
    /// - the id is empty, or
    /// - the id contains a `,`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (key, value) = s
            .split_once('=')
            .context("expected id=<name> (e.g., id=iommu0)")?;
        if key != "id" {
            anyhow::bail!("expected 'id=<name>', got '{key}=...'");
        }
        if value.is_empty() {
            anyhow::bail!("iommu id cannot be empty");
        }
        // `--vfio` values are split on ',' before the `iommu=<id>` pair is
        // read, so an id containing a comma could never be referenced by a
        // device. Rejecting it here also stops `id=a,other=b` from being
        // silently accepted as one oddly named context.
        if value.contains(',') {
            anyhow::bail!("iommu id must not contain ','");
        }
        Ok(IommuCli {
            id: value.to_string(),
        })
    }
}
Expand Down Expand Up @@ -3834,4 +3912,65 @@ mod tests {
let opt = Options::try_parse_from(["openvmm", "--pidfile", "/tmp/test.pid"]).unwrap();
assert_eq!(opt.pidfile, Some(PathBuf::from("/tmp/test.pid")));
}

#[cfg(target_os = "linux")]
#[test]
fn test_vfio_device_cli_parse() {
    // Every input in this test is expected to parse successfully.
    let parse_ok = |arg: &str| arg.parse::<VfioDeviceCli>().unwrap();

    // Only the two mandatory keys: the iommu field stays unset.
    let dev = parse_ok("host=0000:01:00.0,port=rp0");
    assert_eq!(
        (dev.pci_id.as_str(), dev.port_name.as_str(), dev.iommu),
        ("0000:01:00.0", "rp0", None)
    );

    // All three keys, deliberately given in a non-canonical order.
    let dev = parse_ok("port=rp1,iommu=iommu0,host=0000:02:00.0");
    assert_eq!(
        (
            dev.pci_id.as_str(),
            dev.port_name.as_str(),
            dev.iommu.as_deref()
        ),
        ("0000:02:00.0", "rp1", Some("iommu0"))
    );
}

#[cfg(target_os = "linux")]
#[test]
fn test_vfio_device_cli_errors() {
    // Each entry is a `--vfio` value the parser must refuse, grouped by the
    // rule it violates. The order matches the checks being exercised.
    let rejected = [
        // A required key is absent.
        "port=rp0",
        "host=0000:01:00.0",
        // A key the parser does not know.
        "host=0000:01:00.0,port=rp0,foo=bar",
        // The same key given twice.
        "host=0000:01:00.0,host=0000:02:00.0,port=rp0",
        "host=0000:01:00.0,port=rp0,port=rp1",
        "host=0000:01:00.0,port=rp0,iommu=a,iommu=b",
        // A key with nothing after the '='.
        "host=,port=rp0",
        "host=0000:01:00.0,port=",
        "host=0000:01:00.0,port=rp0,iommu=",
        // A segment with no '=' at all.
        "host",
        "host=0000:01:00.0,port=rp0,iommu",
        // Host BDF containing path-traversal characters.
        "host=../../etc/passwd,port=rp0",
        "host=foo/bar,port=rp0",
    ];

    for arg in rejected {
        assert!(
            VfioDeviceCli::from_str(arg).is_err(),
            "expected {arg:?} to be rejected"
        );
    }
}

#[cfg(target_os = "linux")]
#[test]
fn test_iommu_cli_parse() {
    // The single supported form: `id=<name>`.
    let ctx: IommuCli = "id=iommu0".parse().unwrap();
    assert_eq!(ctx.id, "iommu0");

    // In order: a key other than `id`, no '=' separator, and an empty id.
    for arg in ["name=iommu0", "iommu0", "id="] {
        assert!(
            IommuCli::from_str(arg).is_err(),
            "expected {arg:?} to be rejected"
        );
    }
}
}
Loading
Loading