Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ and this project adheres to

### Added

- Added `ksm_mergeable` to `/machine-config` to let operators explicitly mark
anonymous guest memory as mergeable with Linux Kernel Samepage Merging.

### Changed

### Deprecated
Expand Down
2 changes: 2 additions & 0 deletions docs/device-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ specification:
| | mem_size_mib | O | O | O | O | O | O | O | O | O |
| | track_dirty_pages | O | O | O | O | O | O | O | O | O |
| | vcpu_count | O | O | O | O | O | O | O | O | O |
| | ksm_mergeable | O | O | O | O | O | O | O | O | O |
| `Metrics` | metrics_path | O | O | O | O | O | O | O | O | O |
| `MmdsConfig` | network_interfaces | O | O | O | O | **R** | O | O | O | O |
| | version | O | O | O | O | **R** | O | O | O | O |
Expand Down Expand Up @@ -142,6 +143,7 @@ specification:
| | mem_size_mib | O | O | O | O | O | O | O |
| | track_dirty_pages | O | O | O | O | O | O | O |
| | vcpu_count | O | O | O | O | O | O | O |
| | ksm_mergeable | O | O | O | O | O | O | O |
| | vmm_version | O | O | O | O | O | O | O |
| `MemoryHotplugStatus` | total_size_mib | O | O | O | O | O | O | **R** |
| | slot_size_mib | O | O | O | O | O | O | **R** |
Expand Down
4 changes: 4 additions & 0 deletions docs/hugepages.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ benefits of using huge pages. This is because KVM will unconditionally establish
guest page tables at 4K granularity if dirty page tracking is enabled, even if
the host uses huge mappings.

KSM mergeable memory cannot be enabled with hugetlbfs-backed guest memory.
Requests that set both `huge_pages` to `2M` and `ksm_mergeable` to `true` are
rejected.

The traditional balloon device reports free pages at 4K granularity, which means
the device is unable to reclaim the hugepage backing of the guest and drop RSS.
However, the balloon can still be inflated and used to restrict memory usage in
Expand Down
5 changes: 5 additions & 0 deletions docs/prod-host-setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,11 @@ to mitigate [side channel issues](https://eprint.iacr.org/2013/448.pdf) that
rely on page deduplication for revealing what memory pages are accessed by
another process.

Firecracker does not mark guest memory as mergeable by default. Operators that
explicitly enable `ksm_mergeable` in `/machine-config` should only do so for
deployments where the page-deduplication side channel risk is acceptable, for
example when tenant separation is not required.

##### Use memory with Rowhammer mitigation support

Rowhammer is a memory side-channel issue that can lead to unauthorized cross-
Expand Down
13 changes: 12 additions & 1 deletion src/firecracker/src/api_server/request/machine_configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ mod tests {
cpu_template: None,
track_dirty_pages: Some(false),
huge_pages: Some(expected),
ksm_mergeable: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand All @@ -144,6 +145,7 @@ mod tests {
cpu_template: Some(StaticCpuTemplate::None),
track_dirty_pages: Some(false),
huge_pages: Some(HugePageConfig::None),
ksm_mergeable: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand All @@ -165,6 +167,7 @@ mod tests {
cpu_template: None,
track_dirty_pages: Some(true),
huge_pages: Some(HugePageConfig::None),
ksm_mergeable: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand All @@ -190,6 +193,7 @@ mod tests {
cpu_template: Some(StaticCpuTemplate::T2),
track_dirty_pages: Some(true),
huge_pages: Some(HugePageConfig::None),
ksm_mergeable: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand All @@ -208,7 +212,8 @@ mod tests {
"vcpu_count": 8,
"mem_size_mib": 1024,
"smt": true,
"track_dirty_pages": true
"track_dirty_pages": true,
"ksm_mergeable": true
}"#;
let expected_config = MachineConfigUpdate {
vcpu_count: Some(8),
Expand All @@ -217,6 +222,7 @@ mod tests {
cpu_template: None,
track_dirty_pages: Some(true),
huge_pages: Some(HugePageConfig::None),
ksm_mergeable: Some(true),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand Down Expand Up @@ -245,6 +251,11 @@ mod tests {
}"#;
parse_patch_machine_config(&Body::new(body)).unwrap();

let body = r#"{
"ksm_mergeable": true
}"#;
parse_patch_machine_config(&Body::new(body)).unwrap();

// On aarch64, CPU template is also not patch compatible.
let body = r#"{
"cpu_template": "T2"
Expand Down
11 changes: 10 additions & 1 deletion src/firecracker/swagger/firecracker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,8 @@ paths:
If 2M hugetlbfs pages are specified, then `mem_size_mib` must be a multiple of 2.
If any of the parameters has an incorrect value, the whole update fails.
All parameters that are optional and are not specified are set to their default values
(smt = false, track_dirty_pages = false, cpu_template = None, huge_pages = None).
(smt = false, track_dirty_pages = false, cpu_template = None, huge_pages = None,
ksm_mergeable = false).
operationId: putMachineConfiguration
parameters:
- name: body
Expand Down Expand Up @@ -1444,6 +1445,14 @@ definitions:
- None
- 2M
description: Which huge pages configuration (if any) should be used to back guest memory.
ksm_mergeable:
type: boolean
description:
Marks anonymous guest memory as mergeable with Linux Kernel Samepage Merging (KSM).
This can improve host memory efficiency when KSM is enabled by the host operator.
It is disabled by default because page deduplication can enable side channels. It
cannot be used with hugetlbfs-backed guest memory, nor with vhost-user devices, which
require memfd-backed guest memory.
default: false

MemoryBackend:
type: object
Expand Down
1 change: 1 addition & 0 deletions src/vmm/src/devices/virtio/block/virtio/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ pub mod tests {
[(GuestAddress(0), MEM_LEN)].into_iter(),
true,
HugePageConfig::None,
false,
)
.unwrap()
.into_iter()
Expand Down
1 change: 1 addition & 0 deletions src/vmm/src/devices/virtio/mem/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,7 @@ pub(crate) mod test_utils {
std::iter::once((addr, mib_to_bytes(1024))),
false,
HugePageConfig::None,
false,
)
.unwrap()
.pop()
Expand Down
1 change: 1 addition & 0 deletions src/vmm/src/devices/virtio/vhost_user.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ pub(crate) mod tests {
libc::MAP_PRIVATE,
Some(file),
false,
false,
)
.unwrap()
.into_iter()
Expand Down
4 changes: 3 additions & 1 deletion src/vmm/src/persist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ pub fn restore_from_snapshot(
cpu_template: Some(microvm_state.vm_info.cpu_template),
track_dirty_pages: Some(track_dirty_pages),
huge_pages: Some(microvm_state.vm_info.huge_pages),
ksm_mergeable: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
})
Expand Down Expand Up @@ -572,7 +573,8 @@ fn create_guest_memory(
track_dirty_pages: bool,
huge_pages: HugePageConfig,
) -> Result<(Vec<GuestRegionMmap>, Vec<GuestRegionUffdMapping>), GuestMemoryFromUffdError> {
let guest_memory = memory::anonymous(mem_state.regions(), track_dirty_pages, huge_pages)?;
let guest_memory =
memory::anonymous(mem_state.regions(), track_dirty_pages, huge_pages, false)?;
let mut backend_mappings = Vec::with_capacity(guest_memory.len());
let mut offset = 0;
for mem_region in guest_memory.iter() {
Expand Down
15 changes: 15 additions & 0 deletions src/vmm/src/resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,10 @@ impl VmResources {
// a single way of backing guest memory for vhost-user and non-vhost-user cases,
// that would not be worth the effort.
if vhost_user_device_used {
if self.machine_config.ksm_mergeable {
return Err(MemoryError::KsmWithSharedMemory);
}

memory::memfd_backed(
regions,
self.machine_config.track_dirty_pages,
Expand All @@ -511,6 +515,7 @@ impl VmResources {
regions.iter().copied(),
self.machine_config.track_dirty_pages,
self.machine_config.huge_pages,
self.machine_config.ksm_mergeable,
)
}
}
Expand Down Expand Up @@ -1428,6 +1433,7 @@ mod tests {
cpu_template: Some(StaticCpuTemplate::V1N1),
track_dirty_pages: Some(false),
huge_pages: Some(HugePageConfig::None),
ksm_mergeable: Some(false),
#[cfg(feature = "gdb")]
gdb_socket_path: None,
};
Expand Down Expand Up @@ -1517,6 +1523,15 @@ mod tests {
// trigger the "ballooning incompatible with huge pages" check.
vm_resources.balloon = BalloonBuilder::new();
vm_resources.update_machine_config(&aux_vm_config).unwrap();

// KSM mergeable memory is incompatible with hugetlbfs-backed memory.
aux_vm_config.ksm_mergeable = Some(true);
assert_eq!(
vm_resources
.update_machine_config(&aux_vm_config)
.unwrap_err(),
MachineConfigError::KsmWithHugePages
);
}

#[test]
Expand Down
4 changes: 2 additions & 2 deletions src/vmm/src/test_utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ pub fn single_region_mem_at_raw(at: u64, size: usize) -> Vec<GuestRegionMmap> {
/// Creates a [`GuestMemoryMmap`] with multiple regions and without dirty page tracking.
pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap {
GuestRegionCollection::from_regions(
memory::anonymous(regions.iter().copied(), false, HugePageConfig::None)
memory::anonymous(regions.iter().copied(), false, HugePageConfig::None, false)
.expect("Cannot initialize memory")
.into_iter()
.map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0))
Expand All @@ -54,7 +54,7 @@ pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap {
}

pub fn multi_region_mem_raw(regions: &[(GuestAddress, usize)]) -> Vec<GuestRegionMmap> {
memory::anonymous(regions.iter().copied(), false, HugePageConfig::None)
memory::anonymous(regions.iter().copied(), false, HugePageConfig::None, false)
.expect("Cannot initialize memory")
}

Expand Down
34 changes: 33 additions & 1 deletion src/vmm/src/vmm_config/machine_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ pub enum MachineConfigError {
SmtNotSupported,
/// Could not determine host kernel version when checking hugetlbfs compatibility
KernelVersion,
/// KSM mergeable memory cannot be enabled with hugetlbfs-backed guest memory.
KsmWithHugePages,
}

/// Describes the possible (huge)page configurations for a microVM's memory.
Expand Down Expand Up @@ -113,6 +115,9 @@ pub struct MachineConfig {
/// Configures what page size Firecracker should use to back guest memory.
#[serde(default)]
pub huge_pages: HugePageConfig,
/// Marks anonymous guest memory as mergeable by KSM.
#[serde(default)]
pub ksm_mergeable: bool,
/// GDB socket address.
#[cfg(feature = "gdb")]
#[serde(default, skip_serializing_if = "Option::is_none")]
Expand Down Expand Up @@ -155,6 +160,7 @@ impl Default for MachineConfig {
cpu_template: None,
track_dirty_pages: false,
huge_pages: HugePageConfig::None,
ksm_mergeable: false,
#[cfg(feature = "gdb")]
gdb_socket_path: None,
}
Expand Down Expand Up @@ -188,6 +194,9 @@ pub struct MachineConfigUpdate {
/// Configures what page size Firecracker should use to back guest memory.
#[serde(default)]
pub huge_pages: Option<HugePageConfig>,
/// Marks anonymous guest memory as mergeable by KSM.
#[serde(default)]
pub ksm_mergeable: Option<bool>,
/// GDB socket address.
#[cfg(feature = "gdb")]
#[serde(default)]
Expand All @@ -212,6 +221,7 @@ impl From<MachineConfig> for MachineConfigUpdate {
cpu_template: cfg.static_template(),
track_dirty_pages: Some(cfg.track_dirty_pages),
huge_pages: Some(cfg.huge_pages),
ksm_mergeable: Some(cfg.ksm_mergeable),
#[cfg(feature = "gdb")]
gdb_socket_path: cfg.gdb_socket_path,
}
Expand Down Expand Up @@ -261,11 +271,16 @@ impl MachineConfig {

let mem_size_mib = update.mem_size_mib.unwrap_or(self.mem_size_mib);
let page_config = update.huge_pages.unwrap_or(self.huge_pages);
let ksm_mergeable = update.ksm_mergeable.unwrap_or(self.ksm_mergeable);

if mem_size_mib == 0 || !page_config.is_valid_mem_size(mem_size_mib) {
return Err(MachineConfigError::InvalidMemorySize);
}

if ksm_mergeable && page_config.is_hugetlbfs() {
return Err(MachineConfigError::KsmWithHugePages);
}

let cpu_template = match update.cpu_template {
None => self.cpu_template.clone(),
Some(StaticCpuTemplate::None) => None,
Expand All @@ -279,6 +294,7 @@ impl MachineConfig {
cpu_template,
track_dirty_pages: update.track_dirty_pages.unwrap_or(self.track_dirty_pages),
huge_pages: page_config,
ksm_mergeable,
#[cfg(feature = "gdb")]
gdb_socket_path: update.gdb_socket_path.clone(),
})
Expand All @@ -288,7 +304,9 @@ impl MachineConfig {
#[cfg(test)]
mod tests {
use crate::cpu_config::templates::{CpuTemplateType, CustomCpuTemplate, StaticCpuTemplate};
use crate::vmm_config::machine_config::MachineConfig;
use crate::vmm_config::machine_config::{
HugePageConfig, MachineConfig, MachineConfigError, MachineConfigUpdate,
};

// Ensure the special (de)serialization logic for the cpu_template field works:
// only static cpu templates can be specified via the machine-config endpoint, but
Expand Down Expand Up @@ -335,4 +353,18 @@ mod tests {

assert!(deserialized.cpu_template.is_none());
}

#[test]
fn test_ksm_mergeable_with_huge_pages_fails() {
    // Ask for KSM-mergeable memory and 2M hugetlbfs pages in the same update;
    // all remaining update fields keep their `Default` values.
    let conflicting_update = MachineConfigUpdate {
        huge_pages: Some(HugePageConfig::Hugetlbfs2M),
        ksm_mergeable: Some(true),
        ..Default::default()
    };

    // Applying that update to a default configuration must be refused with the
    // dedicated KSM/huge-pages error.
    let outcome = MachineConfig::default().update(&conflicting_update);
    assert_eq!(outcome, Err(MachineConfigError::KsmWithHugePages));
}
}
Loading