Skip to content
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
f6d25c1
something that compiles
jordanhendricks Mar 20, 2026
5dbf46c
starting to sketch out sled-agent attest code
jordanhendricks Mar 20, 2026
e12a38f
mvp attestation??
jordanhendricks Mar 20, 2026
6335323
remove dep on libipcc
jordanhendricks Mar 20, 2026
ef01e4b
make boot digest parseable
jordanhendricks Mar 20, 2026
591b9f5
ready for a racklette spin
jordanhendricks Mar 20, 2026
4ca28cb
paper over async/sync/async bits
iximeow Mar 21, 2026
5f12a78
added recv channel for vm conf in attestation server
jordanhendricks Mar 24, 2026
b1c710c
moved tcp attest server inside of vm objects
jordanhendricks Mar 26, 2026
e4b4a52
remove warning
jordanhendricks Mar 26, 2026
1c55d2b
start adding boot digest stuff
jordanhendricks Mar 26, 2026
1c6ed47
might have strung all the needful through propolis-server?
iximeow Mar 26, 2026
14122a2
clippy lints and cargo fmt
iximeow Mar 26, 2026
449a3b2
racklette debug :(
iximeow Mar 26, 2026
19cfbf7
more debugging
iximeow Mar 26, 2026
d89273b
restore 4ca28cbe
iximeow Mar 26, 2026
2d0a0e4
remove todo file from tree
jordanhendricks Mar 27, 2026
fea9dbb
bump dice-util/vm-attest for AttestAsync
iximeow Mar 30, 2026
60c8c04
enforce read-only boot disk
jordanhendricks Mar 30, 2026
9efdfb6
rev dice-util and vm-attest further
iximeow Mar 30, 2026
b137a90
rev dice-util, vm-attest
iximeow Apr 1, 2026
cf55c6e
shuffle things around to be able to rein in a cancelled init task
iximeow Apr 1, 2026
7f84255
halt cleanup
iximeow Apr 1, 2026
776795a
cleaning up some todos
jordanhendricks Apr 1, 2026
9af75aa
how had i not rebuilt the server...??
iximeow Apr 1, 2026
60935ca
testing a phd fix
jordanhendricks Apr 1, 2026
50c24ff
my turn to not compile propolis-server
jordanhendricks Apr 1, 2026
014950e
first round of review feedback: minor things
jordanhendricks Apr 3, 2026
71b14da
compiling, my bad
jordanhendricks Apr 3, 2026
2d8818d
add retries for crucible reads
jordanhendricks Apr 3, 2026
38cb234
nits from eliza (ty!)
jordanhendricks Apr 5, 2026
c096720
final bits of review feedback, comments, add sleep between crucible f…
jordanhendricks Apr 6, 2026
e6bbd3a
clean up log todo
jordanhendricks Apr 6, 2026
1ff4e3e
hopefully resolve merge conflict with master
jordanhendricks Apr 6, 2026
26f31f0
Merge remote-tracking branch 'origin' into jhendricks/rfd-605
jordanhendricks Apr 6, 2026
786ef27
more eliza review feedback
jordanhendricks Apr 6, 2026
0f843dd
final bits of review feedback?
jordanhendricks Apr 6, 2026
a42814d
fix clippy CI job
jordanhendricks Apr 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,638 changes: 1,406 additions & 232 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ sled-agent-client = { git = "https://github.com/oxidecomputer/omicron", branch =
crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "a945a32ba9e1f2098ce3a8963765f1894f37110b" }
crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "a945a32ba9e1f2098ce3a8963765f1894f37110b" }

# Attestation
dice-verifier = { git = "https://github.com/oxidecomputer/dice-util", rev = "1d3084b514389847e8e0f5d966d2be4f18d02d32", features = ["sled-agent"] }
vm-attest = { git = "https://github.com/oxidecomputer/vm-attest", rev = "2cdd17580a4fc6c871d24797016af8dbaac9421d", default-features = false }

# External dependencies
anyhow = "1.0"
async-trait = "0.1.88"
Expand Down Expand Up @@ -163,6 +167,7 @@ serde_arrays = "0.1"
serde_derive = "1.0"
serde_json = "1.0"
serde_test = "1.0.138"
sha2 = "0.10.9"
slog = "2.7"
slog-async = "2.8"
slog-bunyan = "2.4.0"
Expand Down
1 change: 1 addition & 0 deletions bin/propolis-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ rgb_frame.workspace = true
rfb = { workspace = true, features = ["tungstenite"] }
uuid.workspace = true
usdt.workspace = true
vm-attest.workspace = true
base64.workspace = true
schemars = { workspace = true, features = ["chrono", "uuid1"] }

Expand Down
139 changes: 124 additions & 15 deletions bin/propolis-server/src/lib/initializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use std::convert::TryInto;
use std::fs::File;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::num::{NonZeroU8, NonZeroUsize};
use std::os::unix::fs::FileTypeExt;
use std::sync::Arc;
Expand All @@ -25,6 +24,9 @@ use crucible_client_types::VolumeConstructionRequest;
pub use nexus_client::Client as NexusClient;
use oximeter::types::ProducerRegistry;
use oximeter_instruments::kstat::KstatSampler;
use propolis::attestation;
use propolis::attestation::server::AttestationServerConfig;
use propolis::attestation::server::AttestationSock;
use propolis::block;
use propolis::chardev::{self, BlockingSource, Source};
use propolis::common::{Lifecycle, GB, MB, PAGE_SIZE};
Expand Down Expand Up @@ -96,6 +98,12 @@ pub enum MachineInitError {
#[error("boot order entry {0:?} does not refer to an attached disk")]
BootOrderEntryWithoutDevice(SpecKey),

#[error(
"disk device {device_id:?} refers to a \
non-existent block backend {backend_id:?}"
)]
DeviceWithoutBlockBackend { device_id: SpecKey, backend_id: SpecKey },

#[error("boot entry {0:?} refers to a device on non-zero PCI bus {1}")]
BootDeviceOnDownstreamPciBus(SpecKey, u8),

Expand All @@ -105,6 +113,9 @@ pub enum MachineInitError {
#[error("failed to specialize CPUID for vcpu {0}")]
CpuidSpecializationFailed(i32, #[source] propolis::cpuid::SpecializeError),

#[error("failed to start attestation server")]
AttestationServer(#[source] std::io::Error),

#[cfg(feature = "falcon")]
#[error("softnpu p9 device missing")]
SoftNpuP9Missing,
Expand Down Expand Up @@ -478,31 +489,25 @@ impl MachineInitializer<'_> {
Ok(())
}

pub fn initialize_vsock(
pub async fn initialize_vsock(
&mut self,
chipset: &RegisteredChipset,
) -> Result<(), MachineInitError> {
attest_cfg: Option<AttestationServerConfig>,
) -> Result<Option<AttestationSock>, MachineInitError> {
use propolis::vsock::proxy::VsockPortMapping;

// OANA Port 605 - VM Attestation RFD 605
const ATTESTATION_PORT: u16 = 605;
const ATTESTATION_ADDR: SocketAddr = SocketAddr::new(
IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
ATTESTATION_PORT,
);

if let Some(vsock) = &self.spec.vsock {
let bdf: pci::Bdf = vsock.spec.pci_path.into();

let mappings = vec![VsockPortMapping::new(
ATTESTATION_PORT.into(),
ATTESTATION_ADDR,
attestation::ATTESTATION_PORT.into(),
attestation::ATTESTATION_ADDR,
)];

let guest_cid = GuestCid::try_from(vsock.spec.guest_cid)
.context("guest cid")?;
.context("could not parse guest cid")?;
// While the spec does not recommend how large the virtio descriptor
// table should be we sized this appropriately in testing so
// table should be, we sized this appropriately in testing, so
// that the guest is able to move vsock packets at a reasonable
// throughput without the need to be much larger.
let num_queues = 256;
Expand All @@ -516,9 +521,23 @@ impl MachineInitializer<'_> {

self.devices.insert(vsock.id.clone(), device.clone());
chipset.pci_attach(bdf, device);

// Spawn attestation server that will go over the vsock device
if let Some(cfg) = attest_cfg {
let attest = AttestationSock::new(
self.log.new(slog::o!("component" => "attestation-server")),
cfg.sled_agent_addr,
)
.await
.map_err(MachineInitError::AttestationServer)?;
return Ok(Some(attest));
}
} else {
info!(self.log, "no vsock device in instance spec");
return Ok(None);
}

Ok(())
Ok(None)
}

async fn create_storage_backend_from_spec(
Expand Down Expand Up @@ -672,6 +691,96 @@ impl MachineInitializer<'_> {
}
}

/// Gathers the per-instance inputs for the VM's attestation server and hands
/// them to `vm_rot` through `AttestationSock::prepare_instance_conf`.
///
/// The inputs are this instance's ID (`self.properties.id`) and, when
/// available, a clone of the Crucible volume backing the boot disk. A volume
/// is only supplied when the spec names a boot disk whose block backend is a
/// read-only `block::CrucibleBackend`; in every other case `None` is passed.
///
/// # Errors
///
/// - `MachineInitError::BootOrderEntryWithoutDevice` if the boot order entry
///   names a device that is not present in `self.spec.disks`.
/// - `MachineInitError::DeviceWithoutBlockBackend` if the boot disk's backend
///   ID is not present in `self.block_backends`.
///
/// # Panics
///
/// Panics if the boot settings list more than one boot order entry.
pub fn prepare_rot_initializer(
    &self,
    vm_rot: &mut AttestationSock,
) -> Result<(), MachineInitError> {
    let boot_settings = self.spec.boot_settings.as_ref();

    if let Some(settings) = boot_settings {
        // In a rack, propolis-server is only ever configured with zero or
        // one boot disks. A longer list can be provided, and the product may
        // expose that capability in the future. When it does, we will need
        // to decide what "boot disk measurement" from the RoT actually
        // means: it probably "should" be "the measurement of the disk that
        // EDK2 decided to boot into", but that communication to and from the
        // guest is more complicated than we want or need to build out today.
        //
        // As the system exists today there is either no specific boot disk
        // (so we don't know where the guest is expected to end up) or
        // exactly one (so we can pick the disk to measure before guest
        // firmware even runs). That expectation is encoded up front. If the
        // product has changed such that this assert is reached, "that's
        // exciting!" and "sorry for crashing your Propolis".
        assert!(
            settings.order.len() < 2,
            "Unsupported VM RoT configuration: \
             more than one boot disk"
        );
    }

    // The first boot entry is a key into `self.spec.disks`, which is how we
    // reach the Crucible volume backing this boot option.
    let first_boot_entry =
        boot_settings.and_then(|settings| settings.order.first());

    let crucible_volume = match first_boot_entry {
        None => None,
        Some(entry) => {
            let device_id = &entry.device_id;

            let Some(disk_dev) = self.spec.disks.get(device_id) else {
                return Err(MachineInitError::BootOrderEntryWithoutDevice(
                    device_id.clone(),
                ));
            };

            let backend_id = match &disk_dev.device_spec {
                spec::StorageDevice::Virtio(disk) => &disk.backend_id,
                spec::StorageDevice::Nvme(disk) => &disk.backend_id,
            };

            let block_backend =
                self.block_backends.get(backend_id).ok_or_else(|| {
                    MachineInitError::DeviceWithoutBlockBackend {
                        device_id: device_id.clone(),
                        backend_id: backend_id.clone(),
                    }
                })?;

            match block_backend
                .as_any()
                .downcast_ref::<block::CrucibleBackend>()
            {
                Some(backend) if backend.is_read_only() => {
                    Some(backend.clone_volume())
                }
                Some(_) => {
                    // Only a read-only disk may be used for attestation.
                    slog::info!(
                        self.log,
                        "boot disk is not read-only \
                         (and will not be used for attestations)"
                    );
                    None
                }
                None => {
                    // Probably fine, just not handled right now.
                    slog::warn!(
                        self.log,
                        "VM RoT ignoring boot disk: not a Crucible volume"
                    );
                    None
                }
            }
        }
    };

    vm_rot.prepare_instance_conf(self.properties.id, crucible_volume);

    Ok(())
}

/// Initializes the storage devices and backends listed in this
/// initializer's instance spec.
///
Expand Down
7 changes: 7 additions & 0 deletions bin/propolis-server/src/lib/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ use internal_dns_resolver::{ResolveError, Resolver};
use internal_dns_types::names::ServiceName;
pub use nexus_client::Client as NexusClient;
use oximeter::types::ProducerRegistry;
use propolis::attestation::server::AttestationServerConfig;
use propolis_api_types::disk::{
InstanceVCRReplace, SnapshotRequestPathParams, VCRRequestPathParams,
VolumeStatus, VolumeStatusPathParams,
Expand Down Expand Up @@ -95,6 +96,9 @@ pub struct StaticConfig {
/// The configuration to use when setting up this server's Oximeter
/// endpoint.
metrics: Option<MetricsEndpointConfig>,

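/// The configuration to use when starting a VM's attestation server. If
/// `None`, no attestation server is started.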
attest_config: Option<AttestationServerConfig>,
}

/// Context accessible from HTTP callbacks.
Expand All @@ -113,6 +117,7 @@ impl DropshotEndpointContext {
use_reservoir: bool,
log: slog::Logger,
metric_config: Option<MetricsEndpointConfig>,
attest_config: Option<AttestationServerConfig>,
) -> Self {
let vnc_server = VncServer::new(log.clone());
Self {
Expand All @@ -121,6 +126,7 @@ impl DropshotEndpointContext {
bootrom_version,
use_reservoir,
metrics: metric_config,
attest_config,
},
vnc_server,
vm: crate::vm::Vm::new(&log),
Expand Down Expand Up @@ -245,6 +251,7 @@ impl PropolisServerApi for PropolisServerImpl {
nexus_client,
vnc_server: server_context.vnc_server.clone(),
local_server_addr: rqctx.server.local_addr,
attest_config: server_context.static_config.attest_config,
};

let vm_init = match init {
Expand Down
12 changes: 11 additions & 1 deletion bin/propolis-server/src/lib/vm/ensure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,8 @@ async fn initialize_vm_objects(
&properties,
))?;
init.initialize_network_devices(&chipset).await?;
init.initialize_vsock(&chipset)?;
let mut attest_handle =
init.initialize_vsock(&chipset, options.attest_config).await?;

#[cfg(feature = "failure-injection")]
init.initialize_test_devices();
Expand All @@ -581,6 +582,14 @@ async fn initialize_vm_objects(
let ramfb =
init.initialize_fwcfg(spec.board.cpus, &options.bootrom_version)?;

// If we have a VM RoT, that RoT needs to collect some information about the
// guest before it is actually usable. It will do that asynchronously, but
// we first have to provide it with references to the necessary initial VM
// state.
if let Some(attest_handle) = attest_handle.as_mut() {
init.prepare_rot_initializer(attest_handle)?;
}

init.register_guest_hv_interface(guest_hv_lifecycle);
init.initialize_cpus().await?;

Expand Down Expand Up @@ -642,6 +651,7 @@ async fn initialize_vm_objects(
com1,
framebuffer: Some(ramfb),
ps2ctrl,
attest_handle,
};

// Another really terrible hack. As we've found in Propolis#1008, brk()
Expand Down
3 changes: 3 additions & 0 deletions bin/propolis-server/src/lib/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ use state_publisher::StatePublisher;
use tokio::sync::{oneshot, watch, RwLock, RwLockReadGuard};

use crate::{server::MetricsEndpointConfig, spec::Spec, vnc::VncServer};
use propolis::attestation::server::AttestationServerConfig;

mod active;
pub(crate) mod ensure;
Expand Down Expand Up @@ -309,6 +310,8 @@ pub(super) struct EnsureOptions {
/// The address of this Propolis process, used by the live migration
/// protocol to transfer serial console connections.
pub(super) local_server_addr: SocketAddr,

pub(super) attest_config: Option<AttestationServerConfig>,
}

impl Vm {
Expand Down
12 changes: 11 additions & 1 deletion bin/propolis-server/src/lib/vm/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use std::{

use futures::{future::BoxFuture, stream::FuturesUnordered, StreamExt};
use propolis::{
attestation,
hw::{ps2::ctrl::PS2Ctrl, qemu::ramfb::RamFb, uart::LpcUart},
vmm::VmmHdl,
Machine,
Expand Down Expand Up @@ -51,6 +52,7 @@ pub(super) struct InputVmObjects {
pub com1: Arc<Serial<LpcUart>>,
pub framebuffer: Option<Arc<RamFb>>,
pub ps2ctrl: Arc<PS2Ctrl>,
pub attest_handle: Option<attestation::server::AttestationSock>,
}

/// The collection of objects and state that make up a Propolis instance.
Expand Down Expand Up @@ -86,6 +88,9 @@ pub(crate) struct VmObjectsLocked {

/// A handle to the VM's PS/2 controller.
ps2ctrl: Arc<PS2Ctrl>,

/// A handle to the VM's attestation server.
attest_handle: Option<attestation::server::AttestationSock>,
}

impl VmObjects {
Expand Down Expand Up @@ -126,6 +131,7 @@ impl VmObjectsLocked {
com1: input.com1,
framebuffer: input.framebuffer,
ps2ctrl: input.ps2ctrl,
attest_handle: input.attest_handle,
}
}

Expand Down Expand Up @@ -371,7 +377,7 @@ impl VmObjectsLocked {

/// Stops all of a VM's devices and detaches its block backends from their
/// devices.
async fn halt_devices(&self) {
async fn halt_devices(&mut self) {
// Take care not to wedge the runtime with any device halt
// implementations which might block.
tokio::task::block_in_place(|| {
Expand All @@ -386,6 +392,10 @@ impl VmObjectsLocked {
backend.stop().await;
backend.attachment().detach();
}

if let Some(attest_handle) = self.attest_handle.take() {
attest_handle.halt().await;
}
}

/// Resets a VM's kernel vCPU objects to their initial states.
Expand Down
Loading