Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
f6d25c1
something that compiles
jordanhendricks Mar 20, 2026
5dbf46c
starting to sketch out sled-agent attest code
jordanhendricks Mar 20, 2026
e12a38f
mvp attestation??
jordanhendricks Mar 20, 2026
6335323
remove dep on libipcc
jordanhendricks Mar 20, 2026
ef01e4b
make boot digest parseable
jordanhendricks Mar 20, 2026
591b9f5
ready for a racklette spin
jordanhendricks Mar 20, 2026
4ca28cb
paper over async/sync/async bits
iximeow Mar 21, 2026
5f12a78
added recv channel for vm conf in attestation server
jordanhendricks Mar 24, 2026
b1c710c
moved tcp attest server inside of vm objects
jordanhendricks Mar 26, 2026
e4b4a52
remove warning
jordanhendricks Mar 26, 2026
1c55d2b
start adding boot digest stuff
jordanhendricks Mar 26, 2026
1c6ed47
might have strung all the needful through propolis-server?
iximeow Mar 26, 2026
14122a2
clippy lints and cargo fmt
iximeow Mar 26, 2026
449a3b2
racklette debug :(
iximeow Mar 26, 2026
19cfbf7
more debugging
iximeow Mar 26, 2026
d89273b
restore 4ca28cbe
iximeow Mar 26, 2026
2d0a0e4
remove todo file from tree
jordanhendricks Mar 27, 2026
fea9dbb
bump dice-util/vm-attest for AttestAsync
iximeow Mar 30, 2026
60c8c04
enforce read-only boot disk
jordanhendricks Mar 30, 2026
9efdfb6
rev dice-util and vm-attest further
iximeow Mar 30, 2026
b137a90
rev dice-util, vm-attest
iximeow Apr 1, 2026
cf55c6e
shuffle things around to be able to reign in a cancelled init task
iximeow Apr 1, 2026
7f84255
halt cleanup
iximeow Apr 1, 2026
776795a
cleaning up some todos
jordanhendricks Apr 1, 2026
9af75aa
how had i not rebuilt the server...??
iximeow Apr 1, 2026
60935ca
testing a phd fix
jordanhendricks Apr 1, 2026
50c24ff
my turn to not compile propolis-server
jordanhendricks Apr 1, 2026
014950e
first round of review feedback: minor things
jordanhendricks Apr 3, 2026
71b14da
compiling, my bad
jordanhendricks Apr 3, 2026
2d8818d
add retries for crucible reads
jordanhendricks Apr 3, 2026
38cb234
nits from eliza (ty!)
jordanhendricks Apr 5, 2026
c096720
final bits of review feedback, comments, add sleep between crucible f…
jordanhendricks Apr 6, 2026
e6bbd3a
clean up log todo
jordanhendricks Apr 6, 2026
1ff4e3e
hopefully resolve merge conflict with master
jordanhendricks Apr 6, 2026
26f31f0
Merge remote-tracking branch 'origin' into jhendricks/rfd-605
jordanhendricks Apr 6, 2026
786ef27
more eliza review feedback
jordanhendricks Apr 6, 2026
0f843dd
final bits of review feedback?
jordanhendricks Apr 6, 2026
a42814d
fix clippy CI job
jordanhendricks Apr 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,637 changes: 1,405 additions & 232 deletions Cargo.lock

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ sled-agent-client = { git = "https://github.com/oxidecomputer/omicron", branch =
crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "a945a32ba9e1f2098ce3a8963765f1894f37110b" }
crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "a945a32ba9e1f2098ce3a8963765f1894f37110b" }

# TODO: pin these to git SHAs
# Attestation
#dice-verifier = { git = "https://github.com/oxidecomputer/dice-util", branch = "jhendricks/update-sled-agent-types-versions", features = ["sled-agent"] }
dice-verifier = { git = "https://github.com/oxidecomputer/dice-util", features = ["sled-agent"] }
vm-attest = { git = "https://github.com/oxidecomputer/vm-attest", rev = "a7c2a341866e359a3126aaaa67823ec5097000cd", default-features = false }
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

most of the Cargo.lock weirdness comes from dice-verifier -> sled-agent-client -> omicron-common (some previous rev), and that's where the later API dependency stuff we saw in Omicron comes up when building the tuf. sled-agent-client re-exports items out of propolis-client, which means we end up in a situation where propolis-server depends on a different rev of propolis-client and everything's Weird.

i'm not totally sure what we want or need to do about this, particularly because we're definitely not using the propolis-client-related parts of sled-agent! we're just using one small part of the API for the RoT calls. but sled-agent and propolis are (i think?) updated in the same deployment unit so the cyclic dependency is fine.


# External dependencies
anyhow = "1.0"
async-trait = "0.1.88"
Expand Down Expand Up @@ -163,6 +169,7 @@ serde_arrays = "0.1"
serde_derive = "1.0"
serde_json = "1.0"
serde_test = "1.0.138"
sha2 = "0.10.9"
slog = "2.7"
slog-async = "2.8"
slog-bunyan = "2.4.0"
Expand Down Expand Up @@ -201,3 +208,6 @@ zerocopy = "0.8.25"
# [patch."https://github.com/oxidecomputer/crucible"]
# crucible = { path = "../crucible/upstairs" }
# crucible-client-types = { path = "../crucible/crucible-client-types" }

#[patch."https://github.com/oxidecomputer/dice-util"]
#dice-verifier = { path = "/home/jordan/src/dice-util/verifier", features = ["sled-agent"] }
1 change: 1 addition & 0 deletions bin/propolis-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ rgb_frame.workspace = true
rfb = { workspace = true, features = ["tungstenite"] }
uuid.workspace = true
usdt.workspace = true
vm-attest.workspace = true
base64.workspace = true
schemars = { workspace = true, features = ["chrono", "uuid1"] }

Expand Down
120 changes: 107 additions & 13 deletions bin/propolis-server/src/lib/initializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use std::convert::TryInto;
use std::fs::File;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::num::{NonZeroU8, NonZeroUsize};
use std::os::unix::fs::FileTypeExt;
use std::sync::Arc;
Expand All @@ -25,6 +24,10 @@ use crucible_client_types::VolumeConstructionRequest;
pub use nexus_client::Client as NexusClient;
use oximeter::types::ProducerRegistry;
use oximeter_instruments::kstat::KstatSampler;
use propolis::attestation;
use propolis::attestation::server::AttestationServerConfig;
use propolis::attestation::server::AttestationSock;
use propolis::attestation::server::AttestationSockInit;
use propolis::block;
use propolis::chardev::{self, BlockingSource, Source};
use propolis::common::{Lifecycle, GB, MB, PAGE_SIZE};
Expand Down Expand Up @@ -105,6 +108,9 @@ pub enum MachineInitError {
#[error("failed to specialize CPUID for vcpu {0}")]
CpuidSpecializationFailed(i32, #[source] propolis::cpuid::SpecializeError),

#[error("failed to start attestation server")]
AttestationServer(#[source] std::io::Error),

#[cfg(feature = "falcon")]
#[error("softnpu p9 device missing")]
SoftNpuP9Missing,
Expand Down Expand Up @@ -478,25 +484,23 @@ impl MachineInitializer<'_> {
Ok(())
}

pub fn initialize_vsock(
pub async fn initialize_vsock(
&mut self,
chipset: &RegisteredChipset,
) -> Result<(), MachineInitError> {
attest_cfg: Option<AttestationServerConfig>,
) -> Result<
(Option<AttestationSock>, Option<AttestationSockInit>),
MachineInitError,
> {
use propolis::vsock::proxy::VsockPortMapping;

// OANA Port 605 - VM Attestation RFD 605
const ATTESTATION_PORT: u16 = 605;
const ATTESTATION_ADDR: SocketAddr = SocketAddr::new(
IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
ATTESTATION_PORT,
);

// TODO: early return if none?
if let Some(vsock) = &self.spec.vsock {
let bdf: pci::Bdf = vsock.spec.pci_path.into();

let mappings = vec![VsockPortMapping::new(
ATTESTATION_PORT.into(),
ATTESTATION_ADDR,
attestation::ATTESTATION_PORT.into(),
attestation::ATTESTATION_ADDR,
)];

let guest_cid = GuestCid::try_from(vsock.spec.guest_cid)
Expand All @@ -516,9 +520,20 @@ impl MachineInitializer<'_> {

self.devices.insert(vsock.id.clone(), device.clone());
chipset.pci_attach(bdf, device);

// Spawn attestation server that will go over the vsock
if let Some(cfg) = attest_cfg {
let (attest, attest_init) = AttestationSock::new(
self.log.new(slog::o!("component" => "attestation-server")),
cfg.sled_agent_addr,
)
.await
.map_err(MachineInitError::AttestationServer)?;
return Ok((Some(attest), Some(attest_init)));
}
}

Ok(())
Ok((None, None))
}

async fn create_storage_backend_from_spec(
Expand Down Expand Up @@ -672,6 +687,85 @@ impl MachineInitializer<'_> {
}
}

/// Collect the necessary information out of the VM under construction into the provided
/// `AttestationSocketInit`. This is expected to populate `attest_init` with information so the
/// caller can spawn off `AttestationSockInit::run`.
pub fn prepare_rot_initializer(
&self,
attest_init: &mut AttestationSockInit,
) {
let uuid = self.properties.id;

attest_init.instance_uuid = Some(uuid);

// The first boot entry is a key into `self.spec.disks`, which is how we'll get to a
// Crucible volume backing this boot option.
//
// TODO: remove this, but for reference:
// > if let Some(spec) = self.spec.disks.et(&boot_entry.device_id)
let boot_disk_entry = self.spec.boot_settings.as_ref()
.and_then(|settings| {
if settings.order.len() >= 2 {
// In a rack we only configure propolis-server with zero or one boot disks.
// It's possible to provide a fuller list, and in the future the product may
// actually expose such a capability. At that time, we'll need to have a
// reckoning for what "boot disk measurement" from the RoT actually means; it
// probably "should" be "the measurement of the disk that EDK2 decided to boot
// into", but that communication to and from the guest is a little more
// complicated than we want or need to build out today.
//
// Since as the system exists we either have no specific boot disk (and don't
// know where the guest is expected to end up), or one boot disk (and can
// determine which disk to collect a measurement of before even running guest
// firmware), we encode this expectation up front. If the product has changed
// such that this assert is reached, "that's exciting!" and "sorry for crashing
// your Propolis".
panic!("Unsupported VM RoT configuration: more than one boot disk");
}

settings.order.first()
});

if let Some(boot_entry) = boot_disk_entry {
let disk_entry = self.spec.disks.get(&boot_entry.device_id)
.expect("TODO: crosscheck against boot config stuff: boot entry is valid");

let backend_id = match &disk_entry.device_spec {
spec::StorageDevice::Virtio(disk) => &disk.backend_id,
spec::StorageDevice::Nvme(disk) => &disk.backend_id,
};

let volume = match self.block_backends.get(backend_id) {
Some(block_backend) => {
let crucible_backend = match block_backend
.as_any()
.downcast_ref::<block::CrucibleBackend>(
) {
Some(backend) => backend,
None => {
// Probably fine, just not handled right now.
slog::error!(
self.log,
"boot disk is not a Crucible volume"
);
return;
}
};
crucible_backend.clone_volume()
}
None => {
slog::error!(
self.log,
"boot disk does not name a block backend?!"
);
return;
}
};

attest_init.volume_ref = Some(volume);
}
}

/// Initializes the storage devices and backends listed in this
/// initializer's instance spec.
///
Expand Down
7 changes: 7 additions & 0 deletions bin/propolis-server/src/lib/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ use internal_dns_resolver::{ResolveError, Resolver};
use internal_dns_types::names::ServiceName;
pub use nexus_client::Client as NexusClient;
use oximeter::types::ProducerRegistry;
use propolis::attestation::server::AttestationServerConfig;
use propolis_api_types::disk::{
InstanceVCRReplace, SnapshotRequestPathParams, VCRRequestPathParams,
VolumeStatus, VolumeStatusPathParams,
Expand Down Expand Up @@ -95,6 +96,9 @@ pub struct StaticConfig {
/// The configuration to use when setting up this server's Oximeter
/// endpoint.
metrics: Option<MetricsEndpointConfig>,

/// The configuration to use when setting up this server's attestation
/// server, or `None` to not start one.
attest_config: Option<AttestationServerConfig>,
}

/// Context accessible from HTTP callbacks.
Expand All @@ -113,6 +117,7 @@ impl DropshotEndpointContext {
use_reservoir: bool,
log: slog::Logger,
metric_config: Option<MetricsEndpointConfig>,
attest_config: Option<AttestationServerConfig>,
) -> Self {
let vnc_server = VncServer::new(log.clone());
Self {
Expand All @@ -121,6 +126,7 @@ impl DropshotEndpointContext {
bootrom_version,
use_reservoir,
metrics: metric_config,
attest_config,
},
vnc_server,
vm: crate::vm::Vm::new(&log),
Expand Down Expand Up @@ -245,6 +251,7 @@ impl PropolisServerApi for PropolisServerImpl {
nexus_client,
vnc_server: server_context.vnc_server.clone(),
local_server_addr: rqctx.server.local_addr,
attest_config: server_context.static_config.attest_config,
};

let vm_init = match init {
Expand Down
14 changes: 13 additions & 1 deletion bin/propolis-server/src/lib/vm/ensure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,8 @@ async fn initialize_vm_objects(
&properties,
))?;
init.initialize_network_devices(&chipset).await?;
init.initialize_vsock(&chipset)?;
let (tcp_attest, attest_init) =
init.initialize_vsock(&chipset, options.attest_config).await?;

#[cfg(feature = "failure-injection")]
init.initialize_test_devices();
Expand All @@ -578,6 +579,16 @@ async fn initialize_vm_objects(
.initialize_storage_devices(&chipset, options.nexus_client.clone())
.await?;

// If we have a VM RoT, that RoT needs to be able to collect some
// information about the guest before it can be actually usable. That
// information collection can - at the moment - happen entirely
// asynchronously. So, prepare the RoT initialization if necessary, then
// spawn it off to run independently.
if let Some(mut attest_init) = attest_init {
init.prepare_rot_initializer(&mut attest_init);
tokio::spawn(attest_init.run());
}

let ramfb =
init.initialize_fwcfg(spec.board.cpus, &options.bootrom_version)?;

Expand Down Expand Up @@ -642,6 +653,7 @@ async fn initialize_vm_objects(
com1,
framebuffer: Some(ramfb),
ps2ctrl,
tcp_attest,
};

// Another really terrible hack. As we've found in Propolis#1008, brk()
Expand Down
3 changes: 3 additions & 0 deletions bin/propolis-server/src/lib/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ use state_publisher::StatePublisher;
use tokio::sync::{oneshot, watch, RwLock, RwLockReadGuard};

use crate::{server::MetricsEndpointConfig, spec::Spec, vnc::VncServer};
use propolis::attestation::server::AttestationServerConfig;

mod active;
pub(crate) mod ensure;
Expand Down Expand Up @@ -309,6 +310,8 @@ pub(super) struct EnsureOptions {
/// The address of this Propolis process, used by the live migration
/// protocol to transfer serial console connections.
pub(super) local_server_addr: SocketAddr,

pub(super) attest_config: Option<AttestationServerConfig>,
}

impl Vm {
Expand Down
10 changes: 10 additions & 0 deletions bin/propolis-server/src/lib/vm/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use std::{

use futures::{future::BoxFuture, stream::FuturesUnordered, StreamExt};
use propolis::{
attestation,
hw::{ps2::ctrl::PS2Ctrl, qemu::ramfb::RamFb, uart::LpcUart},
vmm::VmmHdl,
Machine,
Expand Down Expand Up @@ -51,6 +52,7 @@ pub(super) struct InputVmObjects {
pub com1: Arc<Serial<LpcUart>>,
pub framebuffer: Option<Arc<RamFb>>,
pub ps2ctrl: Arc<PS2Ctrl>,
pub tcp_attest: Option<attestation::server::AttestationSock>,
}

/// The collection of objects and state that make up a Propolis instance.
Expand Down Expand Up @@ -86,6 +88,13 @@ pub(crate) struct VmObjectsLocked {

/// A handle to the VM's PS/2 controller.
ps2ctrl: Arc<PS2Ctrl>,

/// Attestation server.
//
// This is held here only to keep the attestation server *somewhere*, but
// it's never used after being spawned.
#[allow(dead_code)]
tcp_attest: Option<attestation::server::AttestationSock>,
}

impl VmObjects {
Expand Down Expand Up @@ -126,6 +135,7 @@ impl VmObjectsLocked {
com1: input.com1,
framebuffer: input.framebuffer,
ps2ctrl: input.ps2ctrl,
tcp_attest: input.tcp_attest,
}
}

Expand Down
Loading
Loading