diff --git a/Cargo.lock b/Cargo.lock
index cf3463c39d..242211d85c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4848,6 +4848,7 @@ dependencies = [
  "chipset_device",
  "device_emulators",
  "disk_backend",
+ "disklayer_ram",
  "futures",
  "futures-concurrency",
  "guestmem",
diff --git a/vm/devices/storage/nvme/Cargo.toml b/vm/devices/storage/nvme/Cargo.toml
index 1edd3971f0..172a5aaeab 100644
--- a/vm/devices/storage/nvme/Cargo.toml
+++ b/vm/devices/storage/nvme/Cargo.toml
@@ -41,6 +41,7 @@ unicycle.workspace = true
 zerocopy = { workspace = true, features = ["alloc"] }
 
 [dev-dependencies]
+disklayer_ram.workspace = true
 user_driver.workspace = true
 
 [lints]
diff --git a/vm/devices/storage/nvme/src/tests/controller_tests.rs b/vm/devices/storage/nvme/src/tests/controller_tests.rs
index 0607f02dd4..8e22138c1f 100644
--- a/vm/devices/storage/nvme/src/tests/controller_tests.rs
+++ b/vm/devices/storage/nvme/src/tests/controller_tests.rs
@@ -8,11 +8,13 @@ use crate::NvmeControllerCaps;
 use crate::PAGE_SIZE64;
 use crate::prp::PrpRange;
 use crate::spec;
+use crate::spec::nvm;
 use crate::tests::test_helpers::read_completion_from_queue;
 use crate::tests::test_helpers::test_memory;
 use crate::tests::test_helpers::write_command_to_queue;
 use chipset_device::mmio::MmioIntercept;
 use chipset_device::pci::PciConfigSpace;
+use disklayer_ram::ram_disk;
 use guestmem::GuestMemory;
 use guid::Guid;
 use pal_async::DefaultDriver;
@@ -326,3 +328,423 @@ async fn test_send_identify(driver: DefaultDriver) {
     let cqe = read_completion_from_queue(&gm, &dm1, 0);
     assert_eq!(cqe.status.status(), spec::Status::SUCCESS.0);
 }
+
+// =============================================================================
+// Regression tests for the I/O worker `io_count` accounting.
+//
+// Each I/O queue worker (one per I/O CQ) tracks `sq.io_count` — the number of
+// dispatched I/Os that have not yet been credited back. Every dispatched I/O
+// must increment exactly once and decrement exactly once over its lifetime,
+// regardless of whether the completion is posted directly to the CQ or first
+// queued in `state.completions` (because the CQ was full) and posted later.
+// Two failure modes have historically lived in this code:
+//
+//   * Inline completions for commands targeting an invalid namespace are
+//     synthesized without incrementing `io_count`. If the decrement path
+//     decrements anyway, `io_count` underflows and panics.
+//
+//   * Dispatched I/Os that hit a full CQ get re-queued in `state.completions`
+//     and drained later as `Event::CompletionReady`. If the decrement is
+//     skipped for that drain path, `io_count` leaks; after enough leaks it
+//     exceeds `MAX_IO_QUEUE_DEPTH` and the SQ is permanently throttled.
+//
+// The tests below exercise both shapes through the real `NvmeController`.
+
+/// Helper: create an I/O completion queue and an I/O submission queue bound
+/// to it via admin commands, draining the admin completions as it goes.
+/// Returns the next free admin slot.
+#[expect(clippy::too_many_arguments)]
+async fn create_io_queue_pair(
+    nvmec: &mut NvmeController,
+    gm: &GuestMemory,
+    admin_cq_buf: &PrpRange,
+    admin_sq_buf: &PrpRange,
+    int_controller: &TestPciInterruptController,
+    driver: DefaultDriver,
+    starting_admin_slot: u32,
+    qid: u16,
+    cq_gpa: u64,
+    sq_gpa: u64,
+    cq_qsize_z: u16,
+    sq_qsize_z: u16,
+    cq_iv: u16,
+) -> u32 {
+    let mut admin_slot = starting_admin_slot;
+
+    // CREATE_IO_COMPLETION_QUEUE
+    let mut command = spec::Command::new_zeroed();
+    command
+        .cdw0
+        .set_opcode(spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0);
+    command.cdw10 = spec::Cdw10CreateIoQueue::new()
+        .with_qid(qid)
+        .with_qsize_z(cq_qsize_z)
+        .into();
+    command.cdw11 = spec::Cdw11CreateIoCompletionQueue::new()
+        .with_pc(true)
+        .with_ien(true)
+        .with_iv(cq_iv)
+        .into();
+    command.dptr[0] = cq_gpa;
+    command.cdw0.set_cid(0xc100 + qid);
+
+    write_command_to_queue(gm, admin_sq_buf, admin_slot as usize, &command);
+    nvmec
+        .write_bar0(0x1000, (admin_slot + 1).as_bytes())
+        .unwrap();
+    wait_for_msi(driver.clone(), int_controller, 1000, 0xfeed0000, 0x1111).await;
+    let cqe = read_completion_from_queue(gm, admin_cq_buf, admin_slot as usize);
+    assert_eq!(cqe.status.status(), spec::Status::SUCCESS.0);
+    assert_eq!(cqe.cid, 0xc100 + qid);
+    admin_slot += 1;
+
+    // CREATE_IO_SUBMISSION_QUEUE bound to the CQ just created.
+    let mut command = spec::Command::new_zeroed();
+    command
+        .cdw0
+        .set_opcode(spec::AdminOpcode::CREATE_IO_SUBMISSION_QUEUE.0);
+    command.cdw10 = spec::Cdw10CreateIoQueue::new()
+        .with_qid(qid)
+        .with_qsize_z(sq_qsize_z)
+        .into();
+    command.cdw11 = spec::Cdw11CreateIoSubmissionQueue::new()
+        .with_pc(true)
+        .with_qprio(0)
+        .with_cqid(qid)
+        .into();
+    command.dptr[0] = sq_gpa;
+    command.cdw0.set_cid(0xc200 + qid);
+
+    write_command_to_queue(gm, admin_sq_buf, admin_slot as usize, &command);
+    nvmec
+        .write_bar0(0x1000, (admin_slot + 1).as_bytes())
+        .unwrap();
+    wait_for_msi(driver.clone(), int_controller, 1000, 0xfeed0000, 0x1111).await;
+    let cqe = read_completion_from_queue(gm, admin_cq_buf, admin_slot as usize);
+    assert_eq!(cqe.status.status(), spec::Status::SUCCESS.0);
+    assert_eq!(cqe.cid, 0xc200 + qid);
+    admin_slot + 1
+}
+
+/// Doorbell offset for SQ `qid`. NVMe doorbell stride is 4 bytes and the
+/// admin SQ is at offset `0x1000`; SQ `qid` is at `0x1000 + (2*qid)*4`.
+const fn sq_db(qid: u16) -> u64 {
+    0x1000 + (2 * qid as u64) * 4
+}
+
+/// Doorbell offset for CQ `qid`. CQ `qid` is at `0x1000 + (2*qid + 1)*4`.
+const fn cq_db(qid: u16) -> u64 {
+    0x1000 + (2 * qid as u64 + 1) * 4
+}
+
+/// Smoke test: an I/O command targeting a non-existent namespace must
+/// complete with INVALID_NAMESPACE_OR_FORMAT instead of panicking.
+///
+/// The original bug here was an unconditional `sq.io_count -= 1` at the
+/// bottom of the I/O worker loop, which underflowed because the inline
+/// invalid-namespace completion never incremented `io_count`.
+#[async_test]
+async fn test_io_command_invalid_namespace(driver: DefaultDriver) {
+    let admin_cq_buf = PrpRange::new(vec![0], 0, PAGE_SIZE64).unwrap();
+    let admin_sq_buf = PrpRange::new(vec![0x1000], 0, PAGE_SIZE64).unwrap();
+    let gm = test_memory();
+    let int_controller = TestPciInterruptController::new();
+
+    let mut nvmec = instantiate_and_build_admin_queue(
+        &admin_cq_buf,
+        64,
+        &admin_sq_buf,
+        64,
+        true,
+        Some(&int_controller),
+        driver.clone(),
+        &gm,
+    )
+    .await;
+
+    // Set up MSI-X for the I/O CQ (vector 1).
+    write_msix_table_entry(&mut nvmec, 1, 0xfeed0000, 0x2222, false);
+
+    let io_cq_gpa: u64 = 0x4000;
+    let io_sq_gpa: u64 = 0x5000;
+    let _admin_slot = create_io_queue_pair(
+        &mut nvmec,
+        &gm,
+        &admin_cq_buf,
+        &admin_sq_buf,
+        &int_controller,
+        driver.clone(),
+        0,
+        /* qid = */ 1,
+        io_cq_gpa,
+        io_sq_gpa,
+        /* cq_qsize_z = */ 16,
+        /* sq_qsize_z = */ 16,
+        /* cq_iv = */ 1,
+    )
+    .await;
+
+    // Send a READ to NSID=0xFFFF (no namespaces are attached, so any NSID
+    // is invalid).
+    let mut io_cmd = spec::Command::new_zeroed();
+    io_cmd.cdw0.set_opcode(nvm::NvmOpcode::READ.0);
+    io_cmd.cdw0.set_cid(42);
+    io_cmd.nsid = 0xFFFF;
+
+    let io_sq_buf = PrpRange::new(vec![io_sq_gpa], 0, PAGE_SIZE64).unwrap();
+    let io_cq_buf = PrpRange::new(vec![io_cq_gpa], 0, PAGE_SIZE64).unwrap();
+    write_command_to_queue(&gm, &io_sq_buf, 0, &io_cmd);
+    nvmec.write_bar0(sq_db(1), 1u32.as_bytes()).unwrap();
+
+    wait_for_msi(driver.clone(), &int_controller, 1000, 0xfeed0000, 0x2222).await;
+
+    let cqe = read_completion_from_queue(&gm, &io_cq_buf, 0);
+    assert_eq!(cqe.cid, 42);
+    assert_eq!(
+        cqe.status.status(),
+        spec::Status::INVALID_NAMESPACE_OR_FORMAT.0,
+        "command to invalid namespace should return INVALID_NAMESPACE_OR_FORMAT"
+    );
+}
+
+/// Regression test: when multiple invalid-namespace inline completions are
+/// queued in `state.completions` (because the CQ is full) and the SQ is then
+/// deleted before they drain, `delete_sq` must not decrement `io_count` for
+/// them — those completions never incremented it.
+///
+/// Pre-fix, `delete_sq` unconditionally decremented `io_count` for every
+/// queued completion belonging to the SQ being deleted. Because inline
+/// invalid-namespace completions never incremented `io_count`, this
+/// underflowed (the worker had `io_count == 0`) and panicked. The panic
+/// killed the admin worker task, so the DELETE_SQ completion would never
+/// be posted and this test would time out waiting for its admin MSI.
+#[async_test]
+async fn test_invalid_namespace_queued_then_delete_sq(driver: DefaultDriver) {
+    let admin_cq_buf = PrpRange::new(vec![0], 0, PAGE_SIZE64).unwrap();
+    let admin_sq_buf = PrpRange::new(vec![0x1000], 0, PAGE_SIZE64).unwrap();
+    let gm = test_memory();
+    let int_controller = TestPciInterruptController::new();
+
+    let mut nvmec = instantiate_and_build_admin_queue(
+        &admin_cq_buf,
+        64,
+        &admin_sq_buf,
+        64,
+        true,
+        Some(&int_controller),
+        driver.clone(),
+        &gm,
+    )
+    .await;
+
+    // Set up MSI-X for the I/O CQ (vector 1).
+    write_msix_table_entry(&mut nvmec, 1, 0xfeed0000, 0x2222, false);
+
+    // Create a tiny I/O CQ (qsize_z=1 → 2 entries, only 1 usable before
+    // `cq.write` returns `Ok(false)` and pushes onto `state.completions`),
+    // and a comfortably-sized I/O SQ.
+    let io_cq_gpa: u64 = 0x4000;
+    let io_sq_gpa: u64 = 0x5000;
+    let mut admin_slot = create_io_queue_pair(
+        &mut nvmec,
+        &gm,
+        &admin_cq_buf,
+        &admin_sq_buf,
+        &int_controller,
+        driver.clone(),
+        0,
+        /* qid = */ 1,
+        io_cq_gpa,
+        io_sq_gpa,
+        /* cq_qsize_z = */ 1,
+        /* sq_qsize_z = */ 16,
+        /* cq_iv = */ 1,
+    )
+    .await;
+
+    let io_sq_buf = PrpRange::new(vec![io_sq_gpa], 0, PAGE_SIZE64).unwrap();
+
+    // Submit four invalid-NS READs without consuming any CQ completions.
+    // The first one fills the CQ; the remaining three are queued in
+    // `state.completions` with `decrement_io_count: false`.
+    for i in 0..4u16 {
+        let mut io_cmd = spec::Command::new_zeroed();
+        io_cmd.cdw0.set_opcode(nvm::NvmOpcode::READ.0);
+        io_cmd.cdw0.set_cid(100 + i);
+        io_cmd.nsid = 0xFFFF; // invalid
+        write_command_to_queue(&gm, &io_sq_buf, i as usize, &io_cmd);
+    }
+    nvmec.write_bar0(sq_db(1), 4u32.as_bytes()).unwrap();
+
+    // Wait for the single CQ slot to be filled (the first interrupt), so we
+    // know the worker has had a chance to process the SQ entries and queue
+    // the rest in `state.completions`. (We deliberately do *not* bump the
+    // CQ head doorbell — leaving the queued completions in place for
+    // `delete_sq` to encounter.)
+    wait_for_msi(driver.clone(), &int_controller, 1000, 0xfeed0000, 0x2222).await;
+    // Let the worker finish queueing the rest before we issue DELETE_SQ.
+    let mut backoff = Backoff::new(&driver);
+    backoff.back_off().await;
+
+    // DELETE_IO_SUBMISSION_QUEUE for our SQ. Pre-fix, `delete_sq` would
+    // panic on `sq.io_count -= 1` while retaining `state.completions`
+    // (io_count was zero throughout because invalid-NS never increments
+    // it). The admin worker would die and we'd never see this completion.
+    let mut command = spec::Command::new_zeroed();
+    command
+        .cdw0
+        .set_opcode(spec::AdminOpcode::DELETE_IO_SUBMISSION_QUEUE.0);
+    command.cdw10 = spec::Cdw10DeleteIoQueue::new().with_qid(1).into();
+    command.cdw0.set_cid(50);
+    write_command_to_queue(&gm, &admin_sq_buf, admin_slot as usize, &command);
+    nvmec
+        .write_bar0(0x1000, (admin_slot + 1).as_bytes())
+        .unwrap();
+
+    // We expect the admin completion for DELETE_SQ to come back. If the
+    // worker panicked, this will time out.
+    wait_for_msi(driver.clone(), &int_controller, 5000, 0xfeed0000, 0x1111).await;
+    let cqe = read_completion_from_queue(&gm, &admin_cq_buf, admin_slot as usize);
+    assert_eq!(
+        cqe.status.status(),
+        spec::Status::SUCCESS.0,
+        "DELETE_SQ should complete cleanly even with inline invalid-NS \
+         completions queued in state.completions"
+    );
+    assert_eq!(cqe.cid, 50);
+    admin_slot += 1;
+
+    // Sanity check: the controller is still healthy enough to accept
+    // another admin command (a panicked admin worker would not respond).
+    let mut command = spec::Command::new_zeroed();
+    command.cdw0.set_opcode(spec::AdminOpcode::IDENTIFY.0);
+    command.cdw10 = u32::from(spec::Cdw10Identify::new().with_cns(spec::Cns::CONTROLLER.0));
+    command.dptr[0] = 0x6000;
+    command.cdw0.set_cid(51);
+    write_command_to_queue(&gm, &admin_sq_buf, admin_slot as usize, &command);
+    nvmec
+        .write_bar0(0x1000, (admin_slot + 1).as_bytes())
+        .unwrap();
+    wait_for_msi(driver.clone(), &int_controller, 1000, 0xfeed0000, 0x1111).await;
+    let cqe = read_completion_from_queue(&gm, &admin_cq_buf, admin_slot as usize);
+    assert_eq!(cqe.status.status(), spec::Status::SUCCESS.0);
+    assert_eq!(cqe.cid, 51);
+}
+
+/// Regression test: dispatched I/Os whose completion hits a full CQ are
+/// re-queued in `state.completions`. When drained later as
+/// `Event::CompletionReady`, the dispatch-side `io_count += 1` must still
+/// be balanced by a `-= 1`; otherwise `io_count` leaks and eventually
+/// exceeds `MAX_IO_QUEUE_DEPTH`, permanently throttling the SQ.
+///
+/// Setup: a 2-entry I/O CQ (one usable slot) and a 16-entry SQ filled
+/// with FLUSH commands against a ram-disk-backed namespace. With the
+/// tiny CQ, dispatched completions almost immediately back up into
+/// `state.completions`. With a working accounting scheme, the worker
+/// drains all 16 as the test bumps the CQ head doorbell. With a broken
+/// scheme, only the first ~9 dispatch and complete; the remaining SQ
+/// entries are abandoned because `io_count` stays at `MAX_IO_QUEUE_DEPTH`,
+/// and waiting for those completions times out.
+#[async_test]
+async fn test_full_cq_does_not_leak_io_count(driver: DefaultDriver) {
+    let admin_cq_buf = PrpRange::new(vec![0], 0, PAGE_SIZE64).unwrap();
+    let admin_sq_buf = PrpRange::new(vec![0x1000], 0, PAGE_SIZE64).unwrap();
+    let gm = test_memory();
+    let int_controller = TestPciInterruptController::new();
+
+    let mut nvmec = instantiate_and_build_admin_queue(
+        &admin_cq_buf,
+        64,
+        &admin_sq_buf,
+        64,
+        true,
+        Some(&int_controller),
+        driver.clone(),
+        &gm,
+    )
+    .await;
+
+    // Attach a 1 MiB ram disk as nsid=1 so FLUSHes on it actually exercise
+    // the dispatched-I/O code path (rather than the inline invalid-NS
+    // path). FLUSH on a ram disk is effectively a no-op, so it completes
+    // quickly and predictably.
+    let disk = ram_disk(1 << 20, /* read_only = */ false).unwrap();
+    nvmec.client().add_namespace(1, disk).await.unwrap();
+
+    // Set up MSI-X for the I/O CQ (vector 1).
+    write_msix_table_entry(&mut nvmec, 1, 0xfeed0000, 0x2222, false);
+
+    // Tiny CQ (qsize_z=1 → 2 entries, capacity 1), generous SQ.
+    let io_cq_gpa: u64 = 0x4000;
+    let io_sq_gpa: u64 = 0x5000;
+    let _admin_slot = create_io_queue_pair(
+        &mut nvmec,
+        &gm,
+        &admin_cq_buf,
+        &admin_sq_buf,
+        &int_controller,
+        driver.clone(),
+        0,
+        /* qid = */ 1,
+        io_cq_gpa,
+        io_sq_gpa,
+        /* cq_qsize_z = */ 1,
+        /* sq_qsize_z = */ 31,
+        /* cq_iv = */ 1,
+    )
+    .await;
+
+    let io_sq_buf = PrpRange::new(vec![io_sq_gpa], 0, PAGE_SIZE64).unwrap();
+    let io_cq_buf = PrpRange::new(vec![io_cq_gpa], 0, PAGE_SIZE64).unwrap();
+
+    // Submit 16 FLUSH commands. With CQ capacity 1, the worker can only
+    // post one completion directly; the rest must be queued in
+    // `state.completions` and drained as we bump the CQ head doorbell.
+    const N: u16 = 16;
+    for i in 0..N {
+        let mut io_cmd = spec::Command::new_zeroed();
+        io_cmd.cdw0.set_opcode(nvm::NvmOpcode::FLUSH.0);
+        io_cmd.cdw0.set_cid(200 + i);
+        io_cmd.nsid = 1;
+        write_command_to_queue(&gm, &io_sq_buf, i as usize, &io_cmd);
+    }
+    nvmec.write_bar0(sq_db(1), (N as u32).as_bytes()).unwrap();
+
+    // Drain all N completions. Each iteration: wait for the next CQ MSI,
+    // read the entry at the current CQ slot, bump the CQ head doorbell to
+    // free the slot. The worker must refill the slot from
+    // `state.completions` (and dispatch more SQ entries as its `io_count`
+    // drops below `MAX_IO_QUEUE_DEPTH`). With a leaking `io_count`, the
+    // SQ stalls and not all N will arrive.
+    let mut received = std::collections::BTreeSet::new();
+    for i in 0..N as usize {
+        // CQ has 2 slots (qsize_z=1 → len 2), so the producer's tail
+        // alternates between slots 0 and 1 as it wraps.
+        let slot = i % 2;
+        wait_for_msi(driver.clone(), &int_controller, 5000, 0xfeed0000, 0x2222).await;
+        let cqe = read_completion_from_queue(&gm, &io_cq_buf, slot);
+        assert_eq!(
+            cqe.status.status(),
+            spec::Status::SUCCESS.0,
+            "FLUSH #{} (cid={}) returned status {:#x}",
+            i,
+            cqe.cid,
+            cqe.status.status()
+        );
+        assert!(
+            received.insert(cqe.cid),
+            "duplicate completion for cid {}",
+            cqe.cid
+        );
+        // Advance the head doorbell past the slot we just consumed.
+        let new_head = (((i + 1) % 2) as u32).to_le();
+        nvmec.write_bar0(cq_db(1), new_head.as_bytes()).unwrap();
+    }
+
+    // Every submitted command must have completed exactly once.
+    let expected: std::collections::BTreeSet<u16> = (200..200 + N).collect();
+    assert_eq!(
+        received, expected,
+        "missing FLUSH completions — likely io_count leak throttled the SQ"
+    );
+}
diff --git a/vm/devices/storage/nvme/src/workers/io.rs b/vm/devices/storage/nvme/src/workers/io.rs
index 9cfb80d10b..1c4444468d 100644
--- a/vm/devices/storage/nvme/src/workers/io.rs
+++ b/vm/devices/storage/nvme/src/workers/io.rs
@@ -222,6 +222,7 @@ impl IoHandler {
                 Event::Sq(sq_idx, r) => {
                     let command = r?;
                     let cid = command.cdw0.cid();
+                    state.sqs[sq_idx].io_count += 1;
 
                     if let Some(ns) = state.namespaces.get(&command.nsid) {
                         let ns = ns.clone();
@@ -246,10 +247,13 @@ impl IoHandler {
                             }
                         });
                         state.ios.push(io);
-                        state.sqs[sq_idx].io_count += 1;
                         continue;
                     }
 
+                    // Invalid namespace — synthesize the completion inline.
+                    // `io_count` was already incremented above so the
+                    // unconditional decrement on the completion-posting path
+                    // (or in `delete_sq`/`state.completions` drain) balances.
                     IoResult {
                         cid,
                         sq_idx,