Skip to content

Commit ec9f7f3

Browse files
committed
fewer copies
1 parent d8ef88e commit ec9f7f3

File tree

6 files changed

+117
-129
lines changed

6 files changed

+117
-129
lines changed

turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs

Lines changed: 80 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -182,23 +182,16 @@ impl<'e, B: BackingStorage> ExecuteContextImpl<'e, B> {
182182
self.backend.should_restore() && self.backend.local_is_partial
183183
}
184184

185-
fn restore_task_data(
185+
fn lookup_task_data(
186186
&self,
187187
task_id: TaskId,
188188
category: SpecificTaskDataCategory,
189-
) -> TaskStorage {
189+
) -> Option<turbo_persistence::ArcBytes> {
190190
if !self.should_check_backing_storage() {
191-
// If we don't need to restore, we can just return an empty storage
192-
return TaskStorage::default();
191+
return None;
193192
}
194-
let mut storage = TaskStorage::default();
195-
let result = self
196-
.backend
197-
.backing_storage
198-
.lookup_data(task_id, category, &mut storage);
199-
200-
match result {
201-
Ok(()) => storage,
193+
match self.backend.backing_storage.lookup_data(task_id, category) {
194+
Ok(bytes) => bytes,
202195
Err(e) => {
203196
panic!(
204197
"Failed to restore task data (corrupted database or bug): {:?}",
@@ -531,28 +524,39 @@ impl<'e, B: BackingStorage> ExecuteContext<'e> for ExecuteContextImpl<'e, B> {
531524
category.includes_meta() && !task.flags.is_restored(TaskDataCategory::Meta);
532525

533526
if needs_data || needs_meta {
534-
// Avoid holding the lock too long since this can also affect other tasks
535-
// Drop lock once, do all I/O, then re-acquire once
527+
// Avoid holding the lock too long since this can also affect other tasks.
528+
// Drop lock once, do all I/O (returning raw bytes), then re-acquire once
529+
// and decode directly into the live task — no scratch TaskStorage needed.
536530
drop(task);
537531

538-
let storage_data = needs_data
539-
.then(|| self.restore_task_data(task_id, SpecificTaskDataCategory::Data));
540-
let storage_meta = needs_meta
541-
.then(|| self.restore_task_data(task_id, SpecificTaskDataCategory::Meta));
532+
let bytes_data = needs_data
533+
.then(|| self.lookup_task_data(task_id, SpecificTaskDataCategory::Data));
534+
let bytes_meta = needs_meta
535+
.then(|| self.lookup_task_data(task_id, SpecificTaskDataCategory::Meta));
542536

543537
task = self.backend.storage.access_mut(task_id);
544538

545-
// Handle race conditions and merge
546-
if let Some(storage) = storage_data
547-
&& !task.flags.is_restored(TaskDataCategory::Data)
548-
{
549-
task.restore_from(storage, TaskDataCategory::Data);
539+
// Handle race conditions and decode directly into live storage.
540+
// set_restored is called even when no bytes found (task simply has no
541+
// persisted data for this category).
542+
if needs_data && !task.flags.is_restored(TaskDataCategory::Data) {
543+
if let Some(bytes) = bytes_data.flatten() {
544+
let mut decoder = new_turbo_bincode_decoder(&bytes);
545+
task.decode(SpecificTaskDataCategory::Data, &mut decoder)
546+
.unwrap_or_else(|e| {
547+
panic!("Failed to decode Data for {task_id:?}: {e:?}")
548+
});
549+
}
550550
task.flags.set_restored(TaskDataCategory::Data);
551551
}
552-
if let Some(storage) = storage_meta
553-
&& !task.flags.is_restored(TaskDataCategory::Meta)
554-
{
555-
task.restore_from(storage, TaskDataCategory::Meta);
552+
if needs_meta && !task.flags.is_restored(TaskDataCategory::Meta) {
553+
if let Some(bytes) = bytes_meta.flatten() {
554+
let mut decoder = new_turbo_bincode_decoder(&bytes);
555+
task.decode(SpecificTaskDataCategory::Meta, &mut decoder)
556+
.unwrap_or_else(|e| {
557+
panic!("Failed to decode Meta for {task_id:?}: {e:?}")
558+
});
559+
}
556560
task.flags.set_restored(TaskDataCategory::Meta);
557561
}
558562
}
@@ -625,47 +629,69 @@ impl<'e, B: BackingStorage> ExecuteContext<'e> for ExecuteContextImpl<'e, B> {
625629
category.includes_meta() && !task2.flags.is_restored(TaskDataCategory::Meta);
626630

627631
if needs_data1 || needs_meta1 || needs_data2 || needs_meta2 {
628-
// Avoid holding the lock too long since this can also affect other tasks
629-
// Drop locks once, do all I/O, then re-acquire once
632+
// Avoid holding the lock too long since this can also affect other tasks.
633+
// Drop locks once, do all I/O (returning raw bytes), then re-acquire once
634+
// and decode directly into the live tasks — no scratch TaskStorage needed.
630635
drop(task1);
631636
drop(task2);
632637

633-
let storage_data1 = needs_data1
634-
.then(|| self.restore_task_data(task_id1, SpecificTaskDataCategory::Data));
635-
let storage_meta1 = needs_meta1
636-
.then(|| self.restore_task_data(task_id1, SpecificTaskDataCategory::Meta));
637-
let storage_data2 = needs_data2
638-
.then(|| self.restore_task_data(task_id2, SpecificTaskDataCategory::Data));
639-
let storage_meta2 = needs_meta2
640-
.then(|| self.restore_task_data(task_id2, SpecificTaskDataCategory::Meta));
638+
let bytes_data1 = needs_data1
639+
.then(|| self.lookup_task_data(task_id1, SpecificTaskDataCategory::Data));
640+
let bytes_meta1 = needs_meta1
641+
.then(|| self.lookup_task_data(task_id1, SpecificTaskDataCategory::Meta));
642+
let bytes_data2 = needs_data2
643+
.then(|| self.lookup_task_data(task_id2, SpecificTaskDataCategory::Data));
644+
let bytes_meta2 = needs_meta2
645+
.then(|| self.lookup_task_data(task_id2, SpecificTaskDataCategory::Meta));
641646

642647
let (t1, t2) = self.backend.storage.access_pair_mut(task_id1, task_id2);
643648
task1 = t1;
644649
task2 = t2;
645650

646-
// Merge results, handling race conditions
647-
if let Some(storage) = storage_data1
648-
&& !task1.flags.is_restored(TaskDataCategory::Data)
649-
{
650-
task1.restore_from(storage, TaskDataCategory::Data);
651+
// Decode directly into live storage, handling race conditions.
652+
// set_restored is called even when no bytes found.
653+
if needs_data1 && !task1.flags.is_restored(TaskDataCategory::Data) {
654+
if let Some(bytes) = bytes_data1.flatten() {
655+
let mut decoder = new_turbo_bincode_decoder(&bytes);
656+
task1
657+
.decode(SpecificTaskDataCategory::Data, &mut decoder)
658+
.unwrap_or_else(|e| {
659+
panic!("Failed to decode Data for {task_id1:?}: {e:?}")
660+
});
661+
}
651662
task1.flags.set_restored(TaskDataCategory::Data);
652663
}
653-
if let Some(storage) = storage_meta1
654-
&& !task1.flags.is_restored(TaskDataCategory::Meta)
655-
{
656-
task1.restore_from(storage, TaskDataCategory::Meta);
664+
if needs_meta1 && !task1.flags.is_restored(TaskDataCategory::Meta) {
665+
if let Some(bytes) = bytes_meta1.flatten() {
666+
let mut decoder = new_turbo_bincode_decoder(&bytes);
667+
task1
668+
.decode(SpecificTaskDataCategory::Meta, &mut decoder)
669+
.unwrap_or_else(|e| {
670+
panic!("Failed to decode Meta for {task_id1:?}: {e:?}")
671+
});
672+
}
657673
task1.flags.set_restored(TaskDataCategory::Meta);
658674
}
659-
if let Some(storage) = storage_data2
660-
&& !task2.flags.is_restored(TaskDataCategory::Data)
661-
{
662-
task2.restore_from(storage, TaskDataCategory::Data);
675+
if needs_data2 && !task2.flags.is_restored(TaskDataCategory::Data) {
676+
if let Some(bytes) = bytes_data2.flatten() {
677+
let mut decoder = new_turbo_bincode_decoder(&bytes);
678+
task2
679+
.decode(SpecificTaskDataCategory::Data, &mut decoder)
680+
.unwrap_or_else(|e| {
681+
panic!("Failed to decode Data for {task_id2:?}: {e:?}")
682+
});
683+
}
663684
task2.flags.set_restored(TaskDataCategory::Data);
664685
}
665-
if let Some(storage) = storage_meta2
666-
&& !task2.flags.is_restored(TaskDataCategory::Meta)
667-
{
668-
task2.restore_from(storage, TaskDataCategory::Meta);
686+
if needs_meta2 && !task2.flags.is_restored(TaskDataCategory::Meta) {
687+
if let Some(bytes) = bytes_meta2.flatten() {
688+
let mut decoder = new_turbo_bincode_decoder(&bytes);
689+
task2
690+
.decode(SpecificTaskDataCategory::Meta, &mut decoder)
691+
.unwrap_or_else(|e| {
692+
panic!("Failed to decode Meta for {task_id2:?}: {e:?}")
693+
});
694+
}
669695
task2.flags.set_restored(TaskDataCategory::Meta);
670696
}
671697
}

turbopack/crates/turbo-tasks-backend/src/backing_storage.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ use anyhow::Result;
44
use either::Either;
55
use smallvec::SmallVec;
66
use turbo_bincode::TurboBincodeBuffer;
7+
use turbo_persistence::ArcBytes;
78
use turbo_tasks::{TaskId, backend::CachedTaskType};
89

910
use crate::{
10-
backend::{AnyOperation, SpecificTaskDataCategory, storage_schema::TaskStorage},
11+
backend::{AnyOperation, SpecificTaskDataCategory},
1112
utils::chunked_vec::ChunkedVec,
1213
};
1314

@@ -69,14 +70,16 @@ pub trait BackingStorageSealed: 'static + Send + Sync {
6970
/// The caller must verify each returned TaskId by comparing the stored task type which will
7071
/// require a second database read
7172
fn lookup_task_candidates(&self, key: &CachedTaskType) -> Result<SmallVec<[TaskId; 1]>>;
72-
/// Looks up and decodes persisted data for a single task, updating the provided storage with
73-
/// data from the database in the given category.
73+
/// Looks up raw persisted bytes for a single task.
74+
///
75+
/// Returns `None` if the task has no persisted data for the given category.
76+
/// The caller is responsible for decoding the returned bytes. The returned
77+
/// [`ArcBytes`] is ref-counted and zero-copy when backed by a memory-mapped file.
7478
fn lookup_data(
7579
&self,
7680
task_id: TaskId,
7781
category: SpecificTaskDataCategory,
78-
storage: &mut TaskStorage,
79-
) -> Result<()>;
82+
) -> Result<Option<ArcBytes>>;
8083

8184
/// Batch lookup raw bytes for multiple tasks, calling
8285
/// `callback(index, Option<&[u8]>)` for every task in `task_ids` immediately after its
@@ -151,9 +154,8 @@ where
151154
&self,
152155
task_id: TaskId,
153156
category: SpecificTaskDataCategory,
154-
storage: &mut TaskStorage,
155-
) -> Result<()> {
156-
either::for_both!(self, this => this.lookup_data(task_id, category, storage))
157+
) -> Result<Option<ArcBytes>> {
158+
either::for_both!(self, this => this.lookup_data(task_id, category))
157159
}
158160

159161
fn batch_lookup_data(

turbopack/crates/turbo-tasks-backend/src/database/key_value_database.rs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use anyhow::Result;
22
use smallvec::SmallVec;
3-
use turbo_persistence::{FamilyConfig, FamilyKind};
3+
use turbo_persistence::{ArcBytes, FamilyConfig, FamilyKind};
44

55
use crate::database::write_batch::ConcurrentWriteBatch;
66

@@ -57,21 +57,13 @@ pub trait KeyValueDatabase {
5757
false
5858
}
5959

60-
type ValueBuffer<'l>: std::borrow::Borrow<[u8]>
61-
where
62-
Self: 'l;
63-
64-
fn get(&self, key_space: KeySpace, key: &[u8]) -> Result<Option<Self::ValueBuffer<'_>>>;
60+
fn get(&self, key_space: KeySpace, key: &[u8]) -> Result<Option<ArcBytes>>;
6561
/// Looks up a key and returns all matching values.
6662
///
6763
/// Useful for keyspaces where keys are hashes and collisions are possible (e.g., TaskCache).
6864
/// The default implementation returns at most one value (from `get`), but implementations
6965
/// that support multiple values per key should override this.
70-
fn get_multiple(
71-
&self,
72-
key_space: KeySpace,
73-
key: &[u8],
74-
) -> Result<SmallVec<[Self::ValueBuffer<'_>; 1]>> {
66+
fn get_multiple(&self, key_space: KeySpace, key: &[u8]) -> Result<SmallVec<[ArcBytes; 1]>> {
7567
Ok(self.get(key_space, key)?.into_iter().collect())
7668
}
7769

@@ -89,7 +81,7 @@ pub trait KeyValueDatabase {
8981
) -> Result<()> {
9082
for (index, key) in keys.iter().enumerate() {
9183
let value = self.get(key_space, key)?;
92-
let opt_bytes = value.as_ref().map(std::borrow::Borrow::<[u8]>::borrow);
84+
let opt_bytes = value.as_deref();
9385
callback(index, opt_bytes)?;
9486
}
9587
Ok(())

turbopack/crates/turbo-tasks-backend/src/database/noop_kv.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use anyhow::Result;
2+
use turbo_persistence::ArcBytes;
23

34
use crate::database::{
45
key_value_database::{KeySpace, KeyValueDatabase},
@@ -8,12 +9,7 @@ use crate::database::{
89
pub struct NoopKvDb;
910

1011
impl KeyValueDatabase for NoopKvDb {
11-
type ValueBuffer<'l>
12-
= &'l [u8]
13-
where
14-
Self: 'l;
15-
16-
fn get(&self, _key_space: KeySpace, _key: &[u8]) -> Result<Option<Self::ValueBuffer<'_>>> {
12+
fn get(&self, _key_space: KeySpace, _key: &[u8]) -> Result<Option<ArcBytes>> {
1713
Ok(None)
1814
}
1915

turbopack/crates/turbo-tasks-backend/src/database/turbo/mod.rs

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,7 @@ impl KeyValueDatabase for TurboKeyValueDatabase {
8787
self.db.is_empty()
8888
}
8989

90-
type ValueBuffer<'l>
91-
= ArcBytes
92-
where
93-
Self: 'l;
94-
95-
fn get(&self, key_space: KeySpace, key: &[u8]) -> Result<Option<Self::ValueBuffer<'_>>> {
90+
fn get(&self, key_space: KeySpace, key: &[u8]) -> Result<Option<ArcBytes>> {
9691
self.db.get(key_space as usize, &key)
9792
}
9893

@@ -105,11 +100,7 @@ impl KeyValueDatabase for TurboKeyValueDatabase {
105100
self.db.batch_get_with(key_space as usize, keys, callback)
106101
}
107102

108-
fn get_multiple(
109-
&self,
110-
key_space: KeySpace,
111-
key: &[u8],
112-
) -> Result<SmallVec<[Self::ValueBuffer<'_>; 1]>> {
103+
fn get_multiple(&self, key_space: KeySpace, key: &[u8]) -> Result<SmallVec<[ArcBytes; 1]>> {
113104
self.db.get_multiple(key_space as usize, &key)
114105
}
115106

0 commit comments

Comments
 (0)