Skip to content

Commit 805b915

Browse files
committed
fetch: respect HTTP Cache-Control headers with TTL-based invalidation (#91729)
1 parent a88b102 commit 805b915

File tree

2 files changed

+471
-21
lines changed

2 files changed

+471
-21
lines changed

turbopack/crates/turbo-tasks-fetch/src/client.rs

Lines changed: 241 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
1-
use std::{hash::Hash, sync::LazyLock};
1+
use std::{
2+
cmp::max,
3+
fmt::{Display, Formatter},
4+
hash::Hash,
5+
sync::LazyLock,
6+
time::{Duration, SystemTime},
7+
};
28

39
use anyhow::Result;
410
use quick_cache::sync::Cache;
511
use turbo_rcstr::RcStr;
6-
use turbo_tasks::{Completion, ReadRef, Vc, duration_span};
12+
use turbo_tasks::{
13+
Completion, FxIndexSet, InvalidationReason, InvalidationReasonKind, Invalidator, ReadRef,
14+
ResolvedVc, Vc, duration_span, util::StaticOrArc,
15+
};
716

817
use crate::{FetchError, FetchResult, HttpResponse, HttpResponseBody};
918

@@ -19,8 +28,21 @@ static CLIENT_CACHE: LazyLock<Cache<ReadRef<FetchClientConfig>, reqwest::Client>
1928
/// This is needed because [`reqwest::ClientBuilder`] does not implement the required traits. This
2029
/// factory cannot be a closure because closures do not implement `Eq` or `Hash`.
2130
#[turbo_tasks::value(shared)]
22-
#[derive(Hash, Default)]
23-
pub struct FetchClientConfig {}
31+
#[derive(Hash)]
32+
pub struct FetchClientConfig {
33+
/// Minimum cache TTL in seconds. Responses with a `Cache-Control: max-age` shorter than this
34+
/// will be clamped to this value. This prevents pathologically short timeouts from causing an
35+
/// invalidation bomb. Defaults to 1 hour.
36+
pub min_cache_control_secs: u64,
37+
}
38+
39+
impl Default for FetchClientConfig {
40+
fn default() -> Self {
41+
Self {
42+
min_cache_control_secs: 60 * 60,
43+
}
44+
}
45+
}
2446

2547
impl FetchClientConfig {
2648
/// Returns a cached instance of `reqwest::Client` it exists, otherwise constructs a new one.
@@ -34,7 +56,7 @@ impl FetchClientConfig {
3456
/// The reqwest client fails to construct if the TLS backend cannot be initialized, or the
3557
/// resolver cannot load the system configuration. These failures should be treated as
3658
/// cached for some amount of time, but ultimately transient (e.g. using
37-
/// [`turbo_tasks::mark_session_dependent`]).
59+
/// `session_dependent`).
3860
pub fn try_get_cached_reqwest_client(
3961
self: ReadRef<FetchClientConfig>,
4062
) -> reqwest::Result<reqwest::Client> {
@@ -78,17 +100,66 @@ impl FetchClientConfig {
78100
}
79101
}
80102

103+
/// Invalidation was caused by a max-age deadline returned by a server
104+
#[derive(PartialEq, Eq, Hash)]
105+
pub(crate) struct HttpTimeout {}
106+
107+
impl InvalidationReason for HttpTimeout {
108+
fn kind(&self) -> Option<StaticOrArc<dyn InvalidationReasonKind>> {
109+
Some(StaticOrArc::Static(&HTTP_TIMEOUT_KIND))
110+
}
111+
}
112+
113+
impl Display for HttpTimeout {
114+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
115+
write!(f, "http max-age timeout")
116+
}
117+
}
118+
119+
/// Invalidation kind for [Write]
120+
#[derive(PartialEq, Eq, Hash)]
121+
struct HttpTimeoutKind;
122+
123+
static HTTP_TIMEOUT_KIND: HttpTimeoutKind = HttpTimeoutKind;
124+
125+
impl InvalidationReasonKind for HttpTimeoutKind {
126+
fn fmt(
127+
&self,
128+
reasons: &FxIndexSet<StaticOrArc<dyn InvalidationReason>>,
129+
f: &mut Formatter<'_>,
130+
) -> std::fmt::Result {
131+
write!(f, "{} fetches timed out", reasons.len(),)
132+
}
133+
}
134+
135+
/// Internal result from `fetch_inner` that includes the invalidator for TTL-based re-fetching.
136+
#[turbo_tasks::value(shared)]
137+
struct FetchInnerResult {
138+
result: ResolvedVc<FetchResult>,
139+
/// Invalidator for the `fetch_inner` task. Used by the outer `fetch` to set up a timer that
140+
/// triggers re-fetching when the Cache-Control max-age expires.
141+
invalidator: Option<Invalidator>,
142+
/// Absolute deadline (seconds since UNIX epoch) after which the cached response should be
143+
/// re-fetched. Computed as `now + max-age` at fetch time. An absolute timestamp is used
144+
/// instead of a relative duration so that the remaining TTL is correct on warm cache restore.
145+
deadline_secs: Option<u64>,
146+
}
147+
81148
#[turbo_tasks::value_impl]
82149
impl FetchClientConfig {
150+
/// Performs the actual HTTP request. This task is `network` but NOT `session_dependent`, so
151+
/// its cached result survives restarts. The outer `fetch` task (which IS `session_dependent`)
152+
/// reads the cached invalidator and sets up a timer for TTL-based re-fetching.
83153
#[turbo_tasks::function(network)]
84-
pub async fn fetch(
154+
async fn fetch_inner(
85155
self: Vc<FetchClientConfig>,
86156
url: RcStr,
87157
user_agent: Option<RcStr>,
88-
) -> Result<Vc<FetchResult>> {
158+
) -> Result<Vc<FetchInnerResult>> {
89159
let url_ref = &*url;
90160
let this = self.await?;
91-
let response_result: reqwest::Result<HttpResponse> = async move {
161+
let min_cache_control_secs = this.min_cache_control_secs;
162+
let response_result: reqwest::Result<(HttpResponse, Option<u64>)> = async move {
92163
let reqwest_client = this.try_get_cached_reqwest_client()?;
93164

94165
let mut builder = reqwest_client.get(url_ref);
@@ -103,34 +174,152 @@ impl FetchClientConfig {
103174
.and_then(|r| r.error_for_status())?;
104175

105176
let status = response.status().as_u16();
177+
let max_age = parse_cache_control(response.headers());
106178

107179
let body = {
108180
let _span = duration_span!("fetch response", url = url_ref);
109181
response.bytes().await?
110182
}
111183
.to_vec();
112184

113-
Ok(HttpResponse {
114-
status,
115-
body: HttpResponseBody(body).resolved_cell(),
116-
})
185+
Ok((
186+
HttpResponse {
187+
status,
188+
body: HttpResponseBody(body).resolved_cell(),
189+
},
190+
max_age,
191+
))
117192
}
118193
.await;
119194

120195
match response_result {
121-
Ok(resp) => Ok(Vc::cell(Ok(resp.resolved_cell()))),
196+
Ok((resp, max_age_secs)) => {
197+
if let Some(max_age_secs) = max_age_secs {
198+
let max_age_secs = max(max_age_secs, min_cache_control_secs);
199+
let deadline_secs = {
200+
// Transform the relative offset to an absolute deadline so it can be
201+
// cached.
202+
let now = SystemTime::now()
203+
.duration_since(SystemTime::UNIX_EPOCH)
204+
.expect("system clock is before UNIX epoch")
205+
.as_secs();
206+
now + max_age_secs
207+
};
208+
let invalidator = turbo_tasks::get_invalidator();
209+
Ok(FetchInnerResult {
210+
result: ResolvedVc::cell(Ok(resp.resolved_cell())),
211+
invalidator,
212+
deadline_secs: Some(deadline_secs),
213+
}
214+
.cell())
215+
} else {
216+
Completion::session_dependent().await?;
217+
Ok(FetchInnerResult {
218+
result: ResolvedVc::cell(Ok(resp.resolved_cell())),
219+
invalidator: None,
220+
deadline_secs: None,
221+
}
222+
.cell())
223+
}
224+
}
122225
Err(err) => {
123-
// the client failed to construct or the HTTP request failed
124-
// Mark session dependent so we get retried in the next sessions
125-
// In dev our caller will keep going, but in prod builds this will fail the build
126-
// anyway.
226+
// Read session_dependent_completion so that this task is re-dirtied on session
227+
// restore. This ensures transient errors (network down, DNS failure) are retried
228+
// on the next session without a timer or busy-loop.
127229
Completion::session_dependent().as_side_effect().await?;
128-
Ok(Vc::cell(Err(
129-
FetchError::from_reqwest_error(&err, &url).resolved_cell()
130-
)))
230+
Ok(FetchInnerResult {
231+
result: ResolvedVc::cell(Err(
232+
FetchError::from_reqwest_error(&err, &url).resolved_cell()
233+
)),
234+
235+
invalidator: None,
236+
deadline_secs: None,
237+
}
238+
.cell())
239+
}
240+
}
241+
}
242+
243+
/// Fetches the given URL and returns the response. Results are cached across sessions using
244+
/// TTL from the response's `Cache-Control: max-age` header.
245+
///
246+
/// This is the outer task in a two-task pattern:
247+
/// - `fetch` (session_dependent): always re-executes on restore, reads the cached inner result,
248+
/// and spawns a timer for mid-session TTL expiry.
249+
/// - `fetch_inner` (network, NOT session_dependent): performs the actual HTTP request and stays
250+
/// cached across restarts. Returns an `Invalidator` that the outer task uses to trigger
251+
/// re-fetching when the TTL expires.
252+
#[turbo_tasks::function(network, session_dependent)]
253+
pub async fn fetch(
254+
self: Vc<FetchClientConfig>,
255+
url: RcStr,
256+
user_agent: Option<RcStr>,
257+
) -> Result<Vc<FetchResult>> {
258+
let FetchInnerResult {
259+
result,
260+
deadline_secs,
261+
invalidator,
262+
} = *self.fetch_inner(url, user_agent).await?;
263+
264+
// Set up a timer to invalidate fetch_inner when the TTL expires.
265+
// On warm cache restore, this re-executes (session_dependent), reads the persisted
266+
// deadline from fetch_inner's cached result, and starts a timer for the remaining time.
267+
//
268+
// Skip when dependency tracking is disabled (e.g. one-shot `next build`) since
269+
// invalidation panics without dependency tracking and the timer would be wasted work.
270+
if turbo_tasks::turbo_tasks().is_tracking_dependencies()
271+
&& let (Some(deadline_secs), Some(invalidator)) = (deadline_secs, invalidator)
272+
{
273+
// transform absolute deadline back to a relative duration for the sleep call
274+
let now = SystemTime::now()
275+
.duration_since(SystemTime::UNIX_EPOCH)
276+
.expect("system clock is before UNIX epoch")
277+
.as_secs();
278+
let remaining = Duration::from_secs(deadline_secs.saturating_sub(now));
279+
// NOTE: in the case where the deadline is expired on session start this timeout will
280+
// immediately invalidate and race with us returning. This is basically fine since in
281+
// the most common case the actual fetch result is identical so this gives us a kind of
282+
// 'stale while revalidate' feature.
283+
// alternatively we could synchronously invalidate and re-execute `fetch-inner` but that
284+
// simply adds latency in the common case where our fetch is identical.
285+
// NOTE(2): if for some reason `fetch` is re-executed but `fetch-inner` isn't we could
286+
// end up with multiple timers. Currently there is no known case where this could
287+
// happen, if it somehow does we could end up with redundant invalidations and
288+
// re-fetches. The solution is to detect this with a mutable hash map on
289+
// FetchClientConfig to track outstanding timers and cancel them.
290+
turbo_tasks::spawn(async move {
291+
tokio::time::sleep(remaining).await;
292+
invalidator.invalidate_with_reason(&*turbo_tasks::turbo_tasks(), HttpTimeout {});
293+
});
294+
}
295+
296+
Ok(*result)
297+
}
298+
}
299+
300+
/// Parses the `max-age` directive from a `Cache-Control` header value.
301+
/// Returns the max-age in seconds, or `None` if not present or unparseable.
302+
/// None means we shouldn't cache longer than the current session
303+
fn parse_cache_control(headers: &reqwest::header::HeaderMap) -> Option<u64> {
304+
let value = headers.get(reqwest::header::CACHE_CONTROL)?.to_str().ok()?;
305+
let mut max_age = None;
306+
for directive in value.split(',') {
307+
let (key, val) = {
308+
if let Some(index) = directive.find('=') {
309+
(directive[0..index].trim(), Some(&directive[index + 1..]))
310+
} else {
311+
(directive.trim(), None)
131312
}
313+
};
314+
if key.eq_ignore_ascii_case("max-age")
315+
&& let Some(val) = val
316+
{
317+
max_age = val.trim().parse().ok();
318+
} else if key.eq_ignore_ascii_case("no-cache") || key.eq_ignore_ascii_case("no-store") {
319+
return None;
132320
}
133321
}
322+
max_age
134323
}
135324

136325
#[doc(hidden)]
@@ -142,3 +331,35 @@ pub fn __test_only_reqwest_client_cache_clear() {
142331
pub fn __test_only_reqwest_client_cache_len() -> usize {
143332
CLIENT_CACHE.len()
144333
}
334+
335+
#[cfg(test)]
336+
mod tests {
337+
use reqwest::header::{CACHE_CONTROL, HeaderMap, HeaderValue};
338+
339+
use super::parse_cache_control;
340+
341+
fn headers(value: &str) -> HeaderMap {
342+
let mut h = HeaderMap::new();
343+
h.insert(CACHE_CONTROL, HeaderValue::from_str(value).unwrap());
344+
h
345+
}
346+
347+
#[test]
348+
fn max_age() {
349+
assert_eq!(parse_cache_control(&headers("max-age=300")), Some(300));
350+
assert_eq!(parse_cache_control(&headers("MAX-AGE = 300")), Some(300));
351+
assert_eq!(
352+
parse_cache_control(&headers("public, max-age=3600, must-revalidate")),
353+
Some(3600)
354+
);
355+
}
356+
357+
#[test]
358+
fn no_cache_headers() {
359+
assert_eq!(parse_cache_control(&headers("NO-CACHE")), None);
360+
assert_eq!(parse_cache_control(&headers("no-cache")), None);
361+
assert_eq!(parse_cache_control(&headers("no-store")), None);
362+
assert_eq!(parse_cache_control(&headers("max-age=300, no-store")), None);
363+
assert_eq!(parse_cache_control(&HeaderMap::new()), None);
364+
}
365+
}

0 commit comments

Comments
 (0)