1- use std:: { hash:: Hash , sync:: LazyLock } ;
1+ use std:: {
2+ cmp:: max,
3+ fmt:: { Display , Formatter } ,
4+ hash:: Hash ,
5+ sync:: LazyLock ,
6+ time:: { Duration , SystemTime } ,
7+ } ;
28
39use anyhow:: Result ;
410use quick_cache:: sync:: Cache ;
511use turbo_rcstr:: RcStr ;
6- use turbo_tasks:: { Completion , ReadRef , Vc , duration_span} ;
12+ use turbo_tasks:: {
13+ Completion , FxIndexSet , InvalidationReason , InvalidationReasonKind , Invalidator , ReadRef ,
14+ ResolvedVc , Vc , duration_span, util:: StaticOrArc ,
15+ } ;
716
817use crate :: { FetchError , FetchResult , HttpResponse , HttpResponseBody } ;
918
@@ -19,8 +28,21 @@ static CLIENT_CACHE: LazyLock<Cache<ReadRef<FetchClientConfig>, reqwest::Client>
1928/// This is needed because [`reqwest::ClientBuilder`] does not implement the required traits. This
2029/// factory cannot be a closure because closures do not implement `Eq` or `Hash`.
2130#[ turbo_tasks:: value( shared) ]
22- #[ derive( Hash , Default ) ]
23- pub struct FetchClientConfig { }
31+ #[ derive( Hash ) ]
32+ pub struct FetchClientConfig {
33+ /// Minimum cache TTL in seconds. Responses with a `Cache-Control: max-age` shorter than this
34+ /// will be clamped to this value. This prevents pathologically short timeouts from causing an
35+ /// invalidation bomb. Defaults to 1 hour.
36+ pub min_cache_control_secs : u64 ,
37+ }
38+
39+ impl Default for FetchClientConfig {
40+ fn default ( ) -> Self {
41+ Self {
42+ min_cache_control_secs : 60 * 60 ,
43+ }
44+ }
45+ }
2446
2547impl FetchClientConfig {
2648 /// Returns a cached instance of `reqwest::Client` it exists, otherwise constructs a new one.
@@ -34,7 +56,7 @@ impl FetchClientConfig {
3456 /// The reqwest client fails to construct if the TLS backend cannot be initialized, or the
3557 /// resolver cannot load the system configuration. These failures should be treated as
3658 /// cached for some amount of time, but ultimately transient (e.g. using
37- /// [`turbo_tasks::mark_session_dependent`] ).
59+ /// `session_dependent` ).
3860 pub fn try_get_cached_reqwest_client (
3961 self : ReadRef < FetchClientConfig > ,
4062 ) -> reqwest:: Result < reqwest:: Client > {
@@ -78,17 +100,66 @@ impl FetchClientConfig {
78100 }
79101}
80102
103+ /// Invalidation was caused by a max-age deadline returned by a server
104+ #[ derive( PartialEq , Eq , Hash ) ]
105+ pub ( crate ) struct HttpTimeout { }
106+
107+ impl InvalidationReason for HttpTimeout {
108+ fn kind ( & self ) -> Option < StaticOrArc < dyn InvalidationReasonKind > > {
109+ Some ( StaticOrArc :: Static ( & HTTP_TIMEOUT_KIND ) )
110+ }
111+ }
112+
113+ impl Display for HttpTimeout {
114+ fn fmt ( & self , f : & mut Formatter < ' _ > ) -> std:: fmt:: Result {
115+ write ! ( f, "http max-age timeout" )
116+ }
117+ }
118+
119+ /// Invalidation kind for [Write]
120+ #[ derive( PartialEq , Eq , Hash ) ]
121+ struct HttpTimeoutKind ;
122+
123+ static HTTP_TIMEOUT_KIND : HttpTimeoutKind = HttpTimeoutKind ;
124+
125+ impl InvalidationReasonKind for HttpTimeoutKind {
126+ fn fmt (
127+ & self ,
128+ reasons : & FxIndexSet < StaticOrArc < dyn InvalidationReason > > ,
129+ f : & mut Formatter < ' _ > ,
130+ ) -> std:: fmt:: Result {
131+ write ! ( f, "{} fetches timed out" , reasons. len( ) , )
132+ }
133+ }
134+
135+ /// Internal result from `fetch_inner` that includes the invalidator for TTL-based re-fetching.
136+ #[ turbo_tasks:: value( shared) ]
137+ struct FetchInnerResult {
138+ result : ResolvedVc < FetchResult > ,
139+ /// Invalidator for the `fetch_inner` task. Used by the outer `fetch` to set up a timer that
140+ /// triggers re-fetching when the Cache-Control max-age expires.
141+ invalidator : Option < Invalidator > ,
142+ /// Absolute deadline (seconds since UNIX epoch) after which the cached response should be
143+ /// re-fetched. Computed as `now + max-age` at fetch time. An absolute timestamp is used
144+ /// instead of a relative duration so that the remaining TTL is correct on warm cache restore.
145+ deadline_secs : Option < u64 > ,
146+ }
147+
81148#[ turbo_tasks:: value_impl]
82149impl FetchClientConfig {
150+ /// Performs the actual HTTP request. This task is `network` but NOT `session_dependent`, so
151+ /// its cached result survives restarts. The outer `fetch` task (which IS `session_dependent`)
152+ /// reads the cached invalidator and sets up a timer for TTL-based re-fetching.
83153 #[ turbo_tasks:: function( network) ]
84- pub async fn fetch (
154+ async fn fetch_inner (
85155 self : Vc < FetchClientConfig > ,
86156 url : RcStr ,
87157 user_agent : Option < RcStr > ,
88- ) -> Result < Vc < FetchResult > > {
158+ ) -> Result < Vc < FetchInnerResult > > {
89159 let url_ref = & * url;
90160 let this = self . await ?;
91- let response_result: reqwest:: Result < HttpResponse > = async move {
161+ let min_cache_control_secs = this. min_cache_control_secs ;
162+ let response_result: reqwest:: Result < ( HttpResponse , Option < u64 > ) > = async move {
92163 let reqwest_client = this. try_get_cached_reqwest_client ( ) ?;
93164
94165 let mut builder = reqwest_client. get ( url_ref) ;
@@ -103,34 +174,152 @@ impl FetchClientConfig {
103174 . and_then ( |r| r. error_for_status ( ) ) ?;
104175
105176 let status = response. status ( ) . as_u16 ( ) ;
177+ let max_age = parse_cache_control ( response. headers ( ) ) ;
106178
107179 let body = {
108180 let _span = duration_span ! ( "fetch response" , url = url_ref) ;
109181 response. bytes ( ) . await ?
110182 }
111183 . to_vec ( ) ;
112184
113- Ok ( HttpResponse {
114- status,
115- body : HttpResponseBody ( body) . resolved_cell ( ) ,
116- } )
185+ Ok ( (
186+ HttpResponse {
187+ status,
188+ body : HttpResponseBody ( body) . resolved_cell ( ) ,
189+ } ,
190+ max_age,
191+ ) )
117192 }
118193 . await ;
119194
120195 match response_result {
121- Ok ( resp) => Ok ( Vc :: cell ( Ok ( resp. resolved_cell ( ) ) ) ) ,
196+ Ok ( ( resp, max_age_secs) ) => {
197+ if let Some ( max_age_secs) = max_age_secs {
198+ let max_age_secs = max ( max_age_secs, min_cache_control_secs) ;
199+ let deadline_secs = {
200+ // Transform the relative offset to an absolute deadline so it can be
201+ // cached.
202+ let now = SystemTime :: now ( )
203+ . duration_since ( SystemTime :: UNIX_EPOCH )
204+ . expect ( "system clock is before UNIX epoch" )
205+ . as_secs ( ) ;
206+ now + max_age_secs
207+ } ;
208+ let invalidator = turbo_tasks:: get_invalidator ( ) ;
209+ Ok ( FetchInnerResult {
210+ result : ResolvedVc :: cell ( Ok ( resp. resolved_cell ( ) ) ) ,
211+ invalidator,
212+ deadline_secs : Some ( deadline_secs) ,
213+ }
214+ . cell ( ) )
215+ } else {
216+ Completion :: session_dependent ( ) . await ?;
217+ Ok ( FetchInnerResult {
218+ result : ResolvedVc :: cell ( Ok ( resp. resolved_cell ( ) ) ) ,
219+ invalidator : None ,
220+ deadline_secs : None ,
221+ }
222+ . cell ( ) )
223+ }
224+ }
122225 Err ( err) => {
123- // the client failed to construct or the HTTP request failed
124- // Mark session dependent so we get retried in the next sessions
125- // In dev our caller will keep going, but in prod builds this will fail the build
126- // anyway.
226+ // Read session_dependent_completion so that this task is re-dirtied on session
227+ // restore. This ensures transient errors (network down, DNS failure) are retried
228+ // on the next session without a timer or busy-loop.
127229 Completion :: session_dependent ( ) . as_side_effect ( ) . await ?;
128- Ok ( Vc :: cell ( Err (
129- FetchError :: from_reqwest_error ( & err, & url) . resolved_cell ( )
130- ) ) )
230+ Ok ( FetchInnerResult {
231+ result : ResolvedVc :: cell ( Err (
232+ FetchError :: from_reqwest_error ( & err, & url) . resolved_cell ( )
233+ ) ) ,
234+
235+ invalidator : None ,
236+ deadline_secs : None ,
237+ }
238+ . cell ( ) )
239+ }
240+ }
241+ }
242+
243+ /// Fetches the given URL and returns the response. Results are cached across sessions using
244+ /// TTL from the response's `Cache-Control: max-age` header.
245+ ///
246+ /// This is the outer task in a two-task pattern:
247+ /// - `fetch` (session_dependent): always re-executes on restore, reads the cached inner result,
248+ /// and spawns a timer for mid-session TTL expiry.
249+ /// - `fetch_inner` (network, NOT session_dependent): performs the actual HTTP request and stays
250+ /// cached across restarts. Returns an `Invalidator` that the outer task uses to trigger
251+ /// re-fetching when the TTL expires.
252+ #[ turbo_tasks:: function( network, session_dependent) ]
253+ pub async fn fetch (
254+ self : Vc < FetchClientConfig > ,
255+ url : RcStr ,
256+ user_agent : Option < RcStr > ,
257+ ) -> Result < Vc < FetchResult > > {
258+ let FetchInnerResult {
259+ result,
260+ deadline_secs,
261+ invalidator,
262+ } = * self . fetch_inner ( url, user_agent) . await ?;
263+
264+ // Set up a timer to invalidate fetch_inner when the TTL expires.
265+ // On warm cache restore, this re-executes (session_dependent), reads the persisted
266+ // deadline from fetch_inner's cached result, and starts a timer for the remaining time.
267+ //
268+ // Skip when dependency tracking is disabled (e.g. one-shot `next build`) since
269+ // invalidation panics without dependency tracking and the timer would be wasted work.
270+ if turbo_tasks:: turbo_tasks ( ) . is_tracking_dependencies ( )
271+ && let ( Some ( deadline_secs) , Some ( invalidator) ) = ( deadline_secs, invalidator)
272+ {
273+ // transform absolute deadline back to a relative duration for the sleep call
274+ let now = SystemTime :: now ( )
275+ . duration_since ( SystemTime :: UNIX_EPOCH )
276+ . expect ( "system clock is before UNIX epoch" )
277+ . as_secs ( ) ;
278+ let remaining = Duration :: from_secs ( deadline_secs. saturating_sub ( now) ) ;
279+ // NOTE: in the case where the deadline is expired on session start this timeout will
280+ // immediately invalidate and race with us returning. This is basically fine since in
281+ // the most common case the actual fetch result is identical so this gives us a kind of
282+ // 'stale while revalidate' feature.
283+ // alternatively we could synchronously invalidate and re-execute `fetch-inner` but that
284+ // simply adds latency in the common case where our fetch is identical.
285+ // NOTE(2): if for some reason `fetch` is re-executed but `fetch-inner` isn't we could
286+ // end up with multiple timers. Currently there is no known case where this could
287+ // happen, if it somehow does we could end up with redundant invalidations and
288+ // re-fetches. The solution is to detect this with a mutable hash map on
289+ // FetchClientConfig to track outstanding timers and cancel them.
290+ turbo_tasks:: spawn ( async move {
291+ tokio:: time:: sleep ( remaining) . await ;
292+ invalidator. invalidate_with_reason ( & * turbo_tasks:: turbo_tasks ( ) , HttpTimeout { } ) ;
293+ } ) ;
294+ }
295+
296+ Ok ( * result)
297+ }
298+ }
299+
300+ /// Parses the `max-age` directive from a `Cache-Control` header value.
301+ /// Returns the max-age in seconds, or `None` if not present or unparseable.
302+ /// None means we shouldn't cache longer than the current session
303+ fn parse_cache_control ( headers : & reqwest:: header:: HeaderMap ) -> Option < u64 > {
304+ let value = headers. get ( reqwest:: header:: CACHE_CONTROL ) ?. to_str ( ) . ok ( ) ?;
305+ let mut max_age = None ;
306+ for directive in value. split ( ',' ) {
307+ let ( key, val) = {
308+ if let Some ( index) = directive. find ( '=' ) {
309+ ( directive[ 0 ..index] . trim ( ) , Some ( & directive[ index + 1 ..] ) )
310+ } else {
311+ ( directive. trim ( ) , None )
131312 }
313+ } ;
314+ if key. eq_ignore_ascii_case ( "max-age" )
315+ && let Some ( val) = val
316+ {
317+ max_age = val. trim ( ) . parse ( ) . ok ( ) ;
318+ } else if key. eq_ignore_ascii_case ( "no-cache" ) || key. eq_ignore_ascii_case ( "no-store" ) {
319+ return None ;
132320 }
133321 }
322+ max_age
134323}
135324
136325#[ doc( hidden) ]
@@ -142,3 +331,35 @@ pub fn __test_only_reqwest_client_cache_clear() {
142331pub fn __test_only_reqwest_client_cache_len ( ) -> usize {
143332 CLIENT_CACHE . len ( )
144333}
334+
335+ #[ cfg( test) ]
336+ mod tests {
337+ use reqwest:: header:: { CACHE_CONTROL , HeaderMap , HeaderValue } ;
338+
339+ use super :: parse_cache_control;
340+
341+ fn headers ( value : & str ) -> HeaderMap {
342+ let mut h = HeaderMap :: new ( ) ;
343+ h. insert ( CACHE_CONTROL , HeaderValue :: from_str ( value) . unwrap ( ) ) ;
344+ h
345+ }
346+
347+ #[ test]
348+ fn max_age ( ) {
349+ assert_eq ! ( parse_cache_control( & headers( "max-age=300" ) ) , Some ( 300 ) ) ;
350+ assert_eq ! ( parse_cache_control( & headers( "MAX-AGE = 300" ) ) , Some ( 300 ) ) ;
351+ assert_eq ! (
352+ parse_cache_control( & headers( "public, max-age=3600, must-revalidate" ) ) ,
353+ Some ( 3600 )
354+ ) ;
355+ }
356+
357+ #[ test]
358+ fn no_cache_headers ( ) {
359+ assert_eq ! ( parse_cache_control( & headers( "NO-CACHE" ) ) , None ) ;
360+ assert_eq ! ( parse_cache_control( & headers( "no-cache" ) ) , None ) ;
361+ assert_eq ! ( parse_cache_control( & headers( "no-store" ) ) , None ) ;
362+ assert_eq ! ( parse_cache_control( & headers( "max-age=300, no-store" ) ) , None ) ;
363+ assert_eq ! ( parse_cache_control( & HeaderMap :: new( ) ) , None ) ;
364+ }
365+ }
0 commit comments