Skip to content

Commit dd1b2ff

Browse files
muirdm
authored and facebook-github-bot committed
Back out "Back out "[sl] http: add "per batch" concurrent request limit""
Summary: We backed this out thinking it could have caused S513355, but we later determined it was innocent. Summary from original diff D73602841: I think it is useful to have two concurrent request limits - one global (per client object), and one per batch of requests. By batch, I mean, for example, when you fetch 10 million files and split into 1_000 separate requests each with 10_000 files - you have a batch of 1k requests. You don't really want or need to fire off a lot of requests at once because you can saturate your bandwidth with a small number of requests, and having more requests than you need can unnecessarily overload the server, and make individual requests take longer than otherwise needed to finish, which increases the odds of timeouts or other network errors. The global limit will be set higher, allowing for other concurrent requests to not be starved by a single big batch. Reviewed By: lmvasquezg Differential Revision: D75107491 fbshipit-source-id: b105f319e731ad3cef6adcb8ac3e4574d015d16c
1 parent 07fa11f commit dd1b2ff

2 files changed

Lines changed: 46 additions & 17 deletions

File tree

eden/scm/lib/edenapi/src/builder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,9 @@ impl HttpClientBuilder {
210210
let max_requests = get_config(config, "edenapi", "max-concurrent-requests")?
211211
.or(get_config(config, "edenapi", "maxrequests")?);
212212

213+
let max_requests_per_batch =
214+
get_config(config, "edenapi", "max-concurrent-requests-per-batch")?;
215+
213216
let try_route_consistently =
214217
get_config(config, "edenapi", "try-route-consistently")?.unwrap_or_default();
215218

@@ -305,6 +308,7 @@ impl HttpClientBuilder {
305308
let mut http_config = hg_http::http_config(config, &server_url)?;
306309
http_config.verbose_stats |= debug;
307310
http_config.max_concurrent_requests = max_requests;
311+
http_config.max_concurrent_requests_per_batch = max_requests_per_batch;
308312

309313
let builder = HttpClientBuilder {
310314
repo_name,

eden/scm/lib/http-client/src/client.rs

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ pub struct Config {
6565
// library to limit the number of in-flight requests separately, _before_ the
6666
// requests are given to curl.
6767
pub max_concurrent_requests: Option<usize>,
68+
69+
// Limit the number of concurrent requests for a "batch" of requests passed at once to
70+
// client.send_async(). This allows us to have a high global request limit for small
71+
// "random" requests, while having lower limits for heavy requests (e.g. fetch 10m
72+
// files across 1000 requests).
73+
pub max_concurrent_requests_per_batch: Option<usize>,
74+
6875
// Escape hatch to turn off our request limiting.
6976
pub limit_requests: bool,
7077
// Escape hatch to turn off our response body limiting.
@@ -98,6 +105,7 @@ impl Default for Config {
98105
client_info: None,
99106
disable_tls_verification: false,
100107
max_concurrent_requests: None, // No limit by default
108+
max_concurrent_requests_per_batch: None,
101109
limit_requests: true,
102110
limit_response_buffering: true,
103111
unix_socket_domains: HashSet::new(),
@@ -251,23 +259,37 @@ impl HttpClient {
251259
/// until all of the transfers are complete, and will return
252260
/// the total stats across all transfers when complete.
253261
pub fn stream(&self, requests: Vec<StreamRequest>) -> Result<Stats, HttpClientError> {
262+
// This is a "local" limit for how many concurrent requests we allow for a single
263+
// batch of requests. Requests are still subject to the global limit via self.claimer.
264+
let mut allowed_requests = self
265+
.config
266+
.max_concurrent_requests_per_batch
267+
.unwrap_or(requests.len());
268+
254269
// Add as many of remaining requests to the handle as we can, limited by the claimer.
255-
let try_add =
256-
|h: &MultiDriver, reqs: &mut IntoIter<StreamRequest>| -> Result<(), HttpClientError> {
257-
for claim in self.claimer.try_claim_requests(reqs.len()) {
258-
let mut request = match reqs.next() {
259-
Some(request) => request,
260-
// Shouldn't happen, but just in case.
261-
None => break,
262-
};
263-
264-
self.event_listeners
265-
.trigger_new_request(request.request.ctx_mut());
266-
h.add(request.into_easy(claim)?)?;
267-
}
270+
let try_add = |h: &MultiDriver,
271+
reqs: &mut IntoIter<StreamRequest>,
272+
allowed_requests: &mut usize|
273+
-> Result<(), HttpClientError> {
274+
for claim in self
275+
.claimer
276+
.try_claim_requests((*allowed_requests).min(reqs.len()))
277+
{
278+
let mut request = match reqs.next() {
279+
Some(request) => request,
280+
// Shouldn't happen, but just in case.
281+
None => break,
282+
};
283+
284+
self.event_listeners
285+
.trigger_new_request(request.request.ctx_mut());
286+
h.add(request.into_easy(claim)?)?;
287+
288+
*allowed_requests -= 1;
289+
}
268290

269-
Ok(())
270-
};
291+
Ok(())
292+
};
271293

272294
let mut requests = requests.into_iter();
273295
let mut stats = Stats::default();
@@ -282,7 +304,7 @@ impl HttpClient {
282304
let driver = MultiDriver::new(multi.get(), self.config.verbose_stats);
283305

284306
// Add requests to the driver. This can add anywhere from zero to all the requests.
285-
try_add(&driver, &mut requests)?;
307+
try_add(&driver, &mut requests, &mut allowed_requests)?;
286308

287309
let mut tls_error = false;
288310
let result = driver
@@ -296,14 +318,17 @@ impl HttpClient {
296318

297319
self.report_result_and_drop_receiver(res)?;
298320

321+
allowed_requests += 1;
322+
299323
// A request finished - let's see if there are pending requests we can now add
300324
// to this multi. This allows pending requests to proceed without needing to
301325
// wait for _all_ in-progress requests to finish. Note that there may be other
302326
// curl multis active bound by the same request limit, so it is still possible
303327
// for our pending requests to wait longer than they need to (i.e. when a
304328
// request finishes on a different multi, our loop here will still wait for one
305329
// of our requests to finish before trying to enqueue new requests).
306-
try_add(&driver, &mut requests).map_err(|err| Abort::WithReason(err.into()))
330+
try_add(&driver, &mut requests, &mut allowed_requests)
331+
.map_err(|err| Abort::WithReason(err.into()))
307332
})
308333
.inspect(|stats| {
309334
self.event_listeners.trigger_stats(stats);

0 commit comments

Comments
 (0)