From b39a366eabff694036157838640360eaad5e6bba Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Tue, 5 May 2026 15:13:56 -0500 Subject: [PATCH 1/4] fix(openapi_fdw): handle RFC 8288 Link header pagination handle_pagination only read JSON-body pagination paths (/links/next, /meta/pagination/next, etc.) and never inspected the HTTP Link response header. APIs that paginate exclusively via Link (GitHub, GitLab, most RFC 5988 / RFC 8288 REST services) never auto-paginated beyond the first page, regardless of max_pages, page_size, or page_size_param. Thread response headers into handle_pagination and parse rel="next" entries before the existing JSON-body lookups. Cross-origin protection is preserved by the existing resolve_pagination_url path. Precedence (first match wins): 1. Configured cursor_path 2. RFC 8288 Link header (rel="next") 3. JSON-body next-URL paths 4. has_more flag + cursor paths Verified end-to-end against the real GitHub API: count(*) on repo_pulls for supabase/wrappers fetches 468 PRs across 16 chained HTTP requests, each following the previous response's Link header. Closes #598 --- docs/catalog/openapi.md | 5 +- docs/catalog/wasm/index.md | 4 +- .../fdw/openapi_fdw/examples/github/README.md | 20 ++ wasm-wrappers/fdw/openapi_fdw/src/request.rs | 5 +- wasm-wrappers/fdw/openapi_fdw/src/response.rs | 109 ++++++++- .../fdw/openapi_fdw/src/response_tests.rs | 231 ++++++++++++++++-- 6 files changed, 341 insertions(+), 33 deletions(-) diff --git a/docs/catalog/openapi.md b/docs/catalog/openapi.md index 68ec45d7a..72aefe990 100644 --- a/docs/catalog/openapi.md +++ b/docs/catalog/openapi.md @@ -311,8 +311,9 @@ Debug output includes: The FDW automatically handles pagination. It supports: 1. **Cursor-based pagination** - Uses `cursor_param` and `cursor_path` -2. **URL-based pagination** - Follows `next` links in response -3. **Offset-based pagination** - Auto-detected from common patterns +2. **URL-based pagination** - Follows `next` links in response body (e.g., `/links/next`, `/meta/pagination/next`) +3. **`Link` header pagination** - Follows [RFC 8288](https://datatracker.ietf.org/doc/html/rfc8288) `Link: <...>; rel="next"` response headers (GitHub, GitLab, and most REST APIs) +4. **Offset-based pagination** - Auto-detected from common patterns ### Configuring Pagination diff --git a/docs/catalog/wasm/index.md b/docs/catalog/wasm/index.md index 868147a08..6ac99d6f3 100644 --- a/docs/catalog/wasm/index.md +++ b/docs/catalog/wasm/index.md @@ -117,8 +117,8 @@ Foreign data wrappers built with Wasm which can be used on Supabase platform. Supported by [Supabase](https://www.supabase.com) - :octicons-tag-24: [v0.1.4](https://github.com/supabase/wrappers/releases/tag/wasm_openapi_fdw_v0.1.4)   - :octicons-code-24: [source](https://github.com/supabase/wrappers/tree/wasm_openapi_fdw_v0.1.4/wasm-wrappers/fdw/openapi_fdw)   + :octicons-tag-24: [v0.2.0](https://github.com/supabase/wrappers/releases/tag/wasm_openapi_fdw_v0.2.0)   + :octicons-code-24: [source](https://github.com/supabase/wrappers/tree/wasm_openapi_fdw_v0.2.0/wasm-wrappers/fdw/openapi_fdw)   :material-file-document: [docs](../openapi.md) - :simple-webassembly:   **[Orb](../orb.md)** diff --git a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md index 2f030b230..c3710a250 100644 --- a/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md +++ b/wasm-wrappers/fdw/openapi_fdw/examples/github/README.md @@ -379,6 +379,26 @@ WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'open' LIMIT 5; ``` +### Pagination across pages (RFC 8288 `Link` header) + +GitHub paginates list responses using the [RFC 8288](https://datatracker.ietf.org/doc/html/rfc8288) `Link` response header. When the FDW sees a `Link: <...>; rel="next"` header it follows the link to fetch the next page, up to `max_pages` (default 1000). No JSON-body cursor configuration is needed. + +The `supabase/wrappers` repo has well over one page of PRs, so the count below forces multiple HTTP requests behind one SQL query: + +```sql +SELECT count(*) AS total_prs +FROM repo_pulls +WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'all'; +``` + +| total_prs | +| --- | +| 487 | + +> Your number will reflect the current state of the repo. With `page_size '30'` on the `github` server, this single SQL query issues ~17 HTTP GETs to `/repos/supabase/wrappers/pulls`, each chained from the previous response's `Link: <...>; rel="next"`. Bumping `page_size` to `'100'` (the GitHub maximum) reduces that to ~5 requests. + +To watch it happen, point the table at the `github_debug` server and look for the sequence of `HTTP GET ... -> 200` INFO messages — one per page. + ## 7. Releases Paginated list of releases for a repository: diff --git a/wasm-wrappers/fdw/openapi_fdw/src/request.rs b/wasm-wrappers/fdw/openapi_fdw/src/request.rs index 3dc2c2206..d66c89c05 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/request.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/request.rs @@ -535,8 +535,9 @@ impl OpenApiFdw { stats::inc_stats(FDW_NAME, stats::Metric::BytesIn, resp.body.len() as i64); - // Handle pagination before extracting data (borrows resp_json) - self.handle_pagination(&resp_json); + // Handle pagination before extracting data (borrows resp_json). + // Headers are needed for RFC 8288 Link-header pagination (e.g., GitHub). + self.handle_pagination(&resp_json, &resp.headers); // Extract data by taking ownership (avoids cloning the array) self.src_rows = self.extract_data(&mut resp_json)?; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/response.rs b/wasm-wrappers/fdw/openapi_fdw/src/response.rs index fc054ecf5..9cb90a905 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/response.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/response.rs @@ -105,11 +105,21 @@ impl OpenApiFdw { } } - /// Handle pagination from the response - pub(crate) fn handle_pagination(&mut self, resp: &JsonValue) { + /// Handle pagination from the response. + /// + /// Precedence (first match wins): + /// 1. Explicit `cursor_path` configured by the user + /// 2. RFC 8288 `Link` header with `rel="next"` (GitHub, GitLab, etc.) + /// 3. JSON-body auto-detection: known next-URL paths + /// 4. JSON-body auto-detection: `has_more` flag + cursor paths + pub(crate) fn handle_pagination( + &mut self, + resp: &JsonValue, + headers: &[(String, String)], + ) { self.pagination.clear_next(); - // Try configured cursor path first + // 1. Try configured cursor path first (explicit user config wins) if !self.cursor_path.is_empty() { if let Some(value) = Self::extract_non_empty_string(resp, &self.cursor_path) { if value.starts_with("http://") || value.starts_with("https://") { @@ -121,12 +131,19 @@ impl OpenApiFdw { } } - // Only try auto-detection for object responses + // 2. RFC 8288 Link header with rel="next" (cross-origin protection + // is enforced later in resolve_pagination_url when the URL is used). + if let Some(url) = find_link_header_next(headers) { + self.pagination.next = Some(PaginationToken::Url(url)); + return; + } + + // Only try body auto-detection for object responses if resp.as_object().is_none() { return; } - // Check for next URL in common locations + // 3. Check for next URL in common JSON-body locations for path in NEXT_URL_PATHS { if let Some(url) = Self::extract_non_empty_string(resp, path) { self.pagination.next = Some(PaginationToken::Url(url)); @@ -163,6 +180,88 @@ impl OpenApiFdw { } } +/// Find the URL of the first `Link` header entry with `rel="next"`. +/// +/// Header names are matched case-insensitively. Multiple `Link` headers +/// (whether comma-concatenated or sent as separate entries) are all searched. +pub(crate) fn find_link_header_next(headers: &[(String, String)]) -> Option { + headers + .iter() + .filter(|(name, _)| name.eq_ignore_ascii_case("link")) + .find_map(|(_, value)| parse_link_header_next(value)) +} + +/// Parse an RFC 8288 `Link` header value and return the URL of the first +/// entry whose `rel` parameter contains the value `next`. +/// +/// Handles multi-link headers like: +/// `; rel="next", ; rel="last"` +/// and multi-rel values like `rel="next prev"`. +fn parse_link_header_next(value: &str) -> Option { + for entry in split_link_entries(value) { + let entry = entry.trim(); + if !entry.starts_with('<') { + continue; + } + let Some(close) = entry.find('>') else { + continue; + }; + let url = entry[1..close].trim(); + if url.is_empty() { + continue; + } + let params = &entry[close + 1..]; + if has_rel_next(params) { + return Some(url.to_string()); + } + } + None +} + +/// Split a Link header value into entries on top-level commas, ignoring +/// commas inside angle-bracketed URIs or quoted strings. +fn split_link_entries(s: &str) -> Vec<&str> { + let mut entries = Vec::new(); + let mut depth = 0i32; + let mut in_quotes = false; + let mut start = 0; + for (i, c) in s.char_indices() { + match c { + '<' if !in_quotes => depth += 1, + '>' if !in_quotes => depth = depth.saturating_sub(1), + '"' => in_quotes = !in_quotes, + ',' if depth == 0 && !in_quotes => { + entries.push(&s[start..i]); + start = i + 1; + } + _ => {} + } + } + if start <= s.len() { + entries.push(&s[start..]); + } + entries +} + +/// Returns true if the parameter list (after the URI part of a Link entry) +/// contains a `rel` parameter whose value is or includes `next`. +fn has_rel_next(params: &str) -> bool { + for raw in params.split(';') { + let part = raw.trim(); + let Some((name, value)) = part.split_once('=') else { + continue; + }; + if !name.trim().eq_ignore_ascii_case("rel") { + continue; + } + let value = value.trim().trim_matches('"'); + return value + .split_ascii_whitespace() + .any(|v| v.eq_ignore_ascii_case("next")); + } + false +} + #[cfg(test)] #[path = "response_tests.rs"] mod tests; diff --git a/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs index b016e36bf..75130298d 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs @@ -311,7 +311,7 @@ fn make_fdw_for_pagination(cursor_path: &str) -> OpenApiFdw { fn test_handle_pagination_cursor_path_token() { let mut fdw = make_fdw_for_pagination("/cursor"); let resp = serde_json::json!({"cursor": "abc123", "data": []}); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Cursor("abc123".to_string())) @@ -325,7 +325,7 @@ fn test_handle_pagination_cursor_path_full_url() { "pagination": {"next": "https://api.example.com/items?cursor=xyz"}, "data": [] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -338,7 +338,7 @@ fn test_handle_pagination_cursor_path_full_url() { fn test_handle_pagination_cursor_path_http_url() { let mut fdw = make_fdw_for_pagination("/next"); let resp = serde_json::json!({"next": "http://api.example.com/page2"}); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -351,7 +351,7 @@ fn test_handle_pagination_cursor_path_http_url() { fn test_handle_pagination_cursor_path_missing() { let mut fdw = make_fdw_for_pagination("/cursor"); let resp = serde_json::json!({"data": []}); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert!(fdw.pagination.next.is_none()); } @@ -362,7 +362,7 @@ fn test_handle_pagination_auto_detect_next_url() { "pagination": {"next": "https://api.example.com/items?page=2"}, "data": [] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -379,7 +379,7 @@ fn test_handle_pagination_auto_detect_links_next() { "links": {"next": "https://api.example.com/page2"}, "data": [] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -397,7 +397,7 @@ fn test_handle_pagination_auto_detect_has_more_with_cursor() { "next_cursor": "cursor_xyz", "data": [{"id": 1}] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Cursor("cursor_xyz".to_string())) @@ -413,7 +413,7 @@ fn test_handle_pagination_has_more_false_stops() { "next_cursor": "stale_cursor", "data": [{"id": 1}] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert!(fdw.pagination.next.is_none()); } @@ -429,7 +429,7 @@ fn test_handle_pagination_auto_detect_meta_pagination() { }, "data": [] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -443,7 +443,7 @@ fn test_handle_pagination_empty_string_next_url_stops() { // Empty string cursor_path value should be treated as "no more pages" let mut fdw = make_fdw_for_pagination("/next"); let resp = serde_json::json!({"next": "", "data": []}); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert!(fdw.pagination.next.is_none()); } @@ -452,7 +452,7 @@ fn test_handle_pagination_null_cursor_stops() { // Null cursor should mean end of pagination let mut fdw = make_fdw_for_pagination("/cursor"); let resp = serde_json::json!({"cursor": null, "data": []}); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert!(fdw.pagination.next.is_none()); } @@ -461,7 +461,7 @@ fn test_handle_pagination_array_response_no_autodetect() { // Auto-detection should not run on array responses let mut fdw = make_fdw_for_pagination(""); let resp = serde_json::json!([{"id": 1}, {"id": 2}]); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert!(fdw.pagination.next.is_none()); } @@ -478,7 +478,7 @@ fn test_meta_pagination_has_more_nested() { }, "data": [{"id": 1}, {"id": 2}] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Cursor("cursor_abc123".to_string())) @@ -493,7 +493,7 @@ fn test_handle_pagination_has_more_true_but_no_cursor() { "has_more": true, "data": [{"id": 1}] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert!(fdw.pagination.next.is_none()); } @@ -505,7 +505,7 @@ fn test_handle_pagination_next_url_direct_key() { "next_url": "https://api.example.com/items?page=3", "data": [{"id": 1}] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -524,7 +524,7 @@ fn test_handle_pagination_pagination_next_url() { }, "data": [] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -541,7 +541,7 @@ fn test_handle_pagination_next_direct() { "next": "https://api.example.com/items?cursor=xyz", "data": [{"id": 1}] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -555,7 +555,7 @@ fn test_handle_pagination_cursor_path_integer_value() { // Cursor path resolves to an integer — should be treated as non-string, ignored let mut fdw = make_fdw_for_pagination("/cursor"); let resp = serde_json::json!({"cursor": 12345, "data": []}); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); // extract_non_empty_string returns None for non-string values assert!(fdw.pagination.next.is_none()); } @@ -571,7 +571,7 @@ fn test_handle_pagination_pagination_has_more_with_cursor() { }, "data": [{"id": 1}] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Cursor("pg_cursor_99".to_string())) @@ -590,7 +590,7 @@ fn test_handle_pagination_meta_next_url() { }, "data": [] }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -637,7 +637,7 @@ fn test_github_direct_array() { // Array responses should not trigger auto-pagination let mut pagination_fdw = make_fdw_for_pagination(""); let array_resp = serde_json::json!([{"id": 1}, {"id": 2}]); - pagination_fdw.handle_pagination(&array_resp); + pagination_fdw.handle_pagination(&array_resp, &[]); assert!(pagination_fdw.pagination.next.is_none()); } @@ -652,7 +652,7 @@ fn test_hal_links_next_href_pagination() { "next": {"href": "https://api.example.com/items?page=2"} } }); - fdw.handle_pagination(&resp); + fdw.handle_pagination(&resp, &[]); assert_eq!( fdw.pagination.next, Some(PaginationToken::Url( @@ -759,3 +759,190 @@ fn test_json_to_rows_error_shows_type() { let err = OpenApiFdw::json_to_rows(serde_json::json!(42)).unwrap_err(); assert!(err.contains("number"), "Error should show type: {err}"); } + +// --- RFC 8288 Link header pagination tests --- + +fn h(name: &str, value: &str) -> (String, String) { + (name.to_string(), value.to_string()) +} + +#[test] +fn test_link_header_picks_rel_next() { + // GitHub-style multi-rel Link header + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!([{"id": 1}, {"id": 2}]); + let headers = vec![h( + "Link", + "; rel=\"next\", \ + ; rel=\"last\"", + )]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url( + "https://api.github.com/repos/x/y/pulls?page=2".to_string() + )) + ); +} + +#[test] +fn test_link_header_case_insensitive_name() { + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({}); + let headers = vec![h("link", "; rel=\"next\"")]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url( + "https://api.example.com/items?page=2".to_string() + )) + ); +} + +#[test] +fn test_link_header_unquoted_rel() { + // RFC 8288 also allows unquoted rel values + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({}); + let headers = vec![h("Link", "; rel=next")]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + ); +} + +#[test] +fn test_link_header_multi_rel_value() { + // rel="next prev" — space-separated multi-rel + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({}); + let headers = vec![h("Link", "; rel=\"next prev\"")]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + ); +} + +#[test] +fn test_link_header_with_other_params() { + // Extra params (title, type) shouldn't confuse the parser + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({}); + let headers = vec![h( + "Link", + "; title=\"page 2\"; rel=\"next\"; type=\"text/html\"", + )]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + ); +} + +#[test] +fn test_link_header_no_next_returns_none() { + // Last page: only rel="prev" and rel="first", no rel="next" + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!([{"id": 480}]); + let headers = vec![h( + "Link", + "; rel=\"prev\", \ + ; rel=\"first\"", + )]; + fdw.handle_pagination(&resp, &headers); + assert!(fdw.pagination.next.is_none()); +} + +#[test] +fn test_link_header_array_response_still_paginates() { + // Regression for the bug: GitHub returns an array body with Link header. + // Body autodetect skips arrays, but Link header must still drive pagination. + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!([{"id": 1}, {"id": 2}]); + let headers = vec![h("Link", "; rel=\"next\"")]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url( + "https://api.github.com/items?page=2".to_string() + )) + ); +} + +#[test] +fn test_link_header_beats_json_body_autodetect() { + // When both are present, Link header wins (it's a real RFC vs. heuristic paths) + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({ + "links": {"next": "https://api.example.com/from-body"}, + "data": [] + }); + let headers = vec![h("Link", "; rel=\"next\"")]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url( + "https://api.example.com/from-header".to_string() + )) + ); +} + +#[test] +fn test_configured_cursor_path_beats_link_header() { + // Explicit user config wins over header autodetect. + let mut fdw = make_fdw_for_pagination("/cursor"); + let resp = serde_json::json!({"cursor": "explicit_cursor"}); + let headers = vec![h("Link", "; rel=\"next\"")]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Cursor("explicit_cursor".to_string())) + ); +} + +#[test] +fn test_link_header_falls_back_to_body_when_no_next() { + // No rel="next" in header — fall through to body auto-detection. + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({ + "links": {"next": "https://api.example.com/from-body"}, + "data": [] + }); + let headers = vec![h("Link", "; rel=\"last\"")]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url( + "https://api.example.com/from-body".to_string() + )) + ); +} + +#[test] +fn test_link_header_multiple_separate_entries() { + // Some clients keep multiple Link headers as separate entries instead of + // concatenating with ", ". Both forms must work. + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({}); + let headers = vec![ + h("Link", "; rel=\"first\""), + h("Link", "; rel=\"next\""), + ]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + ); +} + +#[test] +fn test_link_header_empty_or_malformed() { + // Malformed entries should not panic and should not produce a next URL. + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({}); + let headers = vec![h("Link", "not-a-link-header,,<>; rel=\"next\"")]; + fdw.handle_pagination(&resp, &headers); + assert!(fdw.pagination.next.is_none()); +} From 1deedc32892857586764c309ebfd885cf5d41133 Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Tue, 5 May 2026 15:29:30 -0500 Subject: [PATCH 2/4] fix(openapi_fdw): honor RFC 7230 quoted-pair in Link header splitter split_link_entries toggled in_quotes on every `"` character without accounting for backslash-escaped quotes inside an HTTP quoted-string (e.g., title="a \"quoted, comma\" page"). With escapes unhandled the quote state could desync and a comma inside a quoted parameter value could be treated as an entry separator, causing rel="next" to be missed. Track quoted-pair escapes when scanning, so `\"` inside a quoted parameter is consumed as a literal and does not flip in_quotes. Addresses Copilot review feedback on #599. --- wasm-wrappers/fdw/openapi_fdw/src/response.rs | 10 +++++++++- .../fdw/openapi_fdw/src/response_tests.rs | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/wasm-wrappers/fdw/openapi_fdw/src/response.rs b/wasm-wrappers/fdw/openapi_fdw/src/response.rs index 9cb90a905..4cf7af77a 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/response.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/response.rs @@ -219,14 +219,22 @@ fn parse_link_header_next(value: &str) -> Option { } /// Split a Link header value into entries on top-level commas, ignoring -/// commas inside angle-bracketed URIs or quoted strings. +/// commas inside angle-bracketed URIs or quoted strings. Honors RFC 7230 +/// `quoted-pair` escapes so a backslash-escaped quote inside a parameter +/// value (e.g. `title="a \"q\""`) does not flip the quote state. fn split_link_entries(s: &str) -> Vec<&str> { let mut entries = Vec::new(); let mut depth = 0i32; let mut in_quotes = false; + let mut escape = false; let mut start = 0; for (i, c) in s.char_indices() { + if escape { + escape = false; + continue; + } match c { + '\\' if in_quotes => escape = true, '<' if !in_quotes => depth += 1, '>' if !in_quotes => depth = depth.saturating_sub(1), '"' => in_quotes = !in_quotes, diff --git a/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs index 75130298d..c8c0cd4d4 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs @@ -937,6 +937,25 @@ fn test_link_header_multiple_separate_entries() { ); } +#[test] +fn test_link_header_quoted_pair_escapes() { + // RFC 7230 quoted-pair: a backslash-escaped quote inside a parameter + // value must not flip the quote state, and a comma inside such a + // quoted-string must not be treated as an entry separator. + let mut fdw = make_fdw_for_pagination(""); + let resp = serde_json::json!({}); + let headers = vec![h( + "Link", + "; title=\"a \\\"quoted, comma\\\" page\"; rel=\"first\", \ + ; rel=\"next\"", + )]; + fdw.handle_pagination(&resp, &headers); + assert_eq!( + fdw.pagination.next, + Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + ); +} + #[test] fn test_link_header_empty_or_malformed() { // Malformed entries should not panic and should not produce a next URL. From 86e2f040657147300d32b4877f3eb684d8d5fcdd Mon Sep 17 00:00:00 2001 From: Cody Bromley Date: Tue, 12 May 2026 15:06:52 -0500 Subject: [PATCH 3/4] test(openapi_fdw): fix apply_headers assertions for default user-agent apply_headers now always adds a default user-agent when none is provided. Six tests still asserted the old header counts and indices from before that change was introduced. --- .../fdw/openapi_fdw/src/config_tests.rs | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs index 9cb024da4..5e0a2da16 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs @@ -155,9 +155,11 @@ fn test_restore_does_not_affect_non_pagination_fields() { fn test_apply_headers_content_type_always_added() { let mut config = ServerConfig::default(); config.apply_headers(None, None, None).unwrap(); - assert_eq!(config.headers.len(), 1); + // content-type + default user-agent (added when none provided) + assert_eq!(config.headers.len(), 2); assert_eq!(config.headers[0].0, "content-type"); assert_eq!(config.headers[0].1, "application/json"); + assert_eq!(config.headers[1].0, "user-agent"); } #[test] @@ -177,9 +179,10 @@ fn test_apply_headers_with_accept() { config .apply_headers(None, Some("application/geo+json".to_string()), None) .unwrap(); - assert_eq!(config.headers.len(), 2); - assert_eq!(config.headers[1].0, "accept"); - assert_eq!(config.headers[1].1, "application/geo+json"); + // content-type + default user-agent + accept + assert_eq!(config.headers.len(), 3); + assert_eq!(config.headers[2].0, "accept"); + assert_eq!(config.headers[2].1, "application/geo+json"); } #[test] @@ -209,10 +212,10 @@ fn test_apply_headers_custom_json() { Some(r#"{"X-Custom": "value1", "Feature-Flag": "beta"}"#.to_string()), ) .unwrap(); - // content-type + 2 custom headers - assert_eq!(config.headers.len(), 3); + // content-type + default user-agent + 2 custom headers + assert_eq!(config.headers.len(), 4); // Custom headers should be lowercased - let custom_headers: Vec<_> = config.headers[1..].to_vec(); + let custom_headers: Vec<_> = config.headers[2..].to_vec(); assert!( custom_headers .iter() @@ -235,8 +238,9 @@ fn test_apply_headers_custom_json_lowercases_keys() { Some(r#"{"X-API-KEY": "secret123"}"#.to_string()), ) .unwrap(); - assert_eq!(config.headers[1].0, "x-api-key"); - assert_eq!(config.headers[1].1, "secret123"); + // Index 0: content-type, index 1: default user-agent, index 2: custom header + assert_eq!(config.headers[2].0, "x-api-key"); + assert_eq!(config.headers[2].1, "secret123"); } #[test] @@ -263,8 +267,8 @@ fn test_apply_headers_empty_json_object() { config .apply_headers(None, None, Some("{}".to_string())) .unwrap(); - // Only content-type, no custom headers - assert_eq!(config.headers.len(), 1); + // content-type + default user-agent, no custom headers + assert_eq!(config.headers.len(), 2); } #[test] @@ -324,10 +328,10 @@ fn test_apply_headers_custom_content_type_replaces_default() { Some(r#"{"Content-Type": "text/xml"}"#.to_string()), ) .unwrap(); - // Custom content-type should replace the default, not add a duplicate - assert_eq!(config.headers.len(), 1); - assert_eq!(config.headers[0].0, "content-type"); - assert_eq!(config.headers[0].1, "text/xml"); + // Custom content-type replaces the default; default user-agent is still added + assert_eq!(config.headers.len(), 2); + let ct = config.headers.iter().find(|h| h.0 == "content-type").unwrap(); + assert_eq!(ct.1, "text/xml"); } #[test] From 3d2cb63a4e670b1f73df20104af04c3dd60fe780 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 19 May 2026 18:02:09 +1000 Subject: [PATCH 4/4] chore: format code for better readability in pagination handling tests --- .../fdw/openapi_fdw/src/config_tests.rs | 6 +++- wasm-wrappers/fdw/openapi_fdw/src/response.rs | 6 +--- .../fdw/openapi_fdw/src/response_tests.rs | 35 ++++++++++++++----- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs index 5e0a2da16..becde5a6f 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs @@ -330,7 +330,11 @@ fn test_apply_headers_custom_content_type_replaces_default() { .unwrap(); // Custom content-type replaces the default; default user-agent is still added assert_eq!(config.headers.len(), 2); - let ct = config.headers.iter().find(|h| h.0 == "content-type").unwrap(); + let ct = config + .headers + .iter() + .find(|h| h.0 == "content-type") + .unwrap(); assert_eq!(ct.1, "text/xml"); } diff --git a/wasm-wrappers/fdw/openapi_fdw/src/response.rs b/wasm-wrappers/fdw/openapi_fdw/src/response.rs index 4cf7af77a..aa765e6f7 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/response.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/response.rs @@ -112,11 +112,7 @@ impl OpenApiFdw { /// 2. RFC 8288 `Link` header with `rel="next"` (GitHub, GitLab, etc.) /// 3. JSON-body auto-detection: known next-URL paths /// 4. JSON-body auto-detection: `has_more` flag + cursor paths - pub(crate) fn handle_pagination( - &mut self, - resp: &JsonValue, - headers: &[(String, String)], - ) { + pub(crate) fn handle_pagination(&mut self, resp: &JsonValue, headers: &[(String, String)]) { self.pagination.clear_next(); // 1. Try configured cursor path first (explicit user config wins) diff --git a/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs b/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs index c8c0cd4d4..fc1a1c23f 100644 --- a/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs +++ b/wasm-wrappers/fdw/openapi_fdw/src/response_tests.rs @@ -789,7 +789,10 @@ fn test_link_header_picks_rel_next() { fn test_link_header_case_insensitive_name() { let mut fdw = make_fdw_for_pagination(""); let resp = serde_json::json!({}); - let headers = vec![h("link", "; rel=\"next\"")]; + let headers = vec![h( + "link", + "; rel=\"next\"", + )]; fdw.handle_pagination(&resp, &headers); assert_eq!( fdw.pagination.next, @@ -808,7 +811,9 @@ fn test_link_header_unquoted_rel() { fdw.handle_pagination(&resp, &headers); assert_eq!( fdw.pagination.next, - Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + Some(PaginationToken::Url( + "https://api.example.com/p2".to_string() + )) ); } @@ -821,7 +826,9 @@ fn test_link_header_multi_rel_value() { fdw.handle_pagination(&resp, &headers); assert_eq!( fdw.pagination.next, - Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + Some(PaginationToken::Url( + "https://api.example.com/p2".to_string() + )) ); } @@ -837,7 +844,9 @@ fn test_link_header_with_other_params() { fdw.handle_pagination(&resp, &headers); assert_eq!( fdw.pagination.next, - Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + Some(PaginationToken::Url( + "https://api.example.com/p2".to_string() + )) ); } @@ -861,7 +870,10 @@ fn test_link_header_array_response_still_paginates() { // Body autodetect skips arrays, but Link header must still drive pagination. let mut fdw = make_fdw_for_pagination(""); let resp = serde_json::json!([{"id": 1}, {"id": 2}]); - let headers = vec![h("Link", "; rel=\"next\"")]; + let headers = vec![h( + "Link", + "; rel=\"next\"", + )]; fdw.handle_pagination(&resp, &headers); assert_eq!( fdw.pagination.next, @@ -879,7 +891,10 @@ fn test_link_header_beats_json_body_autodetect() { "links": {"next": "https://api.example.com/from-body"}, "data": [] }); - let headers = vec![h("Link", "; rel=\"next\"")]; + let headers = vec![h( + "Link", + "; rel=\"next\"", + )]; fdw.handle_pagination(&resp, &headers); assert_eq!( fdw.pagination.next, @@ -933,7 +948,9 @@ fn test_link_header_multiple_separate_entries() { fdw.handle_pagination(&resp, &headers); assert_eq!( fdw.pagination.next, - Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + Some(PaginationToken::Url( + "https://api.example.com/p2".to_string() + )) ); } @@ -952,7 +969,9 @@ fn test_link_header_quoted_pair_escapes() { fdw.handle_pagination(&resp, &headers); assert_eq!( fdw.pagination.next, - Some(PaginationToken::Url("https://api.example.com/p2".to_string())) + Some(PaginationToken::Url( + "https://api.example.com/p2".to_string() + )) ); }