Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/catalog/openapi.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ Debug output includes:
The FDW automatically handles pagination. It supports:

1. **Cursor-based pagination** - Uses `cursor_param` and `cursor_path`
2. **URL-based pagination** - Follows `next` links in response
3. **Offset-based pagination** - Auto-detected from common patterns
2. **URL-based pagination** - Follows `next` links in response body (e.g., `/links/next`, `/meta/pagination/next`)
3. **`Link` header pagination** - Follows [RFC 8288](https://datatracker.ietf.org/doc/html/rfc8288) `Link: <...>; rel="next"` response headers (used by GitHub, GitLab, and many other REST APIs)
4. **Offset-based pagination** - Auto-detected from common patterns

### Configuring Pagination

Expand Down
4 changes: 2 additions & 2 deletions docs/catalog/wasm/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ Foreign data wrappers built with Wasm which can be used on Supabase platform.

Supported by [Supabase](https://www.supabase.com)

:octicons-tag-24: [v0.1.4](https://github.com/supabase/wrappers/releases/tag/wasm_openapi_fdw_v0.1.4) &nbsp;
:octicons-code-24: [source](https://github.com/supabase/wrappers/tree/wasm_openapi_fdw_v0.1.4/wasm-wrappers/fdw/openapi_fdw) &nbsp;
:octicons-tag-24: [v0.2.0](https://github.com/supabase/wrappers/releases/tag/wasm_openapi_fdw_v0.2.0) &nbsp;
:octicons-code-24: [source](https://github.com/supabase/wrappers/tree/wasm_openapi_fdw_v0.2.0/wasm-wrappers/fdw/openapi_fdw) &nbsp;
:material-file-document: [docs](../openapi.md)

- :simple-webassembly: &nbsp; **[Orb](../orb.md)**
Expand Down
20 changes: 20 additions & 0 deletions wasm-wrappers/fdw/openapi_fdw/examples/github/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,26 @@ WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'open'
LIMIT 5;
```

### Pagination across pages (RFC 8288 `Link` header)

GitHub paginates list responses using the [RFC 8288](https://datatracker.ietf.org/doc/html/rfc8288) `Link` response header. When the FDW sees a `Link: <...>; rel="next"` header it follows the link to fetch the next page, up to `max_pages` (default 1000). No JSON-body cursor configuration is needed.

The `supabase/wrappers` repo has well over one page of PRs, so the count below forces multiple HTTP requests behind one SQL query:

```sql
SELECT count(*) AS total_prs
FROM repo_pulls
WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'all';
```

| total_prs |
| --- |
| 487 |

> Your number will reflect the current state of the repo. With `page_size '30'` on the `github` server, this single SQL query issues ~17 HTTP GETs to `/repos/supabase/wrappers/pulls`, each chained from the previous response's `Link: <...>; rel="next"`. Bumping `page_size` to `'100'` (the GitHub maximum) reduces that to ~5 requests.

To watch it happen, point the table at the `github_debug` server and look for the sequence of `HTTP GET ... -> 200` INFO messages — one per page.

## 7. Releases

Paginated list of releases for a repository:
Expand Down
38 changes: 23 additions & 15 deletions wasm-wrappers/fdw/openapi_fdw/src/config_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,11 @@ fn test_restore_does_not_affect_non_pagination_fields() {
fn test_apply_headers_content_type_always_added() {
let mut config = ServerConfig::default();
config.apply_headers(None, None, None).unwrap();
assert_eq!(config.headers.len(), 1);
// content-type + default user-agent (added when none provided)
assert_eq!(config.headers.len(), 2);
assert_eq!(config.headers[0].0, "content-type");
assert_eq!(config.headers[0].1, "application/json");
assert_eq!(config.headers[1].0, "user-agent");
}

#[test]
Expand All @@ -177,9 +179,10 @@ fn test_apply_headers_with_accept() {
config
.apply_headers(None, Some("application/geo+json".to_string()), None)
.unwrap();
assert_eq!(config.headers.len(), 2);
assert_eq!(config.headers[1].0, "accept");
assert_eq!(config.headers[1].1, "application/geo+json");
// content-type + default user-agent + accept
assert_eq!(config.headers.len(), 3);
assert_eq!(config.headers[2].0, "accept");
assert_eq!(config.headers[2].1, "application/geo+json");
}

#[test]
Expand Down Expand Up @@ -209,10 +212,10 @@ fn test_apply_headers_custom_json() {
Some(r#"{"X-Custom": "value1", "Feature-Flag": "beta"}"#.to_string()),
)
.unwrap();
// content-type + 2 custom headers
assert_eq!(config.headers.len(), 3);
// content-type + default user-agent + 2 custom headers
assert_eq!(config.headers.len(), 4);
// Custom headers should be lowercased
let custom_headers: Vec<_> = config.headers[1..].to_vec();
let custom_headers: Vec<_> = config.headers[2..].to_vec();
assert!(
custom_headers
.iter()
Expand All @@ -235,8 +238,9 @@ fn test_apply_headers_custom_json_lowercases_keys() {
Some(r#"{"X-API-KEY": "secret123"}"#.to_string()),
)
.unwrap();
assert_eq!(config.headers[1].0, "x-api-key");
assert_eq!(config.headers[1].1, "secret123");
// Index 0: content-type, index 1: default user-agent, index 2: custom header
assert_eq!(config.headers[2].0, "x-api-key");
assert_eq!(config.headers[2].1, "secret123");
}

#[test]
Expand All @@ -263,8 +267,8 @@ fn test_apply_headers_empty_json_object() {
config
.apply_headers(None, None, Some("{}".to_string()))
.unwrap();
// Only content-type, no custom headers
assert_eq!(config.headers.len(), 1);
// content-type + default user-agent, no custom headers
assert_eq!(config.headers.len(), 2);
}

#[test]
Expand Down Expand Up @@ -324,10 +328,14 @@ fn test_apply_headers_custom_content_type_replaces_default() {
Some(r#"{"Content-Type": "text/xml"}"#.to_string()),
)
.unwrap();
// Custom content-type should replace the default, not add a duplicate
assert_eq!(config.headers.len(), 1);
assert_eq!(config.headers[0].0, "content-type");
assert_eq!(config.headers[0].1, "text/xml");
// Custom content-type replaces the default; default user-agent is still added
assert_eq!(config.headers.len(), 2);
let ct = config
.headers
.iter()
.find(|h| h.0 == "content-type")
.unwrap();
assert_eq!(ct.1, "text/xml");
}

#[test]
Expand Down
5 changes: 3 additions & 2 deletions wasm-wrappers/fdw/openapi_fdw/src/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,9 @@ impl OpenApiFdw {

stats::inc_stats(FDW_NAME, stats::Metric::BytesIn, resp.body.len() as i64);

// Handle pagination before extracting data (borrows resp_json)
self.handle_pagination(&resp_json);
// Handle pagination before extracting data (borrows resp_json).
// Headers are needed for RFC 8288 Link-header pagination (e.g., GitHub).
self.handle_pagination(&resp_json, &resp.headers);

// Extract data by taking ownership (avoids cloning the array)
self.src_rows = self.extract_data(&mut resp_json)?;
Expand Down
113 changes: 108 additions & 5 deletions wasm-wrappers/fdw/openapi_fdw/src/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,17 @@ impl OpenApiFdw {
}
}

/// Handle pagination from the response
pub(crate) fn handle_pagination(&mut self, resp: &JsonValue) {
/// Handle pagination from the response.
///
/// Precedence (first match wins):
/// 1. Explicit `cursor_path` configured by the user
/// 2. RFC 8288 `Link` header with `rel="next"` (GitHub, GitLab, etc.)
/// 3. JSON-body auto-detection: known next-URL paths
/// 4. JSON-body auto-detection: `has_more` flag + cursor paths
pub(crate) fn handle_pagination(&mut self, resp: &JsonValue, headers: &[(String, String)]) {
self.pagination.clear_next();

// Try configured cursor path first
// 1. Try configured cursor path first (explicit user config wins)
if !self.cursor_path.is_empty() {
if let Some(value) = Self::extract_non_empty_string(resp, &self.cursor_path) {
if value.starts_with("http://") || value.starts_with("https://") {
Expand All @@ -121,12 +127,19 @@ impl OpenApiFdw {
}
}

// Only try auto-detection for object responses
// 2. RFC 8288 Link header with rel="next" (cross-origin protection
// is enforced later in resolve_pagination_url when the URL is used).
if let Some(url) = find_link_header_next(headers) {
self.pagination.next = Some(PaginationToken::Url(url));
return;
}

// Only try body auto-detection for object responses
if resp.as_object().is_none() {
return;
}

// Check for next URL in common locations
// 3. Check for next URL in common JSON-body locations
for path in NEXT_URL_PATHS {
if let Some(url) = Self::extract_non_empty_string(resp, path) {
self.pagination.next = Some(PaginationToken::Url(url));
Expand Down Expand Up @@ -163,6 +176,96 @@ impl OpenApiFdw {
}
}

/// Return the `rel="next"` target from the first `Link` header that has one.
///
/// Header names are compared ASCII case-insensitively. Every header named
/// `link` is examined in order; each value is handed to
/// `parse_link_header_next`, and the first URL it yields is returned.
/// Returns `None` when no `Link` header carries a `next` relation.
pub(crate) fn find_link_header_next(headers: &[(String, String)]) -> Option<String> {
    for (header_name, header_value) in headers {
        if !header_name.eq_ignore_ascii_case("link") {
            continue;
        }
        if let Some(next_url) = parse_link_header_next(header_value) {
            return Some(next_url);
        }
    }
    None
}

/// Extract the `rel="next"` target from a single RFC 8288 `Link` header value.
///
/// The value is split into entries with `split_link_entries`; for each entry
/// of the form `<uri>; params…` the parameters are checked with
/// `has_rel_next`. The trimmed URI of the first matching entry is returned.
///
/// Entries that do not start with `<`, have no closing `>`, or have an empty
/// URI are skipped. Multi-link headers such as
/// `<https://api/items?page=2>; rel="next", <https://api/items?page=10>; rel="last"`
/// and multi-valued relations such as `rel="next prev"` are supported.
fn parse_link_header_next(value: &str) -> Option<String> {
    split_link_entries(value).into_iter().find_map(|raw| {
        // `<` must be the first non-whitespace character of the entry.
        let after_open = raw.trim().strip_prefix('<')?;
        // Everything up to the first `>` is the URI; the rest is the parameter list.
        let (target, params) = after_open.split_once('>')?;
        let target = target.trim();
        (!target.is_empty() && has_rel_next(params)).then(|| target.to_string())
    })
}

/// Split a `Link` header value into its individual link entries.
///
/// Entries are separated by top-level commas. Commas inside an
/// angle-bracketed URI (`<…>`) or inside a quoted parameter value do not
/// split. RFC 7230 `quoted-pair` escapes are honored, so a backslash-escaped
/// quote inside a parameter value (e.g. `title="a \"q\""`) does not flip the
/// quote state.
///
/// The bracket depth is unsigned and clamped at zero: a stray `>` outside a
/// URI (e.g. in an unquoted parameter value) is ignored instead of driving
/// the depth negative, which would otherwise suppress splitting for the rest
/// of the header and merge unrelated entries together.
///
/// The returned slices borrow from `s` and are not trimmed. The result always
/// contains at least one element (the whole input when there is no top-level
/// comma, including the empty string).
fn split_link_entries(s: &str) -> Vec<&str> {
    let mut entries = Vec::new();
    let mut depth = 0u32;
    let mut in_quotes = false;
    let mut escape = false;
    let mut start = 0;
    for (i, c) in s.char_indices() {
        // The character following a backslash inside quotes is taken literally.
        if escape {
            escape = false;
            continue;
        }
        match c {
            '\\' if in_quotes => escape = true,
            '<' if !in_quotes => depth = depth.saturating_add(1),
            // Unsigned saturating_sub clamps at 0, so unmatched `>` is harmless.
            '>' if !in_quotes => depth = depth.saturating_sub(1),
            '"' => in_quotes = !in_quotes,
            ',' if depth == 0 && !in_quotes => {
                entries.push(&s[start..i]);
                // ',' is one byte, so the next entry starts right after it.
                start = i + 1;
            }
            _ => {}
        }
    }
    // Trailing entry (possibly empty); `start` never exceeds `s.len()`.
    entries.push(&s[start..]);
    entries
}

/// Returns true if the parameter list (the text after the `<uri>` part of a
/// Link entry) has a `rel` parameter whose value is or includes `next`.
///
/// Parameters are separated by semicolons that are *outside* quoted strings;
/// a semicolon inside a quoted value (e.g. `title="a; rel=next"`) does not
/// start a new parameter, and backslash escapes inside quotes are honored.
/// The parameter name and each relation type are compared ASCII
/// case-insensitively, and a space-separated relation list such as
/// `rel="prev next"` matches.
///
/// Only the first `rel` parameter is considered; later ones are ignored.
fn has_rel_next(params: &str) -> bool {
    // Split on semicolons that are not inside a quoted string.
    let mut parts: Vec<&str> = Vec::new();
    let mut in_quotes = false;
    let mut escape = false;
    let mut start = 0;
    for (i, c) in params.char_indices() {
        if escape {
            escape = false;
            continue;
        }
        match c {
            '\\' if in_quotes => escape = true,
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => {
                parts.push(&params[start..i]);
                start = i + 1;
            }
            _ => {}
        }
    }
    parts.push(&params[start..]);

    for part in parts {
        // Parameters without `=` (flags, empty segments) cannot be `rel`.
        let Some((name, value)) = part.trim().split_once('=') else {
            continue;
        };
        if !name.trim().eq_ignore_ascii_case("rel") {
            continue;
        }
        // The first `rel` decides the outcome, whether or not it contains `next`.
        let value = value.trim().trim_matches('"');
        return value
            .split_ascii_whitespace()
            .any(|v| v.eq_ignore_ascii_case("next"));
    }
    false
}

#[cfg(test)]
#[path = "response_tests.rs"]
mod tests;
Loading
Loading