Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/catalog/openapi.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ Debug output includes:
The FDW automatically handles pagination. It supports:

1. **Cursor-based pagination** - Uses `cursor_param` and `cursor_path`
2. **URL-based pagination** - Follows `next` links in response
3. **Offset-based pagination** - Auto-detected from common patterns
2. **URL-based pagination** - Follows `next` links in response body (e.g., `/links/next`, `/meta/pagination/next`)
3. **`Link` header pagination** - Follows [RFC 8288](https://datatracker.ietf.org/doc/html/rfc8288) `Link: <...>; rel="next"` response headers (used by GitHub, GitLab, and many other REST APIs)
4. **Offset-based pagination** - Auto-detected from common patterns

### Configuring Pagination

Expand Down
4 changes: 2 additions & 2 deletions docs/catalog/wasm/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ Foreign data wrappers built with Wasm which can be used on Supabase platform.

Supported by [Supabase](https://www.supabase.com)

:octicons-tag-24: [v0.1.4](https://github.com/supabase/wrappers/releases/tag/wasm_openapi_fdw_v0.1.4) &nbsp;
:octicons-code-24: [source](https://github.com/supabase/wrappers/tree/wasm_openapi_fdw_v0.1.4/wasm-wrappers/fdw/openapi_fdw) &nbsp;
:octicons-tag-24: [v0.2.0](https://github.com/supabase/wrappers/releases/tag/wasm_openapi_fdw_v0.2.0) &nbsp;
:octicons-code-24: [source](https://github.com/supabase/wrappers/tree/wasm_openapi_fdw_v0.2.0/wasm-wrappers/fdw/openapi_fdw) &nbsp;
:material-file-document: [docs](../openapi.md)

- :simple-webassembly: &nbsp; **[Orb](../orb.md)**
Expand Down
20 changes: 20 additions & 0 deletions wasm-wrappers/fdw/openapi_fdw/examples/github/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,26 @@ WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'open'
LIMIT 5;
```

### Pagination across pages (RFC 8288 `Link` header)

GitHub paginates list responses using the [RFC 8288](https://datatracker.ietf.org/doc/html/rfc8288) `Link` response header. When the FDW sees a `Link: <...>; rel="next"` header it follows the link to fetch the next page, up to `max_pages` (default 1000). No JSON-body cursor configuration is needed.

The `supabase/wrappers` repo has well over one page of PRs, so the count below forces multiple HTTP requests behind one SQL query:

```sql
SELECT count(*) AS total_prs
FROM repo_pulls
WHERE owner = 'supabase' AND repo = 'wrappers' AND state = 'all';
```

| total_prs |
| --- |
| 487 |

> Your number will reflect the current state of the repo. With `page_size '30'` on the `github` server, this single SQL query issues ~17 HTTP GETs to `/repos/supabase/wrappers/pulls`, each chained from the previous response's `Link: <...>; rel="next"`. Bumping `page_size` to `'100'` (the GitHub maximum) reduces that to ~5 requests.

To watch it happen, point the table at the `github_debug` server and look for the sequence of `HTTP GET ... -> 200` INFO messages — one per page.

## 7. Releases

Paginated list of releases for a repository:
Expand Down
5 changes: 3 additions & 2 deletions wasm-wrappers/fdw/openapi_fdw/src/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,9 @@ impl OpenApiFdw {

stats::inc_stats(FDW_NAME, stats::Metric::BytesIn, resp.body.len() as i64);

// Handle pagination before extracting data (borrows resp_json)
self.handle_pagination(&resp_json);
// Handle pagination before extracting data (borrows resp_json).
// Headers are needed for RFC 8288 Link-header pagination (e.g., GitHub).
self.handle_pagination(&resp_json, &resp.headers);

// Extract data by taking ownership (avoids cloning the array)
self.src_rows = self.extract_data(&mut resp_json)?;
Expand Down
109 changes: 104 additions & 5 deletions wasm-wrappers/fdw/openapi_fdw/src/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,21 @@ impl OpenApiFdw {
}
}

/// Handle pagination from the response
pub(crate) fn handle_pagination(&mut self, resp: &JsonValue) {
/// Handle pagination from the response.
///
/// Precedence (first match wins):
/// 1. Explicit `cursor_path` configured by the user
/// 2. RFC 8288 `Link` header with `rel="next"` (GitHub, GitLab, etc.)
/// 3. JSON-body auto-detection: known next-URL paths
/// 4. JSON-body auto-detection: `has_more` flag + cursor paths
pub(crate) fn handle_pagination(
&mut self,
resp: &JsonValue,
headers: &[(String, String)],
) {
self.pagination.clear_next();

// Try configured cursor path first
// 1. Try configured cursor path first (explicit user config wins)
if !self.cursor_path.is_empty() {
if let Some(value) = Self::extract_non_empty_string(resp, &self.cursor_path) {
if value.starts_with("http://") || value.starts_with("https://") {
Expand All @@ -121,12 +131,19 @@ impl OpenApiFdw {
}
}

// Only try auto-detection for object responses
// 2. RFC 8288 Link header with rel="next" (cross-origin protection
// is enforced later in resolve_pagination_url when the URL is used).
if let Some(url) = find_link_header_next(headers) {
self.pagination.next = Some(PaginationToken::Url(url));
return;
}

// Only try body auto-detection for object responses
if resp.as_object().is_none() {
return;
}

// Check for next URL in common locations
// 3. Check for next URL in common JSON-body locations
for path in NEXT_URL_PATHS {
if let Some(url) = Self::extract_non_empty_string(resp, path) {
self.pagination.next = Some(PaginationToken::Url(url));
Expand Down Expand Up @@ -163,6 +180,88 @@ impl OpenApiFdw {
}
}

/// Return the target URL of the first `rel="next"` link found in any `Link`
/// response header.
///
/// `headers` is scanned in order. A header participates when its name equals
/// `link` ignoring ASCII case; its value is handed to
/// `parse_link_header_next`, and the first header that yields a URL wins.
/// Returns `None` when no `Link` header yields a URL.
pub(crate) fn find_link_header_next(headers: &[(String, String)]) -> Option<String> {
    for (header_name, header_value) in headers {
        if !header_name.eq_ignore_ascii_case("link") {
            continue;
        }
        // Keep scanning later `Link` headers when this one has no "next" entry.
        if let Some(next_url) = parse_link_header_next(header_value) {
            return Some(next_url);
        }
    }
    None
}

/// Extract the "next" target from a single RFC 8288 `Link` header value.
///
/// The value is broken into entries by `split_link_entries`. Each entry is
/// expected to look like `<uri>; param=value; ...`; entries that do not start
/// with `<`, have no closing `>`, or have an empty URI are skipped. The
/// trimmed URI of the first remaining entry for which `has_rel_next` accepts
/// the trailing parameters is returned, e.g. for
/// `<https://api/items?page=2>; rel="next", <https://api/items?page=10>; rel="last"`
/// the result is `https://api/items?page=2`.
fn parse_link_header_next(value: &str) -> Option<String> {
    split_link_entries(value).into_iter().find_map(|raw_entry| {
        // Everything between the leading '<' and the first '>' is the URI;
        // whatever follows the '>' is the parameter list.
        let after_open = raw_entry.trim().strip_prefix('<')?;
        let (target, params) = after_open.split_once('>')?;
        let target = target.trim();
        (!target.is_empty() && has_rel_next(params)).then(|| target.to_string())
    })
}

/// Split a `Link` header value into entries on top-level commas.
///
/// A comma separates entries only when it is outside an angle-bracketed URI
/// (`<...>`) and outside a double-quoted parameter value. Inside a quoted
/// value a backslash escapes the following character, so `\"` does not end
/// the quoted region. Double quotes that appear inside `<...>` are treated as
/// ordinary URI characters.
///
/// The returned slices borrow from `s` and are not trimmed. The result always
/// contains at least one element (the empty string for empty input).
fn split_link_entries(s: &str) -> Vec<&str> {
    let mut entries = Vec::new();
    // Unsigned on purpose: `saturating_sub` then clamps at zero, so a stray
    // '>' cannot push the depth negative and suppress every later split.
    let mut depth = 0u32;
    let mut in_quotes = false;
    let mut escaped = false;
    let mut start = 0;
    for (i, c) in s.char_indices() {
        if in_quotes {
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == '"' {
                in_quotes = false;
            }
            continue;
        }
        match c {
            '<' => depth += 1,
            '>' => depth = depth.saturating_sub(1),
            // Only a quote outside the URI opens a quoted parameter value.
            '"' if depth == 0 => in_quotes = true,
            ',' if depth == 0 => {
                entries.push(&s[start..i]);
                start = i + 1;
            }
            _ => {}
        }
    }
    // Trailing entry (possibly empty) after the last top-level comma.
    entries.push(&s[start..]);
    entries
}

/// Returns true if the parameter list (the text after the `<uri>` part of a
/// Link entry) has a `rel` parameter whose value is or includes `next`.
///
/// Parameters are separated by `;`, except that a `;` inside a double-quoted
/// value (where a backslash escapes the next character) is part of that value
/// rather than a separator. This keeps text such as `title="a; rel=next"`
/// from being mistaken for a `rel` parameter.
///
/// The parameter name and the relation types are compared ignoring ASCII
/// case, and the value may hold several whitespace-separated relation types
/// (`rel="next prev"`). Only the first `rel` parameter is consulted; later
/// ones are ignored.
fn has_rel_next(params: &str) -> bool {
    // Classify one `name=value` parameter: `None` when it is not a `rel`
    // parameter, otherwise whether its relation types include `next`.
    fn rel_includes_next(param: &str) -> Option<bool> {
        let (name, value) = param.trim().split_once('=')?;
        if !name.trim().eq_ignore_ascii_case("rel") {
            return None;
        }
        let value = value.trim().trim_matches('"');
        Some(
            value
                .split_ascii_whitespace()
                .any(|rel_type| rel_type.eq_ignore_ascii_case("next")),
        )
    }

    let mut in_quotes = false;
    let mut escaped = false;
    let mut start = 0;
    for (i, c) in params.char_indices() {
        if in_quotes {
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == '"' {
                in_quotes = false;
            }
            continue;
        }
        match c {
            '"' => in_quotes = true,
            ';' => {
                // The first `rel` parameter decides the answer.
                if let Some(found) = rel_includes_next(&params[start..i]) {
                    return found;
                }
                start = i + 1;
            }
            _ => {}
        }
    }
    rel_includes_next(&params[start..]).unwrap_or(false)
}

#[cfg(test)]
#[path = "response_tests.rs"]
mod tests;
Loading
Loading