-
Notifications
You must be signed in to change notification settings - Fork 0
[codex] Defer Deepwell text block S3 cleanup until commit from PR #1 #75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,9 +31,83 @@ use rsmq_async::Rsmq; | |
| use s3::bucket::Bucket; | ||
| use sea_orm::DatabaseTransaction; | ||
| use std::collections::HashSet; | ||
| use std::sync::Arc; | ||
| use std::sync::{Arc, Mutex}; | ||
| use tokio::sync::OnceCell; | ||
|
|
||
| #[derive(Debug)] | ||
| enum PostCommitAction { | ||
| DeleteTextBlockObjects(Vec<String>), | ||
| } | ||
|
|
||
| /// Request-scoped work that must only run after the database transaction commits. | ||
| /// | ||
| /// These actions protect transaction consistency: database rollback must not | ||
| /// delete external objects that still describe committed rows. The queue is | ||
| /// intentionally best-effort and in-memory; guaranteed reclamation across | ||
| /// process crashes or persistent S3 failures requires a separate durable | ||
| /// cleanup outbox. | ||
| #[derive(Debug, Clone, Default)] | ||
| pub struct PostCommitActions { | ||
| actions: Arc<Mutex<Vec<PostCommitAction>>>, | ||
| } | ||
|
|
||
| impl PostCommitActions { | ||
| pub fn delete_text_block_objects<I>(&self, filenames: I) | ||
| where | ||
| I: IntoIterator<Item = String>, | ||
| { | ||
| let filenames: Vec<String> = filenames | ||
| .into_iter() | ||
| .filter(|filename| !filename.is_empty()) | ||
| .collect(); | ||
| if filenames.is_empty() { | ||
| return; | ||
| } | ||
|
|
||
| self.actions | ||
| .lock() | ||
| .expect("post-commit actions mutex poisoned") | ||
| .push(PostCommitAction::DeleteTextBlockObjects(filenames)); | ||
| } | ||
|
|
||
| pub fn pending_count(&self) -> usize { | ||
| self.actions | ||
| .lock() | ||
| .expect("post-commit actions mutex poisoned") | ||
| .len() | ||
| } | ||
|
|
||
| pub async fn run_after_commit(&self, state: &ServerState) { | ||
| let actions = { | ||
| let mut actions = self | ||
| .actions | ||
| .lock() | ||
| .expect("post-commit actions mutex poisoned"); | ||
| std::mem::take(&mut *actions) | ||
| }; | ||
|
|
||
| for action in actions { | ||
| match action { | ||
| PostCommitAction::DeleteTextBlockObjects(filenames) => { | ||
| delete_text_block_objects_after_commit(state, filenames).await; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| async fn delete_text_block_objects_after_commit( | ||
| state: &ServerState, | ||
| filenames: Vec<String>, | ||
| ) { | ||
| let bucket = &state.s3_tblocks_bucket; | ||
| for filename in filenames { | ||
| if let Err(error) = bucket.delete_object(&filename).await { | ||
| warn!("Failed to delete committed stale S3 text block {filename}: {error}"); | ||
| } | ||
| } | ||
| } | ||
|
Comment on lines
+80
to
+109
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗄️ Data Integrity & Integration | 🔵 Trivial Make post-commit cleanup durable if S3 reclamation must be guaranteed.
|
||
|
|
||
| /// Per-request context derived from HTTP headers by the middleware layer. | ||
| #[derive(Debug, Clone, Default)] | ||
| pub struct RequestContext { | ||
|
|
@@ -84,6 +158,7 @@ pub struct ServiceContext<'txn> { | |
| state: ServerState, | ||
| transaction: &'txn DatabaseTransaction, | ||
| request_ctx: RequestContext, | ||
| post_commit_actions: PostCommitActions, | ||
| user_permissions: OnceCell<HashSet<Permission<'static>>>, | ||
| } | ||
|
|
||
|
|
@@ -97,6 +172,7 @@ impl<'txn> ServiceContext<'txn> { | |
| state: Arc::clone(state), | ||
| transaction, | ||
| request_ctx: RequestContext::default(), | ||
| post_commit_actions: PostCommitActions::default(), | ||
| user_permissions: OnceCell::new(), | ||
| } | ||
| } | ||
|
|
@@ -109,6 +185,17 @@ impl<'txn> ServiceContext<'txn> { | |
| } | ||
| } | ||
|
|
||
| #[inline] | ||
| pub fn with_post_commit_actions( | ||
| self, | ||
| post_commit_actions: PostCommitActions, | ||
| ) -> Self { | ||
| Self { | ||
| post_commit_actions, | ||
| ..self | ||
| } | ||
| } | ||
|
|
||
| #[inline] | ||
| /// Internal method to update the request context, for use in testing only. | ||
| pub fn set_request_for_test(&mut self, request_ctx: RequestContext) { | ||
|
|
@@ -164,6 +251,11 @@ impl<'txn> ServiceContext<'txn> { | |
| self.transaction | ||
| } | ||
|
|
||
| #[inline] | ||
| pub fn post_commit_actions(&self) -> &PostCommitActions { | ||
| &self.post_commit_actions | ||
| } | ||
|
|
||
| #[inline] | ||
| pub fn request(&self) -> &RequestContext { | ||
| &self.request_ctx | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| /* | ||
| * tests/text_block.rs | ||
| * | ||
| * DEEPWELL - Wikijump API provider and database manager | ||
| * Copyright (C) 2019-2026 Wikijump Team | ||
| * | ||
| * This program is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU Affero General Public License as published by | ||
| * the Free Software Foundation, either version 3 of the License, or | ||
| * (at your option) any later version. | ||
| * | ||
| * This program is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU Affero General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU Affero General Public License | ||
| * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #[allow(unused_imports)] | ||
| mod common; | ||
|
|
||
| use self::common::TestRunner; | ||
| use deepwell::services::PostCommitActions; | ||
| use uuid::Uuid; | ||
|
|
||
| #[tokio::test] | ||
| async fn post_commit_text_block_cleanup_deletes_only_after_drain() { | ||
| let runner = TestRunner::setup().await; | ||
| let bucket = runner.context().s3_tblocks_bucket(); | ||
| let filename = format!("test-post-commit-text-block-{}", Uuid::new_v4()); | ||
|
|
||
| bucket | ||
| .put_object_with_content_type(&filename, b"stale text block", "text/plain") | ||
| .await | ||
| .expect("test text-block object should upload"); | ||
| assert_object_status(bucket, &filename, 200).await; | ||
|
|
||
| let actions = PostCommitActions::default(); | ||
| actions.delete_text_block_objects([filename.clone()]); | ||
| assert_eq!(actions.pending_count(), 1); | ||
|
|
||
| actions.run_after_commit(runner.state()).await; | ||
|
|
||
| assert_eq!(actions.pending_count(), 0); | ||
| assert_object_status(bucket, &filename, 404).await; | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn queued_text_block_cleanup_preserves_objects_when_not_drained() { | ||
| let runner = TestRunner::setup().await; | ||
| let bucket = runner.context().s3_tblocks_bucket(); | ||
| let filename = format!("test-rollback-text-block-{}", Uuid::new_v4()); | ||
|
|
||
| bucket | ||
| .put_object_with_content_type(&filename, b"rollback text block", "text/plain") | ||
| .await | ||
| .expect("test text-block object should upload"); | ||
| assert_object_status(bucket, &filename, 200).await; | ||
|
|
||
| let actions = PostCommitActions::default(); | ||
| actions.delete_text_block_objects([filename.clone()]); | ||
| assert_eq!(actions.pending_count(), 1); | ||
|
|
||
| assert_object_status(bucket, &filename, 200).await; | ||
|
|
||
| bucket | ||
| .delete_object(&filename) | ||
| .await | ||
| .expect("rollback preservation test object should clean up"); | ||
| } | ||
|
Comment on lines
+50
to
+72
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 📐 Maintainability & Code Quality | 🔵 Trivial | 🏗️ Heavy lift Exercise a real rollback instead of only skipping the drain. This proves queued actions are inert until |
||
|
|
||
| async fn assert_object_status( | ||
| bucket: &s3::bucket::Bucket, | ||
| filename: &str, | ||
| expected: u16, | ||
| ) { | ||
| let (_head, status) = bucket | ||
| .head_object(filename) | ||
| .await | ||
| .expect("test text-block object HEAD should complete"); | ||
| assert_eq!( | ||
| status, expected, | ||
| "unexpected S3 status for text-block object {filename}", | ||
| ); | ||
| } | ||
|
Comment on lines
+74
to
+87
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🩺 Stability & Availability | 🟡 Minor 🧩 Analysis chain🌐 Web query:
💡 Result: In the rust-s3 crate (version 0.37 and later), the Citations:
🏁 Script executed: #!/bin/bash
set -euo pipefail
# Inspect the relevant test and the rust-s3 dependency version used by the repo.
git ls-files deepwell/tests/text_block.rs Cargo.toml Cargo.lock | sed -n '1,20p'
printf '\n== deepwell/tests/text_block.rs ==\n'
cat -n deepwell/tests/text_block.rs | sed -n '1,180p'
printf '\n== rust-s3 references ==\n'
rg -n 'rust-s3|s3 =' Cargo.toml Cargo.lock deepwell -g 'Cargo.toml' -g 'Cargo.lock' -g '*.toml' -g '*.lock' -g '*.rs'Repository: Rokurolize/wikijump Length of output: 4148 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Inspect the relevant test and the rust-s3 dependency version used by the repo.
git ls-files deepwell/tests/text_block.rs Cargo.toml Cargo.lock | sed -n '1,20p'
printf '\n== deepwell/tests/text_block.rs ==\n'
cat -n deepwell/tests/text_block.rs | sed -n '1,180p'
printf '\n== rust-s3 references ==\n'
rg -n 'rust-s3|s3 =' Cargo.toml Cargo.lock deepwell -g 'Cargo.toml' -g 'Cargo.lock' -g '*.toml' -g '*.lock' -g '*.rs'Repository: Rokurolize/wikijump Length of output: 4148 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Inspect the relevant test and the rust-s3 dependency version used by the repo.
git ls-files deepwell/tests/text_block.rs Cargo.toml Cargo.lock | sed -n '1,20p'
printf '\n== deepwell/tests/text_block.rs ==\n'
cat -n deepwell/tests/text_block.rs | sed -n '1,180p'
printf '\n== rust-s3 references ==\n'
rg -n 'rust-s3|s3 =' Cargo.toml Cargo.lock deepwell -g 'Cargo.toml' -g 'Cargo.lock' -g '*.toml' -g '*.lock' -g '*.rs'Repository: Rokurolize/wikijump Length of output: 4148 🏁 Script executed: #!/bin/bash
set -euo pipefail
git ls-files deepwell/tests/text_block.rs Cargo.toml Cargo.lock | sed -n '1,20p'
printf '\n== deepwell/tests/text_block.rs ==\n'
cat -n deepwell/tests/text_block.rs | sed -n '1,180p'
printf '\n== rust-s3 references ==\n'
rg -n 'rust-s3|s3 =' Cargo.toml Cargo.lock deepwell -g 'Cargo.toml' -g 'Cargo.lock' -g '*.toml' -g '*.lock' -g '*.rs'Repository: Rokurolize/wikijump Length of output: 4148 Handle the 404 case explicitly. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🚀 Performance & Scalability | 🔵 Trivial | ⚡ Quick win
Post-commit deletes run inline before returning the response.
run_after_commitawaits each S3delete_objectsequentially on the request task, so the client response is held until cleanup finishes (and slow/failing S3 inflates latency). Consider spawning the drain on a background task (tokio::spawn) so cleanup doesn't block the response path.