diff --git a/book/fault_proofs/challenger.md b/book/fault_proofs/challenger.md index 89035817d..26bbab18e 100644 --- a/book/fault_proofs/challenger.md +++ b/book/fault_proofs/challenger.md @@ -56,6 +56,7 @@ Either `PRIVATE_KEY` or both `SIGNER_URL` and `SIGNER_ADDRESS` must be set for t | `FETCH_INTERVAL` | Polling interval in seconds | `30` | | `CHALLENGER_METRICS_PORT` | The port to expose metrics on. Update prometheus.yml to use this port, if using docker compose. | `9001` | | `MALICIOUS_CHALLENGE_PERCENTAGE` | Percentage (0.0-100.0) of valid games to challenge for testing defense mechanisms | `0.0` | +| `CHALLENGER_BACKUP_PATH` | Path to backup file for persisting challenger state across restarts. Enables faster recovery by restoring cached state instead of re-syncing from the factory. | (disabled) | ```env # Required Configuration @@ -72,6 +73,7 @@ CHALLENGER_METRICS_PORT=9001 # The port to expose metrics on # Testing Configuration (Optional) MALICIOUS_CHALLENGE_PERCENTAGE=0.0 # Percentage of valid games to challenge for testing (0.0 = disabled) +CHALLENGER_BACKUP_PATH= # persist state across restarts (e.g. /backup/challenger_state.json) ``` ## Running diff --git a/book/fault_proofs/docker.md b/book/fault_proofs/docker.md index 1a8e8860f..3a0619aff 100644 --- a/book/fault_proofs/docker.md +++ b/book/fault_proofs/docker.md @@ -52,6 +52,7 @@ PRIVATE_KEY= # Private key for transaction signing # Optional Configuration FETCH_INTERVAL=30 # Polling interval in seconds +CHALLENGER_BACKUP_PATH= # persist state across restarts (e.g. /backup/challenger_state.json) ``` 2. Start the services: diff --git a/fault-proof/src/backup.rs b/fault-proof/src/backup.rs index cf5d99211..3817ed1e7 100644 --- a/fault-proof/src/backup.rs +++ b/fault-proof/src/backup.rs @@ -1,6 +1,6 @@ -//! Simple file-based state persistence for proposer recovery. +//! Simple file-based state persistence for proposer and challenger recovery. //! -//! On restart, the proposer can restore its cursor and game cache from a backup file, +//! On restart, the proposer/challenger can restore its cursor and game cache from a backup file, //! avoiding a full re-sync from the factory contract. use std::{io::Write, path::Path}; @@ -11,7 +11,23 @@ use alloy_primitives::U256; use anyhow::{bail, Context, Result}; use serde::{Deserialize, Serialize}; -use crate::proposer::Game; +use crate::{challenger::Game as ChallengerGame, proposer::Game}; + +/// Atomically save a serializable value as pretty-printed JSON (temp file + fsync + rename). +fn save_json(value: &impl Serialize, path: &Path, label: &str) -> Result<()> { + let json = serde_json::to_string_pretty(value) + .with_context(|| format!("failed to serialize {label} backup"))?; + let dir = path.parent().unwrap_or(Path::new(".")); + let mut temp = NamedTempFile::new_in(dir) + .with_context(|| format!("failed to create {label} backup temp file"))?; + temp.write_all(json.as_bytes()) + .with_context(|| format!("failed to write {label} backup temp file"))?; + temp.as_file() + .sync_all() + .with_context(|| format!("failed to sync {label} backup temp file"))?; + temp.persist(path).with_context(|| format!("failed to persist {label} backup file"))?; + Ok(()) +} /// Current backup format version. Increment when making breaking changes. pub const BACKUP_VERSION: u32 = 1; @@ -52,18 +68,8 @@ impl ProposerBackup { Ok(()) } - /// Save the backup to a file as JSON (atomic via temp file + rename with fsync). pub fn save(&self, path: &Path) -> Result<()> { - let json = - serde_json::to_string_pretty(self).context("failed to serialize proposer backup")?; - - let dir = path.parent().unwrap_or(Path::new(".")); - let mut temp = - NamedTempFile::new_in(dir).context("failed to create proposer backup temp file")?; - temp.write_all(json.as_bytes()).context("failed to write proposer backup temp file")?; - temp.as_file().sync_all().context("failed to sync proposer backup temp file")?; - temp.persist(path).context("failed to persist proposer backup file")?; - + save_json(self, path, "proposer")?; tracing::debug!(?path, games = self.games.len(), "Proposer state backed up"); Ok(()) } @@ -106,6 +112,71 @@ impl ProposerBackup { } } +// ==================== Challenger Backup ==================== + +/// Current challenger backup format version. Increment when making breaking changes. +pub const CHALLENGER_BACKUP_VERSION: u32 = 1; + +/// Serializable backup of the challenger state. +#[derive(Serialize, Deserialize)] +pub struct ChallengerBackup { + pub version: u32, + pub cursor: U256, + pub games: Vec, +} + +impl ChallengerBackup { + pub fn new(cursor: U256, games: Vec) -> Self { + Self { version: CHALLENGER_BACKUP_VERSION, cursor, games } + } + + /// Validate backup integrity. + pub fn validate(&self) -> Result<()> { + // Cursor with no games indicates a stale or corrupted backup. + if self.games.is_empty() && self.cursor > U256::ZERO { + bail!("cursor exists but no games"); + } + Ok(()) + } + + pub fn save(&self, path: &Path) -> Result<()> { + save_json(self, path, "challenger")?; + tracing::debug!(?path, games = self.games.len(), "Challenger state backed up"); + Ok(()) + } + + /// Load and validate a backup from file. Returns None if unavailable or invalid. + pub fn load(path: &Path) -> Option { + let json = std::fs::read_to_string(path).ok()?; + + let backup = match serde_json::from_str::(&json) { + Ok(b) => b, + Err(e) => { + tracing::warn!(?path, error = %e, "Failed to parse challenger backup, starting fresh"); + return None; + } + }; + + if backup.version != CHALLENGER_BACKUP_VERSION { + tracing::warn!( + ?path, + backup_version = backup.version, + current_version = CHALLENGER_BACKUP_VERSION, + "Challenger backup version mismatch, starting fresh" + ); + return None; + } + + if let Err(e) = backup.validate() { + tracing::warn!(?path, error = %e, "Challenger backup validation failed, starting fresh"); + return None; + } + + tracing::info!(?path, games = backup.games.len(), "Challenger backup loaded"); + Some(backup) + } +} + #[cfg(test)] mod tests { use super::*; @@ -169,4 +240,55 @@ mod tests { "ProposerBackup schema changed! Bump BACKUP_VERSION in backup.rs" ); } + + #[test] + fn challenger_backup_schema_guard() { + use crate::contract::{GameStatus, ProposalStatus}; + use alloy_primitives::Address; + + let game = ChallengerGame { + index: U256::ZERO, + address: Address::ZERO, + parent_index: 0, + l2_block_number: U256::ZERO, + is_invalid: false, + status: GameStatus::IN_PROGRESS, + proposal_status: ProposalStatus::Unchallenged, + should_attempt_to_challenge: false, + should_attempt_to_resolve: false, + should_attempt_to_claim_bond: false, + }; + + let json = serde_json::to_value(&game).unwrap(); + let mut keys: Vec<_> = json.as_object().unwrap().keys().cloned().collect(); + keys.sort(); + + assert_eq!( + keys, + vec![ + "address", + "index", + "is_invalid", + "l2_block_number", + "parent_index", + "proposal_status", + "should_attempt_to_challenge", + "should_attempt_to_claim_bond", + "should_attempt_to_resolve", + "status", + ], + "ChallengerGame schema changed! Bump CHALLENGER_BACKUP_VERSION in backup.rs" + ); + + let backup = ChallengerBackup::new(U256::ZERO, vec![]); + let json = serde_json::to_value(&backup).unwrap(); + let mut keys: Vec<_> = json.as_object().unwrap().keys().cloned().collect(); + keys.sort(); + + assert_eq!( + keys, + vec!["cursor", "games", "version"], + "ChallengerBackup schema changed! Bump CHALLENGER_BACKUP_VERSION in backup.rs" + ); + } } diff --git a/fault-proof/src/challenger.rs b/fault-proof/src/challenger.rs index 33568d925..a5d529029 100644 --- a/fault-proof/src/challenger.rs +++ b/fault-proof/src/challenger.rs @@ -1,5 +1,6 @@ use std::{ collections::HashMap, + path::Path, sync::{Arc, OnceLock}, time::Duration, }; @@ -9,9 +10,11 @@ use alloy_primitives::{Address, U256}; use alloy_provider::{Provider, ProviderBuilder}; use anyhow::{bail, Context, Result}; use rand::{rngs::StdRng, Rng, SeedableRng}; +use serde::{Deserialize, Serialize}; use tokio::{sync::Mutex, time}; use crate::{ + backup::ChallengerBackup, config::ChallengerConfig, contract::{ AnchorStateRegistry::AnchorStateRegistryInstance, @@ -97,6 +100,8 @@ where continue } + self.backup().await; + if let Err(e) = self.handle_game_challenging().await { tracing::warn!("Failed to handle game challenging: {:?}", e); } @@ -137,10 +142,31 @@ where Ok(()) } - /// Validates startup and initializes state. + /// Validates startup, initializes state, and restores from backup if available. async fn validate_and_init(&self) -> Result<()> { let bond = self.startup_validations().await?; self.init_state(bond); + + if let Some(path) = &self.config.backup_path { + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() && !parent.exists() { + anyhow::bail!("backup path parent directory does not exist: {:?}", parent); + } + } + + let dir = path.parent().unwrap_or(Path::new(".")); + tempfile::NamedTempFile::new_in(dir) + .with_context(|| format!("backup path is not writable: {:?}", path))?; + + if let Some(restored) = ChallengerState::try_restore(path) { + let mut state = self.state.lock().await; + state.cursor = restored.cursor; + state.games = restored.games; + } else if path.exists() { + tracing::warn!(?path, "Failed to restore challenger state from backup"); + } + } + Ok(()) } @@ -648,6 +674,21 @@ where Ok(()) } + /// Backup challenger state to disk. No-op if backup_path is not configured. + async fn backup(&self) { + let Some(path) = &self.config.backup_path else { return }; + let backup = { + let state = self.state.lock().await; + state.to_backup() + }; + let path = path.clone(); + tokio::task::spawn_blocking(move || { + if let Err(e) = backup.save(&path) { + tracing::warn!("Failed to backup challenger state: {:?}", e); + } + }); + } + // ==================== Integration Test Helpers ==================== /// Returns a copy of a game's full internal state for testing. @@ -672,7 +713,7 @@ where } } -#[derive(Clone)] +#[derive(Clone, Serialize, Deserialize)] pub struct Game { pub index: U256, pub address: Address, @@ -690,3 +731,28 @@ pub struct ChallengerState { cursor: U256, games: HashMap, } + +impl ChallengerState { + fn to_backup(&self) -> ChallengerBackup { + ChallengerBackup::new(self.cursor, self.games.values().cloned().collect()) + } + + fn from_backup(backup: ChallengerBackup) -> Self { + Self { + cursor: backup.cursor, + games: backup.games.into_iter().map(|g| (g.index, g)).collect(), + } + } + + pub fn try_restore(path: &Path) -> Option { + let backup = ChallengerBackup::load(path)?; + let state = Self::from_backup(backup); + tracing::info!( + ?path, + games = state.games.len(), + cursor = %state.cursor, + "Challenger state restored from backup" + ); + Some(state) + } +} diff --git a/fault-proof/src/config.rs b/fault-proof/src/config.rs index 8ee930e93..b507889cb 100644 --- a/fault-proof/src/config.rs +++ b/fault-proof/src/config.rs @@ -297,6 +297,9 @@ pub struct ChallengerConfig { /// Set to 0.0 (default) for production use (honest challenging only). /// Set to >0.0 for testing defense mechanisms. pub malicious_challenge_percentage: f64, + + /// Path to backup file for persisting challenger state across restarts. + pub backup_path: Option, } impl ChallengerConfig { @@ -316,6 +319,7 @@ impl ChallengerConfig { malicious_challenge_percentage: env::var("MALICIOUS_CHALLENGE_PERCENTAGE") .unwrap_or("0.0".to_string()) .parse()?, + backup_path: env::var("CHALLENGER_BACKUP_PATH").ok().map(PathBuf::from), }) } @@ -330,6 +334,7 @@ impl ChallengerConfig { fetch_interval = self.fetch_interval, metrics_port = self.metrics_port, malicious_challenge_percentage = self.malicious_challenge_percentage, + backup_path = ?self.backup_path, "Challenger configuration loaded" ); } diff --git a/fault-proof/tests/backup.rs b/fault-proof/tests/backup.rs index 5c487dfd4..46aa3d578 100644 --- a/fault-proof/tests/backup.rs +++ b/fault-proof/tests/backup.rs @@ -4,7 +4,8 @@ use std::path::PathBuf; use alloy_primitives::{Address, B256, U256}; use fault_proof::{ - backup::{ProposerBackup, BACKUP_VERSION}, + backup::{ChallengerBackup, ProposerBackup, BACKUP_VERSION, CHALLENGER_BACKUP_VERSION}, + challenger::Game as ChallengerGame, contract::{GameStatus, ProposalStatus}, proposer::Game, }; @@ -138,6 +139,111 @@ mod persistence { } } +// ==================== Challenger Backup Tests ==================== + +fn test_challenger_game(index: u64, parent_index: u32) -> ChallengerGame { + ChallengerGame { + index: U256::from(index), + address: Address::ZERO, + parent_index, + l2_block_number: U256::from(index + 100), + is_invalid: false, + status: GameStatus::IN_PROGRESS, + proposal_status: ProposalStatus::Unchallenged, + should_attempt_to_challenge: false, + should_attempt_to_resolve: false, + should_attempt_to_claim_bond: false, + } +} + +mod challenger_validation { + use super::*; + use rstest::rstest; + + #[rstest] + #[case::empty(U256::ZERO, &[], true)] + #[case::single_game(U256::from(0), &[0u64], true)] + #[case::many_games(U256::from(5), &[0, 1, 2, 3, 4, 5], true)] + #[case::cursor_without_games(U256::from(5), &[], false)] + fn test_validation(#[case] cursor: U256, #[case] game_indices: &[u64], #[case] valid: bool) { + let games = game_indices.iter().map(|&idx| test_challenger_game(idx, u32::MAX)).collect(); + let backup = ChallengerBackup::new(cursor, games); + assert_eq!(backup.validate().is_ok(), valid); + } +} + +mod challenger_persistence { + use super::*; + + fn temp_backup_path() -> (TempDir, PathBuf) { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("challenger_backup.json"); + (dir, path) + } + + #[test] + fn save_and_load_roundtrip() { + let (_dir, path) = temp_backup_path(); + + let original = ChallengerBackup::new( + U256::from(10), + vec![ + test_challenger_game(0, u32::MAX), + test_challenger_game(5, 0), + test_challenger_game(10, 5), + ], + ); + + original.save(&path).unwrap(); + let loaded = ChallengerBackup::load(&path).unwrap(); + + assert_eq!(loaded.version, CHALLENGER_BACKUP_VERSION); + assert_eq!(loaded.cursor, U256::from(10)); + assert_eq!(loaded.games.len(), 3); + } + + #[test] + fn load_nonexistent_returns_none() { + let path = PathBuf::from("/nonexistent/challenger_backup.json"); + assert!(ChallengerBackup::load(&path).is_none()); + } + + #[test] + fn load_invalid_json_returns_none() { + let (_dir, path) = temp_backup_path(); + std::fs::write(&path, "not valid json").unwrap(); + assert!(ChallengerBackup::load(&path).is_none()); + } + + #[test] + fn load_version_mismatch_returns_none() { + let (_dir, path) = temp_backup_path(); + + let json = serde_json::json!({ + "version": CHALLENGER_BACKUP_VERSION + 1, + "cursor": "0x0", + "games": [] + }); + std::fs::write(&path, json.to_string()).unwrap(); + + assert!(ChallengerBackup::load(&path).is_none()); + } + + #[test] + fn load_validation_failure_returns_none() { + let (_dir, path) = temp_backup_path(); + + let json = serde_json::json!({ + "version": CHALLENGER_BACKUP_VERSION, + "cursor": "0x5", + "games": [] + }); + std::fs::write(&path, json.to_string()).unwrap(); + + assert!(ChallengerBackup::load(&path).is_none()); + } +} + #[cfg(feature = "integration")] mod integration { use std::sync::Arc; diff --git a/fault-proof/tests/common/process.rs b/fault-proof/tests/common/process.rs index 9798d958c..eb10974ba 100644 --- a/fault-proof/tests/common/process.rs +++ b/fault-proof/tests/common/process.rs @@ -119,6 +119,7 @@ pub async fn new_challenger( game_type, metrics_port: 9001, malicious_challenge_percentage: malicious_percentage.unwrap_or(0.0), + backup_path: None, }; let l1_provider = ProviderBuilder::default().connect_http(rpc_config.l1_rpc.clone());