From d49940fb4d5d90c1fb10ab6c6026ab4d3da0e2d4 Mon Sep 17 00:00:00 2001 From: jyn Date: Wed, 3 Jul 2024 13:31:49 -0400 Subject: [PATCH 1/2] fix typo in thread_check --- pgrx-pg-sys/src/submodules/thread_check.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgrx-pg-sys/src/submodules/thread_check.rs b/pgrx-pg-sys/src/submodules/thread_check.rs index cf3a1673d6..75bb248a10 100644 --- a/pgrx-pg-sys/src/submodules/thread_check.rs +++ b/pgrx-pg-sys/src/submodules/thread_check.rs @@ -116,7 +116,7 @@ fn thread_id_check_failed() -> ! { // I don't think this can ever happen, but it would be a bug if it could. assert_ne!(is_os_main_thread(), Some(true), "`pgrx` active thread is not the main thread!?"); panic!( - "{}: postgres FFI may not not be called from multiple threads.", + "{}: postgres FFI may not be called from multiple threads.", std::panic::Location::caller() ); } From 1e675a37df99d90879611b531ee9d53fa26bca8d Mon Sep 17 00:00:00 2001 From: jyn Date: Wed, 3 Jul 2024 13:33:02 -0400 Subject: [PATCH 2/2] don't swallow panics from spawned threads pgrx has somewhat complex panic handling. it looks something like this: 1. when a thread panics, the panic hook captures a backtrace and saves it in a thread-local for later. 2. the thread unwinds until it hits an FFI boundary (usually `run_guarded`). that downcasts the panic, takes the backtrace out of the thread-local, and hooks into postgres' `longjmp` mechanism 3. i forget what happens after this, i think it resumes unwinding once it's past the FFI barrier there is a slight problem here: we are using a thread-local to store the backtrace. if the panic does not happen on the main thread (for example, because a spawned thread tries to call into postgres and hits the check in `check_active_thread`), the backtrace will be lost. worse, if the main thread then unwinds in response to the panic, pgrx will use *its* backtrace instead of that of the worker thread. there are two main approaches we considered to fixing this: 1. fix the backtrace not to use a thread-local, so we can attach panics in spawned threads to a pgrx connection the way we would for the main thread. 2. stop handling panics in spawned threads altogether (and use the default hook). the downside of approach 1 is that there may not *be* a pgrx connection to attach to. the connection may have already closed, or the active connection may not be related to the thread that panicked, or we may be shutting down and will never check for the panic. in those cases the panic information will be missing or wrong. the downside of approach 2 is that it does not integrate with postgres' error handling mechanism, and in particular is not reported to psql. however, it does allow for developers using pgrx to handle the panic themselves, for example by handling the result from `JoinHandle::join`, in which case it *will* be reported to psql. this takes approach 2. we may want to reconsider this in the future, or perhaps add a helper library so that it's easy for applications to pass the panic into the main thread. --- note that the default panic handler in the standard library behaves quite poorly when multiple threads panic at once (it's sound, but the output is very hard to read). this being fixed in a separate PR upstream; see rust-lang/rust 127397. --- pgrx-pg-sys/src/submodules/panic.rs | 25 +++++++++++++++------- pgrx-pg-sys/src/submodules/thread_check.rs | 2 +- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pgrx-pg-sys/src/submodules/panic.rs b/pgrx-pg-sys/src/submodules/panic.rs index 0a621bd61c..c05e63f2d8 100644 --- a/pgrx-pg-sys/src/submodules/panic.rs +++ b/pgrx-pg-sys/src/submodules/panic.rs @@ -304,14 +304,23 @@ fn take_panic_location() -> ErrorReportLocation { } pub fn register_pg_guard_panic_hook() { - std::panic::set_hook(Box::new(|info| { - PANIC_LOCATION.with(|thread_local| { - thread_local.replace({ - let mut info: ErrorReportLocation = info.into(); - info.backtrace = Some(std::backtrace::Backtrace::capture()); - Some(info) - }) - }); + use super::thread_check::is_os_main_thread; + + let default_hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(move |info: _| { + if is_os_main_thread() == Some(true) { + // if this is the main thread, swallow the panic message and use postgres' error-reporting mechanism. + PANIC_LOCATION.with(|thread_local| { + thread_local.replace({ + let mut info: ErrorReportLocation = info.into(); + info.backtrace = Some(std::backtrace::Backtrace::capture()); + Some(info) + }) + }); + } else { + // if this isn't the main thread, we don't know which connection to associate the panic with. + default_hook(info) + } })) } diff --git a/pgrx-pg-sys/src/submodules/thread_check.rs b/pgrx-pg-sys/src/submodules/thread_check.rs index 75bb248a10..19d02c0437 100644 --- a/pgrx-pg-sys/src/submodules/thread_check.rs +++ b/pgrx-pg-sys/src/submodules/thread_check.rs @@ -47,7 +47,7 @@ pub(crate) fn check_active_thread() { /// Concretely, it is very important that this not return `Some(false)` /// incorrectly, but the other values are less important. Callers generally /// should compare the result against `Some(false)`. -fn is_os_main_thread() -> Option { +pub(super) fn is_os_main_thread() -> Option { #[cfg(any(target_os = "macos", target_os = "openbsd", target_os = "freebsd"))] return unsafe { match libc::pthread_main_np() {