Skip to content

Issue using async_io Timer in both 1.83 and 1.85 #258

@npmenard

Description

@npmenard

Hi esp-rs team, I have been running into issues while using async_io::Timer in ESP32 projects with both rust 1.83 and rust 1.85. With rust 1.83 the program would ultimately run into a deadlock. The program hangs in concurrent-queue while trying to update atomic variables.

With rust 1.85 I have similar behavior with the addition of running into LoadProhibited or DoubleException errors.

Given that the behavior differs between 1.83 and 1.85, and that the deadlock relates to atomics I am filling this bug here let me know if this is the correct location and/or if you need more data.

This is the code I tried (also available at https://github.com/npmenard/cqueue-deadlock) in both case I am using the esp-idf-template against esp-idf 5.2.3

use std::{future::Future, task::Poll, time::Duration};

use async_io::block_on;
use esp_idf_svc::{
    eventloop::EspSystemEventLoop,
    hal::prelude::Peripherals,
    nvs::EspDefaultNvsPartition,
    wifi::{BlockingWifi, ClientConfiguration, Configuration, EspWifi},
};
use futures_lite::StreamExt;

const SSID: &str = env!("WIFI_SSID");
const PASSWORD: &str = env!("WIFI_PASS");

struct FutureExpample {
    large: [u32; 200],
}

impl Future for FutureExpample {
    type Output = u32;
    fn poll(
        self: std::pin::Pin<&mut Self>,
        _: &mut std::task::Context<'_>,
    ) -> std::task::Poll<Self::Output> {
        let will_complete = unsafe { esp_idf_svc::sys::esp_random() } % 2;
        if will_complete == 1 {
            return Poll::Ready(
                self.large[unsafe { esp_idf_svc::sys::esp_random() % (self.large.len() as u32 - 1) }
                    as usize],
            );
        }
        Poll::Pending
    }
}

async fn runner() {
    use async_io::Timer;
    use futures_util::stream::FuturesUnordered;

    loop {
        let mut futures = FuturesUnordered::new();
        for _ in 0..100 {
            futures.push(async {
                let fut = FutureExpample {
                    large: [0xDEADBEFF_u32; 200],
                };
                futures_lite::future::or(fut, async {
                    let _ = Timer::after(Duration::from_millis(unsafe {
                        esp_idf_svc::sys::esp_random() % 1000
                    } as u64))
                    .await;
                    0
                })
                .await
            });
        }
        while let Some(_) = futures.next().await {}
        log::info!("next loop");
    }
}

fn main() {
    esp_idf_svc::sys::link_patches();
    esp_idf_svc::log::EspLogger::initialize_default();
    esp_idf_svc::sys::esp!(unsafe {
        esp_idf_svc::sys::esp_vfs_eventfd_register(&esp_idf_svc::sys::esp_vfs_eventfd_config_t {
            max_fds: 5,
        })
    })
    .unwrap();

    let peripherals = Peripherals::take().unwrap();
    let sys_loop = EspSystemEventLoop::take().unwrap();
    let nvs = EspDefaultNvsPartition::take().unwrap();

    let mut wifi = BlockingWifi::wrap(
        EspWifi::new(peripherals.modem, sys_loop.clone(), Some(nvs)).unwrap(),
        sys_loop,
    )
    .unwrap();

    start_wifi(&mut wifi);

    let ip_info = wifi.wifi().sta_netif().get_ip_info().unwrap();

    let exec = async_executor::LocalExecutor::new();

    block_on(exec.run(runner()));
}

fn start_wifi(wifi: &mut BlockingWifi<EspWifi<'static>>) {
    let wifi_configuration: Configuration = Configuration::Client(ClientConfiguration {
        ssid: SSID.try_into().unwrap(),
        bssid: None,
        auth_method: esp_idf_svc::wifi::AuthMethod::WPA2Personal,
        password: PASSWORD.try_into().unwrap(),
        channel: None,
        ..Default::default()
    });

    wifi.set_configuration(&wifi_configuration).unwrap();
    wifi.start().unwrap();
    wifi.connect().unwrap();
    wifi.wait_netif_up().unwrap();
}

The code Loop should run indefinitely (works on x86)

Bug

Deadlock example with 1.83

After a few minutes the WDT will trigger because of a deadlock in concurrent-queue
pthread task and main task race to do a push/pop to the queue but neither is able to make progress

I (112643) cqueue_deadlock: next loop
E (122641) task_wdt: Task watchdog got triggered. The following tasks/users did not reset the watchdog in time:
E (122641) task_wdt:  - IDLE0 (CPU 0)
E (122641) task_wdt: Tasks currently running:
E (122641) task_wdt: CPU 0: pthread
E (122641) task_wdt: CPU 1: IDLE1
E (122641) task_wdt: Aborting.
E (122641) task_wdt: Print CPU 0 (current core) backtrace




Backtrace: 0x400df26f:0x3ffe7790 0x400de03d:0x3ffe77e0 0x400dce05:0x3ffe7800 0x400dcb14:0x3ffe7860 0x400dceaa:0x3ffe7950 0x400ddbae:0x3ffe7a60 0x400df55c:0x3ffe7b40 0x400dd80b:0x3ffe7b60 0x400f8b33:0x3ffe7bb0 0x400f89b4:0x3ffe7bd0 0x4010e95c:0x3ffe7bf0
0x400df26f - concurrent_queue::bounded::Bounded<T>::pop
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/concurrent-queue-2.5.0/src/bounded.rs:228
0x400de03d - concurrent_queue::ConcurrentQueue<T>::pop
    at ??:??
0x400dce05 - <concurrent_queue::TryIter<T> as core::iter::traits::iterator::Iterator>::next
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/concurrent-queue-2.5.0/src/lib.rs:494
0x400dcb14 - async_io::reactor::Reactor::process_timers
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/reactor.rs:211
0x400dceaa - async_io::reactor::ReactorLock::react
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/reactor.rs:280
0x400ddbae - async_io::driver::main_loop
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/driver.rs:69
0x400df55c - async_io::driver::unparker::{{closure}}::{{closure}}
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/driver.rs:33
0x400dd80b - std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}
    at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/std/src/thread/mod.rs:538
0x400f8b33 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once
    at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/alloc/src/boxed.rs:2454
0x400f89b4 - std::sys::pal::unix::thread::Thread::new::thread_start
    at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/std/src/sys/pal/unix/thread.rs:105
0x4010e95c - pthread_task_func
    at /opt/esp/esp-idf/components/pthread/pthread.c:196




ELF file SHA256: 000000000

CPU halted.

Bug with 1.85

After a few minutes the program will crash with double exception of load prohibited (see below) or deadlock as well

I (16759) cqueue_deadlock: next loop
Guru Meditation Error: Core  0 panic'ed (Double exception).

Core  0 register dump:
PC      : 0x40095722  PS      : 0x00040c36  A0      : 0x00000027  A1      : 0x3ffe77c0
A2      : 0x3ffb3dc0  A3      : 0x00000000  A4      : 0x00000000  A5      : 0x00000000
0x3ffb3dc0 - async_io::reactor::Reactor::get::REACTOR
    at ??:??
A6      : 0x0000c350  A7      : 0x3ffdd090  A8      : 0x800de157  A9      : 0x3ffe7780
A10     : 0x00000002  A11     : 0x3ffcc7ac  A12     : 0x00000000  A13     : 0x3ffcc7b8
A14     : 0x0000c350  A15     : 0x3ffcc7b4  SAR     : 0x0000000c  EXCCAUSE: 0x00000002
EXCVADDR: 0x400840c2  LBEG    : 0x40083b65  LEND    : 0x40083b6d  LCOUNT  : 0x00000027
0x40083b65 - esp_timer_impl_get_counter_reg
    at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:118
0x40083b6d - esp_timer_impl_get_counter_reg
    at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:128


Backtrace: 0x4009571f:0x3ffe77c0 0x00000024:0x400840f2 |<-CORRUPTED




ELF file SHA256: 000000000

CPU halted.
Guru Meditation Error: Core  0 panic'ed (LoadProhibited). Exception was unhandled.

Core  0 register dump:
PC      : 0x400df89b  PS      : 0x00060c30  A0      : 0x00060423  A1      : 0x3ffbd8e0
0x400df89b - core::task::wake::Context::waker
    at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/core/src/task/wake.rs:250
A2      : 0x3ffbfdb4  A3      : 0x00000004  A4      : 0x00060820  A5      : 0x00000000
A6      : 0x00000000  A7      : 0x00000000  A8      : 0x00060820  A9      : 0x3ffbd8c0
A10     : 0x3ffb3dc0  A11     : 0xffffffff  A12     : 0x0000004c  A13     : 0x00000000
0x3ffb3dc0 - async_io::reactor::Reactor::get::REACTOR
    at ??:??
A14     : 0x0000004c  A15     : 0x00000002  SAR     : 0x0000000c  EXCCAUSE: 0x0000001c
EXCVADDR: 0x00060820  LBEG    : 0x40083b65  LEND    : 0x40083b6d  LCOUNT  : 0x00000027
0x40083b65 - esp_timer_impl_get_counter_reg
    at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:118
0x40083b6d - esp_timer_impl_get_counter_reg
    at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:128


Backtrace: 0x400df898:0x3ffbd8e0 0x00060420:0x3ffbd910 |<-CORRUPTED
0x400df898 - async_lock::once_cell::OnceCell<T>::get_or_init_blocking
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-1949cf8c6b5b557f/async-lock-3.4.0/src/once_cell.rs:516

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions