tokio/runtime/time/
entry.rs

//! Timer state structures.
//!
//! This module contains the heart of the intrusive timer implementation, and as
//! such the structures inside are full of tricky concurrency and unsafe code.
//!
//! # Ground rules
//!
//! The heart of the timer implementation here is the [`TimerShared`] structure,
//! shared between the [`TimerEntry`] and the driver. Generally, we permit access
//! to [`TimerShared`] ONLY via either 1) a mutable reference to [`TimerEntry`] or
//! 2) a held driver lock.
//!
//! It follows from this that any changes made while holding BOTH 1 and 2 will
//! be reliably visible, regardless of ordering. This is because of the `acq/rel`
//! fences on the driver lock ensuring ordering with 2, and Rust's mutable
//! reference rules for 1 (a mutable reference to an object can't be passed
//! between threads without an `acq/rel` barrier, and same-thread we have local
//! happens-before ordering).
//!
//! # State field
//!
//! Each timer has a state field associated with it. This field contains either
//! the current scheduled time, or a special flag value indicating its state.
//! This state can either indicate that the timer is on the 'pending' queue (and
//! thus will be fired with an `Ok(())` result soon) or that it has already been
//! fired/deregistered.
//!
//! This single state field allows for code that is firing the timer to
//! synchronize with any racing `reset` calls reliably.
//!
//! # Cached vs true timeouts
//!
//! To allow for the use case of a timeout that is periodically reset before
//! expiration to be as lightweight as possible, we support optimistically
//! lock-free timer resets, in the case where a timer is rescheduled to a later
//! point than it was originally scheduled for.
//!
//! This is accomplished by lazily rescheduling timers. That is, we update the
//! state field with the true expiration of the timer from the holder of
//! the [`TimerEntry`]. When the driver services timers (i.e., whenever it's
//! walking lists of timers), it checks this "true when" value, and reschedules
//! based on it.
//!
//! We do, however, also need to track what the expiration time was when we
//! originally registered the timer; this is used to locate the right linked
//! list when the timer is being cancelled. This is referred to as the "cached
//! when" internally.
//!
//! There is of course a race condition between timer reset and timer
//! expiration. If the driver fails to observe the updated expiration time, it
//! could trigger expiration of the timer too early. However, because
//! [`mark_pending`][mark_pending] performs a compare-and-swap, it will identify this race and
//! refuse to mark the timer as pending.
//!
//! [mark_pending]: TimerHandle::mark_pending
56
57use crate::loom::cell::UnsafeCell;
58use crate::loom::sync::atomic::AtomicU64;
59use crate::loom::sync::atomic::Ordering;
60
61use crate::runtime::scheduler;
62use crate::sync::AtomicWaker;
63use crate::time::Instant;
64use crate::util::linked_list;
65
66use std::cell::UnsafeCell as StdUnsafeCell;
67use std::task::{Context, Poll, Waker};
68use std::{marker::PhantomPinned, pin::Pin, ptr::NonNull};
69
/// Result handed back to the owner of a timer once the timer has been fired:
/// it is what `StateCell::fire` stores and what polling a fired timer yields.
type TimerResult = Result<(), crate::time::error::Error>;
71
/// Sentinel state: the timer has been fired, or is not registered at all.
const STATE_DEREGISTERED: u64 = u64::MAX;
/// Sentinel state: `mark_pending` has claimed the timer and it is waiting to
/// be fired.
const STATE_PENDING_FIRE: u64 = u64::MAX - 1;
/// Lowest sentinel value. Every state strictly below this is an expiration
/// tick; everything at or above it is a flag.
const STATE_MIN_VALUE: u64 = STATE_PENDING_FIRE;
/// The largest safe integer to use for ticks.
///
/// It sits directly below `STATE_MIN_VALUE`, so a tick of this size can never
/// be mistaken for one of the sentinel state values stored in the same field.
///
/// This value should be updated if any other signal values are added above.
pub(super) const MAX_SAFE_MILLIS_DURATION: u64 = STATE_MIN_VALUE - 1;
79
80/// This structure holds the current shared state of the timer - its scheduled
81/// time (if registered), or otherwise the result of the timer completing, as
82/// well as the registered waker.
83///
84/// Generally, the `StateCell` is only permitted to be accessed from two contexts:
85/// Either a thread holding the corresponding `&mut TimerEntry`, or a thread
86/// holding the timer driver lock. The write actions on the `StateCell` amount to
87/// passing "ownership" of the `StateCell` between these contexts; moving a timer
88/// from the `TimerEntry` to the driver requires _both_ holding the `&mut
89/// TimerEntry` and the driver lock, while moving it back (firing the timer)
90/// requires only the driver lock.
91pub(super) struct StateCell {
92    /// Holds either the scheduled expiration time for this timer, or (if the
93    /// timer has been fired and is unregistered), `u64::MAX`.
94    state: AtomicU64,
95    /// If the timer is fired (an Acquire order read on state shows
96    /// `u64::MAX`), holds the result that should be returned from
97    /// polling the timer. Otherwise, the contents are unspecified and reading
98    /// without holding the driver lock is undefined behavior.
99    result: UnsafeCell<TimerResult>,
100    /// The currently-registered waker
101    waker: AtomicWaker,
102}
103
104impl Default for StateCell {
105    fn default() -> Self {
106        Self::new()
107    }
108}
109
110impl std::fmt::Debug for StateCell {
111    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112        write!(f, "StateCell({:?})", self.read_state())
113    }
114}
115
116impl StateCell {
117    fn new() -> Self {
118        Self {
119            state: AtomicU64::new(STATE_DEREGISTERED),
120            result: UnsafeCell::new(Ok(())),
121            waker: AtomicWaker::new(),
122        }
123    }
124
125    fn is_pending(&self) -> bool {
126        self.state.load(Ordering::Relaxed) == STATE_PENDING_FIRE
127    }
128
129    /// Returns the current expiration time, or None if not currently scheduled.
130    fn when(&self) -> Option<u64> {
131        let cur_state = self.state.load(Ordering::Relaxed);
132
133        if cur_state == STATE_DEREGISTERED {
134            None
135        } else {
136            Some(cur_state)
137        }
138    }
139
140    /// If the timer is completed, returns the result of the timer. Otherwise,
141    /// returns None and registers the waker.
142    fn poll(&self, waker: &Waker) -> Poll<TimerResult> {
143        // We must register first. This ensures that either `fire` will
144        // observe the new waker, or we will observe a racing fire to have set
145        // the state, or both.
146        self.waker.register_by_ref(waker);
147
148        self.read_state()
149    }
150
151    fn read_state(&self) -> Poll<TimerResult> {
152        let cur_state = self.state.load(Ordering::Acquire);
153
154        if cur_state == STATE_DEREGISTERED {
155            // SAFETY: The driver has fired this timer; this involves writing
156            // the result, and then writing (with release ordering) the state
157            // field.
158            Poll::Ready(unsafe { self.result.with(|p| *p) })
159        } else {
160            Poll::Pending
161        }
162    }
163
164    /// Marks this timer as being moved to the pending list, if its scheduled
165    /// time is not after `not_after`.
166    ///
167    /// If the timer is scheduled for a time after `not_after`, returns an Err
168    /// containing the current scheduled time.
169    ///
170    /// SAFETY: Must hold the driver lock.
171    unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> {
172        // Quick initial debug check to see if the timer is already fired. Since
173        // firing the timer can only happen with the driver lock held, we know
174        // we shouldn't be able to "miss" a transition to a fired state, even
175        // with relaxed ordering.
176        let mut cur_state = self.state.load(Ordering::Relaxed);
177
178        loop {
179            // improve the error message for things like
180            // https://github.com/tokio-rs/tokio/issues/3675
181            assert!(
182                cur_state < STATE_MIN_VALUE,
183                "mark_pending called when the timer entry is in an invalid state"
184            );
185
186            if cur_state > not_after {
187                break Err(cur_state);
188            }
189
190            match self.state.compare_exchange_weak(
191                cur_state,
192                STATE_PENDING_FIRE,
193                Ordering::AcqRel,
194                Ordering::Acquire,
195            ) {
196                Ok(_) => break Ok(()),
197                Err(actual_state) => cur_state = actual_state,
198            }
199        }
200    }
201
202    /// Fires the timer, setting the result to the provided result.
203    ///
204    /// Returns:
205    /// * `Some(waker)` - if fired and a waker needs to be invoked once the
206    ///   driver lock is released
207    /// * `None` - if fired and a waker does not need to be invoked, or if
208    ///   already fired
209    ///
210    /// SAFETY: The driver lock must be held.
211    unsafe fn fire(&self, result: TimerResult) -> Option<Waker> {
212        // Quick initial check to see if the timer is already fired. Since
213        // firing the timer can only happen with the driver lock held, we know
214        // we shouldn't be able to "miss" a transition to a fired state, even
215        // with relaxed ordering.
216        let cur_state = self.state.load(Ordering::Relaxed);
217        if cur_state == STATE_DEREGISTERED {
218            return None;
219        }
220
221        // SAFETY: We assume the driver lock is held and the timer is not
222        // fired, so only the driver is accessing this field.
223        //
224        // We perform a release-ordered store to state below, to ensure this
225        // write is visible before the state update is visible.
226        unsafe { self.result.with_mut(|p| *p = result) };
227
228        self.state.store(STATE_DEREGISTERED, Ordering::Release);
229
230        self.waker.take_waker()
231    }
232
233    /// Marks the timer as registered (poll will return None) and sets the
234    /// expiration time.
235    ///
236    /// While this function is memory-safe, it should only be called from a
237    /// context holding both `&mut TimerEntry` and the driver lock.
238    fn set_expiration(&self, timestamp: u64) {
239        debug_assert!(timestamp < STATE_MIN_VALUE);
240
241        // We can use relaxed ordering because we hold the driver lock and will
242        // fence when we release the lock.
243        self.state.store(timestamp, Ordering::Relaxed);
244    }
245
246    /// Attempts to adjust the timer to a new timestamp.
247    ///
248    /// If the timer has already been fired, is pending firing, or the new
249    /// timestamp is earlier than the old timestamp, (or occasionally
250    /// spuriously) returns Err without changing the timer's state. In this
251    /// case, the timer must be deregistered and re-registered.
252    fn extend_expiration(&self, new_timestamp: u64) -> Result<(), ()> {
253        let mut prior = self.state.load(Ordering::Relaxed);
254        loop {
255            if new_timestamp < prior || prior >= STATE_MIN_VALUE {
256                return Err(());
257            }
258
259            match self.state.compare_exchange_weak(
260                prior,
261                new_timestamp,
262                Ordering::AcqRel,
263                Ordering::Acquire,
264            ) {
265                Ok(_) => return Ok(()),
266                Err(true_prior) => prior = true_prior,
267            }
268        }
269    }
270
271    /// Returns true if the state of this timer indicates that the timer might
272    /// be registered with the driver. This check is performed with relaxed
273    /// ordering, but is conservative - if it returns false, the timer is
274    /// definitely _not_ registered.
275    pub(super) fn might_be_registered(&self) -> bool {
276        self.state.load(Ordering::Relaxed) != u64::MAX
277    }
278}
279
280/// A timer entry.
281///
282/// This is the handle to a timer that is controlled by the requester of the
283/// timer. As this participates in intrusive data structures, it must be pinned
284/// before polling.
285#[derive(Debug)]
286pub(crate) struct TimerEntry {
287    /// Arc reference to the runtime handle. We can only free the driver after
288    /// deregistering everything from their respective timer wheels.
289    driver: scheduler::Handle,
290    /// Shared inner structure; this is part of an intrusive linked list, and
291    /// therefore other references can exist to it while mutable references to
292    /// Entry exist.
293    ///
294    /// This is manipulated only under the inner mutex. TODO: Can we use loom
295    /// cells for this?
296    inner: StdUnsafeCell<Option<TimerShared>>,
297    /// Deadline for the timer. This is used to register on the first
298    /// poll, as we can't register prior to being pinned.
299    deadline: Instant,
300    /// Whether the deadline has been registered.
301    registered: bool,
302    /// Ensure the type is !Unpin
303    _m: std::marker::PhantomPinned,
304}
305
306unsafe impl Send for TimerEntry {}
307unsafe impl Sync for TimerEntry {}
308
309/// An `TimerHandle` is the (non-enforced) "unique" pointer from the driver to the
310/// timer entry. Generally, at most one `TimerHandle` exists for a timer at a time
311/// (enforced by the timer state machine).
312///
313/// SAFETY: An `TimerHandle` is essentially a raw pointer, and the usual caveats
314/// of pointer safety apply. In particular, `TimerHandle` does not itself enforce
315/// that the timer does still exist; however, normally an `TimerHandle` is created
316/// immediately before registering the timer, and is consumed when firing the
317/// timer, to help minimize mistakes. Still, because `TimerHandle` cannot enforce
318/// memory safety, all operations are unsafe.
319#[derive(Debug)]
320pub(crate) struct TimerHandle {
321    inner: NonNull<TimerShared>,
322}
323
324pub(super) type EntryList = crate::util::linked_list::LinkedList<TimerShared, TimerShared>;
325
326/// The shared state structure of a timer. This structure is shared between the
327/// frontend (`Entry`) and driver backend.
328///
329/// Note that this structure is located inside the `TimerEntry` structure.
330pub(crate) struct TimerShared {
331    /// A link within the doubly-linked list of timers on a particular level and
332    /// slot. Valid only if state is equal to Registered.
333    ///
334    /// Only accessed under the entry lock.
335    pointers: linked_list::Pointers<TimerShared>,
336
337    /// The expiration time for which this entry is currently registered.
338    /// Generally owned by the driver, but is accessed by the entry when not
339    /// registered.
340    cached_when: AtomicU64,
341
342    /// Current state. This records whether the timer entry is currently under
343    /// the ownership of the driver, and if not, its current state (not
344    /// complete, fired, error, etc).
345    state: StateCell,
346
347    _p: PhantomPinned,
348}
349
350unsafe impl Send for TimerShared {}
351unsafe impl Sync for TimerShared {}
352
353impl std::fmt::Debug for TimerShared {
354    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
355        f.debug_struct("TimerShared")
356            .field("cached_when", &self.cached_when.load(Ordering::Relaxed))
357            .field("state", &self.state)
358            .finish()
359    }
360}
361
// Generates `TimerShared::addr_of_pointers`, which `Link::pointers` uses to go
// from a `NonNull<TimerShared>` to a `NonNull` pointing at its `pointers`
// field.
//
// NOTE(review): the expansion lives in the `generate_addr_of_methods!` macro
// definition, which is not part of this file; presumably it projects the field
// without materializing an intermediate `&TimerShared` - confirm against the
// macro before changing this signature.
generate_addr_of_methods! {
    impl<> TimerShared {
        unsafe fn addr_of_pointers(self: NonNull<Self>) -> NonNull<linked_list::Pointers<TimerShared>> {
            &self.pointers
        }
    }
}
369
370impl TimerShared {
371    pub(super) fn new() -> Self {
372        Self {
373            cached_when: AtomicU64::new(0),
374            pointers: linked_list::Pointers::new(),
375            state: StateCell::default(),
376            _p: PhantomPinned,
377        }
378    }
379
380    /// Gets the cached time-of-expiration value.
381    pub(super) fn cached_when(&self) -> u64 {
382        // Cached-when is only accessed under the driver lock, so we can use relaxed
383        self.cached_when.load(Ordering::Relaxed)
384    }
385
386    /// Gets the true time-of-expiration value, and copies it into the cached
387    /// time-of-expiration value.
388    ///
389    /// SAFETY: Must be called with the driver lock held, and when this entry is
390    /// not in any timer wheel lists.
391    pub(super) unsafe fn sync_when(&self) -> u64 {
392        let true_when = self.true_when();
393
394        self.cached_when.store(true_when, Ordering::Relaxed);
395
396        true_when
397    }
398
399    /// Sets the cached time-of-expiration value.
400    ///
401    /// SAFETY: Must be called with the driver lock held, and when this entry is
402    /// not in any timer wheel lists.
403    unsafe fn set_cached_when(&self, when: u64) {
404        self.cached_when.store(when, Ordering::Relaxed);
405    }
406
407    /// Returns the true time-of-expiration value, with relaxed memory ordering.
408    pub(super) fn true_when(&self) -> u64 {
409        self.state.when().expect("Timer already fired")
410    }
411
412    /// Sets the true time-of-expiration value, even if it is less than the
413    /// current expiration or the timer is deregistered.
414    ///
415    /// SAFETY: Must only be called with the driver lock held and the entry not
416    /// in the timer wheel.
417    pub(super) unsafe fn set_expiration(&self, t: u64) {
418        self.state.set_expiration(t);
419        self.cached_when.store(t, Ordering::Relaxed);
420    }
421
422    /// Sets the true time-of-expiration only if it is after the current.
423    pub(super) fn extend_expiration(&self, t: u64) -> Result<(), ()> {
424        self.state.extend_expiration(t)
425    }
426
427    /// Returns a `TimerHandle` for this timer.
428    pub(super) fn handle(&self) -> TimerHandle {
429        TimerHandle {
430            inner: NonNull::from(self),
431        }
432    }
433
434    /// Returns true if the state of this timer indicates that the timer might
435    /// be registered with the driver. This check is performed with relaxed
436    /// ordering, but is conservative - if it returns false, the timer is
437    /// definitely _not_ registered.
438    pub(super) fn might_be_registered(&self) -> bool {
439        self.state.might_be_registered()
440    }
441}
442
443unsafe impl linked_list::Link for TimerShared {
444    type Handle = TimerHandle;
445
446    type Target = TimerShared;
447
448    fn as_raw(handle: &Self::Handle) -> NonNull<Self::Target> {
449        handle.inner
450    }
451
452    unsafe fn from_raw(ptr: NonNull<Self::Target>) -> Self::Handle {
453        TimerHandle { inner: ptr }
454    }
455
456    unsafe fn pointers(
457        target: NonNull<Self::Target>,
458    ) -> NonNull<linked_list::Pointers<Self::Target>> {
459        TimerShared::addr_of_pointers(target)
460    }
461}
462
// ===== impl TimerEntry =====
464
465impl TimerEntry {
466    #[track_caller]
467    pub(crate) fn new(handle: scheduler::Handle, deadline: Instant) -> Self {
468        // Panic if the time driver is not enabled
469        let _ = handle.driver().time();
470
471        Self {
472            driver: handle,
473            inner: StdUnsafeCell::new(None),
474            deadline,
475            registered: false,
476            _m: std::marker::PhantomPinned,
477        }
478    }
479
480    fn is_inner_init(&self) -> bool {
481        unsafe { &*self.inner.get() }.is_some()
482    }
483
484    // This lazy initialization is for performance purposes.
485    fn inner(&self) -> &TimerShared {
486        let inner = unsafe { &*self.inner.get() };
487        if inner.is_none() {
488            unsafe {
489                *self.inner.get() = Some(TimerShared::new());
490            }
491        }
492        return inner.as_ref().unwrap();
493    }
494
495    pub(crate) fn deadline(&self) -> Instant {
496        self.deadline
497    }
498
499    pub(crate) fn is_elapsed(&self) -> bool {
500        self.is_inner_init() && !self.inner().state.might_be_registered() && self.registered
501    }
502
503    /// Cancels and deregisters the timer. This operation is irreversible.
504    pub(crate) fn cancel(self: Pin<&mut Self>) {
505        // Avoid calling the `clear_entry` method, because it has not been initialized yet.
506        if !self.is_inner_init() {
507            return;
508        }
509        // We need to perform an acq/rel fence with the driver thread, and the
510        // simplest way to do so is to grab the driver lock.
511        //
512        // Why is this necessary? We're about to release this timer's memory for
513        // some other non-timer use. However, we've been doing a bunch of
514        // relaxed (or even non-atomic) writes from the driver thread, and we'll
515        // be doing more from _this thread_ (as this memory is interpreted as
516        // something else).
517        //
518        // It is critical to ensure that, from the point of view of the driver,
519        // those future non-timer writes happen-after the timer is fully fired,
520        // and from the purpose of this thread, the driver's writes all
521        // happen-before we drop the timer. This in turn requires us to perform
522        // an acquire-release barrier in _both_ directions between the driver
523        // and dropping thread.
524        //
525        // The lock acquisition in clear_entry serves this purpose. All of the
526        // driver manipulations happen with the lock held, so we can just take
527        // the lock and be sure that this drop happens-after everything the
528        // driver did so far and happens-before everything the driver does in
529        // the future. While we have the lock held, we also go ahead and
530        // deregister the entry if necessary.
531        unsafe { self.driver().clear_entry(NonNull::from(self.inner())) };
532    }
533
534    pub(crate) fn reset(mut self: Pin<&mut Self>, new_time: Instant, reregister: bool) {
535        let this = unsafe { self.as_mut().get_unchecked_mut() };
536        this.deadline = new_time;
537        this.registered = reregister;
538
539        let tick = self.driver().time_source().deadline_to_tick(new_time);
540
541        if self.inner().extend_expiration(tick).is_ok() {
542            return;
543        }
544
545        if reregister {
546            unsafe {
547                self.driver()
548                    .reregister(&self.driver.driver().io, tick, self.inner().into());
549            }
550        }
551    }
552
553    pub(crate) fn poll_elapsed(
554        mut self: Pin<&mut Self>,
555        cx: &mut Context<'_>,
556    ) -> Poll<Result<(), super::Error>> {
557        assert!(
558            !self.driver().is_shutdown(),
559            "{}",
560            crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR
561        );
562
563        if !self.registered {
564            let deadline = self.deadline;
565            self.as_mut().reset(deadline, true);
566        }
567
568        self.inner().state.poll(cx.waker())
569    }
570
571    pub(crate) fn driver(&self) -> &super::Handle {
572        self.driver.driver().time()
573    }
574
575    #[cfg(all(tokio_unstable, feature = "tracing"))]
576    pub(crate) fn clock(&self) -> &super::Clock {
577        self.driver.driver().clock()
578    }
579}
580
581impl TimerHandle {
582    pub(super) unsafe fn cached_when(&self) -> u64 {
583        unsafe { self.inner.as_ref().cached_when() }
584    }
585
586    pub(super) unsafe fn sync_when(&self) -> u64 {
587        unsafe { self.inner.as_ref().sync_when() }
588    }
589
590    pub(super) unsafe fn is_pending(&self) -> bool {
591        unsafe { self.inner.as_ref().state.is_pending() }
592    }
593
594    /// Forcibly sets the true and cached expiration times to the given tick.
595    ///
596    /// SAFETY: The caller must ensure that the handle remains valid, the driver
597    /// lock is held, and that the timer is not in any wheel linked lists.
598    pub(super) unsafe fn set_expiration(&self, tick: u64) {
599        self.inner.as_ref().set_expiration(tick);
600    }
601
602    /// Attempts to mark this entry as pending. If the expiration time is after
603    /// `not_after`, however, returns an Err with the current expiration time.
604    ///
605    /// If an `Err` is returned, the `cached_when` value will be updated to this
606    /// new expiration time.
607    ///
608    /// SAFETY: The caller must ensure that the handle remains valid, the driver
609    /// lock is held, and that the timer is not in any wheel linked lists.
610    /// After returning Ok, the entry must be added to the pending list.
611    pub(super) unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> {
612        match self.inner.as_ref().state.mark_pending(not_after) {
613            Ok(()) => {
614                // mark this as being on the pending queue in cached_when
615                self.inner.as_ref().set_cached_when(u64::MAX);
616                Ok(())
617            }
618            Err(tick) => {
619                self.inner.as_ref().set_cached_when(tick);
620                Err(tick)
621            }
622        }
623    }
624
625    /// Attempts to transition to a terminal state. If the state is already a
626    /// terminal state, does nothing.
627    ///
628    /// Because the entry might be dropped after the state is moved to a
629    /// terminal state, this function consumes the handle to ensure we don't
630    /// access the entry afterwards.
631    ///
632    /// Returns the last-registered waker, if any.
633    ///
634    /// SAFETY: The driver lock must be held while invoking this function, and
635    /// the entry must not be in any wheel linked lists.
636    pub(super) unsafe fn fire(self, completed_state: TimerResult) -> Option<Waker> {
637        self.inner.as_ref().state.fire(completed_state)
638    }
639}
640
641impl Drop for TimerEntry {
642    fn drop(&mut self) {
643        unsafe { Pin::new_unchecked(self) }.as_mut().cancel();
644    }
645}