tokio/runtime/time/entry.rs
1//! Timer state structures.
2//!
3//! This module contains the heart of the intrusive timer implementation, and as
4//! such the structures inside are full of tricky concurrency and unsafe code.
5//!
6//! # Ground rules
7//!
8//! The heart of the timer implementation here is the [`TimerShared`] structure,
9//! shared between the [`TimerEntry`] and the driver. Generally, we permit access
10//! to [`TimerShared`] ONLY via either 1) a mutable reference to [`TimerEntry`] or
11//! 2) a held driver lock.
12//!
//! It follows from this that any changes made while holding BOTH 1 and 2 will
//! be reliably visible, regardless of ordering. This is because of the `acq/rel`
//! fences on the driver lock ensuring ordering with 2, and Rust's mutable
//! reference rules for 1 (a mutable reference to an object can't be passed
//! between threads without an `acq/rel` barrier, and same-thread we have local
//! happens-before ordering).
19//!
20//! # State field
21//!
22//! Each timer has a state field associated with it. This field contains either
23//! the current scheduled time, or a special flag value indicating its state.
24//! This state can either indicate that the timer is on the 'pending' queue (and
25//! thus will be fired with an `Ok(())` result soon) or that it has already been
26//! fired/deregistered.
27//!
28//! This single state field allows for code that is firing the timer to
29//! synchronize with any racing `reset` calls reliably.
30//!
31//! # Cached vs true timeouts
32//!
33//! To allow for the use case of a timeout that is periodically reset before
34//! expiration to be as lightweight as possible, we support optimistically
35//! lock-free timer resets, in the case where a timer is rescheduled to a later
36//! point than it was originally scheduled for.
37//!
//! This is accomplished by lazily rescheduling timers. That is, we update the
//! state field with the true expiration of the timer from the holder of
//! the [`TimerEntry`]. When the driver services timers (i.e., whenever it's
//! walking lists of timers), it checks this "true when" value, and reschedules
//! based on it.
43//!
44//! We do, however, also need to track what the expiration time was when we
45//! originally registered the timer; this is used to locate the right linked
46//! list when the timer is being cancelled. This is referred to as the "cached
47//! when" internally.
48//!
49//! There is of course a race condition between timer reset and timer
50//! expiration. If the driver fails to observe the updated expiration time, it
51//! could trigger expiration of the timer too early. However, because
52//! [`mark_pending`][mark_pending] performs a compare-and-swap, it will identify this race and
53//! refuse to mark the timer as pending.
54//!
55//! [mark_pending]: TimerHandle::mark_pending
56
57use crate::loom::cell::UnsafeCell;
58use crate::loom::sync::atomic::AtomicU64;
59use crate::loom::sync::atomic::Ordering;
60
61use crate::runtime::scheduler;
62use crate::sync::AtomicWaker;
63use crate::time::Instant;
64use crate::util::linked_list;
65
66use std::cell::UnsafeCell as StdUnsafeCell;
67use std::task::{Context, Poll, Waker};
68use std::{marker::PhantomPinned, pin::Pin, ptr::NonNull};
69
70type TimerResult = Result<(), crate::time::error::Error>;
71
72const STATE_DEREGISTERED: u64 = u64::MAX;
73const STATE_PENDING_FIRE: u64 = STATE_DEREGISTERED - 1;
74const STATE_MIN_VALUE: u64 = STATE_PENDING_FIRE;
75/// The largest safe integer to use for ticks.
76///
77/// This value should be updated if any other signal values are added above.
78pub(super) const MAX_SAFE_MILLIS_DURATION: u64 = STATE_MIN_VALUE - 1;
79
80/// This structure holds the current shared state of the timer - its scheduled
81/// time (if registered), or otherwise the result of the timer completing, as
82/// well as the registered waker.
83///
84/// Generally, the `StateCell` is only permitted to be accessed from two contexts:
85/// Either a thread holding the corresponding `&mut TimerEntry`, or a thread
86/// holding the timer driver lock. The write actions on the `StateCell` amount to
87/// passing "ownership" of the `StateCell` between these contexts; moving a timer
88/// from the `TimerEntry` to the driver requires _both_ holding the `&mut
89/// TimerEntry` and the driver lock, while moving it back (firing the timer)
90/// requires only the driver lock.
91pub(super) struct StateCell {
92 /// Holds either the scheduled expiration time for this timer, or (if the
93 /// timer has been fired and is unregistered), `u64::MAX`.
94 state: AtomicU64,
95 /// If the timer is fired (an Acquire order read on state shows
96 /// `u64::MAX`), holds the result that should be returned from
97 /// polling the timer. Otherwise, the contents are unspecified and reading
98 /// without holding the driver lock is undefined behavior.
99 result: UnsafeCell<TimerResult>,
100 /// The currently-registered waker
101 waker: AtomicWaker,
102}
103
104impl Default for StateCell {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
110impl std::fmt::Debug for StateCell {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 write!(f, "StateCell({:?})", self.read_state())
113 }
114}
115
116impl StateCell {
117 fn new() -> Self {
118 Self {
119 state: AtomicU64::new(STATE_DEREGISTERED),
120 result: UnsafeCell::new(Ok(())),
121 waker: AtomicWaker::new(),
122 }
123 }
124
125 fn is_pending(&self) -> bool {
126 self.state.load(Ordering::Relaxed) == STATE_PENDING_FIRE
127 }
128
129 /// Returns the current expiration time, or None if not currently scheduled.
130 fn when(&self) -> Option<u64> {
131 let cur_state = self.state.load(Ordering::Relaxed);
132
133 if cur_state == STATE_DEREGISTERED {
134 None
135 } else {
136 Some(cur_state)
137 }
138 }
139
140 /// If the timer is completed, returns the result of the timer. Otherwise,
141 /// returns None and registers the waker.
142 fn poll(&self, waker: &Waker) -> Poll<TimerResult> {
143 // We must register first. This ensures that either `fire` will
144 // observe the new waker, or we will observe a racing fire to have set
145 // the state, or both.
146 self.waker.register_by_ref(waker);
147
148 self.read_state()
149 }
150
151 fn read_state(&self) -> Poll<TimerResult> {
152 let cur_state = self.state.load(Ordering::Acquire);
153
154 if cur_state == STATE_DEREGISTERED {
155 // SAFETY: The driver has fired this timer; this involves writing
156 // the result, and then writing (with release ordering) the state
157 // field.
158 Poll::Ready(unsafe { self.result.with(|p| *p) })
159 } else {
160 Poll::Pending
161 }
162 }
163
164 /// Marks this timer as being moved to the pending list, if its scheduled
165 /// time is not after `not_after`.
166 ///
167 /// If the timer is scheduled for a time after `not_after`, returns an Err
168 /// containing the current scheduled time.
169 ///
170 /// SAFETY: Must hold the driver lock.
171 unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> {
172 // Quick initial debug check to see if the timer is already fired. Since
173 // firing the timer can only happen with the driver lock held, we know
174 // we shouldn't be able to "miss" a transition to a fired state, even
175 // with relaxed ordering.
176 let mut cur_state = self.state.load(Ordering::Relaxed);
177
178 loop {
179 // improve the error message for things like
180 // https://github.com/tokio-rs/tokio/issues/3675
181 assert!(
182 cur_state < STATE_MIN_VALUE,
183 "mark_pending called when the timer entry is in an invalid state"
184 );
185
186 if cur_state > not_after {
187 break Err(cur_state);
188 }
189
190 match self.state.compare_exchange_weak(
191 cur_state,
192 STATE_PENDING_FIRE,
193 Ordering::AcqRel,
194 Ordering::Acquire,
195 ) {
196 Ok(_) => break Ok(()),
197 Err(actual_state) => cur_state = actual_state,
198 }
199 }
200 }
201
202 /// Fires the timer, setting the result to the provided result.
203 ///
204 /// Returns:
205 /// * `Some(waker)` - if fired and a waker needs to be invoked once the
206 /// driver lock is released
207 /// * `None` - if fired and a waker does not need to be invoked, or if
208 /// already fired
209 ///
210 /// SAFETY: The driver lock must be held.
211 unsafe fn fire(&self, result: TimerResult) -> Option<Waker> {
212 // Quick initial check to see if the timer is already fired. Since
213 // firing the timer can only happen with the driver lock held, we know
214 // we shouldn't be able to "miss" a transition to a fired state, even
215 // with relaxed ordering.
216 let cur_state = self.state.load(Ordering::Relaxed);
217 if cur_state == STATE_DEREGISTERED {
218 return None;
219 }
220
221 // SAFETY: We assume the driver lock is held and the timer is not
222 // fired, so only the driver is accessing this field.
223 //
224 // We perform a release-ordered store to state below, to ensure this
225 // write is visible before the state update is visible.
226 unsafe { self.result.with_mut(|p| *p = result) };
227
228 self.state.store(STATE_DEREGISTERED, Ordering::Release);
229
230 self.waker.take_waker()
231 }
232
233 /// Marks the timer as registered (poll will return None) and sets the
234 /// expiration time.
235 ///
236 /// While this function is memory-safe, it should only be called from a
237 /// context holding both `&mut TimerEntry` and the driver lock.
238 fn set_expiration(&self, timestamp: u64) {
239 debug_assert!(timestamp < STATE_MIN_VALUE);
240
241 // We can use relaxed ordering because we hold the driver lock and will
242 // fence when we release the lock.
243 self.state.store(timestamp, Ordering::Relaxed);
244 }
245
246 /// Attempts to adjust the timer to a new timestamp.
247 ///
248 /// If the timer has already been fired, is pending firing, or the new
249 /// timestamp is earlier than the old timestamp, (or occasionally
250 /// spuriously) returns Err without changing the timer's state. In this
251 /// case, the timer must be deregistered and re-registered.
252 fn extend_expiration(&self, new_timestamp: u64) -> Result<(), ()> {
253 let mut prior = self.state.load(Ordering::Relaxed);
254 loop {
255 if new_timestamp < prior || prior >= STATE_MIN_VALUE {
256 return Err(());
257 }
258
259 match self.state.compare_exchange_weak(
260 prior,
261 new_timestamp,
262 Ordering::AcqRel,
263 Ordering::Acquire,
264 ) {
265 Ok(_) => return Ok(()),
266 Err(true_prior) => prior = true_prior,
267 }
268 }
269 }
270
271 /// Returns true if the state of this timer indicates that the timer might
272 /// be registered with the driver. This check is performed with relaxed
273 /// ordering, but is conservative - if it returns false, the timer is
274 /// definitely _not_ registered.
275 pub(super) fn might_be_registered(&self) -> bool {
276 self.state.load(Ordering::Relaxed) != u64::MAX
277 }
278}
279
280/// A timer entry.
281///
282/// This is the handle to a timer that is controlled by the requester of the
283/// timer. As this participates in intrusive data structures, it must be pinned
284/// before polling.
285#[derive(Debug)]
286pub(crate) struct TimerEntry {
287 /// Arc reference to the runtime handle. We can only free the driver after
288 /// deregistering everything from their respective timer wheels.
289 driver: scheduler::Handle,
290 /// Shared inner structure; this is part of an intrusive linked list, and
291 /// therefore other references can exist to it while mutable references to
292 /// Entry exist.
293 ///
294 /// This is manipulated only under the inner mutex. TODO: Can we use loom
295 /// cells for this?
296 inner: StdUnsafeCell<Option<TimerShared>>,
297 /// Deadline for the timer. This is used to register on the first
298 /// poll, as we can't register prior to being pinned.
299 deadline: Instant,
300 /// Whether the deadline has been registered.
301 registered: bool,
302 /// Ensure the type is !Unpin
303 _m: std::marker::PhantomPinned,
304}
305
306unsafe impl Send for TimerEntry {}
307unsafe impl Sync for TimerEntry {}
308
309/// An `TimerHandle` is the (non-enforced) "unique" pointer from the driver to the
310/// timer entry. Generally, at most one `TimerHandle` exists for a timer at a time
311/// (enforced by the timer state machine).
312///
313/// SAFETY: An `TimerHandle` is essentially a raw pointer, and the usual caveats
314/// of pointer safety apply. In particular, `TimerHandle` does not itself enforce
315/// that the timer does still exist; however, normally an `TimerHandle` is created
316/// immediately before registering the timer, and is consumed when firing the
317/// timer, to help minimize mistakes. Still, because `TimerHandle` cannot enforce
318/// memory safety, all operations are unsafe.
319#[derive(Debug)]
320pub(crate) struct TimerHandle {
321 inner: NonNull<TimerShared>,
322}
323
324pub(super) type EntryList = crate::util::linked_list::LinkedList<TimerShared, TimerShared>;
325
326/// The shared state structure of a timer. This structure is shared between the
327/// frontend (`Entry`) and driver backend.
328///
329/// Note that this structure is located inside the `TimerEntry` structure.
330pub(crate) struct TimerShared {
331 /// A link within the doubly-linked list of timers on a particular level and
332 /// slot. Valid only if state is equal to Registered.
333 ///
334 /// Only accessed under the entry lock.
335 pointers: linked_list::Pointers<TimerShared>,
336
337 /// The expiration time for which this entry is currently registered.
338 /// Generally owned by the driver, but is accessed by the entry when not
339 /// registered.
340 cached_when: AtomicU64,
341
342 /// Current state. This records whether the timer entry is currently under
343 /// the ownership of the driver, and if not, its current state (not
344 /// complete, fired, error, etc).
345 state: StateCell,
346
347 _p: PhantomPinned,
348}
349
350unsafe impl Send for TimerShared {}
351unsafe impl Sync for TimerShared {}
352
353impl std::fmt::Debug for TimerShared {
354 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
355 f.debug_struct("TimerShared")
356 .field("cached_when", &self.cached_when.load(Ordering::Relaxed))
357 .field("state", &self.state)
358 .finish()
359 }
360}
361
362generate_addr_of_methods! {
363 impl<> TimerShared {
364 unsafe fn addr_of_pointers(self: NonNull<Self>) -> NonNull<linked_list::Pointers<TimerShared>> {
365 &self.pointers
366 }
367 }
368}
369
370impl TimerShared {
371 pub(super) fn new() -> Self {
372 Self {
373 cached_when: AtomicU64::new(0),
374 pointers: linked_list::Pointers::new(),
375 state: StateCell::default(),
376 _p: PhantomPinned,
377 }
378 }
379
380 /// Gets the cached time-of-expiration value.
381 pub(super) fn cached_when(&self) -> u64 {
382 // Cached-when is only accessed under the driver lock, so we can use relaxed
383 self.cached_when.load(Ordering::Relaxed)
384 }
385
386 /// Gets the true time-of-expiration value, and copies it into the cached
387 /// time-of-expiration value.
388 ///
389 /// SAFETY: Must be called with the driver lock held, and when this entry is
390 /// not in any timer wheel lists.
391 pub(super) unsafe fn sync_when(&self) -> u64 {
392 let true_when = self.true_when();
393
394 self.cached_when.store(true_when, Ordering::Relaxed);
395
396 true_when
397 }
398
399 /// Sets the cached time-of-expiration value.
400 ///
401 /// SAFETY: Must be called with the driver lock held, and when this entry is
402 /// not in any timer wheel lists.
403 unsafe fn set_cached_when(&self, when: u64) {
404 self.cached_when.store(when, Ordering::Relaxed);
405 }
406
407 /// Returns the true time-of-expiration value, with relaxed memory ordering.
408 pub(super) fn true_when(&self) -> u64 {
409 self.state.when().expect("Timer already fired")
410 }
411
412 /// Sets the true time-of-expiration value, even if it is less than the
413 /// current expiration or the timer is deregistered.
414 ///
415 /// SAFETY: Must only be called with the driver lock held and the entry not
416 /// in the timer wheel.
417 pub(super) unsafe fn set_expiration(&self, t: u64) {
418 self.state.set_expiration(t);
419 self.cached_when.store(t, Ordering::Relaxed);
420 }
421
422 /// Sets the true time-of-expiration only if it is after the current.
423 pub(super) fn extend_expiration(&self, t: u64) -> Result<(), ()> {
424 self.state.extend_expiration(t)
425 }
426
427 /// Returns a `TimerHandle` for this timer.
428 pub(super) fn handle(&self) -> TimerHandle {
429 TimerHandle {
430 inner: NonNull::from(self),
431 }
432 }
433
434 /// Returns true if the state of this timer indicates that the timer might
435 /// be registered with the driver. This check is performed with relaxed
436 /// ordering, but is conservative - if it returns false, the timer is
437 /// definitely _not_ registered.
438 pub(super) fn might_be_registered(&self) -> bool {
439 self.state.might_be_registered()
440 }
441}
442
443unsafe impl linked_list::Link for TimerShared {
444 type Handle = TimerHandle;
445
446 type Target = TimerShared;
447
448 fn as_raw(handle: &Self::Handle) -> NonNull<Self::Target> {
449 handle.inner
450 }
451
452 unsafe fn from_raw(ptr: NonNull<Self::Target>) -> Self::Handle {
453 TimerHandle { inner: ptr }
454 }
455
456 unsafe fn pointers(
457 target: NonNull<Self::Target>,
458 ) -> NonNull<linked_list::Pointers<Self::Target>> {
459 TimerShared::addr_of_pointers(target)
460 }
461}
462
// ===== impl TimerEntry =====
464
465impl TimerEntry {
466 #[track_caller]
467 pub(crate) fn new(handle: scheduler::Handle, deadline: Instant) -> Self {
468 // Panic if the time driver is not enabled
469 let _ = handle.driver().time();
470
471 Self {
472 driver: handle,
473 inner: StdUnsafeCell::new(None),
474 deadline,
475 registered: false,
476 _m: std::marker::PhantomPinned,
477 }
478 }
479
480 fn is_inner_init(&self) -> bool {
481 unsafe { &*self.inner.get() }.is_some()
482 }
483
484 // This lazy initialization is for performance purposes.
485 fn inner(&self) -> &TimerShared {
486 let inner = unsafe { &*self.inner.get() };
487 if inner.is_none() {
488 unsafe {
489 *self.inner.get() = Some(TimerShared::new());
490 }
491 }
492 return inner.as_ref().unwrap();
493 }
494
495 pub(crate) fn deadline(&self) -> Instant {
496 self.deadline
497 }
498
499 pub(crate) fn is_elapsed(&self) -> bool {
500 self.is_inner_init() && !self.inner().state.might_be_registered() && self.registered
501 }
502
503 /// Cancels and deregisters the timer. This operation is irreversible.
504 pub(crate) fn cancel(self: Pin<&mut Self>) {
505 // Avoid calling the `clear_entry` method, because it has not been initialized yet.
506 if !self.is_inner_init() {
507 return;
508 }
509 // We need to perform an acq/rel fence with the driver thread, and the
510 // simplest way to do so is to grab the driver lock.
511 //
512 // Why is this necessary? We're about to release this timer's memory for
513 // some other non-timer use. However, we've been doing a bunch of
514 // relaxed (or even non-atomic) writes from the driver thread, and we'll
515 // be doing more from _this thread_ (as this memory is interpreted as
516 // something else).
517 //
518 // It is critical to ensure that, from the point of view of the driver,
519 // those future non-timer writes happen-after the timer is fully fired,
520 // and from the purpose of this thread, the driver's writes all
521 // happen-before we drop the timer. This in turn requires us to perform
522 // an acquire-release barrier in _both_ directions between the driver
523 // and dropping thread.
524 //
525 // The lock acquisition in clear_entry serves this purpose. All of the
526 // driver manipulations happen with the lock held, so we can just take
527 // the lock and be sure that this drop happens-after everything the
528 // driver did so far and happens-before everything the driver does in
529 // the future. While we have the lock held, we also go ahead and
530 // deregister the entry if necessary.
531 unsafe { self.driver().clear_entry(NonNull::from(self.inner())) };
532 }
533
534 pub(crate) fn reset(mut self: Pin<&mut Self>, new_time: Instant, reregister: bool) {
535 let this = unsafe { self.as_mut().get_unchecked_mut() };
536 this.deadline = new_time;
537 this.registered = reregister;
538
539 let tick = self.driver().time_source().deadline_to_tick(new_time);
540
541 if self.inner().extend_expiration(tick).is_ok() {
542 return;
543 }
544
545 if reregister {
546 unsafe {
547 self.driver()
548 .reregister(&self.driver.driver().io, tick, self.inner().into());
549 }
550 }
551 }
552
553 pub(crate) fn poll_elapsed(
554 mut self: Pin<&mut Self>,
555 cx: &mut Context<'_>,
556 ) -> Poll<Result<(), super::Error>> {
557 assert!(
558 !self.driver().is_shutdown(),
559 "{}",
560 crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR
561 );
562
563 if !self.registered {
564 let deadline = self.deadline;
565 self.as_mut().reset(deadline, true);
566 }
567
568 self.inner().state.poll(cx.waker())
569 }
570
571 pub(crate) fn driver(&self) -> &super::Handle {
572 self.driver.driver().time()
573 }
574
575 #[cfg(all(tokio_unstable, feature = "tracing"))]
576 pub(crate) fn clock(&self) -> &super::Clock {
577 self.driver.driver().clock()
578 }
579}
580
581impl TimerHandle {
582 pub(super) unsafe fn cached_when(&self) -> u64 {
583 unsafe { self.inner.as_ref().cached_when() }
584 }
585
586 pub(super) unsafe fn sync_when(&self) -> u64 {
587 unsafe { self.inner.as_ref().sync_when() }
588 }
589
590 pub(super) unsafe fn is_pending(&self) -> bool {
591 unsafe { self.inner.as_ref().state.is_pending() }
592 }
593
594 /// Forcibly sets the true and cached expiration times to the given tick.
595 ///
596 /// SAFETY: The caller must ensure that the handle remains valid, the driver
597 /// lock is held, and that the timer is not in any wheel linked lists.
598 pub(super) unsafe fn set_expiration(&self, tick: u64) {
599 self.inner.as_ref().set_expiration(tick);
600 }
601
602 /// Attempts to mark this entry as pending. If the expiration time is after
603 /// `not_after`, however, returns an Err with the current expiration time.
604 ///
605 /// If an `Err` is returned, the `cached_when` value will be updated to this
606 /// new expiration time.
607 ///
608 /// SAFETY: The caller must ensure that the handle remains valid, the driver
609 /// lock is held, and that the timer is not in any wheel linked lists.
610 /// After returning Ok, the entry must be added to the pending list.
611 pub(super) unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> {
612 match self.inner.as_ref().state.mark_pending(not_after) {
613 Ok(()) => {
614 // mark this as being on the pending queue in cached_when
615 self.inner.as_ref().set_cached_when(u64::MAX);
616 Ok(())
617 }
618 Err(tick) => {
619 self.inner.as_ref().set_cached_when(tick);
620 Err(tick)
621 }
622 }
623 }
624
625 /// Attempts to transition to a terminal state. If the state is already a
626 /// terminal state, does nothing.
627 ///
628 /// Because the entry might be dropped after the state is moved to a
629 /// terminal state, this function consumes the handle to ensure we don't
630 /// access the entry afterwards.
631 ///
632 /// Returns the last-registered waker, if any.
633 ///
634 /// SAFETY: The driver lock must be held while invoking this function, and
635 /// the entry must not be in any wheel linked lists.
636 pub(super) unsafe fn fire(self, completed_state: TimerResult) -> Option<Waker> {
637 self.inner.as_ref().state.fire(completed_state)
638 }
639}
640
641impl Drop for TimerEntry {
642 fn drop(&mut self) {
643 unsafe { Pin::new_unchecked(self) }.as_mut().cancel();
644 }
645}