cuprate_database/backend/heed/
env.rs

1//! Implementation of `trait Env` for `heed`.
2
3//---------------------------------------------------------------------------------------------------- Import
4use std::{
5    cell::RefCell,
6    num::NonZeroUsize,
7    sync::{RwLock, RwLockReadGuard},
8};
9
10use heed::{DatabaseFlags, EnvFlags, EnvOpenOptions};
11
12use crate::{
13    backend::heed::{
14        database::{HeedTableRo, HeedTableRw},
15        storable::StorableHeed,
16        types::HeedDb,
17    },
18    config::{Config, SyncMode},
19    database::{DatabaseIter, DatabaseRo, DatabaseRw},
20    env::{Env, EnvInner},
21    error::{DbResult, InitError, RuntimeError},
22    key::{Key, KeyCompare},
23    resize::ResizeAlgorithm,
24    table::Table,
25};
26
27//---------------------------------------------------------------------------------------------------- ConcreteEnv
28/// A strongly typed, concrete database environment, backed by `heed`.
29pub struct ConcreteEnv {
30    /// The actual database environment.
31    ///
32    /// # Why `RwLock`?
33    /// We need mutual exclusive access to the environment for resizing.
34    ///
35    /// Using 2 atomics for mutual exclusion was considered:
36    /// - `currently_resizing: AtomicBool`
37    /// - `reader_count: AtomicUsize`
38    ///
39    /// This is how `monerod` does it:
40    /// <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L354-L355>
41    ///
42    /// `currently_resizing` would be set to `true` on resizes and
43    /// `reader_count` would be spinned on until 0, at which point
44    /// we are safe to resize.
45    ///
46    /// Although, 3 atomic operations (check atomic bool, `reader_count++`, `reader_count--`)
47    /// turns out to be roughly as expensive as acquiring a non-contended `RwLock`,
48    /// the CPU sleeping instead of spinning is much better too.
49    ///
50    /// # `unwrap()`
51    /// This will be [`unwrap()`]ed everywhere.
52    ///
53    /// If lock is poisoned, we want all of Cuprate to panic.
54    env: RwLock<heed::Env>,
55
56    /// The configuration we were opened with
57    /// (and in current use).
58    pub(super) config: Config,
59}
60
61impl Drop for ConcreteEnv {
62    fn drop(&mut self) {
63        // INVARIANT: drop(ConcreteEnv) must sync.
64        //
65        // SOMEDAY:
66        // "if the environment has the MDB_NOSYNC flag set the flushes will be omitted,
67        // and with MDB_MAPASYNC they will be asynchronous."
68        // <http://www.lmdb.tech/doc/group__mdb.html#ga85e61f05aa68b520cc6c3b981dba5037>
69        //
70        // We need to do `mdb_env_set_flags(&env, MDB_NOSYNC|MDB_ASYNCMAP, 0)`
71        // to clear the no sync and async flags such that the below `self.sync()`
72        // _actually_ synchronously syncs.
73        if let Err(_e) = Env::sync(self) {
74            // TODO: log error?
75        }
76
77        // TODO: log that we are dropping the database.
78
79        // TODO: use tracing.
80        // <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L49-L61>
81        let result = self.env.read().unwrap().clear_stale_readers();
82        match result {
83            Ok(n) => println!("LMDB stale readers cleared: {n}"),
84            Err(e) => println!("LMDB stale reader clear error: {e:?}"),
85        }
86    }
87}
88
89//---------------------------------------------------------------------------------------------------- Env Impl
90impl Env for ConcreteEnv {
91    const MANUAL_RESIZE: bool = true;
92    const SYNCS_PER_TX: bool = false;
93    type EnvInner<'env> = RwLockReadGuard<'env, heed::Env>;
94    type TxRo<'tx> = heed::RoTxn<'tx>;
95
96    /// HACK:
97    /// `heed::RwTxn` is wrapped in `RefCell` to allow:
98    /// - opening a database with only a `&` to it
99    /// - allowing 1 write tx to open multiple tables
100    ///
101    /// Our mutable accesses are safe and will not panic as:
102    /// - Write transactions are `!Sync`
103    /// - A table operation does not hold a reference to the inner cell
104    ///   once the call is over
105    /// - The function to manipulate the table takes the same type
106    ///   of reference that the `RefCell` gets for that function
107    ///
108    /// Also see:
109    /// - <https://github.com/Cuprate/cuprate/pull/102#discussion_r1548695610>
110    /// - <https://github.com/Cuprate/cuprate/pull/104>
111    type TxRw<'tx> = RefCell<heed::RwTxn<'tx>>;
112
113    #[cold]
114    #[inline(never)] // called once.
115    fn open(config: Config) -> Result<Self, InitError> {
116        // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
117
118        let mut env_open_options = EnvOpenOptions::new();
119
120        // Map our `Config` sync mode to the LMDB environment flags.
121        //
122        // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
123        let flags = match config.sync_mode {
124            SyncMode::Safe => EnvFlags::empty(),
125            // TODO: impl `FastThenSafe`
126            SyncMode::FastThenSafe | SyncMode::Fast => {
127                EnvFlags::NO_SYNC | EnvFlags::WRITE_MAP | EnvFlags::MAP_ASYNC
128            }
129        };
130
131        // SAFETY: the flags we're setting are 'unsafe'
132        // from a data durability perspective, although,
133        // the user config wanted this.
134        //
135        // MAYBE: We may need to open/create tables with certain flags
136        // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
137        // MAYBE: Set comparison functions for certain tables
138        // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
139        unsafe {
140            env_open_options.flags(flags | EnvFlags::NO_READ_AHEAD);
141        }
142
143        // Set the memory map size to
144        // (current disk size) + (a bit of leeway)
145        // to account for empty databases where we
146        // need to write same tables.
147        #[expect(clippy::cast_possible_truncation, reason = "only 64-bit targets")]
148        let disk_size_bytes = match std::fs::File::open(&config.db_file) {
149            Ok(file) => file.metadata()?.len() as usize,
150            // The database file doesn't exist, 0 bytes.
151            Err(io_err) if io_err.kind() == std::io::ErrorKind::NotFound => 0,
152            Err(io_err) => return Err(io_err.into()),
153        };
154        // Add leeway space.
155        let memory_map_size = crate::resize::fixed_bytes(disk_size_bytes, 1_000_000 /* 1MB */);
156        env_open_options.map_size(memory_map_size.get());
157
158        // Set the max amount of database tables.
159        // We know at compile time how many tables there are.
160        // SOMEDAY: ...how many?
161        env_open_options.max_dbs(32);
162
163        // LMDB documentation:
164        // ```
165        // Number of slots in the reader table.
166        // This value was chosen somewhat arbitrarily. 126 readers plus a
167        // couple mutexes fit exactly into 8KB on my development machine.
168        // ```
169        // <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/mdb.c#L794-L799>
170        //
171        // So, we're going to be following these rules:
172        // - Use at least 126 reader threads
173        // - Add 16 extra reader threads if <126
174        //
175        // FIXME: This behavior is from `monerod`:
176        // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
177        // I believe this could be adjusted percentage-wise so very high
178        // thread PCs can benefit from something like (cuprated + anything that uses the DB in the future).
179        // For now:
180        // - No other program using our DB exists
181        // - Almost no-one has a 126+ thread CPU
182        let reader_threads = u32::try_from(config.reader_threads.get()).unwrap_or(u32::MAX);
183        env_open_options.max_readers(if reader_threads < 110 {
184            126
185        } else {
186            reader_threads.saturating_add(16)
187        });
188
189        // Create the database directory if it doesn't exist.
190        std::fs::create_dir_all(config.db_directory())?;
191        // Open the environment in the user's PATH.
192        // SAFETY: LMDB uses a memory-map backed file.
193        // <https://docs.rs/heed/0.20.0/heed/struct.EnvOpenOptions.html#method.open>
194        let env = unsafe { env_open_options.open(config.db_directory())? };
195
196        Ok(Self {
197            env: RwLock::new(env),
198            config,
199        })
200    }
201
202    fn config(&self) -> &Config {
203        &self.config
204    }
205
206    fn sync(&self) -> DbResult<()> {
207        Ok(self.env.read().unwrap().force_sync()?)
208    }
209
210    fn resize_map(&self, resize_algorithm: Option<ResizeAlgorithm>) -> NonZeroUsize {
211        let resize_algorithm = resize_algorithm.unwrap_or_else(|| self.config().resize_algorithm);
212
213        let current_size_bytes = self.current_map_size();
214        let new_size_bytes = resize_algorithm.resize(current_size_bytes);
215
216        // SAFETY:
217        // Resizing requires that we have
218        // exclusive access to the database environment.
219        // Our `heed::Env` is wrapped within a `RwLock`,
220        // and we have a WriteGuard to it, so we're safe.
221        //
222        // <http://www.lmdb.tech/doc/group__mdb.html#gaa2506ec8dab3d969b0e609cd82e619e5>
223        unsafe {
224            // INVARIANT: `resize()` returns a valid `usize` to resize to.
225            self.env
226                .write()
227                .unwrap()
228                .resize(new_size_bytes.get())
229                .unwrap();
230        }
231
232        new_size_bytes
233    }
234
235    #[inline]
236    fn current_map_size(&self) -> usize {
237        self.env.read().unwrap().info().map_size
238    }
239
240    #[inline]
241    fn env_inner(&self) -> Self::EnvInner<'_> {
242        self.env.read().unwrap()
243    }
244}
245
246//---------------------------------------------------------------------------------------------------- EnvInner Impl
247impl<'env> EnvInner<'env> for RwLockReadGuard<'env, heed::Env>
248where
249    Self: 'env,
250{
251    type Ro<'a> = heed::RoTxn<'a>;
252
253    type Rw<'a> = RefCell<heed::RwTxn<'a>>;
254
255    #[inline]
256    fn tx_ro(&self) -> DbResult<Self::Ro<'_>> {
257        Ok(self.read_txn()?)
258    }
259
260    #[inline]
261    fn tx_rw(&self) -> DbResult<Self::Rw<'_>> {
262        Ok(RefCell::new(self.write_txn()?))
263    }
264
265    #[inline]
266    fn open_db_ro<T: Table>(
267        &self,
268        tx_ro: &Self::Ro<'_>,
269    ) -> DbResult<impl DatabaseRo<T> + DatabaseIter<T>> {
270        // Open up a read-only database using our table's const metadata.
271        //
272        // INVARIANT: LMDB caches the ordering / comparison function from [`EnvInner::create_db`],
273        // and we're relying on that since we aren't setting that here.
274        // <https://github.com/Cuprate/cuprate/pull/198#discussion_r1659422277>
275        Ok(HeedTableRo {
276            db: self
277                .open_database(tx_ro, Some(T::NAME))?
278                .ok_or(RuntimeError::TableNotFound)?,
279            tx_ro,
280        })
281    }
282
283    #[inline]
284    fn open_db_rw<T: Table>(&self, tx_rw: &Self::Rw<'_>) -> DbResult<impl DatabaseRw<T>> {
285        // Open up a read/write database using our table's const metadata.
286        //
287        // INVARIANT: LMDB caches the ordering / comparison function from [`EnvInner::create_db`],
288        // and we're relying on that since we aren't setting that here.
289        // <https://github.com/Cuprate/cuprate/pull/198#discussion_r1659422277>
290        Ok(HeedTableRw {
291            db: self.create_database(&mut tx_rw.borrow_mut(), Some(T::NAME))?,
292            tx_rw,
293        })
294    }
295
296    fn create_db<T: Table>(&self, tx_rw: &Self::Rw<'_>) -> DbResult<()> {
297        // Create a database using our:
298        // - [`Table`]'s const metadata.
299        // - (potentially) our [`Key`] comparison function
300        let mut tx_rw = tx_rw.borrow_mut();
301        let mut db = self.database_options();
302        db.name(T::NAME);
303
304        // Set the key comparison behavior.
305        match <T::Key>::KEY_COMPARE {
306            // Use LMDB's default comparison function.
307            KeyCompare::Default => {
308                db.create(&mut tx_rw)?;
309            }
310
311            // Instead of setting a custom [`heed::Comparator`],
312            // use this LMDB flag; it is ~10% faster.
313            KeyCompare::Number => {
314                db.flags(DatabaseFlags::INTEGER_KEY).create(&mut tx_rw)?;
315            }
316
317            // Use a custom comparison function if specified.
318            KeyCompare::Custom(_) => {
319                db.key_comparator::<StorableHeed<T::Key>>()
320                    .create(&mut tx_rw)?;
321            }
322        }
323
324        Ok(())
325    }
326
327    #[inline]
328    fn clear_db<T: Table>(&self, tx_rw: &mut Self::Rw<'_>) -> DbResult<()> {
329        let tx_rw = tx_rw.get_mut();
330
331        // Open the table. We don't care about flags or key
332        // comparison behavior since we're clearing it anyway.
333        let db: HeedDb<T::Key, T::Value> = self
334            .open_database(tx_rw, Some(T::NAME))?
335            .ok_or(RuntimeError::TableNotFound)?;
336
337        db.clear(tx_rw)?;
338
339        Ok(())
340    }
341}
342
343//---------------------------------------------------------------------------------------------------- Tests
#[cfg(test)]
mod tests {
    // No unit tests are defined in this module.
}