cuprate_database/backend/heed/env.rs
1//! Implementation of `trait Env` for `heed`.
2
3//---------------------------------------------------------------------------------------------------- Import
4use std::{
5 cell::RefCell,
6 num::NonZeroUsize,
7 sync::{RwLock, RwLockReadGuard},
8};
9
10use heed::{DatabaseFlags, EnvFlags, EnvOpenOptions};
11use tracing::{debug, warn};
12
13use crate::{
14 backend::heed::{
15 database::{HeedTableRo, HeedTableRw},
16 storable::StorableHeed,
17 types::HeedDb,
18 },
19 config::{Config, SyncMode},
20 database::{DatabaseIter, DatabaseRo, DatabaseRw},
21 env::{Env, EnvInner},
22 error::{DbResult, InitError, RuntimeError},
23 key::{Key, KeyCompare},
24 resize::ResizeAlgorithm,
25 table::Table,
26};
27
28//---------------------------------------------------------------------------------------------------- ConcreteEnv
29/// A strongly typed, concrete database environment, backed by `heed`.
30pub struct ConcreteEnv {
31 /// The actual database environment.
32 ///
33 /// # Why `RwLock`?
34 /// We need mutual exclusive access to the environment for resizing.
35 ///
36 /// Using 2 atomics for mutual exclusion was considered:
37 /// - `currently_resizing: AtomicBool`
38 /// - `reader_count: AtomicUsize`
39 ///
40 /// This is how `monerod` does it:
41 /// <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L354-L355>
42 ///
43 /// `currently_resizing` would be set to `true` on resizes and
44 /// `reader_count` would be spinned on until 0, at which point
45 /// we are safe to resize.
46 ///
47 /// Although, 3 atomic operations (check atomic bool, `reader_count++`, `reader_count--`)
48 /// turns out to be roughly as expensive as acquiring a non-contended `RwLock`,
49 /// the CPU sleeping instead of spinning is much better too.
50 ///
51 /// # `unwrap()`
52 /// This will be [`unwrap()`]ed everywhere.
53 ///
54 /// If lock is poisoned, we want all of Cuprate to panic.
55 env: RwLock<heed::Env>,
56
57 /// The configuration we were opened with
58 /// (and in current use).
59 pub(super) config: Config,
60}
61
62impl Drop for ConcreteEnv {
63 fn drop(&mut self) {
64 // INVARIANT: drop(ConcreteEnv) must sync.
65 //
66 // SOMEDAY:
67 // "if the environment has the MDB_NOSYNC flag set the flushes will be omitted,
68 // and with MDB_MAPASYNC they will be asynchronous."
69 // <http://www.lmdb.tech/doc/group__mdb.html#ga85e61f05aa68b520cc6c3b981dba5037>
70 //
71 // We need to do `mdb_env_set_flags(&env, MDB_NOSYNC|MDB_ASYNCMAP, 0)`
72 // to clear the no sync and async flags such that the below `self.sync()`
73 // _actually_ synchronously syncs.
74 if let Err(e) = Env::sync(self) {
75 warn!("Env sync error: {e}");
76 }
77
78 // <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L49-L61>
79 let result = self.env.read().unwrap().clear_stale_readers();
80
81 match result {
82 Ok(n) => debug!("LMDB stale readers cleared: {n}"),
83 Err(e) => debug!("LMDB stale reader clear error: {e:?}"),
84 }
85 }
86}
87
88//---------------------------------------------------------------------------------------------------- Env Impl
89impl Env for ConcreteEnv {
90 const MANUAL_RESIZE: bool = true;
91 const SYNCS_PER_TX: bool = false;
92 type EnvInner<'env> = RwLockReadGuard<'env, heed::Env>;
93 type TxRo<'tx> = heed::RoTxn<'tx>;
94
95 /// HACK:
96 /// `heed::RwTxn` is wrapped in `RefCell` to allow:
97 /// - opening a database with only a `&` to it
98 /// - allowing 1 write tx to open multiple tables
99 ///
100 /// Our mutable accesses are safe and will not panic as:
101 /// - Write transactions are `!Sync`
102 /// - A table operation does not hold a reference to the inner cell
103 /// once the call is over
104 /// - The function to manipulate the table takes the same type
105 /// of reference that the `RefCell` gets for that function
106 ///
107 /// Also see:
108 /// - <https://github.com/Cuprate/cuprate/pull/102#discussion_r1548695610>
109 /// - <https://github.com/Cuprate/cuprate/pull/104>
110 type TxRw<'tx> = RefCell<heed::RwTxn<'tx>>;
111
112 #[cold]
113 #[inline(never)] // called once.
114 fn open(config: Config) -> Result<Self, InitError> {
115 // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
116
117 let mut env_open_options = EnvOpenOptions::new();
118
119 // Map our `Config` sync mode to the LMDB environment flags.
120 //
121 // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
122 let flags = match config.sync_mode {
123 SyncMode::Safe => EnvFlags::empty(),
124 // TODO: impl `FastThenSafe`
125 SyncMode::FastThenSafe | SyncMode::Fast => {
126 EnvFlags::NO_SYNC | EnvFlags::WRITE_MAP | EnvFlags::MAP_ASYNC
127 }
128 };
129
130 // SAFETY: the flags we're setting are 'unsafe'
131 // from a data durability perspective, although,
132 // the user config wanted this.
133 //
134 // MAYBE: We may need to open/create tables with certain flags
135 // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
136 // MAYBE: Set comparison functions for certain tables
137 // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
138 unsafe {
139 env_open_options.flags(flags | EnvFlags::NO_READ_AHEAD);
140 }
141
142 // Set the memory map size to
143 // (current disk size) + (a bit of leeway)
144 // to account for empty databases where we
145 // need to write same tables.
146 #[expect(clippy::cast_possible_truncation, reason = "only 64-bit targets")]
147 let disk_size_bytes = match std::fs::File::open(&config.db_file) {
148 Ok(file) => file.metadata()?.len() as usize,
149 // The database file doesn't exist, 0 bytes.
150 Err(io_err) if io_err.kind() == std::io::ErrorKind::NotFound => 0,
151 Err(io_err) => return Err(io_err.into()),
152 };
153 // Add leeway space.
154 let memory_map_size = crate::resize::fixed_bytes(disk_size_bytes, 1_000_000 /* 1MB */);
155 env_open_options.map_size(memory_map_size.get());
156
157 // Set the max amount of database tables.
158 // We know at compile time how many tables there are.
159 // SOMEDAY: ...how many?
160 env_open_options.max_dbs(32);
161
162 // LMDB documentation:
163 // ```
164 // Number of slots in the reader table.
165 // This value was chosen somewhat arbitrarily. 126 readers plus a
166 // couple mutexes fit exactly into 8KB on my development machine.
167 // ```
168 // <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/mdb.c#L794-L799>
169 //
170 // So, we're going to be following these rules:
171 // - Use at least 126 reader threads
172 // - Add 16 extra reader threads if <126
173 //
174 // FIXME: This behavior is from `monerod`:
175 // <https://github.com/monero-project/monero/blob/059028a30a8ae9752338a7897329fe8012a310d5/src/blockchain_db/lmdb/db_lmdb.cpp#L1324>
176 // I believe this could be adjusted percentage-wise so very high
177 // thread PCs can benefit from something like (cuprated + anything that uses the DB in the future).
178 // For now:
179 // - No other program using our DB exists
180 // - Almost no-one has a 126+ thread CPU
181 let reader_threads = u32::try_from(config.reader_threads.get()).unwrap_or(u32::MAX);
182 env_open_options.max_readers(if reader_threads < 110 {
183 126
184 } else {
185 reader_threads.saturating_add(16)
186 });
187
188 // Create the database directory if it doesn't exist.
189 std::fs::create_dir_all(config.db_directory())?;
190 // Open the environment in the user's PATH.
191 // SAFETY: LMDB uses a memory-map backed file.
192 // <https://docs.rs/heed/0.20.0/heed/struct.EnvOpenOptions.html#method.open>
193 let env = unsafe { env_open_options.open(config.db_directory())? };
194
195 Ok(Self {
196 env: RwLock::new(env),
197 config,
198 })
199 }
200
201 fn config(&self) -> &Config {
202 &self.config
203 }
204
205 fn sync(&self) -> DbResult<()> {
206 Ok(self.env.read().unwrap().force_sync()?)
207 }
208
209 fn resize_map(&self, resize_algorithm: Option<ResizeAlgorithm>) -> NonZeroUsize {
210 let resize_algorithm = resize_algorithm.unwrap_or_else(|| self.config().resize_algorithm);
211
212 let current_size_bytes = self.current_map_size();
213 let new_size_bytes = resize_algorithm.resize(current_size_bytes);
214
215 // SAFETY:
216 // Resizing requires that we have
217 // exclusive access to the database environment.
218 // Our `heed::Env` is wrapped within a `RwLock`,
219 // and we have a WriteGuard to it, so we're safe.
220 //
221 // <http://www.lmdb.tech/doc/group__mdb.html#gaa2506ec8dab3d969b0e609cd82e619e5>
222 unsafe {
223 // INVARIANT: `resize()` returns a valid `usize` to resize to.
224 self.env
225 .write()
226 .unwrap()
227 .resize(new_size_bytes.get())
228 .unwrap();
229 }
230
231 new_size_bytes
232 }
233
234 #[inline]
235 fn current_map_size(&self) -> usize {
236 self.env.read().unwrap().info().map_size
237 }
238
239 #[inline]
240 fn env_inner(&self) -> Self::EnvInner<'_> {
241 self.env.read().unwrap()
242 }
243}
244
245//---------------------------------------------------------------------------------------------------- EnvInner Impl
246impl<'env> EnvInner<'env> for RwLockReadGuard<'env, heed::Env>
247where
248 Self: 'env,
249{
250 type Ro<'a> = heed::RoTxn<'a>;
251
252 type Rw<'a> = RefCell<heed::RwTxn<'a>>;
253
254 #[inline]
255 fn tx_ro(&self) -> DbResult<Self::Ro<'_>> {
256 Ok(self.read_txn()?)
257 }
258
259 #[inline]
260 fn tx_rw(&self) -> DbResult<Self::Rw<'_>> {
261 Ok(RefCell::new(self.write_txn()?))
262 }
263
264 #[inline]
265 fn open_db_ro<T: Table>(
266 &self,
267 tx_ro: &Self::Ro<'_>,
268 ) -> DbResult<impl DatabaseRo<T> + DatabaseIter<T>> {
269 // Open up a read-only database using our table's const metadata.
270 //
271 // INVARIANT: LMDB caches the ordering / comparison function from [`EnvInner::create_db`],
272 // and we're relying on that since we aren't setting that here.
273 // <https://github.com/Cuprate/cuprate/pull/198#discussion_r1659422277>
274 Ok(HeedTableRo {
275 db: self
276 .open_database(tx_ro, Some(T::NAME))?
277 .ok_or(RuntimeError::TableNotFound)?,
278 tx_ro,
279 })
280 }
281
282 #[inline]
283 fn open_db_rw<T: Table>(&self, tx_rw: &Self::Rw<'_>) -> DbResult<impl DatabaseRw<T>> {
284 // Open up a read/write database using our table's const metadata.
285 //
286 // INVARIANT: LMDB caches the ordering / comparison function from [`EnvInner::create_db`],
287 // and we're relying on that since we aren't setting that here.
288 // <https://github.com/Cuprate/cuprate/pull/198#discussion_r1659422277>
289 Ok(HeedTableRw {
290 db: self.create_database(&mut tx_rw.borrow_mut(), Some(T::NAME))?,
291 tx_rw,
292 })
293 }
294
295 fn create_db<T: Table>(&self, tx_rw: &Self::Rw<'_>) -> DbResult<()> {
296 // Create a database using our:
297 // - [`Table`]'s const metadata.
298 // - (potentially) our [`Key`] comparison function
299 let mut tx_rw = tx_rw.borrow_mut();
300 let mut db = self.database_options();
301 db.name(T::NAME);
302
303 // Set the key comparison behavior.
304 match <T::Key>::KEY_COMPARE {
305 // Use LMDB's default comparison function.
306 KeyCompare::Default => {
307 db.create(&mut tx_rw)?;
308 }
309
310 // Instead of setting a custom [`heed::Comparator`],
311 // use this LMDB flag; it is ~10% faster.
312 KeyCompare::Number => {
313 db.flags(DatabaseFlags::INTEGER_KEY).create(&mut tx_rw)?;
314 }
315
316 // Use a custom comparison function if specified.
317 KeyCompare::Custom(_) => {
318 db.key_comparator::<StorableHeed<T::Key>>()
319 .create(&mut tx_rw)?;
320 }
321 }
322
323 Ok(())
324 }
325
326 #[inline]
327 fn clear_db<T: Table>(&self, tx_rw: &mut Self::Rw<'_>) -> DbResult<()> {
328 let tx_rw = tx_rw.get_mut();
329
330 // Open the table. We don't care about flags or key
331 // comparison behavior since we're clearing it anyway.
332 let db: HeedDb<T::Key, T::Value> = self
333 .open_database(tx_rw, Some(T::NAME))?
334 .ok_or(RuntimeError::TableNotFound)?;
335
336 db.clear(tx_rw)?;
337
338 Ok(())
339 }
340}
341
342//---------------------------------------------------------------------------------------------------- Tests
#[cfg(test)]
mod tests {
    // This module currently contains no tests.
}