heed/
cookbook.rs

Help
1//! A cookbook of examples on how to use heed. Here is the list of the different topics you can learn about:
2//!
3//! - [Decode Values on Demand](#decode-values-on-demand)
4//! - [Listing and Opening the Named Databases](#listing-and-opening-the-named-databases)
5//! - [Create Custom and Prefix Codecs](#create-custom-and-prefix-codecs)
6//! - [Change the Environment Size Dynamically](#change-the-environment-size-dynamically)
7//! - [Advanced Multithreaded Access of Entries](#advanced-multithreaded-access-of-entries)
8//!
9//! # Decode Values on Demand
10//!
11//! Sometimes, you need to iterate on the content of a database and
12//! conditionally decode the value depending on the key. You can use the
13//! [`Database::lazily_decode_data`] method to indicate this to heed.
14//!
15//! ```
16//! use std::collections::HashMap;
17//! use std::error::Error;
18//! use std::fs;
19//! use std::path::Path;
20//!
21//! use heed::types::*;
22//! use heed::{Database, EnvOpenOptions};
23//!
24//! pub type StringMap = HashMap<String, String>;
25//!
26//! fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
27//!     let path = Path::new("target").join("heed.mdb");
28//!
29//!     fs::create_dir_all(&path)?;
30//!
31//!     let env = unsafe {
32//!         EnvOpenOptions::new()
33//!             .map_size(1024 * 1024 * 100) // 100 MiB
34//!             .open(&path)?
35//!     };
36//!
37//!     let mut wtxn = env.write_txn()?;
38//!     let db: Database<Str, SerdeJson<StringMap>> = env.create_database(&mut wtxn, None)?;
39//!
40//!     fill_with_data(&mut wtxn, db)?;
41//!
42//!     // We make sure that iterating over this database will
43//!     // not deserialize the values. We just want to decode
44//!     // the value corresponding to the key at index 43.
45//!     for (i, result) in db.lazily_decode_data().iter(&wtxn)?.enumerate() {
46//!         let (_key, lazy_value) = result?;
47//!         if i == 43 {
48//!             // This is where the magic happens. We receive a Lazy type
49//!             // that wraps a slice of bytes. We can decode on purpose.
50//!             let value = lazy_value.decode()?;
51//!             assert_eq!(value.get("secret"), Some(&String::from("434343")));
52//!             break;
53//!         }
54//!     }
55//!
56//!     Ok(())
57//! }
58//!
59//! fn fill_with_data(
60//!     wtxn: &mut heed::RwTxn,
61//!     db: Database<Str, SerdeJson<StringMap>>,
62//! ) -> heed::Result<()> {
63//!     // This represents a very big value that we only want to decode when necessary.
64//!     let mut big_string_map = HashMap::new();
65//!     big_string_map.insert("key1".into(), "I am a very long string".into());
66//!     big_string_map.insert("key2".into(), "I am a also very long string".into());
67//!
68//!     for i in 0..100 {
69//!         let key = format!("{i:5}");
70//!         big_string_map.insert("secret".into(), format!("{i}{i}{i}"));
71//!         db.put(wtxn, &key, &big_string_map)?;
72//!     }
73//!     Ok(())
74//! }
75//! ```
76//!
77//! # Listing and Opening the Named Databases
78//!
79//! Sometimes it is useful to list the databases available in an environment.
80//! LMDB automatically stores their names in the unnamed database, a database that doesn't
81//! need to be created and in which you can also write your own entries.
82//!
83//! Once you create new databases, after defining the [`EnvOpenOptions::max_dbs`]
84//! parameter, the names of those databases are automatically stored in the unnamed one.
85//!
86//! ```
87//! use std::error::Error;
88//! use std::fs;
89//! use std::path::Path;
90//!
91//! use heed::types::*;
92//! use heed::{Database, EnvOpenOptions};
93//!
94//! fn main() -> Result<(), Box<dyn Error>> {
95//!     let env_path = Path::new("target").join("heed.mdb");
96//!
97//!     fs::create_dir_all(&env_path)?;
98//!
99//!     let env = unsafe {
100//!         EnvOpenOptions::new()
101//!             .map_size(10 * 1024 * 1024) // 10 MiB
102//!             .max_dbs(3) // Number of opened databases
103//!             .open(env_path)?
104//!     };
105//!
106//!     let rtxn = env.read_txn()?;
107//!     // The database names are mixed with the user entries therefore we prefer
108//!     // ignoring the values and try to open the databases one by one using the keys.
109//!     let unnamed: Database<Str, DecodeIgnore> =
110//!         env.open_database(&rtxn, None)?.expect("the unnamed database always exists");
111//!
112//!     // The unnamed (or main) database contains the other
113//!     // database names associated to empty values.
114//!     for result in unnamed.iter(&rtxn)? {
115//!         let (name, ()) = result?;
116//!
117//!         if let Ok(Some(_db)) = env.open_database::<Str, Bytes>(&rtxn, Some(name)) {
118//!             // We succeeded in opening a new database that
119//!             // contains strings associated to raw bytes.
120//!         }
121//!     }
122//!
123//!     // When opening databases in a read-only transaction
124//!     // you must commit your read transaction to make your
125//!     // freshly opened databases globally available.
126//!     rtxn.commit()?;
127//!
128//!     // If you abort (or drop) your read-only transaction
129//!     // the database handle will be invalid outside
130//!     // the transaction scope.
131//!
132//!     Ok(())
133//! }
134//! ```
135//!
136//! # Create Custom and Prefix Codecs
137//!
138//! With heed you can store any kind of data and serialize it the way you want.
139//! To do so you'll need to create a codec by using the [`BytesEncode`] and [`BytesDecode`] traits.
140//!
141//! Now imagine that your data is lexicographically well ordered. You can now leverage
142//! the use of prefix codecs. Those are classic codecs but are only used to encode key prefixes.
143//!
144//! In this example we will store logs associated to a timestamp. By encoding the timestamp
145//! in big endian we can create a prefix codec that restricts a subset of the data. It is recommended
146//! to create codecs to encode prefixes when possible instead of using a slice of bytes.
147//!
148//! ```
149//! use std::borrow::Cow;
150//! use std::error::Error;
151//! use std::fs;
152//! use std::path::Path;
153//!
154//! use heed::types::*;
155//! use heed::{BoxedError, BytesDecode, BytesEncode, Database, EnvOpenOptions};
156//!
157//! #[derive(Debug, PartialEq, Eq)]
158//! pub enum Level {
159//!     Debug,
160//!     Warn,
161//!     Error,
162//! }
163//!
164//! #[derive(Debug, PartialEq, Eq)]
165//! pub struct LogKey {
166//!     timestamp: u32,
167//!     level: Level,
168//! }
169//!
170//! pub struct LogKeyCodec;
171//!
172//! impl<'a> BytesEncode<'a> for LogKeyCodec {
173//!     type EItem = LogKey;
174//!
175//!     /// Encodes the u32 timestamp in big endian followed by the log level with a single byte.
176//!     fn bytes_encode(log: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
177//!         let (timestamp_bytes, level_byte) = match log {
178//!             LogKey { timestamp, level: Level::Debug } => (timestamp.to_be_bytes(), 0),
179//!             LogKey { timestamp, level: Level::Warn } => (timestamp.to_be_bytes(), 1),
180//!             LogKey { timestamp, level: Level::Error } => (timestamp.to_be_bytes(), 2),
181//!         };
182//!
183//!         let mut output = Vec::new();
184//!         output.extend_from_slice(&timestamp_bytes);
185//!         output.push(level_byte);
186//!         Ok(Cow::Owned(output))
187//!     }
188//! }
189//!
190//! impl<'a> BytesDecode<'a> for LogKeyCodec {
191//!     type DItem = LogKey;
192//!
193//!     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
194//!         use std::mem::size_of;
195//!
196//!         let timestamp = match bytes.get(..size_of::<u32>()) {
197//!             Some(bytes) => bytes.try_into().map(u32::from_be_bytes).unwrap(),
198//!             None => return Err("invalid log key: cannot extract timestamp".into()),
199//!         };
200//!
201//!         let level = match bytes.get(size_of::<u32>()) {
202//!             Some(&0) => Level::Debug,
203//!             Some(&1) => Level::Warn,
204//!             Some(&2) => Level::Error,
205//!             Some(_) => return Err("invalid log key: invalid log level".into()),
206//!             None => return Err("invalid log key: cannot extract log level".into()),
207//!         };
208//!
209//!         Ok(LogKey { timestamp, level })
210//!     }
211//! }
212//!
213//! /// Encodes the high part of a timestamp. As it is located
214//! /// at the start of the key it can be used to only return
215//! /// the logs that appeared during a, rather long, period.
216//! pub struct LogAtHalfTimestampCodec;
217//!
218//! impl<'a> BytesEncode<'a> for LogAtHalfTimestampCodec {
219//!     type EItem = u32;
220//!
221//!     /// This method encodes only the prefix of the keys: in this particular case, the high half of the timestamp.
222//!     fn bytes_encode(half_timestamp: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
223//!         Ok(Cow::Owned(half_timestamp.to_be_bytes()[..2].to_vec()))
224//!     }
225//! }
226//!
227//! impl<'a> BytesDecode<'a> for LogAtHalfTimestampCodec {
228//!     type DItem = LogKey;
229//!
230//!     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
231//!         LogKeyCodec::bytes_decode(bytes)
232//!     }
233//! }
234//!
235//! fn main() -> Result<(), Box<dyn Error>> {
236//!     let path = Path::new("target").join("heed.mdb");
237//!
238//!     fs::create_dir_all(&path)?;
239//!
240//!     let env = unsafe {
241//!         EnvOpenOptions::new()
242//!             .map_size(10 * 1024 * 1024) // 10 MiB
243//!             .max_dbs(3000)
244//!             .open(path)?
245//!     };
246//!
247//!     let mut wtxn = env.write_txn()?;
248//!     let db: Database<LogKeyCodec, Str> = env.create_database(&mut wtxn, None)?;
249//!
250//!     db.put(
251//!         &mut wtxn,
252//!         &LogKey { timestamp: 1608326232, level: Level::Debug },
253//!         "this is a very old log",
254//!     )?;
255//!     db.put(
256//!         &mut wtxn,
257//!         &LogKey { timestamp: 1708326232, level: Level::Debug },
258//!         "fibonacci was executed in 21ms",
259//!     )?;
260//!     db.put(&mut wtxn, &LogKey { timestamp: 1708326242, level: Level::Error }, "fibonacci crashed")?;
261//!     db.put(
262//!         &mut wtxn,
263//!         &LogKey { timestamp: 1708326272, level: Level::Warn },
264//!         "fibonacci is running since 12s",
265//!     )?;
266//!
267//!     // We change the way we want to read our database by changing the key codec.
268//!     // In this example we can prefix search only for the logs within a period of time
269//!     // (the two high bytes of the u32 timestamp).
270//!     let iter = db.remap_key_type::<LogAtHalfTimestampCodec>().prefix_iter(&wtxn, &1708326232)?;
271//!
272//!     // As we filtered the log for a specific
273//!     // period of time we must not see the very old log.
274//!     for result in iter {
275//!         let (LogKey { timestamp: _, level: _ }, content) = result?;
276//!         assert_ne!(content, "this is a very old log");
277//!     }
278//!
279//!     Ok(())
280//! }
281//! ```
282//!
283//! # Change the Environment Size Dynamically
284//!
285//! You must specify the maximum size of an LMDB environment when you open it.
286//! Environments do not dynamically increase their size, for performance reasons and also to
287//! give you more control over it.
288//!
289//! Here is a simple example of how to dynamically increase the size
290//! of an environment when you detect that it is running out of space.
291//!
292//! ```
293//! use std::error::Error;
294//! use std::fs;
295//! use std::path::Path;
296//!
297//! use heed::types::*;
298//! use heed::{Database, EnvOpenOptions};
299//!
300//! fn main() -> Result<(), Box<dyn Error>> {
301//!     let path = Path::new("target").join("small-space.mdb");
302//!
303//!     fs::create_dir_all(&path)?;
304//!
305//!     let env = unsafe {
306//!         EnvOpenOptions::new()
307//!             .map_size(16384) // one page
308//!             .open(&path)?
309//!     };
310//!
311//!     let mut wtxn = env.write_txn()?;
312//!     let db: Database<Str, Str> = env.create_database(&mut wtxn, None)?;
313//!
314//!     // Ho! Crap! We don't have enough space in this environment...
315//!     assert!(matches!(
316//!         fill_with_data(&mut wtxn, db),
317//!         Err(heed::Error::Mdb(heed::MdbError::MapFull))
318//!     ));
319//!
320//!     drop(wtxn);
321//!
322//!     // We need to increase the map size and we can only do that
323//!     // when no transactions are running, so closing the env is easier.
324//!     env.prepare_for_closing().wait();
325//!
326//!     let env = unsafe {
327//!         EnvOpenOptions::new()
328//!             .map_size(10 * 16384) // 10 pages
329//!             .open(&path)?
330//!     };
331//!
332//!     let mut wtxn = env.write_txn()?;
333//!     let db: Database<Str, Str> = env.create_database(&mut wtxn, None)?;
334//!
335//!     // We now have enough space in the env to store all of our entries.
336//!     assert!(matches!(fill_with_data(&mut wtxn, db), Ok(())));
337//!
338//!     Ok(())
339//! }
340//!
341//! fn fill_with_data(wtxn: &mut heed::RwTxn, db: Database<Str, Str>) -> heed::Result<()> {
342//!     for i in 0..1000 {
343//!         let key = i.to_string();
344//!         db.put(wtxn, &key, "I am a very long string")?;
345//!     }
346//!     Ok(())
347//! }
348//! ```
349//!
350//! # Advanced Multithreaded Access of Entries
351//!
352//! LMDB disallows sharing cursors among threads. It is only possible to send
353//! them between threads when the heed `read-txn-no-tls` feature is enabled.
354//!
355//! This limits some use cases that require parallel access to the content of the databases
356//! to process stuff faster. This is the case of arroy, a multithreaded, fast approximate
357//! neighbors search library. I wrote [an article explaining how
358//! to read entries in parallel][arroy article].
359//!
360//! It is forbidden to write in an environment while reading in it. However, it is possible
361//! to keep pointers to the values of the entries returned by LMDB. Those pointers are valid
362//! until the end of the transaction.
363//!
364//! Here is a small example of how to declare a data structure to be used in parallel across threads,
365//! safely. The unsafe part declares that the data structure can be shared between threads despite
366//! the write transaction not being `Send` nor `Sync`.
367//!
368//! [arroy article]: https://blog.kerollmops.com/multithreading-and-memory-mapping-refining-ann-performance-with-arroy
369//!
370//! ```
371//! use std::collections::HashMap;
372//! use std::error::Error;
373//! use std::fs;
374//! use std::path::Path;
375//!
376//! use heed::types::*;
377//! use heed::{Database, EnvOpenOptions, RoTxn};
378//!
379//! fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
380//!     let path = Path::new("target").join("heed.mdb");
381//!
382//!     fs::create_dir_all(&path)?;
383//!
384//!     let env = unsafe {
385//!         EnvOpenOptions::new()
386//!             .map_size(1024 * 1024 * 100) // 100 MiB
387//!             .open(&path)?
388//!     };
389//!
390//!     let mut wtxn = env.write_txn()?;
391//!     let db: Database<Str, Str> = env.create_database(&mut wtxn, None)?;
392//!
393//!     fill_with_data(&mut wtxn, db)?;
394//!
395//!     let immutable_map = ImmutableMap::from_db(&wtxn, db)?;
396//!
397//!     // We can share the immutable map over multiple threads because it is Sync.
398//!     // It is safe because we keep the write transaction lifetime in this type.
399//!     std::thread::scope(|s| {
400//!         s.spawn(|| {
401//!             let value = immutable_map.get("10");
402//!             assert_eq!(value, Some("I am a very long string"));
403//!         });
404//!         s.spawn(|| {
405//!             let value = immutable_map.get("20");
406//!             assert_eq!(value, Some("I am a very long string"));
407//!         });
408//!     });
409//!
410//!     // You can see that we always have it on the main thread.
411//!     // We didn't send it across threads.
412//!     let value = immutable_map.get("50");
413//!     assert_eq!(value, Some("I am a very long string"));
414//!
415//!     Ok(())
416//! }
417//!
418//! fn fill_with_data(wtxn: &mut heed::RwTxn, db: Database<Str, Str>) -> heed::Result<()> {
419//!     for i in 0..100 {
420//!         let key = i.to_string();
421//!         db.put(wtxn, &key, "I am a very long string")?;
422//!     }
423//!     Ok(())
424//! }
425//!
426//! struct ImmutableMap<'a> {
427//!     map: HashMap<&'a str, &'a str>,
428//! }
429//!
430//! impl<'t> ImmutableMap<'t> {
431//!     fn from_db(rtxn: &'t RoTxn, db: Database<Str, Str>) -> heed::Result<Self> {
432//!         let mut map = HashMap::new();
433//!         for result in db.iter(rtxn)? {
434//!             let (k, v) = result?;
435//!             map.insert(k, v);
436//!         }
437//!         Ok(ImmutableMap { map })
438//!     }
439//!
440//!     fn get(&self, key: &str) -> Option<&'t str> {
441//!         self.map.get(key).copied()
442//!     }
443//! }
444//!
445//! unsafe impl Sync for ImmutableMap<'_> {}
446//! ```
447//!
448
449// To let cargo generate doc links
450#![allow(unused_imports)]
451
452use crate::{BytesDecode, BytesEncode, Database, EnvOpenOptions};
heed/cookbook.rs

heed/
cookbook.rs