redb/tree_store/page_store/region.rs

use crate::tree_store::page_store::bitmap::BtreeBitmap;
use crate::tree_store::page_store::buddy_allocator::BuddyAllocator;
use crate::tree_store::page_store::cached_file::PagedCachedFile;
use crate::tree_store::page_store::header::DatabaseHeader;
use crate::tree_store::page_store::layout::DatabaseLayout;
use crate::tree_store::page_store::page_manager::{INITIAL_REGIONS, MAX_MAX_PAGE_ORDER};
use crate::tree_store::page_store::xxh3_checksum;
use crate::tree_store::PageNumber;
use crate::Result;
use std::cmp::{self, max};
use std::mem::size_of;

const REGION_FORMAT_VERSION: u8 = 1;
const ALLOCATOR_LENGTH_OFFSET: usize = 4;
const ALLOCATOR_OFFSET: usize = ALLOCATOR_LENGTH_OFFSET + size_of::<u32>();

// Tracks the page orders that MAY BE free in each region. This data structure is optimistic, so
// a region may not actually have a page free for a given order
pub(crate) struct RegionTracker {
    order_trackers: Vec<BtreeBitmap>,
}

impl RegionTracker {
    pub(crate) fn new(regions: u32, orders: u8) -> Self {
        let mut data = vec![];
        for _ in 0..orders {
            data.push(BtreeBitmap::new(regions));
        }
        Self {
            order_trackers: data,
        }
    }

    // Format:
    // num_orders: u32 number of order allocators
    // allocator_len: u32 length of each allocator
    // data: BtreeBitmap data for each order
    pub(super) fn to_vec(&self) -> Vec<u8> {
        let mut result = vec![];
        let orders: u32 = self.order_trackers.len().try_into().unwrap();
        let allocator_len: u32 = self.order_trackers[0].to_vec().len().try_into().unwrap();
        result.extend(orders.to_le_bytes());
        result.extend(allocator_len.to_le_bytes());
        for order in &self.order_trackers {
            result.extend(&order.to_vec());
        }
        result
    }

    // `page` may contain trailing data after the serialized tracker
    pub(super) fn from_page(page: &[u8]) -> Self {
        let orders = u32::from_le_bytes(page[..size_of::<u32>()].try_into().unwrap());
        let allocator_len = u32::from_le_bytes(
            page[size_of::<u32>()..2 * size_of::<u32>()]
                .try_into()
                .unwrap(),
        ) as usize;
        let mut data = vec![];
        let mut start = 2 * size_of::<u32>();
        for _ in 0..orders {
            data.push(BtreeBitmap::from_bytes(
                &page[start..(start + allocator_len)],
            ));
            start += allocator_len;
        }

        Self {
            order_trackers: data,
        }
    }

    // Returns a region that may have a free page of the given order, if any
    pub(crate) fn find_free(&self, order: u8) -> Option<u32> {
        self.order_trackers[order as usize].find_first_unset()
    }

    // A free page of `order` can also satisfy any smaller order, so clear the bit for
    // every order up to and including `order`
    pub(crate) fn mark_free(&mut self, order: u8, region: u32) {
        let order: usize = order.into();
        for i in 0..=order {
            self.order_trackers[i].clear(region);
        }
    }

    // A region that cannot satisfy `order` cannot satisfy any larger order either, so
    // set the bit for `order` and every order above it
    pub(crate) fn mark_full(&mut self, order: u8, region: u32) {
        let order: usize = order.into();
        assert!(order < self.order_trackers.len());
        for i in order..self.order_trackers.len() {
            self.order_trackers[i].set(region);
        }
    }

    fn expand(&mut self, new_capacity: u32) {
        let mut new_trackers = vec![];
        for order in 0..self.order_trackers.len() {
            let mut new_bitmap = BtreeBitmap::new(new_capacity);
            for region in 0..self.order_trackers[order].len() {
                if !self.order_trackers[order].get(region) {
                    new_bitmap.clear(region);
                }
            }
            new_trackers.push(new_bitmap);
        }

        self.order_trackers = new_trackers;
    }

    fn capacity(&self) -> u32 {
        self.order_trackers[0].capacity()
    }

    fn len(&self) -> u32 {
        self.order_trackers[0].len()
    }
}
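
// A minimal, test-only sketch of the round trip through `to_vec`/`from_page` and of how
// mark_full / mark_free / find_free interact; it relies only on the semantics visible in
// this file, and the 4-region, 2-order setup is an arbitrary choice for illustration.
#[cfg(test)]
mod region_tracker_sketch {
    use super::RegionTracker;

    #[test]
    fn round_trip_and_find_free() {
        let mut tracker = RegionTracker::new(4, 2);
        // Start from a known state: no region can satisfy any order
        for region in 0..4u32 {
            tracker.mark_full(0, region);
        }
        // Region 2 may now satisfy allocations of order 1, and therefore order 0 as well
        tracker.mark_free(1, 2);
        assert_eq!(tracker.find_free(0), Some(2));
        assert_eq!(tracker.find_free(1), Some(2));

        // The serialized form restores the same per-order state
        let serialized = tracker.to_vec();
        let restored = RegionTracker::from_page(&serialized);
        assert_eq!(restored.find_free(0), Some(2));
        assert_eq!(restored.find_free(1), Some(2));
    }
}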

pub(crate) fn new_allocators(layout: DatabaseLayout) -> Vec<BuddyAllocator> {
    let mut result = vec![];
    for i in 0..layout.num_regions() {
        let region_layout = layout.region_layout(i);
        let allocator = BuddyAllocator::new(
            region_layout.num_pages(),
            layout.full_region_layout().num_pages(),
        );
        result.push(allocator);
    }
    result
}

pub(super) struct Allocators {
    pub(super) region_tracker: RegionTracker,
    pub(super) region_allocators: Vec<BuddyAllocator>,
}

impl Allocators {
    pub(super) fn new(layout: DatabaseLayout) -> Self {
        let mut region_allocators = vec![];
        let initial_regions = max(INITIAL_REGIONS, layout.num_regions());
        let mut region_tracker = RegionTracker::new(initial_regions, MAX_MAX_PAGE_ORDER + 1);
        for i in 0..layout.num_regions() {
            let region_layout = layout.region_layout(i);
            let allocator = BuddyAllocator::new(
                region_layout.num_pages(),
                layout.full_region_layout().num_pages(),
            );
            let max_order = allocator.get_max_order();
            region_tracker.mark_free(max_order, i);
            region_allocators.push(allocator);
        }

        Self {
            region_tracker,
            region_allocators,
        }
    }

    #[cfg(any(test, fuzzing))]
    pub(super) fn all_allocated(&self) -> Vec<PageNumber> {
        let mut pages = vec![];
        for (i, allocator) in self.region_allocators.iter().enumerate() {
            allocator.get_allocated_pages(i.try_into().unwrap(), &mut pages);
        }
        pages
    }

    pub(crate) fn xxh3_hash(&self) -> u128 {
        // Ignore the region tracker because it is an optimistic cache, and so may not match
        // between repairs of the allocators
        let mut result = 0;
        for allocator in &self.region_allocators {
            result ^= xxh3_checksum(&allocator.to_vec());
        }
        result
    }

    pub(super) fn from_bytes(header: &DatabaseHeader, storage: &PagedCachedFile) -> Result<Self> {
        let page_size = header.page_size();
        let region_header_size =
            header.layout().full_region_layout().get_header_pages() * page_size;
        let region_size = u64::from(header.layout().full_region_layout().num_pages())
            * u64::from(page_size)
            + u64::from(region_header_size);
        let range = header.region_tracker().address_range(
            page_size.into(),
            region_size,
            region_header_size.into(),
            page_size,
        );
        let len: usize = (range.end - range.start).try_into().unwrap();
        let region_tracker = storage.read_direct(range.start, len)?;
        let mut region_allocators = vec![];
        let layout = header.layout();
        for i in 0..layout.num_regions() {
            let base = layout.region_base_address(i);
            let header_len: usize = layout
                .region_layout(i)
                .data_section()
                .start
                .try_into()
                .unwrap();

            let mem = storage.read_direct(base, header_len)?;
            region_allocators.push(RegionHeader::deserialize(&mem));
        }

        Ok(Self {
            region_tracker: RegionTracker::from_page(&region_tracker),
            region_allocators,
        })
    }

    pub(super) fn flush_to(
        &self,
        region_tracker_page: PageNumber,
        layout: DatabaseLayout,
        storage: &PagedCachedFile,
    ) -> Result {
        let page_size = layout.full_region_layout().page_size();
        let region_header_size =
            u64::from(layout.full_region_layout().get_header_pages() * page_size);
        let region_size = u64::from(layout.full_region_layout().num_pages()) * u64::from(page_size)
            + region_header_size;
        let mut region_tracker_mem = {
            let range = region_tracker_page.address_range(
                page_size.into(),
                region_size,
                region_header_size,
                page_size,
            );
            let len: usize = (range.end - range.start).try_into().unwrap();
            storage.write(range.start, len, false)?
        };
        let tracker_bytes = self.region_tracker.to_vec();
        region_tracker_mem.mem_mut()[..tracker_bytes.len()].copy_from_slice(&tracker_bytes);

        assert_eq!(self.region_allocators.len(), layout.num_regions() as usize);
        for i in 0..layout.num_regions() {
            let base = layout.region_base_address(i);
            let len: usize = layout
                .region_layout(i)
                .data_section()
                .start
                .try_into()
                .unwrap();

            let mut mem = storage.write(base, len, false)?;
            RegionHeader::serialize(&self.region_allocators[i as usize], mem.mem_mut());
        }

        Ok(())
    }

    pub(super) fn resize_to(&mut self, new_layout: DatabaseLayout) {
        let shrink = match (new_layout.num_regions() as usize).cmp(&self.region_allocators.len()) {
            cmp::Ordering::Less => true,
            cmp::Ordering::Equal => {
                let allocator = self.region_allocators.last().unwrap();
                let last_region = new_layout
                    .trailing_region_layout()
                    .unwrap_or_else(|| new_layout.full_region_layout());
                match last_region.num_pages().cmp(&allocator.len()) {
                    cmp::Ordering::Less => true,
                    cmp::Ordering::Equal => {
                        // No-op
                        return;
                    }
                    cmp::Ordering::Greater => false,
                }
            }
            cmp::Ordering::Greater => false,
        };

        if shrink {
            // Drop all regions that were removed
            for i in new_layout.num_regions()..(self.region_allocators.len().try_into().unwrap()) {
                self.region_tracker.mark_full(0, i);
            }
            self.region_allocators
                .drain((new_layout.num_regions() as usize)..);

            // Resize the last region
            let last_region = new_layout
                .trailing_region_layout()
                .unwrap_or_else(|| new_layout.full_region_layout());
            let allocator = self.region_allocators.last_mut().unwrap();
            if allocator.len() > last_region.num_pages() {
                allocator.resize(last_region.num_pages());
            }
        } else {
            let old_num_regions = self.region_allocators.len();
            for i in 0..new_layout.num_regions() {
                let new_region = new_layout.region_layout(i);
                if (i as usize) < old_num_regions {
                    let allocator = &mut self.region_allocators[i as usize];
                    assert!(new_region.num_pages() >= allocator.len());
                    if new_region.num_pages() != allocator.len() {
                        allocator.resize(new_region.num_pages());
                        let highest_free = allocator.highest_free_order().unwrap();
                        self.region_tracker.mark_free(highest_free, i);
                    }
                } else {
                    // brand new region
                    let allocator = BuddyAllocator::new(
                        new_region.num_pages(),
                        new_layout.full_region_layout().num_pages(),
                    );
                    let highest_free = allocator.highest_free_order().unwrap();
                    // TODO: we should be calling .capacity(), and resizing if possible
                    if i >= self.region_tracker.len() {
                        self.region_tracker
                            .expand(self.region_tracker.capacity() * 2);
                    }
                    self.region_tracker.mark_free(highest_free, i);
                    self.region_allocators.push(allocator);
                }
            }
        }
    }
}

// Region header
// 1 byte: region format version
// 3 bytes: padding
// 4 bytes: length of the allocator state in bytes
// n bytes: the allocator state
pub(crate) struct RegionHeader {}

impl RegionHeader {
    pub(crate) fn header_pages_expensive(page_size: u32, pages_per_region: u32) -> u32 {
        let page_size = u64::from(page_size);
        // TODO: this is kind of expensive. Maybe it should be cached
        let allocator = BuddyAllocator::new(pages_per_region, pages_per_region);
        let result = 8u64 + allocator.to_vec().len() as u64;
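        // `result` is the 8 byte fixed header (version byte, 3 padding bytes, u32 length)
        // plus the allocator state; round up to whole pages. For example, a 4096 byte page
        // and a 5000 byte allocator state give 5008 bytes, i.e. 2 header pages.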
        ((result + page_size - 1) / page_size).try_into().unwrap()
    }

    fn serialize(allocator: &BuddyAllocator, output: &mut [u8]) {
        let serialized = allocator.to_vec();
        let len: u32 = serialized.len().try_into().unwrap();
        output[0] = REGION_FORMAT_VERSION;
        output[ALLOCATOR_LENGTH_OFFSET..(ALLOCATOR_LENGTH_OFFSET + size_of::<u32>())]
            .copy_from_slice(&len.to_le_bytes());
        output[ALLOCATOR_OFFSET..(ALLOCATOR_OFFSET + serialized.len())]
            .copy_from_slice(&serialized);
    }

    fn deserialize(data: &[u8]) -> BuddyAllocator {
        assert_eq!(REGION_FORMAT_VERSION, data[0]);
        let allocator_len = u32::from_le_bytes(
            data[ALLOCATOR_LENGTH_OFFSET..(ALLOCATOR_LENGTH_OFFSET + size_of::<u32>())]
                .try_into()
                .unwrap(),
        ) as usize;
        BuddyAllocator::from_bytes(&data[ALLOCATOR_OFFSET..(ALLOCATOR_OFFSET + allocator_len)])
    }
}
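
// A minimal, test-only sketch of the region header round trip described by the format
// comment above: a version byte, three padding bytes, a little-endian u32 allocator
// length, then the allocator state. The 512-page region size is an arbitrary value
// chosen for illustration.
#[cfg(test)]
mod region_header_sketch {
    use super::{RegionHeader, ALLOCATOR_LENGTH_OFFSET, ALLOCATOR_OFFSET, REGION_FORMAT_VERSION};
    use crate::tree_store::page_store::buddy_allocator::BuddyAllocator;

    #[test]
    fn serialize_round_trip() {
        let allocator = BuddyAllocator::new(512, 512);
        let serialized_len = allocator.to_vec().len();

        // The header needs the 8 fixed bytes plus the allocator state
        let mut buf = vec![0u8; ALLOCATOR_OFFSET + serialized_len];
        RegionHeader::serialize(&allocator, &mut buf);

        assert_eq!(buf[0], REGION_FORMAT_VERSION);
        let stored_len = u32::from_le_bytes(
            buf[ALLOCATOR_LENGTH_OFFSET..ALLOCATOR_OFFSET].try_into().unwrap(),
        ) as usize;
        assert_eq!(stored_len, serialized_len);

        // Deserializing the header yields an allocator covering the same number of pages
        let restored = RegionHeader::deserialize(&buf);
        assert_eq!(restored.len(), allocator.len());
    }
}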