ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
MMappedUUIDHashTable.cpp
Go to the documentation of this file.
1/**
2 * @file MMappedUUIDHashTable.cpp
3 * @brief Open-addressing hash table over a memory-mapped file: implementation.
4 *
5 * Implements all methods of @c MMappedUUIDHashTable declared in
6 * @c MMappedUUIDHashTable.h:
7 * - @c MMappedUUIDHashTable(): open/create the backing file and map it.
8 * - @c ~MMappedUUIDHashTable(): sync and unmap.
9 * - @c add(): insert a UUID and assign the next sequential integer.
10 * - @c operator[](): look up an integer by UUID.
11 * - @c sync(): flush dirty pages with @c msync().
12 *
13 * Internal helpers:
14 * - @c mmap(): map (or remap) @p length bytes of the backing file.
15 * - @c grow(): double the table size and rehash.
16 * - @c find(): locate the slot index holding a UUID, or the index of the empty slot where it would be inserted if absent.
17 * - @c set(): write a key-value pair into the table.
18 */
20
21#include <cassert>
22#include <cerrno>
23#include <cstring>
24#include <new>
25#include <stdexcept>
26#include <vector>
27
28#include <fcntl.h>
29#include <unistd.h>
30
31#include <sys/mman.h>
32
33MMappedUUIDHashTable::MMappedUUIDHashTable(const char *filename, bool read_only)
34{
35 fd=open(filename, O_CREAT|(read_only?O_RDONLY:O_RDWR), 0600); // flawfinder: ignore
36 if(fd==-1)
37 throw std::runtime_error(strerror(errno));
38
39 auto size = lseek(fd, 0, SEEK_END);
40 lseek(fd, 0, SEEK_SET);
41
42 bool empty=false;
43
44 if(size==0) {
45 empty=true;
47 if(ftruncate(fd, size))
48 throw std::runtime_error(strerror(errno));
49 }
50
51 mmap(size, read_only);
52
53 if(empty) {
55 table->nb_elements = 0;
56 table->next_value = 0;
57 for(unsigned long i=0; i<table->capacity(); ++i) {
58 table->t[i].value = NOTHING;
59 }
60 }
61}
62
63void MMappedUUIDHashTable::mmap(size_t length, bool read_only)
64{
65 table = reinterpret_cast<table_t *>(::mmap(
66 NULL,
67 length,
68 PROT_READ|(read_only?0:PROT_WRITE),
69 MAP_SHARED,
70 fd,
71 0));
72 if(table == MAP_FAILED)
73 throw std::runtime_error(strerror(errno));
74}
75
77{
78 sync();
79
80 std::vector<value_t> elements;
81 elements.reserve(table->nb_elements);
82 for(unsigned long i=0; i<table->capacity(); ++i)
83 if(table->t[i].value != NOTHING)
84 elements.push_back(table->t[i]);
85
86 auto new_log_size = table->log_size+1;
88
89 auto new_size = table_t::sizeForLogSize(new_log_size);
90 if(ftruncate(fd, new_size))
91 throw std::runtime_error(strerror(errno));
92 mmap(new_size, false);
93
94 table->log_size = new_log_size;
95 for(unsigned long i=0; i<table->capacity(); ++i) {
96 table->t[i].value = NOTHING;
97 }
98 for(const auto &u: elements)
99 set(u.uuid, u.value);
100}
101
107
109{
110 auto k = hash(u);
111 while(table->t[k].value != NOTHING &&
112 std::memcmp(&table->t[k].uuid, &u, sizeof(pg_uuid_t))) {
113 k = (k+1) % table->capacity();
114 }
115
116 return k;
117}
118
120{
121 auto k = find(u);
122
123 return table->t[k].value;
124}
125
126std::pair<unsigned long,bool> MMappedUUIDHashTable::add(pg_uuid_t u)
127{
128 auto k = find(u);
129
130 if(table->t[k].value == NOTHING) {
132 grow();
133 }
134 k = find(u);
136 table->t[k].uuid = u;
137 return std::make_pair(table->t[k].value = table->next_value++, true);
138 } else
139 return std::make_pair(table->t[k].value, false);
140}
141
142// Only used when growing the table, so no need to check/update nb_elements
143void MMappedUUIDHashTable::set(pg_uuid_t u, unsigned long i)
144{
145 table->t[find(u)] = {u, i};
146}
147
Open-addressing hash table mapping UUIDs to integers, backed by an mmap file.
void set(pg_uuid_t u, unsigned long i)
Store the mapping u → i in the table.
unsigned long hash(pg_uuid_t u) const
Compute the starting slot index for UUID u.
void grow()
Double the table capacity and rehash all existing entries.
int fd
File descriptor of the backing mmap file.
std::pair< unsigned long, bool > add(pg_uuid_t u)
Insert UUID u, assigning it the next available integer.
static constexpr unsigned STARTING_LOG_SIZE
Initial log2 capacity (65 536 slots).
unsigned long find(pg_uuid_t u) const
Find the slot index holding u, or the index of the empty slot where u would be inserted if absent.
unsigned long operator[](pg_uuid_t u) const
Look up the integer index for UUID u.
void mmap(size_t length, bool read_only)
Map length bytes from the backing file (read-write or read-only).
~MMappedUUIDHashTable()
Sync and unmap the file.
void sync()
Flush dirty pages to the backing file with msync().
MMappedUUIDHashTable(const char *filename, bool read_only)
Open (or create) the mmap-backed hash table.
static constexpr unsigned long NOTHING
Sentinel returned by operator[]() when the UUID is not present.
static constexpr double MAXIMUM_LOAD_FACTOR
Rehash when this fraction of slots is occupied.
table_t * table
Pointer to the memory-mapped table header.
On-disk layout of the hash table stored in the mmap file.
static constexpr unsigned logSizeForSize(std::size_t size)
Compute the log2 of the slot count from the file size.
value_t t[]
Flexible array of hash-table slots.
static constexpr std::size_t sizeForLogSize(unsigned ls)
Compute the file size required for a table with 2^ls slots.
unsigned log_size
log2 of the number of slots
unsigned long nb_elements
Current number of stored key-value pairs.
unsigned long next_value
Next integer value to assign to a new UUID.
constexpr unsigned long capacity()
Maximum number of slots in the table (2^log_size).
unsigned long value
Associated integer (gate index), or NOTHING if slot is empty.
UUID structure.