ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
MMappedUUIDHashTable.cpp
Go to the documentation of this file.
1/**
2 * @file MMappedUUIDHashTable.cpp
3 * @brief Open-addressing hash table over a memory-mapped file: implementation.
4 *
5 * Implements all methods of @c MMappedUUIDHashTable declared in
6 * @c MMappedUUIDHashTable.h:
7 * - @c MMappedUUIDHashTable(): open/create the backing file and map it.
8 * - @c ~MMappedUUIDHashTable(): sync and unmap.
9 * - @c add(): insert a UUID and assign the next sequential integer.
10 * - @c operator[](): look up an integer by UUID.
11 * - @c sync(): flush dirty pages with @c msync().
12 *
13 * Internal helpers:
14 * - @c mmap(): map (or remap) @p length bytes of the backing file.
15 * - @c grow(): double the table size and rehash.
16 * - @c find(): locate the slot index for a UUID (or @c NOTHING if absent).
17 * - @c set(): write a key-value pair into the table.
18 */
20
21#include <cassert>
22#include <cerrno>
23#include <cstdint>
24#include <cstring>
25#include <new>
26#include <stdexcept>
27#include <string>
28#include <vector>
29
30#include <fcntl.h>
31#include <unistd.h>
32
33#include <sys/mman.h>
34
35MMappedUUIDHashTable::MMappedUUIDHashTable(const char *filename, bool read_only, uint64_t magic_value)
36{
37 fd=open(filename, O_CREAT|(read_only?O_RDONLY:O_RDWR), 0600); // flawfinder: ignore
38 if(fd==-1)
39 throw std::runtime_error(strerror(errno));
40
41 auto size = lseek(fd, 0, SEEK_END);
42 lseek(fd, 0, SEEK_SET);
43
44 bool empty=false;
45
46 if(size==0) {
47 empty=true;
49 if(ftruncate(fd, size))
50 throw std::runtime_error(strerror(errno));
51 }
52
53 mmap(size, read_only);
54
55 if(empty) {
56 table->magic = magic_value;
57 table->version = 1;
58 table->elem_size = static_cast<uint16_t>(sizeof(value_t));
59 table->_reserved = 0;
60 table->log_size = table_t::logSizeForSize(size);
61 table->nb_elements = 0;
62 table->next_value = 0;
63 for(unsigned long i=0; i<table->capacity(); ++i) {
64 table->t[i].value = NOTHING;
65 }
66 } else {
67 if(table->magic != magic_value)
68 throw std::runtime_error("ProvSQL mmap: wrong file type (magic mismatch)");
69 if(table->version != 1)
70 throw std::runtime_error("ProvSQL mmap: unsupported format version "
71 + std::to_string(table->version));
72 if(table->elem_size != sizeof(value_t))
73 throw std::runtime_error("ProvSQL mmap: element size mismatch (recompile required)");
74 }
75}
76
77void MMappedUUIDHashTable::mmap(size_t length, bool read_only)
78{
79 table = reinterpret_cast<table_t *>(::mmap(
80 NULL,
81 length,
82 PROT_READ|(read_only?0:PROT_WRITE),
83 MAP_SHARED,
84 fd,
85 0));
86 if(table == MAP_FAILED)
87 throw std::runtime_error(strerror(errno));
88}
89
91{
92 sync();
93
94 std::vector<value_t> elements;
95 elements.reserve(table->nb_elements);
96 for(unsigned long i=0; i<table->capacity(); ++i)
97 if(table->t[i].value != NOTHING)
98 elements.push_back(table->t[i]);
99
100 auto new_log_size = table->log_size+1;
101 munmap(table, table_t::sizeForLogSize(table->log_size));
102
103 auto new_size = table_t::sizeForLogSize(new_log_size);
104 if(ftruncate(fd, new_size))
105 throw std::runtime_error(strerror(errno));
106 mmap(new_size, false);
107
108 table->log_size = new_log_size;
109 for(unsigned long i=0; i<table->capacity(); ++i) {
110 table->t[i].value = NOTHING;
111 }
112 for(const auto &u: elements)
113 set(u.uuid, u.value);
114}
115
121
123{
124 auto k = hash(u);
125 while(table->t[k].value != NOTHING &&
126 std::memcmp(&table->t[k].uuid, &u, sizeof(pg_uuid_t))) {
127 k = (k+1) % table->capacity();
128 }
129
130 return k;
131}
132
134{
135 auto k = find(u);
136
137 return table->t[k].value;
138}
139
140std::pair<unsigned long,bool> MMappedUUIDHashTable::add(pg_uuid_t u)
141{
142 auto k = find(u);
143
144 if(table->t[k].value == NOTHING) {
145 if(table->nb_elements >= MAXIMUM_LOAD_FACTOR * table->capacity()) {
146 grow();
147 }
148 k = find(u);
149 ++table->nb_elements;
150 table->t[k].uuid = u;
151 return std::make_pair(table->t[k].value = table->next_value++, true);
152 } else
153 return std::make_pair(table->t[k].value, false);
154}
155
156// Only used when growing the table, so no need to check/update nb_elements
157void MMappedUUIDHashTable::set(pg_uuid_t u, unsigned long i)
158{
159 table->t[find(u)] = {u, i};
160}
161
163{
164 msync(table, table_t::sizeForLogSize(table->log_size), MS_SYNC);
165}
Open-addressing hash table mapping UUIDs to integers, backed by an mmap file.
void set(pg_uuid_t u, unsigned long i)
Store the mapping u → i in the table.
unsigned long hash(pg_uuid_t u) const
Compute the starting slot index for UUID u.
void grow()
Double the table capacity and rehash all existing entries.
int fd
File descriptor of the backing mmap file.
std::pair< unsigned long, bool > add(pg_uuid_t u)
Insert UUID u, assigning it the next available integer.
MMappedUUIDHashTable(const char *filename, bool read_only, uint64_t magic)
Open (or create) the mmap-backed hash table.
static constexpr unsigned STARTING_LOG_SIZE
Initial log2 capacity (65 536 slots).
unsigned long find(pg_uuid_t u) const
Find the slot index of u, or NOTHING if absent.
unsigned long operator[](pg_uuid_t u) const
Look up the integer index for UUID u.
void mmap(size_t length, bool read_only)
Map length bytes from the backing file (read-write or read-only).
~MMappedUUIDHashTable()
Sync and unmap the file.
void sync()
Flush dirty pages to the backing file with msync().
static constexpr unsigned long NOTHING
Sentinel returned by operator[]() when the UUID is not present.
static constexpr double MAXIMUM_LOAD_FACTOR
Rehash when this fraction of slots is occupied.
table_t * table
Pointer to the memory-mapped table header.
On-disk layout of the hash table stored in the mmap file.
static constexpr unsigned logSizeForSize(std::size_t size)
Compute the log2 of the slot count from the file size.
static constexpr std::size_t sizeForLogSize(unsigned ls)
Compute the file size required for a table with 2^ls slots.
One slot in the hash table: a UUID key and its associated integer value.
UUID structure.