ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
MMappedUUIDHashTable.h
Go to the documentation of this file.
1/**
2 * @file MMappedUUIDHashTable.h
3 * @brief Open-addressing hash table mapping UUIDs to integers, backed by an mmap file.
4 *
5 * @c MMappedUUIDHashTable provides a persistent hash table that maps
6 * 128-bit UUID keys to sequential unsigned-long integers (used as gate
7 * indices into the @c MMappedVector of @c GateInformation). The table
8 * is stored in a memory-mapped file so that it survives PostgreSQL
9 * restarts and is accessible by multiple processes.
10 *
11 * Design constraints:
12 * - **Append-only**: elements can be added but never removed.
13 * - **Open addressing**: collisions are resolved by linear probing.
14 * - **Trivial hash**: the first 8 bytes of the UUID are reinterpreted as
15 * a 64-bit integer and taken modulo the table capacity. UUIDs are
16 * generated uniformly at random (version 4), so this is effectively
17 * uniform.
18 * - **Automatic growth**: when the load factor exceeds @c MAXIMUM_LOAD_FACTOR
19 * the table is doubled in size and rehashed.
20 *
21 * Access to the table from multiple processes is serialised via the
22 * ProvSQL LWLock in @c provsqlSharedState.
23 */
24#ifndef MMAPPED_UUID_HASH_TABLE_H
25#define MMAPPED_UUID_HASH_TABLE_H
26
27#include <cstddef>
28#include <cstdint>
29#include <utility>
30
31extern "C" {
32#include "provsql_utils.h"
33}
34
35/**
36 * @brief Persistent open-addressing hash table mapping UUIDs to integers.
37 */
39{
40/** @brief One slot in the hash table: a UUID key and its associated integer value. */
41struct value_t {
42 pg_uuid_t uuid; ///< Key
43 unsigned long value; ///< Associated integer (gate index), or 0 if slot is empty
44};
45
46/**
47 * @brief On-disk layout of the hash table stored in the mmap file.
48 *
49 * The header fields are followed by a flexible array of @c value_t slots.
50 */
51struct table_t {
52 /**
53 * @brief Compute the file size required for a table with @c 2^ls slots.
54 * @param ls Log2 of the desired slot count.
55 * @return Required file size in bytes.
56 */
57 static constexpr std::size_t sizeForLogSize(unsigned ls) {
58 return offsetof(table_t, t) + (1 << ls)*sizeof(value_t);
59 }
60 /**
61 * @brief Compute the log2 of the slot count from the file size.
62 * @param size File size in bytes.
63 * @return Log2 of the number of slots that fit in @p size.
64 */
65 static constexpr unsigned logSizeForSize(std::size_t size) {
66 size -= offsetof(table_t, t);
67 size /= sizeof(value_t);
68 size >>= 1;
69 unsigned log_size=0;
70 while(size) {
71 size >>= 1;
72 ++log_size;
73 }
74 return log_size;
75 }
76 /**
77 * @brief Maximum number of slots in the table (@c 2^log_size).
78 * @return Current capacity (number of available hash-table slots).
79 */
80 constexpr unsigned long capacity() {
81 return 1u << log_size;
82 }
83
84 uint64_t magic; ///< File-type identifier
85 uint16_t version; ///< Format version (currently 1)
86 uint16_t elem_size; ///< sizeof(value_t) at write time
87 uint32_t _reserved; ///< Padding, must be 0
88 unsigned log_size; ///< log2 of the number of slots
89 unsigned long nb_elements; ///< Current number of stored key-value pairs
90 unsigned long next_value; ///< Next integer value to assign to a new UUID
91 value_t t[]; ///< Flexible array of hash-table slots
92};
93
94int fd; ///< File descriptor of the backing mmap file
95table_t *table; ///< Pointer to the memory-mapped table header
96
97/** @brief Initial log2 capacity (65 536 slots). */
98static constexpr unsigned STARTING_LOG_SIZE=16;
99/** @brief Rehash when this fraction of slots is occupied. */
100static constexpr double MAXIMUM_LOAD_FACTOR=.5;
101
102/**
103 * @brief Compute the starting slot index for UUID @p u.
104 *
105 * Reinterprets the first 8 bytes of @p u as a 64-bit integer and takes
106 * it modulo the current capacity.
107 * @param u UUID to hash.
108 * @return Slot index in [0, capacity).
109 */
110inline unsigned long hash(pg_uuid_t u) const {
111 return *reinterpret_cast<unsigned long*>(&u) % (1 << table->log_size);
112};
113
114/**
115 * @brief Find the slot index of @p u, or @c NOTHING if absent.
116 * @param u UUID to look up.
117 * @return Slot index, or @c NOTHING if @p u is not in the table.
118 */
119unsigned long find(pg_uuid_t u) const;
120/**
121 * @brief Map @p length bytes from the backing file (read-write or read-only).
122 * @param length Number of bytes to map.
123 * @param read_only If @c true, map read-only.
124 */
125void mmap(size_t length, bool read_only);
126/** @brief Double the table capacity and rehash all existing entries. */
127void grow();
128/**
129 * @brief Store the mapping @p u → @p i in the table.
130 * @param u UUID key to store.
131 * @param i Integer value to associate with @p u.
132 */
133void set(pg_uuid_t u, unsigned long i);
134
135public:
136/** @brief Sentinel returned by @c operator[]() when the UUID is not present. */
137static constexpr unsigned long NOTHING=static_cast<unsigned long>(-1);
138
139/**
140 * @brief Open (or create) the mmap-backed hash table.
141 *
142 * @param filename Path to the backing file (created if absent).
143 * @param read_only If @c true, map the file read-only (no new entries
144 * can be inserted).
145 * @param magic Expected magic value for format validation.
146 */
147MMappedUUIDHashTable(const char *filename, bool read_only, uint64_t magic);
148/** @brief Sync and unmap the file. */
150
151/**
152 * @brief Look up the integer index for UUID @p u.
153 *
154 * @param u The UUID to look up.
155 * @return The associated integer, or @c NOTHING if @p u is absent.
156 */
157unsigned long operator[](pg_uuid_t u) const;
158
159/**
160 * @brief Insert UUID @p u, assigning it the next available integer.
161 *
162 * If @p u is already present the existing value is returned without
163 * modification.
164 *
165 * @param u UUID to insert.
166 * @return A pair @c {value, inserted} where @c inserted is @c true if
167 * a new entry was created.
168 */
169std::pair<unsigned long,bool> add(pg_uuid_t u);
170
171/**
172 * @brief Return the number of UUID→integer pairs currently stored.
173 * @return Element count.
174 */
175inline unsigned long nbElements() const {
176 return table->nb_elements;
177}
178
179/**
180 * @brief Flush dirty pages to the backing file with @c msync().
181 */
182void sync();
183};
184
185 #endif /* MMAPPED_UUID_HASH_TABLE_H */
void set(pg_uuid_t u, unsigned long i)
Store the mapping u → i in the table.
unsigned long hash(pg_uuid_t u) const
Compute the starting slot index for UUID u.
void grow()
Double the table capacity and rehash all existing entries.
int fd
File descriptor of the backing mmap file.
std::pair< unsigned long, bool > add(pg_uuid_t u)
Insert UUID u, assigning it the next available integer.
MMappedUUIDHashTable(const char *filename, bool read_only, uint64_t magic)
Open (or create) the mmap-backed hash table.
static constexpr unsigned STARTING_LOG_SIZE
Initial log2 capacity (65 536 slots).
unsigned long find(pg_uuid_t u) const
Find the slot index of u, or NOTHING if absent.
unsigned long operator[](pg_uuid_t u) const
Look up the integer index for UUID u.
void mmap(size_t length, bool read_only)
Map length bytes from the backing file (read-write or read-only).
unsigned long nbElements() const
Return the number of UUID→integer pairs currently stored.
~MMappedUUIDHashTable()
Sync and unmap the file.
void sync()
Flush dirty pages to the backing file with msync().
static constexpr unsigned long NOTHING
Sentinel returned by operator[]() when the UUID is not present.
static constexpr double MAXIMUM_LOAD_FACTOR
Rehash when this fraction of slots is occupied.
table_t * table
Pointer to the memory-mapped table header.
Core types, constants, and utilities shared across ProvSQL.
On-disk layout of the hash table stored in the mmap file.
static constexpr unsigned logSizeForSize(std::size_t size)
Compute the log2 of the slot count from the file size.
value_t t[]
Flexible array of hash-table slots.
uint32_t _reserved
Padding, must be 0.
static constexpr std::size_t sizeForLogSize(unsigned ls)
Compute the file size required for a table with 2^ls slots.
uint64_t magic
File-type identifier.
unsigned log_size
log2 of the number of slots
uint16_t elem_size
sizeof(value_t) at write time
unsigned long nb_elements
Current number of stored key-value pairs.
unsigned long next_value
Next integer value to assign to a new UUID.
uint16_t version
Format version (currently 1).
constexpr unsigned long capacity()
Maximum number of slots in the table (2^log_size).
One slot in the hash table: a UUID key and its associated integer value.
unsigned long value
Associated integer (gate index), or 0 if slot is empty.
UUID structure.