ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
MMappedUUIDHashTable.h
Go to the documentation of this file.
1/**
2 * @file MMappedUUIDHashTable.h
3 * @brief Open-addressing hash table mapping UUIDs to integers, backed by an mmap file.
4 *
5 * @c MMappedUUIDHashTable provides a persistent hash table that maps
6 * 128-bit UUID keys to sequential unsigned-long integers (used as gate
7 * indices into the @c MMappedVector of @c GateInformation). The table
8 * is stored in a memory-mapped file so that it survives PostgreSQL
9 * restarts and is accessible by multiple processes.
10 *
11 * Design constraints:
12 * - **Append-only**: elements can be added but never removed.
13 * - **Open addressing**: collisions are resolved by linear probing.
14 * - **Trivial hash**: the first 8 bytes of the UUID are reinterpreted as
15 * a 64-bit integer and taken modulo the table capacity. UUIDs are
16 * generated uniformly at random (version 4), so this is effectively
17 * uniform.
18 * - **Automatic growth**: when the load factor exceeds @c MAXIMUM_LOAD_FACTOR
19 * the table is doubled in size and rehashed.
20 *
21 * Access to the table from multiple processes is serialised via the
22 * ProvSQL LWLock in @c provsqlSharedState.
23 */
24#ifndef MMAPPED_UUID_HASH_TABLE_H
25#define MMAPPED_UUID_HASH_TABLE_H
26
27#include <cstddef>
28#include <utility>
29
30extern "C" {
31#include "provsql_utils.h"
32}
33
34/**
35 * @brief Persistent open-addressing hash table mapping UUIDs to integers.
36 */
38{
39/** @brief One slot in the hash table: a UUID key and its associated integer value. */
40struct value_t {
41 pg_uuid_t uuid; ///< Key
42 unsigned long value; ///< Associated integer (gate index), or 0 if slot is empty
43};
44
45/**
46 * @brief On-disk layout of the hash table stored in the mmap file.
47 *
48 * The header fields are followed by a flexible array of @c value_t slots.
49 */
50struct table_t {
51 /**
52 * @brief Compute the file size required for a table with @c 2^ls slots.
53 * @param ls Log2 of the desired slot count.
54 * @return Required file size in bytes.
55 */
56 static constexpr std::size_t sizeForLogSize(unsigned ls) {
57 return offsetof(table_t, t) + (1 << ls)*sizeof(value_t);
58 }
59 /**
60 * @brief Compute the log2 of the slot count from the file size.
61 * @param size File size in bytes.
62 * @return Log2 of the number of slots that fit in @p size.
63 */
64 static constexpr unsigned logSizeForSize(std::size_t size) {
65 size -= offsetof(table_t, t);
66 size /= sizeof(value_t);
67 size >>= 1;
68 unsigned log_size=0;
69 while(size) {
70 size >>= 1;
71 ++log_size;
72 }
73 return log_size;
74 }
75 /**
76 * @brief Maximum number of slots in the table (@c 2^log_size).
77 * @return Current capacity (number of available hash-table slots).
78 */
79 constexpr unsigned long capacity() {
80 return 1u << log_size;
81 }
82
83 unsigned log_size; ///< log2 of the number of slots
84 unsigned long nb_elements; ///< Current number of stored key-value pairs
85 unsigned long next_value; ///< Next integer value to assign to a new UUID
86 value_t t[]; ///< Flexible array of hash-table slots
87};
88
89int fd; ///< File descriptor of the backing mmap file
90table_t *table; ///< Pointer to the memory-mapped table header
91
92/** @brief Initial log2 capacity (65 536 slots). */
93static constexpr unsigned STARTING_LOG_SIZE=16;
94/** @brief Rehash when this fraction of slots is occupied. */
95static constexpr double MAXIMUM_LOAD_FACTOR=.5;
96
97/**
98 * @brief Compute the starting slot index for UUID @p u.
99 *
100 * Reinterprets the first 8 bytes of @p u as a 64-bit integer and takes
101 * it modulo the current capacity.
102 * @param u UUID to hash.
103 * @return Slot index in [0, capacity).
104 */
105inline unsigned long hash(pg_uuid_t u) const {
106 return *reinterpret_cast<unsigned long*>(&u) % (1 << table->log_size);
107};
108
109/**
110 * @brief Find the slot index of @p u, or @c NOTHING if absent.
111 * @param u UUID to look up.
112 * @return Slot index, or @c NOTHING if @p u is not in the table.
113 */
114unsigned long find(pg_uuid_t u) const;
115/**
116 * @brief Map @p length bytes from the backing file (read-write or read-only).
117 * @param length Number of bytes to map.
118 * @param read_only If @c true, map read-only.
119 */
120void mmap(size_t length, bool read_only);
121/** @brief Double the table capacity and rehash all existing entries. */
122void grow();
123/**
124 * @brief Store the mapping @p u → @p i in the table.
125 * @param u UUID key to store.
126 * @param i Integer value to associate with @p u.
127 */
128void set(pg_uuid_t u, unsigned long i);
129
130public:
131/** @brief Sentinel returned by @c operator[]() when the UUID is not present. */
132static constexpr unsigned long NOTHING=static_cast<unsigned long>(-1);
133
134/**
135 * @brief Open (or create) the mmap-backed hash table.
136 *
137 * @param filename Path to the backing file (created if absent).
138 * @param read_only If @c true, map the file read-only (no new entries
139 * can be inserted).
140 */
141MMappedUUIDHashTable(const char *filename, bool read_only);
142/** @brief Sync and unmap the file. */
144
145/**
146 * @brief Look up the integer index for UUID @p u.
147 *
148 * @param u The UUID to look up.
149 * @return The associated integer, or @c NOTHING if @p u is absent.
150 */
151unsigned long operator[](pg_uuid_t u) const;
152
153/**
154 * @brief Insert UUID @p u, assigning it the next available integer.
155 *
156 * If @p u is already present the existing value is returned without
157 * modification.
158 *
159 * @param u UUID to insert.
160 * @return A pair @c {value, inserted} where @c inserted is @c true if
161 * a new entry was created.
162 */
163std::pair<unsigned long,bool> add(pg_uuid_t u);
164
165/**
166 * @brief Return the number of UUID→integer pairs currently stored.
167 * @return Element count.
168 */
169inline unsigned long nbElements() const {
170 return table->nb_elements;
171}
172
173/**
174 * @brief Flush dirty pages to the backing file with @c msync().
175 */
176void sync();
177};
178
179 #endif /* MMAPPED_UUID_HASH_TABLE_H */
Persistent open-addressing hash table mapping UUIDs to integers.
void set(pg_uuid_t u, unsigned long i)
Store the mapping u → i in the table.
unsigned long hash(pg_uuid_t u) const
Compute the starting slot index for UUID u.
void grow()
Double the table capacity and rehash all existing entries.
int fd
File descriptor of the backing mmap file.
std::pair< unsigned long, bool > add(pg_uuid_t u)
Insert UUID u, assigning it the next available integer.
static constexpr unsigned STARTING_LOG_SIZE
Initial log2 capacity (65 536 slots).
unsigned long find(pg_uuid_t u) const
Find the slot index of u, or NOTHING if absent.
unsigned long operator[](pg_uuid_t u) const
Look up the integer index for UUID u.
void mmap(size_t length, bool read_only)
Map length bytes from the backing file (read-write or read-only).
unsigned long nbElements() const
Return the number of UUID→integer pairs currently stored.
~MMappedUUIDHashTable()
Sync and unmap the file.
void sync()
Flush dirty pages to the backing file with msync().
static constexpr unsigned long NOTHING
Sentinel returned by operator[]() when the UUID is not present.
static constexpr double MAXIMUM_LOAD_FACTOR
Rehash when this fraction of slots is occupied.
table_t * table
Pointer to the memory-mapped table header.
Core types, constants, and utilities shared across ProvSQL.
On-disk layout of the hash table stored in the mmap file.
static constexpr unsigned logSizeForSize(std::size_t size)
Compute the log2 of the slot count from the file size.
value_t t[]
Flexible array of hash-table slots.
static constexpr std::size_t sizeForLogSize(unsigned ls)
Compute the file size required for a table with 2^ls slots.
unsigned log_size
log2 of the number of slots
unsigned long nb_elements
Current number of stored key-value pairs.
unsigned long next_value
Next integer value to assign to a new UUID.
constexpr unsigned long capacity()
Maximum number of slots in the table (2^log_size).
One slot in the hash table: a UUID key and its associated integer value.
unsigned long value
Associated integer (gate index), or 0 if slot is empty.
UUID structure.