ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
MMappedCircuit.h
Go to the documentation of this file.
1/**
2 * @file MMappedCircuit.h
3 * @brief Persistent, mmap-backed storage for the full provenance circuit.
4 *
5 * @c MMappedCircuit is the authoritative store for all provenance circuit
6 * data that must survive transaction boundaries and be accessible across
7 * multiple PostgreSQL backends. It composes four @c MMappedVector
8 * instances (the three tabulated below plus the per-relation @c tableInfo store) and one @c MMappedUUIDHashTable:
9 *
10 * | Component | Contents |
11 * |---------------------|-----------------------------------------------|
12 * | @c mapping | UUID → gate index (hash table) |
13 * | @c gates | @c GateInformation records, one per gate |
14 * | @c wires | Flattened child-UUID lists for all gates |
15 * | @c extra | Variable-length string data (e.g. provenance labels) |
16 *
17 * All five backing files (the four above plus @c provsql_table_info.mmap) live in the PostgreSQL data directory and are
18 * opened/created by the ProvSQL background worker at startup.
19 *
20 * The free-function @c createGenericCircuit() traverses the mmap data
21 * starting from a given root UUID to construct an in-memory
22 * @c GenericCircuit for evaluation.
23 *
24 * @warning ON-DISK ABI: the layouts of @c GateInformation, of the
25 * @c gate_type enum (defined in @c provsql_utils.h), of @c pg_uuid_t,
26 * and of @c MMappedUUIDHashTable's slot structure are all serialised
27 * verbatim into the @c provsql_*.mmap backing files. ProvSQL
28 * supports in-place extension upgrades (@c ALTER @c EXTENSION @c provsql
29 * @c UPDATE) only because these layouts have been stable since
30 * ProvSQL 1.0.0. Any change that adds, removes, reorders, or resizes
31 * a field -- or that renumbers a @c gate_type enumerator -- silently
32 * breaks every existing installation's on-disk mmap files. If such a
33 * change is necessary, bump an explicit format-version header in the
34 * mmap files, write a migration path, and call it out in a release note.
35 */
36#ifndef MMAPPED_CIRCUIT_H
37#define MMAPPED_CIRCUIT_H
38
39#include <cstdint>
40#include <string>
41#include <vector>
42
43#include "GenericCircuit.h"
44#include "MMappedTableInfo.h"
46#include "MMappedVector.hpp"
47
48extern "C" {
49#include "provsql_utils.h"
50}
51
52/**
53 * @brief Per-gate metadata stored in the @c gates @c MMappedVector.
54 *
55 * Each gate in the persistent circuit has exactly one @c GateInformation
56 * record. The @c children_idx and @c nb_children fields together index
57 * into the @c wires @c MMappedVector to find the gate's children.
58 * Similarly, @c extra_idx and @c extra_len index into the @c extra vector
59 * for variable-length string annotations.
60 */
61typedef struct GateInformation
62{
63 gate_type type; ///< Kind of gate (input, plus, times, …)
64 unsigned nb_children; ///< Number of children
65 unsigned long children_idx;///< Start index of this gate's children in @c wires
66 double prob; ///< Associated probability (default 1.0)
67 unsigned info1; ///< General-purpose integer annotation 1
68 unsigned info2; ///< General-purpose integer annotation 2
69 unsigned long extra_idx; ///< Start index in @c extra for string data
70 unsigned extra_len; ///< Byte length of the string data in @c extra
71
72 /**
73 * @brief Construct a @c GateInformation with mandatory fields.
74 * @param t Gate type.
75 * @param n Number of children.
76 * @param i Start index of children in the @c wires vector.
77 */
78 GateInformation(gate_type t, unsigned n, unsigned long i) :
79 type(t), nb_children(n), children_idx(i), prob(1.), info1(0), info2(0), extra_idx(0), extra_len(0) {
80 }
82
83/**
84 * @brief Persistent mmap-backed representation of the provenance circuit.
85 *
86 * @c MMappedCircuit is the single writer for circuit data; only the
87 * background worker should call its mutating methods. Reading methods
88 * may be called from any process that has mapped the files read-only.
89 */
91private:
92MMappedUUIDHashTable mapping; ///< UUID → gate-index hash table
93MMappedVector<GateInformation> gates; ///< Gate metadata array
94MMappedVector<pg_uuid_t> wires; ///< Flattened child UUID array
95MMappedVector<char> extra; ///< Variable-length string data
96MMappedVector<ProvenanceTableInfo> tableInfo; ///< Per-relation TID/BID metadata (safe-query optimisation)
97
98static constexpr const char *GATES_FILENAME="provsql_gates.mmap"; ///< Backing file for @c gates
99static constexpr const char *WIRES_FILENAME="provsql_wires.mmap"; ///< Backing file for @c wires
100static constexpr const char *MAPPING_FILENAME="provsql_mapping.mmap"; ///< Backing file for @c mapping
101static constexpr const char *EXTRA_FILENAME="provsql_extra.mmap"; ///< Backing file for @c extra
102static constexpr const char *TABLE_INFO_FILENAME="provsql_table_info.mmap"; ///< Backing file for @c tableInfo
103
104/** @brief Build the full path for a mmap file under @c $PGDATA/base/<db_oid>/. */
105static std::string makePath(Oid db_oid, const char *filename);
106
107/** @brief Private constructor that accepts pre-built paths and initialises every store directly (presumably the target the public constructor delegates to). */
108MMappedCircuit(const std::string &mp, const std::string &gp,
109 const std::string &wp, const std::string &ep,
110 const std::string &tp,
111 bool read_only) :
112 mapping (mp.c_str(), read_only, MAGIC_MAPPING),
113 gates (gp.c_str(), read_only, MAGIC_GATES),
114 wires (wp.c_str(), read_only, MAGIC_WIRES),
115 extra (ep.c_str(), read_only, MAGIC_EXTRA),
116 tableInfo(tp.c_str(), read_only, MAGIC_TABLE_INFO) {}
117
118public:
119/** @brief 8-byte magic constants identifying each mmap file type. */
120static constexpr uint64_t MAGIC_GATES =
121 uint64_t('P') | uint64_t('v') << 8 | uint64_t('S') << 16 | uint64_t('G') << 24 |
122 uint64_t('a') << 32 | uint64_t('t') << 40 | uint64_t('e') << 48 | uint64_t('s') << 56;
123static constexpr uint64_t MAGIC_WIRES =
124 uint64_t('P') | uint64_t('v') << 8 | uint64_t('S') << 16 | uint64_t('W') << 24 |
125 uint64_t('i') << 32 | uint64_t('r') << 40 | uint64_t('e') << 48 | uint64_t('s') << 56;
126static constexpr uint64_t MAGIC_MAPPING =
127 uint64_t('P') | uint64_t('v') << 8 | uint64_t('S') << 16 | uint64_t('M') << 24 |
128 uint64_t('a') << 32 | uint64_t('p') << 40 | uint64_t('n') << 48 | uint64_t('g') << 56;
129static constexpr uint64_t MAGIC_EXTRA =
130 uint64_t('P') | uint64_t('v') << 8 | uint64_t('S') << 16 | uint64_t('E') << 24 |
131 uint64_t('x') << 32 | uint64_t('t') << 40 | uint64_t('r') << 48 | uint64_t('a') << 56;
132static constexpr uint64_t MAGIC_TABLE_INFO =
133 uint64_t('P') | uint64_t('v') << 8 | uint64_t('S') << 16 | uint64_t('T') << 24 |
134 uint64_t('b') << 32 | uint64_t('l') << 40 | uint64_t('I') << 48 | uint64_t('n') << 56;
135
136/**
137 * @brief Open all five mmap backing files for the given database.
138 * @param db_oid OID of the target database; files go under $PGDATA/base/<db_oid>/.
139 * @param read_only If @c true, all files are mapped read-only.
140 */
141explicit MMappedCircuit(Oid db_oid, bool read_only = false);
142
143/** @brief Sync all backing files before destruction. */
145 sync();
146}
147
148/**
149 * @brief Persist a new gate to the mmap store.
150 *
151 * Allocates a @c GateInformation record, appends the children to the
152 * @c wires vector, and records the UUID→index mapping. Existing gates
153 * with the same @p token are silently skipped.
154 *
155 * @param token UUID identifying the new gate.
156 * @param type Gate type.
157 * @param children Ordered list of child gate UUIDs.
158 */
159void createGate(pg_uuid_t token, gate_type type, const std::vector<pg_uuid_t> &children);
160
161/**
162 * @brief Update the @c info1 / @c info2 annotations of a gate.
163 * @param token UUID of the gate to update.
164 * @param info1 New value for @c info1.
165 * @param info2 New value for @c info2.
166 */
167void setInfos(pg_uuid_t token, unsigned info1, unsigned info2);
168
169/**
170 * @brief Attach a variable-length string annotation to a gate.
171 * @param token UUID of the gate.
172 * @param s String to store.
173 */
174void setExtra(pg_uuid_t token, const std::string &s);
175
176/**
177 * @brief Set the probability associated with a gate.
178 * @param token UUID of the gate.
179 * @param prob Probability value in [0, 1].
180 * @return @c true if the gate was updated; @c false if the token is a non-input gate.
181 * If the token is not yet in the circuit, an input gate is created lazily.
182 */
183bool setProb(pg_uuid_t token, double prob);
184
185/**
186 * @brief Flush all backing files to disk with @c msync().
187 */
188void sync();
189
190/**
191 * @brief Return the type of the gate identified by @p token.
192 * @param token UUID of the gate.
193 * @return The gate's type, or @c gate_input if not found (lazy default).
194 */
195gate_type getGateType(pg_uuid_t token) const;
196
197/**
198 * @brief Return the child UUIDs of the gate identified by @p token.
199 * @param token UUID of the gate.
200 * @return Ordered vector of child UUIDs.
201 */
202std::vector<pg_uuid_t> getChildren(pg_uuid_t token) const;
203
204/**
205 * @brief Return the probability stored for the gate identified by @p token.
206 * @param token UUID of the gate.
207 * @return The probability, or 1.0 if the gate is not found.
208 */
209double getProb(pg_uuid_t token) const;
210
211/**
212 * @brief Return the @c info1 / @c info2 pair for the gate @p token.
213 * @param token UUID of the gate.
214 * @return Pair @c {info1, info2}, or @c {0,0} if not found.
215 */
216std::pair<unsigned, unsigned> getInfos(pg_uuid_t token) const;
217
218/**
219 * @brief Return the variable-length string annotation for gate @p token.
220 * @param token UUID of the gate.
221 * @return The stored string, or empty if none.
222 */
223std::string getExtra(pg_uuid_t token) const;
224
225/**
226 * @brief Return the total number of gates stored in the circuit.
227 * @return Total gate count.
228 */
229inline unsigned long getNbGates() const {
230 return gates.nbElements();
231}
232
233/**
234 * @brief Insert or update the @c kind / @c block_key half of a
235 * per-table metadata record, preserving any existing ancestor
236 * fields.
237 *
238 * If an entry for @p info.relid already exists, its @c kind and
239 * @c block_key fields are overwritten in place and its @c ancestors
240 * fields are preserved. Otherwise a fresh record is appended with
241 * @c ancestor_n @c == @c 0.
242 *
243 * @param info The record to store; only @c kind / @c block_key are
244 * consumed (@c ancestor fields are sourced from the
245 * existing entry or zeroed for fresh ones).
246 */
247void setTableInfo(const ProvenanceTableInfo &info);
248
249/**
250 * @brief Insert or update the ancestor set of a per-table metadata
251 * record, preserving any existing @c kind / @c block_key
252 * fields.
253 *
254 * No-op when @p relid has no existing record: the safe-query
255 * rewriter only consults ancestry for tracked relations, so a
256 * caller setting ancestry on an unknown relation has missed a
257 * @c setTableInfo step. Callers in this codebase always set
258 * @c kind first.
259 *
260 * @param relid pg_class OID of the relation to update.
261 * @param ancestor_n Number of valid entries in @p ancestors
262 * (must be @c <= @c PROVSQL_TABLE_INFO_MAX_ANCESTORS).
263 * @param ancestors Sorted, deduplicated base-relation OIDs.
264 */
265void setTableAncestry(Oid relid, uint16_t ancestor_n,
266 const Oid *ancestors);
267
268/**
269 * @brief Remove a per-table metadata entry (both halves).
270 *
271 * No-op when @p relid is not present. Removal is done by
272 * tombstoning the matching entry with @c relid @c == @c InvalidOid;
273 * the next @c setTableInfo over the same @p relid reuses the slot.
274 *
275 * @param relid pg_class OID of the relation whose entry to remove.
276 */
277void removeTableInfo(Oid relid);
278
279/**
280 * @brief Clear just the ancestor set of a per-table metadata record,
281 * preserving @c kind / @c block_key.
282 *
283 * No-op when @p relid has no existing record. Useful when a
284 * derived relation's source list changes (e.g. a CTAS is re-run)
285 * without disturbing its kind classification.
286 */
287void removeTableAncestry(Oid relid);
288
289/**
290 * @brief Look up the full per-table metadata record (both halves).
291 *
292 * @param relid pg_class OID of the relation to look up.
293 * @param out On success, filled with the stored record.
294 * @return @c true if a record was found, @c false otherwise.
295 */
296bool getTableInfo(Oid relid, ProvenanceTableInfo &out) const;
297
298/**
299 * @brief Build an in-memory @c GenericCircuit rooted at @p token.
300 *
301 * Performs a depth-first traversal of the mmap-backed circuit starting
302 * from @p token and copies all reachable gates and wires into a newly
303 * constructed @c GenericCircuit.
304 *
305 * @param token UUID of the root gate.
306 * @return An in-memory @c GenericCircuit containing the sub-circuit.
307 */
309
310/**
311 * @brief Build an in-memory @c GenericCircuit reachable from any of
312 * @p roots.
313 *
314 * Multi-root variant of @c createGenericCircuit. Seeds the BFS with
315 * every UUID in @p roots so a shared subgraph reachable from more
316 * than one root is represented by a single @c gate_t (the
317 * @c GenericCircuit::setGate / @c getGate pair is idempotent on the
318 * UUID key). Used by @c getJointCircuit to load an RV's sub-DAG
319 * together with a conditioning gate that sits above it in the
320 * persisted DAG.
321 *
322 * @param roots UUIDs whose reachable closure to load. Order is
323 * irrelevant; identical UUIDs collapse via the
324 * @c std::set deduplication of the work list.
325 * @return An in-memory @c GenericCircuit containing every gate
326 * reachable from any root.
327 */
329 const std::vector<pg_uuid_t> &roots) const;
330};
331
332
333#endif /* MMAPPED_CIRCUIT_H */
Semiring-agnostic in-memory provenance circuit.
Per-table provenance metadata persisted alongside the circuit store.
Open-addressing hash table mapping UUIDs to integers, backed by an mmap file.
Template implementation of MMappedVector<T>.
In-memory provenance circuit with semiring-generic evaluation.
void setTableAncestry(Oid relid, uint16_t ancestor_n, const Oid *ancestors)
Insert or update the ancestor set of a per-table metadata record, preserving any existing kind / bloc...
void setExtra(pg_uuid_t token, const std::string &s)
Attach a variable-length string annotation to a gate.
static constexpr uint64_t MAGIC_TABLE_INFO
void setTableInfo(const ProvenanceTableInfo &info)
Insert or update the kind / block_key half of a per-table metadata record, preserving any existing an...
void removeTableInfo(Oid relid)
Remove a per-table metadata entry (both halves).
MMappedUUIDHashTable mapping
UUID → gate-index hash table.
void createGate(pg_uuid_t token, gate_type type, const std::vector< pg_uuid_t > &children)
Persist a new gate to the mmap store.
std::string getExtra(pg_uuid_t token) const
Return the variable-length string annotation for gate token.
unsigned long getNbGates() const
Return the total number of gates stored in the circuit.
static constexpr const char * GATES_FILENAME
Backing file for gates.
gate_type getGateType(pg_uuid_t token) const
Return the type of the gate identified by token.
static constexpr const char * TABLE_INFO_FILENAME
Backing file for tableInfo.
void removeTableAncestry(Oid relid)
Clear just the ancestor set of a per-table metadata record, preserving kind / block_key.
void sync()
Flush all backing files to disk with msync().
MMappedVector< ProvenanceTableInfo > tableInfo
Per-relation TID/BID metadata (safe-query optimisation).
static constexpr const char * WIRES_FILENAME
Backing file for wires.
GenericCircuit createGenericCircuit(pg_uuid_t token) const
Build an in-memory GenericCircuit rooted at token.
static std::string makePath(Oid db_oid, const char *filename)
Build the full path for a mmap file under $PGDATA/base/<db_oid>/.
bool setProb(pg_uuid_t token, double prob)
Set the probability associated with a gate.
static constexpr const char * EXTRA_FILENAME
Backing file for extra.
static constexpr uint64_t MAGIC_WIRES
static constexpr uint64_t MAGIC_GATES
8-byte magic constants identifying each mmap file type.
static constexpr const char * MAPPING_FILENAME
Backing file for mapping.
bool getTableInfo(Oid relid, ProvenanceTableInfo &out) const
Look up the full per-table metadata record (both halves).
MMappedVector< char > extra
Variable-length string data.
static constexpr uint64_t MAGIC_EXTRA
double getProb(pg_uuid_t token) const
Return the probability stored for the gate identified by token.
std::vector< pg_uuid_t > getChildren(pg_uuid_t token) const
Return the child UUIDs of the gate identified by token.
MMappedVector< GateInformation > gates
Gate metadata array.
~MMappedCircuit()
Sync all backing files before destruction.
MMappedVector< pg_uuid_t > wires
Flattened child UUID array.
void setInfos(pg_uuid_t token, unsigned info1, unsigned info2)
Update the info1 / info2 annotations of a gate.
MMappedCircuit(const std::string &mp, const std::string &gp, const std::string &wp, const std::string &ep, const std::string &tp, bool read_only)
Delegating constructor that accepts pre-built paths.
static constexpr uint64_t MAGIC_MAPPING
std::pair< unsigned, unsigned > getInfos(pg_uuid_t token) const
Return the info1 / info2 pair for the gate token.
Persistent open-addressing hash table mapping UUIDs to integers.
Append-only, mmap-backed vector of elements of type T.
Core types, constants, and utilities shared across ProvSQL.
Per-gate metadata stored in the gates MMappedVector.
double prob
Associated probability (default 1.0).
unsigned info2
General-purpose integer annotation 2.
unsigned long children_idx
Start index of this gate's children in wires.
unsigned info1
General-purpose integer annotation 1.
unsigned long extra_idx
Start index in extra for string data.
unsigned extra_len
Byte length of the string data in extra.
GateInformation(gate_type t, unsigned n, unsigned long i)
Construct a GateInformation with mandatory fields.
unsigned nb_children
Number of children.
gate_type type
Kind of gate (input, plus, times, …).
Per-relation metadata for the safe-query optimisation.
UUID structure.