ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
MMappedCircuit.h
Go to the documentation of this file.
1/**
2 * @file MMappedCircuit.h
3 * @brief Persistent, mmap-backed storage for the full provenance circuit.
4 *
5 * @c MMappedCircuit is the authoritative store for all provenance circuit
6 * data that must survive transaction boundaries and be accessible across
7 * multiple PostgreSQL backends. It composes three @c MMappedVector
8 * instances plus one @c MMappedUUIDHashTable:
9 *
10 * | Component | Contents |
11 * |---------------------|-----------------------------------------------|
12 * | @c mapping | UUID → gate index (hash table) |
13 * | @c gates | @c GateInformation records, one per gate |
14 * | @c wires | Flattened child-UUID lists for all gates |
15 * | @c extra | Variable-length string data (e.g. provenance labels) |
16 *
17 * All four backing files live in the PostgreSQL data directory and are
18 * opened/created by the ProvSQL background worker at startup.
19 *
20 * The free-function @c createGenericCircuit() traverses the mmap data
21 * starting from a given root UUID to construct an in-memory
22 * @c GenericCircuit for evaluation.
23 */
24#ifndef MMAPPED_CIRCUIT_H
25#define MMAPPED_CIRCUIT_H
26
27#include "GenericCircuit.h"
29#include "MMappedVector.hpp"
30
31extern "C" {
32#include "provsql_utils.h"
33}
34
35/**
36 * @brief Per-gate metadata stored in the @c gates @c MMappedVector.
37 *
38 * Each gate in the persistent circuit has exactly one @c GateInformation
39 * record. The @c children_idx and @c nb_children fields together index
40 * into the @c wires @c MMappedVector to find the gate's children.
41 * Similarly, @c extra_idx and @c extra_len index into the @c extra vector
42 * for variable-length string annotations.
43 */
44typedef struct GateInformation
45{
46 gate_type type; ///< Kind of gate (input, plus, times, …)
47 unsigned nb_children; ///< Number of children
48 unsigned long children_idx;///< Start index of this gate's children in @c wires
49 double prob; ///< Associated probability (default 1.0)
50 unsigned info1; ///< General-purpose integer annotation 1
51 unsigned info2; ///< General-purpose integer annotation 2
52 unsigned long extra_idx; ///< Start index in @c extra for string data
53 unsigned extra_len; ///< Byte length of the string data in @c extra
54
55 /**
56 * @brief Construct a @c GateInformation with mandatory fields.
57 * @param t Gate type.
58 * @param n Number of children.
59 * @param i Start index of children in the @c wires vector.
60 */
61 GateInformation(gate_type t, unsigned n, unsigned long i) :
62 type(t), nb_children(n), children_idx(i), prob(1.), info1(0), info2(0), extra_idx(0), extra_len(0) {
63 }
65
66/**
67 * @brief Persistent mmap-backed representation of the provenance circuit.
68 *
69 * @c MMappedCircuit is the single writer for circuit data; only the
70 * background worker should call its mutating methods. Reading methods
71 * may be called from any process that has mapped the files read-only.
72 */
74private:
75MMappedUUIDHashTable mapping; ///< UUID → gate-index hash table
76MMappedVector<GateInformation> gates; ///< Gate metadata array
77MMappedVector<pg_uuid_t> wires; ///< Flattened child UUID array
78MMappedVector<char> extra; ///< Variable-length string data
79
80static constexpr const char *GATES_FILENAME="provsql_gates.mmap"; ///< Backing file for @c gates
81static constexpr const char *WIRES_FILENAME="provsql_wires.mmap"; ///< Backing file for @c wires
82static constexpr const char *MAPPING_FILENAME="provsql_mapping.mmap"; ///< Backing file for @c mapping
83static constexpr const char *EXTRA_FILENAME="provsql_extra.mmap"; ///< Backing file for @c extra
84
85public:
86/**
87 * @brief Open all four mmap backing files.
88 * @param read_only If @c true, all files are mapped read-only.
89 */
90explicit MMappedCircuit(bool read_only = false) :
91 mapping(MAPPING_FILENAME, read_only),
92 gates(GATES_FILENAME, read_only),
93 wires(WIRES_FILENAME, read_only),
94 extra(EXTRA_FILENAME, read_only) {
95}
96/** @brief Sync all backing files before destruction. */
98 sync();
99}
100
101/**
102 * @brief Persist a new gate to the mmap store.
103 *
104 * Allocates a @c GateInformation record, appends the children to the
105 * @c wires vector, and records the UUID→index mapping. Existing gates
106 * with the same @p token are silently skipped.
107 *
108 * @param token UUID identifying the new gate.
109 * @param type Gate type.
110 * @param children Ordered list of child gate UUIDs.
111 */
112void createGate(pg_uuid_t token, gate_type type, const std::vector<pg_uuid_t> &children);
113
114/**
115 * @brief Update the @c info1 / @c info2 annotations of a gate.
116 * @param token UUID of the gate to update.
117 * @param info1 New value for @c info1.
118 * @param info2 New value for @c info2.
119 */
120void setInfos(pg_uuid_t token, unsigned info1, unsigned info2);
121
122/**
123 * @brief Attach a variable-length string annotation to a gate.
124 * @param token UUID of the gate.
125 * @param s String to store.
126 */
127void setExtra(pg_uuid_t token, const std::string &s);
128
129/**
130 * @brief Set the probability associated with a gate.
131 * @param token UUID of the gate.
132 * @param prob Probability value in [0, 1].
133 * @return @c true if the gate was found and updated; @c false otherwise.
134 */
135bool setProb(pg_uuid_t token, double prob);
136
137/**
138 * @brief Flush all backing files to disk with @c msync().
139 */
140void sync();
141
142/**
143 * @brief Return the type of the gate identified by @p token.
144 * @param token UUID of the gate.
145 * @return The gate's type, or @c gate_invalid if not found.
146 */
147gate_type getGateType(pg_uuid_t token) const;
148
149/**
150 * @brief Return the child UUIDs of the gate identified by @p token.
151 * @param token UUID of the gate.
152 * @return Ordered vector of child UUIDs.
153 */
154std::vector<pg_uuid_t> getChildren(pg_uuid_t token) const;
155
156/**
157 * @brief Return the probability stored for the gate identified by @p token.
158 * @param token UUID of the gate.
159 * @return The probability, or 1.0 if the gate is not found.
160 */
161double getProb(pg_uuid_t token) const;
162
163/**
164 * @brief Return the @c info1 / @c info2 pair for the gate @p token.
165 * @param token UUID of the gate.
166 * @return Pair @c {info1, info2}, or @c {0,0} if not found.
167 */
168std::pair<unsigned, unsigned> getInfos(pg_uuid_t token) const;
169
170/**
171 * @brief Return the variable-length string annotation for gate @p token.
172 * @param token UUID of the gate.
173 * @return The stored string, or empty if none.
174 */
175std::string getExtra(pg_uuid_t token) const;
176
177/**
178 * @brief Return the total number of gates stored in the circuit.
179 * @return Total gate count.
180 */
181inline unsigned long getNbGates() const {
182 return gates.nbElements();
183}
184};
185
186/**
187 * @brief Build an in-memory @c GenericCircuit rooted at @p token.
188 *
189 * Performs a depth-first traversal of the mmap-backed circuit starting
190 * from @p token and copies all reachable gates and wires into a newly
191 * constructed @c GenericCircuit.
192 *
193 * @param token UUID of the root gate.
194 * @return An in-memory @c GenericCircuit containing the sub-circuit.
195 */
197
198#endif /* MMAPPED_CIRCUIT_H */
Semiring-agnostic in-memory provenance circuit.
GenericCircuit createGenericCircuit(pg_uuid_t token)
Build an in-memory GenericCircuit rooted at token.
Open-addressing hash table mapping UUIDs to integers, backed by an mmap file.
Template implementation of MMappedVector<T>.
In-memory provenance circuit with semiring-generic evaluation.
Persistent mmap-backed representation of the provenance circuit.
void setExtra(pg_uuid_t token, const std::string &s)
Attach a variable-length string annotation to a gate.
MMappedUUIDHashTable mapping
UUID → gate-index hash table.
void createGate(pg_uuid_t token, gate_type type, const std::vector< pg_uuid_t > &children)
Persist a new gate to the mmap store.
std::string getExtra(pg_uuid_t token) const
Return the variable-length string annotation for gate token.
unsigned long getNbGates() const
Return the total number of gates stored in the circuit.
static constexpr const char * GATES_FILENAME
Backing file for gates.
gate_type getGateType(pg_uuid_t token) const
Return the type of the gate identified by token.
void sync()
Flush all backing files to disk with msync().
static constexpr const char * WIRES_FILENAME
Backing file for wires.
bool setProb(pg_uuid_t token, double prob)
Set the probability associated with a gate.
static constexpr const char * EXTRA_FILENAME
Backing file for extra.
static constexpr const char * MAPPING_FILENAME
Backing file for mapping.
MMappedVector< char > extra
Variable-length string data.
double getProb(pg_uuid_t token) const
Return the probability stored for the gate identified by token.
std::vector< pg_uuid_t > getChildren(pg_uuid_t token) const
Return the child UUIDs of the gate identified by token.
MMappedVector< GateInformation > gates
Gate metadata array.
~MMappedCircuit()
Sync all backing files before destruction.
MMappedVector< pg_uuid_t > wires
Flattened child UUID array.
MMappedCircuit(bool read_only=false)
Open all four mmap backing files.
void setInfos(pg_uuid_t token, unsigned info1, unsigned info2)
Update the info1 / info2 annotations of a gate.
std::pair< unsigned, unsigned > getInfos(pg_uuid_t token) const
Return the info1 / info2 pair for the gate token.
Persistent open-addressing hash table mapping UUIDs to integers.
Append-only, mmap-backed vector of elements of type T.
unsigned long nbElements() const
Return the number of elements currently stored.
Core types, constants, and utilities shared across ProvSQL.
gate_type
Possible gate type in the provenance circuit.
Per-gate metadata stored in the gates MMappedVector.
double prob
Associated probability (default 1.0)
unsigned info2
General-purpose integer annotation 2.
unsigned long children_idx
Start index of this gate's children in wires.
unsigned info1
General-purpose integer annotation 1.
unsigned long extra_idx
Start index in extra for string data.
unsigned extra_len
Byte length of the string data in extra.
GateInformation(gate_type t, unsigned n, unsigned long i)
Construct a GateInformation with mandatory fields.
unsigned nb_children
Number of children.
gate_type type
Kind of gate (input, plus, times, …)
UUID structure.