ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
CircuitFromMMap.cpp
Go to the documentation of this file.
1/**
2 * @file CircuitFromMMap.cpp
3 * @brief Build in-memory circuits from the mmap-backed store.
4 *
5 * Implements the free functions declared in @c CircuitFromMMap.h:
6 * - @c getBooleanCircuit(): reads the mmap store (via the background
7 * worker IPC channel) and constructs a @c BooleanCircuit.
8 * - @c getGenericCircuit(): same but constructs a @c GenericCircuit.
9 *
10 * The internal @c getCircuitFromMMap<C>() template handles the IPC
11 * protocol: it sends a request through the shared-memory pipe,
12 * receives a Boost-serialised circuit blob from the background worker,
13 * and deserialises it into the appropriate circuit type.
14 */
15#include <cmath>
16
17#include <boost/archive/binary_iarchive.hpp>
18#include <boost/iostreams/device/array.hpp>
19#include <boost/iostreams/stream.hpp>
20
21#include "CircuitFromMMap.h"
22#include "HybridEvaluator.h"
23#include "RangeCheck.h"
24#include "having_semantics.hpp"
25#include "semiring/BoolExpr.h"
26#include "provsql_utils_cpp.h"
27
28#include <vector>
29
30extern "C" {
31#include "miscadmin.h"
32#include "provsql_shmem.h"
33#include "provsql_mmap.h"
34#include "provsql_utils.h"
35}
36
37/**
38 * @brief Read and deserialise a circuit rooted at @p token from the mmap worker.
39 * @tparam C Circuit type to deserialise (@c BooleanCircuit or @c GenericCircuit).
40 * @param token UUID of the root gate to retrieve.
41 * @param message_char IPC message-type byte sent to the background worker.
42 * @return Deserialised circuit of type @c C.
43 */
44template<typename C>
45static C getCircuitFromMMap(pg_uuid_t token, char message_char)
46{
   // Queue the request fields in order: message-type byte, the current
   // database Oid, then the UUID of the root gate.
48 ADDWRITEM(&message_char, char);
49 ADDWRITEM(&MyDatabaseId, Oid);
50 ADDWRITEM(&token, pg_uuid_t);
51
   // Send the queued request; a failed send is reported through provsql_error.
53 if(!SENDWRITEM())
54 provsql_error("Cannot write to pipe (message type %c)", message_char);
55
   // The reply starts with an unsigned long holding the payload length in bytes.
56 unsigned long size;
57 if(!READB(size, unsigned long))
58 provsql_error("Cannot read from pipe (message type %c)", message_char);
59
   // buf holds the whole payload; p is the write cursor into it and
   // remaining_size counts the bytes that have not arrived yet.
60 char *buf = new char[size], *p = buf;
61 ssize_t actual_read, remaining_size=size;
   // A single read() may deliver fewer bytes than requested, so keep reading
   // until one call returns everything that is still outstanding.
62 while((actual_read=read(provsql_shared_state->pipembr, p, remaining_size))<remaining_size) {
63 if(actual_read<=0) {
   // A zero or negative return before the payload is complete is treated as
   // a failure: the buffer is released first, then the error is reported.
65 delete [] buf;
66 provsql_error("Cannot read from pipe (message type %c)", message_char);
67 } else {
   // Partial read: advance the cursor and shrink the outstanding byte count.
68 remaining_size-=actual_read;
69 p+=actual_read;
70 }
71 }
73
   // Wrap the received bytes in a read-only in-memory stream and deserialise
   // them with Boost's binary archive into a default-constructed C.
   // NOTE(review): if the deserialisation below throws, buf is not released
   // on that path -- confirm whether this can happen in practice.
74 boost::iostreams::stream<boost::iostreams::array_source> stream(buf, size);
75 boost::archive::binary_iarchive ia(stream);
76 C c;
77 ia >> c;
78
   // The archive has been fully consumed; the raw buffer is no longer needed.
79 delete [] buf;
80
81 return c;
82}
83
86 pg_uuid_t token,
87 gate_t &gate,
88 std::unordered_map<gate_t, gate_t> &gc_to_bc)
89{
90 auto ggate = gc.getGate(uuid2string(token));
92 for(gate_t u: gc.getInputs()) {
93 gc_to_bc[u]=c.setGate(gc.getUUID(u), BooleanGate::IN, gc.getProb(u));
94 }
95 for(size_t i=0; i<gc.getNbGates(); ++i) {
96 auto u=static_cast<gate_t>(i);
97 if(gc.getGateType(u)==gate_mulinput) {
98 gc_to_bc[u]=c.setGate(gc.getUUID(u), BooleanGate::MULIN, gc.getProb(u));
99 c.setInfo(gc_to_bc[u], gc.getInfos(u).first);
100 c.addWire(
101 gc_to_bc[u],
102 gc_to_bc[gc.getWires(u)[0]]);
103 }
104 }
106 provsql_having(gc, ggate, gc_to_bc, semiring);
107 gate=gc.evaluate(ggate, gc_to_bc, semiring);
108
109 return c;
110}
111
113{
115 std::unordered_map<gate_t, gate_t> gc_to_bc;
116 return getBooleanCircuit(gc, token, gate, gc_to_bc);
117}
118
119/**
120 * @brief Apply the universal load-time simplification passes to @p gc.
121 *
122 * Extracted from @c getGenericCircuit so the joint-circuit loader
123 * (@c getJointCircuit) runs the same passes on its multi-root output.
124 * Gated by the @c provsql.simplify_on_load GUC, identical semantics.
125 */
127{
130 /* Fold deterministic @c gate_arith subtrees to @c gate_value
131 * before @c foldSemiringIdentities (which then collapses any
132 * single-wire @c gate_times / @c gate_plus wrappers around the
133 * resulting @c gate_value). Constant folding is uniformly safe
134 * across consumers: a @c gate_value carries no random identity,
135 * so the rewrite cannot break any shared-RV coupling. It lifts
136 * common parser shapes -- the @c -c::random_variable cast emits
137 * @c arith(NEG, value:c) where @c value:-c would round-trip
138 * identically -- into the canonical form so downstream
139 * @c collectRvConstraints / @c asRvVsConstCmp recognise them. */
142 }
143 /* Boolean-only simplification (idempotence, plus-with-one absorber)
144 * is gated on the umbrella provsql.boolean_provenance GUC: every
145 * Boolean-only optimisation is enabled by that same switch. The
146 * gate_assumed_boolean wrapper emitted by each rule application
147 * signals to the evaluator, at load time, that semirings that are not
148 * Boolean-compatible must refuse it. Independent of simplify_on_load:
149 * disabling the universal passes does not disable the Boolean ones. */
152 }
153}
154
156{
158
159 /* Apply universal cmp-resolution passes (currently RangeCheck) at
160 * load time so every downstream consumer -- semiring evaluators,
161 * MC, view_circuit, PROV export -- sees the simplified circuit.
162 * Each pass replaces decided gate_cmps with Bernoulli gate_input
163 * gates with probability 0 or 1 via
164 * GenericCircuit::resolveCmpToBernoulli; the rewrite is uniform
165 * across semirings (gate_zero / gate_one are the additive /
166 * multiplicative identities in any semiring) so a single sweep
167 * here is correct for every later evaluator.
168 *
169 * Gated by the provsql.simplify_on_load GUC so users debugging
170 * raw circuit structure can opt out. */
172
173 return gc;
174}
175
176/**
177 * @brief IPC: ship a 'j' (joint) request to the mmap worker and
178 * deserialise the returned @c GenericCircuit.
179 *
180 * Mirrors @c getCircuitFromMMap but writes the multi-root payload
181 * the worker's @c 'j' handler expects: <tt>'j' Oid nb_roots {pg_uuid_t}*</tt>.
182 * The response shape is identical to @c 'g' -- @c unsigned @c long
183 * size prefix followed by a Boost-serialised @c GenericCircuit.
184 */
186 pg_uuid_t root_token, pg_uuid_t event_token)
187{
188 char message_char = 'j';
189 unsigned nb_roots = 2;
190 STARTWRITEM();
191 ADDWRITEM(&message_char, char);
192 ADDWRITEM(&MyDatabaseId, Oid);
193 ADDWRITEM(&nb_roots, unsigned);
194 ADDWRITEM(&root_token, pg_uuid_t);
195 ADDWRITEM(&event_token, pg_uuid_t);
196
198 if(!SENDWRITEM())
199 provsql_error("Cannot write to pipe (message type j)");
200
201 unsigned long size;
202 if(!READB(size, unsigned long))
203 provsql_error("Cannot read from pipe (message type j)");
204
205 char *buf = new char[size], *p = buf;
206 ssize_t actual_read, remaining_size=size;
207 while((actual_read=read(provsql_shared_state->pipembr, p, remaining_size))<remaining_size) {
208 if(actual_read<=0) {
210 delete [] buf;
211 provsql_error("Cannot read from pipe (message type j)");
212 } else {
213 remaining_size-=actual_read;
214 p+=actual_read;
215 }
216 }
218
219 boost::iostreams::stream<boost::iostreams::array_source> stream(buf, size);
220 boost::archive::binary_iarchive ia(stream);
222 ia >> c;
223
224 delete [] buf;
225
226 return c;
227}
228
230 pg_uuid_t root_token,
231 pg_uuid_t event_token,
232 gate_t &root_gate,
233 gate_t &event_gate)
234{
235 GenericCircuit gc = getJointCircuitFromMMap(root_token, event_token);
236
238
239 /* Resolve the gate_t for the two roots AFTER simplification. The
240 * passes mutate gate types in place but never delete gates, so the
241 * UUID-to-gate_t map (and therefore @c getGate) stays valid. */
242 root_gate = gc.getGate(uuid2string(root_token));
243 event_gate = gc.getGate(uuid2string(event_token));
244
245 return gc;
246}
Boolean-expression (lineage formula) semiring.
@ IN
Input (variable) gate representing a base tuple.
@ MULIN
Multivalued-input gate (one of several options).
static GenericCircuit getJointCircuitFromMMap(pg_uuid_t root_token, pg_uuid_t event_token)
IPC: ship a 'j' (joint) request to the mmap worker and deserialise the returned GenericCircuit.
BooleanCircuit getBooleanCircuit(GenericCircuit &gc, pg_uuid_t token, gate_t &gate, std::unordered_map< gate_t, gate_t > &gc_to_bc)
Build a BooleanCircuit from an already-loaded GenericCircuit.
GenericCircuit getJointCircuit(pg_uuid_t root_token, pg_uuid_t event_token, gate_t &root_gate, gate_t &event_gate)
Build a GenericCircuit containing the closures of two roots, with shared subgraphs unified.
static C getCircuitFromMMap(pg_uuid_t token, char message_char)
Read and deserialise a circuit rooted at token from the mmap worker.
GenericCircuit getGenericCircuit(pg_uuid_t token)
Build a GenericCircuit from the mmap store rooted at token.
static void applyLoadTimeSimplification(GenericCircuit &gc)
Apply the universal load-time simplification passes to gc.
Build in-memory circuits from the mmap-backed persistent store.
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:49
Peephole simplifier for continuous gate_arith sub-circuits.
Support-based bound check for continuous-RV comparators.
Boolean circuit for provenance formula evaluation.
gate_t setGate(BooleanGate type) override
Allocate a new gate with type type and no UUID.
void setInfo(gate_t g, unsigned info)
Store an integer annotation on gate g.
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
Definition Circuit.h:140
gateType getGateType(gate_t g) const
Return the type of gate g.
Definition Circuit.h:130
void addWire(gate_t f, gate_t t)
Add a directed wire from gate f (parent) to gate t (child).
Definition Circuit.hpp:81
uuid getUUID(gate_t g) const
Return the UUID string associated with gate g.
Definition Circuit.hpp:46
gate_t getGate(const uuid &u)
Return (or create) the gate associated with UUID u.
Definition Circuit.hpp:33
std::vector< gate_t >::size_type getNbGates() const
Return the total number of gates in the circuit.
Definition Circuit.h:103
In-memory provenance circuit with semiring-generic evaluation.
S::value_type evaluate(gate_t g, std::unordered_map< gate_t, typename S::value_type > &provenance_mapping, S semiring) const
Evaluate the sub-circuit rooted at gate g over semiring semiring.
void foldSemiringIdentities()
Drop semiring identity wires and collapse single-wire gate_times / gate_plus to their lone non-identi...
double getProb(gate_t g) const
Return the probability for gate g.
const std::set< gate_t > & getInputs() const
Return the set of input (leaf) gates.
std::pair< unsigned, unsigned > getInfos(gate_t g) const
Return the integer annotation pair for gate g.
void foldBooleanIdentities()
Apply Boolean-only simplification rules to gate_plus and gate_times.
Provenance-as-Boolean-circuit semiring.
Definition BoolExpr.h:47
Provenance evaluation helper for HAVING-clause circuits.
void provsql_having(GenericCircuit &c, gate_t g, MapT &mapping, SemiringT S=SemiringT{})
Rewrite HAVING comparison gates in the circuit by enumerating possible worlds.
unsigned runConstantFold(GenericCircuit &gc)
Constant-fold pass over every gate_arith in gc.
unsigned runRangeCheck(GenericCircuit &gc)
Run the support-based pruning pass over gc.
bool provsql_simplify_on_load
Run universal cmp-resolution passes when getGenericCircuit returns; controlled by the provsql....
Definition provsql.c:83
bool provsql_boolean_provenance
Opt-in safe-query optimisation: when true, rewrites hierarchical conjunctive queries to a read-once f...
Definition provsql.c:86
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
Background worker and IPC primitives for mmap-backed circuit storage.
#define READB(var, type)
Read one value of type from the main-to-background pipe.
#define STARTWRITEM()
Reset the shared write buffer for a new batched write.
#define ADDWRITEM(pvar, type)
Append one value of type to the shared write buffer.
#define SENDWRITEM()
Flush the shared write buffer to the background-to-main pipe atomically.
void provsql_shmem_unlock(void)
Release the ProvSQL LWLock.
void provsql_shmem_lock_exclusive(void)
Acquire the ProvSQL LWLock in exclusive mode.
provsqlSharedState * provsql_shared_state
Pointer to the ProvSQL shared-memory segment (set in provsql_shmem_startup).
Shared-memory segment and inter-process pipe management.
Core types, constants, and utilities shared across ProvSQL.
string uuid2string(pg_uuid_t uuid)
Format a pg_uuid_t as a std::string.
C++ utility functions for UUID manipulation.
UUID structure.