ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
BooleanCircuit.h
Go to the documentation of this file.
1/**
2 * @file BooleanCircuit.h
3 * @brief Boolean provenance circuit with support for knowledge compilation.
4 *
5 * @c BooleanCircuit represents the provenance formula of a query result
6 * as a Boolean circuit (AND/OR/NOT/IN gates). It supports multiple
7 * methods for computing the probability of the formula being true under
8 * tuple-independent probabilistic databases:
9 *
10 * | Method | Description |
11 * |------------------------|----------------------------------------------------------|
12 * | @c possibleWorlds() | Exact enumeration over all 2^n possible worlds |
13 * | @c compilation() | Knowledge compilation to d-DNNF via an external tool |
14 * | @c monteCarlo() | Monte Carlo sampling approximation |
15 * | @c WeightMC() | Weighted model counting via weightmc |
16 * | @c independentEvaluation() | Exact evaluation for disconnected circuits |
17 * | @c interpretAsDD() | Direct tree-decomposition-based compilation |
18 * | @c makeDD() | Generic d-DNNF construction dispatcher |
19 *
20 * ### Multivalued inputs (@c MULIN / @c MULVAR)
21 * Multivalued input gates model tuples drawn from a discrete probability
22 * distribution. @c rewriteMultivaluedGates() rewrites them into standard
23 * AND/OR/NOT circuits before knowledge compilation.
24 *
25 * The circuit is Boost-serialisable for transmission to external processes.
26 */
27#ifndef BOOLEAN_CIRCUIT_H
28#define BOOLEAN_CIRCUIT_H
29
30#include <unordered_map>
31#include <unordered_set>
32#include <set>
33#include <map>
34#include <vector>
35
36#include <boost/archive/binary_oarchive.hpp>
37#include <boost/serialization/unordered_map.hpp>
38#include <boost/serialization/map.hpp>
39#include <boost/serialization/set.hpp>
40#include <boost/serialization/vector.hpp>
41
42#include "Circuit.hpp"
43
44/**
45 * @brief Gate types for a Boolean provenance circuit.
46 *
47 * - @c UNDETERMINED Placeholder for a gate whose type has not been set yet.
48 * - @c AND Logical conjunction of child gates.
49 * - @c OR Logical disjunction of child gates.
50 * - @c NOT Logical negation of a single child gate.
51 * - @c IN An input (variable) gate representing a base tuple.
52 * - @c MULIN A multivalued-input gate (one of several options).
53 * - @c MULVAR Auxiliary gate grouping all @c MULIN siblings.
54 */
55enum class BooleanGate {
56 UNDETERMINED, ///< Placeholder gate whose type has not been set yet
57 AND, ///< Logical conjunction of child gates
58 OR, ///< Logical disjunction of child gates
59 NOT, ///< Logical negation of a single child gate
60 IN, ///< Input (variable) gate representing a base tuple
61 MULIN, ///< Multivalued-input gate (one of several options)
62 MULVAR ///< Auxiliary gate grouping all MULIN siblings
63};
64class dDNNF;
65
66/**
67 * @brief Boolean circuit for provenance formula evaluation.
68 *
69 * Inherits the gate/wire infrastructure from @c Circuit<BooleanGate> and
70 * adds probability annotation, info integers (for multivalued inputs), and
71 * a rich set of evaluation algorithms.
72 */
73class BooleanCircuit : public Circuit<BooleanGate> {
74private:
75/**
76 * @brief Evaluate the sub-circuit at @p g on one sampled world.
77 *
78 * Each gate in @p sampled is treated as @c true; all other @c IN gates
79 * are @c false.
80 *
81 * @param g Root gate to evaluate.
82 * @param sampled Set of input gates that are @c true in this world.
83 * @return Boolean value of the circuit at gate @p g.
84 */
85bool evaluate(gate_t g, const std::unordered_set<gate_t> &sampled) const;
86
87/**
88 * @brief Generate a Tseytin transformation of the sub-circuit at @p g.
89 *
90 * Produces a CNF/DIMACS-style encoding suitable for input to model
91 * counters such as @c weightmc or @c d4.
92 *
93 * @param g Root gate.
94 * @param display_prob Include probability weights in the output.
95 * @return DIMACS string.
96 */
97std::string Tseytin(gate_t g, bool display_prob) const;
98
99/**
100 * @brief Recursive helper for @c interpretAsDD().
101 * @param g Current gate to process.
102 * @param seen Set of gates already visited (prevents re-processing).
103 * @param dd The d-DNNF being constructed.
104 * @return Gate ID in @p dd corresponding to @p g.
105 */
106gate_t interpretAsDDInternal(gate_t g, std::set<gate_t> &seen, dDNNF &dd) const;
107/**
108 * @brief Recursive helper for @c independentEvaluation().
109 * @param g Current gate to evaluate.
110 * @param seen Set of gates already evaluated (for memoisation).
111 * @return Probability at gate @p g.
112 */
113double independentEvaluationInternal(gate_t g, std::set<gate_t> &seen) const;
114/**
115 * @brief Recursive helper for @c rewriteMultivaluedGates().
116 * @param muls Gates in the MULVAR group being rewritten.
117 * @param cumulated_probs Cumulative probability thresholds for each MULIN.
118 * @param start First index in @p muls to process.
119 * @param end One past the last index in @p muls to process.
120 * @param prefix Current AND-chain prefix being built.
121 */
123 const std::vector<gate_t> &muls,
124 const std::vector<double> &cumulated_probs,
125 unsigned start,
126 unsigned end,
127 std::vector<gate_t> &prefix);
128
129protected:
130std::set<gate_t> inputs; ///< Set of IN (input) gate IDs
131std::set<gate_t> mulinputs; ///< Set of MULVAR gate IDs
132std::vector<double> prob; ///< Per-gate probability (for IN gates)
133std::map<gate_t, unsigned> info; ///< Per-gate integer info (for MULIN gates)
134bool probabilistic=false; ///< @c true if any gate has a non-unit probability
135
136public:
137/** @brief Construct an empty Boolean circuit. */
141}
142
143/** @copydoc Circuit::addGate() */
144gate_t addGate() override;
145/** @copydoc Circuit::setGate(gateType) */
146gate_t setGate(BooleanGate type) override;
147/** @copydoc Circuit::setGate(const uuid&, gateType) */
148gate_t setGate(const uuid &u, BooleanGate type) override;
149
150/**
151 * @brief Create a new gate with a probability annotation.
152 * @param t Gate type (typically @c BooleanGate::IN).
153 * @param p Probability of this gate being @c true.
154 * @return Gate identifier.
155 */
156gate_t setGate(BooleanGate t, double p);
157
158/**
159 * @brief Create (or update) a gate with a UUID and probability.
160 * @param u UUID string.
161 * @param t Gate type.
162 * @param p Probability of this gate being @c true.
163 * @return Gate identifier.
164 */
165gate_t setGate(const uuid &u, BooleanGate t, double p);
166
167/**
168 * @brief Return the set of input (IN) gate IDs.
169 * @return Const reference to the set of IN gate identifiers.
170 */
171const std::set<gate_t> &getInputs() const {
172 return inputs;
173}
174
175/**
176 * @brief Return @c true if the circuit contains any MULIN gates.
177 *
178 * Multivalued inputs are normally rewritten into AND/OR/NOT/IN gates by
179 * @c rewriteMultivaluedGates() before the circuit is consumed by an
180 * evaluation method. Algorithms that cannot handle multivalued inputs
181 * directly can use this as a precondition check.
182 *
183 * @return @c true iff at least one @c MULIN gate is present.
184 */
186 return !mulinputs.empty();
187}
188
189/**
190 * @brief Set the probability for gate @p g and mark the circuit as probabilistic.
191 * @param g Gate identifier.
192 * @param p Probability value in [0, 1].
193 */
194void setProb(gate_t g, double p) {
195 if(!probabilistic && p!=1.)
196 probabilistic=true;
197 prob[static_cast<std::underlying_type<gate_t>::type>(g)]=p;
198}
199
200/**
201 * @brief Return the probability stored for gate @p g.
202 * @param g Gate identifier.
203 * @return Probability value.
204 */
205double getProb(gate_t g) const {
206 return prob[static_cast<std::underlying_type<gate_t>::type>(g)];
207}
208
209/**
210 * @brief Return @c true if any gate has a non-trivial (< 1) probability.
211 * @return @c true iff at least one gate has a probability strictly less than 1.
212 */
213bool isProbabilistic() const {
214 return probabilistic;
215}
216
217/**
218 * @brief Store an integer annotation on gate @p g.
219 *
220 * Used to record the index of a @c MULIN gate within its @c MULVAR group.
221 *
222 * @param g Gate identifier.
223 * @param info Integer to store.
224 */
225void setInfo(gate_t g, unsigned info);
226
227/**
228 * @brief Return the integer annotation for gate @p g.
229 * @param g Gate identifier.
230 * @return Stored integer, or 0 if not set.
231 */
232unsigned getInfo(gate_t g) const;
233
234/**
235 * @brief Compute the probability by exact enumeration of all possible worlds.
236 *
237 * Only tractable for circuits with a small number of input gates.
238 *
239 * @param g Root gate.
240 * @return Exact probability.
241 */
242double possibleWorlds(gate_t g) const;
243
244/**
245 * @brief Compile the sub-circuit rooted at @p g to a @c dDNNF via an external tool.
246 *
247 * Writes the circuit in DIMACS/DNNF format, invokes @p compiler as a
248 * subprocess, and parses the resulting d-DNNF.
249 *
250 * @param g Root gate.
251 * @param compiler Command to invoke (e.g. "d4", "c2d", "minic2d").
252 * @return The compiled @c dDNNF.
253 */
254dDNNF compilation(gate_t g, std::string compiler) const;
255
256/**
257 * @brief Estimate the probability via Monte Carlo sampling.
258 *
259 * @param g Root gate.
260 * @param samples Number of independent worlds to sample.
261 * @return Estimated probability.
262 */
263double monteCarlo(gate_t g, unsigned samples) const;
264
265/**
266 * @brief Compute the probability using the @c weightmc model counter.
267 *
268 * @param g Root gate.
269 * @param opt Additional command-line options forwarded to @c weightmc.
270 * @return Probability estimate returned by @c weightmc.
271 */
272double WeightMC(gate_t g, std::string opt) const;
273
274/**
275 * @brief Compute the probability exactly when inputs are independent.
276 *
277 * Applicable when the circuit has no shared input gate (i.e., each
278 * input appears at most once).
279 *
280 * @param g Root gate.
281 * @return Exact probability.
282 */
283double independentEvaluation(gate_t g) const;
284
285/**
286 * @brief Rewrite all MULVAR/MULIN gate clusters into standard AND/OR/NOT circuits.
287 *
288 * Must be called before any evaluation method when the circuit contains
289 * multivalued input gates.
290 */
292
293/**
294 * @brief Build a @c dDNNF directly from the Boolean circuit's structure.
295 *
296 * Used as a fallback when no external compiler is available and the
297 * circuit is already in a form that can be interpreted as a d-DNNF.
298 *
299 * @param g Root gate.
300 * @return A @c dDNNF wrapping the same structure.
301 */
302dDNNF interpretAsDD(gate_t g) const;
303
304/**
305 * @brief Dispatch to the appropriate d-DNNF construction method.
306 *
307 * @param g Root gate.
308 * @param method Compilation method name (e.g. "tree-decomposition",
309 * "d4", "c2d", …).
310 * @param args Additional arguments forwarded to the chosen method.
311 * @return The constructed @c dDNNF.
312 */
313dDNNF makeDD(gate_t g, const std::string &method, const std::string &args) const;
314
315/** @copydoc Circuit::toString() */
316virtual std::string toString(gate_t g) const override;
317
318/**
319 * @brief Render the sub-circuit at @p g, labelling input gates from a map.
320 *
321 * Same as @c toString(gate_t), but @c IN and @c MULIN gates whose
322 * gate identifier is present in @p labels are rendered using the
323 * mapped string instead of the default @c x@<id@> placeholder. Gates
324 * not found in @p labels fall back to the default rendering.
325 *
326 * @param g Gate to render.
327 * @param labels Optional mapping from input/mulinput gate IDs to
328 * user-supplied labels.
329 * @return Human-readable string.
330 */
331std::string toString(
332 gate_t g,
333 const std::unordered_map<gate_t, std::string> &labels) const;
334
335private:
336/**
337 * @brief Internal recursive helper for the two @c toString() variants.
338 *
339 * The @p parent parameter carries the gate type of the immediate
340 * caller. It drives parenthesis elision in two cases: at the root
341 * (parent set to @c UNDETERMINED) the outer wrap is omitted, and
342 * when @p parent matches the current gate type (associative
343 * AND/OR) the wrap is omitted to flatten same-op nesting. A 1-wire
344 * AND/OR also bypasses the wrap and delegates to its child since
345 * such single-element joins carry no information.
346 *
347 * @param g Gate to render.
348 * @param parent Gate type of the caller, or @c UNDETERMINED at the root.
349 * @param labels Pointer to a label map, or @c nullptr for the
350 * unlabelled rendering.
351 */
352std::string toStringHelper(
353 gate_t g,
354 BooleanGate parent,
355 const std::unordered_map<gate_t, std::string> *labels) const;
356
357public:
358
359/**
360 * @brief Export the circuit in the textual format expected by external compilers.
361 *
362 * Produces a multi-line string encoding all gates reachable from the
363 * circuit in the format used by the standalone @c tdkc tool and external
364 * model counters.
365 *
366 * @param g Root gate.
367 * @return Circuit description string.
368 */
369std::string exportCircuit(gate_t g) const;
370
371/**
372 * @brief Boost serialisation support.
373 * @param ar Boost archive (input or output).
374 * @param version Archive version (unused).
375 */
376template<class Archive>
377void serialize (Archive & ar, const unsigned int version)
378{
379 ar & uuid2id;
380 ar & id2uuid;
381 ar & gates;
382 ar & wires;
383 ar & inputs;
384 ar & mulinputs;
385 ar & prob;
386 ar & info;
387 ar & probabilistic;
388}
389
392};
393
394#endif /* BOOLEAN_CIRCUIT_H */
BooleanGate
Gate types for a Boolean provenance circuit.
@ MULVAR
Auxiliary gate grouping all MULIN siblings.
@ NOT
Logical negation of a single child gate.
@ OR
Logical disjunction of child gates.
@ AND
Logical conjunction of child gates.
@ IN
Input (variable) gate representing a base tuple.
@ UNDETERMINED
Placeholder gate whose type has not been set yet.
@ MULIN
Multivalued-input gate (one of several options).
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:49
Out-of-line template method implementations for Circuit<gateType>.
std::vector< double > prob
Per-gate probability (for IN gates).
virtual ~BooleanCircuit()
double WeightMC(gate_t g, std::string opt) const
Compute the probability using the weightmc model counter.
bool evaluate(gate_t g, const std::unordered_set< gate_t > &sampled) const
Evaluate the sub-circuit at g on one sampled world.
dDNNF interpretAsDD(gate_t g) const
Build a dDNNF directly from the Boolean circuit's structure.
double possibleWorlds(gate_t g) const
Compute the probability by exact enumeration of all possible worlds.
bool probabilistic
true if any gate has a non-unit probability
void setProb(gate_t g, double p)
Set the probability for gate g and mark the circuit as probabilistic.
std::set< gate_t > inputs
Set of IN (input) gate IDs.
void rewriteMultivaluedGatesRec(const std::vector< gate_t > &muls, const std::vector< double > &cumulated_probs, unsigned start, unsigned end, std::vector< gate_t > &prefix)
Recursive helper for rewriteMultivaluedGates().
double independentEvaluationInternal(gate_t g, std::set< gate_t > &seen) const
Recursive helper for independentEvaluation().
std::string exportCircuit(gate_t g) const
Export the circuit in the textual format expected by external compilers.
const std::set< gate_t > & getInputs() const
Return the set of input (IN) gate IDs.
std::string Tseytin(gate_t g, bool display_prob) const
Generate a Tseytin transformation of the sub-circuit at g.
std::string toStringHelper(gate_t g, BooleanGate parent, const std::unordered_map< gate_t, std::string > *labels) const
Internal recursive helper for the two toString() variants.
dDNNF makeDD(gate_t g, const std::string &method, const std::string &args) const
Dispatch to the appropriate d-DNNF construction method.
gate_t setGate(BooleanGate type) override
Allocate a new gate with type type and no UUID.
unsigned getInfo(gate_t g) const
Return the integer annotation for gate g.
bool hasMultivaluedGates() const
Return true if the circuit contains any MULIN gates.
friend class dDNNFTreeDecompositionBuilder
gate_t addGate() override
Allocate a new gate with a default-initialised type.
double monteCarlo(gate_t g, unsigned samples) const
Estimate the probability via Monte Carlo sampling.
void rewriteMultivaluedGates()
Rewrite all MULVAR/MULIN gate clusters into standard AND/OR/NOT circuits.
double getProb(gate_t g) const
Return the probability stored for gate g.
void setInfo(gate_t g, unsigned info)
Store an integer annotation on gate g.
virtual std::string toString(gate_t g) const override
Return a textual description of gate g for debugging.
BooleanCircuit()
Construct an empty Boolean circuit.
void serialize(Archive &ar, const unsigned int version)
Boost serialisation support.
std::map< gate_t, unsigned > info
Per-gate integer info (for MULIN gates).
friend class boost::serialization::access
dDNNF compilation(gate_t g, std::string compiler) const
Compile the sub-circuit rooted at g to a dDNNF via an external tool.
bool isProbabilistic() const
Return true if any gate has a non-trivial (< 1) probability.
gate_t interpretAsDDInternal(gate_t g, std::set< gate_t > &seen, dDNNF &dd) const
Recursive helper for interpretAsDD().
double independentEvaluation(gate_t g) const
Compute the probability exactly when inputs are independent.
std::set< gate_t > mulinputs
Set of MULVAR gate IDs.
Generic template base class for provenance circuits.
Definition Circuit.h:62
std::string uuid
Definition Circuit.h:65
std::unordered_map< gate_t, uuid > id2uuid
Definition Circuit.h:69
std::unordered_map< uuid, gate_t > uuid2id
Definition Circuit.h:68
std::vector< BooleanGate > gates
Definition Circuit.h:71
std::vector< std::vector< gate_t > > wires
Definition Circuit.h:72
A d-DNNF circuit supporting exact probabilistic and game-theoretic evaluation.
Definition dDNNF.h:69