ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
MonteCarloSampler.cpp
Go to the documentation of this file.
1/**
2 * @file MonteCarloSampler.cpp
3 * @brief Implementation of the RV-aware Monte Carlo sampler.
4 */
5#include "MonteCarloSampler.h"
6#include "Aggregation.h"
7#include "RandomVariable.h"
8#include "RangeCheck.h" // collectRvConstraints
9#include "Circuit.h"
10
11#include <algorithm>
12#include <cmath>
13#include <cstdint>
14#include <limits>
15#include <memory>
16#include <optional>
17#include <random>
18#include <stack>
19#include <stdexcept>
20#include <string>
21#include <unordered_map>
22#include <unordered_set>
23#include <variant>
24#include <vector>
25
26namespace provsql {
27
28namespace {
29
30/// Seed an mt19937_64 from the provsql.monte_carlo_seed GUC.
31std::mt19937_64 seedRng()
32{
33 std::mt19937_64 rng;
34 if(provsql_monte_carlo_seed != -1) {
35 rng.seed(static_cast<uint64_t>(provsql_monte_carlo_seed));
36 } else {
37 std::random_device rd;
38 rng.seed((static_cast<uint64_t>(rd()) << 32) | rd());
39 }
40 return rng;
41}
42
43bool applyCmp(double l, ComparisonOperator op, double r)
44{
45 // IEEE 754 semantics: any comparison involving NaN is false except !=.
46 switch(op) {
47 case ComparisonOperator::LT: return l < r;
48 case ComparisonOperator::LE: return l <= r;
49 case ComparisonOperator::EQ: return l == r;
50 case ComparisonOperator::NE: return l != r;
51 case ComparisonOperator::GE: return l >= r;
52 case ComparisonOperator::GT: return l > r;
53 }
54 return false;
55}
56
57/// Per-iteration sampler state shared between the Boolean and scalar
58/// recursions.
59class Sampler {
60public:
61 Sampler(const GenericCircuit &gc, std::mt19937_64 &rng)
62 : gc_(gc), rng_(rng) {}
63
64 /// Reset per-iteration memo caches.
65 void resetIteration() {
66 bool_cache_.clear();
67 scalar_cache_.clear();
68 }
69
70 bool evalBool(gate_t g);
71 double evalScalar(gate_t g);
72
73private:
74 const GenericCircuit &gc_;
75 std::mt19937_64 &rng_;
76 std::unordered_map<gate_t, bool> bool_cache_;
77 std::unordered_map<gate_t, double> scalar_cache_;
78};
79
80bool Sampler::evalBool(gate_t g)
81{
82 auto it = bool_cache_.find(g);
83 if(it != bool_cache_.end()) return it->second;
84
85 bool result = false;
86 const auto type = gc_.getGateType(g);
87 const auto &wires = gc_.getWires(g);
88
89 switch(type) {
90 case gate_input:
91 case gate_update:
92 {
93 std::uniform_real_distribution<double> u(0.0, 1.0);
94 result = u(rng_) < gc_.getProb(g);
95 break;
96 }
97 case gate_plus:
98 result = false;
99 for(gate_t c : wires) {
100 if(evalBool(c)) { result = true; break; }
101 }
102 break;
103 case gate_times:
104 result = true;
105 for(gate_t c : wires) {
106 if(!evalBool(c)) { result = false; break; }
107 }
108 break;
109 case gate_monus:
110 if(wires.size() != 2)
111 throw CircuitException("gate_monus must have exactly two children");
112 result = evalBool(wires[0]) && !evalBool(wires[1]);
113 break;
114 case gate_zero:
115 result = false;
116 break;
117 case gate_one:
118 result = true;
119 break;
120 case gate_cmp:
121 {
122 if(wires.size() != 2)
123 throw CircuitException("gate_cmp must have exactly two children");
124 bool ok;
125 ComparisonOperator op = cmpOpFromOid(gc_.getInfos(g).first, ok);
126 if(!ok)
127 throw CircuitException(
128 "gate_cmp: unsupported operator OID " +
129 std::to_string(gc_.getInfos(g).first));
130 double l = evalScalar(wires[0]);
131 double r = evalScalar(wires[1]);
132 result = applyCmp(l, op, r);
133 break;
134 }
135 case gate_mulinput:
136 throw CircuitException(
137 "Monte Carlo over circuits containing gate_mulinput "
138 "is not yet supported on the RV path");
139 case gate_delta:
140 // δ-semiring operator: identity on the Boolean semiring, so the
141 // sampled truth value is just the wrapped child's. Showed up
142 // when conditioning on a row's provenance() in an aggregate
143 // query (HAVING / GROUP BY paths can splice δ over the
144 // semimod's k-side).
145 if(wires.size() != 1)
146 throw CircuitException("gate_delta must have exactly one child");
147 result = evalBool(wires[0]);
148 break;
150 // Structural Boolean-rewrite marker: identity on the Boolean
151 // semiring, so the sampled truth value is the wrapped child's.
152 // The marker exists to refuse non-Boolean-compat evaluation; MC
153 // sampling for probability is always Boolean-compat.
154 if(wires.size() != 1)
155 throw CircuitException(
156 "gate_assumed_boolean must have exactly one child");
157 result = evalBool(wires[0]);
158 break;
159 default:
160 throw CircuitException(
161 "Unsupported gate type in Boolean evaluation: " +
162 std::string(gate_type_name[type]));
163 }
164
165 bool_cache_[g] = result;
166 return result;
167}
168
169double Sampler::evalScalar(gate_t g)
170{
171 auto it = scalar_cache_.find(g);
172 if(it != scalar_cache_.end()) return it->second;
173
174 double result = 0.0;
175 const auto type = gc_.getGateType(g);
176 const auto &wires = gc_.getWires(g);
177
178 switch(type) {
179 case gate_value:
180 result = parseDoubleStrict(gc_.getExtra(g));
181 break;
182 case gate_rv:
183 {
184 auto spec = parse_distribution_spec(gc_.getExtra(g));
185 if(!spec)
186 throw CircuitException(
187 "Malformed gate_rv extra: " + gc_.getExtra(g));
188 switch(spec->kind) {
189 case DistKind::Normal: {
190 std::normal_distribution<double> d(spec->p1, spec->p2);
191 result = d(rng_);
192 break;
193 }
194 case DistKind::Uniform: {
195 std::uniform_real_distribution<double> d(spec->p1, spec->p2);
196 result = d(rng_);
197 break;
198 }
199 case DistKind::Exponential: {
200 std::exponential_distribution<double> d(spec->p1);
201 result = d(rng_);
202 break;
203 }
204 case DistKind::Erlang: {
205 /* Gamma(shape, scale) with integer shape k and scale 1/λ
206 * samples Erlang(k, λ) directly. std::gamma_distribution
207 * uses the rate's inverse as its scale parameter. */
208 std::gamma_distribution<double> d(spec->p1, 1.0 / spec->p2);
209 result = d(rng_);
210 break;
211 }
212 }
213 break;
214 }
215 case gate_arith:
216 {
217 if(wires.empty())
218 throw CircuitException("gate_arith must have at least one child");
219 auto op = static_cast<provsql_arith_op>(gc_.getInfos(g).first);
220 switch(op) {
222 result = 0.0;
223 for(gate_t c : wires) result += evalScalar(c);
224 break;
226 result = 1.0;
227 for(gate_t c : wires) result *= evalScalar(c);
228 break;
230 if(wires.size() != 2)
231 throw CircuitException("gate_arith MINUS must be binary");
232 result = evalScalar(wires[0]) - evalScalar(wires[1]);
233 break;
235 if(wires.size() != 2)
236 throw CircuitException("gate_arith DIV must be binary");
237 result = evalScalar(wires[0]) / evalScalar(wires[1]);
238 break;
240 if(wires.size() != 1)
241 throw CircuitException("gate_arith NEG must be unary");
242 result = -evalScalar(wires[0]);
243 break;
244 default:
245 throw CircuitException(
246 "Unknown gate_arith operator tag: " +
247 std::to_string(static_cast<unsigned>(op)));
248 }
249 break;
250 }
251 case gate_agg:
252 {
253 // HAVING-style aggregate evaluated per MC iteration: walk the
254 // gate_semimod children, keep the rows whose k_gate fires in
255 // this world, push their value into a reusable Aggregator,
256 // return the finalised scalar. Closes the priority-4-era gap
257 // that made `WHERE rv > 0 GROUP BY x HAVING count(*) > 1`
258 // structural-only (see continuous_selection.sql section G).
259 //
260 // Type plan: we evaluate every numeric path in float8 to stay
261 // inside evalScalar's return type. COUNT is normalised by
262 // makeAggregator to SumAgg<long>, so we feed it 1L per kept row
263 // without evaluating the gate_one value child; SUM / AVG / MIN /
264 // MAX consume the value via evalScalar. Empty groups finalise
265 // to NONE; we surface NaN, which compares false under IEEE on
266 // any subsequent gate_cmp -- the SQL convention for HAVING on
267 // an empty group.
269 getAggregationOperator(gc_.getInfos(g).first);
270 std::unique_ptr<Aggregator> agg =
274 if(!agg)
275 throw CircuitException(
276 "gate_agg: makeAggregator returned null for op " +
277 std::to_string(static_cast<int>(op)));
278 for(gate_t child : wires) {
279 if(gc_.getGateType(child) != gate_semimod) continue;
280 const auto &sm = gc_.getWires(child);
281 if(sm.size() != 2) continue;
282 if(!evalBool(sm[0])) continue;
284 agg->add(AggValue(1L));
285 } else {
286 agg->add(AggValue(evalScalar(sm[1])));
287 }
288 }
289 AggValue r = agg->finalize();
290 switch(r.getType()) {
291 case ValueType::INT:
292 result = static_cast<double>(std::get<long>(r.v));
293 break;
294 case ValueType::FLOAT:
295 result = std::get<double>(r.v);
296 break;
297 case ValueType::NONE:
298 result = std::numeric_limits<double>::quiet_NaN();
299 break;
300 default:
301 throw CircuitException(
302 "gate_agg: unsupported aggregate result ValueType in MC");
303 }
304 break;
305 }
306 case gate_mixture:
307 {
308 // Two shapes of gate_mixture share this case:
309 //
310 // - Classic 3-wire: [p_token, x_token, y_token]. Draw the
311 // Bernoulli via evalBool, which handles gate_input by
312 // sampling uniform(0,1) < get_prob and memoises on
313 // bool_cache_; two mixtures sharing the same p_token
314 // therefore see the same draw, and any unrelated Boolean
315 // parent of p_token stays in sync.
316 //
317 // - Categorical N-wire: [key, mul_1, ..., mul_n]. Built
318 // directly by the @c provsql.categorical SQL constructor;
319 // each mul_i carries its probability in set_prob and its
320 // outcome value in extra.
321 // We draw a single uniform[0,1) per block, walk the
322 // cumulative probabilities to pick a mulinput, and stash the
323 // Boolean truth values into bool_cache_ so any downstream
324 // Boolean consumer of the mulinputs (independentEvaluation,
325 // OR/AND parents) sees a consistent sampled outcome.
326 if(gc_.isCategoricalMixture(g)) {
327 std::uniform_real_distribution<double> u(0.0, 1.0);
328 const double r = u(rng_);
329 double cum = 0.0;
330 // Default to the last mulinput in case floating-point cumulative
331 // sums leave us shy of 1.0 by a few ULPs.
332 std::size_t chosen = wires.size() - 1;
333 for(std::size_t i = 1; i < wires.size(); ++i) {
334 cum += gc_.getProb(wires[i]);
335 if(r < cum) { chosen = i; break; }
336 }
337 for(std::size_t i = 1; i < wires.size(); ++i) {
338 bool_cache_[wires[i]] = (i == chosen);
339 }
340 result = parseDoubleStrict(gc_.getExtra(wires[chosen]));
341 break;
342 }
343 if(wires.size() != 3)
344 throw CircuitException(
345 "gate_mixture must have exactly three children "
346 "[p_token, x_token, y_token]");
347 result = evalBool(wires[0]) ? evalScalar(wires[1])
348 : evalScalar(wires[2]);
349 break;
350 }
351 default:
352 throw CircuitException(
353 "Unsupported gate type in scalar evaluation: " +
354 std::string(gate_type_name[type]));
355 }
356
357 scalar_cache_[g] = result;
358 return result;
359}
360
361} // namespace
362
363double monteCarloRV(const GenericCircuit &gc, gate_t root, unsigned samples)
364{
365 std::mt19937_64 rng = seedRng();
366 Sampler sampler(gc, rng);
367
368 unsigned success = 0;
369 for(unsigned i = 0; i < samples; ++i) {
370 sampler.resetIteration();
371 if(sampler.evalBool(root))
372 ++success;
373
375 throw CircuitException(
376 "Interrupted after " + std::to_string(i + 1) + " samples");
377 }
378 return success * 1.0 / samples;
379}
380
381std::vector<double> monteCarloJointDistribution(
382 const GenericCircuit &gc,
383 const std::vector<gate_t> &cmps,
384 unsigned samples)
385{
386 const unsigned k = cmps.size();
387 if (k == 0)
388 throw CircuitException(
389 "monteCarloJointDistribution: empty cmps list");
390 if (k > 30)
391 throw CircuitException(
392 "monteCarloJointDistribution: too many cmps in island ("
393 + std::to_string(k) + " > 30)");
394
395 std::mt19937_64 rng = seedRng();
396 Sampler sampler(gc, rng);
397
398 const std::size_t nb_outcomes = std::size_t{1} << k;
399 std::vector<unsigned> counts(nb_outcomes, 0);
400
401 for (unsigned i = 0; i < samples; ++i) {
402 sampler.resetIteration();
403 std::size_t w = 0;
404 for (unsigned j = 0; j < k; ++j) {
405 if (sampler.evalBool(cmps[j])) w |= (std::size_t{1} << j);
406 }
407 ++counts[w];
409 throw CircuitException(
410 "Interrupted after " + std::to_string(i + 1) + " samples");
411 }
412
413 std::vector<double> probs(nb_outcomes);
414 for (std::size_t w = 0; w < nb_outcomes; ++w)
415 probs[w] = counts[w] * 1.0 / samples;
416 return probs;
417}
418
419std::vector<double> monteCarloScalarSamples(
420 const GenericCircuit &gc, gate_t root, unsigned samples)
421{
422 std::mt19937_64 rng = seedRng();
423 Sampler sampler(gc, rng);
424
425 std::vector<double> out;
426 out.reserve(samples);
427 for(unsigned i = 0; i < samples; ++i) {
428 sampler.resetIteration();
429 out.push_back(sampler.evalScalar(root));
430
432 throw CircuitException(
433 "Interrupted after " + std::to_string(i + 1) + " samples");
434 }
435 return out;
436}
437
439 const GenericCircuit &gc, gate_t root, gate_t event_root, unsigned samples)
440{
441 std::mt19937_64 rng = seedRng();
442 Sampler sampler(gc, rng);
443
445 out.attempted = 0;
446 out.accepted.reserve(samples);
447
448 for(unsigned i = 0; i < samples; ++i) {
449 sampler.resetIteration();
450 /* Evaluate the indicator FIRST: this populates bool_cache_ AND
451 * scalar_cache_ for every gate_rv / gate_input that the event
452 * touches, so the subsequent evalScalar(root) reads the same
453 * draws. Shared gate_t leaves between root and event_root are
454 * therefore correctly coupled across the indicator and the
455 * value. */
456 if(sampler.evalBool(event_root)) {
457 out.accepted.push_back(sampler.evalScalar(root));
458 }
459 ++out.attempted;
460
462 throw CircuitException(
463 "Interrupted after " + std::to_string(i + 1) + " samples");
464 }
465 return out;
466}
467
468namespace {
469
470/**
471 * @brief Inverse standard-normal CDF, Beasley-Springer-Moro (1995).
472 *
473 * Returns @c z such that @f$\Phi(z) = p@f$. Accurate to about
474 * @c 1e-7 over @c p ∈ [0.02425, 1 - 0.02425], with a tail
475 * rational fallback for the rest of @c (0, 1). Callers must clamp
476 * @c p strictly inside @c (0, 1) since the function diverges at the
477 * endpoints; the truncated-normal sampler below clamps to
478 * @c [1e-15, 1 - 1e-15] before each call.
479 *
480 * Used by @c try_truncated_closed_form_sample to invert the normal
481 * CDF for inverse-CDF transform sampling. The Beasley-Springer-Moro
482 * routine is in widespread library use (NumPy/SciPy 'norminv', etc.)
483 * and its accuracy is several orders of magnitude tighter than the
484 * sampling noise the tests can detect at 10k draws, so it's a
485 * comfortable margin.
486 */
487double inv_phi(double p)
488{
489 static const double a[] = {
490 -3.969683028665376e+01, 2.209460984245205e+02,
491 -2.759285104469687e+02, 1.383577518672690e+02,
492 -3.066479806614716e+01, 2.506628277459239e+00
493 };
494 static const double b[] = {
495 -5.447609879822406e+01, 1.615858368580409e+02,
496 -1.556989798598866e+02, 6.680131188771972e+01,
497 -1.328068155288572e+01
498 };
499 static const double c_arr[] = {
500 -7.784894002430293e-03, -3.223964580411365e-01,
501 -2.400758277161838e+00, -2.549732539343734e+00,
502 4.374664141464968e+00, 2.938163982698783e+00
503 };
504 static const double d[] = {
505 7.784695709041462e-03, 3.224671290700398e-01,
506 2.445134137142996e+00, 3.754408661907416e+00
507 };
508 static const double p_low = 0.02425;
509 static const double p_high = 1.0 - p_low;
510
511 if (p < p_low) {
512 const double q = std::sqrt(-2.0 * std::log(p));
513 return (((((c_arr[0]*q + c_arr[1])*q + c_arr[2])*q
514 + c_arr[3])*q + c_arr[4])*q + c_arr[5])
515 / ((((d[0]*q + d[1])*q + d[2])*q + d[3])*q + 1.0);
516 }
517 if (p <= p_high) {
518 const double q = p - 0.5;
519 const double r = q * q;
520 return (((((a[0]*r + a[1])*r + a[2])*r + a[3])*r + a[4])*r + a[5]) * q
521 / (((((b[0]*r + b[1])*r + b[2])*r + b[3])*r + b[4])*r + 1.0);
522 }
523 const double q = std::sqrt(-2.0 * std::log(1.0 - p));
524 return -(((((c_arr[0]*q + c_arr[1])*q + c_arr[2])*q
525 + c_arr[3])*q + c_arr[4])*q + c_arr[5])
526 / ((((d[0]*q + d[1])*q + d[2])*q + d[3])*q + 1.0);
527}
528
529} // namespace
530
531std::optional<std::vector<double>>
533 gate_t event_root, unsigned n)
534{
535 auto m = matchTruncatedSingleRv(gc, root, event_root);
536 if (!m) return std::nullopt;
537 const DistributionSpec &spec = m->spec;
538 const double lo = m->lo, hi = m->hi;
539
540 std::mt19937_64 rng = seedRng();
541 std::uniform_real_distribution<double> U01(0.0, 1.0);
542
543 std::vector<double> out;
544 out.reserve(n);
545
546 switch (spec.kind) {
547 case DistKind::Uniform: {
548 /* @c matchTruncatedSingleRv already intersected the event's
549 * interval with the RV's natural [a, b] support, so a plain
550 * uniform draw on [lo, hi] is the conditional distribution. */
551 std::uniform_real_distribution<double> U(lo, hi);
552 for (unsigned i = 0; i < n; ++i) out.push_back(U(rng));
553 return out;
554 }
556 const double lambda = spec.p1;
557 if (!(lambda > 0.0)) return std::nullopt;
558 if (std::isinf(hi)) {
559 /* X | X > lo = lo + Exp(λ) by memorylessness. Numerically
560 * stable for arbitrarily large @c lo, where the inverse-CDF
561 * would underflow on @c 1 - exp(-λ·lo). */
562 std::exponential_distribution<double> E(lambda);
563 for (unsigned i = 0; i < n; ++i) out.push_back(lo + E(rng));
564 return out;
565 }
566 /* Two-sided truncation: inverse-CDF on @c [F(lo), F(hi)] with
567 * @c F(x) = -expm1(-λx) for accuracy near 0, and @c x =
568 * -log1p(-u)/λ for accuracy as @c u approaches 1. */
569 const double F_lo = -std::expm1(-lambda * lo);
570 const double F_hi = -std::expm1(-lambda * hi);
571 if (!(F_lo < F_hi)) return std::nullopt;
572 for (unsigned i = 0; i < n; ++i) {
573 const double u = F_lo + U01(rng) * (F_hi - F_lo);
574 out.push_back(-std::log1p(-u) / lambda);
575 }
576 return out;
577 }
578 case DistKind::Normal: {
579 const double mu = spec.p1;
580 const double sigma = spec.p2;
581 if (!(sigma > 0.0)) return std::nullopt;
582 const double sqrt2 = std::sqrt(2.0);
583 const double alpha = (lo - mu) / sigma;
584 const double beta = (hi - mu) / sigma;
585 const double Phi_a = std::isfinite(alpha)
586 ? 0.5 * (1.0 + std::erf(alpha / sqrt2))
587 : (alpha < 0 ? 0.0 : 1.0);
588 const double Phi_b = std::isfinite(beta)
589 ? 0.5 * (1.0 + std::erf(beta / sqrt2))
590 : (beta < 0 ? 0.0 : 1.0);
591 if (!(Phi_a < Phi_b)) return std::nullopt;
592 /* Clamp the target probability strictly inside (0, 1) so
593 * @c inv_phi does not diverge near the asymptotes. The 1e-15
594 * margin is well below the BSM approximation's intrinsic
595 * accuracy floor (~1e-7), so it's a safe sentinel. */
596 static constexpr double EPS = 1e-15;
597 for (unsigned i = 0; i < n; ++i) {
598 double u = Phi_a + U01(rng) * (Phi_b - Phi_a);
599 if (u < EPS) u = EPS;
600 if (u > 1.0 - EPS) u = 1.0 - EPS;
601 const double z = inv_phi(u);
602 out.push_back(mu + sigma * z);
603 }
604 return out;
605 }
606 case DistKind::Erlang:
607 /* Truncated Erlang requires the regularised lower incomplete
608 * gamma's inverse. Out of scope for v1; MC fallback handles it. */
609 return std::nullopt;
610 }
611 return std::nullopt;
612}
613
614bool circuitHasRV(const GenericCircuit &gc, gate_t root)
615{
616 std::unordered_set<gate_t> seen;
617 std::stack<gate_t> stack;
618 stack.push(root);
619 while(!stack.empty()) {
620 gate_t g = stack.top();
621 stack.pop();
622 if(!seen.insert(g).second) continue;
623 auto type = gc.getGateType(g);
624 // gate_arith too: it's exclusively a continuous-arithmetic
625 // composite, the BoolExpr semiring path has no rule for it.
626 // gate_mixture is structurally a compound RV root as well.
627 if(type == gate_rv || type == gate_arith || type == gate_mixture)
628 return true;
629 for(gate_t c : gc.getWires(g)) stack.push(c);
630 }
631 return false;
632}
633
634} // namespace provsql
ComparisonOperator cmpOpFromOid(Oid op_oid, bool &ok)
Map a PostgreSQL comparison-operator OID to a ComparisonOperator.
AggregationOperator getAggregationOperator(Oid oid)
Map a PostgreSQL aggregate function OID to an AggregationOperator.
std::unique_ptr< Aggregator > makeAggregator(AggregationOperator op, ValueType t)
Create a concrete Aggregator for the given operator and value type.
Typed aggregation value, operator, and aggregator abstractions.
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50
@ COUNT
COUNT(*) or COUNT(expr) → integer.
Definition Aggregation.h:51
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38
@ LT
Less than (<).
Definition Aggregation.h:42
@ GT
Greater than (>).
Definition Aggregation.h:44
@ LE
Less than or equal (<=).
Definition Aggregation.h:41
@ NE
Not equal (<>).
Definition Aggregation.h:40
@ GE
Greater than or equal (>=).
Definition Aggregation.h:43
@ INT
Signed 64-bit integer.
Definition Aggregation.h:67
@ NONE
No value (NULL).
Definition Aggregation.h:75
@ FLOAT
Double-precision float.
Definition Aggregation.h:68
Generic directed-acyclic-graph circuit template and gate identifier.
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:49
Monte Carlo sampling over a GenericCircuit, RV-aware.
Continuous random-variable helpers (distribution parsing, moments).
Support-based bound check for continuous-RV comparators.
Exception type thrown by circuit operations on invalid input.
Definition Circuit.h:206
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
Definition Circuit.h:140
gateType getGateType(gate_t g) const
Return the type of gate g.
Definition Circuit.h:130
In-memory provenance circuit with semiring-generic evaluation.
bool isCategoricalMixture(gate_t g) const
Test whether g is a categorical-form gate_mixture (the explicit provsql.categorical output).
std::string getExtra(gate_t g) const
Return the string extra for gate g.
double getProb(gate_t g) const
Return the probability for gate g.
std::pair< unsigned, unsigned > getInfos(gate_t g) const
Return the integer annotation pair for gate g.
@ Normal
Normal (Gaussian): p1=μ, p2=σ
@ Exponential
Exponential: p1=λ, p2 unused.
@ Uniform
Uniform on [a,b]: p1=a, p2=b.
@ Erlang
Erlang: p1=k (positive integer), p2=λ.
double parseDoubleStrict(const std::string &s)
Strictly parse s as a double.
std::vector< double > monteCarloJointDistribution(const GenericCircuit &gc, const std::vector< gate_t > &cmps, unsigned samples)
Estimate the joint distribution of cmps via Monte Carlo.
ConditionalScalarSamples monteCarloConditionalScalarSamples(const GenericCircuit &gc, gate_t root, gate_t event_root, unsigned samples)
Rejection-sample root conditioned on event_root.
std::vector< double > monteCarloScalarSamples(const GenericCircuit &gc, gate_t root, unsigned samples)
Sample a scalar sub-circuit samples times and return the draws.
std::optional< DistributionSpec > parse_distribution_spec(const std::string &s)
Parse the on-disk text encoding of a gate_rv distribution.
double monteCarloRV(const GenericCircuit &gc, gate_t root, unsigned samples)
Run Monte Carlo on a circuit that may contain gate_rv leaves.
std::optional< std::vector< double > > try_truncated_closed_form_sample(const GenericCircuit &gc, gate_t root, gate_t event_root, unsigned n)
Try to draw n exact samples from the conditional distribution of root given event_root via closed-for...
std::optional< TruncatedSingleRv > matchTruncatedSingleRv(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
Detect a closed-form, optionally-truncated single-RV shape.
bool circuitHasRV(const GenericCircuit &gc, gate_t root)
Walk the circuit reachable from root looking for any gate_rv.
int provsql_monte_carlo_seed
Seed for the Monte Carlo sampler; -1 means non-deterministic (std::random_device); controlled by the ...
Definition provsql.c:81
bool provsql_interrupted
Global variable that becomes true if this particular backend received an interrupt signal.
Definition provsql.c:74
const char * gate_type_name[]
Names of gate types.
provsql_arith_op
Arithmetic operator tags used by gate_arith.
@ PROVSQL_ARITH_DIV
binary, child0 / child1
@ PROVSQL_ARITH_PLUS
n-ary, sum of children
@ PROVSQL_ARITH_NEG
unary, -child0
@ PROVSQL_ARITH_MINUS
binary, child0 - child1
@ PROVSQL_ARITH_TIMES
n-ary, product of children
@ gate_rv
Continuous random-variable leaf (extra encodes distribution).
@ gate_assumed_boolean
Structural marker over a single child whose sub-circuit was computed under a Boolean-provenance assum...
@ gate_mixture
Probabilistic mixture: three wires [p_token (gate_input Bernoulli), x_token, y_token]; samples x when...
@ gate_arith
n-ary arithmetic gate over scalar-valued children (info1 holds operator tag)
ValueType getType() const
Return the runtime type tag of this value.
std::variant< long, double, bool, std::string, std::vector< long >, std::vector< double >, std::vector< bool >, std::vector< std::string > > v
The variant holding the actual value.
Definition Aggregation.h:92
Outcome of a conditional Monte Carlo sampling pass.
Parsed distribution spec (kind + up to two parameters).
double p2
Second parameter (σ or b; unused for Exponential).
double p1
First parameter (μ, a, or λ).