ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
Expectation.cpp
Go to the documentation of this file.
1/**
2 * @file Expectation.cpp
3 * @brief Implementation of the analytical expectation / variance / moment
4 * evaluator over scalar RV sub-circuits.
5 */
6#include "Expectation.h"
7
8#include "AnalyticEvaluator.h"
9#include "BooleanCircuit.h"
10#include "Circuit.h"
11#include "CircuitFromMMap.h"
12#include "MonteCarloSampler.h"
13#include "RandomVariable.h"
14#include "RangeCheck.h"
15#include "provsql_utils_cpp.h"
16#include "semiring/BoolExpr.h"
17
18extern "C" {
19#include "postgres.h"
20#include "fmgr.h"
21#include "utils/uuid.h"
22#include "provsql_utils.h"
23#include "provsql_error.h"
24
25PG_FUNCTION_INFO_V1(rv_moment);
26}
27
28#include <cmath>
29#include <set>
30#include <string>
31#include <unordered_map>
32#include <vector>
33
34namespace provsql {
35
37{
39 std::unordered_map<gate_t, gate_t> gc_to_bc;
40 for (gate_t u : gc.getInputs()) {
41 gc_to_bc[u] = c.setGate(gc.getUUID(u), BooleanGate::IN, gc.getProb(u));
42 }
43 for (size_t i = 0; i < gc.getNbGates(); ++i) {
44 auto u = static_cast<gate_t>(i);
45 if (gc.getGateType(u) == gate_mulinput) {
46 gc_to_bc[u] = c.setGate(gc.getUUID(u), BooleanGate::MULIN, gc.getProb(u));
47 c.setInfo(gc_to_bc[u], gc.getInfos(u).first);
48 c.addWire(gc_to_bc[u], gc_to_bc[gc.getWires(u)[0]]);
49 }
50 }
52 gate_t bcRoot = gc.evaluate(boolRoot, gc_to_bc, semiring);
53
54 try {
55 return c.independentEvaluation(bcRoot);
56 } catch (CircuitException &) {
57 if (provsql_rv_mc_samples <= 0) {
58 throw CircuitException(
59 "evaluateBooleanProbability: subcircuit could not be evaluated "
60 "independently and provsql.rv_mc_samples = 0 disables the "
61 "Monte Carlo fallback");
62 }
64 return c.monteCarlo(bcRoot,
65 static_cast<unsigned>(provsql_rv_mc_samples));
66 }
67}
68
69namespace {
70
71using RvSet = std::set<gate_t>;
72
73/// Mixing weight π = P(p = true) for a mixture's Bernoulli wire.
74/// For a bare @c gate_input, the probability is the leaf's pinned
75/// @c set_prob; for any compound Boolean gate, defer to
76/// @c evaluateBooleanProbability.
77double mixturePi(const GenericCircuit &gc, gate_t p)
78{
79 return (gc.getGateType(p) == gate_input)
80 ? gc.getProb(p)
82}
83
84/// Cache of the base-@c gate_rv UUID footprints reachable below each
85/// scalar gate, used as the structural-independence witness. Two
86/// children of an arithmetic gate are independent iff their footprints
87/// are disjoint -- and therefore the variance and TIMES expectation
88/// shortcuts apply.
89class FootprintCache {
90public:
91 explicit FootprintCache(const GenericCircuit &gc) : gc_(gc) {}
92
93 const RvSet &of(gate_t g) {
94 auto it = cache_.find(g);
95 if (it != cache_.end()) return it->second;
96 RvSet s;
97 auto type = gc_.getGateType(g);
98 if (type == gate_rv) {
99 s.insert(g);
100 } else if (type == gate_value) {
101 // empty -- no RV reached
102 } else if (type == gate_arith) {
103 for (gate_t c : gc_.getWires(g)) {
104 const auto &cs = of(c);
105 s.insert(cs.begin(), cs.end());
106 }
107 } else if (type == gate_mixture) {
108 const auto &wires = gc_.getWires(g);
109 if (gc_.isCategoricalMixture(g)) {
110 // Categorical-form mixture. Footprint = union of the
111 // mulinputs' footprints (each contributes {self, key} via the
112 // mulinput branch below), so two categoricals sharing a key
113 // overlap on it and are correctly flagged dependent.
114 for (std::size_t i = 1; i < wires.size(); ++i) {
115 const auto &fm = of(wires[i]);
116 s.insert(fm.begin(), fm.end());
117 }
118 } else if (wires.size() == 3) {
119 // Classic 3-wire mixture. Footprint = footprint(p) ∪
120 // footprint(x) ∪ footprint(y). The Boolean wire is included
121 // as a discrete random source so two mixtures whose p's share
122 // an atom are correctly recognised as dependent (their branch
123 // selection is correlated), bypassing the closed-form
124 // independence shortcut. Recursing into wires[0] (rather than
125 // inserting its gate_t directly) generalises that recognition
126 // from bare-input Bernoullis to compound Boolean wires.
127 const auto &fp = of(wires[0]);
128 s.insert(fp.begin(), fp.end());
129 const auto &fx = of(wires[1]);
130 s.insert(fx.begin(), fx.end());
131 const auto &fy = of(wires[2]);
132 s.insert(fy.begin(), fy.end());
133 }
134 } else if (type == gate_mulinput) {
135 // A mulinput is a state-carrying atom (so its own gate_t is in
136 // its footprint -- two mulinputs of the same group are distinct
137 // atoms even though they share a key) *and* references a shared
138 // key gate at wires[0] (whose footprint is added so the shared
139 // key makes the two mulinputs overlap on it, flagging them as
140 // dependent for pairwise_disjoint).
141 s.insert(g);
142 const auto &wires = gc_.getWires(g);
143 if (!wires.empty()) {
144 const auto &fk = of(wires[0]);
145 s.insert(fk.begin(), fk.end());
146 }
147 } else if (type == gate_input) {
148 // Atomic Boolean leaf. Use the gate's own UUID as its footprint
149 // so two Boolean expressions sharing this input collide on it.
150 s.insert(g);
151 } else if (type == gate_plus || type == gate_times || type == gate_monus
152 || type == gate_project || type == gate_eq
153 || type == gate_cmp || type == gate_update) {
154 // Boolean gates: footprint is the union of children's footprints.
155 // Lets a compound `p` wire feeding a mixture propagate its atom
156 // dependencies to FootprintCache for the disjoint-children
157 // shortcuts in rec_expectation / rec_variance / rec_raw_moment.
158 for (gate_t c : gc_.getWires(g)) {
159 const auto &cs = of(c);
160 s.insert(cs.begin(), cs.end());
161 }
162 } else if (type == gate_zero || type == gate_one) {
163 // Empty footprint -- a constant-true / constant-false Boolean
164 // contributes no shared atoms.
165 } else {
166 // Unknown scalar gate type: return an empty footprint. Callers
167 // will trip the analytical-decomposition switch and route the
168 // gate to the MC fallback (or raise if the fallback is disabled),
169 // which is the right behaviour for any unanticipated leaf shape.
170 }
171 return cache_.emplace(g, std::move(s)).first->second;
172 }
173
174private:
175 const GenericCircuit &gc_;
176 std::unordered_map<gate_t, RvSet> cache_;
177};
178
179bool pairwise_disjoint(FootprintCache &fp, const std::vector<gate_t> &children)
180{
181 RvSet seen;
182 for (gate_t c : children) {
183 const auto &fpc = fp.of(c);
184 for (gate_t r : fpc) {
185 if (!seen.insert(r).second) return false;
186 }
187 }
188 return true;
189}
190
191unsigned mc_samples_or_throw(const std::string &what)
192{
193 const int n = provsql_rv_mc_samples;
194 if (n <= 0) {
195 throw CircuitException(
196 what + " could not be decomposed analytically and "
197 "provsql.rv_mc_samples = 0 disables the Monte Carlo fallback");
198 }
199 return static_cast<unsigned>(n);
200}
201
202double mc_raw_moment(const GenericCircuit &gc, gate_t g, unsigned k,
203 const std::string &what)
204{
205 auto samples = monteCarloScalarSamples(gc, g, mc_samples_or_throw(what));
206 if (samples.empty()) return 0.0;
207 double total = 0.0;
208 for (double x : samples) total += std::pow(x, static_cast<double>(k));
209 return total / static_cast<double>(samples.size());
210}
211
212double mc_central_moment(const GenericCircuit &gc, gate_t g, unsigned k,
213 double mu, const std::string &what)
214{
215 auto samples = monteCarloScalarSamples(gc, g, mc_samples_or_throw(what));
216 if (samples.empty()) return 0.0;
217 double total = 0.0;
218 for (double x : samples) {
219 const double d = x - mu;
220 total += std::pow(d, static_cast<double>(k));
221 }
222 return total / static_cast<double>(samples.size());
223}
224
225/// Minimum accepted-sample count for conditional MC moments. Below
226/// this floor we'd be reporting a moment from a handful of accepted
227/// draws and the variance of the estimator would be enormous; raise
228/// rather than silently return a noisy number.
229unsigned min_accepted_floor(unsigned attempted)
230{
231 unsigned floor = attempted / 1000;
232 return floor < 5 ? 5 : floor;
233}
234
235void check_acceptance_or_throw(const ConditionalScalarSamples &cs,
236 const std::string &what)
237{
238 if (cs.accepted.empty()) {
239 /* 0-of-N accepted is the unmistakable signature of an infeasible
240 * conditioning event: raising rv_mc_samples cannot help (the
241 * acceptance probability is exactly 0). Surface that directly
242 * rather than the generic "raise samples or check satisfiability"
243 * advice that applies to merely under-sampled events. */
244 throw CircuitException(
245 what + ": conditioning event is infeasible (0 of " +
246 std::to_string(cs.attempted) +
247 " Monte Carlo samples satisfied it)");
248 }
249 const unsigned floor = min_accepted_floor(cs.attempted);
250 if (cs.accepted.size() < floor) {
251 throw CircuitException(
252 what + ": conditional MC accepted only " +
253 std::to_string(cs.accepted.size()) + " out of " +
254 std::to_string(cs.attempted) +
255 " samples (need >= " + std::to_string(floor) +
256 "); raise provsql.rv_mc_samples or tighten the event.");
257 }
258}
259
260double mc_conditional_raw_moment(const GenericCircuit &gc, gate_t g,
261 unsigned k, gate_t event_root,
262 const std::string &what)
263{
265 gc, g, event_root, mc_samples_or_throw(what));
266 check_acceptance_or_throw(cs, what);
267 double total = 0.0;
268 for (double x : cs.accepted) total += std::pow(x, static_cast<double>(k));
269 return total / static_cast<double>(cs.accepted.size());
270}
271
272double mc_conditional_central_moment(const GenericCircuit &gc, gate_t g,
273 unsigned k, double mu,
274 gate_t event_root,
275 const std::string &what)
276{
278 gc, g, event_root, mc_samples_or_throw(what));
279 check_acceptance_or_throw(cs, what);
280 double total = 0.0;
281 for (double x : cs.accepted) {
282 const double d = x - mu;
283 total += std::pow(d, static_cast<double>(k));
284 }
285 return total / static_cast<double>(cs.accepted.size());
286}
287
288double binomial(unsigned n, unsigned k)
289{
290 if (k > n) return 0.0;
291 if (k > n - k) k = n - k;
292 double r = 1.0;
293 for (unsigned i = 1; i <= k; ++i) {
294 r *= static_cast<double>(n - i + 1);
295 r /= static_cast<double>(i);
296 }
297 return r;
298}
299
300double rec_expectation(const GenericCircuit &gc, gate_t g, FootprintCache &fp);
301double rec_variance(const GenericCircuit &gc, gate_t g, FootprintCache &fp);
302double rec_raw_moment(const GenericCircuit &gc, gate_t g, unsigned k,
303 FootprintCache &fp);
304
305/// Standard normal pdf φ(z) = exp(-z²/2)/√(2π).
306double phi(double z)
307{
308 static const double INV_SQRT_2PI = 1.0 / std::sqrt(2.0 * M_PI);
309 return INV_SQRT_2PI * std::exp(-0.5 * z * z);
310}
311
312/// Standard normal CDF Φ(z) = ½(1 + erf(z/√2)). Mirrors the
313/// AnalyticEvaluator::cdfAt Normal branch so the truncation formulas
314/// here use the same numerical convention.
315double Phi(double z)
316{
317 static const double SQRT2 = std::sqrt(2.0);
318 return 0.5 * (1.0 + std::erf(z / SQRT2));
319}
320
321/**
322 * @brief Raw moments of @c X ~ Normal(μ, σ) truncated to @c [a, b].
323 *
324 * Closed form via the integration-by-parts recurrence on the
325 * standardised variable Z = (X - μ)/σ:
326 * E[Z^{k}|α<Z<β] = (k-1) E[Z^{k-2}|α<Z<β]
327 * + (α^{k-1}φ(α) − β^{k-1}φ(β)) / (Φ(β) − Φ(α))
328 * with E[Z^0|…] = 1 and E[Z^1|…] = (φ(α) − φ(β)) / (Φ(β) − Φ(α))
329 * (Greene, "Econometric Analysis", 5e, App. F). Then expand
330 * E[X^k] = E[(μ + σZ)^k] binomially.
331 *
332 * @c α = -∞ corresponds to @p a = -INFINITY (semi-infinite left tail);
333 * @c β = +∞ to @p b = +INFINITY. Returns @c NaN if @c P(α<Z<β) is
334 * below a numerical floor (so the caller falls through to MC).
335 */
336double truncated_normal_raw_moment(double mu, double sigma, double a, double b,
337 unsigned k)
338{
339 const double alpha = std::isfinite(a) ? (a - mu) / sigma
340 : -std::numeric_limits<double>::infinity();
341 const double beta = std::isfinite(b) ? (b - mu) / sigma
342 : +std::numeric_limits<double>::infinity();
343 const double Phi_alpha = std::isfinite(alpha) ? Phi(alpha) : 0.0;
344 const double Phi_beta = std::isfinite(beta) ? Phi(beta) : 1.0;
345 const double Z = Phi_beta - Phi_alpha;
346 if (Z < 1e-12) return std::numeric_limits<double>::quiet_NaN();
347
348 const double phi_alpha = std::isfinite(alpha) ? phi(alpha) : 0.0;
349 const double phi_beta = std::isfinite(beta) ? phi(beta) : 0.0;
350
351 /* E[Z^k | α<Z<β] via recurrence; store all moments up to k. */
352 std::vector<double> M(k + 1, 0.0);
353 M[0] = 1.0;
354 if (k >= 1) M[1] = (phi_alpha - phi_beta) / Z;
355 for (unsigned m = 2; m <= k; ++m) {
356 /* α^{m-1}·φ(α) and β^{m-1}·φ(β); take 0 when the endpoint is
357 * infinite (the φ factor vanishes faster than any polynomial). */
358 double end_term = 0.0;
359 if (std::isfinite(alpha))
360 end_term += std::pow(alpha, static_cast<double>(m - 1)) * phi_alpha;
361 if (std::isfinite(beta))
362 end_term -= std::pow(beta, static_cast<double>(m - 1)) * phi_beta;
363 M[m] = (m - 1) * M[m - 2] + end_term / Z;
364 }
365
366 /* E[X^k] = E[(μ + σZ)^k] = Σ_{i=0..k} C(k,i) μ^{k-i} σ^i E[Z^i|…]. */
367 double total = 0.0;
368 for (unsigned i = 0; i <= k; ++i) {
369 total += binomial(k, i)
370 * std::pow(mu, static_cast<double>(k - i))
371 * std::pow(sigma, static_cast<double>(i))
372 * M[i];
373 }
374 return total;
375}
376
377/**
378 * @brief Raw moments of @c X ~ Uniform(p1, p2) truncated to @c [a, b].
379 *
380 * The intersection @c [a', b'] = [max(p1,a), min(p2,b)] is uniform;
381 * its k-th raw moment is @c (b'^{k+1} - a'^{k+1}) / ((k+1)(b' - a')).
382 */
383double truncated_uniform_raw_moment(double p1, double p2, double a, double b,
384 unsigned k)
385{
386 const double lo = std::max(p1, a);
387 const double hi = std::min(p2, b);
388 if (hi <= lo) return std::numeric_limits<double>::quiet_NaN();
389 if (k == 0) return 1.0;
390 return (std::pow(hi, static_cast<double>(k + 1))
391 - std::pow(lo, static_cast<double>(k + 1)))
392 / ((k + 1) * (hi - lo));
393}
394
395/**
396 * @brief Raw moments of @c X ~ Exp(λ) truncated to @c [a, b].
397 *
398 * Decomposes via change of variable Y = X - max(a,0):
399 * - left endpoint @c a > 0, right endpoint @c b = +∞: by
400 * memorylessness @c X | X>a is distributed as @c a + Exp(λ), so
401 * @c E[X^k|X>a] = Σ_{i=0..k} C(k,i) a^{k-i} · i!/λ^i.
402 * - finite @c [a, b] (with @c a ≥ 0, @c b < ∞): integrate
403 * @c x^k λ e^{-λx} dx by parts and divide by the truncation mass
404 * @c e^{-λa} - e^{-λb}. Uses the recurrence
405 * @c I_k = k I_{k-1} / λ - (b^k e^{-λb} - a^k e^{-λa}) / λ
406 * with @c I_0 = e^{-λa} - e^{-λb}.
407 */
408double truncated_exponential_raw_moment(double lambda, double a, double b,
409 unsigned k)
410{
411 const double aa = std::max(a, 0.0); /* Exp support is [0, +∞) */
412 if (std::isfinite(b)) {
413 if (b <= aa) return std::numeric_limits<double>::quiet_NaN();
414 /* Finite-interval recurrence on I_k = ∫_{aa}^{b} x^k λ e^{-λx} dx. */
415 const double e_a = std::exp(-lambda * aa);
416 const double e_b = std::exp(-lambda * b);
417 const double Z = e_a - e_b; /* P(aa < X < b) */
418 if (Z < 1e-12) return std::numeric_limits<double>::quiet_NaN();
419 if (k == 0) return 1.0;
420 /* Integration by parts: ∫ x^k λ e^{-λx} dx = -x^k e^{-λx} + k ∫ x^{k-1} e^{-λx} dx
421 * so I_k (with λ factor folded into the e^{-λx}·λ dx term) follows:
422 * I_k = [aa^k e^{-λaa} - b^k e^{-λb}] + (k/λ) · I_{k-1}_no_lambda
423 * where I_{k-1}_no_lambda = ∫ x^{k-1} e^{-λx} dx = I_{k-1}/λ.
424 * Cleaner: compute J_k = ∫_{aa}^{b} x^k e^{-λx} dx via
425 * J_0 = Z/λ; J_k = (aa^k e^{-λaa} - b^k e^{-λb})/λ + (k/λ) J_{k-1}.
426 * Then E[X^k|aa<X<b] = λ J_k / Z. */
427 std::vector<double> J(k + 1, 0.0);
428 J[0] = Z / lambda;
429 for (unsigned m = 1; m <= k; ++m) {
430 const double endpoint = std::pow(aa, static_cast<double>(m)) * e_a
431 - std::pow(b, static_cast<double>(m)) * e_b;
432 J[m] = endpoint / lambda + (m / lambda) * J[m - 1];
433 }
434 return lambda * J[k] / Z;
435 }
436 /* Semi-infinite right tail [aa, +∞): memorylessness. */
437 double total = 0.0;
438 double fact_i = 1.0;
439 for (unsigned i = 0; i <= k; ++i) {
440 total += binomial(k, i)
441 * std::pow(aa, static_cast<double>(k - i))
442 * fact_i / std::pow(lambda, static_cast<double>(i));
443 fact_i *= (i + 1);
444 }
445 return total;
446}
447
448/**
449 * @brief Try to evaluate @f$E[X^k \mid A]@f$ in closed form.
450 *
451 * Fires only when @p root is a bare @c gate_rv of a recognised kind
452 * (Normal / Uniform / Exponential) and the event walk under
453 * @p event_root collects a sound interval constraint on it.
454 * Otherwise returns @c std::nullopt and the caller falls through to
455 * MC rejection.
456 *
457 * For @p central, returns @f$E[(X - \mu_A)^k \mid A]@f$ where
458 * @f$\mu_A@f$ is the closed-form conditional mean obtained by
459 * recursing on @c k = 1, then binomially expanding the central
460 * moment in terms of the raw moments.
461 */
462std::optional<double>
463try_truncated_closed_form(const GenericCircuit &gc, gate_t root,
464 gate_t event_root, unsigned k, bool central)
465{
466 auto m = matchTruncatedSingleRv(gc, root, event_root);
467 if (!m) return std::nullopt;
468 const DistributionSpec &spec = m->spec;
469 const double lo = m->lo, hi = m->hi;
470
471 /* Closed-form raw moment of the truncated distribution. */
472 auto raw = [&](unsigned q) -> std::optional<double> {
473 if (q == 0) return 1.0;
474 double r = std::numeric_limits<double>::quiet_NaN();
475 switch (spec.kind) {
476 case DistKind::Normal:
477 r = truncated_normal_raw_moment(spec.p1, spec.p2, lo, hi, q);
478 break;
480 r = truncated_uniform_raw_moment(spec.p1, spec.p2, lo, hi, q);
481 break;
483 r = truncated_exponential_raw_moment(spec.p1, lo, hi, q);
484 break;
485 case DistKind::Erlang:
486 /* Truncated Erlang moments require the regularised lower
487 * incomplete gamma; out of scope for v1. Fall through to MC. */
488 return std::nullopt;
489 }
490 if (std::isnan(r)) return std::nullopt;
491 return r;
492 };
493
494 if (!central) return raw(k);
495
496 /* Central: E[(X - μ_A)^k | A] = Σ_{i=0..k} C(k,i) (-μ_A)^{k-i} E[X^i | A]. */
497 auto mu_opt = raw(1);
498 if (!mu_opt) return std::nullopt;
499 const double mu = *mu_opt;
500 if (k == 1) return 0.0;
501 double total = 0.0;
502 for (unsigned i = 0; i <= k; ++i) {
503 auto m_i = raw(i);
504 if (!m_i) return std::nullopt;
505 total += binomial(k, i)
506 * std::pow(-mu, static_cast<double>(k - i)) * (*m_i);
507 }
508 return total;
509}
510
511double rec_expectation(const GenericCircuit &gc, gate_t g, FootprintCache &fp)
512{
513 const auto type = gc.getGateType(g);
514 switch (type) {
515 case gate_value:
516 return parseDoubleStrict(gc.getExtra(g));
517 case gate_rv: {
518 auto spec = parse_distribution_spec(gc.getExtra(g));
519 if (!spec)
520 throw CircuitException(
521 "Expectation: malformed gate_rv extra: " + gc.getExtra(g));
522 return analytical_mean(*spec);
523 }
524 case gate_arith: {
525 const auto op = static_cast<provsql_arith_op>(gc.getInfos(g).first);
526 const auto &wires = gc.getWires(g);
527 switch (op) {
528 case PROVSQL_ARITH_PLUS: {
529 double s = 0.0;
530 for (gate_t c : wires) s += rec_expectation(gc, c, fp);
531 return s;
532 }
533 case PROVSQL_ARITH_MINUS: {
534 if (wires.size() != 2)
535 throw CircuitException("gate_arith MINUS must be binary");
536 return rec_expectation(gc, wires[0], fp)
537 - rec_expectation(gc, wires[1], fp);
538 }
539 case PROVSQL_ARITH_NEG: {
540 if (wires.size() != 1)
541 throw CircuitException("gate_arith NEG must be unary");
542 return -rec_expectation(gc, wires[0], fp);
543 }
544 case PROVSQL_ARITH_TIMES: {
545 if (pairwise_disjoint(fp, wires)) {
546 double p = 1.0;
547 for (gate_t c : wires) p *= rec_expectation(gc, c, fp);
548 return p;
549 }
550 return mc_raw_moment(gc, g, 1,
551 "Expectation of gate_arith TIMES with shared random variables");
552 }
553 case PROVSQL_ARITH_DIV: {
554 if (wires.size() != 2)
555 throw CircuitException("gate_arith DIV must be binary");
556 if (gc.getGateType(wires[1]) == gate_value) {
557 const double divisor = parseDoubleStrict(gc.getExtra(wires[1]));
558 return rec_expectation(gc, wires[0], fp) / divisor;
559 }
560 return mc_raw_moment(gc, g, 1,
561 "Expectation of gate_arith DIV with non-constant divisor");
562 }
563 }
564 throw CircuitException(
565 "Expectation: unknown gate_arith op tag: " +
566 std::to_string(static_cast<unsigned>(op)));
567 }
568 case gate_mixture: {
569 const auto &wires = gc.getWires(g);
570 if (gc.isCategoricalMixture(g)) {
571 // Categorical mixture: E[M] = Σ π_i · v_i, where each mulinput
572 // mul_i carries π_i in set_prob and v_i in extra.
573 double s = 0.0;
574 for (std::size_t i = 1; i < wires.size(); ++i) {
575 s += gc.getProb(wires[i])
576 * parseDoubleStrict(gc.getExtra(wires[i]));
577 }
578 return s;
579 }
580 // E[mixture(p, X, Y)] = π·E[X] + (1-π)·E[Y], where π = P(p = true).
581 // For a bare gate_input p, π is the leaf's pinned set_prob. For
582 // a compound Boolean p, route through evaluateBooleanProbability
583 // so π honors the tuple-independent semantics of the Boolean DAG.
584 if (wires.size() != 3)
585 throw CircuitException(
586 "Expectation: gate_mixture must have exactly three children");
587 const double pi = mixturePi(gc, wires[0]);
588 return pi * rec_expectation(gc, wires[1], fp)
589 + (1.0 - pi) * rec_expectation(gc, wires[2], fp);
590 }
591 default:
592 return mc_raw_moment(gc, g, 1,
593 "Expectation of gate type " + std::string(gate_type_name[type]));
594 }
595}
596
597double rec_variance(const GenericCircuit &gc, gate_t g, FootprintCache &fp)
598{
599 const auto type = gc.getGateType(g);
600 switch (type) {
601 case gate_value:
602 return 0.0;
603 case gate_rv: {
604 auto spec = parse_distribution_spec(gc.getExtra(g));
605 if (!spec)
606 throw CircuitException(
607 "Variance: malformed gate_rv extra: " + gc.getExtra(g));
608 return analytical_variance(*spec);
609 }
610 case gate_arith: {
611 const auto op = static_cast<provsql_arith_op>(gc.getInfos(g).first);
612 const auto &wires = gc.getWires(g);
613 auto mc_var = [&](const std::string &what) {
614 const double mu = mc_raw_moment(gc, g, 1, what);
615 return mc_central_moment(gc, g, 2, mu, what);
616 };
617 switch (op) {
618 case PROVSQL_ARITH_PLUS: {
619 if (pairwise_disjoint(fp, wires)) {
620 double s = 0.0;
621 for (gate_t c : wires) s += rec_variance(gc, c, fp);
622 return s;
623 }
624 return mc_var(
625 "Variance of gate_arith PLUS with shared random variables");
626 }
627 case PROVSQL_ARITH_MINUS: {
628 if (wires.size() != 2)
629 throw CircuitException("gate_arith MINUS must be binary");
630 if (pairwise_disjoint(fp, wires)) {
631 return rec_variance(gc, wires[0], fp)
632 + rec_variance(gc, wires[1], fp);
633 }
634 return mc_var(
635 "Variance of gate_arith MINUS with shared random variables");
636 }
637 case PROVSQL_ARITH_NEG: {
638 if (wires.size() != 1)
639 throw CircuitException("gate_arith NEG must be unary");
640 return rec_variance(gc, wires[0], fp);
641 }
642 case PROVSQL_ARITH_TIMES: {
643 if (pairwise_disjoint(fp, wires)) {
644 // Var(prod Xi) = prod E[Xi^2] - (prod E[Xi])^2
645 // = prod (Var[Xi] + E[Xi]^2) - (prod E[Xi])^2
646 double prod_e2 = 1.0;
647 double prod_e1 = 1.0;
648 for (gate_t c : wires) {
649 const double mu_c = rec_expectation(gc, c, fp);
650 const double v_c = rec_variance(gc, c, fp);
651 prod_e2 *= (v_c + mu_c * mu_c);
652 prod_e1 *= mu_c;
653 }
654 return prod_e2 - prod_e1 * prod_e1;
655 }
656 return mc_var(
657 "Variance of gate_arith TIMES with shared random variables");
658 }
659 case PROVSQL_ARITH_DIV: {
660 if (wires.size() != 2)
661 throw CircuitException("gate_arith DIV must be binary");
662 if (gc.getGateType(wires[1]) == gate_value) {
663 const double divisor = parseDoubleStrict(gc.getExtra(wires[1]));
664 return rec_variance(gc, wires[0], fp) / (divisor * divisor);
665 }
666 return mc_var(
667 "Variance of gate_arith DIV with non-constant divisor");
668 }
669 }
670 throw CircuitException(
671 "Variance: unknown gate_arith op tag: " +
672 std::to_string(static_cast<unsigned>(op)));
673 }
674 case gate_mixture: {
675 const auto &wires = gc.getWires(g);
676 if (gc.isCategoricalMixture(g)) {
677 // Categorical mixture: Var(M) = Σ π_i v_i² − (Σ π_i v_i)².
678 double e1 = 0.0, e2 = 0.0;
679 for (std::size_t i = 1; i < wires.size(); ++i) {
680 const double p = gc.getProb(wires[i]);
681 const double v = parseDoubleStrict(gc.getExtra(wires[i]));
682 e1 += p * v;
683 e2 += p * v * v;
684 }
685 return e2 - e1 * e1;
686 }
687 // Var(M) = π·(Var(X) + E[X]²) + (1-π)·(Var(Y) + E[Y]²) - E[M]²
688 // (law of total variance specialised to a Bernoulli mixture).
689 if (wires.size() != 3)
690 throw CircuitException(
691 "Variance: gate_mixture must have exactly three children");
692 const double pi = mixturePi(gc, wires[0]);
693 const double ex = rec_expectation(gc, wires[1], fp);
694 const double ey = rec_expectation(gc, wires[2], fp);
695 const double vx = rec_variance(gc, wires[1], fp);
696 const double vy = rec_variance(gc, wires[2], fp);
697 const double em = pi * ex + (1.0 - pi) * ey;
698 return pi * (vx + ex * ex)
699 + (1.0 - pi) * (vy + ey * ey)
700 - em * em;
701 }
702 default: {
703 const std::string what =
704 "Variance of gate type " + std::string(gate_type_name[type]);
705 const double mu = mc_raw_moment(gc, g, 1, what);
706 return mc_central_moment(gc, g, 2, mu, what);
707 }
708 }
709}
710
711double rec_raw_moment(const GenericCircuit &gc, gate_t g, unsigned k,
712 FootprintCache &fp)
713{
714 if (k == 0) return 1.0;
715 if (k == 1) return rec_expectation(gc, g, fp);
716
717 const auto type = gc.getGateType(g);
718 switch (type) {
719 case gate_value:
720 return std::pow(parseDoubleStrict(gc.getExtra(g)),
721 static_cast<double>(k));
722 case gate_rv: {
723 auto spec = parse_distribution_spec(gc.getExtra(g));
724 if (!spec)
725 throw CircuitException(
726 "Moment: malformed gate_rv extra: " + gc.getExtra(g));
727 return analytical_raw_moment(*spec, k);
728 }
729 case gate_arith: {
730 const auto op = static_cast<provsql_arith_op>(gc.getInfos(g).first);
731 const auto &wires = gc.getWires(g);
732 switch (op) {
733 case PROVSQL_ARITH_NEG: {
734 if (wires.size() != 1)
735 throw CircuitException("gate_arith NEG must be unary");
736 const double v = rec_raw_moment(gc, wires[0], k, fp);
737 return ((k % 2 == 0) ? 1.0 : -1.0) * v;
738 }
739 case PROVSQL_ARITH_PLUS: {
740 if (pairwise_disjoint(fp, wires)) {
741 // Fold-left: m_acc[i] holds E[(X1 + ... + Xj)^i] for the
742 // first j children processed; combining with the next
743 // independent child Y uses the binomial theorem.
744 std::vector<double> m_acc(k + 1, 0.0);
745 for (unsigned i = 0; i <= k; ++i)
746 m_acc[i] = rec_raw_moment(gc, wires[0], i, fp);
747 for (size_t w = 1; w < wires.size(); ++w) {
748 std::vector<double> next(k + 1, 0.0);
749 std::vector<double> moments_y(k + 1, 0.0);
750 for (unsigned i = 0; i <= k; ++i)
751 moments_y[i] = rec_raw_moment(gc, wires[w], i, fp);
752 for (unsigned kp = 0; kp <= k; ++kp) {
753 double total = 0.0;
754 for (unsigned i = 0; i <= kp; ++i) {
755 total += binomial(kp, i) * m_acc[i] * moments_y[kp - i];
756 }
757 next[kp] = total;
758 }
759 m_acc = std::move(next);
760 }
761 return m_acc[k];
762 }
763 return mc_raw_moment(gc, g, k,
764 "Raw moment of gate_arith PLUS with shared random variables");
765 }
766 case PROVSQL_ARITH_MINUS: {
767 if (wires.size() != 2)
768 throw CircuitException("gate_arith MINUS must be binary");
769 if (pairwise_disjoint(fp, wires)) {
770 double total = 0.0;
771 for (unsigned i = 0; i <= k; ++i) {
772 const double sign = ((k - i) % 2 == 0) ? 1.0 : -1.0;
773 total += binomial(k, i)
774 * rec_raw_moment(gc, wires[0], i, fp)
775 * sign
776 * rec_raw_moment(gc, wires[1], k - i, fp);
777 }
778 return total;
779 }
780 return mc_raw_moment(gc, g, k,
781 "Raw moment of gate_arith MINUS with shared random variables");
782 }
783 case PROVSQL_ARITH_TIMES: {
784 if (pairwise_disjoint(fp, wires)) {
785 // (prod Xi)^k = prod Xi^k; under independence E factors.
786 double p = 1.0;
787 for (gate_t c : wires) p *= rec_raw_moment(gc, c, k, fp);
788 return p;
789 }
790 return mc_raw_moment(gc, g, k,
791 "Raw moment of gate_arith TIMES with shared random variables");
792 }
793 case PROVSQL_ARITH_DIV: {
794 if (wires.size() != 2)
795 throw CircuitException("gate_arith DIV must be binary");
796 if (gc.getGateType(wires[1]) == gate_value) {
797 const double divisor = parseDoubleStrict(gc.getExtra(wires[1]));
798 return rec_raw_moment(gc, wires[0], k, fp)
799 / std::pow(divisor, static_cast<double>(k));
800 }
801 return mc_raw_moment(gc, g, k,
802 "Raw moment of gate_arith DIV with non-constant divisor");
803 }
804 }
805 throw CircuitException(
806 "Moment: unknown gate_arith op tag: " +
807 std::to_string(static_cast<unsigned>(op)));
808 }
809 case gate_mixture: {
810 const auto &wires = gc.getWires(g);
811 if (gc.isCategoricalMixture(g)) {
812 // Categorical mixture: E[M^k] = Σ π_i v_i^k.
813 double s = 0.0;
814 for (std::size_t i = 1; i < wires.size(); ++i) {
815 const double v = parseDoubleStrict(gc.getExtra(wires[i]));
816 s += gc.getProb(wires[i])
817 * std::pow(v, static_cast<double>(k));
818 }
819 return s;
820 }
821 // E[M^k] = π·E[X^k] + (1-π)·E[Y^k].
822 if (wires.size() != 3)
823 throw CircuitException(
824 "Moment: gate_mixture must have exactly three children");
825 const double pi = mixturePi(gc, wires[0]);
826 return pi * rec_raw_moment(gc, wires[1], k, fp)
827 + (1.0 - pi) * rec_raw_moment(gc, wires[2], k, fp);
828 }
829 default:
830 return mc_raw_moment(gc, g, k,
831 "Raw moment of gate type " + std::string(gate_type_name[type]));
832 }
833}
834
835} // namespace
836
837/* Conditional dispatch helpers: try closed-form first, fall through
838 * to MC rejection. Used by all four public compute_* entries to keep
839 * the conditional logic in one place and the unconditional path
840 * unchanged. */
841namespace {
842
843[[noreturn]] void raise_infeasible_event(const GenericCircuit &gc, gate_t root)
844{
845 (void)gc; (void)root;
846 throw CircuitException(
847 "conditioning event is infeasible (empty intersection with the "
848 "random variable's support)");
849}
850
851double conditional_raw_moment(const GenericCircuit &gc, gate_t root,
852 unsigned k, gate_t event_root)
853{
854 if (k == 0) return 1.0;
855 if (auto cf = try_truncated_closed_form(gc, root, event_root, k, false))
856 return *cf;
857 if (eventIsProvablyInfeasible(gc, root, event_root))
858 raise_infeasible_event(gc, root);
859 return mc_conditional_raw_moment(
860 gc, root, k, event_root,
861 "Conditional raw moment of gate type " +
862 std::string(gate_type_name[gc.getGateType(root)]));
863}
864
865double conditional_central_moment(const GenericCircuit &gc, gate_t root,
866 unsigned k, gate_t event_root)
867{
868 if (k == 0) return 1.0;
869 if (k == 1) return 0.0;
870 if (auto cf = try_truncated_closed_form(gc, root, event_root, k, true))
871 return *cf;
872 if (eventIsProvablyInfeasible(gc, root, event_root))
873 raise_infeasible_event(gc, root);
874 /* MC central: need μ_A first. */
875 const double mu = conditional_raw_moment(gc, root, 1, event_root);
876 return mc_conditional_central_moment(
877 gc, root, k, mu, event_root,
878 "Conditional central moment of gate type " +
879 std::string(gate_type_name[gc.getGateType(root)]));
880}
881
882} // namespace
883
885 std::optional<gate_t> event_root)
886{
887 if (event_root.has_value())
888 return conditional_raw_moment(gc, root, 1, *event_root);
889 FootprintCache fp(gc);
890 return rec_expectation(gc, root, fp);
891}
892
894 std::optional<gate_t> event_root)
895{
896 if (event_root.has_value())
897 return conditional_central_moment(gc, root, 2, *event_root);
898 FootprintCache fp(gc);
899 return rec_variance(gc, root, fp);
900}
901
902double compute_raw_moment(const GenericCircuit &gc, gate_t root, unsigned k,
903 std::optional<gate_t> event_root)
904{
905 if (event_root.has_value())
906 return conditional_raw_moment(gc, root, k, *event_root);
907 FootprintCache fp(gc);
908 return rec_raw_moment(gc, root, k, fp);
909}
910
911double compute_central_moment(const GenericCircuit &gc, gate_t root, unsigned k,
912 std::optional<gate_t> event_root)
913{
914 if (event_root.has_value())
915 return conditional_central_moment(gc, root, k, *event_root);
916 if (k == 0) return 1.0;
917 if (k == 1) return 0.0;
918 FootprintCache fp(gc);
919 if (k == 2) return rec_variance(gc, root, fp);
920 // E[(X - mu)^k] = sum_{i=0}^{k} C(k, i) (-mu)^(k-i) E[X^i]
921 const double mu = rec_expectation(gc, root, fp);
922 double total = 0.0;
923 for (unsigned i = 0; i <= k; ++i) {
924 const double mu_pow = std::pow(-mu, static_cast<double>(k - i));
925 total += binomial(k, i) * mu_pow * rec_raw_moment(gc, root, i, fp);
926 }
927 return total;
928}
929
930} // namespace provsql
931
932extern "C" {
933
934/**
935 * @brief SQL: rv_moment(token uuid, k integer, central boolean,
936 * prov uuid DEFAULT gate_one()) -> float8
937 *
938 * Single C entry point shared by the @c expected / @c variance /
939 * @c moment / @c central_moment SQL functions. The SQL wrappers
940 * select the (k, central) pair that matches their semantics:
941 * - @c expected(rv, prov): k=1, central=false.
942 * - @c variance(rv, prov): k=2, central=true.
943 * - @c moment(rv, k, prov): central=false.
944 * - @c central_moment(rv, k, prov): central=true.
945 *
946 * The @p prov argument carries the conditioning event: typically the
947 * row's @c provenance() gate after a @c WHERE predicate folded a
948 * @c gate_cmp into it. When @p prov resolves to @c gate_one (the
949 * default, or the load-time simplification of any always-true
950 * sub-circuit) the unconditional path runs unchanged. Otherwise we
951 * load a JOINT circuit reaching both roots, so shared @c gate_rv
952 * leaves collapse to a single @c gate_t -- the property the
953 * conditional MC sampler relies on to couple the indicator's draw
954 * with the value's draw.
955 */
956Datum rv_moment(PG_FUNCTION_ARGS)
957{
958 try {
959 pg_uuid_t *token = PG_GETARG_UUID_P(0);
960 const int32 k_signed = PG_GETARG_INT32(1);
961 const bool central = PG_GETARG_BOOL(2);
962 pg_uuid_t *prov = PG_GETARG_UUID_P(3);
963
964 if (k_signed < 0)
965 provsql_error("rv_moment: k must be non-negative (got %d)", k_signed);
966 const unsigned k = static_cast<unsigned>(k_signed);
967
968 gate_t root_gate, event_gate;
969 auto gc = getJointCircuit(*token, *prov, root_gate, event_gate);
970
971 /* gate_one event = unconditional after load-time simplification. */
972 std::optional<gate_t> event_opt;
973 if (gc.getGateType(event_gate) != gate_one)
974 event_opt = event_gate;
975
976 double result;
977 if (central)
978 result = provsql::compute_central_moment(gc, root_gate, k, event_opt);
979 else if (k == 1)
980 result = provsql::compute_expectation(gc, root_gate, event_opt);
981 else
982 result = provsql::compute_raw_moment(gc, root_gate, k, event_opt);
983 return Float8GetDatum(result);
984 } catch (const std::exception &e) {
985 provsql_error("rv_moment: %s", e.what());
986 } catch (...) {
987 provsql_error("rv_moment: unknown exception");
988 }
989 PG_RETURN_NULL();
990}
991
992} // extern "C"
Closed-form CDF resolution for trivial gate_cmp shapes.
Boolean-expression (lineage formula) semiring.
Boolean provenance circuit with support for knowledge compilation.
@ IN
Input (variable) gate representing a base tuple.
@ MULIN
Multivalued-input gate (one of several options).
GenericCircuit getJointCircuit(pg_uuid_t root_token, pg_uuid_t event_token, gate_t &root_gate, gate_t &event_gate)
Build a GenericCircuit containing the closures of two roots, with shared subgraphs unified.
Build in-memory circuits from the mmap-backed persistent store.
Generic directed-acyclic-graph circuit template and gate identifier.
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:49
Datum rv_moment(PG_FUNCTION_ARGS)
SQL: rv_moment(token uuid, k integer, central boolean, prov uuid DEFAULT gate_on...
Analytical expectation / variance / moment evaluator over RV circuits.
Monte Carlo sampling over a GenericCircuit, RV-aware.
Continuous random-variable helpers (distribution parsing, moments).
Support-based bound check for continuous-RV comparators.
Boolean circuit for provenance formula evaluation.
gate_t setGate(BooleanGate type) override
Allocate a new gate with type type and no UUID.
double monteCarlo(gate_t g, unsigned samples) const
Estimate the probability via Monte Carlo sampling.
void rewriteMultivaluedGates()
Rewrite all MULVAR/MULIN gate clusters into standard AND/OR/NOT circuits.
void setInfo(gate_t g, unsigned info)
Store an integer annotation on gate g.
double independentEvaluation(gate_t g) const
Compute the probability exactly when inputs are independent.
Exception type thrown by circuit operations on invalid input.
Definition Circuit.h:206
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
Definition Circuit.h:140
gateType getGateType(gate_t g) const
Return the type of gate g.
Definition Circuit.h:130
void addWire(gate_t f, gate_t t)
Add a directed wire from gate f (parent) to gate t (child).
Definition Circuit.hpp:81
uuid getUUID(gate_t g) const
Return the UUID string associated with gate g.
Definition Circuit.hpp:46
std::vector< gate_t >::size_type getNbGates() const
Return the total number of gates in the circuit.
Definition Circuit.h:103
In-memory provenance circuit with semiring-generic evaluation.
S::value_type evaluate(gate_t g, std::unordered_map< gate_t, typename S::value_type > &provenance_mapping, S semiring) const
Evaluate the sub-circuit rooted at gate g over semiring semiring.
bool isCategoricalMixture(gate_t g) const
Test whether g is a categorical-form gate_mixture (the explicit provsql.categorical output).
std::string getExtra(gate_t g) const
Return the string extra for gate g.
double getProb(gate_t g) const
Return the probability for gate g.
const std::set< gate_t > & getInputs() const
Return the set of input (leaf) gates.
std::pair< unsigned, unsigned > getInfos(gate_t g) const
Return the integer annotation pair for gate g.
Provenance-as-Boolean-circuit semiring.
Definition BoolExpr.h:47
@ Normal
Normal (Gaussian): p1=μ, p2=σ
@ Exponential
Exponential: p1=λ, p2 unused.
@ Uniform
Uniform on [a,b]: p1=a, p2=b.
@ Erlang
Erlang: p1=k (positive integer), p2=λ.
double compute_raw_moment(const GenericCircuit &gc, gate_t root, unsigned k, std::optional< gate_t > event_root)
Compute the raw moment (or if event_root is set) for k >= 0.
double analytical_variance(const DistributionSpec &d)
Closed-form variance Var(X) for a basic distribution.
double compute_variance(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
Compute (or if event_root is set) over the scalar sub-circuit rooted at root.
double parseDoubleStrict(const std::string &s)
Strictly parse s as a double.
bool eventIsProvablyInfeasible(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
True iff the conditioning event is provably infeasible for a bare gate_rv root.
double compute_central_moment(const GenericCircuit &gc, gate_t root, unsigned k, std::optional< gate_t > event_root)
Compute the central moment (or if event_root is set).
ConditionalScalarSamples monteCarloConditionalScalarSamples(const GenericCircuit &gc, gate_t root, gate_t event_root, unsigned samples)
Rejection-sample root conditioned on event_root.
double evaluateBooleanProbability(const GenericCircuit &gc, gate_t boolRoot)
Probability that the Boolean subcircuit rooted at boolRoot evaluates to true under the tuple-independ...
std::vector< double > monteCarloScalarSamples(const GenericCircuit &gc, gate_t root, unsigned samples)
Sample a scalar sub-circuit samples times and return the draws.
std::optional< DistributionSpec > parse_distribution_spec(const std::string &s)
Parse the on-disk text encoding of a gate_rv distribution.
double analytical_mean(const DistributionSpec &d)
Closed-form expectation E[X] for a basic distribution.
std::optional< TruncatedSingleRv > matchTruncatedSingleRv(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
Detect a closed-form, optionally-truncated single-RV shape.
double compute_expectation(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
Compute (or if event_root is set) over the scalar sub-circuit rooted at root.
double analytical_raw_moment(const DistributionSpec &d, unsigned k)
Closed-form raw moment for a basic distribution.
int provsql_rv_mc_samples
Default sample count for analytical-evaluator MC fallbacks; 0 disables fallback (callers raise instea...
Definition provsql.c:82
Uniform error-reporting macros for ProvSQL.
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
const char * gate_type_name[]
Names of gate types.
Core types, constants, and utilities shared across ProvSQL.
provsql_arith_op
Arithmetic operator tags used by gate_arith.
@ PROVSQL_ARITH_DIV
binary, child0 / child1
@ PROVSQL_ARITH_PLUS
n-ary, sum of children
@ PROVSQL_ARITH_NEG
unary, -child0
@ PROVSQL_ARITH_MINUS
binary, child0 - child1
@ PROVSQL_ARITH_TIMES
n-ary, product of children
@ gate_rv
Continuous random-variable leaf (extra encodes distribution).
@ gate_mixture
Probabilistic mixture: three wires [p_token (gate_input Bernoulli), x_token, y_token]; samples x when...
@ gate_arith
n-ary arithmetic gate over scalar-valued children (info1 holds operator tag)
C++ utility functions for UUID manipulation.
UUID structure.
Outcome of a conditional Monte Carlo sampling pass.
Parsed distribution spec (kind + up to two parameters).