21#include <unordered_map>
22#include <unordered_set>
31std::mt19937_64 seedRng()
37 std::random_device rd;
38 rng.seed((
static_cast<uint64_t
>(rd()) << 32) | rd());
61 Sampler(
const GenericCircuit &gc, std::mt19937_64 &rng)
62 : gc_(gc), rng_(rng) {}
65 void resetIteration() {
67 scalar_cache_.clear();
71 double evalScalar(
gate_t g);
74 const GenericCircuit &gc_;
75 std::mt19937_64 &rng_;
76 std::unordered_map<gate_t, bool> bool_cache_;
77 std::unordered_map<gate_t, double> scalar_cache_;
80bool Sampler::evalBool(
gate_t g)
82 auto it = bool_cache_.find(g);
83 if(it != bool_cache_.end())
return it->second;
93 std::uniform_real_distribution<double> u(0.0, 1.0);
94 result = u(rng_) < gc_.
getProb(g);
100 if(evalBool(c)) { result =
true;
break; }
106 if(!evalBool(c)) { result =
false;
break; }
110 if(wires.size() != 2)
111 throw CircuitException(
"gate_monus must have exactly two children");
112 result = evalBool(wires[0]) && !evalBool(wires[1]);
122 if(wires.size() != 2)
123 throw CircuitException(
"gate_cmp must have exactly two children");
127 throw CircuitException(
128 "gate_cmp: unsupported operator OID " +
129 std::to_string(gc_.
getInfos(g).first));
130 double l = evalScalar(wires[0]);
131 double r = evalScalar(wires[1]);
132 result = applyCmp(l, op, r);
136 throw CircuitException(
137 "Monte Carlo over circuits containing gate_mulinput "
138 "is not yet supported on the RV path");
145 if(wires.size() != 1)
146 throw CircuitException(
"gate_delta must have exactly one child");
147 result = evalBool(wires[0]);
154 if(wires.size() != 1)
155 throw CircuitException(
156 "gate_assumed_boolean must have exactly one child");
157 result = evalBool(wires[0]);
160 throw CircuitException(
161 "Unsupported gate type in Boolean evaluation: " +
165 bool_cache_[g] = result;
169double Sampler::evalScalar(
gate_t g)
171 auto it = scalar_cache_.find(g);
172 if(it != scalar_cache_.end())
return it->second;
176 const auto &wires = gc_.
getWires(g);
186 throw CircuitException(
187 "Malformed gate_rv extra: " + gc_.
getExtra(g));
189 case DistKind::Normal: {
190 std::normal_distribution<double> d(spec->p1, spec->p2);
194 case DistKind::Uniform: {
195 std::uniform_real_distribution<double> d(spec->p1, spec->p2);
199 case DistKind::Exponential: {
200 std::exponential_distribution<double> d(spec->p1);
204 case DistKind::Erlang: {
208 std::gamma_distribution<double> d(spec->p1, 1.0 / spec->p2);
218 throw CircuitException(
"gate_arith must have at least one child");
223 for(
gate_t c : wires) result += evalScalar(c);
227 for(
gate_t c : wires) result *= evalScalar(c);
230 if(wires.size() != 2)
231 throw CircuitException(
"gate_arith MINUS must be binary");
232 result = evalScalar(wires[0]) - evalScalar(wires[1]);
235 if(wires.size() != 2)
236 throw CircuitException(
"gate_arith DIV must be binary");
237 result = evalScalar(wires[0]) / evalScalar(wires[1]);
240 if(wires.size() != 1)
241 throw CircuitException(
"gate_arith NEG must be unary");
242 result = -evalScalar(wires[0]);
245 throw CircuitException(
246 "Unknown gate_arith operator tag: " +
247 std::to_string(
static_cast<unsigned>(op)));
270 std::unique_ptr<Aggregator> agg =
275 throw CircuitException(
276 "gate_agg: makeAggregator returned null for op " +
277 std::to_string(
static_cast<int>(op)));
278 for(
gate_t child : wires) {
280 const auto &sm = gc_.
getWires(child);
281 if(sm.size() != 2)
continue;
282 if(!evalBool(sm[0]))
continue;
284 agg->add(AggValue(1L));
286 agg->add(AggValue(evalScalar(sm[1])));
289 AggValue r = agg->finalize();
292 result =
static_cast<double>(std::get<long>(r.
v));
295 result = std::get<double>(r.
v);
298 result = std::numeric_limits<double>::quiet_NaN();
301 throw CircuitException(
302 "gate_agg: unsupported aggregate result ValueType in MC");
327 std::uniform_real_distribution<double> u(0.0, 1.0);
328 const double r = u(rng_);
332 std::size_t chosen = wires.size() - 1;
333 for(std::size_t i = 1; i < wires.size(); ++i) {
335 if(r < cum) { chosen = i;
break; }
337 for(std::size_t i = 1; i < wires.size(); ++i) {
338 bool_cache_[wires[i]] = (i == chosen);
343 if(wires.size() != 3)
344 throw CircuitException(
345 "gate_mixture must have exactly three children "
346 "[p_token, x_token, y_token]");
347 result = evalBool(wires[0]) ? evalScalar(wires[1])
348 : evalScalar(wires[2]);
352 throw CircuitException(
353 "Unsupported gate type in scalar evaluation: " +
357 scalar_cache_[g] = result;
365 std::mt19937_64 rng = seedRng();
366 Sampler sampler(gc, rng);
368 unsigned success = 0;
369 for(
unsigned i = 0; i < samples; ++i) {
370 sampler.resetIteration();
371 if(sampler.evalBool(root))
376 "Interrupted after " + std::to_string(i + 1) +
" samples");
378 return success * 1.0 / samples;
383 const std::vector<gate_t> &cmps,
386 const unsigned k = cmps.size();
389 "monteCarloJointDistribution: empty cmps list");
392 "monteCarloJointDistribution: too many cmps in island ("
393 + std::to_string(k) +
" > 30)");
395 std::mt19937_64 rng = seedRng();
396 Sampler sampler(gc, rng);
398 const std::size_t nb_outcomes = std::size_t{1} << k;
399 std::vector<unsigned> counts(nb_outcomes, 0);
401 for (
unsigned i = 0; i < samples; ++i) {
402 sampler.resetIteration();
404 for (
unsigned j = 0; j < k; ++j) {
405 if (sampler.evalBool(cmps[j])) w |= (std::size_t{1} << j);
410 "Interrupted after " + std::to_string(i + 1) +
" samples");
413 std::vector<double> probs(nb_outcomes);
414 for (std::size_t w = 0; w < nb_outcomes; ++w)
415 probs[w] = counts[w] * 1.0 / samples;
422 std::mt19937_64 rng = seedRng();
423 Sampler sampler(gc, rng);
425 std::vector<double> out;
426 out.reserve(samples);
427 for(
unsigned i = 0; i < samples; ++i) {
428 sampler.resetIteration();
429 out.push_back(sampler.evalScalar(root));
433 "Interrupted after " + std::to_string(i + 1) +
" samples");
441 std::mt19937_64 rng = seedRng();
442 Sampler sampler(gc, rng);
448 for(
unsigned i = 0; i < samples; ++i) {
449 sampler.resetIteration();
456 if(sampler.evalBool(event_root)) {
457 out.
accepted.push_back(sampler.evalScalar(root));
463 "Interrupted after " + std::to_string(i + 1) +
" samples");
487double inv_phi(
double p)
489 static const double a[] = {
490 -3.969683028665376e+01, 2.209460984245205e+02,
491 -2.759285104469687e+02, 1.383577518672690e+02,
492 -3.066479806614716e+01, 2.506628277459239e+00
494 static const double b[] = {
495 -5.447609879822406e+01, 1.615858368580409e+02,
496 -1.556989798598866e+02, 6.680131188771972e+01,
497 -1.328068155288572e+01
499 static const double c_arr[] = {
500 -7.784894002430293e-03, -3.223964580411365e-01,
501 -2.400758277161838e+00, -2.549732539343734e+00,
502 4.374664141464968e+00, 2.938163982698783e+00
504 static const double d[] = {
505 7.784695709041462e-03, 3.224671290700398e-01,
506 2.445134137142996e+00, 3.754408661907416e+00
508 static const double p_low = 0.02425;
509 static const double p_high = 1.0 - p_low;
512 const double q = std::sqrt(-2.0 * std::log(p));
513 return (((((c_arr[0]*q + c_arr[1])*q + c_arr[2])*q
514 + c_arr[3])*q + c_arr[4])*q + c_arr[5])
515 / ((((d[0]*q + d[1])*q + d[2])*q + d[3])*q + 1.0);
518 const double q = p - 0.5;
519 const double r = q * q;
520 return (((((a[0]*r + a[1])*r + a[2])*r + a[3])*r + a[4])*r + a[5]) * q
521 / (((((b[0]*r + b[1])*r + b[2])*r + b[3])*r + b[4])*r + 1.0);
523 const double q = std::sqrt(-2.0 * std::log(1.0 - p));
524 return -(((((c_arr[0]*q + c_arr[1])*q + c_arr[2])*q
525 + c_arr[3])*q + c_arr[4])*q + c_arr[5])
526 / ((((d[0]*q + d[1])*q + d[2])*q + d[3])*q + 1.0);
531std::optional<std::vector<double>>
533 gate_t event_root,
unsigned n)
536 if (!m)
return std::nullopt;
538 const double lo = m->lo, hi = m->hi;
540 std::mt19937_64 rng = seedRng();
541 std::uniform_real_distribution<double> U01(0.0, 1.0);
543 std::vector<double> out;
551 std::uniform_real_distribution<double> U(lo, hi);
552 for (
unsigned i = 0; i < n; ++i) out.push_back(U(rng));
556 const double lambda = spec.
p1;
557 if (!(lambda > 0.0))
return std::nullopt;
558 if (std::isinf(hi)) {
562 std::exponential_distribution<double> E(lambda);
563 for (
unsigned i = 0; i < n; ++i) out.push_back(lo + E(rng));
569 const double F_lo = -std::expm1(-lambda * lo);
570 const double F_hi = -std::expm1(-lambda * hi);
571 if (!(F_lo < F_hi))
return std::nullopt;
572 for (
unsigned i = 0; i < n; ++i) {
573 const double u = F_lo + U01(rng) * (F_hi - F_lo);
574 out.push_back(-std::log1p(-u) / lambda);
579 const double mu = spec.
p1;
580 const double sigma = spec.
p2;
581 if (!(sigma > 0.0))
return std::nullopt;
582 const double sqrt2 = std::sqrt(2.0);
583 const double alpha = (lo - mu) / sigma;
584 const double beta = (hi - mu) / sigma;
585 const double Phi_a = std::isfinite(alpha)
586 ? 0.5 * (1.0 + std::erf(alpha / sqrt2))
587 : (alpha < 0 ? 0.0 : 1.0);
588 const double Phi_b = std::isfinite(beta)
589 ? 0.5 * (1.0 + std::erf(beta / sqrt2))
590 : (beta < 0 ? 0.0 : 1.0);
591 if (!(Phi_a < Phi_b))
return std::nullopt;
596 static constexpr double EPS = 1e-15;
597 for (
unsigned i = 0; i < n; ++i) {
598 double u = Phi_a + U01(rng) * (Phi_b - Phi_a);
599 if (u < EPS) u = EPS;
600 if (u > 1.0 - EPS) u = 1.0 - EPS;
601 const double z = inv_phi(u);
602 out.push_back(mu + sigma * z);
616 std::unordered_set<gate_t> seen;
617 std::stack<gate_t> stack;
619 while(!stack.empty()) {
622 if(!seen.insert(g).second)
continue;
ComparisonOperator cmpOpFromOid(Oid op_oid, bool &ok)
Map a PostgreSQL comparison-operator OID to a ComparisonOperator.
AggregationOperator getAggregationOperator(Oid oid)
Map a PostgreSQL aggregate function OID to an AggregationOperator.
std::unique_ptr< Aggregator > makeAggregator(AggregationOperator op, ValueType t)
Create a concrete Aggregator for the given operator and value type.
Typed aggregation value, operator, and aggregator abstractions.
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
@ COUNT
COUNT(*) or COUNT(expr) → integer.
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
@ LE
Less than or equal (<=).
@ GE
Greater than or equal (>=).
@ INT
Signed 64-bit integer.
@ FLOAT
Double-precision float.
Generic directed-acyclic-graph circuit template and gate identifier.
gate_t
Strongly-typed gate identifier.
Monte Carlo sampling over a GenericCircuit, RV-aware.
Continuous random-variable helpers (distribution parsing, moments).
Support-based bound check for continuous-RV comparators.
Exception type thrown by circuit operations on invalid input.
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
gateType getGateType(gate_t g) const
Return the type of gate g.
In-memory provenance circuit with semiring-generic evaluation.
bool isCategoricalMixture(gate_t g) const
Test whether g is a categorical-form gate_mixture (the explicit provsql.categorical output).
std::string getExtra(gate_t g) const
Return the string extra for gate g.
double getProb(gate_t g) const
Return the probability for gate g.
std::pair< unsigned, unsigned > getInfos(gate_t g) const
Return the integer annotation pair for gate g.
@ Normal
Normal (Gaussian): p1=μ, p2=σ
@ Exponential
Exponential: p1=λ, p2 unused.
@ Uniform
Uniform on [a,b]: p1=a, p2=b.
@ Erlang
Erlang: p1=k (positive integer), p2=λ.
double parseDoubleStrict(const std::string &s)
Strictly parse s as a double.
std::vector< double > monteCarloJointDistribution(const GenericCircuit &gc, const std::vector< gate_t > &cmps, unsigned samples)
Estimate the joint distribution of cmps via Monte Carlo.
ConditionalScalarSamples monteCarloConditionalScalarSamples(const GenericCircuit &gc, gate_t root, gate_t event_root, unsigned samples)
Rejection-sample root conditioned on event_root.
std::vector< double > monteCarloScalarSamples(const GenericCircuit &gc, gate_t root, unsigned samples)
Sample a scalar sub-circuit samples times and return the draws.
std::optional< DistributionSpec > parse_distribution_spec(const std::string &s)
Parse the on-disk text encoding of a gate_rv distribution.
double monteCarloRV(const GenericCircuit &gc, gate_t root, unsigned samples)
Run Monte Carlo on a circuit that may contain gate_rv leaves.
std::optional< std::vector< double > > try_truncated_closed_form_sample(const GenericCircuit &gc, gate_t root, gate_t event_root, unsigned n)
Try to draw n exact samples from the conditional distribution of root given event_root via closed-for...
std::optional< TruncatedSingleRv > matchTruncatedSingleRv(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
Detect a closed-form, optionally-truncated single-RV shape.
bool circuitHasRV(const GenericCircuit &gc, gate_t root)
Walk the circuit reachable from root looking for any gate_rv.
int provsql_monte_carlo_seed
Seed for the Monte Carlo sampler; -1 means non-deterministic (std::random_device); controlled by the ...
bool provsql_interrupted
Global variable that becomes true if this particular backend received an interrupt signal.
const char * gate_type_name[]
Names of gate types.
provsql_arith_op
Arithmetic operator tags used by gate_arith.
@ PROVSQL_ARITH_DIV
binary, child0 / child1
@ PROVSQL_ARITH_PLUS
n-ary, sum of children
@ PROVSQL_ARITH_NEG
unary, -child0
@ PROVSQL_ARITH_MINUS
binary, child0 - child1
@ PROVSQL_ARITH_TIMES
n-ary, product of children
@ gate_rv
Continuous random-variable leaf (extra encodes distribution).
@ gate_assumed_boolean
Structural marker over a single child whose sub-circuit was computed under a Boolean-provenance assum...
@ gate_mixture
Probabilistic mixture: three wires [p_token (gate_input Bernoulli), x_token, y_token]; samples x when...
@ gate_arith
n-ary arithmetic gate over scalar-valued children (info1 holds operator tag)
ValueType getType() const
Return the runtime type tag of this value.
std::variant< long, double, bool, std::string, std::vector< long >, std::vector< double >, std::vector< bool >, std::vector< std::string > > v
The variant holding the actual value.
Outcome of a conditional Monte Carlo sampling pass.
std::vector< double > accepted
Parsed distribution spec (kind + up to two parameters).
double p2
Second parameter (σ or b; unused for Exponential).
double p1
First parameter (μ, a, or λ).