doxygen-c/html/RangeCheck_8cpp_source.html

/**

 * @file RangeCheck.cpp

 * @brief Implementation of the support-based bound check pass.

 *        See @c RangeCheck.h for the full docstring.

 */

#include "RangeCheck.h"


#include <algorithm>

#include <cmath>

#include <limits>

#include <stack>

#include <unordered_map>

#include <unordered_set>

#include <vector>


#include "Aggregation.h"        // ComparisonOperator + cmpOpFromOid

#include "AnalyticEvaluator.h"  // cdfAt for shape_mass under truncation

#include "CircuitFromMMap.h"    // getGenericCircuit

#include "RandomVariable.h"     // parse_distribution_spec, DistKind

#include "provsql_utils_cpp.h"  // uuid2string


#include <type_traits>          // std::is_same_v in truncateShape

#include <variant>

extern "C" {

#include "postgres.h"

#include "fmgr.h"

#include "funcapi.h"            // get_call_result_type, BlessTupleDesc

#include "access/htup_details.h" // heap_form_tuple (PG 10 declares it here;

                                 // funcapi.h pulls it in transitively from

                                 // PG 11 onwards, but not on 10)

#include "utils/uuid.h"

#include "provsql_utils.h"      // gate_type, provsql_arith_op

#include "provsql_error.h"


PG_FUNCTION_INFO_V1(rv_support);

}


namespace provsql {


namespace {


/**

 * @brief Closed interval @c [lo, hi] on the extended real line.

 *

 * @c -INFINITY / @c +INFINITY are used for unbounded ends (e.g. the

 * support of a normal RV is @c {-INF, +INF}).  Empty intervals are

 * not generated by any constructor below; comparators against an

 * empty interval would be vacuous and we consider them undecidable.

 */

struct Interval {
  double lo;  /* lower end; -infinity when unbounded below */
  double hi;  /* upper end; +infinity when unbounded above */

  /* Degenerate interval holding the single value v. */
  static Interval point(double v) { return {v, v}; }

  /* The whole extended real line. */
  static Interval all()
  {
    return {-std::numeric_limits<double>::infinity(),
            +std::numeric_limits<double>::infinity()};
  }

  /* True iff the interval is exactly {-inf, +inf}. */
  bool isAll() const
  {
    return std::isinf(lo) && lo < 0 && std::isinf(hi) && hi > 0;
  }
};

/* Endpoint-wise sum, difference and negation of closed intervals. */
Interval add(Interval a, Interval b) { return {a.lo + b.lo, a.hi + b.hi}; }
Interval sub(Interval a, Interval b) { return {a.lo - b.hi, a.hi - b.lo}; }
Interval neg(Interval a)             { return {-a.hi, -a.lo}; }

/* Interval product: take the min/max of the four corner products.
 * Handles signed bounds correctly (no special case for negative).
 *
 * A corner of the form 0 * (+/-inf) evaluates to NaN in IEEE
 * arithmetic.  The infinite end of a support is a limit that is never
 * attained, so the product of 0 with any attainable value is 0; the
 * corner is therefore taken to be 0.  Without this, a NaN corner
 * either poisons both bounds (e.g. {-inf, +inf} * {0, 1} became
 * {NaN, NaN}) or is silently skipped by std::min / std::max. */
Interval mul(Interval a, Interval b)
{
  const auto corner = [](double x, double y) {
    const bool zero_times_inf = (x == 0.0 && std::isinf(y))
                             || (y == 0.0 && std::isinf(x));
    return zero_times_inf ? 0.0 : x * y;
  };
  const double p1 = corner(a.lo, b.lo), p2 = corner(a.lo, b.hi);
  const double p3 = corner(a.hi, b.lo), p4 = corner(a.hi, b.hi);
  return {std::min({p1, p2, p3, p4}), std::max({p1, p2, p3, p4})};
}

/* Interval division: if the divisor contains zero (including as an
 * endpoint), the result is unbounded in both directions; otherwise
 * compute via mul(a, 1/b).  The all-real fallback is conservative
 * (any real value is possible) but throws away precision -- division
 * by an interval crossing zero is rare in our tests. */
Interval divInt(Interval a, Interval b)
{
  if (b.lo <= 0.0 && b.hi >= 0.0)
    return Interval::all();
  /* b has a constant sign here, so 1/x is monotone decreasing on it
   * and the reciprocal interval is {1/hi, 1/lo}. */
  const Interval inv = {1.0 / b.hi, 1.0 / b.lo};
  return mul(a, inv);
}


/**

 * @brief Recursively compute the interval of @p g's value across

 *        worlds.  Memoised in @p cache.

 *

 * Recognised gate types:

 * - @c gate_value: point interval on the parsed scalar.

 * - @c gate_rv:    distribution support (uniform exact, exponential

 *                  on @c [0, +∞), normal on @c (-∞, +∞)).

 * - @c gate_arith: propagated via the interval-arith helpers above.

 *

 * Anything else (e.g. an aggregate gate reached via a HAVING cmp)

 * yields the all-real interval, which downstream conservatively

 * treats as undecidable.

 */

Interval intervalOf(const GenericCircuit &gc, gate_t g,
                    std::unordered_map<gate_t, Interval> &cache)
{
  /* Memoised answer, if this gate was already visited. */
  if (const auto hit = cache.find(g); hit != cache.end())
    return hit->second;

  const double inf = std::numeric_limits<double>::infinity();

  /* Every path that does not recognise the gate keeps this default. */
  Interval bounds = Interval::all();

  switch (gc.getGateType(g)) {
    case gate_value:
      /* Literal: a single point.  A parse failure is not caught here
       * and propagates to the caller. */
      bounds = Interval::point(parseDoubleStrict(gc.getExtra(g)));
      break;

    case gate_rv: {
      auto spec = parse_distribution_spec(gc.getExtra(g));
      if (!spec) break;
      const auto family = spec->kind;
      if (family == DistKind::Uniform) {
        bounds = {spec->p1, spec->p2};
      } else if (family == DistKind::Exponential
                 || family == DistKind::Erlang) {
        bounds = {0.0, inf};
      }
      /* DistKind::Normal: support is the whole real line, i.e. the
       * default. */
      break;
    }

    case gate_arith: {
      auto op = static_cast<provsql_arith_op>(gc.getInfos(g).first);
      const auto &operands = gc.getWires(g);
      if (operands.empty()) break;

      /* The first operand is evaluated for every operator, including
       * those whose arity check fails below. */
      const Interval head = intervalOf(gc, operands[0], cache);
      const std::size_t arity = operands.size();

      switch (op) {
        case PROVSQL_ARITH_PLUS:
        case PROVSQL_ARITH_TIMES: {
          /* n-ary fold, left to right. */
          Interval acc = head;
          for (std::size_t k = 1; k < arity; ++k) {
            const Interval next = intervalOf(gc, operands[k], cache);
            acc = (op == PROVSQL_ARITH_PLUS) ? add(acc, next)
                                             : mul(acc, next);
          }
          bounds = acc;
          break;
        }
        case PROVSQL_ARITH_MINUS:
          if (arity == 2)
            bounds = sub(head, intervalOf(gc, operands[1], cache));
          break;
        case PROVSQL_ARITH_DIV:
          if (arity == 2)
            bounds = divInt(head, intervalOf(gc, operands[1], cache));
          break;
        case PROVSQL_ARITH_NEG:
          if (arity == 1)
            bounds = neg(head);
          break;
      }
      break;
    }

    case gate_semimod: {
      /* HAVING-style constant wrapper semimod(gate_one, value): the
       * always-true gate_one leaves the scalar unchanged in every
       * world, so the wrapper has the interval of its value child.
       * Any other semimod shape keeps the all-real default. */
      const auto &pair = gc.getWires(g);
      const bool constant_wrapper =
          pair.size() == 2 && gc.getGateType(pair[0]) == gate_one;
      if (constant_wrapper)
        bounds = intervalOf(gc, pair[1], cache);
      break;
    }

    case gate_mixture: {
      /* The support of a mixture is the union of its branch supports,
       * approximated here by the enclosing interval. */
      const auto &arms = gc.getWires(g);
      if (gc.isCategoricalMixture(g)) {
        /* Categorical [key, mul_1, ..., mul_n]: each mulinput carries
         * its outcome value in extra; take [min, max] of them.  One
         * unparseable outcome discards the whole bound. */
        double smallest = inf;
        double largest = -inf;
        bool usable = false;
        for (std::size_t k = 1; k < arms.size(); ++k) {
          double outcome;
          try {
            outcome = parseDoubleStrict(gc.getExtra(arms[k]));
          } catch (const CircuitException &) {
            usable = false;
            break;
          }
          smallest = std::min(smallest, outcome);
          largest = std::max(largest, outcome);
          usable = true;
        }
        if (usable) bounds = {smallest, largest};
      } else if (arms.size() == 3) {
        /* Classic [p_token, x_token, y_token]: the Boolean selector
         * contributes nothing to the scalar interval. */
        const Interval x_arm = intervalOf(gc, arms[1], cache);
        const Interval y_arm = intervalOf(gc, arms[2], cache);
        bounds = {std::min(x_arm.lo, y_arm.lo),
                  std::max(x_arm.hi, y_arm.hi)};
      }
      break;
    }

    default:
      /* gate_agg is deliberately not bounded here: the empty-subset
       * NULL semantics make a flat interval misleading, so agg-bearing
       * cmps go to a dedicated decider.  Every other gate type keeps
       * the all-real default. */
      break;
  }

  cache[g] = bounds;
  return bounds;
}


/**

 * @brief Decide a @c gate_cmp from the interval of @c (lhs - rhs).

 *

 * Returns @c NaN when the comparator cannot be decided from interval

 * bounds alone (e.g. the difference straddles zero, or the comparator

 * is @c = / @c <> on overlapping continuous supports -- both of

 * which need a CDF, which a downstream analytic pass can supply).

 * Otherwise returns the certain probability @c 0.0 or @c 1.0.

 */

double decideCmp(const Interval &diff, ComparisonOperator op)
{
  const double certainly_true = 1.0;
  const double certainly_false = 0.0;
  const double undecided = std::numeric_limits<double>::quiet_NaN();

  /* Sign facts about (lhs - rhs) that hold in every world.  Each one
   * is false when the relevant bound is NaN, which leaves the
   * comparator undecided. */
  const bool always_negative = diff.hi < 0.0;
  const bool never_positive  = diff.hi <= 0.0;
  const bool always_positive = diff.lo > 0.0;
  const bool never_negative  = diff.lo >= 0.0;

  switch (op) {
    case ComparisonOperator::LT:
      if (always_negative) return certainly_true;
      if (never_negative)  return certainly_false;
      return undecided;
    case ComparisonOperator::LE:
      if (never_positive)  return certainly_true;
      if (always_positive) return certainly_false;
      return undecided;
    case ComparisonOperator::GT:
      if (always_positive) return certainly_true;
      if (never_positive)  return certainly_false;
      return undecided;
    case ComparisonOperator::GE:
      if (never_negative)  return certainly_true;
      if (always_negative) return certainly_false;
      return undecided;
    case ComparisonOperator::EQ:
      /* Only disjointness is decidable from bounds alone.  On
       * overlapping supports the answer depends on whether either
       * side is continuous, which this pass cannot tell. */
      return (always_negative || always_positive) ? certainly_false
                                                  : undecided;
    case ComparisonOperator::NE:
      return (always_negative || always_positive) ? certainly_true
                                                  : undecided;
  }
  return undecided;
}


/**

 * @brief Decide a @c gate_cmp where one side is a @c gate_agg, the

 *        other is a scalar constant.

 *

 * Computes a value-interval for the aggregate from its semimod

 * children's per-row values, then folds the comparator like the

 * non-agg path -- but accepts only FALSE decisions, never

 * TRUE.  The reason is structural to ProvSQL's HAVING semantics:

 * the per-aggregator subset enumerators in @c subset.cpp

 * (@c count_enum, @c sum_dp, @c enumerate_exhaustive) all skip

 * the empty subset, matching SQL's "no group, no HAVING" rule.

 * So a HAVING cmp's value is the OR over the @em non-empty subsets

 * where the predicate holds.

 *

 * - When no non-empty subset satisfies the predicate (the bound is

 *   strictly disjoint from the threshold on the right side of the

 *   comparator), the cmp value is exactly @c 0 = @c gate_zero.

 *   FALSE decision: sound.

 * - When every non-empty subset satisfies the predicate, the cmp

 *   value equals "the group is non-empty" -- the OR over the

 *   children's k_gates -- which is a non-constant Boolean

 *   expression, @em not @c gate_one.  Returning TRUE here would

 *   replace the cmp with @c gate_one and over-count probability

 *   mass from the empty world (where the group does not exist),

 *   so TRUE decisions are blocked uniformly across all aggregators.

 *

 * Aggregators we don't bound (@c AVG, @c AND, @c OR, @c CHOOSE,

 * @c ARRAY_AGG, @c NONE) fall through to undecidable.

 *

 * @return @c 0.0 if decided to FALSE, @c NaN otherwise.

 */

double decideAggVsConstCmp(const GenericCircuit &gc, gate_t agg_gate,

                           ComparisonOperator op, double const_val,

                           bool agg_on_lhs)

{

  AggregationOperator aop = getAggregationOperator(gc.getInfos(agg_gate).first);


  /* Extract per-child scalar values from the semimod children. */

  std::vector<double> values;

  for (gate_t child : gc.getWires(agg_gate)) {

    if (gc.getGateType(child) != gate_semimod)

      return std::numeric_limits<double>::quiet_NaN();

    const auto &sw = gc.getWires(child);

    if (sw.size() != 2)

      return std::numeric_limits<double>::quiet_NaN();

    gate_t value_gate = sw[1];

    if (gc.getGateType(value_gate) != gate_value)

      return std::numeric_limits<double>::quiet_NaN();

    try {

      values.push_back(parseDoubleStrict(gc.getExtra(value_gate)));

    } catch (const CircuitException &) {

      return std::numeric_limits<double>::quiet_NaN();

    }

  }


  Interval val_interval = Interval::all();


  switch (aop) {

    case AggregationOperator::COUNT:

      val_interval = {0.0, static_cast<double>(values.size())};

      break;

    case AggregationOperator::SUM: {

      double sum_neg = 0.0, sum_pos = 0.0;

      for (double v : values) {

        if (v < 0.0) sum_neg += v;

        else          sum_pos += v;

      }

      val_interval = {std::min(0.0, sum_neg), std::max(0.0, sum_pos)};

      break;

    }

    case AggregationOperator::MIN:

    case AggregationOperator::MAX:

      if (values.empty())

        return std::numeric_limits<double>::quiet_NaN();

      val_interval = {*std::min_element(values.begin(), values.end()),

                      *std::max_element(values.begin(), values.end())};

      break;

    default:

      /* AVG / AND / OR / CHOOSE / ARRAY_AGG / NONE: not decidable

       * with this pass. */

      return std::numeric_limits<double>::quiet_NaN();

  }


  Interval lhs = agg_on_lhs ? val_interval : Interval::point(const_val);

  Interval rhs = agg_on_lhs ? Interval::point(const_val) : val_interval;

  Interval diff = sub(lhs, rhs);

  double p = decideCmp(diff, op);


  /* Only FALSE decisions are sound (see doc comment).  TRUE

   * decisions, if accepted, would replace the cmp with gate_one

   * and credit probability to the empty subset, which provsql_having

   * deliberately excludes from valid worlds. */

  if (p == 0.0) return 0.0;

  return std::numeric_limits<double>::quiet_NaN();

}


/**

 * @brief Try to extract a scalar constant from a cmp's child.

 *

 * Recognises two shapes:

 * - bare @c gate_value: parse its @c extra as a double;

 * - HAVING-style @c gate_semimod with @c k=gate_one and

 *   @c value=gate_value: parse the value's extra.

 *

 * Returns @c NaN on any other shape.

 */

double extractScalarConst(const GenericCircuit &gc, gate_t g)

{

  auto t = gc.getGateType(g);

  if (t == gate_value) {

    try { return parseDoubleStrict(gc.getExtra(g)); }

    catch (const CircuitException &) {

      return std::numeric_limits<double>::quiet_NaN();

    }

  }

  if (t == gate_semimod) {

    const auto &w = gc.getWires(g);

    if (w.size() != 2) return std::numeric_limits<double>::quiet_NaN();

    if (gc.getGateType(w[0]) != gate_one)

      return std::numeric_limits<double>::quiet_NaN();

    if (gc.getGateType(w[1]) != gate_value)

      return std::numeric_limits<double>::quiet_NaN();

    try { return parseDoubleStrict(gc.getExtra(w[1])); }

    catch (const CircuitException &) {

      return std::numeric_limits<double>::quiet_NaN();

    }

  }

  return std::numeric_limits<double>::quiet_NaN();

}


/**

 * @brief Flip the sides of a comparison operator.

 *

 * @c (a op b) is equivalent to @c (b flip(op) a).  Used to normalise

 * a cmp so the random-variable side is always on the left.

 */

ComparisonOperator flipCmpOp(ComparisonOperator op)
{
  using Cmp = ComparisonOperator;

  /* The four order comparators swap with their mirror image. */
  if (op == Cmp::LT) return Cmp::GT;
  if (op == Cmp::GT) return Cmp::LT;
  if (op == Cmp::LE) return Cmp::GE;
  if (op == Cmp::GE) return Cmp::LE;

  /* EQ and NE are symmetric; any other value is returned as is. */
  return op;
}


/**

 * @brief Interpret a @c gate_cmp as a per-RV constraint @c rv op c.

 *

 * Returns @c true and fills @p rv_out, @p op_out, @p const_out when

 * exactly one side of the cmp is a @c gate_rv and the other a

 * @c gate_value with a parseable scalar; @c false otherwise (both

 * sides are RVs, both constants, an @c arith subtree appears, etc.).

 *

 * Strict-vs-non-strict inequalities are preserved as the operator;

 * the caller decides whether to treat the boundary as inclusive

 * (continuous distributions: measure-zero, irrelevant for

 * feasibility verdicts).

 */

bool asRvVsConstCmp(const GenericCircuit &gc, gate_t cmp_gate,

                    gate_t &rv_out, ComparisonOperator &op_out,

                    double &const_out)

{

  bool ok = false;

  ComparisonOperator op = cmpOpFromOid(gc.getInfos(cmp_gate).first, ok);

  if (!ok) return false;

  const auto &wires = gc.getWires(cmp_gate);

  if (wires.size() != 2) return false;


  /* Recognise scalar-vs-constant cmps where the scalar side is a

   * bare gate_rv (the original use case for the per-cmp resolution

   * pass) or a gate_mixture (so the conditioning walker can extract

   * intervals on mixture / categorical variables — value-vs-value

   * cmps are folded upstream by RangeCheck before they reach this

   * walker).  Dirac (gate_value) is never the scalar side of a

   * non-trivial cmp at this point; the value-vs-value pair would have

   * been resolved upstream. */

  auto isScalarRv = [](gate_type t) {

    return t == gate_rv || t == gate_mixture;

  };

  auto t0 = gc.getGateType(wires[0]);

  auto t1 = gc.getGateType(wires[1]);

  if (isScalarRv(t0) && t1 == gate_value) {

    try { const_out = parseDoubleStrict(gc.getExtra(wires[1])); }

    catch (const CircuitException &) { return false; }

    rv_out = wires[0];

    op_out = op;

    return true;

  }

  if (t0 == gate_value && isScalarRv(t1)) {

    try { const_out = parseDoubleStrict(gc.getExtra(wires[0])); }

    catch (const CircuitException &) { return false; }

    rv_out = wires[1];

    op_out = flipCmpOp(op);

    return true;

  }

  return false;

}


/**

 * @brief Apply a single @c rv-op-constant constraint to a running

 *        interval for the RV.

 *

 * Strict vs non-strict inequalities collapse onto the same closed

 * interval: continuous distributions assign zero mass to the

 * boundary, so the joint-feasibility verdict is unchanged whether

 * we use @c < or @c <=.  @c <> (NE) cannot be represented as a

 * single interval and is left to the per-cmp pass.

 */

Interval intersectRvConstraint(Interval current, ComparisonOperator op,
                               double c)
{
  using Cmp = ComparisonOperator;

  /* Which ends of the running interval the constraint can tighten.
   * Strict and non-strict forms are treated alike; EQ tightens both
   * ends; NE tightens neither, because the complement of a point is
   * not a single interval. */
  const bool bounds_above = op == Cmp::LT || op == Cmp::LE || op == Cmp::EQ;
  const bool bounds_below = op == Cmp::GT || op == Cmp::GE || op == Cmp::EQ;

  if (bounds_below) current.lo = std::max(current.lo, c);
  if (bounds_above) current.hi = std::min(current.hi, c);
  return current;
}


/* An interval is empty when its upper end lies strictly below its
 * lower end. */
bool intervalEmpty(Interval i)
{
  return i.hi < i.lo;
}


/**

 * @brief Walk an AND-conjunct tree collecting per-RV interval

 *        constraints from its @c gate_cmp leaves.

 *

 * Shared between @c isAndJointlyInfeasible (which checks for an empty

 * intersection) and the public @c collectRvConstraints / conditional

 * @c compute_support paths.  Descends through @c gate_times,

 * collecting every @c gate_cmp interpretable as `rv op const` and

 * intersecting its constraint into a running interval for that RV.

 *

 * @p complete is set to @c true on entry and cleared if the walk

 * encounters any structure other than the AND-friendly set

 * (@c gate_times, @c gate_delta, @c gate_cmp, @c gate_input,

 * @c gate_update, @c gate_one, @c gate_zero) whose footprint *might*

 * constrain an RV

 * (i.e. excluding bare Bernoulli factors).  Callers that need a

 * tight bound (the closed-form moment shortcut) must check it; the

 * support intersection caller can use the result unconditionally

 * because dropping a disjunctive factor only loosens the interval,

 * which is sound for a superset bound on the conditional support.

 *

 * Cmps that do not interpret as `rv op const` (RV vs RV, arith on

 * either side, agg, …) contribute no interval but clear @p complete:

 * they belong to the conditioning event and may constrain an RV in a

 * way that a single interval cannot capture.

 */

void walkAndConjunctIntervals(

    const GenericCircuit &gc, gate_t root,

    std::unordered_map<gate_t, Interval> &rv_intervals,

    std::unordered_map<gate_t, Interval> &support_cache,

    bool &complete)

{

  std::unordered_set<gate_t> seen;

  std::stack<gate_t> stk;

  stk.push(root);

  complete = true;


  while (!stk.empty()) {

    gate_t g = stk.top(); stk.pop();

    if (!seen.insert(g).second) continue;


    auto t = gc.getGateType(g);

    if (t == gate_cmp) {

      gate_t rv = static_cast<gate_t>(0);

      ComparisonOperator op = ComparisonOperator::EQ;

      double c = 0.0;

      if (!asRvVsConstCmp(gc, g, rv, op, c)) {

        /* Cmp shape we don't interpret (RV vs RV, arith involved).

         * Conservatively mark the walk incomplete: this cmp belongs

         * to the event AND could constrain an RV in a way we can't

         * fold into a single interval. */

        complete = false;

        continue;

      }

      auto it = rv_intervals.find(rv);

      Interval current = (it == rv_intervals.end())

                         ? intervalOf(gc, rv, support_cache)

                         : it->second;

      current = intersectRvConstraint(current, op, c);

      rv_intervals[rv] = current;

      continue;  /* never descend into a cmp's operands */

    }

    if (t == gate_times || t == gate_delta || g == root) {

      /* gate_delta wraps a single child as the δ-semiring identity on

       * Booleans, so the AND-conjunct walker is sound to descend

       * through it -- the wrapper carries no constraint of its own.

       * Skipping the descent would mark the walk incomplete and force

       * the moment caller to fall back to MC even when the inner

       * cmps are decidable closed-form. */

      for (gate_t c : gc.getWires(g)) stk.push(c);

      continue;

    }

    if (t == gate_input || t == gate_update || t == gate_one ||

        t == gate_zero) {

      /* Bernoulli leaf / constants: shift P(event), don't truncate

       * any continuous RV.  Skipping is sound and the walk stays

       * complete. */

      continue;

    }

    /* gate_plus (OR), gate_monus (set diff), gate_arith, gate_rv, ...:

     * could affect an RV's conditional distribution in ways that

     * don't reduce to an interval intersection.  Mark the walk

     * incomplete so a moment closed-form caller falls through to MC. */

    complete = false;

  }

}


/**

 * @brief Walk @p root's AND-conjunct cmps and decide whether the

 *        conjunction is jointly infeasible by per-RV interval

 *        intersection.

 *

 * For every @c gate_cmp reachable through a chain of @c gate_times

 * starting at @p root, that is interpretable as @c rv-op-constant,

 * intersect the constraint with the running interval for that RV

 * (initialised to the RV's distribution support).  As soon as any

 * RV's interval becomes empty, the AND is infeasible.

 *

 * Descends only through @c gate_times and @c gate_delta (plus @p root itself, whatever its type): @c gate_plus is OR (the

 * disjuncts could individually be feasible even when each is a

 * narrow constraint on the RV, so they do not contribute to the

 * conjunction's infeasibility), @c gate_monus is set difference

 * (likewise), and other gate types break the AND chain.

 *

 * Cmps that this pass cannot interpret (RV vs RV, arith on either

 * side, agg, …) are simply ignored: skipping them is sound -- we

 * just have fewer constraints, so we never falsely declare

 * infeasibility we cannot prove.

 */

bool isAndJointlyInfeasible(const GenericCircuit &gc, gate_t root)

{

  std::unordered_map<gate_t, Interval> rv_intervals;

  std::unordered_map<gate_t, Interval> support_cache;

  bool complete;

  walkAndConjunctIntervals(gc, root, rv_intervals, support_cache, complete);

  for (const auto &kv : rv_intervals) {

    if (intervalEmpty(kv.second)) return true;

  }

  return false;

}


/**

 * @brief Memoised recursive predicate: does @p g's sub-circuit

 *        produce a continuous random variable (no point-mass /

 *        Dirac component)?

 *

 * Used to widen the EQ / NE = 0 / 1 shortcut at the cmp resolution

 * site below the bare-@c gate_rv test, so multi-gate composites like

 * <tt>Exp(0.4) + Exp(0.3) = c</tt> (heterogeneous-rate exponential

 * sum, no closed-form Erlang fold) or

 * <tt>mixture(p, Normal, Uniform) = c</tt> (Bernoulli mixture over

 * two continuous arms) also resolve at load time.  Without this the

 * cmp falls through to AnalyticEvaluator (which returns NaN for

 * EQ / NE) and then to the MC marginalisation, which in finite

 * precision estimates @c P(X = Y) at 0 anyway -- but costs

 * @c provsql.rv_mc_samples iterations to do so.

 *

 * Recursion:

 *  - @c gate_rv -> true (Normal / Uniform / Exp / Erlang all have

 *    continuous densities, no point masses).

 *  - @c gate_value -> false (Dirac at the literal).

 *  - @c gate_arith -> true iff every wire has only-continuous

 *    support.  Sums, products, negations, divisions of continuous

 *    RVs stay continuous in distribution; a @c gate_value sibling

 *    poisons the result (e.g. @c X + 2 is continuous, but

 *    @c X * 0 = 0 has a Dirac at zero -- handled by the existing

 *    constant-fold pre-pass, but defensive here).

 *  - @c gate_mixture, Bernoulli 3-wire <tt>[p, X, Y]</tt> -> true

 *    iff X and Y are both continuous; the Boolean @c p only chooses

 *    an arm, so it does not affect the support type.

 *  - @c gate_mixture, categorical

 *    <tt>[key, mul_1, ..., mul_n]</tt> -> false (point masses at

 *    each mulinput's outcome value).

 *  - Any other gate type -> false (defensive: gate_plus / gate_times

 *    / gate_cmp / gate_agg are not continuous-RV containers).

 *

 * The cache is keyed on @c gate_t and may be shared across multiple

 * cmp gates inside a single @c runRangeCheck invocation.

 */

bool hasOnlyContinuousSupport(const GenericCircuit &gc, gate_t g,

                              std::unordered_map<gate_t, bool> &cache)

{

  auto it = cache.find(g);

  if (it != cache.end()) return it->second;

  /* Memoise pessimistically before recursing so a malformed cyclic

   * sub-circuit (shouldn't happen on well-formed input) returns

   * @c false rather than blowing the stack. */

  cache[g] = false;


  bool result = false;

  auto t = gc.getGateType(g);

  switch (t) {

    case gate_rv:

      result = true;

      break;

    case gate_value:

      result = false;

      break;

    case gate_arith: {

      result = true;

      for (gate_t w : gc.getWires(g)) {

        if (!hasOnlyContinuousSupport(gc, w, cache)) { result = false; break; }

      }

      break;

    }

    case gate_mixture: {

      if (gc.isCategoricalMixture(g)) { result = false; break; }

      const auto &w = gc.getWires(g);

      if (w.size() != 3) { result = false; break; }

      result = hasOnlyContinuousSupport(gc, w[1], cache)

            && hasOnlyContinuousSupport(gc, w[2], cache);

      break;

    }

    default:

      result = false;

      break;

  }


  cache[g] = result;

  return result;

}


/**

 * @brief Recursive collection of the @c gate_rv and @c gate_input

 *        leaves reachable from @p g.

 *

 * The result is a sub-circuit's "random-source footprint": two

 * sub-circuits are independent iff their random-source sets are

 * disjoint.  Used to gate the exact-EQ Dirac sum-product below: the

 * factoring @c P(X = Y) = Σ_v @c P(X=v)·P(Y=v) is only valid when

 * @c X and @c Y are independent, otherwise the per-row coupling

 * (e.g. two mixtures sharing a Bernoulli @c p_token) breaks the

 * factoring and the sum-product silently produces the wrong

 * probability.

 *

 * Descent rules: @c gate_arith and @c gate_mixture descend into all

 * children (Bernoulli @c p_token, categorical key, mulinputs all

 * contribute to the random footprint).  @c gate_value is a

 * deterministic literal and contributes no random source.  Other

 * gate types (Boolean / agg / etc.) don't appear under a continuous

 * cmp side in well-formed circuits; defensively, they contribute

 * nothing.

 */

const std::unordered_set<gate_t> &

collectRandomLeaves(const GenericCircuit &gc, gate_t g,

                    std::unordered_map<gate_t, std::unordered_set<gate_t>> &cache)

{

  auto it = cache.find(g);

  if (it != cache.end()) return it->second;

  /* Insert an empty entry first so a recursive call on a cyclic

   * sub-circuit returns early.  std::unordered_map insertion does

   * not invalidate references to existing elements, but it MAY

   * rehash on growth (invalidating ALL references, including the

   * one we're about to capture).  Build the result locally, then

   * write it back in one shot at the end. */

  cache.emplace(g, std::unordered_set<gate_t>{});


  std::unordered_set<gate_t> out;

  auto t = gc.getGateType(g);

  if (t == gate_rv || t == gate_input) {

    out.insert(g);

  } else if (t == gate_arith || t == gate_mixture) {

    for (gate_t w : gc.getWires(g)) {

      const auto &child = collectRandomLeaves(gc, w, cache);

      out.insert(child.begin(), child.end());

    }

  }


  /* Overwrite the placeholder; locate by find() to avoid a fresh

   * insertion that could rehash and invalidate other iterators in

   * upstream frames. */

  auto fit = cache.find(g);

  fit->second = std::move(out);

  return fit->second;

}


/* Point masses of a scalar sub-circuit: outcome value -> mass. */
using DiracMap    = std::unordered_map<double, double>;

/* A DiracMap, or std::nullopt when the point masses cannot be
 * extracted statically. */
using DiracMapOpt = std::optional<DiracMap>;


/**

 * @brief Recursive extraction of @p g's Dirac mass map (value -> mass).

 *

 * Returns @c std::nullopt when the sub-circuit's discrete component

 * is not statically extractable (e.g. an opaque @c gate_arith over

 * mixtures, a Bernoulli mixture whose @c p_token is a compound

 * Boolean, etc.).  When the sub-circuit is purely continuous the

 * map is well-defined but empty (no Diracs, no masses).

 *

 * Used by the exact EQ shortcut below: for independent @c X, @c Y

 * with extractable mass maps @c M_X, @c M_Y:

 * <tt>P(X = Y) = Σ_{v ∈ M_X ∩ M_Y} M_X[v] · M_Y[v]</tt>.  Continuous

 * components contribute zero by measure-zero arguments (Dirac vs

 * continuous and continuous vs continuous), so they need not appear

 * in the sum.

 *

 * Shape rules:

 * - @c gate_value:v: a Dirac at the literal with mass @c 1.

 * - @c gate_rv: continuous in every supported family, empty map.

 * - categorical @c gate_mixture <tt>[key, mul_1, ..., mul_n]</tt>:

 *   sum @c getProb(mul_i) into @c map[parseDouble(extra(mul_i))].

 *   Multiple mulinputs at the same outcome (which the constructor

 *   doesn't produce but is sound to handle) merge masses.

 * - Bernoulli @c gate_mixture <tt>[p_token, X, Y]</tt> with

 *   @c p_token a bare @c gate_input: pull @c π = @c getProb(p_token)

 *   and recurse into X, Y to get @c M_X, @c M_Y; result is

 *   <tt>π·M_X[v] + (1-π)·M_Y[v]</tt> per outcome value.  Compound

 *   Boolean @c p_tokens (whose probability would have to come from

 *   a recursive @c probability_evaluate call) bail.

 * - Anything else: @c std::nullopt.

 */

DiracMapOpt
collectDiracMassMap(const GenericCircuit &gc, gate_t g,
                    std::unordered_map<gate_t, DiracMapOpt> &cache)
{
  if (const auto known = cache.find(g); known != cache.end())
    return known->second;

  /* Pessimistic placeholder: a recursive call that reaches g again
   * sees "not extractable" instead of recursing forever. */
  cache.emplace(g, std::nullopt);

  const auto is_probability = [](double p) {
    return std::isfinite(p) && p >= 0.0 && p <= 1.0;
  };

  DiracMapOpt masses;  /* stays nullopt unless a shape below matches */
  const auto kind = gc.getGateType(g);

  if (kind == gate_value) {
    /* Literal: one Dirac of mass 1.  An unparseable extra bails. */
    try {
      const double at = parseDoubleStrict(gc.getExtra(g));
      DiracMap single;
      single[at] = 1.0;
      masses = std::move(single);
    } catch (const CircuitException &) {
    }
  } else if (kind == gate_rv) {
    /* Continuous: extractable, with no point masses. */
    masses = DiracMap{};
  } else if (kind == gate_mixture) {
    const auto &arms = gc.getWires(g);
    if (gc.isCategoricalMixture(g)) {
      /* [key, mul_1, ..., mul_n]: add each mulinput's probability to
       * the entry for its outcome value.  One bad outcome or
       * probability discards the whole map. */
      DiracMap by_outcome;
      bool usable = true;
      for (std::size_t k = 1; usable && k < arms.size(); ++k) {
        double outcome = 0.0;
        try {
          outcome = parseDoubleStrict(gc.getExtra(arms[k]));
        } catch (const CircuitException &) {
          usable = false;
          continue;
        }
        const double weight = gc.getProb(arms[k]);
        if (!is_probability(weight)) {
          usable = false;
          continue;
        }
        by_outcome[outcome] += weight;
      }
      if (usable) masses = std::move(by_outcome);
    } else if (arms.size() == 3 && gc.getGateType(arms[0]) == gate_input) {
      /* Bernoulli [p_token, X, Y] with a bare-input selector: weight
       * the arms' maps by pi and (1 - pi).  Other selectors bail. */
      const double pi = gc.getProb(arms[0]);
      if (is_probability(pi)) {
        auto x_masses = collectDiracMassMap(gc, arms[1], cache);
        auto y_masses = collectDiracMassMap(gc, arms[2], cache);
        if (x_masses && y_masses) {
          DiracMap blended;
          for (const auto &[at, mass] : *x_masses)
            blended[at] += pi * mass;
          for (const auto &[at, mass] : *y_masses)
            blended[at] += (1.0 - pi) * mass;
          masses = std::move(blended);
        }
      }
    }
  }
  /* Any other gate type: not extractable. */

  cache.find(g)->second = masses;
  return masses;
}


}  // namespace


/*
 * Support-based pruning pass over gc.
 *
 * Phase 1 visits every gate_cmp and tries, in this order, to replace
 * it by a constant / Bernoulli leaf:
 *   1. identical operands (the comparator is reflexively decided);
 *   2. EQ / NE against a side with purely continuous support;
 *   3. EQ / NE between two independent sides with known point masses;
 *   4. aggregate-vs-constant (HAVING-style) comparison;
 *   5. interval arithmetic on the supports of both sides.
 * Phase 2 visits every gate_times and collapses it to zero when its
 * AND-conjunct comparators are jointly infeasible.
 *
 * Returns the number of gates rewritten across both phases.
 */
unsigned runRangeCheck(GenericCircuit &gc)
{
  /* Memo tables shared by every comparator handled in this call.
   * They are keyed on gate_t and stay valid throughout: resolving one
   * comparator only changes that comparator's own gate type, not the
   * sub-circuits hanging below the operands of the other ones. */
  std::unordered_map<gate_t, Interval> interval_memo;
  std::unordered_map<gate_t, bool> continuity_memo;
  std::unordered_map<gate_t, DiracMapOpt> dirac_memo;
  std::unordered_map<gate_t, std::unordered_set<gate_t>> leaf_memo;
  unsigned resolved = 0;

  /* Rewrite one comparator in place and account for it. */
  const auto settle = [&gc, &resolved](gate_t cmp_gate, double prob) {
    gc.resolveCmpToBernoulli(cmp_gate, prob);
    ++resolved;
  };

  /* Take a snapshot of the comparator ids before any rewrite happens.
   * The gate count never shrinks, so walking the original index range
   * is safe; the type is re-tested below before each gate is used. */
  const auto gate_count = gc.getNbGates();
  std::vector<gate_t> pending_cmps;
  for (std::size_t idx = 0; idx < gate_count; ++idx) {
    const auto candidate = static_cast<gate_t>(idx);
    if (gc.getGateType(candidate) == gate_cmp)
      pending_cmps.push_back(candidate);
  }

  for (const gate_t cmp : pending_cmps) {
    if (gc.getGateType(cmp) != gate_cmp) continue;  /* already rewritten */

    bool op_known = false;
    ComparisonOperator op = cmpOpFromOid(gc.getInfos(cmp).first, op_known);
    if (!op_known) continue;

    const auto &operands = gc.getWires(cmp);
    if (operands.size() != 2) continue;
    const gate_t lhs_gate = operands[0];
    const gate_t rhs_gate = operands[1];

    /* (1) Same gate on both sides.  The sampler memoises per
     * iteration, so both reads yield the same value and the
     * comparator is a constant: true for the reflexive operators,
     * false for the irreflexive ones.  Running this first means none
     * of the later paths needs its own lhs != rhs guard. */
    if (lhs_gate == rhs_gate) {
      double prob = std::numeric_limits<double>::quiet_NaN();
      if (op == ComparisonOperator::EQ ||
          op == ComparisonOperator::LE ||
          op == ComparisonOperator::GE) {
        prob = 1.0;
      } else if (op == ComparisonOperator::NE ||
                 op == ComparisonOperator::LT ||
                 op == ComparisonOperator::GT) {
        prob = 0.0;
      }
      settle(cmp, prob);
      continue;
    }

    const bool is_eq = (op == ComparisonOperator::EQ);
    if (is_eq || op == ComparisonOperator::NE) {
      /* (2) Point equality has measure zero as soon as one side has
       * no point-mass component, so EQ is false and NE is true.
       * hasOnlyContinuousSupport decides which sub-circuits qualify;
       * categorical mixtures and pure gate_value sub-circuits do not
       * and fall through to the paths below.  Both sides are always
       * queried, whatever the first answer is. */
      const bool lhs_continuous =
          hasOnlyContinuousSupport(gc, lhs_gate, continuity_memo);
      const bool rhs_continuous =
          hasOnlyContinuousSupport(gc, rhs_gate, continuity_memo);
      if (lhs_continuous || rhs_continuous) {
        settle(cmp, is_eq ? 0.0 : 1.0);
        continue;
      }

      /* (3) Exact sum-product over point masses.  When both sides
       * expose a (value -> mass) map and their random-leaf footprints
       * are disjoint (i.e. the sides are independent), P(X = Y) is
       * the sum of M_X(v) * M_Y(v) over the values common to both
       * maps; any continuous part contributes nothing.  Independence
       * is mandatory: two sides sharing a random leaf are correlated
       * and P(X = Y) cannot be recovered from the marginals. */
      auto lhs_masses = collectDiracMassMap(gc, lhs_gate, dirac_memo);
      auto rhs_masses = collectDiracMassMap(gc, rhs_gate, dirac_memo);
      if (lhs_masses && rhs_masses) {
        const auto &lhs_leaves = collectRandomLeaves(gc, lhs_gate, leaf_memo);
        const auto &rhs_leaves = collectRandomLeaves(gc, rhs_gate, leaf_memo);
        const bool disjoint = std::none_of(
            lhs_leaves.begin(), lhs_leaves.end(),
            [&rhs_leaves](gate_t leaf) { return rhs_leaves.count(leaf) != 0; });
        if (disjoint) {
          /* Scan the smaller map and probe the larger one. */
          const bool scan_lhs = lhs_masses->size() <= rhs_masses->size();
          const DiracMap &scanned = scan_lhs ? *lhs_masses : *rhs_masses;
          const DiracMap &probed  = scan_lhs ? *rhs_masses : *lhs_masses;
          double p_equal = 0.0;
          for (const auto &[value, mass] : scanned) {
            const auto hit = probed.find(value);
            if (hit != probed.end()) p_equal += mass * hit->second;
          }
          /* Floating-point accumulation may drift just outside
           * [0, 1]; bring the result back into range before it is
           * used as a probability. */
          p_equal = std::clamp(p_equal, 0.0, 1.0);
          settle(cmp, is_eq ? p_equal : 1.0 - p_equal);
          continue;
        }
      }
    }

    /* (4) HAVING-style comparator: exactly one side is an aggregate
     * and the other one is a scalar constant.  The aggregate-aware
     * decision procedure is tried first; a NaN from either helper
     * means "undecided" and hands over to the interval path. */
    const bool agg_on_lhs = gc.getGateType(lhs_gate) == gate_agg;
    const bool agg_on_rhs = gc.getGateType(rhs_gate) == gate_agg;
    if (agg_on_lhs != agg_on_rhs) {
      const gate_t agg_gate    = agg_on_lhs ? lhs_gate : rhs_gate;
      const gate_t scalar_gate = agg_on_lhs ? rhs_gate : lhs_gate;
      const double scalar = extractScalarConst(gc, scalar_gate);
      if (!std::isnan(scalar)) {
        const double prob =
            decideAggVsConstCmp(gc, agg_gate, op, scalar, agg_on_lhs);
        if (!std::isnan(prob)) {
          settle(cmp, prob);
          continue;
        }
      }
    }

    /* (5) Interval path: compare the supports of the two sides.  Two
     * unbounded supports can never yield a decision, so that case is
     * skipped without doing the subtraction. */
    Interval lhs_support = intervalOf(gc, lhs_gate, interval_memo);
    Interval rhs_support = intervalOf(gc, rhs_gate, interval_memo);
    if (lhs_support.isAll() && rhs_support.isAll()) continue;

    Interval difference = sub(lhs_support, rhs_support);
    const double prob = decideCmp(difference, op);
    if (!std::isnan(prob))
      settle(cmp, prob);
  }

  /* Phase 2: joint-conjunction pass.  Each comparator on its own may
   * leave a non-empty range while the conjunction of several of them
   * is empty; isAndJointlyInfeasible detects that per gate_times.
   * Ids are snapshotted first because resolveGateToZero changes the
   * gate type, and the type is re-tested before use as a cheap guard. */
  const auto gate_count_after = gc.getNbGates();
  std::vector<gate_t> conjunctions;
  for (std::size_t idx = 0; idx < gate_count_after; ++idx) {
    const auto candidate = static_cast<gate_t>(idx);
    if (gc.getGateType(candidate) == gate_times)
      conjunctions.push_back(candidate);
  }
  for (const gate_t conj : conjunctions) {
    if (gc.getGateType(conj) != gate_times) continue;
    if (!isAndJointlyInfeasible(gc, conj)) continue;
    gc.resolveGateToZero(conj);
    ++resolved;
  }

  return resolved;
}


std::pair<double, double>


compute_support(const GenericCircuit &gc, gate_t root,

                std::optional<gate_t> event_root)

{

  std::unordered_map<gate_t, Interval> cache;

  Interval iv = intervalOf(gc, root, cache);


  /* Conditional path: intersect with the event's AND-conjunct

   * constraints on @p root.  Walks event_root collecting `rv op c`

   * cmps; non-target constraints are ignored (they affect P(event)

   * but not the truncation of root's distribution).  Even if the

   * walk is "incomplete" (gate_plus / gate_monus / arith encountered)

   * the result is sound: we're computing a SUPERSET bound on the

   * conditional support, and the unconditional support is already a

   * superset, so the intersection of the collected constraints with

   * the unconditional is also a superset. */

  if (event_root.has_value()) {

    std::unordered_map<gate_t, Interval> rv_intervals;

    bool complete;

    walkAndConjunctIntervals(gc, *event_root, rv_intervals, cache, complete);

    auto it = rv_intervals.find(root);

    if (it != rv_intervals.end()) {

      iv.lo = std::max(iv.lo, it->second.lo);

      iv.hi = std::min(iv.hi, it->second.hi);

      /* Defensively clamp to avoid an inverted interval if a buggy

       * walker produced one; should not happen but cheap. */

      if (iv.lo > iv.hi) iv.lo = iv.hi;

    }

  }


  return {iv.lo, iv.hi};

}


std::optional<std::pair<double, double>>


collectRvConstraints(const GenericCircuit &gc, gate_t event_root,

                     gate_t target_rv)

{

  std::unordered_map<gate_t, Interval> rv_intervals;

  std::unordered_map<gate_t, Interval> support_cache;

  bool complete;

  walkAndConjunctIntervals(gc, event_root, rv_intervals, support_cache,

                           complete);

  if (!complete) return std::nullopt;

  /* If the walk found no cmp constraining target_rv, the conditional

   * support is the unconditional support (the event is independent

   * of target_rv along the recognised structure).  Returning the

   * unconditional interval lets the moment closed-form path

   * short-circuit to the unconditional moment, matching the

   * mathematical truth. */

  auto it = rv_intervals.find(target_rv);

  Interval iv;

  if (it != rv_intervals.end()) {

    iv = it->second;

    /* Intersect with the RV's own support to be safe (event may

     * over-constrain past the support, e.g. `Exp(λ) < -1`). */

    Interval base = intervalOf(gc, target_rv, support_cache);

    iv.lo = std::max(iv.lo, base.lo);

    iv.hi = std::min(iv.hi, base.hi);

    if (iv.lo > iv.hi) iv.lo = iv.hi;

  } else {

    iv = intervalOf(gc, target_rv, support_cache);

  }

  return std::make_pair(iv.lo, iv.hi);

}


/**
 * @brief Parse @p s as a finite @c double, consuming the whole string.
 *
 * Shared by @c extract_finite_double and @c extract_mulinput_value so
 * that both apply exactly the same acceptance rule.  Returns @c false
 * (leaving @p out untouched) when @p s is empty, when @c std::stod
 * rejects it or stops before the end of the string, or when the
 * parsed value is @c NaN / @c ±Infinity.
 */
static bool parse_finite_double_text(const std::string &s, double &out)
{
  if (s.empty()) return false;
  try {
    std::size_t consumed = 0;
    const double v = std::stod(s, &consumed);
    /* Trailing garbage and non-finite values are both rejected. */
    if (consumed != s.size() || !std::isfinite(v)) return false;
    out = v;
    return true;
  } catch (...) {
    /* Best-effort by design: any parse failure means "no value". */
    return false;
  }
}

/**
 * @brief Parse a @c gate_value's @c extra as a finite @c float8.
 *
 * Sibling of @c extract_constant_double in @c having_semantics.cpp but
 * with a const @c GenericCircuit ref (used in the closed-form shape
 * detector path).  Bails on @c NaN / @c ±Infinity so a downstream
 * stem renderer never sees a non-finite x coordinate.
 *
 * @param gc   Circuit holding the gate.
 * @param x    Gate to read; anything but a @c gate_value is rejected.
 * @param out  Receives the parsed value on success, untouched otherwise.
 * @return @c true iff @p x is a @c gate_value whose extra parses
 *         entirely as a finite double.
 */
static bool extract_finite_double(const GenericCircuit &gc, gate_t x,
                                  double &out)
{
  if (gc.getGateType(x) != gate_value) return false;
  return parse_finite_double_text(gc.getExtra(x), out);
}

/**
 * @brief Same parsing applied to a mulinput's outcome label (categorical).
 *
 * @param gc   Circuit holding the gate.
 * @param mul  Gate to read; anything but a @c gate_mulinput is rejected.
 * @param out  Receives the parsed value on success, untouched otherwise.
 * @return @c true iff @p mul is a @c gate_mulinput whose extra parses
 *         entirely as a finite double.
 */
static bool extract_mulinput_value(const GenericCircuit &gc, gate_t mul,
                                   double &out)
{
  if (gc.getGateType(mul) != gate_mulinput) return false;
  return parse_finite_double_text(gc.getExtra(mul), out);
}


/*
 * Recognise a bare gate_rv root, optionally conditioned on an event.
 *
 * Returns std::nullopt when root is not a gate_rv, when its
 * distribution spec cannot be parsed, when the event has been folded
 * to gate_zero, when the constraint walk is incomplete, or when the
 * resulting interval is not strictly increasing.  Otherwise returns
 * the parsed spec together with either its natural support
 * (truncated == false) or the conditioned interval
 * (truncated == true).
 */
std::optional<TruncatedSingleRv>
matchTruncatedSingleRv(const GenericCircuit &gc, gate_t root,
                       std::optional<gate_t> event_root)
{
  if (gc.getGateType(root) != gate_rv) return std::nullopt;

  const auto spec = parse_distribution_spec(gc.getExtra(root));
  if (!spec) return std::nullopt;

  const bool unconditional =
      !event_root.has_value() || gc.getGateType(*event_root) == gate_one;

  if (unconditional) {
    /* Natural support per family: Normal is unbounded on both sides,
     * Uniform sits exactly on its two parameters, Exponential and
     * Erlang live on [0, +inf). */
    constexpr double inf = std::numeric_limits<double>::infinity();
    double natural_lo = -inf;
    double natural_hi = +inf;
    const auto family = spec->kind;
    if (family == DistKind::Uniform) {
      natural_lo = spec->p1;
      natural_hi = spec->p2;
    } else if (family == DistKind::Exponential ||
               family == DistKind::Erlang) {
      natural_lo = 0.0;
    }
    return TruncatedSingleRv{*spec, natural_lo, natural_hi,
                             /*truncated=*/false};
  }

  /* An event folded to gate_zero upstream is infeasible, so the
   * conditional distribution is undefined.  This has to be tested
   * explicitly: the constraint walker skips gate_zero just like
   * gate_one, and collectRvConstraints would otherwise silently
   * report the natural support. */
  if (gc.getGateType(*event_root) == gate_zero) return std::nullopt;

  const auto bounds = collectRvConstraints(gc, *event_root, root);
  if (!bounds.has_value() || !(bounds->first < bounds->second))
    return std::nullopt;

  return TruncatedSingleRv{*spec, bounds->first, bounds->second,
                           /*truncated=*/true};
}


/*
 * True iff the conditioning event can be shown to be infeasible.
 *
 * Two signals are recognised:
 *  - the event itself is gate_zero (folded to false upstream); this
 *    holds whatever the type of root is;
 *  - root is a bare gate_rv and the interval the event leaves for it,
 *    intersected with its support, is not strictly increasing.
 * An absent or gate_one event, a non-gate_rv root, and an incomplete
 * constraint walk all answer false.
 */
bool eventIsProvablyInfeasible(const GenericCircuit &gc, gate_t root,
                               std::optional<gate_t> event_root)
{
  if (!event_root.has_value()) return false;

  const auto event_type = gc.getGateType(*event_root);
  if (event_type == gate_zero) return true;

  /* The interval-based signal only applies to a bare gate_rv root
   * under a non-trivial event. */
  if (event_type == gate_one || gc.getGateType(root) != gate_rv)
    return false;

  const auto bounds = collectRvConstraints(gc, *event_root, root);
  return bounds.has_value() && !(bounds->first < bounds->second);
}


/**
 * @brief Probability mass an unconditional shape puts on @c [lo, hi].
 *
 * Per alternative:
 *  - @c TruncatedSingleRv: difference of the CDF at the two ends of
 *    the overlap between @c [lo, hi] and the arm's own @c [lo, hi];
 *    an infinite end contributes 0 (lower) or 1 (upper) without a CDF
 *    call.  The arm is expected to carry @c truncated == @c false:
 *    feeding an already-truncated arm would truncate twice.
 *  - @c DiracShape: 1 if the point lies in the closed interval, else 0.
 *  - @c CategoricalShape: sum of the masses of the outcomes lying in
 *    the closed interval.
 *  - @c BernoulliMixtureShape: @c p-weighted combination of the masses
 *    of its two arms (computed recursively).
 *
 * @return The mass, or @c std::nullopt when @c cdfAt yields NaN for a
 *         leaf (no closed-form CDF for that spec, e.g. a non-integer
 *         Erlang shape).
 */
static std::optional<double>
shape_mass(const ClosedFormShape &s, double lo, double hi)
{
  /* Closed-interval membership, shared by the point-mass arms. */
  const auto inside = [lo, hi](double x) { return x >= lo && x <= hi; };

  return std::visit([&](const auto &arm) -> std::optional<double> {
    using Arm = std::decay_t<decltype(arm)>;
    if constexpr (std::is_same_v<Arm, DiracShape>) {
      return inside(arm.value) ? 1.0 : 0.0;
    } else if constexpr (std::is_same_v<Arm, CategoricalShape>) {
      double acc = 0.0;
      for (const auto &[value, weight] : arm.outcomes) {
        if (inside(value)) acc += weight;
      }
      return acc;
    } else if constexpr (std::is_same_v<Arm, BernoulliMixtureShape>) {
      const auto left_mass  = shape_mass(*arm.left,  lo, hi);
      const auto right_mass = shape_mass(*arm.right, lo, hi);
      if (!left_mass || !right_mass) return std::nullopt;
      return arm.p * (*left_mass) + (1.0 - arm.p) * (*right_mass);
    } else if constexpr (std::is_same_v<Arm, TruncatedSingleRv>) {
      const double from = std::max(lo, arm.lo);
      const double to   = std::min(hi, arm.hi);
      if (!(from < to)) return 0.0;  /* empty or single-point overlap */
      const double cdf_from = std::isfinite(from) ? cdfAt(arm.spec, from) : 0.0;
      const double cdf_to   = std::isfinite(to)   ? cdfAt(arm.spec, to)   : 1.0;
      if (std::isnan(cdf_from) || std::isnan(cdf_to)) return std::nullopt;
      return cdf_to - cdf_from;
    }
    return std::nullopt;
  }, s);
}


/**
 * @brief Shape obtained by conditioning the underlying variable on
 *        lying in @c [lo, hi].
 *
 * Per alternative:
 *  - @c TruncatedSingleRv: the arm's interval is intersected with
 *    @c [lo, hi] and the result is flagged truncated; an overlap that
 *    is not strictly increasing yields @c nullopt.
 *  - @c DiracShape: kept unchanged when the point lies in the closed
 *    interval, @c nullopt otherwise.
 *  - @c CategoricalShape: outcomes outside the closed interval are
 *    dropped and the remaining masses are renormalised to sum to 1.
 *  - @c BernoulliMixtureShape: each arm is truncated recursively and
 *    the Bernoulli parameter is updated to
 *    @f$ \pi' = \pi Z_L / (\pi Z_L + (1-\pi) Z_R) @f$, where
 *    @f$ Z_L, Z_R @f$ are the arm masses on the interval.  When one
 *    arm is eliminated the result is just the surviving arm.
 *
 * @return The truncated shape, or @c std::nullopt when nothing
 *         survives the truncation or an arm mass cannot be computed
 *         (the caller can treat this as infeasibility).
 */
static std::optional<ClosedFormShape>
truncateShape(const ClosedFormShape &s, double lo, double hi)
{
  return std::visit([&](const auto &arm) -> std::optional<ClosedFormShape> {
    using Arm = std::decay_t<decltype(arm)>;
    if constexpr (std::is_same_v<Arm, DiracShape>) {
      const bool inside = !(arm.value < lo || arm.value > hi);
      if (!inside) return std::nullopt;
      return ClosedFormShape{arm};
    } else if constexpr (std::is_same_v<Arm, TruncatedSingleRv>) {
      const double from = std::max(lo, arm.lo);
      const double to   = std::min(hi, arm.hi);
      if (!(from < to)) return std::nullopt;
      return ClosedFormShape{
          TruncatedSingleRv{arm.spec, from, to, /*trunc=*/true}};
    } else if constexpr (std::is_same_v<Arm, CategoricalShape>) {
      CategoricalShape kept;
      double kept_mass = 0.0;
      for (const auto &[value, weight] : arm.outcomes) {
        if (value >= lo && value <= hi) {
          kept.outcomes.emplace_back(value, weight);
          kept_mass += weight;
        }
      }
      if (kept.outcomes.empty() || !(kept_mass > 0.0)) return std::nullopt;
      for (auto &outcome : kept.outcomes) outcome.second /= kept_mass;
      return ClosedFormShape{std::move(kept)};
    } else if constexpr (std::is_same_v<Arm, BernoulliMixtureShape>) {
      const auto left_mass  = shape_mass(*arm.left,  lo, hi);
      const auto right_mass = shape_mass(*arm.right, lo, hi);
      if (!left_mass || !right_mass) return std::nullopt;

      const double left_weight  = arm.p         * (*left_mass);
      const double right_weight = (1.0 - arm.p) * (*right_mass);
      const double norm = left_weight + right_weight;
      if (!(norm > 0.0)) return std::nullopt;

      auto left_cut  = truncateShape(*arm.left,  lo, hi);
      auto right_cut = truncateShape(*arm.right, lo, hi);
      /* An eliminated arm leaves the other one as the whole result;
       * if both are gone the empty optional is what gets returned. */
      if (!left_cut)  return right_cut;
      if (!right_cut) return left_cut;

      BernoulliMixtureShape reweighted;
      reweighted.p     = left_weight / norm;
      reweighted.left  = std::make_shared<ClosedFormShape>(std::move(*left_cut));
      reweighted.right = std::make_shared<ClosedFormShape>(std::move(*right_cut));
      return ClosedFormShape{std::move(reweighted)};
    }
    return std::nullopt;
  }, s);
}


std::optional<ClosedFormShape>


matchClosedFormDistribution(const GenericCircuit &gc, gate_t root,

                            std::optional<gate_t> event_root)

{

  /* Test "event is trivial true": either absent, or resolved to

   * gate_one by load-time simplification. */

  const bool event_trivial = !event_root.has_value()

                          || gc.getGateType(*event_root) == gate_one;


  /* Bare gate_rv root: delegate to the existing single-RV matcher

   * so the truncation logic (collectRvConstraints) is the single

   * source of truth across the closed-form-shape surface. */

  if (gc.getGateType(root) == gate_rv) {

    auto m = matchTruncatedSingleRv(gc, root, event_root);

    if (!m) return std::nullopt;

    return ClosedFormShape{*m};

  }


  /* Helper: match the shape unconditionally first, then if the event

   * is non-trivial extract an interval via collectRvConstraints and

   * apply truncateShape.  Used by the Dirac / categorical / mixture

   * branches below so all three honour conditioning through the same

   * pipeline. */

  auto with_optional_truncation =

    [&](std::optional<ClosedFormShape> unc)

      -> std::optional<ClosedFormShape> {

    if (!unc) return std::nullopt;

    if (event_trivial) return unc;

    auto iv = collectRvConstraints(gc, *event_root, root);

    if (!iv.has_value()) return std::nullopt;

    if (!(iv->first < iv->second)) return std::nullopt;

    return truncateShape(*unc, iv->first, iv->second);

  };


  /* Dirac point: a gate_value with extra parseable as a finite

   * float8 (the underlying form of as_random(c)).  Conditioning on

   * a constant is normally folded upstream by RangeCheck to

   * gate_one / gate_zero, but a probabilistic event whose footprint

   * doesn't constrain the constant lands here untouched (the cmp

   * walker returns the unconditional support); truncateShape then

   * keeps the Dirac iff its value falls in the recognised interval. */

  if (gc.getGateType(root) == gate_value) {

    double v;

    if (!extract_finite_double(gc, root, v)) return std::nullopt;

    return with_optional_truncation(ClosedFormShape{DiracShape{v}});

  }


  /* gate_mixture: either the explicit categorical form

   * (isCategoricalMixture) or the classic Bernoulli triple

   * [p_token, x_token, y_token]. */

  if (gc.getGateType(root) == gate_mixture) {

    const auto &w = gc.getWires(root);


    if (gc.isCategoricalMixture(root)) {

      CategoricalShape cs;

      cs.outcomes.reserve(w.size() - 1);

      for (std::size_t i = 1; i < w.size(); ++i) {

        double v;

        if (!extract_mulinput_value(gc, w[i], v)) return std::nullopt;

        double p = gc.getProb(w[i]);

        if (!std::isfinite(p) || p < 0.0 || p > 1.0) return std::nullopt;

        cs.outcomes.emplace_back(v, p);

      }

      if (cs.outcomes.empty()) return std::nullopt;

      return with_optional_truncation(ClosedFormShape{std::move(cs)});

    }


    /* Classic Bernoulli mixture: 3 wires, [p_token, x_token, y_token]

     * with p_token a bare gate_input; compound Boolean p bails (the

     * generic path would need a probability-over-Boolean-circuit

     * pre-pass we deliberately do not run here). */

    if (w.size() != 3) return std::nullopt;

    if (gc.getGateType(w[0]) != gate_input) return std::nullopt;

    double p = gc.getProb(w[0]);

    if (!std::isfinite(p) || p < 0.0 || p > 1.0) return std::nullopt;


    auto left  = matchClosedFormDistribution(gc, w[1], std::nullopt);

    auto right = matchClosedFormDistribution(gc, w[2], std::nullopt);

    if (!left || !right) return std::nullopt;


    BernoulliMixtureShape m;

    m.p     = p;

    m.left  = std::make_shared<ClosedFormShape>(std::move(*left));

    m.right = std::make_shared<ClosedFormShape>(std::move(*right));

    return with_optional_truncation(ClosedFormShape{std::move(m)});

  }


  return std::nullopt;

}


}  // namespace provsql


extern "C" {


/**
 * @brief SQL: rv_support(token uuid, prov uuid, OUT lo float8, OUT hi float8)
 *
 * Builds the joint circuit of @p token (the scalar root) and @p prov
 * (the conditioning event), computes the support interval of the root
 * narrowed by the event's AND-conjunct comparators on it, and returns
 * it as a two-column row.  When @p prov is @c gate_one in the loaded
 * circuit, no event is passed on and the result is the unconditional
 * support of @p token.
 *
 * Unbounded ends are reported as @c -Infinity / @c +Infinity float8
 * values (e.g. @c [-Infinity, +Infinity] for a normal RV).
 *
 * Any C++ exception is converted into a @c provsql_error carrying the
 * @c "rv_support: " prefix.
 */
Datum rv_support(PG_FUNCTION_ARGS)
{
  try {
    pg_uuid_t *token_uuid = PG_GETARG_UUID_P(0);
    pg_uuid_t *event_uuid = PG_GETARG_UUID_P(1);

    gate_t root_gate, event_gate;
    auto circuit = getJointCircuit(*token_uuid, *event_uuid,
                                   root_gate, event_gate);

    /* A gate_one event is the trivial "always true" conditioning,
     * whether passed as such or produced by load-time simplification:
     * leave the optional empty so the unconditional path is taken. */
    const std::optional<gate_t> conditioning =
        (circuit.getGateType(event_gate) == gate_one)
            ? std::optional<gate_t>{}
            : std::optional<gate_t>{event_gate};

    const auto support =
        provsql::compute_support(circuit, root_gate, conditioning);

    TupleDesc row_desc;
    if (get_call_result_type(fcinfo, NULL, &row_desc) != TYPEFUNC_COMPOSITE)
      provsql_error("rv_support: expected composite return type");
    row_desc = BlessTupleDesc(row_desc);

    Datum columns[2] = {Float8GetDatum(support.first),
                        Float8GetDatum(support.second)};
    bool  is_null[2] = {false, false};

    PG_RETURN_DATUM(HeapTupleGetDatum(
        heap_form_tuple(row_desc, columns, is_null)));
  } catch (const std::exception &e) {
    provsql_error("rv_support: %s", e.what());
  } catch (...) {
    provsql_error("rv_support: unknown exception");
  }
  PG_RETURN_NULL();
}


}  // extern "C"

cmpOpFromOid
ComparisonOperator cmpOpFromOid(Oid op_oid, bool &ok)
Map a PostgreSQL comparison-operator OID to a ComparisonOperator.
Definition Aggregation.cpp:66

getAggregationOperator
AggregationOperator getAggregationOperator(Oid oid)
Map a PostgreSQL aggregate function OID to an AggregationOperator.
Definition Aggregation.cpp:29

Aggregation.h
Typed aggregation value, operator, and aggregator abstractions.

AggregationOperator
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50

AggregationOperator::MAX
@ MAX
MAX → input type.
Definition Aggregation.h:54

AggregationOperator::COUNT
@ COUNT
COUNT(*) or COUNT(expr) → integer.
Definition Aggregation.h:51

AggregationOperator::SUM
@ SUM
SUM → integer or float.
Definition Aggregation.h:52

AggregationOperator::MIN
@ MIN
MIN → input type.
Definition Aggregation.h:53

ComparisonOperator
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38

ComparisonOperator::EQ
@ EQ
Equal (=).
Definition Aggregation.h:39

ComparisonOperator::LT
@ LT
Less than (<).
Definition Aggregation.h:42

ComparisonOperator::GT
@ GT
Greater than (>).
Definition Aggregation.h:44

ComparisonOperator::LE
@ LE
Less than or equal (<=).
Definition Aggregation.h:41

ComparisonOperator::NE
@ NE
Not equal (<>).
Definition Aggregation.h:40

ComparisonOperator::GE
@ GE
Greater than or equal (>=).
Definition Aggregation.h:43

AnalyticEvaluator.h
Closed-form CDF resolution for trivial gate_cmp shapes.

cache
static CircuitCache cache
Process-local singleton circuit gate cache.
Definition CircuitCache.cpp:72

getJointCircuit
GenericCircuit getJointCircuit(pg_uuid_t root_token, pg_uuid_t event_token, gate_t &root_gate, gate_t &event_gate)
Build a GenericCircuit containing the closures of two roots, with shared subgraphs unified.
Definition CircuitFromMMap.cpp:229

CircuitFromMMap.h
Build in-memory circuits from the mmap-backed persistent store.

gate_t
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:49

RandomVariable.h
Continuous random-variable helpers (distribution parsing, moments).

rv_support
Datum rv_support(PG_FUNCTION_ARGS)
SQL: rv_support(token uuid, prov uuid, OUT lo float8, OUT hi float8).
Definition RangeCheck.cpp:1456

RangeCheck.h
Support-based bound check for continuous-RV comparators.

CircuitCache::end
iterator end()
Past-the-end iterator for the cache.
Definition CircuitCache.h:135

Circuit::getWires
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
Definition Circuit.h:140

Circuit::getGateType
gateType getGateType(gate_t g) const
Return the type of gate g.
Definition Circuit.h:130

Circuit::getNbGates
std::vector< gate_t >::size_type getNbGates() const
Return the total number of gates in the circuit.
Definition Circuit.h:103

GenericCircuit
In-memory provenance circuit with semiring-generic evaluation.
Definition GenericCircuit.h:49

GenericCircuit::resolveGateToZero
void resolveGateToZero(gate_t g)
Replace an arbitrary gate (typically gate_times) by gate_zero.
Definition GenericCircuit.h:206

GenericCircuit::isCategoricalMixture
bool isCategoricalMixture(gate_t g) const
Test whether g is a categorical-form gate_mixture (the explicit provsql.categorical output).
Definition GenericCircuit.h:516

GenericCircuit::getExtra
std::string getExtra(gate_t g) const
Return the string extra for gate g.
Definition GenericCircuit.h:106

GenericCircuit::getProb
double getProb(gate_t g) const
Return the probability for gate g.
Definition GenericCircuit.h:144

GenericCircuit::resolveCmpToBernoulli
void resolveCmpToBernoulli(gate_t g, double p)
Replace a gate_cmp by a constant Boolean leaf (gate_one for p == 1, gate_zero for p == 0) or by a Ber...
Definition GenericCircuit.h:176

GenericCircuit::getInfos
std::pair< unsigned, unsigned > getInfos(gate_t g) const
Return the integer annotation pair for gate g.
Definition GenericCircuit.h:83

provsql
Definition AnalyticEvaluator.cpp:19

provsql::DistKind::Normal
@ Normal
Normal (Gaussian): p1=μ, p2=σ
Definition RandomVariable.h:29

provsql::DistKind::Exponential
@ Exponential
Exponential: p1=λ, p2 unused.
Definition RandomVariable.h:31

provsql::DistKind::Uniform
@ Uniform
Uniform on [a,b]: p1=a, p2=b.
Definition RandomVariable.h:30

provsql::DistKind::Erlang
@ Erlang
Erlang: p1=k (positive integer), p2=λ.
Definition RandomVariable.h:32

provsql::compute_support
std::pair< double, double > compute_support(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
Compute the [lo, hi] support interval of a scalar sub-circuit rooted at root.
Definition RangeCheck.cpp:1064

provsql::truncateShape
static std::optional< ClosedFormShape > truncateShape(const ClosedFormShape &s, double lo, double hi)
Conditional shape after truncating the underlying variable to [lo, hi].
Definition RangeCheck.cpp:1299

provsql::matchClosedFormDistribution
std::optional< ClosedFormShape > matchClosedFormDistribution(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
Detect any of the closed-form shapes supported by rv_analytical_curves.
Definition RangeCheck.cpp:1350

provsql::extract_mulinput_value
static bool extract_mulinput_value(const GenericCircuit &gc, gate_t mul, double &out)
Parse a mulinput's outcome label (categorical) as a finite float8, using the same parsing as extract_finite_double.
Definition RangeCheck.cpp:1154

provsql::extract_finite_double
static bool extract_finite_double(const GenericCircuit &gc, gate_t x, double &out)
Parse a gate_value's extra as a finite float8.
Definition RangeCheck.cpp:1136

provsql::parseDoubleStrict
double parseDoubleStrict(const std::string &s)
Strictly parse s as a double.
Definition RandomVariable.cpp:17

provsql::runRangeCheck
unsigned runRangeCheck(GenericCircuit &gc)
Run the support-based pruning pass over gc.
Definition RangeCheck.cpp:854

provsql::eventIsProvablyInfeasible
bool eventIsProvablyInfeasible(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
True iff the conditioning event is provably infeasible for a bare gate_rv root.
Definition RangeCheck.cpp:1215

provsql::shape_mass
static std::optional< double > shape_mass(const ClosedFormShape &s, double lo, double hi)
Unconditional probability mass of a shape over the interval [lo, hi].
Definition RangeCheck.cpp:1253

provsql::parse_distribution_spec
std::optional< DistributionSpec > parse_distribution_spec(const std::string &s)
Parse the on-disk text encoding of a gate_rv distribution.
Definition RandomVariable.cpp:59

provsql::collectRvConstraints
std::optional< std::pair< double, double > > collectRvConstraints(const GenericCircuit &gc, gate_t event_root, gate_t target_rv)
Walk event_root collecting rv op c constraints on target_rv.
Definition RangeCheck.cpp:1097

provsql::matchTruncatedSingleRv
std::optional< TruncatedSingleRv > matchTruncatedSingleRv(const GenericCircuit &gc, gate_t root, std::optional< gate_t > event_root)
Detect a closed-form, optionally-truncated single-RV shape.
Definition RangeCheck.cpp:1172

provsql::ClosedFormShape
std::variant< TruncatedSingleRv, DiracShape, CategoricalShape, BernoulliMixtureShape > ClosedFormShape
One of the closed-form shapes the analytical-curves payload can render: bare RV (continuous PDF/CDF),...
Definition RangeCheck.h:200

provsql::cdfAt
double cdfAt(const DistributionSpec &d, double c)
Closed-form CDF $P(X \le c)$ for a basic continuous distribution.
Definition AnalyticEvaluator.cpp:70

provsql_error.h
Uniform error-reporting macros for ProvSQL.

provsql_error
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
Definition provsql_error.h:38

gate_type
gate_type
Definition provsql_migrate_mmap.cpp:71

gate_update
@ gate_update
Definition provsql_migrate_mmap.cpp:74

gate_value
@ gate_value
Definition provsql_migrate_mmap.cpp:74

gate_delta
@ gate_delta
Definition provsql_migrate_mmap.cpp:74

gate_mulinput
@ gate_mulinput
Definition provsql_migrate_mmap.cpp:74

gate_one
@ gate_one
Definition provsql_migrate_mmap.cpp:73

gate_zero
@ gate_zero
Definition provsql_migrate_mmap.cpp:73

gate_agg
@ gate_agg
Definition provsql_migrate_mmap.cpp:73

gate_times
@ gate_times
Definition provsql_migrate_mmap.cpp:72

gate_cmp
@ gate_cmp
Definition provsql_migrate_mmap.cpp:74

gate_semimod
@ gate_semimod
Definition provsql_migrate_mmap.cpp:73

gate_input
@ gate_input
Definition provsql_migrate_mmap.cpp:72

provsql_utils.h
Core types, constants, and utilities shared across ProvSQL.

provsql_arith_op
provsql_arith_op
Arithmetic operator tags used by gate_arith.
Definition provsql_utils.h:91

PROVSQL_ARITH_DIV
@ PROVSQL_ARITH_DIV
binary, child0 / child1
Definition provsql_utils.h:95

PROVSQL_ARITH_PLUS
@ PROVSQL_ARITH_PLUS
n-ary, sum of children
Definition provsql_utils.h:92

PROVSQL_ARITH_NEG
@ PROVSQL_ARITH_NEG
unary, -child0
Definition provsql_utils.h:96

PROVSQL_ARITH_MINUS
@ PROVSQL_ARITH_MINUS
binary, child0 - child1
Definition provsql_utils.h:94

PROVSQL_ARITH_TIMES
@ PROVSQL_ARITH_TIMES
n-ary, product of children
Definition provsql_utils.h:93

gate_rv
@ gate_rv
Continuous random-variable leaf (extra encodes distribution).
Definition provsql_utils.h:71

gate_mixture
@ gate_mixture
Probabilistic mixture: three wires [p_token (gate_input Bernoulli), x_token, y_token]; samples x when...
Definition provsql_utils.h:73

gate_arith
@ gate_arith
n-ary arithmetic gate over scalar-valued children (info1 holds operator tag)
Definition provsql_utils.h:72

provsql_utils_cpp.h
C++ utility functions for UUID manipulation.

pg_uuid_t
UUID structure.
Definition provsql_migrate_mmap.cpp:56

provsql::BernoulliMixtureShape
Bernoulli mixture (gate_mixture with the [p_token, x_token, y_token] shape).
Definition RangeCheck.h:217

provsql::BernoulliMixtureShape::right
std::shared_ptr< ClosedFormShape > right
Definition RangeCheck.h:220

provsql::BernoulliMixtureShape::left
std::shared_ptr< ClosedFormShape > left
Definition RangeCheck.h:219

provsql::BernoulliMixtureShape::p
double p
Definition RangeCheck.h:218

provsql::CategoricalShape
Categorical distribution over a finite outcome set.
Definition RangeCheck.h:188

provsql::CategoricalShape::outcomes
std::vector< std::pair< double, double > > outcomes
(value, mass) pairs
Definition RangeCheck.h:189

provsql::DiracShape
Point mass at a finite scalar value (a gate_value root, or an as_random(c) leaf surfaced as a gate_va...
Definition RangeCheck.h:172

provsql::TruncatedSingleRv
Detection result for a closed-form, optionally-truncated single-RV shape.
Definition RangeCheck.h:101