ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
having_semantics.hpp
Go to the documentation of this file.
1/**
2 * @file having_semantics.hpp
3 * @brief Provenance evaluation helper for HAVING-clause circuits.
4 *
5 * When a query includes a HAVING clause, ProvSQL creates a special
6 * sub-circuit that encodes the aggregate predicate. Before the main
7 * provenance circuit can be evaluated over a semiring, these HAVING
8 * sub-circuits must first be evaluated to determine which groups
9 * pass the filter.
10 *
11 * The single public entry point @c provsql_having() rewrites HAVING
12 * comparison gates in the circuit by enumerating possible worlds, for
13 * any compiled semiring. If the HAVING gate type is incompatible with
14 * the requested semiring, the function is a no-op.
15 */
16#ifndef PROVSQL_HAVING_SEMANTICS_HPP
17#define PROVSQL_HAVING_SEMANTICS_HPP
18
19#include <vector>
20
21#include "GenericCircuit.hpp"
22#include "subset.hpp"
23
24/** @cond INTERNAL */
25namespace provsql_having_detail {
26std::vector<gate_t> collect_sp_cmp_gates(GenericCircuit &c, gate_t start);
27bool extract_constant_C(GenericCircuit &c, gate_t x, int &C_out);
28bool extract_constant_double(GenericCircuit &c, gate_t x, double &C_out);
29bool semimod_extract_M_and_K(GenericCircuit &c, gate_t semimod_gate, int &m_out, gate_t &k_gate_out);
30ComparisonOperator map_cmp_op(GenericCircuit &c, gate_t cmp_gate, bool &ok);
32}
33/** @endcond */
34
35/**
36 * @brief Rewrite HAVING comparison gates in the circuit by enumerating possible worlds.
37 *
38 * @tparam SemiringT The semiring type used for evaluation.
39 * @tparam MapT The provenance mapping type (gate_t → semiring value).
40 * @param c Circuit to rewrite.
41 * @param g Root gate of the sub-circuit to inspect.
42 * @param mapping Provenance mapping updated in place.
43 * @param S Semiring instance (default-constructed for stateless semirings).
44 */
45template <typename SemiringT, typename MapT>
48 gate_t g,
49 MapT &mapping,
50 SemiringT S = SemiringT{})
51{
52 using namespace provsql_having_detail;
53
54 std::vector<gate_t> cmp_gates = collect_sp_cmp_gates(c, g);
55
56 if (cmp_gates.empty())
57 return;
58
59 auto pw_from_cmp_gate = [&](gate_t cmp_gate, typename SemiringT::value_type &pw_out) -> bool {
60 const auto &cw = c.getWires(cmp_gate);
61 if (cw.size() != 2) return false;
62
63 gate_t L = cw[0];
64 gate_t R = cw[1];
65
66 bool okop = false;
67 ComparisonOperator op = map_cmp_op(c, cmp_gate, okop);
68 if (!okop) return false;
69
70 auto build_from = [&](gate_t agg_side, gate_t const_side, ComparisonOperator effective_op) -> bool {
71 int C = 0;
72 if (!extract_constant_C(c, const_side, C)) return false;
73
74 if (c.getGateType(agg_side) != gate_agg) return false;
75
76 const auto &children = c.getWires(agg_side);
77
78 std::vector<long> mvals;
79 mvals.reserve(children.size());
80
81 std::vector<typename SemiringT::value_type> kvals;
82 kvals.reserve(children.size());
83
84 for (gate_t ch : children) {
85 if (c.getGateType(ch) != gate_semimod) return false;
86
87 int m = 0;
88 gate_t k_gate{};
89 if (!semimod_extract_M_and_K(c, ch, m, k_gate)) return false;
90
91 auto kval = c.evaluate<SemiringT>(k_gate, mapping, S);
92
93 mvals.push_back(m);
94 kvals.push_back(std::move(kval));
95 }
96
97 AggregationOperator agg_kind = getAggregationOperator(c.getInfos(agg_side).first);
98
99 if(agg_kind==AggregationOperator::SUM) {
100 // COUNT(*) is simulated by SUM of 1s
101 bool all_one_mvals = true;
102 for (int m : mvals) {
103 if (m != 1) { all_one_mvals = false; break; }
104 }
105 agg_kind = all_one_mvals ? AggregationOperator::COUNT : AggregationOperator::SUM;
106 }
107
108 bool upset = false;
109 auto worlds = enumerate_valid_worlds(mvals, C, effective_op, agg_kind, S.absorptive(), upset);
110
111 if (worlds.empty()) {
112 pw_out = S.zero();
113 return true;
114 }
115
116 std::vector<typename SemiringT::value_type> disjuncts;
117 disjuncts.reserve(worlds.size());
118
119 const size_t n = kvals.size();
120
121 for (auto mask : worlds) {
122 std::vector<typename SemiringT::value_type> present;
123 std::vector<typename SemiringT::value_type> missing;
124 present.reserve(n);
125 missing.reserve(n);
126
127 for (size_t i = 0; i < n; ++i) {
128 if (mask[i]) {
129 if(kvals[i]!=S.one())
130 present.push_back(kvals[i]);
131 } else if(upset) {
132 // The world enumeration produced an upset generated by a subset
133 } else if ((op==ComparisonOperator::GE || op==ComparisonOperator::GT) && S.absorptive() &&
134 agg_kind==AggregationOperator::MAX) {
135 // Monotonously increasing behavior: do not add anything to missing
136 } else if((op==ComparisonOperator::LE || op==ComparisonOperator::LT) && S.absorptive() &&
137 agg_kind==AggregationOperator::MIN) {
138 // Monotonously decreasing behavior: do not add anything to missing
139 } else {
140 if(kvals[i]!=S.zero())
141 missing.push_back(kvals[i]);
142 }
143 }
144
145 auto present_prod = S.times(present);
146
147 if (missing.empty()) {
148 disjuncts.push_back(std::move(present_prod));
149 } else {
150 auto missing_sum = S.plus(missing);
151 auto monus_factor = S.monus(S.one(), missing_sum);
152 auto term = monus_factor;
153 if(present_prod!=S.one())
154 term = S.times(std::vector<typename SemiringT::value_type>{present_prod, monus_factor});
155 disjuncts.push_back(std::move(term));
156 }
157 }
158
159 pw_out = S.plus(disjuncts);
160 return true;
161 };
162
163 if (c.getGateType(L) == gate_agg)
164 return build_from(L, R, op);
165
166 if (c.getGateType(R) == gate_agg)
167 return build_from(R, L, flip_op(op));
168
169 return false;
170 };
171
172 for (gate_t cmp_gate : cmp_gates) {
173 typename SemiringT::value_type pw;
174 if (!pw_from_cmp_gate(cmp_gate, pw))
175 return;
176
177 mapping[cmp_gate] = std::move(pw);
178 }
179}
180
181#endif
AggregationOperator getAggregationOperator(Oid oid)
Map a PostgreSQL aggregate function OID to an AggregationOperator.
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50
@ MAX
MAX → input type.
Definition Aggregation.h:54
@ COUNT
COUNT(*) or COUNT(expr) → integer.
Definition Aggregation.h:51
@ SUM
SUM → integer or float.
Definition Aggregation.h:52
@ MIN
MIN → input type.
Definition Aggregation.h:53
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38
@ LT
Less than (<).
Definition Aggregation.h:42
@ GT
Greater than (>).
Definition Aggregation.h:44
@ LE
Less than or equal (<=).
Definition Aggregation.h:41
@ GE
Greater than or equal (>=).
Definition Aggregation.h:43
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:49
Template implementation of GenericCircuit::evaluate().
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
Definition Circuit.h:140
gateType getGateType(gate_t g) const
Return the type of gate g.
Definition Circuit.h:130
In-memory provenance circuit with semiring-generic evaluation.
S::value_type evaluate(gate_t g, std::unordered_map< gate_t, typename S::value_type > &provenance_mapping, S semiring) const
Evaluate the sub-circuit rooted at gate g over semiring semiring.
std::pair< unsigned, unsigned > getInfos(gate_t g) const
Return the integer annotation pair for gate g.
void provsql_having(GenericCircuit &c, gate_t g, MapT &mapping, SemiringT S=SemiringT{})
Rewrite HAVING comparison gates in the circuit by enumerating possible worlds.
bool extract_constant_C(GenericCircuit &c, gate_t x, int &C_out)
bool extract_constant_double(GenericCircuit &c, gate_t x, double &C_out)
ComparisonOperator flip_op(ComparisonOperator op)
std::vector< gate_t > collect_sp_cmp_gates(GenericCircuit &c, gate_t start)
bool semimod_extract_M_and_K(GenericCircuit &c, gate_t semimod_gate, int &m_out, gate_t &k_gate_out)
ComparisonOperator map_cmp_op(GenericCircuit &c, gate_t cmp_gate, bool &ok)
std::vector< mask_t > enumerate_valid_worlds(const std::vector< long > &values, int constant, ComparisonOperator op, AggregationOperator agg_kind, bool absorptive, bool &upset)
Enumerate all subsets of n tuples satisfying an aggregate predicate.
Definition subset.cpp:356
Enumerate tuple subsets satisfying an aggregate HAVING predicate.