ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
having_semantics.cpp
Go to the documentation of this file.
1/**
2 * @file having_semantics.cpp
3 * @brief HAVING-clause provenance evaluation for all built-in semirings.
4 *
5 * Implements the five @c provsql_try_having_*() functions declared in
6 * @c having_semantics.hpp, one per supported semiring:
7 * - @c provsql_try_having_formula()
8 * - @c provsql_try_having_counting()
9 * - @c provsql_try_having_why()
10 * - @c provsql_try_having_boolexpr()
11 * - @c provsql_try_having_boolean()
12 *
13 * Each function evaluates the sub-circuit rooted at the given
14 * @c gate_agg / @c gate_semimod gate using @c enumerate_valid_worlds()
15 * (for exact predicate testing) and populates the provenance mapping.
16 *
17 * An anonymous-namespace helper @c provsql_try_having_internal() provides
18 * the shared logic; the five public functions are thin wrappers that
19 * instantiate it for the appropriate semiring type.
20 */
21extern "C" {
22#include "postgres.h"
23#include "utils/lsyscache.h"
24}
25
26#include <vector>
27#include <string>
28#include <unordered_map>
29#include <unordered_set>
30
31#include "having_semantics.hpp"
32#include "provsql_utils_cpp.h"
33#include "subset.hpp"
34#include "semiring/Boolean.h"
35#include "semiring/BoolExpr.h"
36#include "semiring/Counting.h"
37#include "semiring/Formula.h"
38#include "semiring/Why.h"
39
40namespace {
41// Parse int from string
42static int parse_int_strict(const std::string &s, bool &ok) {
43 ok = false;
44 if (s.empty()) return 0;
45 size_t idx = 0;
46 try {
47 int v = std::stoi(s, &idx);
48 if (idx != s.size()) return 0;
49 ok = true;
50 return v;
51 } catch (...) {
52 return 0;
53 }
54}
55
56// Map a cmp gate’s Postgres operator to subset.cpp’s ComparisonOperator
57static ComparisonOperator map_cmp_op(GenericCircuit &c, gate_t cmp_gate, bool &ok) {
58 ok = false;
59 auto infos = c.getInfos(cmp_gate);
60
61 char *opname = get_opname(infos.first); // palloc'd string or NULL
62 if (!opname) return ComparisonOperator::EQ;
63
64 std::string s(opname);
65 pfree(opname);
66
67 ok = true;
68 if (s == "=") return ComparisonOperator::EQ;
69 if (s == "<>") return ComparisonOperator::NE;
70 if (s == "<") return ComparisonOperator::LT;
71 if (s == "<=") return ComparisonOperator::LE;
72 if (s == ">") return ComparisonOperator::GT;
73 if (s == ">=") return ComparisonOperator::GE;
74
75 ok = false;
77}
78
79// Flip operator for “C op agg” <=> “agg flip(op) C”
80static ComparisonOperator flip_op(ComparisonOperator op) {
81 switch (op) {
88 }
89 return op;
90}
91
92
93static bool semimod_extract_M_and_K(
95 gate_t semimod_gate,
96 int &m_out,
97 gate_t &k_gate_out)
98{
99 if (c.getGateType(semimod_gate) != gate_semimod) return false;
100
101 const auto &w = c.getWires(semimod_gate);
102 if (w.size() != 2) return false;
103
104 if (c.getGateType(w[1]) != gate_value) return false;
105 bool ok = false;
106 m_out = parse_int_strict(c.getExtra(w[1]), ok);
107 if (!ok) return false;
108
109 k_gate_out = w[0];
110 return true;
111}
112
113// Extract constant C from possible encodings:
114// - gate_value("C")
115// - gate_semimod(C, gate_one)
116// - gate_semimod(gate_one, C)
117static bool extract_constant_C(GenericCircuit &c, gate_t x, int &C_out) {
118
119 if (c.getGateType(x) != gate_semimod)
120 return false;
121
122 const auto &w = c.getWires(x);
123 if (w.size() != 2)
124 return false;
125
126 if (c.getGateType(w[0]) != gate_one)
127 return false;
128
129 if (c.getGateType(w[1]) != gate_value)
130 return false;
131
132 bool ok = false;
133 int v = parse_int_strict(c.getExtra(w[1]), ok);
134 if (!ok)
135 return false;
136
137 C_out = v;
138 return true;
139}
140//collect cmp gates in the prov circuit
141static void collect_sp_cmp_gates(GenericCircuit &c, gate_t start, std::vector<gate_t> &out) {
142 std::vector<gate_t> stack;
143 stack.push_back(start);
144
145 std::unordered_set<gate_t> seen;
146
147 while (!stack.empty()) {
148 gate_t cur = stack.back();
149 stack.pop_back();
150
151 if (!seen.insert(cur).second) continue;
152
153 if (c.getGateType(cur) == gate_cmp) {
154 const auto &cw = c.getWires(cur);
155 if (cw.size() == 2) {
156 gate_t L = cw[0];
157 gate_t R = cw[1];
158
159 int tmpC = 0;
160 bool is_candidate =
161 (c.getGateType(L) == gate_agg && extract_constant_C(c, R, tmpC)) ||
162 (c.getGateType(R) == gate_agg && extract_constant_C(c, L, tmpC));
163
164 if (is_candidate)
165 out.push_back(cur);
166 }
167 }
168
169 const auto &w = c.getWires(cur);
170 for (gate_t ch : w) stack.push_back(ch);
171 }
172}
173
174} // namespace
175
176// ----------------------------------------------------
177// Generic implementation of possible-world
178// semantics for HAVING queries for any semiring defined
179// in src/semiring/
180// --------------------------------------------------
181
182/**
183 * @brief Rewrite HAVING comparison gates in the circuit by enumerating possible worlds.
184 * @tparam SemiringT The semiring type used for evaluation.
185 * @tparam MapT The provenance mapping type (gate_t → semiring value).
186 * @param c Circuit to rewrite.
187 * @param g Root gate of the sub-circuit to inspect.
188 * @param mapping Provenance mapping updated in place.
189 * @param S Semiring instance.
190 */
191template <typename SemiringT, typename MapT>
192static void try_having_impl(
194 gate_t g,
195 MapT &mapping,
196 SemiringT S)
197{
198 std::vector<gate_t> cmp_gates;
199 collect_sp_cmp_gates(c, g, cmp_gates);
200
201 if (cmp_gates.empty())
202 return; // nothing to rewrite
203
204 auto pw_from_cmp_gate = [&](gate_t cmp_gate, typename SemiringT::value_type &pw_out) -> bool {
205 const auto &cw = c.getWires(cmp_gate);
206 if (cw.size() != 2) return false;
207
208 gate_t L = cw[0];
209 gate_t R = cw[1];
210
211 bool okop = false;
212 ComparisonOperator op = map_cmp_op(c, cmp_gate, okop);
213 if (!okop) return false;
214
215 auto build_from = [&](gate_t agg_side, gate_t const_side, ComparisonOperator effective_op) -> bool {
216 int C = 0;
217 if (!extract_constant_C(c, const_side, C)) return false;
218
219 if (c.getGateType(agg_side) != gate_agg) return false;
220
221 const auto &children = c.getWires(agg_side);
222
223 std::vector<long> mvals;
224 mvals.reserve(children.size());
225
226 std::vector<typename SemiringT::value_type> kvals;
227 kvals.reserve(children.size());
228
229 for (gate_t ch : children) {
230 if (c.getGateType(ch) != gate_semimod) return false;
231
232 int m = 0;
233 gate_t k_gate{};
234 if (!semimod_extract_M_and_K(c, ch, m, k_gate)) return false;
235
236 auto kval = c.evaluate<SemiringT>(k_gate, mapping, S);
237
238 mvals.push_back(m);
239 kvals.push_back(std::move(kval));
240 }
241
242 AggregationOperator agg_kind = getAggregationOperator(c.getInfos(agg_side).first);
243
244 if(agg_kind==AggregationOperator::SUM) {
245 // COUNT(*) is simulated by SUM of 1s
246 bool all_one_mvals = true;
247 for (int m : mvals) {
248 if (m != 1) { all_one_mvals = false; break; }
249 }
250 agg_kind = all_one_mvals ? AggregationOperator::COUNT : AggregationOperator::SUM;
251 }
252
253 bool upset = false;
254 auto worlds = enumerate_valid_worlds(mvals, C, effective_op, agg_kind, S.absorptive(), upset);
255
256 if (worlds.empty()) {
257 pw_out = S.zero();
258 return true;
259 }
260
261 std::vector<typename SemiringT::value_type> disjuncts;
262 disjuncts.reserve(worlds.size());
263
264 const size_t n = kvals.size();
265
266 for (auto mask : worlds) {
267 std::vector<typename SemiringT::value_type> present;
268 std::vector<typename SemiringT::value_type> missing;
269 present.reserve(n);
270 missing.reserve(n);
271
272 for (size_t i = 0; i < n; ++i) {
273 if (mask[i]) {
274 if(kvals[i]!=S.one())
275 present.push_back(kvals[i]);
276 } else if(upset) {
277 // The world enumeration produced an upset generated by a subset
278 } else if ((op==ComparisonOperator::GE || op==ComparisonOperator::GT) && S.absorptive() &&
279 agg_kind==AggregationOperator::MAX) {
280 // Monotonously increasing behavior: do not add anything to missing
281 } else if((op==ComparisonOperator::LE || op==ComparisonOperator::LT) && S.absorptive() &&
282 agg_kind==AggregationOperator::MIN) {
283 // Monotonously decreasing behavior: do not add anything to missing
284 } else {
285 if(kvals[i]!=S.zero())
286 missing.push_back(kvals[i]);
287 }
288 }
289
290 auto present_prod = S.times(present);
291
292 if (missing.empty()) {
293 disjuncts.push_back(std::move(present_prod));
294 } else {
295 auto missing_sum = S.plus(missing);
296 auto monus_factor = S.monus(S.one(), missing_sum);
297 auto term = monus_factor;
298 if(present_prod!=S.one())
299 term = S.times(std::vector<typename SemiringT::value_type>{present_prod, monus_factor});
300 disjuncts.push_back(std::move(term));
301 }
302 }
303
304 pw_out = S.plus(disjuncts);
305 return true;
306 };
307
308 if (c.getGateType(L) == gate_agg)
309 return build_from(L, R, op);
310
311 if (c.getGateType(R) == gate_agg)
312 return build_from(R, L, flip_op(op));
313
314 return false;
315 };
316
317 // evaluate all cmp gates individually using pw semantics
318 for (gate_t cmp_gate : cmp_gates) {
319 typename SemiringT::value_type pw;
320 if (!pw_from_cmp_gate(cmp_gate, pw))
321 return;
322
323 mapping[cmp_gate] = std::move(pw);
324 }
325}
326
327//-------------------------
328// Public wrappers
329//-----------------------------
332 gate_t g,
333 std::unordered_map<gate_t, std::string> &mapping)
334{
335 try_having_impl<semiring::Formula>(c, g, mapping, semiring::Formula());
336}
337
340 gate_t g,
341 std::unordered_map<gate_t, unsigned> &mapping)
342{
343 try_having_impl<semiring::Counting>(c, g, mapping, semiring::Counting());
344}
345
348 gate_t g,
349 std::unordered_map<gate_t, semiring::why_provenance_t> &mapping)
350{
351 try_having_impl<semiring::Why>(c, g, mapping, semiring::Why());
352}
353
357 gate_t g,
358 std::unordered_map<gate_t, gate_t> &mapping)
359{
360 try_having_impl<semiring::BoolExpr>(c, g, mapping, be);
361}
362
365 gate_t g,
366 std::unordered_map<gate_t, bool> &mapping)
367{
368 try_having_impl<semiring::Boolean>(c, g, mapping, semiring::Boolean());
369}
AggregationOperator getAggregationOperator(Oid oid)
Map a PostgreSQL aggregate function OID to an AggregationOperator.
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50
@ MAX
MAX → input type.
@ COUNT
COUNT(*) or COUNT(expr) → integer.
@ SUM
SUM → integer or float.
@ MIN
MIN → input type.
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38
@ LT
Less than (<)
@ GT
Greater than (>)
@ LE
Less than or equal (<=)
@ NE
Not equal (<>)
@ GE
Greater than or equal (>=)
Boolean-expression (lineage formula) semiring.
Boolean semiring ({false, true}, ∨, ∧, false, true).
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:48
Counting semiring (ℕ, +, ×, 0, 1).
Symbolic formula semiring producing readable provenance expressions.
Why-provenance semiring (set of witness sets).
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
Definition Circuit.h:139
gateType getGateType(gate_t g) const
Return the type of gate g.
Definition Circuit.h:129
In-memory provenance circuit with semiring-generic evaluation.
S::value_type evaluate(gate_t g, std::unordered_map< gate_t, typename S::value_type > &provenance_mapping, S semiring) const
Evaluate the sub-circuit rooted at gate g over semiring semiring.
std::string getExtra(gate_t g) const
Return the string extra for gate g.
std::pair< unsigned, unsigned > getInfos(gate_t g) const
Return the integer annotation pair for gate g.
Provenance-as-Boolean-circuit semiring.
Definition BoolExpr.h:42
The Boolean semiring over bool.
Definition Boolean.h:36
The counting semiring over unsigned.
Definition Counting.h:36
Symbolic provenance formula semiring over std::string.
Definition Formula.h:70
Why-provenance semiring.
Definition Why.h:47
void provsql_try_having_why(GenericCircuit &c, gate_t g, std::unordered_map< gate_t, semiring::why_provenance_t > &mapping)
Evaluate the HAVING sub-circuit at g over the Why-provenance semiring.
void provsql_try_having_counting(GenericCircuit &c, gate_t g, std::unordered_map< gate_t, unsigned > &mapping)
Evaluate the HAVING sub-circuit at g over the Counting semiring.
void provsql_try_having_boolean(GenericCircuit &c, gate_t g, std::unordered_map< gate_t, bool > &mapping)
Evaluate the HAVING sub-circuit at g over the Boolean semiring.
static void try_having_impl(GenericCircuit &c, gate_t g, MapT &mapping, SemiringT S)
Rewrite HAVING comparison gates in the circuit by enumerating possible worlds.
void provsql_try_having_formula(GenericCircuit &c, gate_t g, std::unordered_map< gate_t, std::string > &mapping)
Evaluate the HAVING sub-circuit at g over the Formula semiring.
void provsql_try_having_boolexpr(GenericCircuit &c, semiring::BoolExpr &be, gate_t g, std::unordered_map< gate_t, gate_t > &mapping)
Evaluate the HAVING sub-circuit at g over the BoolExpr semiring.
Provenance evaluation helpers for HAVING-clause circuits.
@ gate_value
Scalar value (for aggregate provenance)
@ gate_one
Semiring one.
@ gate_agg
Aggregation operator (for aggregate provenance)
@ gate_cmp
Currently unused, meant for comparison of aggregate values.
@ gate_semimod
Semimodule scalar multiplication (for aggregate provenance)
C++ utility functions for UUID manipulation.
std::vector< mask_t > enumerate_valid_worlds(const std::vector< long > &values, int constant, ComparisonOperator op, AggregationOperator agg_kind, bool absorptive, bool &upset)
Enumerate all subsets of n tuples satisfying an aggregate predicate.
Definition subset.cpp:347
Enumerate tuple subsets satisfying an aggregate HAVING predicate.