ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
subset.cpp
Go to the documentation of this file.
1/**
2 * @file subset.cpp
3 * @brief Valid-world enumeration for aggregate HAVING predicates.
4 *
5 * Implements @c enumerate_valid_worlds() declared in @c subset.hpp.
6 *
7 * For a list of @f$n@f$ tuples with individual values, the function
8 * iterates over all @f$2^n@f$ possible worlds (bitmasks), computes the
9 * aggregate of the present tuples' values using the requested
10 * @c AggregationOperator, and tests the comparison predicate. All
11 * valid worlds are collected and returned.
12 *
13 * The @c upset output flag is set to @c true when the set of valid
14 * worlds is upward-closed (every superset of a valid world is also
15 * valid), which is the case for monotone aggregation predicates (e.g.
16 * SUM ≥ k). This information is used to optimise the evaluation of
17 * monotone HAVING clauses.
18 *
19 * Internal helpers in an anonymous namespace:
20 * - @c increment(): advance a bitmask to the next possible world.
21 * - @c compute_agg(): compute the aggregate value for one bitmask.
22 */
23#include "subset.hpp"
24#include <algorithm>
25#include <cstddef>
26#include <cstdint>
27#include <limits>
28#include <vector>
29#include <stdexcept>
30#include <cassert>
31
32namespace {
33static bool increment(mask_t &v)
34{
35 for(size_t i=0; i<v.size(); ++i)
36 {
37 v[i]=!v[i];
38 if(v[i])
39 return true;
40 }
41 return false;
42}
43
44static std::vector<mask_t> all_worlds(const std::vector<long> &values)
45{
46 std::vector<mask_t> worlds;
47 mask_t mask(values.size());
48 // Skip empty world
49 while(increment(mask))
50 worlds.push_back(mask);
51 return worlds;
52}
53
54static void append_range(std::vector<mask_t> &out,
55 const std::vector<std::vector<mask_t> > &dp,
56 int lo,
57 int hi)
58{
59 if (dp.empty()) return;
60 const int J = static_cast<int>(dp.size())-1;
61 lo = std::max(lo, 0);
62 hi = std::min(hi, J);
63 if (lo>hi) return;
64
65 for (int j = lo; j <= hi; ++j) {
66 out.insert(out.end(), dp[j].begin(), dp[j].end());
67 }
68}
69
70class DPException : public std::exception {};
71
72/** @brief Return the minimum of two values. */
73#define MIN(x,y) ((x)<(y)?(x):(y))
74
75static std::vector<mask_t> sum_dp(const std::vector<long> &values, int C, ComparisonOperator op, bool absorptive, bool &upset)
76{
77 const std::size_t n = values.size();
78
79 std::vector<mask_t> R;
80
81 // We first deal with NEQ by combining LT and GT
82 if(op == ComparisonOperator::NE) {
83 std::vector<mask_t> lt= sum_dp(values, C, ComparisonOperator::LT, absorptive, upset);
84 std::vector<mask_t> gt= sum_dp(values, C, ComparisonOperator::GT, absorptive, upset);
85 R.reserve(lt.size()+gt.size());
86 R.insert(R.end(),lt.begin(),lt.end());
87 R.insert(R.end(),gt.begin(),gt.end());
88 return R;
89 }
90
91 long long T=0;
92 for (int w: values) {
93 if (w < 0)
94 throw DPException();
95 T+=w;
96 }
97
98 //no valid worlds case
99 if (op == ComparisonOperator::GT && C>=T) return {};
100 if (op == ComparisonOperator::GE && C>T) return {};
101 if (op == ComparisonOperator::LT && C<=0) return {};
102 if (op == ComparisonOperator::LE && C<0) return {};
103 if (op == ComparisonOperator::EQ && (C>T || C<0)) return {};
104
105 //tautology cases
106 if (op == ComparisonOperator::GT && C<0) return all_worlds(values);
107 if (op == ComparisonOperator::GE && C<=0) return all_worlds(values);
108 if (op == ComparisonOperator::LT && C>T) return all_worlds(values);
109 if (op == ComparisonOperator::LE && C>=T) return all_worlds(values);
110
111 long long J=0;
113 J=T;
114 else if (op==ComparisonOperator::LT)
115 J=MIN(C-1,T);
116 else
117 J=MIN(C,T);
118
119 assert(J>=0);
120
121 std::vector<std::vector<mask_t> > dp(static_cast<std::size_t>(J) + 1);
122 dp[0].push_back(mask_t(n)); // dp[0] <- {emptyset}
123
124 int pref_sum=0;
125
126 for (std::size_t i=0; i<n; ++i)
127 {
128 const int w=values[i];
129 pref_sum+=w;
130 const int j_max=MIN(J,pref_sum);
131
132 for (int j = j_max; j >= w; --j) {
133 const int p = j - w;
134 if(absorptive && ((op==ComparisonOperator::GT && p>C) ||
135 (op==ComparisonOperator::GE && p>=C))) {
136 upset=true;
137 continue;
138 }
139 size_t s=dp[p].size();
140 for(size_t k=0; k<s; ++k) {
141 mask_t m = dp[p][k];
142 m[i] = true;
143 dp[j].push_back(m);
144 }
145 }
146 }
147
148 switch(op){
150 append_range(R,dp,C,C);
151 break;
152
154 append_range(R,dp,C+1,J);
155 break;
156
157
159 append_range(R,dp,1,C-1);
160 break;
161
163 append_range(R,dp,C,J);
164 break;
165
167 append_range(R,dp,1,C);
168 break;
169
170 case ComparisonOperator::NE: // case already processed
171 assert(false);
172 }
173
174 return R;
175}
176
177//generate k-subsets form an n-set
178static void combinations(std::size_t start,
179 int k_left,
180 mask_t mask,
181 std::vector<mask_t> &out)
182{
183 const size_t n = mask.size();
184
185 if (k_left == 0) {
186 out.push_back(mask);
187 return;
188 }
189
190 if (start >= n) return;
191
192 const std::size_t remaining = n - start;
193 if (remaining < static_cast<std::size_t>(k_left)) return;
194
195 combinations(start + 1, k_left, mask, out);
196
197 mask[start]=true;
198 combinations(start + 1, k_left - 1, mask, out);
199}
200
201static std::vector<mask_t> count_enum(const std::vector<long> &values, int m, ComparisonOperator op, bool absorptive, bool &upset)
202{
203 const int n = static_cast<int>(values.size());
204 std::vector<mask_t> out;
205
206 auto add_exact_k = [&](int k) {
207 if (k < 0 || k > n) return;
208 combinations(0, k, mask_t(n), out);
209 };
210
211 switch (op)
212 {
214 if(m!=0) add_exact_k(m);
215 break;
216
218 ++m;
219 [[fallthrough]];
221 /* Skip the empty subset, mirroring the // Skip empty world rule
222 * in @c all_worlds and @c enumerate_exhaustive: a HAVING
223 * predicate on an empty group is undefined in SQL semantics
224 * (the group does not exist, so HAVING is not evaluated), so
225 * the @c count >= 0 / @c count > -K family must collapse to
226 * "group is non-empty" rather than to a universal tautology
227 * (probability 1 in every world). Equivalently, m is clamped
228 * to at least 1 here. */
229 if (m < 1) m = 1;
230 if(absorptive) {
231 upset=true;
232 add_exact_k(m);
233 } else
234 for (int k = m; k <= n; ++k) add_exact_k(k);
235 break;
236
238 --m;
239 [[fallthrough]];
241 for (int k = 1; k <= m; ++k) add_exact_k(k);
242 break;
243
245 for (int k = 1; k <= n; ++k)
246 {
247 if (k != m) add_exact_k(k);
248 }
249 break;
250 }
251
252 return out;
253}
254
255}
256
257/**
258 * @brief Apply a comparison operator to two values.
259 * @tparam I Type of the left operand.
260 * @tparam J Type of the right operand.
261 * @param a Left operand.
262 * @param op Comparison operator.
263 * @param b Right operand.
264 * @return Result of the comparison.
265 */
266template<typename I, typename J>
267static bool compare(I a, ComparisonOperator op, J b) {
268 switch (op) {
269 case ComparisonOperator::EQ: return a == b;
270 case ComparisonOperator::NE: return a != b;
271 case ComparisonOperator::GT: return a > b;
272 case ComparisonOperator::LT: return a < b;
273 case ComparisonOperator::GE: return a >= b;
274 case ComparisonOperator::LE: return a <= b;
275 }
276 return false;
277}
278
279/**
280 * @brief Evaluate whether the aggregation of @p values masked by @p mask satisfies @p op @p constant.
281 * @param values Input values to aggregate.
282 * @param mask Boolean mask selecting which values to include.
283 * @param constant Right-hand side constant of the comparison.
284 * @param op Comparison operator.
285 * @param aggregator Aggregator to apply to the selected values.
286 * @return @c true if the aggregate result satisfies the comparison.
287 */
288bool evaluate(const std::vector<long>& values,
289 const std::vector<bool>& mask,
290 int constant, ComparisonOperator op,
291 std::unique_ptr<Aggregator> aggregator)
292{
293 for (std::size_t i = 0; i < values.size(); ++i) {
294 if (mask[i]) aggregator->add(AggValue {values[i]});
295 }
296 auto res = aggregator->finalize();
297 switch(aggregator->resultType()) {
298 case ValueType::INT:
299 return compare(std::get<long>(res.v), op, constant);
301 return compare(std::get<bool>(res.v), op, constant);
302 case ValueType::FLOAT:
303 return compare(std::get<double>(res.v), op, constant);
304 default:
305 throw std::runtime_error("Cannot compare this kind of value");
306 }
307}
308
309/**
310 * @brief Enumerate all subsets satisfying a HAVING predicate by exhaustive search.
311 * @param values Input values.
312 * @param constant Constant for the comparison.
313 * @param op Comparison operator.
314 * @param agg_kind Aggregation function to apply.
315 * @param absorptive Whether the semiring is absorptive.
316 * @param upset Set to @c true if the result set forms an upset (monotone).
317 * @return Vector of satisfying subset masks.
318 */
319std::vector<mask_t> enumerate_exhaustive(
320 const std::vector<long> &values,
321 int constant,
323 AggregationOperator agg_kind,
324 bool absorptive,
325 bool &upset)
326{
327 const size_t n = values.size();
328
329 std::vector<mask_t> worlds;
330 mask_t mask(n);
331
332 bool all_worlds = true;
333
334 while(increment(mask)) { // Skipping empty world
335 auto aggregator = makeAggregator(agg_kind, ValueType::INT);
336
337 if(evaluate(values, mask, constant, op, std::move(aggregator)))
338 worlds.push_back(mask);
339 else
340 all_worlds=false;
341 }
342
343 if(all_worlds && absorptive)
344 {
345 worlds.clear();
346
347 // In that case, the result is equivalent to the upset generated by
348 // the single-tuple possible worlds
349 combinations(0, 1, mask_t(n), worlds);
350 upset=true;
351 }
352
353 return worlds;
354}
355
356std::vector<mask_t> enumerate_valid_worlds(
357 const std::vector<long> &values,
358 int constant,
360 AggregationOperator agg_kind,
361 bool absorptive,
362 bool &upset
363 )
364{
365 if (agg_kind == AggregationOperator::COUNT)
366 return count_enum(values,constant,op, absorptive, upset);
367
368 if(agg_kind == AggregationOperator::SUM)
369 try {
370 return sum_dp(values, constant, op, absorptive, upset);
371 } catch(DPException &e) {
372 // We will use the default implementation of the enumeration
373 }
374
375 return enumerate_exhaustive(values, constant, op, agg_kind, absorptive, upset);
376}
std::unique_ptr< Aggregator > makeAggregator(AggregationOperator op, ValueType t)
Create a concrete Aggregator for the given operator and value type.
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50
@ COUNT
COUNT(*) or COUNT(expr) → integer.
Definition Aggregation.h:51
@ SUM
SUM → integer or float.
Definition Aggregation.h:52
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38
@ LT
Less than (<).
Definition Aggregation.h:42
@ GT
Greater than (>).
Definition Aggregation.h:44
@ LE
Less than or equal (<=).
Definition Aggregation.h:41
@ NE
Not equal (<>).
Definition Aggregation.h:40
@ GE
Greater than or equal (>=).
Definition Aggregation.h:43
@ INT
Signed 64-bit integer.
Definition Aggregation.h:67
@ BOOLEAN
Boolean.
Definition Aggregation.h:69
@ FLOAT
Double-precision float.
Definition Aggregation.h:68
A dynamically-typed aggregate value.
Definition Aggregation.h:85
bool evaluate(const std::vector< long > &values, const std::vector< bool > &mask, int constant, ComparisonOperator op, std::unique_ptr< Aggregator > aggregator)
Evaluate whether the aggregation of values masked by mask satisfies op constant.
Definition subset.cpp:288
std::vector< mask_t > enumerate_exhaustive(const std::vector< long > &values, int constant, ComparisonOperator op, AggregationOperator agg_kind, bool absorptive, bool &upset)
Enumerate all subsets satisfying a HAVING predicate by exhaustive search.
Definition subset.cpp:319
#define MIN(x, y)
Return the minimum of two values.
Definition subset.cpp:73
std::vector< mask_t > enumerate_valid_worlds(const std::vector< long > &values, int constant, ComparisonOperator op, AggregationOperator agg_kind, bool absorptive, bool &upset)
Enumerate all subsets of n tuples satisfying an aggregate predicate.
Definition subset.cpp:356
static bool compare(I a, ComparisonOperator op, J b)
Apply a comparison operator to two values.
Definition subset.cpp:267
Enumerate tuple subsets satisfying an aggregate HAVING predicate.
std::vector< bool > mask_t
A bitmask over tuples representing one possible world.
Definition subset.hpp:28