ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
subset.cpp
Go to the documentation of this file.
1/**
2 * @file subset.cpp
3 * @brief Valid-world enumeration for aggregate HAVING predicates.
4 *
5 * Implements @c enumerate_valid_worlds() declared in @c subset.hpp.
6 *
7 * For a list of @f$n@f$ tuples with individual values, the function
8 * iterates over all @f$2^n@f$ possible worlds (bitmasks), computes the
9 * aggregate of the present tuples' values using the requested
10 * @c AggregationOperator, and tests the comparison predicate. All
11 * valid worlds are collected and returned.
12 *
13 * The @c upset output flag is set to @c true when the set of valid
14 * worlds is upward-closed (every superset of a valid world is also
15 * valid), which is the case for monotone aggregation predicates (e.g.
16 * SUM ≥ k). This information is used to optimise the evaluation of
17 * monotone HAVING clauses.
18 *
19 * Internal helpers in an anonymous namespace:
20 * - @c increment(): advance a bitmask to the next possible world.
21 * - @c compute_agg(): compute the aggregate value for one bitmask.
22 */
23#include "subset.hpp"
24#include <algorithm>
25#include <cstddef>
26#include <cstdint>
27#include <limits>
28#include <vector>
29#include <stdexcept>
30#include <cassert>
31
32namespace {
33static bool increment(mask_t &v)
34{
35 for(size_t i=0; i<v.size(); ++i)
36 {
37 v[i]=!v[i];
38 if(v[i])
39 return true;
40 }
41 return false;
42}
43
44static std::vector<mask_t> all_worlds(const std::vector<long> &values)
45{
46 std::vector<mask_t> worlds;
47 mask_t mask(values.size());
48 // Skip empty world
49 while(increment(mask))
50 worlds.push_back(mask);
51 return worlds;
52}
53
54static void append_range(std::vector<mask_t> &out,
55 const std::vector<std::vector<mask_t> > &dp,
56 int lo,
57 int hi)
58{
59 if (dp.empty()) return;
60 const int J = static_cast<int>(dp.size())-1;
61 lo = std::max(lo, 0);
62 hi = std::min(hi, J);
63 if (lo>hi) return;
64
65 for (int j = lo; j <= hi; ++j) {
66 out.insert(out.end(), dp[j].begin(), dp[j].end());
67 }
68}
69
70class DPException : public std::exception {};
71
72/** @brief Return the minimum of two values. */
73#define MIN(x,y) ((x)<(y)?(x):(y))
74
75static std::vector<mask_t> sum_dp(const std::vector<long> &values, int C, ComparisonOperator op, bool absorptive, bool &upset)
76{
77 const std::size_t n = values.size();
78
79 std::vector<mask_t> R;
80
81 // We first deal with NEQ by combining LT and GT
82 if(op == ComparisonOperator::NE) {
83 std::vector<mask_t> lt= sum_dp(values, C, ComparisonOperator::LT, absorptive, upset);
84 std::vector<mask_t> gt= sum_dp(values, C, ComparisonOperator::GT, absorptive, upset);
85 R.reserve(lt.size()+gt.size());
86 R.insert(R.end(),lt.begin(),lt.end());
87 R.insert(R.end(),gt.begin(),gt.end());
88 return R;
89 }
90
91 long long T=0;
92 for (int w: values) {
93 if (w < 0)
94 throw DPException();
95 T+=w;
96 }
97
98 //no valid worlds case
99 if (op == ComparisonOperator::GT && C>=T) return {};
100 if (op == ComparisonOperator::GE && C>T) return {};
101 if (op == ComparisonOperator::LT && C<=0) return {};
102 if (op == ComparisonOperator::LE && C<0) return {};
103 if (op == ComparisonOperator::EQ && (C>T || C<0)) return {};
104
105 //tautology cases
106 if (op == ComparisonOperator::GT && C<0) return all_worlds(values);
107 if (op == ComparisonOperator::GE && C<=0) return all_worlds(values);
108 if (op == ComparisonOperator::LT && C>T) return all_worlds(values);
109 if (op == ComparisonOperator::LE && C>=T) return all_worlds(values);
110
111 long long J=0;
113 J=T;
114 else if (op==ComparisonOperator::LT)
115 J=MIN(C-1,T);
116 else
117 J=MIN(C,T);
118
119 assert(J>=0);
120
121 std::vector<std::vector<mask_t> > dp(static_cast<std::size_t>(J) + 1);
122 dp[0].push_back(mask_t(n)); // dp[0] <- {emptyset}
123
124 int pref_sum=0;
125
126 for (std::size_t i=0; i<n; ++i)
127 {
128 const int w=values[i];
129 pref_sum+=w;
130 const int j_max=MIN(J,pref_sum);
131
132 for (int j = j_max; j >= w; --j) {
133 const int p = j - w;
134 if(absorptive && ((op==ComparisonOperator::GT && p>C) ||
135 (op==ComparisonOperator::GE && p>=C))) {
136 upset=true;
137 continue;
138 }
139 size_t s=dp[p].size();
140 for(size_t k=0; k<s; ++k) {
141 mask_t m = dp[p][k];
142 m[i] = true;
143 dp[j].push_back(m);
144 }
145 }
146 }
147
148 switch(op){
150 append_range(R,dp,C,C);
151 break;
152
154 append_range(R,dp,C+1,J);
155 break;
156
157
159 append_range(R,dp,1,C-1);
160 break;
161
163 append_range(R,dp,C,J);
164 break;
165
167 append_range(R,dp,1,C);
168 break;
169
170 case ComparisonOperator::NE: // case already processed
171 assert(false);
172 }
173
174 return R;
175}
176
177//generate k-subsets form an n-set
178static void combinations(std::size_t start,
179 int k_left,
180 mask_t mask,
181 std::vector<mask_t> &out)
182{
183 const size_t n = mask.size();
184
185 if (k_left == 0) {
186 out.push_back(mask);
187 return;
188 }
189
190 if (start >= n) return;
191
192 const std::size_t remaining = n - start;
193 if (remaining < static_cast<std::size_t>(k_left)) return;
194
195 combinations(start + 1, k_left, mask, out);
196
197 mask[start]=true;
198 combinations(start + 1, k_left - 1, mask, out);
199}
200
201static std::vector<mask_t> count_enum(const std::vector<long> &values, int m, ComparisonOperator op, bool absorptive, bool &upset)
202{
203 const int n = static_cast<int>(values.size());
204 std::vector<mask_t> out;
205
206 auto add_exact_k = [&](int k) {
207 if (k < 0 || k > n) return;
208 combinations(0, k, mask_t(n), out);
209 };
210
211 switch (op)
212 {
214 if(m!=0) add_exact_k(m);
215 break;
216
218 ++m;
219 [[fallthrough]];
221 if(absorptive) {
222 upset=true;
223 add_exact_k(m);
224 } else
225 for (int k = m; k <= n; ++k) add_exact_k(k);
226 break;
227
229 --m;
230 [[fallthrough]];
232 for (int k = 1; k <= m; ++k) add_exact_k(k);
233 break;
234
236 for (int k = 1; k <= n; ++k)
237 {
238 if (k != m) add_exact_k(k);
239 }
240 break;
241 }
242
243 return out;
244}
245
246}
247
248/**
249 * @brief Apply a comparison operator to two values.
250 * @tparam I Type of the left operand.
251 * @tparam J Type of the right operand.
252 * @param a Left operand.
253 * @param op Comparison operator.
254 * @param b Right operand.
255 * @return Result of the comparison.
256 */
257template<typename I, typename J>
258static bool compare(I a, ComparisonOperator op, J b) {
259 switch (op) {
260 case ComparisonOperator::EQ: return a == b;
261 case ComparisonOperator::NE: return a != b;
262 case ComparisonOperator::GT: return a > b;
263 case ComparisonOperator::LT: return a < b;
264 case ComparisonOperator::GE: return a >= b;
265 case ComparisonOperator::LE: return a <= b;
266 }
267 return false;
268}
269
270/**
271 * @brief Evaluate whether the aggregation of @p values masked by @p mask satisfies @p op @p constant.
272 * @param values Input values to aggregate.
273 * @param mask Boolean mask selecting which values to include.
274 * @param constant Right-hand side constant of the comparison.
275 * @param op Comparison operator.
276 * @param aggregator Aggregator to apply to the selected values.
277 * @return @c true if the aggregate result satisfies the comparison.
278 */
279bool evaluate(const std::vector<long>& values,
280 const std::vector<bool>& mask,
281 int constant, ComparisonOperator op,
282 std::unique_ptr<Aggregator> aggregator)
283{
284 for (std::size_t i = 0; i < values.size(); ++i) {
285 if (mask[i]) aggregator->add(AggValue {values[i]});
286 }
287 auto res = aggregator->finalize();
288 switch(aggregator->resultType()) {
289 case ValueType::INT:
290 return compare(std::get<long>(res.v), op, constant);
292 return compare(std::get<bool>(res.v), op, constant);
293 case ValueType::FLOAT:
294 return compare(std::get<double>(res.v), op, constant);
295 default:
296 throw std::runtime_error("Cannot compare this kind of value");
297 }
298}
299
300/**
301 * @brief Enumerate all subsets satisfying a HAVING predicate by exhaustive search.
302 * @param values Input values.
303 * @param constant Constant for the comparison.
304 * @param op Comparison operator.
305 * @param agg_kind Aggregation function to apply.
306 * @param absorptive Whether the semiring is absorptive.
307 * @param upset Set to @c true if the result set forms an upset (monotone).
308 * @return Vector of satisfying subset masks.
309 */
310std::vector<mask_t> enumerate_exhaustive(
311 const std::vector<long> &values,
312 int constant,
314 AggregationOperator agg_kind,
315 bool absorptive,
316 bool &upset)
317{
318 const size_t n = values.size();
319
320 std::vector<mask_t> worlds;
321 mask_t mask(n);
322
323 bool all_worlds = true;
324
325 while(increment(mask)) { // Skipping empty world
326 auto aggregator = makeAggregator(agg_kind, ValueType::INT);
327
328 if(evaluate(values, mask, constant, op, std::move(aggregator)))
329 worlds.push_back(mask);
330 else
331 all_worlds=false;
332 }
333
334 if(all_worlds && absorptive)
335 {
336 worlds.clear();
337
338 // In that case, the result is equivalent to the upset generated by
339 // the single-tuple possible worlds
340 combinations(0, 1, mask_t(n), worlds);
341 upset=true;
342 }
343
344 return worlds;
345}
346
347std::vector<mask_t> enumerate_valid_worlds(
348 const std::vector<long> &values,
349 int constant,
351 AggregationOperator agg_kind,
352 bool absorptive,
353 bool &upset
354 )
355{
356 if (agg_kind == AggregationOperator::COUNT)
357 return count_enum(values,constant,op, absorptive, upset);
358
359 if(agg_kind == AggregationOperator::SUM)
360 try {
361 return sum_dp(values, constant, op, absorptive, upset);
362 } catch(DPException &e) {
363 // We will use the default implementation of the enumeration
364 }
365
366 return enumerate_exhaustive(values, constant, op, agg_kind, absorptive, upset);
367}
std::unique_ptr< Aggregator > makeAggregator(AggregationOperator op, ValueType t)
Create a concrete Aggregator for the given operator and value type.
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50
@ COUNT
COUNT(*) or COUNT(expr) → integer.
@ SUM
SUM → integer or float.
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38
@ LT
Less than (<)
@ GT
Greater than (>)
@ LE
Less than or equal (<=)
@ NE
Not equal (<>)
@ GE
Greater than or equal (>=)
@ INT
Signed 64-bit integer.
@ BOOLEAN
Boolean.
@ FLOAT
Double-precision float.
A dynamically-typed aggregate value.
Definition Aggregation.h:85
bool evaluate(const std::vector< long > &values, const std::vector< bool > &mask, int constant, ComparisonOperator op, std::unique_ptr< Aggregator > aggregator)
Evaluate whether the aggregation of values masked by mask satisfies op constant.
Definition subset.cpp:279
std::vector< mask_t > enumerate_exhaustive(const std::vector< long > &values, int constant, ComparisonOperator op, AggregationOperator agg_kind, bool absorptive, bool &upset)
Enumerate all subsets satisfying a HAVING predicate by exhaustive search.
Definition subset.cpp:310
#define MIN(x, y)
Return the minimum of two values.
Definition subset.cpp:73
std::vector< mask_t > enumerate_valid_worlds(const std::vector< long > &values, int constant, ComparisonOperator op, AggregationOperator agg_kind, bool absorptive, bool &upset)
Enumerate all subsets of n tuples satisfying an aggregate predicate.
Definition subset.cpp:347
static bool compare(I a, ComparisonOperator op, J b)
Apply a comparison operator to two values.
Definition subset.cpp:258
Enumerate tuple subsets satisfying an aggregate HAVING predicate.
std::vector< bool > mask_t
A bitmask over tuples representing one possible world.
Definition subset.hpp:28