ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
Aggregation.h
Go to the documentation of this file.
1/**
2 * @file Aggregation.h
3 * @brief Typed aggregation value, operator, and aggregator abstractions.
4 *
5 * This header provides the type system used by ProvSQL's aggregate
6 * provenance evaluation:
7 *
8 * - @c ComparisonOperator: the six standard SQL comparison operators,
9 * used by @c gate_cmp gates in the circuit.
10 * - @c AggregationOperator: the SQL aggregation functions that ProvSQL
11 * tracks provenance for (COUNT, SUM, MIN, MAX, AVG, AND, OR, …).
12 * - @c ValueType: the runtime type tag for aggregate values.
13 * - @c AggValue: a tagged union holding one aggregate value of any
14 * supported type, built on @c std::variant.
15 * - @c Aggregator: an abstract interface for stateful incremental
16 * accumulators, one per aggregation function/type combination.
17 *
18 * The free functions @c getAggregationOperator() and @c makeAggregator()
19 * map PostgreSQL OIDs and operator/type pairs to the corresponding C++
20 * objects.
21 */
22#ifndef AGGREGATION_H
23#define AGGREGATION_H
24
25extern "C" {
26#include "postgres.h"
27}
28
29#include <variant>
30#include <string>
31#include <vector>
32#include <cassert>
33#include <memory>
34
35/**
36 * @brief SQL comparison operators used in @c gate_cmp circuit gates.
37 */
38enum class ComparisonOperator {
39 EQ, ///< Equal (=)
40 NE, ///< Not equal (<>)
41 LE, ///< Less than or equal (<=)
42 LT, ///< Less than (<)
43 GE, ///< Greater than or equal (>=)
44 GT ///< Greater than (>)
45};
46
47/**
48 * @brief SQL aggregation functions tracked by ProvSQL.
49 */
50enum class AggregationOperator {
51 COUNT, ///< COUNT(*) or COUNT(expr) → integer
52 SUM, ///< SUM → integer or float
53 MIN, ///< MIN → input type
54 MAX, ///< MAX → input type
55 AVG, ///< AVG → float
56 AND, ///< Boolean AND aggregate
57 OR, ///< Boolean OR aggregate
58 CHOOSE, ///< Arbitrary selection (pick one element)
59 ARRAY_AGG, ///< Array aggregation
60 NONE, ///< No aggregation (returns NULL)
61};
62
63/**
64 * @brief Runtime type tag for aggregate values.
65 */
66enum class ValueType {
67 INT, ///< Signed 64-bit integer
68 FLOAT, ///< Double-precision float
69 BOOLEAN, ///< Boolean
70 STRING, ///< Text string
71 ARRAY_INT, ///< Array of integers
72 ARRAY_FLOAT, ///< Array of floats
73 ARRAY_BOOLEAN,///< Array of booleans
74 ARRAY_STRING, ///< Array of strings
75 NONE ///< No value (NULL)
76};
77
78/**
79 * @brief A dynamically-typed aggregate value.
80 *
81 * Wraps a @c std::variant of all supported scalar and array types.
82 * The active alternative is identified by the @c ValueType tag returned
83 * by @c getType().
84 */
85struct AggValue {
86private:
87 ValueType t; ///< Active type tag
88
89public:
90 /** @brief The variant holding the actual value. */
91 std::variant<long, double, bool, std::string,
92 std::vector<long>, std::vector<double>, std::vector<bool>, std::vector<std::string> > v;
93
94 /** @brief Construct a NULL (NONE) value. */
95 AggValue() : t(ValueType::NONE) {
96 }
97 /** @brief Construct an integer value. @param l Integer value. */
98 AggValue(long l) : t(ValueType::INT), v(l) {
99 }
100 /** @brief Construct a float value. @param d Float value. */
101 AggValue(double d) : t(ValueType::FLOAT), v(d) {
102 }
103 /** @brief Construct a boolean value. @param b Boolean value. */
104 AggValue(bool b) : t(ValueType::BOOLEAN), v(b) {
105 }
106 /** @brief Construct a string value. @param s String value. */
107 AggValue(std::string s) : t(ValueType::STRING), v(s) {
108 }
109 /** @brief Construct an integer-array value. @param vec Integer array. */
110 AggValue(std::vector<long> vec) : t(ValueType::ARRAY_INT), v(vec) {
111 }
112 /** @brief Construct a float-array value. @param vec Float array. */
113 AggValue(std::vector<double> vec) : t(ValueType::ARRAY_FLOAT), v(vec) {
114 }
115 /** @brief Construct a boolean-array value. @param vec Boolean array. */
116 AggValue(std::vector<bool> vec) : t(ValueType::ARRAY_BOOLEAN), v(vec) {
117 }
118 /** @brief Construct a string-array value. @param vec String array. */
119 AggValue(std::vector<std::string> vec) : t(ValueType::ARRAY_STRING), v(vec) {
120 }
121
122 /**
123 * @brief Return the runtime type tag of this value.
124 * @return The @c ValueType identifying the active alternative.
125 */
126 ValueType getType() const {
127 return t;
128 }
129};
130
131/**
132 * @brief Abstract interface for an incremental aggregate accumulator.
133 *
134 * Each concrete subclass implements one aggregation function for one
135 * input type (e.g., SUM over integers, MAX over floats). Instances are
136 * created by @c makeAggregator().
137 */
138struct Aggregator {
139 virtual ~Aggregator() = default;
140
141 /**
142 * @brief Incorporate one input value into the running aggregate.
143 * @param x Input value to add.
144 */
145 virtual void add(const AggValue& x) = 0;
146
147 /**
148 * @brief Return the final aggregate result.
149 * @return The accumulated aggregate as an @c AggValue.
150 */
151 virtual AggValue finalize() const = 0;
152
153 /**
154 * @brief Return the aggregation operator this accumulator implements.
155 * @return The @c AggregationOperator enum value for this accumulator.
156 */
157 virtual AggregationOperator op() const = 0;
158
159 /**
160 * @brief Return the type of the input values accepted by @c add().
161 * @return The @c ValueType of values passed to @c add().
162 */
163 virtual ValueType inputType() const = 0;
164
165 /**
166 * @brief Return the type of the value returned by @c finalize().
167 *
168 * Defaults to @c inputType(); override when the result type differs
169 * (e.g., AVG returns FLOAT regardless of the input type).
170 * @return The @c ValueType of the value returned by @c finalize().
171 */
172 virtual ValueType resultType() const {
173 return inputType();
174 }
175};
176
177/**
178 * @brief Map a PostgreSQL aggregate function OID to an @c AggregationOperator.
179 *
180 * @param oid OID of the aggregate function (e.g. @c F_COUNT_ANY, @c F_SUM_INT4).
181 * @return The corresponding @c AggregationOperator.
182 */
183AggregationOperator getAggregationOperator(Oid oid);
184
185/**
186 * @brief Create a concrete @c Aggregator for the given operator and value type.
187 *
188 * @param op The aggregation function to implement.
189 * @param t The type of input values that will be accumulated.
190 * @return A heap-allocated @c Aggregator, or @c nullptr if the combination
191 * is not supported.
192 */
193std::unique_ptr<Aggregator> makeAggregator(AggregationOperator op, ValueType t);
194
195#endif /* AGGREGATION_H */
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50
@ OR
Boolean OR aggregate.
@ MAX
MAX → input type.
@ COUNT
COUNT(*) or COUNT(expr) → integer.
@ AND
Boolean AND aggregate.
@ SUM
SUM → integer or float.
@ ARRAY_AGG
Array aggregation.
@ NONE
No aggregation (returns NULL)
@ MIN
MIN → input type.
@ CHOOSE
Arbitrary selection (pick one element)
@ AVG
AVG → float.
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38
@ LT
Less than (<)
@ GT
Greater than (>)
@ LE
Less than or equal (<=)
@ NE
Not equal (<>)
@ GE
Greater than or equal (>=)
AggregationOperator getAggregationOperator(Oid oid)
Map a PostgreSQL aggregate function OID to an AggregationOperator.
std::unique_ptr< Aggregator > makeAggregator(AggregationOperator op, ValueType t)
Create a concrete Aggregator for the given operator and value type.
ValueType
Runtime type tag for aggregate values.
Definition Aggregation.h:66
@ ARRAY_INT
Array of integers.
@ ARRAY_BOOLEAN
Array of booleans.
@ INT
Signed 64-bit integer.
@ STRING
Text string.
@ ARRAY_FLOAT
Array of floats.
@ ARRAY_STRING
Array of strings.
@ BOOLEAN
Boolean.
@ FLOAT
Double-precision float.
AggValue
A dynamically-typed aggregate value.
Definition Aggregation.h:85
AggValue(std::vector< bool > vec)
Construct a boolean-array value.
ValueType getType() const
Return the runtime type tag of this value.
AggValue(std::vector< long > vec)
Construct an integer-array value.
AggValue()
Construct a NULL (NONE) value.
Definition Aggregation.h:95
AggValue(long l)
Construct an integer value.
Definition Aggregation.h:98
ValueType t
Active type tag.
Definition Aggregation.h:87
AggValue(std::vector< std::string > vec)
Construct a string-array value.
AggValue(double d)
Construct a float value.
AggValue(std::string s)
Construct a string value.
AggValue(bool b)
Construct a boolean value.
std::variant< long, double, bool, std::string, std::vector< long >, std::vector< double >, std::vector< bool >, std::vector< std::string > > v
The variant holding the actual value.
Definition Aggregation.h:92
AggValue(std::vector< double > vec)
Construct a float-array value.
Aggregator
Abstract interface for an incremental aggregate accumulator.
virtual AggregationOperator op() const =0
Return the aggregation operator this accumulator implements.
virtual void add(const AggValue &x)=0
Incorporate one input value into the running aggregate.
virtual AggValue finalize() const =0
Return the final aggregate result.
virtual ValueType inputType() const =0
Return the type of the input values accepted by add().
virtual ~Aggregator()=default
virtual ValueType resultType() const
Return the type of the value returned by finalize().