ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
Formula.h
Go to the documentation of this file.
1/**
2 * @file semiring/Formula.h
3 * @brief Symbolic representation of provenance as a human-readable formula.
4 *
5 * The @c Formula pseudo-semiring (@c std::string, @f$\oplus@f$, @f$\otimes@f$,
6 * "𝟘", "𝟙") produces a symbolic representation of provenance using
7 * Unicode semiring symbols. It is primarily used for debugging and
8 * testing.
9 *
10 * Each gate evaluates to a string:
11 * - @c zero() → "𝟘"
12 * - @c one() → "𝟙"
13 * - @c plus() → "(a ⊕ b ⊕ 
)" or just "a" for singletons
14 * - @c times() → "(a ⊗ b ⊗ 
)" or just "a" for singletons
15 * - @c monus() → "(a ⊖ b)"
16 * - @c delta() → "ή(a)" or "ήa" if @c a starts with @c (
17 * - @c cmp() → "[s1 op s2]"
18 * - @c semimod()→ "x*s"
19 * - @c agg() → operator-specific notation (e.g., "min(a,b)")
20 * - @c value() → the literal string itself
21 */
22#ifndef FORMULA_H
23#define FORMULA_H
24
25#include <numeric>
26#include <vector>
27#include <string>
28#include <sstream>
29#include <iterator>
30
31#include "Semiring.h"
32
33/**
34 * @brief Concatenate elements of a range with a delimiter.
35 *
36 * Used internally by @c Formula::plus(), @c Formula::times(), and
37 * @c Formula::agg() to build operator-separated strings.
38 *
39 * @tparam Range Any range type with a @c value_type typedef.
40 * @tparam Value Element type (defaults to @c Range::value_type).
41 * @param elements The range to join.
42 * @param delimiter String to insert between adjacent elements.
43 * @return All elements concatenated with @p delimiter between them.
44 */
45template <typename Range, typename Value = typename Range::value_type>
46static std::string join(Range const& elements, const char *const delimiter) {
47 std::ostringstream os;
48 auto b = begin(elements), e = end(elements);
49
50 if (b != e) {
51 std::copy(b, prev(e), std::ostream_iterator<Value>(os, delimiter));
52 b = prev(e);
53 }
54 if (b != e) {
55 os << *b;
56 }
57
58 return os.str();
59}
60
61/**
62 * @brief If @p s is wrapped in a single matched outer paren pair AND its
63 * top-level operator (depth 1, inside that pair) is @p op, return
64 * the inner content; otherwise return @p s unchanged.
65 *
66 * Used by @c Formula::plus() and @c Formula::times() to flatten same-op
67 * nested gates by associativity: a child @c "(a ⊕ b)" feeding into a
68 * parent @c plus is unwrapped to @c "a ⊕ b" so the join produces
69 * @c "a ⊕ b ⊕ c" instead of @c "(a ⊕ b) ⊕ c". A different top-level op
70 * (e.g., a @c times child) keeps its parens.
71 */
72static std::string strip_wrap_if_op(const std::string &s, const std::string &op) {
73 if(s.size() < 2 || s.front() != '(' || s.back() != ')')
74 return s;
75 // Verify the leading '(' closes only at the very end : if any earlier
76 // ')' brings depth back to 0, the outer pair isn't a single matched
77 // wrap (e.g., @c "(a) ⊕ (b)" must not be stripped).
78 int depth = 0;
79 for(size_t i = 0; i < s.size() - 1; ++i) {
80 if(s[i] == '(') ++depth;
81 else if(s[i] == ')') {
82 if(--depth == 0)
83 return s;
84 }
85 }
86 // Scan inner for a depth-0 occurrence of @p op. UTF-8 operators are
87 // multi-byte but @c compare on raw bytes is correct since we never
88 // straddle a UTF-8 char boundary at depth-0 positions outside parens.
89 const std::string inner = s.substr(1, s.size() - 2);
90 depth = 0;
91 for(size_t i = 0; i + op.size() <= inner.size(); ) {
92 if(inner[i] == '(') { ++depth; ++i; }
93 else if(inner[i] == ')') { --depth; ++i; }
94 else if(depth == 0 && inner.compare(i, op.size(), op) == 0)
95 return inner;
96 else
97 ++i;
98 }
99 return s;
100}
101
102namespace semiring {
103/**
104 * @brief Symbolic provenance representation over @c std::string.
105 *
106 * Evaluates circuits to human-readable Unicode formulas.
107 * Supports all optional operations (@c cmp, @c semimod, @c agg,
108 * @c value) in addition to the mandatory ones.
109 */
110class Formula : public semiring::Semiring<std::string>
111{
112public:
113virtual value_type zero() const override {
114 return "𝟘";
115}
116virtual value_type one() const override {
117 return "𝟙";
118}
119virtual value_type plus(const std::vector<value_type> &v) const override {
120 if(v.size()==0)
121 return zero();
122 else if(v.size()==1)
123 return v[0];
124 // Flatten same-op nesting by associativity: a child "(a ⊕ b)" is
125 // inlined as "a ⊕ b" so the join produces "a ⊕ b ⊕ c", not
126 // "(a ⊕ b) ⊕ c". Mixed-op children (e.g., a times subexpression)
127 // keep their parens.
128 std::vector<value_type> flat;
129 flat.reserve(v.size());
130 for(const auto &x : v)
131 flat.push_back(strip_wrap_if_op(x, "⊕"));
132 return "("+join(flat, " ⊕ ")+")";
133}
134virtual value_type times(const std::vector<value_type> &v) const override {
135 if(v.size()==0)
136 return one();
137 else if(v.size()==1)
138 return v[0];
139 std::vector<value_type> flat;
140 flat.reserve(v.size());
141 for(const auto &x : v)
142 flat.push_back(strip_wrap_if_op(x, "⊗"));
143 return "("+join(flat, " ⊗ ")+")";
144}
145virtual value_type monus(value_type x, value_type y) const override
146{
147 return "("+x+" ⊖ "+y+")";
148}
149virtual value_type delta(value_type x) const override
150{
151 if(x[0]=='(')
152 return "ÎŽ"+x;
153 else
154 return "ÎŽ("+x+")";
155}
156virtual value_type cmp(value_type s1, ComparisonOperator op, value_type s2) const override {
157 std::string result = "["+s1+" ";
158 switch(op) {
160 result+="=";
161 break;
163 result+="≠";
164 break;
166 result+="≀";
167 break;
169 result+="<";
170 break;
172 result+="≥";
173 break;
175 result+=">";
176 break;
177 }
178 return result+" "+s2+"]";
179}
180virtual value_type semimod(value_type x, value_type s) const override {
181 return x + "*" + s;
182}
183virtual value_type agg(AggregationOperator op, const std::vector<std::string> &s) override {
185 return "<>";
186
187 if(s.empty()) {
188 switch(op) {
191 return "0";
193 return "+∞";
195 return "-∞";
198 return "<>";
200 return "⊀";
202 return "⊥";
204 return "[]";
206 assert(false);
207 }
208 }
209
210 std::string result;
211 switch(op) {
213 result+="[";
214 break;
216 result+="min(";
217 break;
219 result+="max(";
220 break;
222 result+="avg(";
223 break;
225 result+="choose(";
226 break;
227 default:
228 ;
229 }
230
231 result += s[0];
232
233 for(size_t i = 1; i<s.size(); ++i) {
234 switch(op) {
237 result+="+";
238 break;
244 result+=",";
245 break;
247 result+="√";
248 break;
250 result+="∧";
251 break;
253 assert(false);
254 }
255 result+=s[i];
256 }
258 result+="]";
259 else if(op==AggregationOperator::MIN ||
263 result+=")";
264 return result;
265}
266virtual value_type value(const std::string &s) const override {
267 return s;
268}
269value_type parse_leaf(const char *v) const {
270 return std::string(v);
271}
272/**
273 * @brief Special case: @c Formula serialises the circuit structure as
274 * a string rather than computing a semantic value, so a
275 * safe-query-rewritten circuit renders to its (rewritten)
276 * structural formula and remains a faithful description. The
277 * homomorphism question does not arise.
278 */
279virtual bool compatibleWithBooleanRewrite() const override {
280 return true;
281}
282/**
283 * @brief Serialise a Formula evaluation as text.
284 *
285 * Drops the cosmetic outer paren pair that @c plus / @c times / @c monus
286 * always produce: at the root there is no enclosing context, so the
287 * outer parens carry no disambiguation value.
288 */
289std::string to_text(const value_type &s) const {
290 if(s.size() < 2 || s.front() != '(' || s.back() != ')')
291 return s;
292 int depth = 0;
293 for(size_t i = 0; i < s.size() - 1; ++i) {
294 if(s[i] == '(') ++depth;
295 else if(s[i] == ')') {
296 if(--depth == 0)
297 return s;
298 }
299 }
300 return s.substr(1, s.size() - 2);
301}
302};
303}
304
305#endif /* FORMULA_H */
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50
@ OR
Boolean OR aggregate.
Definition Aggregation.h:57
@ MAX
MAX → input type.
Definition Aggregation.h:54
@ COUNT
COUNT(*) or COUNT(expr) → integer.
Definition Aggregation.h:51
@ AND
Boolean AND aggregate.
Definition Aggregation.h:56
@ SUM
SUM → integer or float.
Definition Aggregation.h:52
@ ARRAY_AGG
Array aggregation.
Definition Aggregation.h:59
@ NONE
No aggregation (returns NULL).
Definition Aggregation.h:60
@ MIN
MIN → input type.
Definition Aggregation.h:53
@ CHOOSE
Arbitrary selection (pick one element).
Definition Aggregation.h:58
@ AVG
AVG → float.
Definition Aggregation.h:55
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38
@ LT
Less than (<).
Definition Aggregation.h:42
@ GT
Greater than (>).
Definition Aggregation.h:44
@ LE
Less than or equal (<=).
Definition Aggregation.h:41
@ NE
Not equal (<>).
Definition Aggregation.h:40
@ GE
Greater than or equal (>=).
Definition Aggregation.h:43
static std::string join(Range const &elements, const char *const delimiter)
Concatenate elements of a range with a delimiter.
Definition Formula.h:46
static std::string strip_wrap_if_op(const std::string &s, const std::string &op)
If s is wrapped in a single matched outer paren pair AND its top-level operator (depth 1,...
Definition Formula.h:72
Abstract semiring interface for provenance evaluation.
Symbolic provenance representation over std::string.
Definition Formula.h:111
virtual value_type zero() const override
Return the additive identity .
Definition Formula.h:113
virtual bool compatibleWithBooleanRewrite() const override
Special case: Formula serialises the circuit structure as a string rather than computing a semantic v...
Definition Formula.h:279
virtual value_type cmp(value_type s1, ComparisonOperator op, value_type s2) const override
Evaluate a comparison gate.
Definition Formula.h:156
virtual value_type semimod(value_type x, value_type s) const override
Apply a semimodule scalar multiplication.
Definition Formula.h:180
virtual value_type agg(AggregationOperator op, const std::vector< std::string > &s) override
Evaluate an aggregation gate.
Definition Formula.h:183
virtual value_type times(const std::vector< value_type > &v) const override
Apply the multiplicative operation to a list of values.
Definition Formula.h:134
value_type parse_leaf(const char *v) const
Definition Formula.h:269
std::string to_text(const value_type &s) const
Serialise a Formula evaluation as text.
Definition Formula.h:289
virtual value_type monus(value_type x, value_type y) const override
Apply the monus (m-semiring difference) operation.
Definition Formula.h:145
virtual value_type one() const override
Return the multiplicative identity .
Definition Formula.h:116
virtual value_type delta(value_type x) const override
Apply the operator.
Definition Formula.h:149
virtual value_type value(const std::string &s) const override
Interpret a literal string as a semiring value.
Definition Formula.h:266
virtual value_type plus(const std::vector< value_type > &v) const override
Apply the additive operation to a list of values.
Definition Formula.h:119
Abstract base class for (m-)semirings.
Definition Semiring.h:93