ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
Why.h
Go to the documentation of this file.
1/**
2 * @file semiring/Why.h
3 * @brief Why-provenance semiring (set of witness sets).
4 *
5 * The **why-provenance** semiring represents provenance as a set of
6 * *witness sets*. Each witness set is a set of base-tuple labels that
7 * together "witness" one derivation of the query result. The full
8 * provenance is the collection of all such witness sets.
9 *
10 * Formally, the carrier type is @f$\mathcal{P}(\mathcal{P}(\text{Labels}))@f$,
11 * implemented as @c why_provenance_t = @c std::set<std::set<std::string>>.
12 *
13 * Operations:
14 * - @c zero() → ∅ (no derivations)
15 * - @c one() → {∅} (one derivation requiring no witnesses)
16 * - @c plus() → union of input sets
17 * - @c times() → pairwise concatenation (Cartesian product of witnesses)
18 * - @c monus() → remove elements of @f$y@f$ from @f$x@f$
19 * - @c delta() → identity (returns @f$x@f$ unchanged if non-empty)
20 *
21 * This semiring is idempotent (set union is idempotent: @f$a \oplus a = a@f$),
22 * but **not** absorptive in the @f$\mathbb{1} \oplus a = \mathbb{1}@f$ sense
23 * used by @c absorptive(): @f${\{\emptyset\}} \cup \{\{x\}\} = \{\emptyset, \{x\}\} \neq \{\emptyset\}@f$.
24 *
25 * @see https://provsql.org/lean-docs/Provenance/Semirings/Why.html
26 * Lean 4 verified instance: @c instSemiringWithMonusWhy, with
27 * proofs of @c Why.idempotent, @c Why.not_absorptive, and
28 * @c Why.not_mul_sub_left_distributive.
29 */
30#ifndef WHY_H
31#define WHY_H
32
33#include <cstring>
34#include <set>
35#include <sstream>
36#include <string>
37#include <vector>
38#include <algorithm>
39
40#include "Semiring.h"
41
42namespace semiring {
43
44/** @brief A single label identifying a base tuple. */
45using label_t = std::string;
46/** @brief A witness: a set of labels that collectively justify one derivation. */
47using label_set = std::set<label_t>;
48/** @brief Why-provenance value: the full set of all witnesses. */
49using why_provenance_t = std::set<label_set>;
50
51/**
52 * @brief Why-provenance semiring.
53 *
54 * Each gate evaluates to a @c why_provenance_t (set of witness sets).
55 */
56class Why : public Semiring<why_provenance_t> {
57public:
58// Additive identity
59value_type zero() const override {
60 return {};
61}
62
63// Multiplicative identity: empty set (⊗(x,{{}}) means "don't change")
64value_type one() const override {
65 return { {} };
66}
67
68// Union of all input sets
69value_type plus(const std::vector<value_type> &vec) const override {
70 value_type result;
71 for (const auto &v : vec) {
72 result.insert(v.begin(), v.end());
73 }
74 return result;
75}
76
77// Cartesian product: union each inner set with each other
78value_type times(const std::vector<value_type> &vec) const override {
79 if (vec.empty()) return one();
80
81 value_type result = vec[0];
82 for (size_t i = 1; i < vec.size(); ++i) {
83 value_type temp;
84 for (const auto &s1 : result) {
85 for (const auto &s2 : vec[i]) {
86 label_set combined = s1;
87 combined.insert(s2.begin(), s2.end());
88 temp.insert(std::move(combined));
89 }
90 }
91 result = std::move(temp);
92 }
93 return result;
94}
95
96
97virtual value_type monus(value_type x, value_type y) const override {
98 for (auto const &s : y) {
99 x.erase(s);
100 }
101 return x;
102}
103
104value_type delta(value_type x) const override {
105 return x.empty() ? zero() : x;
106}
107
108/**
109 * @brief No semiring homomorphism @c BoolFunc(Y) →+* Why exists, so
110 * the safe-query Boolean rewrite is unsound under
111 * why-provenance. (Why tracks set-of-witness-sets per
112 * derivation; the read-once rewrite collapses witnesses in
113 * general.) Inherits the @c false default from @c Semiring;
114 * this override exists for documentation.
115 *
116 * Lean: @c Provenance.Semirings.Why.no_hom_from_BoolFunc
117 * (provenance-lean/Provenance/Semirings/Why.lean).
118 */
119virtual bool compatibleWithBooleanRewrite() const override {
120 return false;
121}
122
123/**
124 * @brief Parse a leaf value into a why-provenance set-of-sets.
125 *
126 * Accepted input formats (round-trip with @c to_text):
127 * - Bare label (no leading @c '{'): @c "Alice" → @c {{Alice}}
128 * - Empty set-of-sets: @c "{}" → zero
129 * - Full form: @c "{{a},{b,c}}" → @c {{a},{b,c}}
130 * (in particular @c "{{}}" parses as @c one)
131 *
132 * Labels may contain any character except @c ',', @c '{', @c '}'.
133 */
134value_type parse_leaf(const char *v) const {
135 if(*v != '{') {
136 label_set single;
137 single.insert(std::string(v));
138 value_type result;
139 result.insert(std::move(single));
140 return result;
141 }
142
143 const char *p = v + 1;
144 value_type result;
145
146 if(*p == '}') {
147 if(*(p+1) != '\0')
148 throw SemiringException("Why: trailing junk after closing '}'");
149 return result;
150 }
151
152 while(*p) {
153 if(*p != '{')
154 throw SemiringException("Why: expected '{' starting a witness");
155 ++p;
156 label_set witness;
157 if(*p != '}') {
158 while(true) {
159 const char *start = p;
160 while(*p && *p != ',' && *p != '}' && *p != '{')
161 ++p;
162 if(p == start)
163 throw SemiringException("Why: empty label");
164 witness.insert(std::string(start, p - start));
165 if(*p == ',') { ++p; continue; }
166 break;
167 }
168 }
169 if(*p != '}')
170 throw SemiringException("Why: expected '}' ending a witness");
171 ++p;
172 result.insert(std::move(witness));
173
174 if(*p == '}') {
175 if(*(p+1) != '\0')
176 throw SemiringException("Why: trailing junk after closing '}'");
177 return result;
178 }
179 if(*p != ',')
180 throw SemiringException("Why: expected ',' or '}' between witnesses");
181 ++p;
182 }
183 throw SemiringException("Why: unterminated input");
184}
185
186std::string to_text(const value_type &prov) const {
187 std::ostringstream oss;
188 oss << "{";
189 bool firstOuter = true;
190 for (const auto &inner : prov) {
191 if (!firstOuter) oss << ",";
192 firstOuter = false;
193 oss << "{";
194 bool firstInner = true;
195 for (const auto &label : inner) {
196 if (!firstInner) oss << ",";
197 firstInner = false;
198 oss << label;
199 }
200 oss << "}";
201 }
202 oss << "}";
203 return oss.str();
204}
205
206};
207
208}
209
210#endif // WHY_H
Abstract semiring interface for provenance evaluation.
Exception thrown when a semiring operation is not supported.
Definition Semiring.h:51
Abstract base class for (m-)semirings.
Definition Semiring.h:93
Why-provenance semiring.
Definition Why.h:56
value_type one() const override
Return the multiplicative identity .
Definition Why.h:64
value_type times(const std::vector< value_type > &vec) const override
Apply the multiplicative operation to a list of values.
Definition Why.h:78
value_type plus(const std::vector< value_type > &vec) const override
Apply the additive operation to a list of values.
Definition Why.h:69
std::string to_text(const value_type &prov) const
Definition Why.h:186
virtual value_type monus(value_type x, value_type y) const override
Apply the monus (m-semiring difference) operation.
Definition Why.h:97
value_type zero() const override
Return the additive identity .
Definition Why.h:59
value_type parse_leaf(const char *v) const
Parse a leaf value into a why-provenance set-of-sets.
Definition Why.h:134
virtual bool compatibleWithBooleanRewrite() const override
No semiring homomorphism BoolFunc(Y) →+* Why exists, so the safe-query Boolean rewrite is unsound und...
Definition Why.h:119
value_type delta(value_type x) const override
Apply the operator.
Definition Why.h:104
std::set< label_t > label_set
A witness: a set of labels that collectively justify one derivation.
Definition Why.h:47
std::string label_t
A single label identifying a base tuple.
Definition Why.h:45
std::set< label_set > why_provenance_t
Why-provenance value: the full set of all witnesses.
Definition Why.h:49