ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
WhereCircuit.cpp
Go to the documentation of this file.
1/**
2 * @file WhereCircuit.cpp
3 * @brief WhereCircuit method implementations and where-provenance evaluation.
4 *
5 * Implements:
6 * - Gate creation methods: @c setGate(), @c setGateInput(),
7 * @c setGateProjection(), @c setGateEquality().
8 * - @c toString(): human-readable gate representation for debugging.
9 * - @c evaluate(): recursive DFS that computes the set of @c Locator
10 * values for each output column position, propagating provenance
11 * through TIMES (Cartesian product), PLUS (union), PROJECT
12 * (column selection), and EQ (equijoin merging) gates.
13 * - @c WhereCircuit::Locator::operator<() and @c toString().
14 */
15#include "WhereCircuit.h"
16
17extern "C" {
18#include "provsql_utils.h"
19#include <unistd.h>
20}
21
22#include <cassert>
23#include <string>
24#include <fstream>
25#include <sstream>
26#include <cstdlib>
27
28using namespace std;
29
31{
32 return Circuit::setGate(u, type);
33}
34
35gate_t WhereCircuit::setGateProjection(const uuid &u, vector<int> &&infos)
36{
37 auto id = setGate(u, WhereGate::PROJECT);
38 projection_info[id]=infos;
39 return id;
40}
41
42gate_t WhereCircuit::setGateEquality(const uuid &u, int pos1, int pos2)
43{
44 auto id = setGate(u, WhereGate::EQ);
45 equality_info[id]=make_pair(pos1,pos2);
46 return id;
47}
48
49gate_t WhereCircuit::setGateInput(const uuid &u, string table, int nb_columns)
50{
51 auto id = setGate(u, WhereGate::IN);
52 input_token[id]=u;
53 input_info[id]=make_pair(table, nb_columns);
54 return id;
55}
56
61
63{
64 std::string op;
65 string result;
66 auto gtype = getGateType(g);
67
68 switch(gtype) {
69 case WhereGate::IN:
70 return input_info.find(g)->second.first+":"+to_string(input_info.find(g)->second.second)+":"+input_token.find(g)->second;
72 op="?";
73 break;
75 op="⊗";
76 break;
77 case WhereGate::PLUS:
78 op="⊕";
79 break;
81 op="Π[";
82 {
83 bool first=true;
84 for(auto i : projection_info.find(g)->second) {
85 if(!first)
86 op+=",";
87 op+=to_string(i);
88 first=false;
89 }
90 }
91 op+="]";
92 break;
93 case WhereGate::EQ:
94 op="=["+to_string(equality_info.find(g)->second.first)+","+to_string(equality_info.find(g)->second.second)+"]";
95 }
96
97 for(auto s: getWires(g)) {
98 if(gtype==WhereGate::PROJECT || gtype==WhereGate::EQ)
99 result = op;
100 else if(!result.empty())
101 result+=" "+op+" ";
102 result+=toStringHelper(s, gtype);
103 }
104
105 // Parenthesis elision (mirrors BooleanCircuit::toStringHelper):
106 // * single-wire TIMES/PLUS: drop the now-meaningless wrap.
107 // * root call (parent = UNDETERMINED): drop the outer wrap.
108 // * same-op nesting (parent == gtype, TIMES/PLUS only): associative.
109 // PROJECT and EQ keep their parens so the prefix-bracketed shape
110 // (e.g., @c =[i,j](...)) renders unambiguously inside a parent gate.
111 bool single_join = (gtype==WhereGate::TIMES || gtype==WhereGate::PLUS)
112 && getWires(g).size()==1;
113 bool same_op_assoc = (gtype==WhereGate::TIMES || gtype==WhereGate::PLUS)
114 && parent==gtype;
115 if(single_join || parent==WhereGate::UNDETERMINED || same_op_assoc)
116 return result;
117 return "("+result+")";
118}
119
120vector<set<WhereCircuit::Locator> > WhereCircuit::evaluate(gate_t g) const
121{
122 vector<set<Locator> > v;
123
124 switch(getGateType(g)) {
125 case WhereGate::IN:
126 {
127 string table=input_info.find(g)->second.first;
128 uuid tid=input_token.find(g)->second;
129 int nb_columns=input_info.find(g)->second.second;
130 for(int i=0; i<nb_columns; ++i) {
131 set<Locator> s;
132 s.insert(Locator(table,tid,i+1));
133 v.push_back(s);
134 }
135 }
136 break;
137
138 case WhereGate::TIMES:
139 if(getWires(g).empty())
140 throw CircuitException("No wire connected to ⊗ gate");
141
142 for(auto g2 : getWires(g)) {
143 if(v.empty())
144 v=evaluate(g2);
145 else {
146 vector<set<Locator> > w=evaluate(g2);
147 v.insert(v.end(), w.begin(), w.end());
148 }
149 }
150 break;
151
152 case WhereGate::PLUS:
153 if(getWires(g).empty())
154 throw CircuitException("No wire connected to ⊕ gate");
155
156 for(auto g2 : getWires(g)) {
157 if(v.empty())
158 v=evaluate(g2);
159 else {
160 vector<set<Locator> > w=evaluate(g2);
161 if(w.size()!=v.size())
162 throw CircuitException("Incompatible inputs for ⊕ gate");
163
164 for(size_t k=0; k<v.size(); ++k) {
165 v[k].insert(w[k].begin(), w[k].end());
166 }
167 }
168 }
169 break;
170
172 if(getWires(g).size()!=1)
173 throw CircuitException("Not exactly one wire connected to Π gate");
174
175 {
176 vector<set<Locator> > w=evaluate(*getWires(g).begin());
177 vector<int> positions=projection_info.find(g)->second;
178 for(auto i : positions) {
179 if(i==0 || i>(int)w.size())
180 v.push_back(set<Locator>());
181 else
182 v.push_back(w[i-1]);
183 }
184 }
185 break;
186
187 case WhereGate::EQ:
188 if(getWires(g).size()!=1)
189 throw CircuitException("Not exactly one wire connected to = gate");
190
191 v=evaluate(*getWires(g).begin());
192 {
193 pair<int,int> positions=equality_info.find(g)->second;
194 if(positions.first>=1 && positions.first<=(int)v.size() &&
195 positions.second>=1 && positions.second<=(int)v.size()) {
196 v[positions.first-1].insert(v[positions.second-1].begin(), v[positions.second-1].end());
197 v[positions.second-1].insert(v[positions.first-1].begin(), v[positions.first-1].end());
198 }
199 }
200 break;
201
202 default:
203 throw CircuitException("Gate type not supported by where-provenance");
204 }
205
206 return v;
207}
208
210{
211 if(this->table != that.table)
212 return this->table < that.table;
213 if(this->tid != that.tid)
214 return this->tid < that.tid;
215 return this->position < that.position;
216}
217
219{
220 return table + ":" + tid + ":" +to_string(position);
221}
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:49
std::string to_string(gate_t g)
Convert a gate_t to its decimal string representation.
Definition Circuit.h:250
Where-provenance circuit tracking column-level data origin.
WhereGate
Gate types for a where-provenance circuit.
@ PROJECT
Projection gate recording which attributes are kept.
@ EQ
Equijoin gate recording the joined attribute pair.
@ PLUS
Sum (disjunction) of child where-provenance sets.
@ TIMES
Product (conjunction) of child where-provenance sets.
@ IN
Input gate for a single base-relation tuple.
@ UNDETERMINED
Placeholder; should not appear in a complete circuit.
Exception type thrown by circuit operations on invalid input.
Definition Circuit.h:206
std::string uuid
Definition Circuit.h:65
std::vector< gate_t > & getWires(gate_t g)
Definition Circuit.h:140
WhereGate getGateType(gate_t g) const
Definition Circuit.h:130
virtual gate_t setGate(const uuid &u, gateType type)
Create or update the gate associated with UUID u.
Definition Circuit.hpp:73
std::unordered_map< gate_t, std::pair< int, int > > equality_info
Joined attribute pair (pos1, pos2) for EQ gates.
std::unordered_map< gate_t, uuid > input_token
UUID of the source tuple for each IN gate.
std::unordered_map< gate_t, std::pair< std::string, int > > input_info
(table name, nb_columns) for each IN gate
std::unordered_map< gate_t, std::vector< int > > projection_info
Projected attribute positions for PROJECT gates.
gate_t setGateInput(const uuid &u, std::string table, int nb_columns)
Create an input gate for a specific table row.
gate_t setGateEquality(const uuid &u, int pos1, int pos2)
Create an equality (equijoin) gate for two attribute positions.
std::string toStringHelper(gate_t g, WhereGate parent) const
Recursive helper for toString that propagates the parent gate type for parenthesis elision.
gate_t setGate(const uuid &u, WhereGate type) override
Create or update the gate associated with UUID u.
std::string toString(gate_t g) const override
Return a textual description of gate g for debugging.
gate_t setGateProjection(const uuid &u, std::vector< int > &&infos)
Create a projection gate with column mapping.
std::vector< std::set< Locator > > evaluate(gate_t g) const
Evaluate the where-provenance circuit at gate g.
Core types, constants, and utilities shared across ProvSQL.
Describes the origin of a single attribute value.
std::string toString() const
Return a human-readable representation of this locator.
bool operator<(Locator that) const
Lexicographic ordering for use in std::set.
int position
Zero-based column index within the tuple.
std::string table
Name of the source relation.
uuid tid
UUID (row token) of the source tuple.