ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
where_provenance.cpp
Go to the documentation of this file.
/**
 * @file where_provenance.cpp
 * @brief SQL function @c provsql.where_provenance() – column-level provenance.
 *
 * Implements @c provsql.where_provenance(), which evaluates the
 * where-provenance of a query result tuple. Where-provenance identifies
 * the base-relation cells (table, row, column) from which each output
 * value was copied.
 *
 * The function builds a @c WhereCircuit from the mmap-backed circuit store
 * (using SPI to obtain gate information), evaluates it via
 * @c WhereCircuit::evaluate(), and returns the result as a single PostgreSQL
 * @c text value of the form @c {[loc;loc],[loc],...}, where each locator is
 * a @c "table.tid.position" string.
 */
15extern "C" {
16
17#include "postgres.h"
18#include "fmgr.h"
19#include "catalog/pg_type.h"
20#include "utils/uuid.h"
21#include "executor/spi.h"
22#include "utils/builtins.h"
23
24#include "provsql_shmem.h"
25#include "provsql_utils.h"
26
27PG_FUNCTION_INFO_V1(where_provenance);
28}
29
30#include <algorithm>
31#include <utility>
32#include <sstream>
33
34#include "WhereCircuit.h"
35#include "provsql_utils_cpp.h"
36
37using namespace std;
38
/**
 * @brief Parse a PostgreSQL text representation of an array of integer pairs.
 *
 * The outer braces are stripped and the remainder is split on @c '}', so
 * that every piece has the shape @c "{a,b" (preceded by a separating comma
 * for all pieces but the first). A first component spelled @c NULL is
 * mapped to 0.
 *
 * @param s String in the form @c {{a,b},{c,d},...}. A string shorter than
 *          two characters is treated as holding no pair.
 * @return Vector of (first, second) integer pairs, in input order.
 * @throws std::invalid_argument if a component is missing or is not an
 *         integer (raised by @c std::stoi).
 * @throws std::out_of_range if a piece is empty or a component does not
 *         fit in an @c int.
 */
static std::vector<std::pair<int,int> > parse_array(std::string s)
{
  std::vector<std::pair<int,int> > result;

  // Nothing to strip braces from: report "no pairs" instead of letting
  // substr() throw on the empty string.
  if(s.size()<2)
    return result;

  s=s.substr(1,s.size()-2); // Remove initial '{' and final '}'

  std::istringstream iss(s);
  std::string item;

  while(std::getline(iss, item, '}'))
  {
    if(!item.empty() && item[0]==',')
      item.erase(0,1); // Separator between two consecutive pairs

    // item is now "{a,b"; position 0 holds the pair's opening brace.
    const std::string::size_type comma=item.find(',',1);
    const bool has_comma=(comma!=std::string::npos);

    // Without a comma the second component is left empty so that stoi()
    // rejects the pair, rather than re-reading the first component.
    const std::string first=
      item.substr(1, has_comma ? comma-1 : std::string::npos);
    const std::string second=
      has_comma ? item.substr(comma+1) : std::string();

    const int i1=(first=="NULL") ? 0 : std::stoi(first);
    const int i2=std::stoi(second);
    result.push_back(std::make_pair(i1,i2));
  }

  return result;
}
70
71/**
72 * @brief Build a JSON where-provenance description for the circuit rooted at @p token.
73 * @param token Datum containing the root provenance gate UUID.
74 * @return JSON string describing the where-provenance.
75 */
77 (Datum token)
78{
79 Datum arguments[1]={token};
80 const constants_t constants = get_constants(true);
81 Oid argtypes[1]={constants.OID_TYPE_UUID};
82 char nulls[1] = {' '};
83
84 SPI_connect();
85
87
88 if(SPI_execute_with_args(
89 "SELECT * FROM provsql.sub_circuit_for_where($1)", 1, argtypes, arguments, nulls, true, 0)
90 == SPI_OK_SELECT) {
91 int proc = SPI_processed;
92 TupleDesc tupdesc = SPI_tuptable->tupdesc;
93 SPITupleTable *tuptable = SPI_tuptable;
94
95 for (int i = 0; i < proc; i++)
96 {
97 HeapTuple tuple = tuptable->vals[i];
98
99 string f = SPI_getvalue(tuple, tupdesc, 1);
100 string type = SPI_getvalue(tuple, tupdesc, 3);
101 if(type == "input") {
102 string table = SPI_getvalue(tuple, tupdesc, 4);
103 int nb_columns = stoi(SPI_getvalue(tuple, tupdesc, 5));
104
105 c.setGateInput(f, table, nb_columns);
106 } else {
107 auto id=c.getGate(f);
108
109 if(type == "times") {
111 } else if(type == "plus") {
113 } else if(type == "project") {
114 vector<pair<int,int> > v = parse_array(SPI_getvalue(tuple, tupdesc, 7));
115 sort(v.begin(), v.end(), [](auto &left, auto &right) {
116 return left.second < right.second;
117 });
118 vector<int> infos;
119 for(auto p : v) {
120 infos.push_back(p.first);
121 }
122 c.setGateProjection(f, move(infos));
123 } else if(type == "eq") {
124 vector<pair<int,int> > v = parse_array(std::string("{")+SPI_getvalue(tuple, tupdesc, 6)+"}");
125 if(v.size()!=1)
126 provsql_error("Incorrect extra information on eq gate");
127 c.setGateEquality(f, v[0].first, v[0].second);
128 } else if(type == "monusr" || type == "monusl" || type == "monus") {
129 provsql_error("Where-provenance of non-monotone query not supported");
130 } else {
131 provsql_error("Wrong type of gate in circuit");
132 }
133 c.addWire(id, c.getGate(SPI_getvalue(tuple, tupdesc, 2)));
134 }
135 }
136 } else {
137 provsql_error("SPI_execute_with_args failed on provsql.sub_circuit_for_where");
138 }
139
140 SPI_finish();
141
142 auto gate = c.getGate(UUIDDatum2string(token));
143
144 vector<set<WhereCircuit::Locator> > v = c.evaluate(gate);
145
146 ostringstream os;
147 os << "{";
148 bool ofirst=true;
149 for(auto s : v) {
150 if(!ofirst)
151 os << ",";
152 os << "[";
153 bool ifirst=true;
154 for(auto l : s) {
155 if(!ifirst)
156 os << ";";
157 os << l.toString();
158 ifirst=false;
159 }
160 os << "]";
161 ofirst=false;
162 }
163 os << "}";
164
165 return os.str();
166}
167
168/** @brief PostgreSQL-callable wrapper for where_provenance(). */
169Datum where_provenance(PG_FUNCTION_ARGS)
170{
171 try {
172 Datum token = PG_GETARG_DATUM(0);
173
174 PG_RETURN_TEXT_P(cstring_to_text(where_provenance_internal(token).c_str()));
175 } catch(const std::exception &e) {
176 provsql_error("where_provenance: %s", e.what());
177 } catch(...) {
178 provsql_error("where_provenance: Unknown exception");
179 }
180}
Where-provenance circuit tracking column-level data origin.
@ PLUS
Sum (disjunction) of child where-provenance sets.
@ TIMES
Product (conjunction) of child where-provenance sets.
void addWire(gate_t f, gate_t t)
Add a directed wire from gate f (parent) to gate t (child).
Definition Circuit.hpp:81
gate_t getGate(const uuid &u)
Return (or create) the gate associated with UUID u.
Definition Circuit.hpp:33
Circuit encoding where-provenance (column-level data origin).
gate_t setGateInput(const uuid &u, std::string table, int nb_columns)
Create an input gate for a specific table row.
gate_t setGateEquality(const uuid &u, int pos1, int pos2)
Create an equality (equijoin) gate for two attribute positions.
gate_t setGate(const uuid &u, WhereGate type) override
Create or update the gate associated with UUID u.
gate_t setGateProjection(const uuid &u, std::vector< int > &&infos)
Create a projection gate with column mapping.
std::vector< std::set< Locator > > evaluate(gate_t g) const
Evaluate the where-provenance circuit at gate g.
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
Shared-memory segment and inter-process pipe management.
constants_t get_constants(bool failure_if_not_possible)
Retrieve the cached OID constants for the current database.
Core types, constants, and utilities shared across ProvSQL.
string UUIDDatum2string(Datum token)
Convert a PostgreSQL Datum holding a UUID to a std::string.
C++ utility functions for UUID manipulation.
Structure to store the value of various constants.
Oid OID_TYPE_UUID
OID of the uuid TYPE.
Datum where_provenance(PG_FUNCTION_ARGS)
PostgreSQL-callable wrapper for where_provenance().
static string where_provenance_internal(Datum token)
Build a JSON where-provenance description for the circuit rooted at token.
static vector< pair< int, int > > parse_array(string s)
Parse a PostgreSQL text representation of an array of integer pairs.