ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
where_provenance.cpp
Go to the documentation of this file.
1/**
2 * @file where_provenance.cpp
3 * @brief SQL function @c provsql.where_provenance() – column-level provenance.
4 *
5 * Implements @c provsql.where_provenance(), which evaluates the
6 * where-provenance of a query result tuple. Where-provenance identifies
7 * the base-relation cells (table, row, column) from which each output
8 * value was copied.
9 *
10 * The function builds a @c WhereCircuit from the mmap-backed circuit store
11 * (using SPI to obtain gate information), evaluates it via
12 * @c WhereCircuit::evaluate(), and returns the result as a single PostgreSQL
13 * @c text value of the form @c {[l;l],[l],...} made of @c "table.tid.position" locator strings.
14 */
15extern "C" {
16
17#include "postgres.h"
18#include "fmgr.h"
19#include "catalog/pg_type.h"
20#include "utils/uuid.h"
21#include "executor/spi.h"
22#include "utils/builtins.h"
23
24#include "provsql_shmem.h"
25#include "provsql_utils.h"
26
27PG_FUNCTION_INFO_V1(where_provenance);
28}
29
30#include <algorithm>
31#include <utility>
32#include <sstream>
#include <stdexcept>
33
34#include "WhereCircuit.h"
35#include "provsql_utils_cpp.h"
36
37using namespace std;
38
/**
 * @brief Parse a PostgreSQL text representation of an array of integer pairs.
 *
 * The first and last characters of @p s (the outer delimiters) are dropped,
 * then the remainder is split on '}' so that every segment holds one
 * @c "{a,b" pair, optionally preceded by the ',' that separates it from the
 * previous pair. The first character of each segment is skipped without
 * being checked.
 *
 * @param s String in the form @c {{a,b},{c,d},...}. The first component of a
 *          pair may be the literal @c NULL, which is read as 0.
 * @return Vector of (first, second) integer pairs in input order; empty for
 *         @c {}.
 * @throws std::invalid_argument if @p s is too short to hold the outer
 *         delimiters, if a segment has no ',' separating its two components,
 *         or (raised by @c std::stoi) if a component is not an integer.
 * @throws std::out_of_range (raised by @c std::stoi) if a component does not
 *         fit in an @c int.
 */
static std::vector<std::pair<int,int> > parse_array(std::string s)
{
  // Guard the "size - 2" computation below against unsigned underflow.
  if(s.size()<2)
    throw std::invalid_argument("parse_array: malformed array literal \"" + s + "\"");

  s=s.substr(1,s.size()-2); // Remove initial '{' and final '}'

  std::vector<std::pair<int,int> > result;
  std::istringstream iss(s);
  std::string p;

  while(std::getline(iss, p, '}'))
  {
    if(!p.empty() && p[0]==',')
      p.erase(0,1);

    // Search from index 1: index 0 is the opening delimiter of the pair.
    // find() returns npos both for an empty segment and for a missing comma;
    // reject these explicitly instead of letting npos wrap into an index.
    const std::string::size_type k=p.find(',',1);
    if(k==std::string::npos)
      throw std::invalid_argument("parse_array: malformed pair \"" + p + "\"");

    const std::string first=p.substr(1,k-1);
    const int i1=(first=="NULL") ? 0 : std::stoi(first);
    const int i2=std::stoi(p.substr(k+1));
    result.emplace_back(i1,i2);
  }

  return result;
}
70
71/**
72 * @brief Build a textual where-provenance description for the circuit rooted at @p token.
 *
 * Runs @c provsql.sub_circuit_for_where($1) through SPI with @p token as its
 * only argument and feeds every returned row into the circuit object @c c.
 * Columns are read by position: 1 = gate identifier, 2 = identifier of the
 * gate wired to it, 3 = gate type, 4 = table (input gates), 5 = number of
 * columns (input gates), 6 = extra information of @c eq gates, 7 = extra
 * information of @c project gates.
 *
 * The circuit is then evaluated at the gate of @p token and the resulting
 * sets are serialised as @c {[l;l;...],[l;...],...}: one bracketed group per
 * set, groups separated by ',', locators (rendered with @c toString())
 * separated by ';'. This format is not JSON.
 *
73 * @param token Datum containing the root provenance gate UUID.
74 * @return String describing the where-provenance, in the format given above.
75 */
// NOTE(review): the line carrying this function's return type and name is not
// present in this listing; the declarator below starts at its parameter list.
77 (Datum token)
78{
79 Datum arguments[1]={token};
80 const constants_t constants = get_constants(true);
81 Oid argtypes[1]={constants.OID_TYPE_UUID};
// One entry per query argument; in SPI's nulls convention ' ' means non-NULL.
82 char nulls[1] = {' '};
83
// NOTE(review): the return value of SPI_connect() is not checked.
84 SPI_connect();
85
// NOTE(review): `c` is used below but its declaration is not visible here
// (the listing skips line 86) - presumably the WhereCircuit instance; confirm
// against the real file.
87
// Per the SPI documentation, the last two arguments are read_only = true and
// count = 0 (no row limit).
88 if(SPI_execute_with_args(
89 "SELECT * FROM provsql.sub_circuit_for_where($1)", 1, argtypes, arguments, nulls, true, 0)
90 == SPI_OK_SELECT) {
91 int proc = SPI_processed;
92 TupleDesc tupdesc = SPI_tuptable->tupdesc;
93 SPITupleTable *tuptable = SPI_tuptable;
94
// Each row contributes one gate definition (input gates) or one gate plus one wire.
95 for (int i = 0; i < proc; i++)
96 {
97 HeapTuple tuple = tuptable->vals[i];
98
// NOTE(review): SPI_getvalue() returns NULL for a SQL NULL, and building a
// std::string from a null pointer is undefined; columns 1 and 3 are presumably
// never NULL - confirm. The same applies to columns 2, 5, 6 and 7 below.
99 string f = SPI_getvalue(tuple, tupdesc, 1);
100 string type = SPI_getvalue(tuple, tupdesc, 3);
101 if(type == "input") {
102 char *table = SPI_getvalue(tuple, tupdesc, 4);
103 if(table == nullptr)
// NOTE(review): the call consuming this message is missing from the listing
// (line 104 is skipped) - presumably provsql_error(; confirm.
105 "where_provenance: input gate %s could not be traced back to a "
106 "provenance-tracked relation (it may belong to a table that was "
107 "dropped, or to one outside the search_path)", f.c_str());
108 int nb_columns = stoi(SPI_getvalue(tuple, tupdesc, 5));
109
110 c.setGateInput(f, table, nb_columns);
111 } else {
112 auto id=c.getGate(f);
113
114 if(type == "times" || type == "assumed_boolean" || type == "annotation") {
115 /* assumed_boolean and annotation are single-child wrappers; a
116 * single-wire TIMES is the identity in WhereCircuit semantics (the
117 * Locator-set union of one child is just that child's), so funnel
118 * all three through the same case. */
// NOTE(review): no statement is visible in this branch (line 119 is skipped)
// - presumably a setGate call was lost in the listing; confirm.
120 } else if(type == "plus") {
// NOTE(review): likewise empty here (line 121 is skipped); confirm.
122 } else if(type == "project") {
// Order the parsed pairs by their second component, then hand the first
// components, in that order, to the projection gate.
123 vector<pair<int,int> > v = parse_array(SPI_getvalue(tuple, tupdesc, 7));
124 sort(v.begin(), v.end(), [](auto &left, auto &right) {
125 return left.second < right.second;
126 });
127 vector<int> infos;
128 for(auto p : v) {
129 infos.push_back(p.first);
130 }
131 c.setGateProjection(f, std::move(infos));
132 } else if(type == "eq") {
// Column 6 is wrapped in an extra pair of braces before being parsed;
// exactly one pair is expected, giving the two positions of the equality.
133 vector<pair<int,int> > v = parse_array(std::string("{")+SPI_getvalue(tuple, tupdesc, 6)+"}");
134 if(v.size()!=1)
135 provsql_error("Incorrect extra information on eq gate");
136 c.setGateEquality(f, v[0].first, v[0].second);
137 } else if(type == "monusr" || type == "monusl" || type == "monus") {
138 provsql_error("Where-provenance of non-monotone query not supported");
139 } else {
140 provsql_error("Where-provenance does not support gates of type %s", type.c_str());
141 }
// Every non-input row also adds a wire from gate f to the gate named in column 2.
142 c.addWire(id, c.getGate(SPI_getvalue(tuple, tupdesc, 2)));
143 }
144 }
145 } else {
146 provsql_error("SPI_execute_with_args failed on provsql.sub_circuit_for_where");
147 }
148
149 SPI_finish();
150
151 auto gate = c.getGate(UUIDDatum2string(token));
152
153 vector<set<WhereCircuit::Locator> > v = c.evaluate(gate);
154
// Serialise as {[l;l],[l],...}: ofirst / ifirst suppress the separator
// before the first group and before the first locator of a group.
155 ostringstream os;
156 os << "{";
157 bool ofirst=true;
158 for(auto s : v) {
159 if(!ofirst)
160 os << ",";
161 os << "[";
162 bool ifirst=true;
163 for(auto l : s) {
164 if(!ifirst)
165 os << ";";
166 os << l.toString();
167 ifirst=false;
168 }
169 os << "]";
170 ofirst=false;
171 }
172 os << "}";
173
174 return os.str();
175}
176
177/** @brief PostgreSQL-callable wrapper for where_provenance(). */
178Datum where_provenance(PG_FUNCTION_ARGS)
179{
180 try {
181 Datum token = PG_GETARG_DATUM(0);
182
183 PG_RETURN_TEXT_P(cstring_to_text(where_provenance_internal(token).c_str()));
184 } catch(const std::exception &e) {
185 provsql_error("where_provenance: %s", e.what());
186 } catch(...) {
187 provsql_error("where_provenance: Unknown exception");
188 }
189}
Where-provenance circuit tracking column-level data origin.
@ PLUS
Sum (disjunction) of child where-provenance sets.
@ TIMES
Product (conjunction) of child where-provenance sets.
void addWire(gate_t f, gate_t t)
Add a directed wire from gate f (parent) to gate t (child).
Definition Circuit.hpp:81
gate_t getGate(const uuid &u)
Return (or create) the gate associated with UUID u.
Definition Circuit.hpp:33
Circuit encoding where-provenance (column-level data origin).
gate_t setGateInput(const uuid &u, std::string table, int nb_columns)
Create an input gate for a specific table row.
gate_t setGateEquality(const uuid &u, int pos1, int pos2)
Create an equality (equijoin) gate for two attribute positions.
gate_t setGate(const uuid &u, WhereGate type) override
Create or update the gate associated with UUID u.
gate_t setGateProjection(const uuid &u, std::vector< int > &&infos)
Create a projection gate with column mapping.
std::vector< std::set< Locator > > evaluate(gate_t g) const
Evaluate the where-provenance circuit at gate g.
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
Shared-memory segment and inter-process pipe management.
constants_t get_constants(bool failure_if_not_possible)
Retrieve the cached OID constants for the current database.
Core types, constants, and utilities shared across ProvSQL.
string UUIDDatum2string(Datum token)
Convert a PostgreSQL Datum holding a UUID to a std::string.
C++ utility functions for UUID manipulation.
Structure to store the value of various constants.
Oid OID_TYPE_UUID
OID of the uuid TYPE.
Datum where_provenance(PG_FUNCTION_ARGS)
PostgreSQL-callable wrapper for where_provenance().
static string where_provenance_internal(Datum token)
Build a textual where-provenance description for the circuit rooted at token.
static vector< pair< int, int > > parse_array(string s)
Parse a PostgreSQL text representation of an array of integer pairs.