ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
provenance_evaluate_compiled.cpp
Go to the documentation of this file.
1/**
2 * @file provenance_evaluate_compiled.cpp
3 * @brief SQL function @c provsql.provenance_evaluate_compiled() – C++ semiring evaluation.
4 *
5 * Implements the compiled (C++ generic) variant of provenance circuit
6 * evaluation. Unlike @c provenance_evaluate() (which calls user-supplied
7 * PostgreSQL functions for each semiring operation), this function
8 * evaluates the circuit using one of the built-in C++ semiring
9 * implementations from the @c semiring/ directory.
10 *
11 * Supported semirings (selected by the @p semiring argument):
12 * - @c "boolean" → @c semiring::Boolean
13 * - @c "counting" → @c semiring::Counting
14 * - @c "formula" → @c semiring::Formula (symbolic expression)
15 * - @c "why" → @c semiring::Why (witness sets)
16 * - @c "boolexpr" → @c semiring::BoolExpr (Boolean circuit for probability)
17 *
18 * The function first builds a provenance mapping (input-gate UUID → semiring
19 * value) by querying the @c tmp_uuids table via SPI
20 * (using @c initialize_provenance_mapping()), then evaluates the
21 * @c GenericCircuit with @c GenericCircuit::evaluate() and returns the
22 * result as text.
23 */
24extern "C" {
25#include "postgres.h"
26#include "fmgr.h"
27#include "catalog/pg_type.h"
28#include "utils/uuid.h"
29#include "utils/lsyscache.h"
30#include "provsql_shmem.h"
31#include "provsql_utils.h"
32
33PG_FUNCTION_INFO_V1(provenance_evaluate_compiled);
34}
35
36#include <string>
37#include <sstream>
38#include <vector>
39#include <unordered_map>
40#include <algorithm>
41
42#include "having_semantics.hpp"
44#include "semiring/Boolean.h"
45#include "semiring/Counting.h"
46#include "semiring/Formula.h"
47#include "semiring/Why.h"
48#include "semiring/BoolExpr.h"
49
50
51const char *drop_temp_table = "DROP TABLE IF EXISTS tmp_uuids;";
52
53/**
54 * @brief Evaluate the Boolean semiring provenance for a circuit.
55 * @param constants Extension OID cache.
56 * @param c Generic circuit to evaluate.
57 * @param g Root gate of the sub-circuit.
58 * @param inputs Set of input gate IDs.
59 * @param semiring Semiring name (must be "boolean").
60 * @param drop_table Whether the temporary UUID table should be dropped.
61 * @return Bool datum with the evaluated provenance.
62 */
63static Datum pec_bool(
64 const constants_t &constants,
66 gate_t g,
67 const std::set<gate_t> &inputs,
68 const std::string &semiring,
69 bool drop_table)
70{
71 std::unordered_map<gate_t, bool> provenance_mapping;
72 initialize_provenance_mapping<bool>(constants, c, provenance_mapping, [](const char *v) {
73 return *v=='t';
74 }, drop_table);
75
76 if(semiring!="boolean")
77 throw CircuitException("Unknown semiring for type varchar: "+semiring);
78
79 provsql_try_having_boolean(c,g,provenance_mapping);
80 bool out = c.evaluate<semiring::Boolean>(g, provenance_mapping, semiring::Boolean());
81
82 PG_RETURN_BOOL(out);
83}
84
85/**
86 * @brief Evaluate the Boolean-expression semiring provenance for a circuit.
87 * @param constants Extension OID cache.
88 * @param bc Boolean circuit to render as a formula.
89 * @param root Root gate of the circuit.
90 * @return Text datum with the formula string.
91 */
92static Datum pec_boolexpr(
93 const constants_t &constants,
95 gate_t root)
96{
97 std::string out = bc.toString(root);
98
99 text *result = (text *) palloc(VARHDRSZ + out.size());
100 SET_VARSIZE(result, VARHDRSZ + out.size());
101 memcpy(VARDATA(result), out.c_str(), out.size());
102 PG_RETURN_TEXT_P(result);
103}
104
105/**
106 * @brief Evaluate the Why-provenance semiring for a circuit.
107 * @param constants Extension OID cache.
108 * @param c Generic circuit to evaluate.
109 * @param g Root gate.
110 * @param inputs Set of input gate IDs.
111 * @param drop_table Whether the temporary UUID table should be dropped.
112 * @return Varchar datum containing the serialised Why-provenance.
113 */
114static Datum pec_why(
115 const constants_t &constants,
117 gate_t g,
118 const std::set<gate_t> &inputs,
119 bool drop_table)
120{
121 std::unordered_map<gate_t, semiring::why_provenance_t> provenance_mapping;
122
123 initialize_provenance_mapping<semiring::why_provenance_t>(
124 constants,
125 c,
126 provenance_mapping,
127 [](const char *v) {
129 semiring::label_set single;
130 if(strchr(v, '{'))
131 provsql_error("Complex Why-semiring values for input tuples not currently supported.");
132 single.insert(std::string(v));
133 result.insert(std::move(single));
134 return result;
135 },
136 drop_table
137 );
138
139 provsql_try_having_why(c, g, provenance_mapping);
140 semiring::why_provenance_t prov = c.evaluate<semiring::Why>(g, provenance_mapping, semiring::Why());
141
142 // Serialize nested set structure: {{x},{y}}
143 std::ostringstream oss;
144 oss << "{";
145 bool firstOuter = true;
146 for (const auto &inner : prov) {
147 if (!firstOuter) oss << ",";
148 firstOuter = false;
149 oss << "{";
150 bool firstInner = true;
151 for (const auto &label : inner) {
152 if (!firstInner) oss << ",";
153 firstInner = false;
154 oss << label;
155 }
156 oss << "}";
157 }
158 oss << "}";
159
160 std::string out = oss.str();
161 text *result = (text *) palloc(VARHDRSZ + out.size());
162 SET_VARSIZE(result, VARHDRSZ + out.size());
163 memcpy(VARDATA(result), out.c_str(), out.size());
164 PG_RETURN_TEXT_P(result);
165
166}
167/**
168 * @brief Evaluate a varchar semiring provenance for a circuit.
169 * @param constants Extension OID cache.
170 * @param c Generic circuit to evaluate.
171 * @param g Root gate.
172 * @param inputs Set of input gate IDs.
173 * @param semiring Semiring name (e.g. "formula").
174 * @param drop_table Whether the temporary UUID table should be dropped.
175 * @return Varchar datum containing the serialised provenance.
176 */
177static Datum pec_varchar(
178 const constants_t &constants,
180 gate_t g,
181 const std::set<gate_t> &inputs,
182 const std::string &semiring,
183 bool drop_table)
184{
185 std::unordered_map<gate_t, std::string> provenance_mapping;
186 initialize_provenance_mapping<std::string>(
187 constants, c, provenance_mapping,
188 [](const char *v) {
189 return std::string(v);
190 },
191 drop_table
192 );
193
194 if (semiring!= "formula")
195 throw CircuitException("Unknown seimring for type varchar: " + semiring);
196
197 provsql_try_having_formula(c, g, provenance_mapping);
198 std::string s = c.evaluate<semiring::Formula>(g, provenance_mapping, semiring::Formula());
199
200 text *result = (text *) palloc(VARHDRSZ + s.size());
201 SET_VARSIZE(result, VARHDRSZ + s.size());
202 memcpy(VARDATA(result), s.c_str(), s.size());
203 PG_RETURN_TEXT_P(result);
204
205}
206/**
207 * @brief Evaluate an integer semiring provenance for a circuit.
208 * @param constants Extension OID cache.
209 * @param c Generic circuit to evaluate.
210 * @param g Root gate.
211 * @param inputs Set of input gate IDs.
212 * @param semiring Semiring name (e.g. "counting").
213 * @param drop_table Whether the temporary UUID table should be dropped.
214 * @return Int32 datum with the evaluated provenance.
215 */
216static Datum pec_int(
217 const constants_t &constants,
219 gate_t g,
220 const std::set<gate_t> &inputs,
221 const std::string &semiring,
222 bool drop_table)
223{
224 std::unordered_map<gate_t, unsigned> provenance_mapping;
225 initialize_provenance_mapping<unsigned>(constants, c, provenance_mapping, [](const char *v) {
226 return atoi(v);
227 }, drop_table);
228
229 if(semiring!="counting")
230 throw CircuitException("Unknown semiring for type int: "+semiring);
231
232 provsql_try_having_counting(c, g, provenance_mapping);
233 unsigned out = c.evaluate<semiring::Counting>(g, provenance_mapping, semiring::Counting());
234
235 PG_RETURN_INT32((int32) out);
236
237}
238
239/**
240 * @brief Join a provenance mapping table with a set of UUIDs using SPI.
241 * @param table OID of the provenance mapping relation.
242 * @param uuids List of UUID strings to join against.
243 * @return @c true if a temporary table was created (caller must drop it).
244 */
245bool join_with_temp_uuids(Oid table, const std::vector<std::string> &uuids) {
246 if (SPI_connect() != SPI_OK_CONNECT)
247 throw CircuitException("SPI_connect failed");
248
249 char *table_name = get_rel_name(table);
250 if (!table_name) {
251 SPI_finish();
252 throw CircuitException("Invalid OID: no such table");
253 }
254
255 constexpr size_t nb_max_uuid_value = 10000000;
256
257 StringInfoData join_query;
258 initStringInfo(&join_query);
259 bool drop_table = false;
260
261 // Two different mechanisms to implement the join (unless there are no
262 // UUIDs):
263 // if there are less than nb_max_uuid_value UUIDs, we do a join
264 // with a VALUES() list;
265 // otherwise we create a temporary table where we dump the inserts
266 // and join with it.
267 if(uuids.size() == 0) {
268 appendStringInfo(&join_query,
269 "SELECT value, provenance FROM \"%s\" WHERE 'f'", table_name);
270 } else if(uuids.size() <= nb_max_uuid_value) {
271 appendStringInfo(&join_query,
272 "SELECT value, provenance FROM \"%s\" t JOIN (VALUES", table_name);
273 bool first=true;
274 for(auto u: uuids) {
275 appendStringInfo(&join_query, "%s('%s'::uuid)", first?"":",", u.c_str());
276 first=false;
277 }
278
279 appendStringInfo(&join_query, ") AS u(id) ON t.provenance=u.id");
280 } else {
281 const char *create_temp_table = "CREATE TEMP TABLE tmp_uuids(id uuid);";
282 if (SPI_exec(create_temp_table, 0) != SPI_OK_UTILITY) {
283 SPI_finish();
284 throw CircuitException("Failed to create temporary table");
285 }
286 drop_table = true;
287
288 constexpr size_t batch_size = 1000;
289
290 for (size_t offset = 0; offset < uuids.size(); offset += batch_size) {
291 StringInfoData insert_query;
292 initStringInfo(&insert_query);
293 appendStringInfo(&insert_query, "INSERT INTO tmp_uuids VALUES ");
294
295 size_t end = std::min(offset + batch_size, uuids.size());
296 for (size_t i = offset; i < end; ++i) {
297 appendStringInfo(&insert_query, "('%s')%s", uuids[i].c_str(), (i + 1 == end) ? "" : ",");
298 }
299
300 int retval=SPI_exec(insert_query.data, 0);
301 pfree(insert_query.data);
302
303 if(retval != SPI_OK_INSERT) {
304 SPI_exec(drop_temp_table, 0);
305 SPI_finish();
306 throw CircuitException("Batch insert into temp table failed");
307 }
308 }
309
310 appendStringInfo(&join_query,
311 "SELECT value, provenance FROM \"%s\" t JOIN tmp_uuids u ON t.provenance = u.id", table_name);
312 }
313
314 if (SPI_exec(join_query.data, 0) != SPI_OK_SELECT) {
315 if(drop_table)
316 SPI_exec(drop_temp_table, 0);
317 SPI_finish();
318 throw CircuitException("Join query failed");
319 }
320
321 return drop_table;
322}
323
324/**
325 * @brief Core implementation of compiled provenance evaluation.
326 * @param token UUID of the root provenance gate.
327 * @param table OID of the provenance mapping relation.
328 * @param semiring Name of the semiring to evaluate over.
329 * @param type OID of the result element type.
330 * @return Datum containing the semiring evaluation result.
331 */
333 (pg_uuid_t token, Oid table, const std::string &semiring, Oid type)
334{
335 constants_t constants = get_constants(true);
336
337 if(semiring=="boolexpr") {
338 gate_t root;
339 BooleanCircuit bc = getBooleanCircuit(token, root);
340 return pec_boolexpr(constants, bc, root);
341 }
342
344 auto g = c.getGate(uuid2string(token));
345 auto inputs = c.getInputs();
346
347 std::vector<std::string> inputs_uuid;
348 std::transform(inputs.begin(), inputs.end(), std::back_inserter(inputs_uuid), [&c](auto x) {
349 return c.getUUID(x);
350 });
351 bool drop_table = join_with_temp_uuids(table, inputs_uuid);
352
353 if (type == constants.OID_TYPE_VARCHAR)
354 {
355 if (semiring == "why")
356 return pec_why(constants, c, g, inputs, drop_table);
357 else
358 return pec_varchar(constants, c, g, inputs, semiring, drop_table);
359 }
360 else if(type==constants.OID_TYPE_INT)
361 return pec_int(constants, c, g, inputs, semiring, drop_table);
362 else if(type==constants.OID_TYPE_BOOL)
363 return pec_bool(constants, c, g, inputs, semiring, drop_table);
364 else
365 throw CircuitException("Unknown element type for provenance_evaluate_compiled");
366}
367
368/** @brief PostgreSQL-callable wrapper for provenance_evaluate_compiled(). */
369Datum provenance_evaluate_compiled(PG_FUNCTION_ARGS)
370{
371 try {
372 Datum token = PG_GETARG_DATUM(0);
373
374 Oid table = PG_GETARG_OID(1);
375
376 text *t = PG_GETARG_TEXT_P(2);
377 std::string semiring(VARDATA(t),VARSIZE(t)-VARHDRSZ);
378
379 Oid type = get_fn_expr_argtype(fcinfo->flinfo, 3);
380
381 return provenance_evaluate_compiled_internal(*DatumGetUUIDP(token), table, semiring, type);
382 } catch(const std::exception &e) {
383 provsql_error("provenance_evaluate_compiled: %s", e.what());
384 } catch(...) {
385 provsql_error("provenance_evaluate_compiled: Unknown exception");
386 }
387
388 PG_RETURN_NULL();
389}
Boolean-expression (lineage formula) semiring.
Boolean semiring ({false, true}, ∨, ∧, false, true).
BooleanCircuit getBooleanCircuit(pg_uuid_t token, gate_t &gate)
Build a BooleanCircuit from the mmap store rooted at token.
GenericCircuit getGenericCircuit(pg_uuid_t token)
Build a GenericCircuit from the mmap store rooted at token.
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:48
Counting semiring (ℕ, +, ×, 0, 1).
Symbolic formula semiring producing readable provenance expressions.
Why-provenance semiring (set of witness sets).
Boolean circuit for provenance formula evaluation.
virtual std::string toString(gate_t g) const override
Return a textual description of gate g for debugging.
Exception type thrown by circuit operations on invalid input.
Definition Circuit.h:205
gate_t getGate(const uuid &u)
Return (or create) the gate associated with UUID u.
Definition Circuit.hpp:33
In-memory provenance circuit with semiring-generic evaluation.
S::value_type evaluate(gate_t g, std::unordered_map< gate_t, typename S::value_type > &provenance_mapping, S semiring) const
Evaluate the sub-circuit rooted at gate g over semiring semiring.
const std::set< gate_t > & getInputs() const
Return the set of input (leaf) gates.
The Boolean semiring over bool.
Definition Boolean.h:36
The counting semiring over unsigned.
Definition Counting.h:36
Symbolic provenance formula semiring over std::string.
Definition Formula.h:70
Why-provenance semiring.
Definition Why.h:47
void provsql_try_having_why(GenericCircuit &c, gate_t g, std::unordered_map< gate_t, semiring::why_provenance_t > &mapping)
Evaluate the HAVING sub-circuit at g over the Why-provenance semiring.
void provsql_try_having_counting(GenericCircuit &c, gate_t g, std::unordered_map< gate_t, unsigned > &mapping)
Evaluate the HAVING sub-circuit at g over the Counting semiring.
void provsql_try_having_boolean(GenericCircuit &c, gate_t g, std::unordered_map< gate_t, bool > &mapping)
Evaluate the HAVING sub-circuit at g over the Boolean semiring.
void provsql_try_having_formula(GenericCircuit &c, gate_t g, std::unordered_map< gate_t, std::string > &mapping)
Evaluate the HAVING sub-circuit at g over the Formula semiring.
Provenance evaluation helpers for HAVING-clause circuits.
std::set< label_set > why_provenance_t
Why-provenance value: the full set of all witnesses.
Definition Why.h:40
std::set< label_t > label_set
A witness: a set of labels that collectively justify one derivation.
Definition Why.h:38
static Datum provenance_evaluate_compiled_internal(pg_uuid_t token, Oid table, const std::string &semiring, Oid type)
Core implementation of compiled provenance evaluation.
static Datum pec_varchar(const constants_t &constants, GenericCircuit &c, gate_t g, const std::set< gate_t > &inputs, const std::string &semiring, bool drop_table)
Evaluate a varchar semiring provenance for a circuit.
static Datum pec_boolexpr(const constants_t &constants, BooleanCircuit &bc, gate_t root)
Evaluate the Boolean-expression semiring provenance for a circuit.
const char * drop_temp_table
DROP TABLE statement for the per-query temporary provenance mapping table.
bool join_with_temp_uuids(Oid table, const std::vector< std::string > &uuids)
Join a provenance mapping table with a set of UUIDs using SPI.
static Datum pec_why(const constants_t &constants, GenericCircuit &c, gate_t g, const std::set< gate_t > &inputs, bool drop_table)
Evaluate the Why-provenance semiring for a circuit.
static Datum pec_bool(const constants_t &constants, GenericCircuit &c, gate_t g, const std::set< gate_t > &inputs, const std::string &semiring, bool drop_table)
Evaluate the Boolean semiring provenance for a circuit.
Datum provenance_evaluate_compiled(PG_FUNCTION_ARGS)
PostgreSQL-callable wrapper for provenance_evaluate_compiled().
static Datum pec_int(const constants_t &constants, GenericCircuit &c, gate_t g, const std::set< gate_t > &inputs, const std::string &semiring, bool drop_table)
Evaluate an integer semiring provenance for a circuit.
Template helper for populating provenance mappings from SPI results.
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
Shared-memory segment and inter-process pipe management.
constants_t get_constants(bool failure_if_not_possible)
Retrieve the cached OID constants for the current database.
Core types, constants, and utilities shared across ProvSQL.
string uuid2string(pg_uuid_t uuid)
Format a pg_uuid_t as a std::string.
Structure to store the value of various constants.
Oid OID_TYPE_VARCHAR
OID of the VARCHAR TYPE.
Oid OID_TYPE_INT
OID of the INT TYPE.
Oid OID_TYPE_BOOL
OID of the BOOL TYPE.
UUID structure.