ProvSQL SQL API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
provsql.sql
Go to the documentation of this file.
1/**
2 * @file
3 * @brief ProvSQL PL/pgSQL extension code
4 *
5 * This file contains the PL/pgSQL code of the ProvSQL extension. This
6 * extension requires the standard UUID-ossp extension.
7 */
8
9/**
10 * @brief <tt>provsql</tt> schema
11 *
12 * All types and functions introduced by ProvSQL are defined in the
13 * provsql schema, requiring prefixing them by <tt>provsql.</tt> or
14 * using PostgreSQL's <tt>search_path</tt> variable with a command such
15 * as \code{.sql}SET search_path TO public, provsql;\endcode
16 */
17CREATE SCHEMA provsql;
18
19SET search_path TO provsql;
20
21/**
22 * @brief Provenance circuit gate types
23 *
24 * Each gate in the provenance circuit has a type that determines
25 * its semantics during semiring evaluation.
26 */
27CREATE TYPE PROVENANCE_GATE AS
28 ENUM(
29 'input', -- Input (variable) gate of the circuit
30 'plus', -- Semiring plus
31 'times', -- Semiring times
32 'monus', -- M-Semiring monus
33 'project', -- Project gate (for where provenance)
34 'zero', -- Semiring zero
35 'one', -- Semiring one
36 'eq', -- Equijoin gate (for where provenance)
37 'agg', -- Aggregation operator (for aggregate provenance)
38 'semimod', -- Semimodule scalar multiplication (for aggregate provenance)
39 'cmp', -- Currently unused, meant for comparison of aggregate values
40 'delta', -- δ-semiring operator (see Amsterdamer, Deutch, Tannen, PODS 2011)
41 'value', -- Scalar value (for aggregate provenance)
42 'mulinput',-- Multivalued input (for Boolean provenance)
43 'update' -- Update operation
44 );
46/** @defgroup gate_manipulation Circuit gate manipulation
47 * Low-level functions for creating and querying provenance circuit gates.
48 * @{
49 */
51/**
52 * @brief Create a new gate in the provenance circuit
53 *
54 * @param token UUID identifying the new gate
55 * @param type gate type (see PROVENANCE_GATE)
56 * @param children optional array of child gate UUIDs
57 */
58CREATE OR REPLACE FUNCTION create_gate(
59 token UUID,
60 type PROVENANCE_GATE,
61 children UUID[] DEFAULT NULL)
62 RETURNS VOID AS
63 'provsql','create_gate' LANGUAGE C PARALLEL SAFE;
64/** @brief Return the gate type of a provenance token */
65CREATE OR REPLACE FUNCTION get_gate_type(
66 token UUID)
67 RETURNS PROVENANCE_GATE AS
68 'provsql','get_gate_type' LANGUAGE C IMMUTABLE PARALLEL SAFE;
69/** @brief Return the children of a provenance gate */
70CREATE OR REPLACE FUNCTION get_children(
71 token UUID)
72 RETURNS UUID[] AS
73 'provsql','get_children' LANGUAGE C IMMUTABLE PARALLEL SAFE;
74/**
75 * @brief Set the probability of an input gate
76 *
77 * @param token UUID of the input gate
78 * @param p probability value in [0,1]
79 */
80CREATE OR REPLACE FUNCTION set_prob(
81 token UUID, p DOUBLE PRECISION)
82 RETURNS VOID AS
83 'provsql','set_prob' LANGUAGE C PARALLEL SAFE;
84/** @brief Get the probability associated with an input gate */
85CREATE OR REPLACE FUNCTION get_prob(
86 token UUID)
87 RETURNS DOUBLE PRECISION AS
88 'provsql','get_prob' LANGUAGE C STABLE PARALLEL SAFE;
90/**
91 * @brief Set additional INTEGER values on provenance circuit gate
92 *
93 * This function sets two INTEGER values associated to a circuit gate, used in
94 * different ways by different gate types:
95 * - for mulinput, info1 indicates the value of this multivalued variable
96 * - for eq, info1 and info2 indicate the attribute index of the
97 equijoin in, respectively, the first and second columns
98 * - for agg, info1 is the oid of the aggregate function and info2 the
99 oid of the aggregate result type
100 * - for cmp, info1 is the oid of the comparison operator
101 *
102 * @param token UUID of the circuit gate
103 * @param info1 first INTEGER value
104 * @param info2 second INTEGER value
105 */
106CREATE OR REPLACE FUNCTION set_infos(
107 token UUID, info1 INT, info2 INT DEFAULT NULL)
108 RETURNS VOID AS
109 'provsql','set_infos' LANGUAGE C PARALLEL SAFE;
110
111/** @brief Get the INTEGER info values associated with a circuit gate */
112CREATE OR REPLACE FUNCTION get_infos(
113 token UUID, OUT info1 INT, OUT info2 INT)
114 RETURNS RECORD AS
115 'provsql','get_infos' LANGUAGE C STABLE PARALLEL SAFE;
116
117/**
118 * @brief Set extra TEXT information on provenance circuit gate
119 *
120 * This function sets TEXT-encoded data associated to a circuit gate, used in
121 * different ways by different gate types:
122 * - for project, it is a TEXT-encoded ARRAY of two-element ARRAYs that
123 * indicate mappings between input attribute (first element) and output
124 * attribute (second element)
125 * - for value and agg, it is the TEXT-encoded (base for value, computed
126 * for agg) scalar value
127 *
128 * @param token UUID of the circuit gate
129 * @param data TEXT-encoded information
130 */
131CREATE OR REPLACE FUNCTION set_extra(
132 token UUID, data TEXT)
133 RETURNS VOID AS
134 'provsql','set_extra' LANGUAGE C PARALLEL SAFE STRICT;
135/** @brief Get the TEXT-encoded extra data associated with a circuit gate */
136CREATE OR REPLACE FUNCTION get_extra(token UUID)
137 RETURNS TEXT AS
138 'provsql','get_extra' LANGUAGE C STABLE PARALLEL SAFE RETURNS NULL ON NULL INPUT;
139
140/** @brief Return the total number of gates in the provenance circuit */
141CREATE OR REPLACE FUNCTION get_nb_gates() RETURNS BIGINT AS
142 'provsql', 'get_nb_gates' LANGUAGE C PARALLEL SAFE;
144/** @} */
145
146/** @defgroup table_management Provenance table management
147 * Functions for enabling, disabling, and configuring provenance
148 * tracking on user tables.
149 * @{
150 */
151
152/** @brief Trigger function that creates an input gate for each newly inserted row */
153CREATE OR REPLACE FUNCTION add_gate_trigger()
154 RETURNS TRIGGER AS
155$$
156DECLARE
157 attribute RECORD;
158BEGIN
159 PERFORM create_gate(NEW.provsql, 'input');
160 RETURN NEW;
161END
162$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp SECURITY DEFINER;
164/**
165 * @brief Trigger function for DELETE statement provenance tracking
166 *
167 * Records the deletion and applies monus to provenance tokens of
168 * deleted rows. This is the version for PostgreSQL < 14.
169 */
170CREATE OR REPLACE FUNCTION delete_statement_trigger()
171 RETURNS TRIGGER AS
172$$
173DECLARE
174 query_text TEXT;
175 delete_token UUID;
176 old_token UUID;
177 new_token UUID;
178 r RECORD;
179BEGIN
180 delete_token := public.uuid_generate_v4();
181
182 PERFORM create_gate(delete_token, 'input');
183
184 SELECT query
185 INTO query_text
186 FROM pg_stat_activity
187 WHERE pid = pg_backend_pid();
188
189 INSERT INTO delete_provenance (delete_token, query, deleted_by, deleted_at)
190 VALUES (delete_token, query_text, current_user, CURRENT_TIMESTAMP);
191
192 EXECUTE format('INSERT INTO %I.%I SELECT * FROM OLD_TABLE;', TG_TABLE_SCHEMA, TG_TABLE_NAME);
193
194 FOR r IN (SELECT * FROM OLD_TABLE) LOOP
195 old_token := r.provsql;
196 new_token := provenance_monus(old_token, delete_token);
197
198 EXECUTE format('UPDATE %I.%I SET provsql = $1 WHERE provsql = $2;', TG_TABLE_SCHEMA, TG_TABLE_NAME)
199 USING new_token, old_token;
200 END LOOP;
201
202 RETURN NULL;
203END
204$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp SECURITY DEFINER;
205
206
207/**
208 * @brief Enable provenance tracking on an existing table
209 *
210 * Adds a <tt>provsql</tt> UUID column to the table, creates input gates
211 * for all existing rows, and installs a trigger to track future inserts.
212 *
213 * @param _tbl the table to add provenance tracking to
214 */
215CREATE OR REPLACE FUNCTION add_provenance(_tbl REGCLASS)
216 RETURNS VOID AS
217$$
218BEGIN
219 EXECUTE format('ALTER TABLE %s ADD COLUMN provsql UUID UNIQUE DEFAULT public.uuid_generate_v4()', _tbl);
220 EXECUTE format('SELECT provsql.create_gate(provsql, ''input'') FROM %s', _tbl);
221 EXECUTE format('CREATE TRIGGER add_gate BEFORE INSERT ON %s FOR EACH ROW EXECUTE PROCEDURE provsql.add_gate_trigger()',_tbl);
222END
223$$ LANGUAGE plpgsql SECURITY DEFINER;
224
225/**
226 * @brief Remove provenance tracking from a table
227 *
228 * Drops the <tt>provsql</tt> column and associated triggers.
229 *
230 * @param _tbl the table to remove provenance tracking from
231 */
232CREATE OR REPLACE FUNCTION remove_provenance(_tbl REGCLASS)
233 RETURNS VOID AS
234$$
235DECLARE
236BEGIN
237 EXECUTE format('ALTER TABLE %s DROP COLUMN provsql', _tbl);
238 BEGIN
239 EXECUTE format('DROP TRIGGER add_gate on %s', _tbl);
240 EXCEPTION WHEN undefined_object THEN
241 END;
242 BEGIN
243 EXECUTE format('DROP TRIGGER insert_statement on %s', _tbl);
244 EXECUTE format('DROP TRIGGER update_statement on %s', _tbl);
245 EXECUTE format('DROP TRIGGER delete_statement on %s', _tbl);
246 EXCEPTION WHEN undefined_object THEN
247 END;
248END
249$$ LANGUAGE plpgsql;
250
251/**
252 * @brief Set up provenance for a table with duplicate key values
254 * When a table has duplicate rows for a given key, this function
255 * replaces simple input gates with multivalued input (mulinput) gates
256 * that model a uniform distribution over duplicates.
257 *
258 * @param _tbl the table to repair
259 * @param key_att the key attribute(s) as a comma-separated string, or
260 * empty string if the whole table is one group
261 */
262CREATE OR REPLACE FUNCTION repair_key(_tbl REGCLASS, key_att TEXT)
263 RETURNS VOID AS
264$$
265DECLARE
266 key RECORD;
267 key_token UUID;
268 token UUID;
269 RECORD RECORD;
270 nb_rows INTEGER;
271 ind INTEGER;
272 select_key_att TEXT;
273 where_condition TEXT;
274BEGIN
275 IF key_att = '' THEN
276 key_att := '()';
277 select_key_att := '1';
278 ELSE
279 select_key_att := key_att;
280 END IF;
281
282 EXECUTE format('ALTER TABLE %s ADD COLUMN provsql_temp UUID UNIQUE DEFAULT public.uuid_generate_v4()', _tbl);
283
284 FOR key IN
285 EXECUTE format('SELECT %s AS key FROM %s GROUP BY %s', select_key_att, _tbl, key_att)
286 LOOP
287 IF key_att = '()' THEN
288 where_condition := '';
289 ELSE
290 where_condition := format('WHERE %s = %L', key_att, key.key);
291 END IF;
292
293 EXECUTE format('SELECT COUNT(*) FROM %s %s', _tbl, where_condition) INTO nb_rows;
294
295 key_token := public.uuid_generate_v4();
296 PERFORM provsql.create_gate(key_token, 'input');
297 ind := 1;
298 FOR RECORD IN
299 EXECUTE format('SELECT provsql_temp FROM %s %s', _tbl, where_condition)
300 LOOP
301 token:=RECORD.provsql_temp;
302 PERFORM provsql.create_gate(token, 'mulinput', ARRAY[key_token]);
303 PERFORM provsql.set_prob(token, 1./nb_rows);
304 PERFORM provsql.set_infos(token, ind);
305 ind := ind + 1;
306 END LOOP;
307 END LOOP;
308 EXECUTE format('ALTER TABLE %s RENAME COLUMN provsql_temp TO provsql', _tbl);
309 EXECUTE format('CREATE TRIGGER add_gate BEFORE INSERT ON %s FOR EACH ROW EXECUTE PROCEDURE provsql.add_gate_trigger()',_tbl);
310END
311$$ LANGUAGE plpgsql;
312
313/**
314 * @brief Create a provenance mapping table from an attribute
315 *
316 * Creates a new table mapping provenance tokens to values of a given
317 * attribute, for use with semiring evaluation functions.
318 *
319 * @param newtbl name of the mapping table to create
320 * @param oldtbl source table with provenance tracking
321 * @param att attribute whose values populate the mapping
322 * @param preserve_case if true, quote the table name to preserve case
323 */
324CREATE OR REPLACE FUNCTION create_provenance_mapping(
325 newtbl TEXT,
326 oldtbl REGCLASS,
327 att TEXT,
328 preserve_case BOOL DEFAULT 'f'
329) RETURNS VOID AS
330$$
331DECLARE
332BEGIN
333 EXECUTE format('CREATE TEMP TABLE tmp_provsql ON COMMIT DROP AS TABLE %s', oldtbl);
334 ALTER TABLE tmp_provsql RENAME provsql TO provenance;
335 IF preserve_case THEN
336 EXECUTE format('CREATE TABLE %I AS SELECT %s AS value, provenance FROM tmp_provsql', newtbl, att);
337 EXECUTE format('CREATE INDEX ON %I(provenance)', newtbl);
338 ELSE
339 EXECUTE format('CREATE TABLE %s AS SELECT %s AS value, provenance FROM tmp_provsql', newtbl, att);
340 EXECUTE format('CREATE INDEX ON %s(provenance)', newtbl);
341 END IF;
342END
343$$ LANGUAGE plpgsql;
344
345/** @} */
346
347/** @defgroup internal_constants Internal constants
348 * UUID namespace and identity element functions used for
349 * deterministic gate generation.
350 * @{
351 */
352
353/** @brief Return the ProvSQL UUID namespace (used for deterministic gate UUIDs) */
354CREATE OR REPLACE FUNCTION uuid_ns_provsql() RETURNS UUID AS
355$$
356 -- uuid_generate_v5(uuid_ns_url(),'http://pierre.senellart.com/software/provsql/')
357 SELECT '920d4f02-8718-5319-9532-d4ab83a64489'::UUID
358$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
359
360/** @brief Return the UUID of the semiring zero gate */
361CREATE OR REPLACE FUNCTION gate_zero() RETURNS UUID AS
362$$
363 SELECT public.uuid_generate_v5(provsql.uuid_ns_provsql(),'zero');
364$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
365
366/** @brief Return the UUID of the semiring one gate */
367CREATE OR REPLACE FUNCTION gate_one() RETURNS UUID AS
368$$
369 SELECT public.uuid_generate_v5(provsql.uuid_ns_provsql(),'one');
370$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
371
372/** @brief Return the epsilon threshold used for probability comparisons */
373CREATE OR REPLACE FUNCTION epsilon() RETURNS DOUBLE PRECISION AS
374$$
375 SELECT CAST(0.001 AS DOUBLE PRECISION)
376$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
377
378/** @} */
379
380/** @defgroup semiring_operations Semiring operations
381 * Functions that build provenance circuit gates for semiring operations.
382 * These are called internally by the query rewriter.
383 * @{
384 */
385
386/**
387 * @brief Create a times (product) gate from multiple provenance tokens
388 *
389 * Filters out NULL and one-gates; returns gate_one() if all tokens
390 * are trivial, or a single token if only one remains.
391 */
392CREATE OR REPLACE FUNCTION provenance_times(VARIADIC tokens UUID[])
393 RETURNS UUID AS
394$$
395DECLARE
396 times_token UUID;
397 filtered_tokens UUID[];
398BEGIN
399 SELECT array_agg(t) FROM unnest(tokens) t WHERE t IS NOT NULL AND t <> gate_one() INTO filtered_tokens;
400
401 CASE array_length(tokens, 1)
402 WHEN 0 THEN
403 times_token:=gate_one();
404 WHEN 1 THEN
405 times_token:=filtered_tokens[1];
406 ELSE
407 times_token := uuid_generate_v5(uuid_ns_provsql(),concat('times',filtered_tokens));
408
409 PERFORM create_gate(times_token, 'times', ARRAY_AGG(t)) FROM UNNEST(filtered_tokens) AS t WHERE t IS NOT NULL;
410 END CASE;
411
412 RETURN times_token;
413END
414$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp,public SECURITY DEFINER PARALLEL SAFE;
415
416/**
417 * @brief Create a monus (difference) gate from two provenance tokens
418 *
419 * Implements m-semiring monus. Returns token1 if token2 is NULL
420 * (used for LEFT OUTER JOIN semantics in the EXCEPT rewriting).
421 */
422CREATE OR REPLACE FUNCTION provenance_monus(token1 UUID, token2 UUID)
423 RETURNS UUID AS
424$$
425DECLARE
426 monus_token UUID;
427BEGIN
428 IF token1 IS NULL THEN
429 RAISE EXCEPTION USING MESSAGE='provenance_monus is called with first argument NULL';
430 END IF;
432 IF token2 IS NULL THEN
433 -- Special semantics, because of a LEFT OUTER JOIN used by the
434 -- difference operator: token2 NULL means there is no second argument
435 RETURN token1;
436 END IF;
437
438 IF token1 = token2 THEN
439 -- X-X=0
440 monus_token:=gate_zero();
441 ELSIF token1 = gate_zero() THEN
442 -- 0-X=0
443 monus_token:=gate_zero();
444 ELSIF token2 = gate_zero() THEN
445 -- X-0=X
446 monus_token:=token1;
447 ELSE
448 monus_token:=uuid_generate_v5(uuid_ns_provsql(),concat('monus',token1,token2));
449 PERFORM create_gate(monus_token, 'monus', ARRAY[token1::UUID, token2::UUID]);
450 END IF;
452 RETURN monus_token;
453END
454$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp,public SECURITY DEFINER PARALLEL SAFE IMMUTABLE;
455
456/**
457 * @brief Create a project gate for where-provenance tracking
458 *
459 * Records the mapping between input and output attribute positions.
460 *
461 * @param token child provenance token
462 * @param positions array encoding attribute position mappings
463 */
464CREATE OR REPLACE FUNCTION provenance_project(token UUID, VARIADIC positions INT[])
465 RETURNS UUID AS
466$$
467DECLARE
468 project_token UUID;
469 rec RECORD;
470BEGIN
471 project_token:=uuid_generate_v5(uuid_ns_provsql(),concat('project', token, positions));
472 PERFORM create_gate(project_token, 'project', ARRAY[token]);
473 PERFORM set_extra(project_token, ARRAY_AGG(pair)::TEXT)
474 FROM (
475 SELECT ARRAY[(CASE WHEN info=0 THEN NULL ELSE info END), idx] AS pair
476 FROM unnest(positions) WITH ORDINALITY AS a(info, idx)
477 ORDER BY idx
478 ) t;
479
480 RETURN project_token;
481END
482$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp,public SECURITY DEFINER PARALLEL SAFE IMMUTABLE;
483
484/**
485 * @brief Create an equijoin gate for where-provenance tracking
486 *
487 * @param token child provenance token
488 * @param pos1 attribute index in the first relation
489 * @param pos2 attribute index in the second relation
490 */
491CREATE OR REPLACE FUNCTION provenance_eq(token UUID, pos1 INT, pos2 INT)
492 RETURNS UUID AS
493$$
494DECLARE
495 eq_token UUID;
496 rec RECORD;
497BEGIN
498 eq_token:=uuid_generate_v5(uuid_ns_provsql(),concat('eq',token,pos1,',',pos2));
499
500 PERFORM create_gate(eq_token, 'eq', ARRAY[token::UUID]);
501 PERFORM set_infos(eq_token, pos1, pos2);
502 RETURN eq_token;
503END
504$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp,public SECURITY DEFINER PARALLEL SAFE IMMUTABLE;
505
506/**
507 * @brief Create a plus (sum) gate from an array of provenance tokens
508 *
509 * Filters out NULL and zero-gates; returns gate_zero() if all tokens
510 * are trivial, or a single token if only one remains.
511 */
512CREATE OR REPLACE FUNCTION provenance_plus(tokens UUID[])
513 RETURNS UUID AS
514$$
515DECLARE
516 c INTEGER;
517 plus_token UUID;
518 filtered_tokens UUID[];
519BEGIN
520 SELECT array_agg(t) FROM unnest(tokens) t
521 WHERE t IS NOT NULL AND t <> gate_zero()
522 INTO filtered_tokens;
523
524 c:=array_length(filtered_tokens, 1);
525
526 IF c = 0 THEN
527 plus_token := gate_zero();
528 ELSIF c = 1 THEN
529 plus_token := filtered_tokens[1];
530 ELSE
531 plus_token := uuid_generate_v5(
532 uuid_ns_provsql(),
533 concat('plus', filtered_tokens));
534
535 PERFORM create_gate(plus_token, 'plus', filtered_tokens);
536 END IF;
537
538 RETURN plus_token;
539END
540$$ LANGUAGE plpgsql STRICT SET search_path=provsql,pg_temp,public SECURITY DEFINER PARALLEL SAFE IMMUTABLE;
541
542/**
543 * @brief Create a comparison gate for HAVING clause provenance
545 * @param left_token provenance token for the left operand
546 * @param comparison_op OID of the comparison operator
547 * @param right_token provenance token for the right operand
548 */
549CREATE OR REPLACE FUNCTION provenance_cmp(
550 left_token UUID,
551 comparison_op OID,
552 right_token UUID
553)
554RETURNS UUID AS
555$$
556DECLARE
557 cmp_token UUID;
558BEGIN
559 -- deterministic v5 namespace id
560 cmp_token := public.uuid_generate_v5(
561 uuid_ns_provsql(),
562 concat('cmp', left_token::TEXT, comparison_op::TEXT, right_token::TEXT)
563 );
564 -- wire it up in the circuit
565 PERFORM create_gate(cmp_token, 'cmp', ARRAY[left_token, right_token]);
566 PERFORM set_infos(cmp_token, comparison_op::INTEGER);
567 RETURN cmp_token;
568END
569$$ LANGUAGE plpgsql
570 IMMUTABLE
571 PARALLEL SAFE
572 STRICT;
573
574/** @} */
575
576/** @defgroup semiring_evaluation Semiring evaluation
577 * Functions for evaluating provenance circuits over semirings,
578 * both user-defined (via function references) and compiled (built-in).
579 * @{
580 */
581
582/**
583 * @brief Evaluate provenance using a compiled (built-in) semiring
584 *
585 * This C function handles semiring evaluation entirely in C++ for
586 * better performance. The semiring is specified by name.
587 *
588 * @param token provenance token to evaluate
589 * @param token2value mapping table from tokens to semiring values
590 * @param semiring name of the compiled semiring (e.g., "formula", "counting")
591 * @param element_one identity element of the semiring
592 */
593CREATE OR REPLACE FUNCTION provenance_evaluate_compiled(
594 token UUID,
595 token2value REGCLASS,
596 semiring TEXT,
597 element_one ANYELEMENT)
598RETURNS ANYELEMENT AS
599 'provsql', 'provenance_evaluate_compiled' LANGUAGE C PARALLEL SAFE STABLE;
601
602/**
603 * @brief Evaluate provenance over a user-defined semiring (PL/pgSQL version)
604 *
605 * Recursively walks the provenance circuit and evaluates each gate
606 * using the provided semiring operations. This is the generic version
607 * that accepts semiring operations as function references.
608 *
609 * @param token provenance token to evaluate
610 * @param token2value mapping table from tokens to semiring values
611 * @param element_one identity element of the semiring
612 * @param value_type OID of the semiring value type
613 * @param plus_function semiring addition (aggregate)
614 * @param times_function semiring multiplication (aggregate)
615 * @param monus_function semiring monus (binary), or NULL
616 * @param delta_function δ-semiring operator, or NULL
617 */
618CREATE OR REPLACE FUNCTION provenance_evaluate(
619 token UUID,
620 token2value REGCLASS,
621 element_one ANYELEMENT,
622 value_type REGTYPE,
623 plus_function REGPROC,
624 times_function REGPROC,
625 monus_function REGPROC,
626 delta_function REGPROC)
627 RETURNS ANYELEMENT AS
629DECLARE
630 gate_type PROVENANCE_GATE;
631 result ALIAS FOR $0;
632 children UUID[];
633-- cmp_value ANYELEMENT;
634-- temp_result ANYELEMENT;
635 value_text TEXT;
636BEGIN
637 SELECT get_gate_type(token) INTO gate_type;
638
639 IF gate_type IS NULL THEN
640 RETURN NULL;
641
642 ELSIF gate_type = 'input' THEN
643 EXECUTE format('SELECT value FROM %s WHERE provenance=%L', token2value, token)
644 INTO result;
645 IF result IS NULL THEN
646 result := element_one;
647 END IF;
648 ELSIF gate_type = 'mulinput' THEN
649 SELECT concat('{',(get_children(token))[1]::TEXT,'=',(get_infos(token)).info1,'}')
650 INTO result;
651 ELSIF gate_type='update' THEN
652 EXECUTE format('SELECT value FROM %s WHERE provenance=%L',token2value,token) INTO result;
653 IF result IS NULL THEN
654 result:=element_one;
655 END IF;
656 ELSIF gate_type = 'plus' THEN
657 EXECUTE format('SELECT %s(provsql.provenance_evaluate(t,%L,%L::%s,%L,%L,%L,%L,%L)) FROM unnest(get_children(%L)) AS t',
658 plus_function, token2value, element_one, value_type, value_type, plus_function, times_function, monus_function, delta_function, token)
659 INTO result;
660
661 ELSIF gate_type = 'times' THEN
662 EXECUTE format('SELECT %s(provsql.provenance_evaluate(t,%L,%L::%s,%L,%L,%L,%L,%L)) FROM unnest(get_children(%L)) AS t',
663 times_function, token2value, element_one, value_type, value_type, plus_function, times_function, monus_function, delta_function, token)
664 INTO result;
665
666 ELSIF gate_type = 'monus' THEN
667 IF monus_function IS NULL THEN
668 RAISE EXCEPTION USING MESSAGE='Provenance with negation evaluated over a semiring without monus function';
669 ELSE
670 EXECUTE format('SELECT %s(a1,a2) FROM (SELECT provsql.provenance_evaluate(c[1],%L,%L::%s,%L,%L,%L,%L,%L) AS a1, ' ||
671 'provsql.provenance_evaluate(c[2],%L,%L::%s,%L,%L,%L,%L,%L) AS a2 FROM get_children(%L) c) tmp',
672 monus_function, token2value, element_one, value_type, value_type, plus_function, times_function, monus_function, delta_function,
673 token2value, element_one, value_type, value_type, plus_function, times_function, monus_function, delta_function, token)
674 INTO result;
675 END IF;
676
677 ELSIF gate_type = 'eq' THEN
678 EXECUTE format('SELECT provsql.provenance_evaluate((get_children(%L))[1],%L,%L::%s,%L,%L,%L,%L,%L)',
679 token, token2value, element_one, value_type, value_type, plus_function, times_function, monus_function, delta_function)
680 INTO result;
681
682/* elsif gate_type = 'cmp' then
683
684 EXECUTE format('SELECT provsql.provenance_evaluate((get_children(%L))[1],%L,%L::%s,%L,%L,%L,%L,%L)',
685 token, token2value, element_one, value_type, value_type, plus_function, times_function, monus_function, delta_function)
686 INTO temp_result;
687
688 EXECUTE format('SELECT get_extra((get_children(%L))[2])', token)
689 INTO cmp_value;
690
691 IF temp_result::TEXT = cmp_value::TEXT THEN
692 SELECT concat('{',temp_result::TEXT,'=',cmp_value::TEXT,'}')
693 INTO result;
694 ELSE
695 RETURN gate_zero()
696 */
697
698
699
700 ELSIF gate_type = 'delta' THEN
701 IF delta_function IS NULL THEN
702 RAISE EXCEPTION USING MESSAGE='Provenance with aggregation evaluated over a semiring without delta function';
703 ELSE
704 EXECUTE format('SELECT %I(a) FROM (SELECT provsql.provenance_evaluate((get_children(%L))[1],%L,%L::%s,%L,%L,%L,%L,%L) AS a) tmp',
705 delta_function, token, token2value, element_one, value_type, value_type, plus_function, times_function, monus_function, delta_function)
706 INTO result;
707 END IF;
708
709 ELSIF gate_type = 'zero' THEN
710 EXECUTE format('SELECT %I(a) FROM (SELECT %L::%I AS a WHERE FALSE) temp', plus_function, element_one, value_type)
711 INTO result;
712
713 ELSIF gate_type = 'one' THEN
714 EXECUTE format('SELECT %L::%I', element_one, value_type)
715 INTO result;
716
717 ELSIF gate_type = 'project' THEN
718 EXECUTE format('SELECT provsql.provenance_evaluate((get_children(%L))[1],%L,%L::%s,%L,%L,%L,%L,%L)',
719 token, token2value, element_one, value_type, value_type, plus_function, times_function, monus_function, delta_function)
720 INTO result;
721
722 ELSE
723 RAISE EXCEPTION USING MESSAGE='Unknown gate type';
724 END IF;
725
726 RETURN result;
727END
728$$ LANGUAGE plpgsql PARALLEL SAFE STABLE;
730
731/**
732 * @brief Evaluate aggregate provenance over a user-defined semiring (PL/pgSQL version)
733 *
734 * Handles agg and semimod gates produced by GROUP BY queries.
735 *
736 * @param token provenance token to evaluate
737 * @param token2value mapping table from tokens to semiring values
738 * @param agg_function_final finalization function for the aggregate
739 * @param agg_function aggregate combination function
740 * @param semimod_function semimodule scalar multiplication function
741 * @param element_one identity element of the semiring
742 * @param value_type OID of the semiring value type
743 * @param plus_function semiring addition
744 * @param times_function semiring multiplication
745 * @param monus_function semiring monus, or NULL
746 * @param delta_function δ-semiring operator, or NULL
747 */
748CREATE OR REPLACE FUNCTION aggregation_evaluate(
749 token UUID,
750 token2value REGCLASS,
751 agg_function_final REGPROC,
752 agg_function REGPROC,
753 semimod_function REGPROC,
754 element_one ANYELEMENT,
755 value_type REGTYPE,
756 plus_function REGPROC,
757 times_function REGPROC,
758 monus_function REGPROC,
759 delta_function REGPROC)
760 RETURNS ANYELEMENT AS
761$$
762DECLARE
763 gt PROVENANCE_GATE;
764 result ALIAS FOR $0;
765BEGIN
766 SELECT get_gate_type(token) INTO gt;
768 IF gt IS NULL THEN
769 RETURN NULL;
770 ELSIF gt='agg' THEN
771 EXECUTE format('SELECT %I(%I(provsql.aggregation_evaluate(t,%L,%L,%L,%L,%L::%s,%L,%L,%L,%L,%L)),pp.proname::varchar) FROM
772 unnest(get_children(%L)) AS t, pg_proc pp
773 WHERE pp.oid=(get_infos(%L)).info1
774 GROUP BY pp.proname',
775 agg_function_final, agg_function,token2value,agg_function_final,agg_function,semimod_function,element_one,value_type,value_type,plus_function,times_function,
776 monus_function,delta_function,token,token)
777 INTO result;
778 ELSE
779 -- gt='semimod'
780 EXECUTE format('SELECT %I(get_extra((get_children(%L))[2]),provsql.provenance_evaluate((get_children(%L))[1],%L,%L::%s,%L,%L,%L,%L,%L))',
781 semimod_function,token,token,token2value,element_one,value_type,value_type,plus_function,times_function,monus_function,delta_function)
782 INTO result;
783 END IF;
784 RETURN result;
785END
786$$ LANGUAGE plpgsql PARALLEL SAFE STABLE;
787
788/**
789 * @brief Evaluate provenance over a user-defined semiring (C version)
790 *
791 * Optimized C implementation of provenance_evaluate. Infers the
792 * value type from element_one. Monus and delta functions are optional.
793 *
794 * @param token provenance token to evaluate
795 * @param token2value mapping table from tokens to semiring values
796 * @param element_one identity element of the semiring
797 * @param plus_function semiring addition (aggregate)
798 * @param times_function semiring multiplication (aggregate)
799 * @param monus_function semiring monus, or NULL if not needed
800 * @param delta_function δ-semiring operator, or NULL if not needed
801 */
802CREATE OR REPLACE FUNCTION provenance_evaluate(
803 token UUID,
804 token2value REGCLASS,
805 element_one ANYELEMENT,
806 plus_function REGPROC,
807 times_function REGPROC,
808 monus_function REGPROC = NULL,
809 delta_function REGPROC = NULL)
810 RETURNS ANYELEMENT AS
811 'provsql','provenance_evaluate' LANGUAGE C STABLE;
812
813/** @brief Evaluate aggregate provenance over a user-defined semiring (C version) */
814CREATE OR REPLACE FUNCTION aggregation_evaluate(
815 token UUID,
816 token2value REGCLASS,
817 agg_function_final REGPROC,
818 agg_function REGPROC,
819 semimod_function REGPROC,
820 element_one ANYELEMENT,
821 plus_function REGPROC,
822 times_function REGPROC,
823 monus_function REGPROC = NULL,
824 delta_function REGPROC = NULL)
825 RETURNS ANYELEMENT AS
826 'provsql','aggregation_evaluate' LANGUAGE C STABLE;
828/** @} */
829
830/** @defgroup circuit_introspection Circuit introspection
831 * Functions for examining the structure of provenance circuits,
832 * used by visualization and where-provenance features.
833 * @{
834 */
835
836/** @brief Row type for sub_circuit_with_desc results */
837CREATE TYPE GATE_WITH_DESC AS (f UUID, t UUID, gate_type PROVENANCE_GATE, desc_str CHARACTER VARYING, infos INTEGER[], extra TEXT);
838
839/**
840 * @brief Return the sub-circuit reachable from a token, with descriptions
842 * Recursively traverses the provenance circuit from the given token and
843 * returns all edges together with input gate descriptions from the
844 * mapping table.
845 *
846 * @param token root provenance token
847 * @param token2desc mapping table providing descriptions for input gates
848 */
849CREATE OR REPLACE FUNCTION sub_circuit_with_desc(
850 token UUID,
851 token2desc REGCLASS) RETURNS SETOF GATE_WITH_DESC AS
853BEGIN
854 RETURN QUERY EXECUTE
855 'WITH RECURSIVE transitive_closure(f,t,gate_type) AS (
856 SELECT $1,t,provsql.get_gate_type($1) FROM unnest(provsql.get_children($1)) AS t
857 UNION ALL
858 SELECT p1.t,u,provsql.get_gate_type(p1.t) FROM transitive_closure p1, unnest(provsql.get_children(p1.t)) AS u)
859 SELECT *, ARRAY[(get_infos(f)).info1, (get_infos(f)).info2], get_extra(f) FROM (
860 SELECT f::UUID,t::UUID,gate_type,NULL FROM transitive_closure
861 UNION ALL
862 SELECT p2.provenance::UUID as f, NULL::UUID, ''input'', CAST (p2.value AS varchar) FROM transitive_closure p1 JOIN ' || token2desc || ' AS p2
863 ON p2.provenance=t
864 UNION ALL
865 SELECT provenance::UUID as f, NULL::UUID, ''input'', CAST (value AS varchar) FROM ' || token2desc || ' WHERE provenance=$1
866 ) t'
867 USING token LOOP;
868 RETURN;
869END
870$$ LANGUAGE plpgsql PARALLEL SAFE;
871
872/**
873 * @brief Identify which table and how many columns a provenance token belongs to
874 *
875 * Searches all provenance-tracked tables for a row matching the given
876 * token and returns the table name and column count.
878 * @param token provenance token to look up
879 * @param table_name (OUT) the table containing this token
880 * @param nb_columns (OUT) number of non-provenance columns in that table
881 */
882CREATE OR REPLACE FUNCTION identify_token(
883 token UUID, OUT table_name REGCLASS, OUT nb_columns INTEGER) AS
884$$
885DECLARE
886 t RECORD;
887 result RECORD;
888BEGIN
889 table_name:=NULL;
890 nb_columns:=-1;
891 FOR t IN
892 SELECT relname,
893 (SELECT count(*) FROM pg_attribute a2 WHERE a2.attrelid=a1.attrelid AND attnum>0 AND atttypid<>0)-1 c
894 FROM pg_attribute a1 JOIN pg_type ON atttypid=pg_type.oid
895 JOIN pg_class ON attrelid=pg_class.oid
896 JOIN pg_namespace ON relnamespace=pg_namespace.oid
897 WHERE typname='UUID' AND relkind='r'
898 AND nspname<>'provsql'
899 AND attname='provsql'
900 LOOP
901 EXECUTE format('SELECT * FROM %I WHERE provsql=%L',t.relname,token) INTO result;
902 IF result IS NOT NULL THEN
903 table_name:=t.relname;
904 nb_columns:=t.c;
905 EXIT;
906 END IF;
907 END LOOP;
908END
909$$ LANGUAGE plpgsql STRICT;
910
911/**
912 * @brief Return the sub-circuit for where-provenance computation
913 *
914 * Similar to sub_circuit_with_desc but resolves input gates to their
915 * source table and column count for where-provenance evaluation.
916 */
917CREATE OR REPLACE FUNCTION sub_circuit_for_where(token UUID)
918 RETURNS TABLE(f UUID, t UUID, gate_type PROVENANCE_GATE, table_name REGCLASS, nb_columns INTEGER, infos INTEGER[], extra TEXT) AS
919$$
920 WITH RECURSIVE transitive_closure(f,t,idx,gate_type) AS (
921 SELECT $1,t,id,provsql.get_gate_type($1) FROM unnest(provsql.get_children($1)) WITH ORDINALITY AS a(t,id)
922 UNION ALL
923 SELECT p1.t,u,id,provsql.get_gate_type(p1.t) FROM transitive_closure p1, unnest(provsql.get_children(p1.t)) WITH ORDINALITY AS a(u, id)
924 ) SELECT f, t, gate_type, table_name, nb_columns, ARRAY[(get_infos(f)).info1, (get_infos(f)).info2], get_extra(f) FROM (
925 SELECT f, t::UUID, idx, gate_type, NULL AS table_name, NULL AS nb_columns FROM transitive_closure
926 UNION ALL
927 SELECT DISTINCT t, NULL::UUID, NULL::INT, 'input'::PROVENANCE_GATE, (id).table_name, (id).nb_columns FROM transitive_closure JOIN (SELECT t AS prov, provsql.identify_token(t) as id FROM transitive_closure WHERE t NOT IN (SELECT f FROM transitive_closure)) temp ON t=prov
928 UNION ALL
929 SELECT DISTINCT $1, NULL::UUID, NULL::INT, 'input'::PROVENANCE_GATE, (id).table_name, (id).nb_columns FROM (SELECT provsql.identify_token($1) AS id WHERE $1 NOT IN (SELECT f FROM transitive_closure)) temp
930 ) t
931$$
932LANGUAGE sql;
933
934/** @} */
935
936/** @defgroup agg_token_type Type for the result of aggregate queries
937 *
938 * Custom type <tt>AGG_TOKEN</tt> for a provenance semimodule value, to
939 * be used in attributes that are computed as a result of aggregation.
940 * As for provenance tokens, this is simply a UUID, but this UUID is
941 * displayed in a specific way (as the result of the aggregation
942 * followed by a "(*)") to help with readability.
943 *
944 * @{
945 */
946
947CREATE TYPE AGG_TOKEN;
948
949/** @brief Input function for the AGG_TOKEN type (parses TEXT representation) */
950CREATE OR REPLACE FUNCTION agg_token_in(CSTRING)
951 RETURNS AGG_TOKEN
952 AS 'provsql','agg_token_in' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
953
954/** @brief Output function for the AGG_TOKEN type (produces TEXT representation) */
955CREATE OR REPLACE FUNCTION agg_token_out(AGG_TOKEN)
956 RETURNS CSTRING
957 AS 'provsql','agg_token_out' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
958
959/** @brief Cast an AGG_TOKEN to its TEXT representation */
960CREATE OR REPLACE FUNCTION agg_token_cast(AGG_TOKEN)
961 RETURNS TEXT
962 AS 'provsql','agg_token_cast' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
963
964CREATE TYPE AGG_TOKEN (
965 internallength = 117,
966 input = agg_token_in,
967 output = agg_token_out,
968 alignment = char
969);
970
971/** @brief Extract the UUID from an AGG_TOKEN (implicit cast to UUID) */
972CREATE OR REPLACE FUNCTION agg_token_uuid(aggtok AGG_TOKEN)
973 RETURNS UUID AS
974$$
975BEGIN
976 RETURN agg_token_cast(aggtok)::UUID;
977END
978$$ LANGUAGE plpgsql STRICT SET search_path=provsql,pg_temp,public SECURITY DEFINER IMMUTABLE PARALLEL SAFE;
979
980/** @brief Implicit PostgreSQL cast from AGG_TOKEN to UUID (delegates to agg_token_uuid()) */
981CREATE CAST (AGG_TOKEN AS UUID) WITH FUNCTION agg_token_uuid(AGG_TOKEN) AS IMPLICIT;
982
983/**
984 * @brief Placeholder comparison of AGG_TOKEN with NUMERIC
985 *
986 * This function is never actually called; it exists so the SQL parser
987 * accepts comparison operators between AGG_TOKEN and NUMERIC values.
988 * The ProvSQL query rewriter replaces these comparisons at plan time.
989 */
990CREATE OR REPLACE FUNCTION agg_token_comp_numeric(a AGG_TOKEN, b NUMERIC)
991RETURNS BOOLEAN
992LANGUAGE plpgsql
993IMMUTABLE STRICT PARALLEL SAFE
994AS $$
995BEGIN
996 RAISE EXCEPTION 'Comparison AGG_TOKEN-NUMERIC not implemented, should be replaced by ProvSQL behavior';
997END;
998$$;
999
1000/**
1001 * @brief Placeholder comparison of NUMERIC with AGG_TOKEN
1002 *
1003 * Symmetric to agg_token_comp_numeric; never actually called.
1004 * The ProvSQL query rewriter replaces these comparisons at plan time.
1005 */
1006CREATE OR REPLACE FUNCTION numeric_comp_agg_token(a NUMERIC, b AGG_TOKEN)
1007RETURNS BOOLEAN
1008LANGUAGE plpgsql
1009IMMUTABLE STRICT PARALLEL SAFE
1010AS $$
1011BEGIN
1012 RAISE EXCEPTION 'Comparison NUMERIC-AGG_TOKEN not implemented, should be replaced by ProvSQL behavior';
1013END;
1014$$;
1015
1016/** @brief SQL operator AGG_TOKEN < NUMERIC (placeholder rewritten by ProvSQL at plan time) */
1017CREATE OPERATOR < (
1018 LEFTARG = AGG_TOKEN,
1019 RIGHTARG = NUMERIC,
1020 PROCEDURE = agg_token_comp_numeric,
1021 COMMUTATOR = >,
1022 NEGATOR = >=
1023);
1024/** @brief SQL operator NUMERIC < AGG_TOKEN (placeholder rewritten by ProvSQL at plan time) */
1025CREATE OPERATOR < (
1026 LEFTARG = NUMERIC,
1027 RIGHTARG = AGG_TOKEN,
1028 PROCEDURE = numeric_comp_agg_token,
1029 COMMUTATOR = >,
1030 NEGATOR = >=
1032
1033/** @brief SQL operator AGG_TOKEN <= NUMERIC (placeholder rewritten by ProvSQL at plan time) */
1034CREATE OPERATOR <= (
1035 LEFTARG = AGG_TOKEN,
1036 RIGHTARG = NUMERIC,
1037 PROCEDURE = agg_token_comp_numeric,
1038 COMMUTATOR = >=,
1039 NEGATOR = >
1040);
1041/** @brief SQL operator NUMERIC <= AGG_TOKEN (placeholder rewritten by ProvSQL at plan time) */
1042CREATE OPERATOR <= (
1043 LEFTARG = NUMERIC,
1044 RIGHTARG = AGG_TOKEN,
1045 PROCEDURE = numeric_comp_agg_token,
1046 COMMUTATOR = >=,
1047 NEGATOR = >
1048);
1049
1050/** @brief SQL operator AGG_TOKEN = NUMERIC (placeholder rewritten by ProvSQL at plan time) */
1051CREATE OPERATOR = (
1052 LEFTARG = AGG_TOKEN,
1053 RIGHTARG = NUMERIC,
1054 PROCEDURE = agg_token_comp_numeric,
1055 COMMUTATOR = =,
1056 NEGATOR = <>
1057);
1058/** @brief SQL operator NUMERIC = AGG_TOKEN (placeholder rewritten by ProvSQL at plan time) */
1059CREATE OPERATOR = (
1060 LEFTARG = NUMERIC,
1061 RIGHTARG = AGG_TOKEN,
1062 PROCEDURE = numeric_comp_agg_token,
1063 COMMUTATOR = =,
1064 NEGATOR = <>
1065);
1066
1067/** @brief SQL operator AGG_TOKEN <> NUMERIC (placeholder rewritten by ProvSQL at plan time) */
1068CREATE OPERATOR <> (
1069 LEFTARG = AGG_TOKEN,
1070 RIGHTARG = NUMERIC,
1071 PROCEDURE = agg_token_comp_numeric,
1072 COMMUTATOR = <>,
1073 NEGATOR = =
1074);
1075/** @brief SQL operator NUMERIC <> AGG_TOKEN (placeholder rewritten by ProvSQL at plan time) */
1076CREATE OPERATOR <> (
1077 LEFTARG = NUMERIC,
1078 RIGHTARG = AGG_TOKEN,
1079 PROCEDURE = numeric_comp_agg_token,
1080 COMMUTATOR = <>,
1081 NEGATOR = =
1082);
1084/** @brief SQL operator AGG_TOKEN >= NUMERIC (placeholder rewritten by ProvSQL at plan time) */
1085CREATE OPERATOR >= (
1086 LEFTARG = AGG_TOKEN,
1087 RIGHTARG = NUMERIC,
1088 PROCEDURE = agg_token_comp_numeric,
1089 COMMUTATOR = <=,
1090 NEGATOR = <
1091);
1092/** @brief SQL operator NUMERIC >= AGG_TOKEN (placeholder rewritten by ProvSQL at plan time) */
1093CREATE OPERATOR >= (
1094 LEFTARG = NUMERIC,
1095 RIGHTARG = AGG_TOKEN,
1096 PROCEDURE = numeric_comp_agg_token,
1097 COMMUTATOR = <=,
1098 NEGATOR = <
1099);
1100
1101/** @brief SQL operator AGG_TOKEN > NUMERIC (placeholder rewritten by ProvSQL at plan time) */
1102CREATE OPERATOR > (
1103 LEFTARG = AGG_TOKEN,
1104 RIGHTARG = NUMERIC,
1105 PROCEDURE = agg_token_comp_numeric,
1106 COMMUTATOR = <,
1107 NEGATOR = <=
1108);
1109/** @brief SQL operator NUMERIC > AGG_TOKEN (placeholder rewritten by ProvSQL at plan time) */
1110CREATE OPERATOR > (
1111 LEFTARG = NUMERIC,
1112 RIGHTARG = AGG_TOKEN,
1113 PROCEDURE = numeric_comp_agg_token,
1114 COMMUTATOR = <,
1115 NEGATOR = <=
1116);
1117
1118/** @} */
1119
1120/** @defgroup aggregate_provenance Aggregate provenance
1121 * Functions for building and evaluating aggregate (GROUP BY) provenance,
1122 * including the δ-semiring operator and semimodule multiplication.
1123 * @{
1124 */
1125
1127 * @brief Create a δ-semiring gate wrapping a provenance token
1128 *
1129 * Used internally for aggregate provenance. Returns the token unchanged
1130 * if it is gate_zero() or gate_one(), and gate_one() if the token is NULL.
1131 */
1132CREATE OR REPLACE FUNCTION provenance_delta
1133 (token UUID)
1134 RETURNS UUID AS
1135$$
1136DECLARE
1137 delta_token UUID;
1138BEGIN
1139 IF token = gate_zero() OR token = gate_one() THEN
1140 return token;
1141 END IF;
1142
1143 IF token IS NULL THEN
1144 return gate_one();
1145 END IF;
1146
1147 delta_token:=uuid_generate_v5(uuid_ns_provsql(),concat('delta',token));
1148
1149 PERFORM create_gate(delta_token,'delta',ARRAY[token::UUID]);
1150
1151 RETURN delta_token;
1152END
1153$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp,public SECURITY DEFINER PARALLEL SAFE;
1154
1156 * @brief Build an aggregate provenance gate from grouped tokens
1157 *
1158 * Called internally by the query rewriter for GROUP BY queries.
1159 * Creates an agg gate linking all contributing tokens and records
1160 * the aggregate function OID and the computed scalar value.
1161 *
1162 * @param aggfnoid OID of the SQL aggregate function
1163 * @param aggtype OID of the aggregate result type
1164 * @param val computed aggregate value
1165 * @param tokens array of provenance tokens being aggregated
1166 */
1167CREATE OR REPLACE FUNCTION provenance_aggregate(
1168 aggfnoid INTEGER,
1169 aggtype INTEGER,
1170 val ANYELEMENT,
1171 tokens UUID[])
1172 RETURNS AGG_TOKEN AS
1173$$
1174DECLARE
1175 c INTEGER;
1176 agg_tok UUID;
1177 agg_val varchar;
1178BEGIN
1179 c:=array_length(tokens, 1);
1180
1181 agg_val = CAST(val as VARCHAR);
1182
1183 IF c = 0 THEN
1184 agg_tok := gate_zero();
1185 ELSE
1186 agg_tok := uuid_generate_v5(
1187 uuid_ns_provsql(),
1188 concat('agg',tokens));
1189 PERFORM create_gate(agg_tok, 'agg', tokens);
1190 PERFORM set_infos(agg_tok, aggfnoid, aggtype);
1191 PERFORM set_extra(agg_tok, agg_val);
1192 END IF;
1193
1194 RETURN '( '||agg_tok||' , '||agg_val||' )';
1195END
1196$$ LANGUAGE plpgsql PARALLEL SAFE STRICT SET search_path=provsql,pg_temp,public SECURITY DEFINER;
1197
1198/**
1199 * @brief Create a semimodule scalar multiplication gate
1200 *
1201 * Pairs a scalar value with a provenance token, used internally by
1202 * the query rewriter for aggregate provenance.
1203 *
1204 * @param val the scalar value
1205 * @param token the provenance token to multiply
1206 */
1207CREATE OR REPLACE FUNCTION provenance_semimod(val ANYELEMENT, token UUID)
1208 RETURNS UUID AS
1209$$
1210DECLARE
1211 semimod_token UUID;
1212 value_token UUID;
1213BEGIN
1214 SELECT uuid_generate_v5(uuid_ns_provsql(),concat('value',CAST(val AS VARCHAR)))
1215 INTO value_token;
1216 SELECT uuid_generate_v5(uuid_ns_provsql(),concat('semimod',value_token,token))
1217 INTO semimod_token;
1218
1219 --create value gates
1220 PERFORM create_gate(value_token,'value');
1221 PERFORM set_extra(value_token, CAST(val AS VARCHAR));
1222
1223 --create semimod gate
1224 PERFORM create_gate(semimod_token,'semimod',ARRAY[token::UUID,value_token]);
1225
1226 RETURN semimod_token;
1227END
1228$$ LANGUAGE plpgsql PARALLEL SAFE SET search_path=provsql,pg_temp,public SECURITY DEFINER;
1229
1230/** @} */
1231
1232/** @defgroup probability Probability and Shapley values
1233 * Functions for computing probabilities, expected values, and
1234 * game-theoretic contribution measures (Shapley/Banzhaf values)
1235 * from provenance circuits.
1236 * @{
1237 */
1238
1239/**
1240 * @brief Compute the probability of a provenance token
1241 *
1242 * Compiles the provenance circuit to d-DNNF and evaluates the
1243 * probability. The compilation method can be selected explicitly.
1244 *
1245 * @param token provenance token to evaluate
1246 * @param method knowledge compilation method (NULL for default)
1247 * @param arguments additional arguments for the method
1248 */
1249CREATE OR REPLACE FUNCTION probability_evaluate(
1250 token UUID,
1251 method TEXT = NULL,
1252 arguments TEXT = NULL)
1253 RETURNS DOUBLE PRECISION AS
1254 'provsql','probability_evaluate' LANGUAGE C STABLE;
1255
1256/**
1257 * @brief Compute the expected value of an aggregate expression
1258 *
1259 * Computes E[input | prov], the expected value of an aggregate result
1260 * conditioned on a provenance expression. Supports SUM, MIN, and MAX
1261 * aggregation functions.
1262 *
1263 * @param input aggregate expression (AGG_TOKEN) to compute the expected value of
1264 * @param prov provenance condition (defaults to gate_one(), i.e., unconditional)
1265 * @param method knowledge compilation method
1266 * @param arguments additional arguments for the method
1267 */
1268CREATE OR REPLACE FUNCTION expected(
1269 input ANYELEMENT,
1270 prov UUID = gate_one(),
1271 method TEXT = NULL,
1272 arguments TEXT = NULL)
1273 RETURNS DOUBLE PRECISION AS $$
1274DECLARE
1275 aggregation_function VARCHAR;
1276 token AGG_TOKEN;
1277 result DOUBLE PRECISION;
1278 total_probability DOUBLE PRECISION;
1279BEGIN
1280 token := input::AGG_TOKEN;
1281 IF token IS NULL THEN
1282 RETURN NULL;
1283 END IF;
1284 IF get_gate_type(token) <> 'agg' THEN
1285 RAISE EXCEPTION USING MESSAGE='Wrong gate type for expected value computation';
1286 END IF;
1287 SELECT pp.proname::varchar FROM pg_proc pp WHERE oid=(get_infos(token)).info1 INTO aggregation_function;
1288 IF aggregation_function = 'sum' THEN
1289 -- Expected value and summation operators commute
1290 SELECT SUM(probability_evaluate((get_children(c))[1], method, arguments) * CAST(get_extra((get_children(c))[2]) AS DOUBLE PRECISION))
1291 FROM UNNEST(get_children(token)) AS c INTO result;
1292 ELSIF aggregation_function = 'min' OR aggregation_function = 'max' THEN
1293 -- The entire distribution is of linear size, can be easily computed
1294 WITH tok_value AS (
1295 SELECT (get_children(c))[1] AS tok, (CASE WHEN aggregation_function='max' THEN -1 ELSE 1 END) * CAST(get_extra((get_children(c))[2]) AS DOUBLE PRECISION) AS v
1296 FROM UNNEST(get_children(token)) AS c
1297 ) SELECT probability_evaluate(provenance_monus(prov, provenance_plus(ARRAY_AGG(tok)))) FROM tok_value INTO total_probability;
1298 IF total_probability > epsilon() THEN
1299 result := (CASE WHEN aggregation_function='max' THEN -1 ELSE 1 END) * CAST('Infinity' AS DOUBLE PRECISION);
1300 ELSE
1301 WITH tok_value AS (
1302 SELECT (get_children(c))[1] AS tok, (CASE WHEN aggregation_function='max' THEN -1 ELSE 1 END) * CAST(get_extra((get_children(c))[2]) AS DOUBLE PRECISION) AS v
1303 FROM UNNEST(get_children(token)) AS c
1304 ) SELECT
1305 (CASE WHEN aggregation_function='max' THEN -1 ELSE 1 END) * SUM(p*v) FROM
1306 (SELECT t1.v AS v, probability_evaluate(provenance_monus(provenance_plus(ARRAY_AGG(t1.tok)),provenance_plus(ARRAY_AGG(t2.tok))), method, arguments) AS p
1307 FROM tok_value t1 LEFT OUTER JOIN tok_value t2 ON t1.v > t2.v
1308 GROUP BY t1.v) t INTO result;
1309 END IF;
1310 ELSE
1311 RAISE EXCEPTION USING MESSAGE='Cannot compute expected value for aggregation function ' || aggregation_function;
1312 END IF;
1313 IF prov <> gate_one() AND result <> 0. AND result <> 'Infinity' AND result <> '-Infinity' THEN
1314 result := result/probability_evaluate(prov, method, arguments);
1315 END IF;
1316 RETURN result;
1317END
1318$$ LANGUAGE plpgsql PARALLEL SAFE SET search_path=provsql SECURITY DEFINER;;
1319
1320/**
1321 * @brief Compute the Shapley value of an input variable
1322 *
1323 * Measures the contribution of a specific input variable to the
1324 * truth of a provenance expression, using game-theoretic Shapley values.
1325 *
1326 * @param token provenance token to evaluate
1327 * @param variable UUID of the input variable
1328 * @param method knowledge compilation method
1329 * @param arguments additional arguments for the method
1330 * @param banzhaf if true, compute the Banzhaf value instead
1331 */
1332CREATE OR REPLACE FUNCTION shapley(
1333 token UUID,
1334 variable UUID,
1335 method TEXT = NULL,
1336 arguments TEXT = NULL,
1337 banzhaf BOOLEAN = 'f')
1338 RETURNS DOUBLE PRECISION AS
1339 'provsql','shapley' LANGUAGE C STABLE;
1340
1341/** @brief Compute Shapley values for all input variables at once */
1342CREATE OR REPLACE FUNCTION shapley_all_vars(
1343 IN token UUID,
1344 IN method TEXT = NULL,
1345 IN arguments TEXT = NULL,
1346 IN banzhaf BOOLEAN = 'f',
1347 OUT variable UUID,
1348 OUT value DOUBLE PRECISION)
1349 RETURNS SETOF RECORD AS
1350 'provsql', 'shapley_all_vars'
1351 LANGUAGE C STABLE;
1352
1353/** @brief Compute the Banzhaf power index of an input variable */
1354CREATE OR REPLACE FUNCTION banzhaf(
1355 token UUID,
1356 variable UUID,
1357 method TEXT = NULL,
1358 arguments TEXT = NULL)
1359 RETURNS DOUBLE PRECISION AS
1360 $$ SELECT provsql.shapley(token, variable, method, arguments, 't') $$
1361 LANGUAGE SQL;
1362
1363/** @brief Compute Banzhaf power indices for all input variables at once */
1364CREATE OR REPLACE FUNCTION banzhaf_all_vars(
1365 IN token UUID,
1366 IN method TEXT = NULL,
1367 IN arguments TEXT = NULL,
1368 OUT variable UUID,
1369 OUT value DOUBLE PRECISION)
1370 RETURNS SETOF RECORD AS
1371 $$ SELECT * FROM provsql.shapley_all_vars(token, method, arguments, 't') $$
1372 LANGUAGE SQL;
1373
1374/** @} */
1375
1376/** @defgroup provenance_output Provenance output
1377 * Functions for visualizing and exporting provenance circuits
1378 * in various formats.
1379 * @{
1380 */
1381
1382/**
1383 * @brief Return a DOT or TEXT visualization of the provenance circuit
1384 *
1385 * @param token root provenance token
1386 * @param token2desc mapping table for gate descriptions
1387 * @param dbg debug level (0 = normal)
1388 */
1389CREATE OR REPLACE FUNCTION view_circuit(
1390 token UUID,
1391 token2desc REGCLASS,
1392 dbg INT = 0)
1393 RETURNS TEXT AS
1394 'provsql','view_circuit' LANGUAGE C;
1395
1396/**
1397 * @brief Return an XML representation of the provenance circuit
1398 *
1399 * @param token root provenance token
1400 * @param token2desc optional mapping table for gate descriptions
1401 */
1402CREATE OR REPLACE FUNCTION to_provxml(
1403 token UUID,
1404 token2desc REGCLASS = NULL)
1405 RETURNS TEXT AS
1406 'provsql','to_provxml' LANGUAGE C;
1407
1408/** @brief Return the provenance token of the current query result tuple */
1409CREATE OR REPLACE FUNCTION provenance() RETURNS UUID AS
1410 'provsql', 'provenance' LANGUAGE C;
1411
1412/**
1413 * @brief Compute where-provenance for a result tuple
1414 *
1415 * Returns a TEXT representation showing which input columns
1416 * contributed to each output column.
1417 */
1418CREATE OR REPLACE FUNCTION where_provenance(token UUID)
1419 RETURNS TEXT AS
1420 'provsql','where_provenance' LANGUAGE C;
1421
1422/** @} */
1423
1424/** @defgroup circuit_init Circuit initialization
1425 * Functions and statements executed at extension load time to
1426 * reset internal caches and create the constant zero/one gates.
1427 * @{
1428 */
1429
1430/** @brief Reset the internal cache of OID constants used by the query rewriter */
1431CREATE OR REPLACE FUNCTION reset_constants_cache()
1432 RETURNS VOID AS
1433 'provsql', 'reset_constants_cache' LANGUAGE C;
1434
1435SELECT reset_constants_cache();
1436
1437SELECT create_gate(gate_zero(), 'zero');
1438SELECT create_gate(gate_one(), 'one');
1439
1440/** @} */
1441
1442/** @brief Types of update operations tracked for temporal provenance */
1443CREATE TYPE QUERY_TYPE_ENUM AS ENUM (
1444 'INSERT', -- Row was inserted
1445 'DELETE', -- Row was deleted
1446 'UPDATE', -- Row was updated
1447 'UNDO' -- Previous operation was undone
1448 );
1449
1450/** @defgroup compiled_semirings Compiled semirings
1451 * Definitions of compiled semirings
1452 * @{
1453 */
1454
1455/** @brief Evaluate provenance as a symbolic formula (e.g., "a ⊗ b ⊕ c") */
1456CREATE FUNCTION sr_formula(token ANYELEMENT, token2value REGCLASS)
1457 RETURNS VARCHAR AS
1458$$
1459BEGIN
1460 RETURN provsql.provenance_evaluate_compiled(
1461 token,
1462 token2value,
1463 'formula',
1464 '𝟙'::VARCHAR
1465 );
1466END
1467$$ LANGUAGE plpgsql STRICT PARALLEL SAFE STABLE;
1468
1469/** @brief Evaluate provenance over the counting semiring (ℕ) */
1470CREATE FUNCTION sr_counting(token ANYELEMENT, token2value REGCLASS)
1471 RETURNS INT AS
1472$$
1473BEGIN
1474 RETURN provsql.provenance_evaluate_compiled(
1475 token,
1476 token2value,
1477 'counting',
1478 1
1479 );
1480END
1481$$ LANGUAGE plpgsql STRICT PARALLEL SAFE STABLE;
1482
1483/** @brief Evaluate provenance as why-provenance (set of witness sets) */
1484CREATE FUNCTION sr_why(token ANYELEMENT, token2value REGCLASS)
1485 RETURNS VARCHAR AS
1486$$
1487BEGIN
1488 RETURN provsql.provenance_evaluate_compiled(
1489 token,
1490 token2value,
1491 'why',
1492 '{}'::VARCHAR
1493 );
1494END
1495$$ LANGUAGE plpgsql STRICT PARALLEL SAFE STABLE;
1496
1497/** @brief Evaluate provenance as a Boolean expression */
1498CREATE FUNCTION sr_boolexpr(token ANYELEMENT)
1499 RETURNS VARCHAR AS
1500$$
1501BEGIN
1502 RETURN provsql.provenance_evaluate_compiled(
1503 token,
1504 NULL,
1505 'boolexpr',
1506 '⊤'::VARCHAR
1507 );
1508END
1509$$ LANGUAGE plpgsql STRICT PARALLEL SAFE STABLE;
1510
1511/** @brief Evaluate provenance over the Boolean semiring (true/false) */
1512CREATE FUNCTION sr_boolean(token ANYELEMENT, token2value REGCLASS)
1513 RETURNS BOOLEAN AS
1514$$
1515BEGIN
1516 RETURN provsql.provenance_evaluate_compiled(
1517 token,
1518 token2value,
1519 'BOOLEAN',
1520 TRUE
1521 );
1522END
1523$$ LANGUAGE plpgsql STRICT PARALLEL SAFE STABLE;
1524
1525/** @} */
1526
1527/** @defgroup choose_aggregate choose aggregate
1528 * Choose one value among many, used in particular to code a mutually
1529 * exclusive choice as an aggregate.
1530 * @{
1531 */
1532
1533/** @brief Transition function for the choose aggregate (keeps first non-NULL value) */
1534CREATE FUNCTION choose_function(state ANYELEMENT, data ANYELEMENT)
1535 RETURNS ANYELEMENT AS
1536$$
1537BEGIN
1538 IF state IS NULL THEN
1539 RETURN data;
1540 ELSE
1541 RETURN state;
1542 END IF;
1543END
1544$$ LANGUAGE plpgsql PARALLEL SAFE IMMUTABLE;
1545
1546/** @brief Aggregate that returns an arbitrary non-NULL value from a group */
1547CREATE AGGREGATE choose(ANYELEMENT) (
1548 SFUNC = choose_function,
1549 STYPE = ANYELEMENT
1550);
1551
1552/** @} */
1553
1554GRANT USAGE ON SCHEMA provsql TO PUBLIC;
1555
1556SET search_path TO public;
1557SET search_path TO provsql;
1558
1559/** @defgroup update_provenance Update provenance (PostgreSQL 14+)
1560 * Extended provenance tracking for INSERT, UPDATE, DELETE, and UNDO
1561 * operations, including temporal validity ranges.
1562 * @{
1563 */
1564
1565/**
1566 * @brief Table recording the history of INSERT, UPDATE, DELETE, and UNDO operations
1567 *
1568 * Each row records one provenance-tracked modification, linking the
1569 * operation's provenance token to metadata (query TEXT, type, user,
1570 * TIMESTAMP) and the temporal validity range of the affected rows.
1571 */
1572CREATE TABLE update_provenance (
1573 provsql UUID,
1574 query TEXT,
1575 query_type QUERY_TYPE_ENUM,
1576 username TEXT,
1577 ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1578 valid_time TSTZMULTIRANGE DEFAULT TSTZMULTIRANGE(tstzrange(CURRENT_TIMESTAMP, NULL))
1579);
1580
1581/** @cond INTERNAL */
1582/* Enable provenance tracking on an existing table (PostgreSQL 14+ version).
1583 * Overrides the common version; documented via add_provenance in provsql.common.sql. */
1584CREATE OR REPLACE FUNCTION add_provenance(_tbl REGCLASS)
1585 RETURNS VOID AS
1586$$
1587BEGIN
1588 EXECUTE format('ALTER TABLE %s ADD COLUMN provsql UUID UNIQUE DEFAULT public.uuid_generate_v4()', _tbl);
1589 EXECUTE format('SELECT provsql.create_gate(provsql, ''input'') FROM %s', _tbl);
1590 EXECUTE format('CREATE TRIGGER add_gate BEFORE INSERT ON %s FOR EACH ROW EXECUTE PROCEDURE provsql.add_gate_trigger()',_tbl);
1591
1592 EXECUTE format('CREATE TRIGGER insert_statement AFTER INSERT ON %s REFERENCING NEW TABLE AS NEW_TABLE FOR EACH STATEMENT EXECUTE PROCEDURE provsql.insert_statement_trigger()', _tbl);
1593 EXECUTE format('CREATE TRIGGER delete_statement AFTER DELETE ON %s REFERENCING OLD TABLE AS OLD_TABLE FOR EACH STATEMENT EXECUTE PROCEDURE provsql.delete_statement_trigger()', _tbl);
1594 EXECUTE format('CREATE TRIGGER update_statement AFTER UPDATE ON %s REFERENCING OLD TABLE AS OLD_TABLE NEW TABLE AS NEW_TABLE FOR EACH STATEMENT EXECUTE PROCEDURE provsql.update_statement_trigger()', _tbl);
1595
1596END
1597$$ LANGUAGE plpgsql SECURITY DEFINER;
1598/** @endcond */
1599
1600/** @cond INTERNAL */
1601/* Trigger function for DELETE statement provenance tracking (PostgreSQL 14+).
1602 * Overrides the common version; documented via delete_statement_trigger in provsql.common.sql. */
1603CREATE OR REPLACE FUNCTION delete_statement_trigger()
1604 RETURNS TRIGGER AS
1605$$
1606DECLARE
1607 query_text TEXT;
1608 delete_token UUID;
1609 old_token UUID;
1610 new_token UUID;
1611 r RECORD;
1612 enable_trigger BOOL;
1613BEGIN
1614 enable_trigger := current_setting('provsql.update_provenance', true);
1615 IF enable_trigger = 'f' THEN
1616 RETURN NULL;
1617 END IF;
1618 delete_token := public.uuid_generate_v4();
1619
1620 PERFORM create_gate(delete_token, 'update');
1621
1622 SELECT query
1623 INTO query_text
1624 FROM pg_stat_activity
1625 WHERE pid = pg_backend_pid();
1626
1627 INSERT INTO update_provenance (provsql, query, query_type, username, ts, valid_time)
1628 VALUES (delete_token, query_text, 'DELETE', current_user, CURRENT_TIMESTAMP, TSTZMULTIRANGE(tstzrange(CURRENT_TIMESTAMP, NULL)));
1629
1630 PERFORM set_config('provsql.update_provenance', 'off', false);
1631 EXECUTE format('INSERT INTO %I.%I SELECT * FROM OLD_TABLE;', TG_TABLE_SCHEMA, TG_TABLE_NAME);
1632 PERFORM set_config('provsql.update_provenance', 'on', false);
1633
1634 FOR r IN (SELECT * FROM OLD_TABLE) LOOP
1635 old_token := r.provsql;
1636 new_token := provenance_monus(old_token, delete_token);
1637
1638 PERFORM set_config('provsql.update_provenance', 'off', false);
1639 EXECUTE format('UPDATE %I.%I SET provsql = $1 WHERE provsql = $2;', TG_TABLE_SCHEMA, TG_TABLE_NAME)
1640 USING new_token, old_token;
1641 PERFORM set_config('provsql.update_provenance', 'on', false);
1642 END LOOP;
1643
1644 RETURN NULL;
1645END
1646$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp SECURITY DEFINER;
1647/** @endcond */
1648
1649/**
1650 * @brief Trigger function for INSERT statement provenance tracking
1651 *
1652 * Records the insertion in update_provenance and multiplies provenance
1653 * tokens of inserted rows with the insert token.
1654 */
1655CREATE OR REPLACE FUNCTION insert_statement_trigger()
1656 RETURNS TRIGGER AS
1657$$
1658DECLARE
1659 query_text TEXT;
1660 insert_token UUID;
1661 old_token UUID;
1662 new_token UUID;
1663 r RECORD;
1664 enable_trigger BOOL;
1665BEGIN
1666 enable_trigger := current_setting('provsql.update_provenance', true);
1667 IF enable_trigger = 'f' THEN
1668 RETURN NULL;
1669 END IF;
1670
1671 insert_token := public.uuid_generate_v4();
1672
1673 PERFORM create_gate(insert_token, 'update');
1674
1675 SELECT query
1676 INTO query_text
1677 FROM pg_stat_activity
1678 WHERE pid = pg_backend_pid();
1679
1680 INSERT INTO update_provenance (provsql, query, query_type, username, ts, valid_time)
1681 VALUES (insert_token, query_text, 'INSERT', current_user, CURRENT_TIMESTAMP, TSTZMULTIRANGE(tstzrange(CURRENT_TIMESTAMP, NULL)));
1682
1683 FOR r IN (SELECT * FROM NEW_TABLE) LOOP
1684 old_token := r.provsql;
1685 new_token := provenance_times(old_token, insert_token);
1686 PERFORM set_config('provsql.update_provenance', 'off', false);
1687 EXECUTE format('UPDATE %I.%I SET provsql = $1 WHERE provsql = $2;', TG_TABLE_SCHEMA, TG_TABLE_NAME)
1688 USING new_token, old_token;
1689 PERFORM set_config('provsql.update_provenance', 'on', false);
1690 END LOOP;
1691
1692 RETURN NULL;
1693END
1694$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp SECURITY DEFINER;
1695
1696/**
1697 * @brief Trigger function for UPDATE statement provenance tracking
1698 *
1699 * Records the update in update_provenance. Multiplies new-row tokens
1700 * with the update token and applies monus to old-row tokens.
1701 */
1702CREATE OR REPLACE FUNCTION update_statement_trigger()
1703 RETURNS TRIGGER AS
1704$$
1705DECLARE
1706 query_text TEXT;
1707 update_token UUID;
1708 old_token UUID;
1709 new_token UUID;
1710 r RECORD;
1711 enable_trigger BOOL;
1712BEGIN
1713 enable_trigger := current_setting('provsql.update_provenance', true);
1714 IF enable_trigger = 'f' THEN
1715 RETURN NULL;
1716 END IF;
1717 update_token := public.uuid_generate_v4();
1718
1719 PERFORM create_gate(update_token, 'update');
1720
1721 SELECT query
1722 INTO query_text
1723 FROM pg_stat_activity
1724 WHERE pid = pg_backend_pid();
1725
1726 INSERT INTO update_provenance (provsql, query, query_type, username, ts, valid_time)
1727 VALUES (update_token, query_text, 'UPDATE', current_user, CURRENT_TIMESTAMP, TSTZMULTIRANGE(tstzrange(CURRENT_TIMESTAMP, NULL)));
1728
1729 FOR r IN (SELECT * FROM NEW_TABLE) LOOP
1730 old_token := r.provsql;
1731 new_token := provenance_times(old_token, update_token);
1732
1733 PERFORM set_config('provsql.update_provenance', 'off', false);
1734 EXECUTE format('UPDATE %I.%I SET provsql = $1 WHERE provsql = $2;', TG_TABLE_SCHEMA, TG_TABLE_NAME)
1735 USING new_token, old_token;
1736 PERFORM set_config('provsql.update_provenance', 'on', false);
1737 END LOOP;
1738
1739 PERFORM set_config('provsql.update_provenance', 'off', false);
1740 EXECUTE format('INSERT INTO %I.%I SELECT * FROM OLD_TABLE;', TG_TABLE_SCHEMA, TG_TABLE_NAME);
1741 PERFORM set_config('provsql.update_provenance', 'on', false);
1742
1743 FOR r IN (SELECT * FROM OLD_TABLE) LOOP
1744 old_token := r.provsql;
1745 new_token := provenance_monus(old_token, update_token);
1746
1747 PERFORM set_config('provsql.update_provenance', 'off', false);
1748 EXECUTE format('UPDATE %I.%I SET provsql = $1 WHERE provsql = $2;', TG_TABLE_SCHEMA, TG_TABLE_NAME)
1749 USING new_token, old_token;
1750 PERFORM set_config('provsql.update_provenance', 'on', false);
1751 END LOOP;
1752
1753 RETURN NULL;
1754END
1755$$ LANGUAGE plpgsql SET search_path=provsql,pg_temp SECURITY DEFINER;
1756
1757
1758/** @} */
1759
1760/** @defgroup temporal_db Temporal DB (PostgreSQL 14+)
1761 * Functions for temporal database support. These use provenance
1762 * evaluation over the multirange semiring to track temporal validity
1763 * of tuples.
1764 * @{
1765 */
1766
1767/** @brief Transition function for temporal union (plus): merge multiranges */
1768SET search_path TO provsql;
1769CREATE OR REPLACE FUNCTION union_tstzintervals_plus_state(
1770 state TSTZMULTIRANGE,
1771 value TSTZMULTIRANGE
1772)
1773RETURNS TSTZMULTIRANGE AS
1774$$
1775 SELECT CASE WHEN state IS NULL THEN value ELSE state + value END
1776$$ LANGUAGE SQL IMMUTABLE;
1777
1778/** @brief Transition function for temporal intersection (times): intersect multiranges */
1779CREATE OR REPLACE FUNCTION union_tstzintervals_times_state(
1780 state TSTZMULTIRANGE,
1781 value TSTZMULTIRANGE
1782)
1783RETURNS TSTZMULTIRANGE AS
1784$$
1785 SELECT CASE WHEN state IS NULL THEN value ELSE state * value END
1786$$ LANGUAGE SQL IMMUTABLE;
1787
1788/** @brief Aggregate: union of TIMESTAMP multiranges (semiring plus) */
1789CREATE OR REPLACE AGGREGATE union_tstzintervals_plus(TSTZMULTIRANGE)
1790(
1791 sfunc = union_tstzintervals_plus_state,
1792 stype = TSTZMULTIRANGE,
1793 initcond = '{}'
1794);
1795
1796/** @brief Aggregate: intersection of TIMESTAMP multiranges (semiring times) */
1797CREATE OR REPLACE AGGREGATE union_tstzintervals_times(TSTZMULTIRANGE)
1798(
1799 sfunc = union_tstzintervals_times_state,
1800 stype = TSTZMULTIRANGE,
1801 initcond = '{(,)}'
1802);
1803
1804/** @brief Temporal monus: subtract one multirange from another */
1805CREATE OR REPLACE FUNCTION union_tstzintervals_monus(
1806 state TSTZMULTIRANGE,
1807 value TSTZMULTIRANGE
1808)
1809RETURNS TSTZMULTIRANGE AS
1810$$
1811 SELECT CASE WHEN state <@ value THEN '{}'::TSTZMULTIRANGE ELSE state - value END
1812$$ LANGUAGE SQL IMMUTABLE STRICT;
1813
1814/**
1815 * @brief Evaluate temporal provenance as a TIMESTAMP multirange
1816 *
1817 * Evaluates provenance over the multirange semiring to compute the
1818 * valid time intervals of a tuple.
1819 *
1820 * @param token provenance token to evaluate
1821 * @param token2value mapping table from tokens to temporal validity ranges
1822 */
1823CREATE OR REPLACE FUNCTION union_tstzintervals(
1824 token UUID,
1825 token2value REGCLASS
1826)
1827RETURNS TSTZMULTIRANGE AS
1828$$
1829BEGIN
1830 RETURN provenance_evaluate(
1831 token,
1832 token2value,
1833 '{(,)}'::TSTZMULTIRANGE,
1834 'union_tstzintervals_plus',
1835 'union_tstzintervals_times',
1836 'union_tstzintervals_monus'
1837 );
1838END
1839$$ LANGUAGE plpgsql PARALLEL SAFE;
1840
1841/**
1842 * @brief Create a view mapping provenance tokens to attribute values
1843 *
1844 * Like create_provenance_mapping but creates a view instead of a table,
1845 * so it always reflects the current state of the source table.
1846 *
1847 * @param newview name of the view to create
1848 * @param oldtbl source table with provenance tracking
1849 * @param att attribute whose values populate the mapping
1850 * @param preserve_case if true, quote the view name to preserve case
1851 */
1852CREATE OR REPLACE FUNCTION create_provenance_mapping_view(
1853 newview TEXT,
1854 oldtbl REGCLASS,
1855 att TEXT,
1856 preserve_case BOOL DEFAULT false
1857)
1858RETURNS VOID
1859LANGUAGE plpgsql
1860AS
1861$$
1862BEGIN
1863 IF preserve_case THEN
1864 EXECUTE format(
1865 'CREATE OR REPLACE VIEW %I AS SELECT %s AS value, provsql AS provenance FROM %s',
1866 newview,
1867 att,
1868 oldtbl
1869 );
1870 ELSE
1871 EXECUTE format(
1872 'CREATE OR REPLACE VIEW %s AS SELECT %s AS value, provsql AS provenance FROM %s',
1873 newview,
1874 att,
1875 oldtbl
1876 );
1877 END IF;
1878END;
1879$$;
1880
1881/**
1882 * @brief Query a table as it was at a specific point in time
1883 *
1884 * Returns all rows whose temporal validity includes the given TIMESTAMP.
1885 *
1886 * @param tablename name of the provenance-tracked table
1887 * @param at_time the point in time to query
1888 */
1889CREATE OR REPLACE FUNCTION timetravel(
1890 tablename TEXT,
1891 at_time TIMESTAMPTZ
1892)
1893RETURNS SETOF RECORD
1894LANGUAGE plpgsql
1895AS
1896$$
1897BEGIN
1898 RETURN QUERY EXECUTE format(
1899 '
1900 SELECT
1901 %1$I.*,
1902 union_tstzintervals(provenance(), ''%2$I'')
1903 FROM
1904 %1$I
1905 WHERE
1906 union_tstzintervals(provenance(), ''%2$I'') @> %3$L::TIMESTAMPTZ
1907 ',
1908 tablename,
1909 'time_validity_view',
1910 at_time::TEXT
1911 );
1912END;
1913$$;
1914
1915/**
1916 * @brief Query a table for rows valid during a time interval
1917 *
1918 * Returns all rows whose temporal validity overlaps the given range.
1919 *
1920 * @param tablename name of the provenance-tracked table
1921 * @param from_time start of the time interval
1922 * @param to_time end of the time interval
1923 */
1924CREATE OR REPLACE FUNCTION timeslice(
1925 tablename TEXT,
1926 from_time TIMESTAMPTZ,
1927 to_time TIMESTAMPTZ
1928)
1929RETURNS SETOF RECORD
1930LANGUAGE plpgsql
1931AS
1932$$
1933BEGIN
1934 RETURN QUERY EXECUTE format(
1935 '
1936 SELECT
1937 %1$I.*,
1938 union_tstzintervals(provenance(), ''%2$I'')
1939 FROM
1940 %1$I
1941 WHERE
1942 union_tstzintervals(provenance(), ''%2$I'')
1943 && tstzrange(%3$L::TIMESTAMPTZ, %4$L::TIMESTAMPTZ)
1944 ',
1945 tablename,
1946 'time_validity_view',
1947 from_time::TEXT,
1948 to_time::TEXT
1949 );
1950END;
1951$$;
1952
1953/**
1954 * @brief Query the full temporal history of specific rows
1955 *
1956 * Returns all versions of rows matching the given column values,
1957 * with their temporal validity ranges.
1958 *
1959 * @param tablename name of the provenance-tracked table
1960 * @param col_names array of column names to filter on
1961 * @param col_values array of corresponding values to match
1962 */
1963CREATE OR REPLACE FUNCTION history(
1964 tablename TEXT,
1965 col_names TEXT[],
1966 col_values TEXT[]
1967)
1968RETURNS SETOF RECORD
1969LANGUAGE plpgsql
1970AS
1971$$
1972DECLARE
1973 condition TEXT := '';
1974 i INT;
1975BEGIN
1976 IF array_length(col_names, 1) IS NULL
1977 OR array_length(col_values, 1) IS NULL
1978 OR array_length(col_names, 1) != array_length(col_values, 1)
1979 THEN
1980 RAISE EXCEPTION 'col_names and col_values must have the same (non-null) length';
1981 END IF;
1982
1983 FOR i IN 1..array_length(col_names, 1)
1984 LOOP
1985 IF i > 1 THEN
1986 condition := condition || ' AND ';
1987 END IF;
1988 condition := condition || format('%I = %L', col_names[i], col_values[i]);
1989 END LOOP;
1990
1991 RETURN QUERY EXECUTE format(
1992 '
1993 SELECT
1994 %I.*,
1995 union_tstzintervals(provenance(), ''%I'')
1996 FROM
1997 %I
1998 WHERE
1999 %s
2000 ',
2001 tablename,
2002 'time_validity_view',
2003 tablename,
2004 condition
2005 );
2006END;
2007$$;
2008
2009/**
2010 * @brief Get the valid time range for a specific tuple
2011 *
2012 * @param token provenance token of the tuple
2013 * @param tablename name of the table containing the tuple
2014 */
2015CREATE OR REPLACE FUNCTION get_valid_time(
2016 token UUID,
2017 tablename TEXT
2018)
2019RETURNS TSTZMULTIRANGE
2020LANGUAGE plpgsql
2021AS $$
2022DECLARE
2023 result TSTZMULTIRANGE;
2024BEGIN
2025 EXECUTE format(
2026 '
2027 SELECT
2028 union_tstzintervals(provenance(), %L)
2029 FROM
2030 %I
2031 WHERE
2032 provsql = %L
2033 ',
2034 'time_validity_view',
2035 tablename,
2036 token
2037 )
2038 INTO result;
2039
2040 RETURN result;
2041END;
2042$$;
2043
2044/**
2045 * @brief Undo a previously recorded update operation
2046 *
2047 * Traverses all provenance-tracked tables and rewrites their circuits
2048 * to apply monus with respect to the given update token, effectively
2049 * undoing the operation.
2050 *
2051 * @param c UUID of the update operation to undo (from update_provenance)
2052 */
2053CREATE OR REPLACE FUNCTION undo(
2054 c UUID
2055)
2056RETURNS UUID
2057LANGUAGE plpgsql
2058AS $$
2059DECLARE
2060 undo_query TEXT;
2061 undone_query TEXT;
2062 undo_token UUID;
2063 schema_rec RECORD;
2064 table_rec RECORD;
2065 row_rec RECORD;
2066 new_x UUID;
2067BEGIN
2068 SELECT query INTO undone_query
2069 FROM update_provenance
2070 WHERE provsql = c
2071 LIMIT 1;
2072
2073 IF undone_query IS NULL THEN
2074 RAISE NOTICE 'Unable to find % in update_provenance', c;
2075 RETURN c;
2076 END IF;
2077
2078 SELECT query
2079 INTO undo_query
2080 FROM pg_stat_activity
2081 WHERE pid = pg_backend_pid();
2082
2083 undo_token := public.uuid_generate_v4();
2084 PERFORM create_gate(undo_token, 'update');
2085 INSERT INTO update_provenance(provsql, query, query_type, username, ts, valid_time)
2086 VALUES (
2087 undo_token,
2088 undo_query,
2089 'UNDO',
2090 current_user,
2091 CURRENT_TIMESTAMP,
2092 TSTZMULTIRANGE(tstzrange(CURRENT_TIMESTAMP, NULL))
2093 );
2094
2095 PERFORM set_config('provsql.update_provenance', 'off', false);
2096
2097 FOR schema_rec IN
2098 SELECT nspname
2099 FROM pg_namespace
2100 WHERE nspname NOT IN ('pg_catalog','information_schema','pg_toast','pg_temp_1','pg_toast_temp_1')
2101 LOOP
2102 FOR table_rec IN
2103 EXECUTE format('SELECT tablename AS tname FROM pg_tables WHERE schemaname = %L', schema_rec.nspname)
2104 LOOP
2105 IF EXISTS (
2106 SELECT 1
2107 FROM information_schema.columns
2108 WHERE table_schema = schema_rec.nspname
2109 AND table_name = table_rec.tname
2110 AND table_name <> 'update_provenance'
2111 AND column_name = 'provsql'
2112 ) THEN
2113 FOR row_rec IN
2114 EXECUTE format('SELECT provsql AS x FROM %I.%I', schema_rec.nspname, table_rec.tname)
2115 LOOP
2116 new_x := replace_the_circuit(row_rec.x, c, undo_token);
2117 EXECUTE format('UPDATE %I.%I SET provsql = $1 WHERE provsql = $2',
2118 schema_rec.nspname, table_rec.tname)
2119 USING new_x, row_rec.x;
2120 END LOOP;
2121 END IF;
2122 END LOOP;
2123 END LOOP;
2124
2125 PERFORM set_config('provsql.update_provenance', 'on', false);
2126
2127 RETURN undo_token;
2128END;
2129$$;
2130
2131/**
2132 * @brief Recursively rewrite a circuit to undo a specific operation
2133 *
2134 * Helper for undo(). Walks the circuit and replaces occurrences of
2135 * the target update gate with its monus.
2136 *
2137 * @param x provenance token to rewrite
2138 * @param c UUID of the update operation to undo
2139 * @param u UUID of the undo operation
2140 */
2141CREATE OR REPLACE FUNCTION replace_the_circuit(
2142 x UUID,
2143 c UUID,
2144 u UUID
2145)
2146RETURNS UUID
2147LANGUAGE plpgsql
2148AS $$
2149DECLARE
2150 nchildren UUID[];
2151 child UUID;
2152 ntoken UUID;
2153 ntype PROVENANCE_GATE;
2154BEGIN
2155 IF x = c THEN
2156 RETURN provenance_monus(c, u);
2157 -- update and input gates cannot have children
2158 ELSIF get_gate_type(x) = 'update' OR get_gate_type(x) = 'input' THEN
2159 RETURN x;
2160 ELSE
2161 nchildren := '{}';
2162 FOREACH child IN ARRAY get_children(x)
2163 LOOP
2164 nchildren := array_append(nchildren, replace_the_circuit(child, c, u));
2165 END LOOP;
2166
2167 ntoken := public.uuid_generate_v4();
2168 ntype := get_gate_type(x);
2169
2170 PERFORM create_gate(ntoken, ntype, nchildren);
2171 RETURN ntoken;
2172 END IF;
2173END;
2174$$;
2175
2176SELECT create_provenance_mapping_view('time_validity_view', 'update_provenance', 'valid_time');
2177
2178/** @} */
2179
2180SET search_path TO public;