ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
provsql.c File Reference

PostgreSQL planner hook for transparent provenance tracking. More...

#include "postgres.h"
#include "fmgr.h"
#include "miscadmin.h"
#include "pg_config.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/print.h"
#include "optimizer/planner.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/ruleutils.h"
#include "utils/syscache.h"
#include <time.h>
#include "provsql_mmap.h"
#include "provsql_shmem.h"
#include "provsql_utils.h"
#include "compatibility.h"
Include dependency graph for provsql.c:

Go to the source code of this file.

Classes

struct  reduce_varattno_mutator_context
 Context for the reduce_varattno_mutator tree walker. More...
 
struct  aggregation_type_mutator_context
 Context for the aggregation_type_mutator tree walker. More...
 
struct  aggregation_mutator_context
 Context for the aggregation_mutator tree walker. More...
 
struct  provenance_mutator_context
 Context for the provenance_mutator tree walker. More...
 

Enumerations

enum  semiring_operation { SR_PLUS , SR_MONUS , SR_TIMES }
 Semiring operation used to combine provenance tokens. More...
 

Functions

void _PG_init (void)
 Extension initialization — called once when the shared library is loaded.
 
void _PG_fini (void)
 Extension teardown — restores the planner and shmem hooks.
 
static Query * process_query (const constants_t *constants, Query *q, bool **removed)
 Rewrite a single SELECT query to carry provenance.
 
static Var * make_provenance_attribute (const constants_t *constants, Query *q, RangeTblEntry *r, Index relid, AttrNumber attid)
 Build a Var node that references the provenance column of a relation.
 
static Node * reduce_varattno_mutator (Node *node, reduce_varattno_mutator_context *context)
 Tree-mutator callback that adjusts Var attribute numbers.
 
static void reduce_varattno_by_offset (List *targetList, Index varno, int *offset)
 Adjust Var attribute numbers in targetList after columns are removed.
 
static Node * aggregation_type_mutator (Node *node, aggregation_type_mutator_context *context)
 Tree-mutator that retypes a specific Var to agg_token.
 
static void fix_type_of_aggregation_result (const constants_t *constants, Query *q, Index rteid, List *targetList)
 Retypes aggregation-result Vars in q from UUID to agg_token.
 
static List * get_provenance_attributes (const constants_t *constants, Query *q)
 Collect all provenance Var nodes reachable from q's range table.
 
static Bitmapset * remove_provenance_attributes_select (const constants_t *constants, Query *q, bool **removed)
 Strip provenance UUID columns from q's SELECT list.
 
static Expr * add_eq_from_OpExpr_to_Expr (const constants_t *constants, OpExpr *fromOpExpr, Expr *toExpr, int **columns)
 Wrap toExpr in a provenance_eq gate if fromOpExpr is an equality between two tracked columns.
 
static Expr * add_eq_from_Quals_to_Expr (const constants_t *constants, Node *quals, Expr *result, int **columns)
 Walk a join-condition or WHERE quals node and add eq gates for every equality it contains.
 
static Expr * make_aggregation_expression (const constants_t *constants, Aggref *agg_ref, List *prov_atts, semiring_operation op)
 Build the provenance expression for a single aggregate function.
 
static FuncExpr * having_Expr_to_provenance_cmp (Expr *expr, const constants_t *constants, bool negated)
 Dispatch a HAVING sub-expression to the appropriate converter.
 
static FuncExpr * having_OpExpr_to_provenance_cmp (OpExpr *opExpr, const constants_t *constants, bool negated)
 Convert a comparison OpExpr on aggregate results into a provenance_cmp gate expression.
 
static FuncExpr * having_BoolExpr_to_provenance (BoolExpr *be, const constants_t *constants, bool negated)
 Convert a Boolean combination of HAVING comparisons into a provenance_times / provenance_plus gate expression.
 
static Expr * make_provenance_expression (const constants_t *constants, Query *q, List *prov_atts, bool aggregation, bool group_by_rewrite, semiring_operation op, int **columns, int nbcols)
 Build the combined provenance expression to be added to the SELECT list.
 
static Query * build_inner_for_distinct_key (Query *q, Expr *key_expr, List *groupby_tes)
 Build the inner GROUP-BY subquery for one AGG(DISTINCT key).
 
static Query * build_outer_for_distinct_key (TargetEntry *orig_agg_te, Query *inner, int n_gb, const constants_t *constants)
 Wrap inner in an outer query that applies the original aggregate.
 
static Query * rewrite_agg_distinct (Query *q, const constants_t *constants)
 Rewrite every AGG(DISTINCT key) in q using independent subqueries.
 
static Node * aggregation_mutator (Node *node, aggregation_mutator_context *context)
 Tree-mutator that replaces Aggrefs with provenance-aware aggregates.
 
static void replace_aggregations_by_provenance_aggregate (const constants_t *constants, Query *q, List *prov_atts, semiring_operation op)
 Replace every Aggref in q with a provenance-aware aggregate.
 
static void add_to_select (Query *q, Expr *provenance)
 Append the provenance expression to q's target list.
 
static Node * provenance_mutator (Node *node, provenance_mutator_context *context)
 Tree-mutator that replaces provenance() calls with the actual provenance expression.
 
static void replace_provenance_function_by_expression (const constants_t *constants, Query *q, Expr *provsql)
 Replace every explicit provenance() call in q with provsql.
 
static void transform_distinct_into_group_by (Query *q)
 Convert a SELECT DISTINCT into an equivalent GROUP BY.
 
static void remove_provenance_attribute_groupref (Query *q, const Bitmapset *removed_sortgrouprefs)
 Remove sort/group references that belonged to removed provenance columns.
 
static void remove_provenance_attribute_setoperations (Query *q, bool *removed)
 Strip the provenance column's type info from a set-operation node.
 
static Query * rewrite_non_all_into_external_group_by (Query *q)
 Wrap a non-ALL set operation in an outer GROUP BY query.
 
static bool provenance_function_walker (Node *node, void *data)
 Tree walker that returns true if any provenance() call is found.
 
static bool provenance_function_in_group_by (const constants_t *constants, Query *q)
 Check whether a provenance() call appears in the GROUP BY list.
 
static bool has_provenance_walker (Node *node, void *data)
 Tree walker that detects any provenance-bearing relation or provenance() call.
 
static bool has_provenance (const constants_t *constants, Query *q)
 Return true if q involves any provenance-bearing relation or contains an explicit provenance() call.
 
static bool aggtoken_walker (Node *node, const constants_t *constants)
 Tree walker that detects any Var of type agg_token.
 
static bool has_aggtoken (Node *node, const constants_t *constants)
 Return true if node contains a Var of type agg_token.
 
static bool transform_except_into_join (const constants_t *constants, Query *q)
 Rewrite an EXCEPT query into a LEFT JOIN with monus provenance.
 
static void process_set_operation_union (const constants_t *constants, SetOperationStmt *stmt)
 Recursively annotate a UNION tree with the provenance UUID type.
 
static void add_select_non_zero (const constants_t *constants, Query *q, Expr *provsql)
 Add a WHERE condition filtering out zero-provenance tuples.
 
static Node * add_to_havingQual (Node *havingQual, Expr *expr)
 Append expr to havingQual with an AND, creating one if needed.
 
static bool check_selection_on_aggregate (OpExpr *op, const constants_t *constants)
 Check whether op is a supported comparison on an aggregate result.
 
static bool check_boolexpr_on_aggregate (BoolExpr *be, const constants_t *constants)
 Check whether every leaf of a Boolean expression is a supported comparison on an aggregate result.
 
static bool check_expr_on_aggregate (Expr *expr, const constants_t *constants)
 Top-level dispatcher for supported WHERE-on-aggregate patterns.
 
static void build_column_map (Query *q, int **columns, int *nbcols)
 Build the per-RTE column-numbering map used by where-provenance.
 
static void migrate_aggtoken_quals_to_having (const constants_t *constants, Query *q)
 Move WHERE conditions on aggregate results (agg_token) to HAVING.
 
static PlannedStmt * provsql_planner (Query *q, int cursorOptions, ParamListInfo boundParams)
 PostgreSQL planner hook — entry point for provenance rewriting.
 

Variables

 PG_MODULE_MAGIC
 Required PostgreSQL extension magic block.
 
bool provsql_interrupted = false
 Global variable that becomes true if this particular backend received an interrupt signal.
 
bool provsql_active = true
 true while ProvSQL query rewriting is enabled
 
bool provsql_where_provenance = false
 Global variable that indicates if where-provenance support has been activated through the provsql.where_provenance run-time configuration parameter.
 
bool provsql_update_provenance = false
 true when provenance tracking for DML is enabled
 
int provsql_verbose = 100
 Verbosity level; controlled by the provsql.verbose_level GUC.
 
static const char * PROVSQL_COLUMN_NAME = "provsql"
 Name of the provenance column added to tracked tables.
 
static planner_hook_type prev_planner = NULL
 Previous planner hook (chained)
 

Detailed Description

PostgreSQL planner hook for transparent provenance tracking.

This file installs a planner_hook that intercepts every SELECT query and rewrites it to propagate a provenance circuit token (UUID) alongside normal result tuples. The rewriting proceeds in three conceptual phases:

  1. Discovery – scan the range table for relations/subqueries that already carry a provsql UUID column (get_provenance_attributes).
  2. Expression building – combine the discovered tokens according to the semiring operation that corresponds to the SQL operator in use (⊗ for joins, ⊕ for duplicate elimination, ⊖ for EXCEPT) and wrap aggregations (make_provenance_expression, make_aggregation_expression).
  3. Splice – append the resulting provenance expression to the target list and replace any explicit provenance() call in the query with the computed expression (add_to_select, replace_provenance_function_by_expression).

Definition in file provsql.c.

Enumeration Type Documentation

◆ semiring_operation

Semiring operation used to combine provenance tokens.

SR_TIMES corresponds to the multiplicative operation (joins, Cartesian products), SR_PLUS to the additive operation (duplicate elimination), and SR_MONUS to the monus / set-difference operation (EXCEPT).

Enumerator
SR_PLUS 

Semiring addition (UNION, SELECT DISTINCT)

SR_MONUS 

Semiring monus / set difference (EXCEPT)

SR_TIMES 

Semiring multiplication (JOIN, Cartesian product)

Definition at line 485 of file provsql.c.

Function Documentation

◆ _PG_fini()

void _PG_fini ( void  )
extern

Extension teardown — restores the planner and shmem hooks.

Definition at line 2895 of file provsql.c.

◆ _PG_init()

void _PG_init ( void  )
extern

Extension initialization — called once when the shared library is loaded.

Registers the four GUC variables (provsql.active, where_provenance, update_provenance, verbose_level), installs the planner hook and shared-memory hooks, and launches the background MMap worker.

Must be loaded via shared_preload_libraries; raises an error otherwise.

Definition at line 2826 of file provsql.c.

Here is the call graph for this function:

◆ add_eq_from_OpExpr_to_Expr()

static Expr * add_eq_from_OpExpr_to_Expr ( const constants_t *  constants,
OpExpr *  fromOpExpr,
Expr *  toExpr,
int **  columns 
)
static

Wrap toExpr in a provenance_eq gate if fromOpExpr is an equality between two tracked columns.

Used for where-provenance: each equijoin condition (and some WHERE equalities) introduces an eq gate that records which attribute positions were compared. Because this function is also called for WHERE predicates, it applies extra guards and silently returns toExpr unchanged when the expression does not match the expected shape (both sides must be Var nodes, possibly wrapped in a RelabelType).

Parameters
constants — Extension OID cache.
fromOpExpr — The equality OpExpr to inspect.
toExpr — Existing provenance expression to wrap.
columns — Per-RTE column-numbering array.
Returns
toExpr wrapped in provenance_eq(toExpr, col1, col2), or toExpr unchanged if the shape is unsupported.

Definition at line 513 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ add_eq_from_Quals_to_Expr()

static Expr * add_eq_from_Quals_to_Expr ( const constants_t *  constants,
Node *  quals,
Expr *  result,
int **  columns 
)
static

Walk a join-condition or WHERE quals node and add eq gates for every equality it contains.

Dispatches to add_eq_from_OpExpr_to_Expr for simple OpExpr nodes and iterates over the arguments of an AND BoolExpr. OR/NOT inside a join ON clause are rejected with an error.

Parameters
constants — Extension OID cache.
quals — Root of the quals tree (OpExpr or BoolExpr), or NULL (in which case result is returned unchanged).
result — Provenance expression to wrap.
columns — Per-RTE column-numbering array.
Returns
Updated provenance expression with zero or more eq gates added.

Definition at line 585 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ add_select_non_zero()

static void add_select_non_zero ( const constants_t *  constants,
Query *  q,
Expr *  provsql 
)
static

Add a WHERE condition filtering out zero-provenance tuples.

For EXCEPT queries, tuples whose provenance evaluates to zero (i.e., the right-hand side fully subsumes the left-hand side) must be excluded from the result. This function appends provsql <> gate_zero() to q->jointree->quals, ANDing with any existing WHERE condition.

Parameters
constants — Extension OID cache.
q — Query to modify in place.
provsql — Provenance expression that was added to the SELECT list.

Definition at line 2269 of file provsql.c.

Here is the caller graph for this function:

◆ add_to_havingQual()

static Node * add_to_havingQual ( Node *  havingQual,
Expr *  expr 
)
static

Append expr to havingQual with an AND, creating one if needed.

If havingQual is NULL, returns expr directly. If it is already an AND BoolExpr, appends to its argument list. Otherwise wraps both in a new AND node.

Parameters
havingQual — Existing HAVING qualifier, or NULL.
expr — Expression to conjoin.
Returns
The updated HAVING qualifier.

Definition at line 2306 of file provsql.c.

Here is the caller graph for this function:

◆ add_to_select()

static void add_to_select ( Query *  q,
Expr *  provenance 
)
static

Append the provenance expression to q's target list.

Inserts a new TargetEntry named provsql immediately before any resjunk entries (which must remain last) and adjusts the resno of subsequent entries accordingly.

Parameters
q — Query to modify in place.
provenance — Expression to add (becomes the provsql output column).

Definition at line 1654 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ aggregation_mutator()

static Node * aggregation_mutator ( Node *  node,
aggregation_mutator_context *  context 
)
static

Tree-mutator that replaces Aggrefs with provenance-aware aggregates.

Parameters
node — Current expression tree node.
context — Mutation context with prov_atts, op, and constants.
Returns
Possibly modified node.

Definition at line 1606 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ aggregation_type_mutator()

static Node * aggregation_type_mutator ( Node *  node,
aggregation_type_mutator_context *  context 
)
static

Tree-mutator that retypes a specific Var to agg_token.

Parameters
node — Current expression tree node.
context — Mutation context with varno, varattno, and constants.
Returns
Possibly modified node.

Definition at line 202 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ aggtoken_walker()

static bool aggtoken_walker ( Node *  node,
const constants_t *  constants 
)
static

Tree walker that detects any Var of type agg_token.

Parameters
node — Current expression tree node.
constants — Extension OID cache.
Returns
true if an agg_token Var is found in node.

Definition at line 2080 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ build_column_map()

static void build_column_map ( Query *  q,
int **  columns,
int *  nbcols 
)
static

Build the per-RTE column-numbering map used by where-provenance.

Assigns a sequential position (1, 2, 3, …) to every column across all RTEs in q->rtable that is not the provenance column, does not belong to a join RTE, and has a non-empty name. The provsql column is assigned -1 so callers can detect it. Join-RTE columns and empty-named columns (used for anonymous GROUP BY keys) are assigned 0.

Parameters
q — Query whose range table is mapped.
columns — Pre-allocated array of length q->rtable->length. Each element is allocated and filled by this function.
nbcols — Out-param: total number of non-provenance output columns.

Definition at line 2444 of file provsql.c.

Here is the caller graph for this function:

◆ build_inner_for_distinct_key()

static Query * build_inner_for_distinct_key ( Query *  q,
Expr *  key_expr,
List *  groupby_tes 
)
static

Build the inner GROUP-BY subquery for one AGG(DISTINCT key).

Produces:

SELECT key_expr, gb_col1, gb_col2, ...
FROM <same tables as q>
GROUP BY key_expr, gb_col1, gb_col2, ...
Parameters
q — Original query (supplies FROM / WHERE).
key_expr — The DISTINCT argument expression.
groupby_tes — Non-aggregate target entries that are GROUP BY columns.
Returns
Fresh inner Query.

Definition at line 1182 of file provsql.c.

Here is the caller graph for this function:

◆ build_outer_for_distinct_key()

static Query * build_outer_for_distinct_key ( TargetEntry *  orig_agg_te,
Query *  inner,
int  n_gb,
const constants_t *  constants 
)
static

Wrap inner in an outer query that applies the original aggregate.

Produces:

SELECT AGG(key_col), gb_col1, gb_col2, ...
FROM inner
GROUP BY gb_col1, gb_col2, ...

The DISTINCT flag is cleared; inner provides exactly one row per (key, group-by) combination, so the plain aggregate gives the right count.

Parameters
orig_agg_te — Original TargetEntry containing AGG(DISTINCT key).
inner — Inner query from build_inner_for_distinct_key.
n_gb — Number of GROUP BY columns (trailing entries in inner).
constants — Extension OID cache.
Returns
Fresh outer Query.

Definition at line 1252 of file provsql.c.

Here is the caller graph for this function:

◆ check_boolexpr_on_aggregate()

static bool check_boolexpr_on_aggregate ( BoolExpr *  be,
const constants_t *  constants 
)
static

Check whether every leaf of a Boolean expression is a supported comparison on an aggregate result.

Recursively validates OpExpr leaves via check_selection_on_aggregate and descends into nested BoolExpr nodes.

Parameters
be — The Boolean expression to validate.
constants — Extension OID cache.
Returns
True if all leaves are supported, false if any is not.

Definition at line 2389 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ check_expr_on_aggregate()

static bool check_expr_on_aggregate ( Expr *  expr,
const constants_t *  constants 
)
static

Top-level dispatcher for supported WHERE-on-aggregate patterns.

Parameters
expr — Expression to validate (OpExpr or BoolExpr).
constants — Extension OID cache.
Returns
True if ProvSQL can handle this expression.

Definition at line 2415 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ check_selection_on_aggregate()

static bool check_selection_on_aggregate ( OpExpr *  op,
const constants_t *  constants 
)
static

Check whether op is a supported comparison on an aggregate result.

Returns true iff op is a two-argument operator where at least one argument is a Var of type agg_token (or an implicit-cast wrapper thereof) and the other is a Const (possibly cast). This is the set of WHERE-on-aggregate patterns that ProvSQL can safely move to a HAVING clause.

Parameters
op — The OpExpr to inspect.
constants — Extension OID cache.
Returns
True if the pattern is supported, false otherwise.

Definition at line 2339 of file provsql.c.

Here is the caller graph for this function:

◆ fix_type_of_aggregation_result()

static void fix_type_of_aggregation_result ( const constants_t *  constants,
Query *  q,
Index  rteid,
List *  targetList 
)
static

Retypes aggregation-result Vars in q from UUID to agg_token.

After a subquery that contains provenance_aggregate is processed, its result type is agg_token rather than plain UUID. This mutator walks the outer query and updates the type of every Var referencing that result column so that subsequent type-checking passes correctly.

Parameters
constants — Extension OID cache.
q — Outer query to patch.
rteid — Range-table index of the subquery in q.
targetList — Target list of the subquery (to locate provenance_aggregate columns).

Definition at line 231 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_provenance_attributes()

static List * get_provenance_attributes ( const constants_t *  constants,
Query *  q 
)
static

Collect all provenance Var nodes reachable from q's range table.

Walks every RTE in q->rtable:

  • RTE_RELATION: looks for a column named provsql of type UUID.
  • RTE_SUBQUERY: recursively calls process_query and splices the resulting provenance column back into the parent's column list, also patching outer Var attribute numbers if inner columns were removed.
  • RTE_FUNCTION: handled when the function returns a single UUID column named provsql.
  • RTE_JOIN / RTE_VALUES / RTE_GROUP: handled passively (the underlying base-table RTEs supply the tokens).
Parameters
constants — Extension OID cache.
q — Query whose range table is scanned (subquery RTEs are modified in place by the recursive call).
Returns
List of Var nodes, one per provenance source; NIL if the query has no provenance-bearing relation.

Definition at line 273 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ has_aggtoken()

static bool has_aggtoken ( Node *  node,
const constants_t *  constants 
)
static

Return true if node contains a Var of type agg_token.

Used to detect whether a WHERE clause references an aggregate result (which must be moved to HAVING).

Parameters
node — Expression tree to inspect.
constants — Extension OID cache.
Returns
True if an agg_token Var is found anywhere in node.

Definition at line 2103 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ has_provenance()

static bool has_provenance ( const constants_t *  constants,
Query *  q 
)
static

Return true if q involves any provenance-bearing relation or contains an explicit provenance() call.

This is the gate condition checked by provsql_planner before doing any rewriting: if neither condition holds the query is passed through unchanged.

Parameters
constants — Extension OID cache.
q — Query to inspect.
Returns
True if provenance rewriting is needed.

Definition at line 2070 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ has_provenance_walker()

static bool has_provenance_walker ( Node *  node,
void *  data 
)
static

Tree walker that detects any provenance-bearing relation or provenance() call.

Parameters
node — Current expression tree node.
data — Pointer to constants_t (cast from void*).
Returns
true if provenance rewriting is needed for this node.

Definition at line 2006 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ having_BoolExpr_to_provenance()

static FuncExpr * having_BoolExpr_to_provenance ( BoolExpr *  be,
const constants_t *  constants,
bool  negated 
)
static

Convert a Boolean combination of HAVING comparisons into a provenance_times / provenance_plus gate expression.

Applies De Morgan duality when negated is true: AND becomes provenance_plus (OR) and vice-versa. NOT is handled by flipping negated and delegating to having_Expr_to_provenance_cmp.

Parameters
be — Boolean expression from the HAVING clause.
constants — Extension OID cache.
negated — Whether the expression appears under a NOT.
Returns
A FuncExpr combining the sub-expressions.

Definition at line 858 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ having_Expr_to_provenance_cmp()

static FuncExpr * having_Expr_to_provenance_cmp ( Expr *  expr,
const constants_t *  constants,
bool  negated 
)
static

Dispatch a HAVING sub-expression to the appropriate converter.

Entry point for the mutual recursion between having_BoolExpr_to_provenance and having_OpExpr_to_provenance_cmp.

Parameters
expr — Sub-expression to convert (BoolExpr or OpExpr).
constants — Extension OID cache.
negated — Whether the expression appears under a NOT.
Returns
Converted FuncExpr.

Definition at line 908 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ having_OpExpr_to_provenance_cmp()

static FuncExpr * having_OpExpr_to_provenance_cmp ( OpExpr *  opExpr,
const constants_t *  constants,
bool  negated 
)
static

Convert a comparison OpExpr on aggregate results into a provenance_cmp gate expression.

Each argument of opExpr must be one of:

  • A Var of type agg_token (or a FuncExpr implicit-cast wrapper around one) → cast to UUID via agg_token_to_uuid.
  • A scalar Const → wrapped in provenance_semimod(const, gate_one()).

If negated is true the operator OID is replaced by its negator so that NOT(a < b) becomes a >= b at the provenance level.

Parameters
opExpr — The comparison expression from the HAVING clause.
constants — Extension OID cache.
negated — Whether the expression appears under a NOT.
Returns
A provenance_cmp(lhs, op_oid, rhs) FuncExpr.

Definition at line 754 of file provsql.c.

Here is the caller graph for this function:

◆ make_aggregation_expression()

static Expr * make_aggregation_expression ( const constants_t *  constants,
Aggref *  agg_ref,
List *  prov_atts,
semiring_operation  op 
)
static

Build the provenance expression for a single aggregate function.

For SR_PLUS (union context) returns the first provenance attribute directly. For SR_TIMES or SR_MONUS, constructs:

provenance_aggregate(fn_oid, result_type,
original_aggref,
array_agg(provenance_semimod(arg, times_or_monus_token)))

COUNT(*) and COUNT(expr) are remapped to SUM so that the semimodule semantics (scalar × token → token) work correctly.

Parameters
constants — Extension OID cache.
agg_ref — The original Aggref node from the query.
prov_atts — List of provenance Var nodes.
op — Semiring operation (determines how tokens are combined).
Returns
Provenance expression of type agg_token.

Definition at line 636 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ make_provenance_attribute()

static Var * make_provenance_attribute ( const constants_t *  constants,
Query *  q,
RangeTblEntry *  r,
Index  relid,
AttrNumber  attid 
)
static

Build a Var node that references the provenance column of a relation.

Creates a Var pointing to attribute attid of range-table entry relid, typed as UUID, and marks the column as selected in the permission bitmap so PostgreSQL grants access correctly.

Parameters
constants — Extension OID cache.
q — Owning query (needed to update permission info on PG 16+).
r — Range-table entry that owns the provenance column.
relid — 1-based index of r in q->rtable.
attid — 1-based attribute number of the provenance column in r.
Returns
A freshly allocated Var node.

Definition at line 96 of file provsql.c.

Here is the caller graph for this function:

◆ make_provenance_expression()

static Expr * make_provenance_expression ( const constants_t *  constants,
Query *  q,
List *  prov_atts,
bool  aggregation,
bool  group_by_rewrite,
semiring_operation  op,
int **  columns,
int  nbcols 
)
static

Build the combined provenance expression to be added to the SELECT list.

Combines the tokens in prov_atts according to op:

  • SR_PLUS → use the first token directly (union branch; the outer array_agg / provenance_plus is added later if needed).
  • SR_TIMES → wrap all tokens in provenance_times(...).
  • SR_MONUS → wrap all tokens in provenance_monus(...).

When aggregation or group_by_rewrite is true, wraps the result in array_agg + provenance_plus to collapse groups. A provenance_delta gate is added for plain aggregations without a HAVING clause.

If a HAVING clause is present it is removed from q->havingQual and converted into a provenance expression via having_Expr_to_provenance_cmp.

If provsql_where_provenance is enabled, equality gates (provenance_eq) are prepended for join conditions and WHERE equalities, and a projection gate is appended if the output columns form a proper subset of the input columns.

Parameters
constants — Extension OID cache.
q — Query being rewritten (HAVING is cleared if present).
prov_atts — List of provenance Var nodes.
aggregation — True if the query contains aggregate functions.
group_by_rewrite — True if a GROUP BY requires the plus-aggregate wrapper.
op — Semiring operation to use for combining tokens.
columns — Per-RTE column-numbering array (for where-provenance).
nbcols — Total number of non-provenance output columns.
Returns
The provenance Expr to be appended to the target list.

Definition at line 948 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ migrate_aggtoken_quals_to_having()

static void migrate_aggtoken_quals_to_having ( const constants_t *  constants,
Query *  q 
)
static

Move WHERE conditions on aggregate results (agg_token) to HAVING.

Supported patterns (moved to HAVING):

  • The entire WHERE is a supported agg comparison.
  • The WHERE is a top-level AND where some conjuncts reference aggregates (those are extracted individually) and the rest remain in WHERE.

Unsupported patterns (e.g., "WHERE x=1 OR c>3") raise an error.

Parameters
constants — Extension OID cache.
q — Query to modify in place (jointree->quals and havingQual may both be updated).

Definition at line 2499 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ process_query()

static Query * process_query ( const constants_t *  constants,
Query *  q,
bool **  removed 
)
static

Rewrite a single SELECT query to carry provenance.

This is the recursive entry point for the provenance rewriter. It is called from provsql_planner for top-level queries and re-entered from get_provenance_attributes for subqueries in FROM.

High-level steps:

  1. Strip any provsql column propagated into this query's target list.
  2. Detect and rewrite structural forms requiring pre-processing: non-ALL set operations (wrap in outer GROUP BY), AGG DISTINCT (push into a subquery), DISTINCT (convert to GROUP BY).
  3. Collect provenance attributes via get_provenance_attributes.
  4. Build a column-numbering map for where-provenance (build_column_map).
  5. Handle aggregates, migrate WHERE-on-aggregate to HAVING, and set ops.
  6. Build and splice the combined provenance expression.
Parameters
constants — Extension OID cache.
q — Query to rewrite (modified in place).
removed — Out-param: boolean array indicating which original target list entries were provenance columns and were removed. May be NULL if the caller does not need this info.
Returns
The (possibly restructured) rewritten query, or NULL if the query has no FROM clause and can be skipped.

Definition at line 2577 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ process_set_operation_union()

static void process_set_operation_union ( const constants_t *  constants,
SetOperationStmt *  stmt 
)
static

Recursively annotate a UNION tree with the provenance UUID type.

Walks the SetOperationStmt tree of a UNION and appends the UUID type to colTypes / colTypmods / colCollations on every node, and sets all = true so that PostgreSQL does not deduplicate the combined stream. The non-ALL deduplication has already been moved to an outer GROUP BY by rewrite_non_all_into_external_group_by before this is called.

Parameters
constants: Extension OID cache.
stmt: Root (or subtree) of the UNION SetOperationStmt.

Definition at line 2240 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ provenance_function_in_group_by()

static bool provenance_function_in_group_by ( const constants_t *  constants,
Query *  q 
)
static

Check whether a provenance() call appears in the GROUP BY list.

When the user writes GROUP BY provenance(), ProvSQL must not add its own group-by wrapper (the query is already grouping on the token).

Parameters
constants: Extension OID cache.
q: Query to inspect.
Returns
True if any GROUP BY key contains a provenance() call.

Definition at line 1970 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ provenance_function_walker()

static bool provenance_function_walker ( Node *  node,
void *  data 
)
static

Tree walker that returns true if any provenance() call is found.

Used to detect whether a query explicitly calls provenance(), which triggers the substitution in replace_provenance_function_by_expression.

Parameters
node: Current expression tree node.
data: Pointer to constants_t (cast from void*).
Returns
true if a provenance() call is found anywhere in node.

Definition at line 1945 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ provenance_mutator()

static Node * provenance_mutator ( Node *  node,
provenance_mutator_context *  context 
)
static

Tree-mutator that replaces provenance() calls with the actual provenance expression.

Parameters
node: Current expression tree node.
context: Mutation context with the provenance expression and constants.
Returns
Possibly modified node.

Definition at line 1712 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ provsql_planner()

static PlannedStmt * provsql_planner ( Query *  q,
int  cursorOptions,
ParamListInfo  boundParams 
)
static

PostgreSQL planner hook — entry point for provenance rewriting.

Replaces (or chains after) the standard planner. For every CMD_SELECT that involves at least one provenance-bearing relation or an explicit provenance() call, rewrites the query via process_query before handing the result to the standard planner. Non-SELECT commands and queries without provenance are passed through unchanged.

Parameters
q: The query to plan.
cursorOptions: Cursor options bitmask.
boundParams: Pre-bound parameter values.
Returns
The planned statement.

Definition at line 2763 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ reduce_varattno_by_offset()

static void reduce_varattno_by_offset ( List *  targetList,
Index  varno,
int *  offset 
)
static

Adjust Var attribute numbers in targetList after columns are removed.

When provenance columns are stripped from a subquery's target list, the remaining columns shift left. This function applies a pre-computed offset array (one entry per original column) to correct all Var nodes that reference range-table entry varno.

Parameters
targetList: Target list of the outer query to patch.
varno: Range-table entry whose attribute numbers need fixing.
offset: Cumulative shift per original attribute (negative or zero).

Definition at line 177 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ reduce_varattno_mutator()

static Node * reduce_varattno_mutator ( Node *  node,
reduce_varattno_mutator_context *  context 
)
static

Tree-mutator callback that adjusts Var attribute numbers.

Parameters
node: Current expression tree node.
context: Mutation context carrying varno and offset.
Returns
Possibly modified node.

Definition at line 148 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ remove_provenance_attribute_groupref()

static void remove_provenance_attribute_groupref ( Query *  q,
const Bitmapset *  removed_sortgrouprefs 
)
static

Remove sort/group references that belonged to removed provenance columns.

After remove_provenance_attributes_select strips provenance entries from the target list, any GROUP BY, ORDER BY, or DISTINCT clause that referenced them by tleSortGroupRef must be cleaned up.

Parameters
q: Query to modify in place.
removed_sortgrouprefs: Bitmapset of ressortgroupref values to remove.

Definition at line 1795 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ remove_provenance_attribute_setoperations()

static void remove_provenance_attribute_setoperations ( Query *  q,
bool *  removed 
)
static

Strip the provenance column's type info from a set-operation node.

When a provenance column is removed from a UNION/EXCEPT query's target list, the matching entries in the SetOperationStmt's colTypes, colTypmods, and colCollations lists must also be removed.

Parameters
q: Query containing setOperations.
removed: Boolean array (from remove_provenance_attributes_select) indicating which columns were removed.

Definition at line 1832 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ remove_provenance_attributes_select()

static Bitmapset * remove_provenance_attributes_select ( const constants_t *  constants,
Query *  q,
bool **  removed 
)
static

Strip provenance UUID columns from q's SELECT list.

Scans the target list and removes every Var entry whose column name is provsql and whose type is UUID. The remaining entries have their resno values decremented to fill the gaps.

Parameters
constants: Extension OID cache.
q: Query to modify in place.
removed: Out-param: allocated boolean array (length = original target list length) where true means the corresponding entry was removed. The caller must pfree this array when done.
Returns
Bitmapset of ressortgroupref values whose entries were removed (so the caller can clean up GROUP BY / ORDER BY).

Definition at line 420 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ replace_aggregations_by_provenance_aggregate()

static void replace_aggregations_by_provenance_aggregate ( const constants_t *  constants,
Query *  q,
List *  prov_atts,
semiring_operation  op 
)
static

Replace every Aggref in q with a provenance-aware aggregate.

Walks the query tree and substitutes each Aggref node with the result of make_aggregation_expression, which wraps the original aggregate in the semimodule machinery (provenance_semimod + array_agg + provenance_aggregate).

Parameters
constants: Extension OID cache.
q: Query to mutate in place.
prov_atts: List of provenance Var nodes.
op: Semiring operation for combining tokens across rows.

Definition at line 1634 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ replace_provenance_function_by_expression()

static void replace_provenance_function_by_expression ( const constants_t *  constants,
Query *  q,
Expr *  provsql 
)
static

Replace every explicit provenance() call in q with provsql.

Users can write provenance() in the target list or WHERE to refer to the provenance token of the current tuple. This mutator substitutes those calls with the actual computed provenance expression.

Parameters
constants: Extension OID cache.
q: Query to mutate in place.
provsql: Provenance expression to substitute.

Definition at line 1744 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ rewrite_agg_distinct()

static Query * rewrite_agg_distinct ( Query *  q,
const constants_t *  constants 
)
static

Rewrite every AGG(DISTINCT key) in q using independent subqueries.

For a single DISTINCT aggregate, produces a subquery:

SELECT AGG(key), gb... FROM (SELECT key, gb... FROM t GROUP BY key, gb...) GROUP BY gb...

For multiple DISTINCT aggregates with different keys, produces a JOIN of one such subquery per aggregate, joined on the GROUP BY columns. Non-DISTINCT aggregates are left untouched.

Parameters
q: Query to inspect and possibly rewrite.
constants: Extension OID cache.
Returns
Rewritten query, or NULL if no AGG(DISTINCT) was found.

Definition at line 1360 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ rewrite_non_all_into_external_group_by()

static Query * rewrite_non_all_into_external_group_by ( Query *  q)
static

Wrap a non-ALL set operation in an outer GROUP BY query.

UNION / EXCEPT (without ALL) would deduplicate tuples before ProvSQL can attach provenance tokens. To avoid this, the set operation is converted to UNION ALL / EXCEPT ALL and a new outer query is built that groups the results by all non-provenance columns, collecting tokens into an array for the provenance_plus evaluation.

After this rewrite the recursive call to process_query handles the now-ALL inner set operation normally.

Parameters
q: Query whose setOperations is non-ALL (modified to ALL in place).
Returns
New outer query that wraps q as a subquery RTE.

Definition at line 1873 of file provsql.c.

Here is the caller graph for this function:

◆ transform_distinct_into_group_by()

static void transform_distinct_into_group_by ( Query *  q)
static

Convert a SELECT DISTINCT into an equivalent GROUP BY.

ProvSQL cannot handle DISTINCT directly (it would collapse provenance tokens that should remain separate). This function moves every entry from q->distinctClause into q->groupClause (skipping any that are already there) and clears q->distinctClause.

Parameters
q: Query to modify in place.

Definition at line 1762 of file provsql.c.

Here is the caller graph for this function:

◆ transform_except_into_join()

static bool transform_except_into_join ( const constants_t constants,
Query *  q 
)
static

Rewrite an EXCEPT query into a LEFT JOIN with monus provenance.

EXCEPT cannot be handled directly because it deduplicates. This function transforms:

SELECT … FROM A EXCEPT SELECT … FROM B

into a LEFT JOIN of A and B on equality of all non-provenance columns, clears setOperations, and leaves the monus token combination to make_provenance_expression (which will see SR_MONUS).

Only simple (non-chained) EXCEPT is supported; chained EXCEPT raises an error.

Parameters
constants: Extension OID cache.
q: Query to rewrite in place.
Returns
Always true (errors out on unsupported cases).

Definition at line 2126 of file provsql.c.

Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ PG_MODULE_MAGIC

PG_MODULE_MAGIC

Required PostgreSQL extension magic block.

Definition at line 56 of file provsql.c.

◆ prev_planner

planner_hook_type prev_planner = NULL
static

Previous planner hook (chained)

Definition at line 73 of file provsql.c.

◆ provsql_active

bool provsql_active = true

true while ProvSQL query rewriting is enabled

Definition at line 63 of file provsql.c.

◆ PROVSQL_COLUMN_NAME

const char* PROVSQL_COLUMN_NAME = "provsql"
static

Name of the provenance column added to tracked tables.

Definition at line 68 of file provsql.c.

◆ provsql_interrupted

bool provsql_interrupted = false

Global variable that becomes true if this particular backend received an interrupt signal.

Definition at line 62 of file provsql.c.

◆ provsql_update_provenance

bool provsql_update_provenance = false

true when provenance tracking for DML is enabled

Definition at line 65 of file provsql.c.

◆ provsql_verbose

int provsql_verbose = 100

Verbosity level; controlled by the provsql.verbose_level GUC.

Global variable that indicates the verbosity level set by the provsql.verbose_level run-time configuration parameter.

Definition at line 66 of file provsql.c.

◆ provsql_where_provenance

bool provsql_where_provenance = false

Global variable that indicates if where-provenance support has been activated through the provsql.where_provenance run-time configuration parameter.

Definition at line 64 of file provsql.c.