ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
provsql.c
Go to the documentation of this file.
1/**
2 * @file provsql.c
3 * @brief PostgreSQL planner hook for transparent provenance tracking.
4 *
5 * This file installs a @c planner_hook that intercepts every SELECT query
6 * and rewrites it to propagate a provenance circuit token (UUID) alongside
7 * normal result tuples. The rewriting proceeds in three conceptual phases:
8 *
9 * -# **Discovery** – scan the range table for relations/subqueries that
10 * already carry a @c provsql UUID column (@c get_provenance_attributes).
11 * -# **Expression building** – combine the discovered tokens according
12 * to the semiring operation that corresponds to the SQL operator in use
13 * (⊗ for joins, ⊕ for duplicate elimination, ⊖ for EXCEPT) and wrap
14 * aggregations (@c make_provenance_expression,
15 * @c make_aggregation_expression).
16 * -# **Splice** – append the resulting provenance expression to the target
17 * list and replace any explicit @c provenance() call in the query with
18 * the computed expression (@c add_to_select,
19 * @c replace_provenance_function_by_expression).
20 */
21#include "postgres.h"
22#include "fmgr.h"
23#include "miscadmin.h"
24#include "pg_config.h"
25#include "access/htup_details.h"
26#include "access/sysattr.h"
27#include "catalog/pg_aggregate.h"
28#include "catalog/pg_collation.h"
29#include "catalog/pg_operator.h"
30#include "catalog/pg_type.h"
31#include "nodes/makefuncs.h"
32#include "nodes/nodeFuncs.h"
33#include "nodes/print.h"
34#include "optimizer/planner.h"
35#include "parser/parse_oper.h"
36#include "parser/parsetree.h"
37#include "storage/lwlock.h"
38#include "storage/shmem.h"
39#include "utils/fmgroids.h"
40#include "utils/guc.h"
41#include "utils/lsyscache.h"
42#include "utils/ruleutils.h"
43#include "utils/syscache.h"
44#include <time.h>
45
46#include "provsql_mmap.h"
47#include "provsql_shmem.h"
48#include "provsql_utils.h"
49
50#if PG_VERSION_NUM < 100000
51#error "ProvSQL requires PostgreSQL version 10 or later"
52#endif
53
54#include "compatibility.h"
55
56PG_MODULE_MAGIC; ///< Required PostgreSQL extension magic block
57
58/* -------------------------------------------------------------------------
59 * Global state & forward declarations
60 * ------------------------------------------------------------------------- */
61
63bool provsql_active = true; ///< @c true while ProvSQL query rewriting is enabled
65bool provsql_update_provenance = false; ///< @c true when provenance tracking for DML is enabled
66int provsql_verbose = 100; ///< Verbosity level; controlled by the @c provsql.verbose_level GUC
67
68static const char *PROVSQL_COLUMN_NAME = "provsql"; ///< Name of the provenance column added to tracked tables
69
70extern void _PG_init(void);
71extern void _PG_fini(void);
72
73static planner_hook_type prev_planner = NULL; ///< Previous planner hook (chained)
74
75static Query *process_query(const constants_t *constants, Query *q,
76 bool **removed);
77
78/* -------------------------------------------------------------------------
79 * Provenance attribute construction
80 * ------------------------------------------------------------------------- */
81
82/**
83 * @brief Build a Var node that references the provenance column of a relation.
84 *
85 * Creates a @c Var pointing to attribute @p attid of range-table entry
86 * @p relid, typed as UUID, and marks the column as selected in the
87 * permission bitmap so PostgreSQL grants access correctly.
88 *
89 * @param constants Extension OID cache.
90 * @param q Owning query (needed to update permission info on PG 16+).
91 * @param r Range-table entry that owns the provenance column.
92 * @param relid 1-based index of @p r in @p q->rtable.
93 * @param attid 1-based attribute number of the provenance column in @p r.
94 * @return A freshly allocated @c Var node.
95 */
96static Var *make_provenance_attribute(const constants_t *constants, Query *q,
97 RangeTblEntry *r, Index relid,
98 AttrNumber attid) {
99 Var *v = makeNode(Var);
100
101 v->varno = relid;
102 v->varattno = attid;
103
104#if PG_VERSION_NUM >= 130000
105 v->varnosyn = relid;
106 v->varattnosyn = attid;
107#else
108 v->varnoold = relid;
109 v->varoattno = attid;
110#endif
111
112 v->vartype = constants->OID_TYPE_UUID;
113 v->varcollid = InvalidOid;
114 v->vartypmod = -1;
115 v->location = -1;
116
117#if PG_VERSION_NUM >= 160000
118 if (r->perminfoindex != 0) {
119 RTEPermissionInfo *rpi =
120 list_nth_node(RTEPermissionInfo, q->rteperminfos, r->perminfoindex - 1);
121 rpi->selectedCols = bms_add_member(
122 rpi->selectedCols, attid - FirstLowInvalidHeapAttributeNumber);
123 }
124#else
125 r->selectedCols = bms_add_member(r->selectedCols,
126 attid - FirstLowInvalidHeapAttributeNumber);
127#endif
128
129 return v;
130}
131
132/* -------------------------------------------------------------------------
133 * Helper mutators: attribute-number fixup and type patching
134 * ------------------------------------------------------------------------- */
135
136/** @brief Context for the @c reduce_varattno_mutator tree walker. */
138 Index varno; ///< Range-table entry whose attribute numbers are being adjusted
139 int *offset; ///< Per-attribute cumulative shift to apply
141
142/**
143 * @brief Tree-mutator callback that adjusts Var attribute numbers.
144 * @param node Current expression tree node.
145 * @param context Mutation context carrying varno and offset.
146 * @return Possibly modified node.
147 */
148static Node *reduce_varattno_mutator(Node *node,
150 if (node == NULL)
151 return NULL;
152
153 if (IsA(node, Var)) {
154 Var *v = (Var *)node;
155
156 if (v->varno == context->varno) {
157 v->varattno += context->offset[v->varattno - 1];
158 }
159 }
160
161 return expression_tree_mutator(node, reduce_varattno_mutator,
162 (void *)context);
163}
164
165/**
166 * @brief Adjust Var attribute numbers in @p targetList after columns are removed.
167 *
168 * When provenance columns are stripped from a subquery's target list, the
169 * remaining columns shift left. This function applies a pre-computed
170 * @p offset array (one entry per original column) to correct all @c Var
171 * nodes that reference range-table entry @p varno.
172 *
173 * @param targetList Target list of the outer query to patch.
174 * @param varno Range-table entry whose attribute numbers need fixing.
175 * @param offset Cumulative shift per original attribute (negative or zero).
176 */
177static void reduce_varattno_by_offset(List *targetList, Index varno,
178 int *offset) {
179 ListCell *lc;
180 reduce_varattno_mutator_context context = {varno, offset};
181
182 foreach (lc, targetList) {
183 Node *te = lfirst(lc);
184 expression_tree_mutator(te, reduce_varattno_mutator, (void *)&context);
185 }
186}
187
188/** @brief Context for the @c aggregation_type_mutator tree walker. */
190 Index varno; ///< Range-table entry index of the aggregate var
191 Index varattno; ///< Attribute number of the aggregate column
192 const constants_t *constants; ///< Extension OID cache
194
195/**
196 * @brief Tree-mutator that retyps a specific Var to @c agg_token.
197 * @param node Current expression tree node.
198 * @param context Mutation context with varno, varattno, and constants.
199 * @return Possibly modified node.
200 */
201static Node *
204 if (node == NULL)
205 return NULL;
206
207 if (IsA(node, Var)) {
208 Var *v = (Var *)node;
209
210 if (v->varno == context->varno && v->varattno == context->varattno) {
211 v->vartype = context->constants->OID_TYPE_AGG_TOKEN;
212 }
213 }
214 return expression_tree_mutator(node, aggregation_type_mutator,
215 (void *)context);
216}
217
218/**
219 * @brief Retypes aggregation-result Vars in @p q from UUID to @c agg_token.
220 *
221 * After a subquery that contains @c provenance_aggregate is processed, its
222 * result type is @c agg_token rather than plain UUID. This mutator walks
223 * the outer query and updates the type of every @c Var referencing that
224 * result column so that subsequent type-checking passes correctly.
225 *
226 * @param constants Extension OID cache.
227 * @param q Outer query to patch.
228 * @param rteid Range-table index of the subquery in @p q.
229 * @param targetList Target list of the subquery (to locate provenance_aggregate columns).
230 */
231static void fix_type_of_aggregation_result(const constants_t *constants,
232 Query *q, Index rteid,
233 List *targetList) {
234 ListCell *lc;
235 aggregation_type_mutator_context context = {0, 0, constants};
236 Index attno = 1;
237
238 foreach (lc, targetList) {
239 TargetEntry *te = (TargetEntry *)lfirst(lc);
240 if (IsA(te->expr, FuncExpr)) {
241 FuncExpr *f = (FuncExpr *)te->expr;
242
243 if (f->funcid == constants->OID_FUNCTION_PROVENANCE_AGGREGATE) {
244 context.varno = rteid;
245 context.varattno = attno;
246 query_tree_mutator(q, aggregation_type_mutator, &context,
247 QTW_DONT_COPY_QUERY | QTW_IGNORE_RC_SUBQUERIES);
248 }
249 }
250 ++attno;
251 }
252}
253
254/**
255 * @brief Collect all provenance Var nodes reachable from @p q's range table.
256 *
257 * Walks every RTE in @p q->rtable:
258 * - @c RTE_RELATION: looks for a column named @c provsql of type UUID.
259 * - @c RTE_SUBQUERY: recursively calls @c process_query and splices the
260 * resulting provenance column back into the parent's column list, also
261 * patching outer Var attribute numbers if inner columns were removed.
262 * - @c RTE_FUNCTION: handled when the function returns a single UUID column
263 * named @c provsql.
264 * - @c RTE_JOIN / @c RTE_VALUES / @c RTE_GROUP: handled passively (the
265 * underlying base-table RTEs supply the tokens).
266 *
267 * @param constants Extension OID cache.
268 * @param q Query whose range table is scanned (subquery RTEs are
269 * modified in place by the recursive call).
270 * @return List of @c Var nodes, one per provenance source; @c NIL if the
271 * query has no provenance-bearing relation.
272 */
273static List *get_provenance_attributes(const constants_t *constants, Query *q) {
274 List *prov_atts = NIL;
275
276 for(Index rteid = 1; rteid <= q->rtable->length; ++rteid) {
277 RangeTblEntry *r = list_nth_node(RangeTblEntry, q->rtable, rteid-1);
278
279 if (r->rtekind == RTE_RELATION) {
280 ListCell *lc;
281 AttrNumber attid = 1;
282
283 foreach (lc, r->eref->colnames) {
284 const char *v = strVal(lfirst(lc));
285
286 if (!strcmp(v, PROVSQL_COLUMN_NAME) &&
287 get_atttype(r->relid, attid) == constants->OID_TYPE_UUID) {
288 prov_atts =
289 lappend(prov_atts,
290 make_provenance_attribute(constants, q, r, rteid, attid));
291 }
292
293 ++attid;
294 }
295 } else if (r->rtekind == RTE_SUBQUERY) {
296 bool *inner_removed = NULL;
297 int old_targetlist_length =
298 r->subquery->targetList ? r->subquery->targetList->length : 0;
299 Query *new_subquery =
300 process_query(constants, r->subquery, &inner_removed);
301 if (new_subquery != NULL) {
302 int i = 0;
303 int *offset = (int *)palloc(old_targetlist_length * sizeof(int));
304 unsigned varattnoprovsql;
305 ListCell *cell, *prev;
306
307 r->subquery = new_subquery;
308
309 if (inner_removed != NULL) {
310 for (cell = list_head(r->eref->colnames), prev = NULL;
311 cell != NULL;) {
312 if (inner_removed[i]) {
313 r->eref->colnames =
314 my_list_delete_cell(r->eref->colnames, cell, prev);
315 if (prev)
316 cell = my_lnext(r->eref->colnames, prev);
317 else
318 cell = list_head(r->eref->colnames);
319 } else {
320 prev = cell;
321 cell = my_lnext(r->eref->colnames, cell);
322 }
323 ++i;
324 }
325 for (i = 0; i < old_targetlist_length; ++i) {
326 offset[i] =
327 (i == 0 ? 0 : offset[i - 1]) - (inner_removed[i] ? 1 : 0);
328 }
329
330 reduce_varattno_by_offset(q->targetList, rteid, offset);
331 }
332
333 varattnoprovsql = 0;
334 for (cell = list_head(new_subquery->targetList); cell != NULL;
335 cell = my_lnext(new_subquery->targetList, cell)) {
336 TargetEntry *te = (TargetEntry *)lfirst(cell);
337 ++varattnoprovsql;
338 if (te->resname && !strcmp(te->resname, PROVSQL_COLUMN_NAME))
339 break;
340 }
341
342 if (cell != NULL) {
343 r->eref->colnames = list_insert_nth(r->eref->colnames, varattnoprovsql-1,
344 makeString(pstrdup(PROVSQL_COLUMN_NAME)));
345 prov_atts =
346 lappend(prov_atts, make_provenance_attribute(
347 constants, q, r, rteid, varattnoprovsql));
348 }
349 fix_type_of_aggregation_result(constants, q, rteid,
350 r->subquery->targetList);
351 }
352 } else if (r->rtekind == RTE_JOIN) {
353 if (r->jointype == JOIN_INNER || r->jointype == JOIN_LEFT ||
354 r->jointype == JOIN_FULL || r->jointype == JOIN_RIGHT) {
355 // Nothing to do, there will also be RTE entries for the tables
356 // that are part of the join, from which we will extract the
357 // provenance information
358 } else { // Semijoin (should be feasible, but check whether the second
359 // provenance information is available) Antijoin (feasible with
360 // negation)
361 provsql_error("JOIN type not supported by provsql");
362 }
363 } else if (r->rtekind == RTE_FUNCTION) {
364 ListCell *lc;
365 AttrNumber attid = 1;
366
367 foreach (lc, r->functions) {
368 RangeTblFunction *func = (RangeTblFunction *)lfirst(lc);
369
370 if (func->funccolcount == 1) {
371 FuncExpr *expr = (FuncExpr *)func->funcexpr;
372 if (expr->funcresulttype == constants->OID_TYPE_UUID &&
373 !strcmp(get_rte_attribute_name(r, attid), PROVSQL_COLUMN_NAME)) {
374 prov_atts = lappend(prov_atts, make_provenance_attribute(
375 constants, q, r, rteid, attid));
376 }
377 } else {
378 provsql_error("FROM function with multiple output "
379 "attributes not supported by provsql");
380 }
381
382 attid += func->funccolcount;
383 }
384 } else if (r->rtekind == RTE_VALUES) {
385 // Nothing to do, no provenance attribute in literal values
386#if PG_VERSION_NUM >= 180000
387 } else if (r->rtekind == RTE_GROUP) {
388 // Introduced in PostgreSQL 18, we already handle group by from
389 // groupClause
390#endif
391 } else {
392 provsql_error("FROM clause unsupported by provsql");
393 }
394 }
395
396 return prov_atts;
397}
398
399/* -------------------------------------------------------------------------
400 * Target-list surgery
401 * ------------------------------------------------------------------------- */
402
403/**
404 * @brief Strip provenance UUID columns from @p q's SELECT list.
405 *
406 * Scans the target list and removes every @c Var entry whose column name is
407 * @c provsql and whose type is UUID. The remaining entries have their
408 * @c resno values decremented to fill the gaps.
409 *
410 * @param constants Extension OID cache.
411 * @param q Query to modify in place.
412 * @param removed Out-param: allocated boolean array (length =
413 * original target list length) where @c true means the
414 * corresponding entry was removed. The caller must
415 * @c pfree this array when done.
416 * @return Bitmapset of @c ressortgroupref values whose entries were
417 * removed (so the caller can clean up GROUP BY / ORDER BY).
418 */
419static Bitmapset *
421 bool **removed) {
422 int nbRemoved = 0;
423 int i = 0;
424 Bitmapset *ressortgrouprefs = NULL;
425 ListCell *cell, *prev;
426 *removed = (bool *)palloc(q->targetList->length * sizeof(bool));
427
428 for (cell = list_head(q->targetList), prev = NULL; cell != NULL;) {
429 TargetEntry *rt = (TargetEntry *)lfirst(cell);
430 (*removed)[i] = false;
431
432 if (rt->expr->type == T_Var) {
433 Var *v = (Var *)rt->expr;
434
435 if (v->vartype == constants->OID_TYPE_UUID) {
436 const char *colname;
437
438 if (rt->resname)
439 colname = rt->resname;
440 else {
441 /* This case occurs, for example, when grouping by a column
442 * that is projected out */
443 RangeTblEntry *r = (RangeTblEntry *)list_nth(q->rtable, v->varno - 1);
444 colname = strVal(list_nth(r->eref->colnames, v->varattno - 1));
445 }
446
447 if (!strcmp(colname, PROVSQL_COLUMN_NAME)) {
448 q->targetList = my_list_delete_cell(q->targetList, cell, prev);
449
450 (*removed)[i] = true;
451 ++nbRemoved;
452
453 if (rt->ressortgroupref > 0)
454 ressortgrouprefs =
455 bms_add_member(ressortgrouprefs, rt->ressortgroupref);
456 }
457 }
458 }
459
460 if ((*removed)[i]) {
461 if (prev) {
462 cell = my_lnext(q->targetList, prev);
463 } else {
464 cell = list_head(q->targetList);
465 }
466 } else {
467 rt->resno -= nbRemoved;
468 prev = cell;
469 cell = my_lnext(q->targetList, cell);
470 }
471
472 ++i;
473 }
474
475 return ressortgrouprefs;
476}
477
478/**
479 * @brief Semiring operation used to combine provenance tokens.
480 *
481 * @c SR_TIMES corresponds to the multiplicative operation (joins, Cartesian
482 * products), @c SR_PLUS to the additive operation (duplicate elimination), and
483 * @c SR_MONUS to the monus / set-difference operation (EXCEPT).
484 */
485typedef enum {
486 SR_PLUS, ///< Semiring addition (UNION, SELECT DISTINCT)
487 SR_MONUS, ///< Semiring monus / set difference (EXCEPT)
488 SR_TIMES ///< Semiring multiplication (JOIN, Cartesian product)
490
491/* -------------------------------------------------------------------------
492 * Semiring expression builders
493 * ------------------------------------------------------------------------- */
494
495/**
496 * @brief Wrap @p toExpr in a @c provenance_eq gate if @p fromOpExpr is an
497 * equality between two tracked columns.
498 *
499 * Used for where-provenance: each equijoin condition (and some WHERE
500 * equalities) introduces an @c eq gate that records which attribute positions
501 * were compared. Because this function is also called for WHERE predicates,
502 * it applies extra guards and silently returns @p toExpr unchanged when the
503 * expression does not match the expected shape (both sides must be @c Var
504 * nodes, possibly wrapped in a @c RelabelType).
505 *
506 * @param constants Extension OID cache.
507 * @param fromOpExpr The equality @c OpExpr to inspect.
508 * @param toExpr Existing provenance expression to wrap.
509 * @param columns Per-RTE column-numbering array.
510 * @return @p toExpr wrapped in @c provenance_eq(toExpr, col1, col2), or
511 * @p toExpr unchanged if the shape is unsupported.
512 */
513static Expr *add_eq_from_OpExpr_to_Expr(const constants_t *constants,
514 OpExpr *fromOpExpr, Expr *toExpr,
515 int **columns) {
516 Datum first_arg;
517 Datum second_arg;
518 FuncExpr *fc;
519 Const *c1;
520 Const *c2;
521 Var *v1;
522 Var *v2;
523
524 if (my_lnext(fromOpExpr->args, list_head(fromOpExpr->args))) {
525 /* Sometimes Var is nested within a RelabelType */
526 if (IsA(linitial(fromOpExpr->args), Var)) {
527 v1 = linitial(fromOpExpr->args);
528 } else if (IsA(linitial(fromOpExpr->args), RelabelType)) {
529 /* In the WHERE case it can be a Const */
530 RelabelType *rt1 = linitial(fromOpExpr->args);
531 if (IsA(rt1->arg, Var)) { /* Can be Param in the WHERE case */
532 v1 = (Var *)rt1->arg;
533 } else
534 return toExpr;
535 } else
536 return toExpr;
537 first_arg = Int16GetDatum(columns[v1->varno - 1][v1->varattno - 1]);
538
539 if (IsA(lsecond(fromOpExpr->args), Var)) {
540 v2 = lsecond(fromOpExpr->args);
541 } else if (IsA(lsecond(fromOpExpr->args), RelabelType)) {
542 /* In the WHERE case it can be a Const */
543 RelabelType *rt2 = lsecond(fromOpExpr->args);
544 if (IsA(rt2->arg, Var)) { /* Can be Param in the WHERE case */
545 v2 = (Var *)rt2->arg;
546 } else
547 return toExpr;
548 } else
549 return toExpr;
550 second_arg = Int16GetDatum(columns[v2->varno - 1][v2->varattno - 1]);
551
552 fc = makeNode(FuncExpr);
553 fc->funcid = constants->OID_FUNCTION_PROVENANCE_EQ;
554 fc->funcvariadic = false;
555 fc->funcresulttype = constants->OID_TYPE_UUID;
556 fc->location = -1;
557
558 c1 = makeConst(constants->OID_TYPE_INT, -1, InvalidOid, sizeof(int16),
559 first_arg, false, true);
560
561 c2 = makeConst(constants->OID_TYPE_INT, -1, InvalidOid, sizeof(int16),
562 second_arg, false, true);
563
564 fc->args = list_make3(toExpr, c1, c2);
565 return (Expr *)fc;
566 }
567 return toExpr;
568}
569
570/**
571 * @brief Walk a join-condition or WHERE quals node and add @c eq gates for
572 * every equality it contains.
573 *
574 * Dispatches to @c add_eq_from_OpExpr_to_Expr for simple @c OpExpr nodes
575 * and iterates over the arguments of an AND @c BoolExpr. OR/NOT inside a
576 * join ON clause are rejected with an error.
577 *
578 * @param constants Extension OID cache.
579 * @param quals Root of the quals tree (@c OpExpr or @c BoolExpr), or
580 * @c NULL (in which case @p result is returned unchanged).
581 * @param result Provenance expression to wrap.
582 * @param columns Per-RTE column-numbering array.
583 * @return Updated provenance expression with zero or more @c eq gates added.
584 */
585static Expr *add_eq_from_Quals_to_Expr(const constants_t *constants,
586 Node *quals, Expr *result,
587 int **columns) {
588 OpExpr *oe;
589
590 if (!quals)
591 return result;
592
593 if (IsA(quals, OpExpr)) {
594 oe = (OpExpr *)quals;
595 result = add_eq_from_OpExpr_to_Expr(constants, oe, result, columns);
596 } /* Sometimes OpExpr is nested within a BoolExpr */
597 else if (IsA(quals, BoolExpr)) {
598 BoolExpr *be = (BoolExpr *)quals;
599 /* In some cases, there can be an OR or a NOT specified with ON clause */
600 if (be->boolop == OR_EXPR || be->boolop == NOT_EXPR) {
601 provsql_error("Boolean operators OR and NOT in a join...on "
602 "clause are not supported by provsql");
603 } else {
604 ListCell *lc2;
605 foreach (lc2, be->args) {
606 if (IsA(lfirst(lc2), OpExpr)) {
607 oe = (OpExpr *)lfirst(lc2);
608 result = add_eq_from_OpExpr_to_Expr(constants, oe, result, columns);
609 }
610 }
611 }
612 } else { /* Handle other cases */
613 }
614 return result;
615}
616
617/**
618 * @brief Build the provenance expression for a single aggregate function.
619 *
620 * For @c SR_PLUS (union context) returns the first provenance attribute
621 * directly. For @c SR_TIMES or @c SR_MONUS, constructs:
622 * @code
623 * provenance_aggregate(fn_oid, result_type,
624 * original_aggref,
625 * array_agg(provenance_semimod(arg, times_or_monus_token)))
626 * @endcode
627 * COUNT(*) and COUNT(expr) are remapped to SUM so that the semimodule
628 * semantics (scalar × token → token) work correctly.
629 *
630 * @param constants Extension OID cache.
631 * @param agg_ref The original @c Aggref node from the query.
632 * @param prov_atts List of provenance @c Var nodes.
633 * @param op Semiring operation (determines how tokens are combined).
634 * @return Provenance expression of type @c agg_token.
635 */
636static Expr *make_aggregation_expression(const constants_t *constants,
637 Aggref *agg_ref, List *prov_atts,
639 Expr *result;
640 FuncExpr *expr, *expr_s;
641 Aggref *agg = makeNode(Aggref);
642 FuncExpr *plus = makeNode(FuncExpr);
643 TargetEntry *te_inner = makeNode(TargetEntry);
644 Const *fn = makeNode(Const);
645 Const *typ = makeNode(Const);
646
647 if (op == SR_PLUS) {
648 result = linitial(prov_atts);
649 } else {
650 Oid aggregation_function = agg_ref->aggfnoid;
651
652 if (my_lnext(prov_atts, list_head(prov_atts)) == NULL)
653 expr = linitial(prov_atts);
654 else {
655 expr = makeNode(FuncExpr);
656 if (op == SR_TIMES) {
657 ArrayExpr *array = makeNode(ArrayExpr);
658
659 expr->funcid = constants->OID_FUNCTION_PROVENANCE_TIMES;
660 expr->funcvariadic = true;
661
662 array->array_typeid = constants->OID_TYPE_UUID_ARRAY;
663 array->element_typeid = constants->OID_TYPE_UUID;
664 array->elements = prov_atts;
665 array->location = -1;
666
667 expr->args = list_make1(array);
668 } else { // SR_MONUS
669 expr->funcid = constants->OID_FUNCTION_PROVENANCE_MONUS;
670 expr->args = prov_atts;
671 }
672 expr->funcresulttype = constants->OID_TYPE_UUID;
673 expr->location = -1;
674 }
675
676 // semimodule function
677 expr_s = makeNode(FuncExpr);
678 expr_s->funcid = constants->OID_FUNCTION_PROVENANCE_SEMIMOD;
679 expr_s->funcresulttype = constants->OID_TYPE_UUID;
680
681 // check the particular case of count
682 if (aggregation_function == F_COUNT_ ||
683 aggregation_function == F_COUNT_ANY) // count(*) or count(arg)
684 {
685 Const *one = makeConst(constants->OID_TYPE_INT, -1, InvalidOid,
686 sizeof(int32), Int32GetDatum(1), false, true);
687 expr_s->args = list_make2(one, expr);
688 aggregation_function = F_SUM_INT4;
689 } else {
690 expr_s->args =
691 list_make2(((TargetEntry *)linitial(agg_ref->args))->expr, expr);
692 }
693
694 expr_s->location = -1;
695
696 // aggregating all semirings in an array
697 te_inner->resno = 1;
698 te_inner->expr = (Expr *)expr_s;
699 agg->aggfnoid = constants->OID_FUNCTION_ARRAY_AGG;
700 agg->aggtype = constants->OID_TYPE_UUID_ARRAY;
701 agg->args = list_make1(te_inner);
702 agg->aggkind = AGGKIND_NORMAL;
703 agg->location = -1;
704#if PG_VERSION_NUM >= 140000
705 agg->aggno = agg->aggtransno = -1;
706#endif
707
708 agg->aggargtypes = list_make1_oid(constants->OID_TYPE_UUID);
709
710 // final aggregation function
711 plus->funcid = constants->OID_FUNCTION_PROVENANCE_AGGREGATE;
712
713 fn = makeConst(constants->OID_TYPE_INT, -1, InvalidOid, sizeof(int32),
714 Int32GetDatum(aggregation_function), false, true);
715
716 typ = makeConst(constants->OID_TYPE_INT, -1, InvalidOid, sizeof(int32),
717 Int32GetDatum(agg_ref->aggtype), false, true);
718
719 plus->funcresulttype = constants->OID_TYPE_AGG_TOKEN;
720 plus->args = list_make4(fn, typ, agg_ref, agg);
721 plus->location = -1;
722
723 result = (Expr *)plus;
724 }
725
726 return result;
727}
728
729/* -------------------------------------------------------------------------
730 * HAVING / WHERE-on-aggregates rewriting
731 * ------------------------------------------------------------------------- */
732
733/* Forward declaration needed because having_BoolExpr_to_provenance and
734 * having_Expr_to_provenance_cmp are mutually recursive. */
735static FuncExpr *having_Expr_to_provenance_cmp(Expr *expr, const constants_t *constants, bool negated);
736
737/**
738 * @brief Convert a comparison @c OpExpr on aggregate results into a
739 * @c provenance_cmp gate expression.
740 *
741 * Each argument of @p opExpr must be one of:
742 * - A @c Var of type @c agg_token (or a @c FuncExpr implicit-cast wrapper
743 * around one) → cast to UUID via @c agg_token_to_uuid.
744 * - A scalar @c Const → wrapped in @c provenance_semimod(const, gate_one()).
745 *
746 * If @p negated is true the operator OID is replaced by its negator so that
747 * NOT(a < b) becomes a >= b at the provenance level.
748 *
749 * @param opExpr The comparison expression from the HAVING clause.
750 * @param constants Extension OID cache.
751 * @param negated Whether the expression appears under a NOT.
752 * @return A @c provenance_cmp(lhs, op_oid, rhs) @c FuncExpr.
753 */
754static FuncExpr *having_OpExpr_to_provenance_cmp(OpExpr *opExpr, const constants_t *constants, bool negated) {
755 FuncExpr *cmpExpr;
756 Node *arguments[2];
757 Const *oid;
758 Oid opno = opExpr->opno;
759
760 for (unsigned i = 0; i < 2; ++i) {
761 Node *node = (Node *)lfirst(list_nth_cell(opExpr->args, i));
762
763 if (IsA(node, FuncExpr)) {
764 FuncExpr *fe = (FuncExpr *)node;
765 if (fe->funcformat == COERCE_IMPLICIT_CAST ||
766 fe->funcformat == COERCE_EXPLICIT_CAST) {
767 if (fe->args->length == 1)
768 node = lfirst(list_head(fe->args));
769 }
770 }
771
772 if (IsA(node, FuncExpr)) {
773 FuncExpr *fe = (FuncExpr *)node;
774 if (fe->funcid == constants->OID_FUNCTION_PROVENANCE_AGGREGATE) {
775 // We need to add an explicit cast to UUID
776 FuncExpr *castToUUID = makeNode(FuncExpr);
777
778 castToUUID->funcid = constants->OID_FUNCTION_AGG_TOKEN_UUID;
779 castToUUID->funcresulttype = constants->OID_TYPE_UUID;
780 castToUUID->args = list_make1(fe);
781 castToUUID->location = -1;
782
783 arguments[i] = (Node *)castToUUID;
784 } else {
785 provsql_error("cannot handle complex HAVING expressions");
786 }
787 } else if (IsA(node, Var)) {
788 Var *v = (Var *)node;
789
790 if (v->vartype == constants->OID_TYPE_AGG_TOKEN) {
791 // We need to add an explicit cast to UUID
792 FuncExpr *castToUUID = makeNode(FuncExpr);
793
794 castToUUID->funcid = constants->OID_FUNCTION_AGG_TOKEN_UUID;
795 castToUUID->funcresulttype = constants->OID_TYPE_UUID;
796 castToUUID->args = list_make1(v);
797 castToUUID->location = -1;
798
799 arguments[i] = (Node *)castToUUID;
800 } else {
801 provsql_error("cannot handle complex HAVING expressions");
802 }
803 } else if (IsA(node, Const)) {
804 Const *literal = (Const *)node;
805 FuncExpr *oneExpr, *semimodExpr;
806
807 // gate_one() expression
808 oneExpr = makeNode(FuncExpr);
809 oneExpr->funcid = constants->OID_FUNCTION_GATE_ONE;
810 oneExpr->funcresulttype = constants->OID_TYPE_UUID;
811 oneExpr->args = NIL;
812 oneExpr->location = -1;
813
814 // provenance_semimod(literal, gate_one())
815 semimodExpr = makeNode(FuncExpr);
816 semimodExpr->funcid = constants->OID_FUNCTION_PROVENANCE_SEMIMOD;
817 semimodExpr->funcresulttype = constants->OID_TYPE_UUID;
818 semimodExpr->args = list_make2((Expr *)literal, (Expr *)oneExpr);
819 semimodExpr->location = -1;
820
821 arguments[i] = (Node *)semimodExpr;
822 } else {
823 provsql_error("cannot handle complex HAVING expressions");
824 }
825 }
826
827 if (negated) {
828 opno = get_negator(opno);
829 if (!opno)
830 provsql_error("Missing negator");
831 }
832
833 oid = makeConst(constants->OID_TYPE_INT, -1, InvalidOid, sizeof(int32),
834 Int32GetDatum(opno), false, true);
835
836 cmpExpr = makeNode(FuncExpr);
837 cmpExpr->funcid = constants->OID_FUNCTION_PROVENANCE_CMP;
838 cmpExpr->funcresulttype = constants->OID_TYPE_UUID;
839 cmpExpr->args = list_make3(arguments[0], oid, arguments[1]);
840 cmpExpr->location = opExpr->location;
841
842 return cmpExpr;
843}
844
845/**
846 * @brief Convert a Boolean combination of HAVING comparisons into a
847 * @c provenance_times / @c provenance_plus gate expression.
848 *
849 * Applies De Morgan duality when @p negated is true: AND becomes
850 * @c provenance_plus (OR) and vice-versa. NOT is handled by flipping
851 * @p negated and delegating to @c having_Expr_to_provenance_cmp.
852 *
853 * @param be Boolean expression from the HAVING clause.
854 * @param constants Extension OID cache.
855 * @param negated Whether the expression appears under a NOT.
856 * @return A @c FuncExpr combining the sub-expressions.
857 */
858static FuncExpr *having_BoolExpr_to_provenance(BoolExpr *be, const constants_t *constants, bool negated) {
859 if(be->boolop == NOT_EXPR) {
860 Expr *expr = (Expr *) lfirst(list_head(be->args));
861 return having_Expr_to_provenance_cmp(expr, constants, !negated);
862 } else {
863 FuncExpr *result;
864 List *l = NULL;
865 ListCell *lc;
866 ArrayExpr *array = makeNode(ArrayExpr);
867
868 array->array_typeid = constants->OID_TYPE_UUID_ARRAY;
869 array->element_typeid = constants->OID_TYPE_UUID;
870 array->location = -1;
871
872 result = makeNode(FuncExpr);
873 result->funcresulttype = constants->OID_TYPE_UUID;
874 result->funcvariadic = true;
875 result->location = be->location;
876 result->args = list_make1(array);
877
878 if ((be->boolop == AND_EXPR && !negated) || (be->boolop == OR_EXPR && negated))
879 result->funcid = constants->OID_FUNCTION_PROVENANCE_TIMES;
880 else if ((be->boolop == AND_EXPR && negated) || (be->boolop == OR_EXPR && !negated))
881 result->funcid = constants->OID_FUNCTION_PROVENANCE_PLUS;
882 else
883 provsql_error("Unknown Boolean operator");
884
885 foreach (lc, be->args) {
886 Expr *expr = (Expr *)lfirst(lc);
887 FuncExpr *arg = having_Expr_to_provenance_cmp(expr, constants, negated);
888 l = lappend(l, arg);
889 }
890
891 array->elements = l;
892
893 return result;
894 }
895}
896
897/**
898 * @brief Dispatch a HAVING sub-expression to the appropriate converter.
899 *
900 * Entry point for the mutual recursion between
901 * @c having_BoolExpr_to_provenance and @c having_OpExpr_to_provenance_cmp.
902 *
903 * @param expr Sub-expression to convert (@c BoolExpr or @c OpExpr).
904 * @param constants Extension OID cache.
905 * @param negated Whether the expression appears under a NOT.
906 * @return Converted @c FuncExpr.
907 */
908static FuncExpr *having_Expr_to_provenance_cmp(Expr *expr, const constants_t *constants, bool negated)
909{
910 if (IsA(expr, BoolExpr))
911 return having_BoolExpr_to_provenance((BoolExpr *)expr, constants, negated);
912 else if (IsA(expr, OpExpr))
913 return having_OpExpr_to_provenance_cmp((OpExpr *)expr, constants, negated);
914 else
915 provsql_error("Unknown structure within Boolean expression");
916}
917
918/**
919 * @brief Build the combined provenance expression to be added to the SELECT list.
920 *
921 * Combines the tokens in @p prov_atts according to @p op:
922 * - @c SR_PLUS → use the first token directly (union branch; the outer
923 * @c array_agg / @c provenance_plus is added later if needed).
924 * - @c SR_TIMES → wrap all tokens in @c provenance_times(...).
925 * - @c SR_MONUS → wrap all tokens in @c provenance_monus(...).
926 *
927 * When @p aggregation or @p group_by_rewrite is true, wraps the result in
928 * @c array_agg + @c provenance_plus to collapse groups. A @c provenance_delta
929 * gate is added for plain aggregations without a HAVING clause.
930 *
931 * If a HAVING clause is present it is removed from @p q->havingQual and
932 * converted into a provenance expression via @c having_Expr_to_provenance_cmp.
933 *
934 * If @c provsql_where_provenance is enabled, equality gates (@c provenance_eq)
935 * are prepended for join conditions and WHERE equalities, and a projection gate
936 * is appended if the output columns form a proper subset of the input columns.
937 *
938 * @param constants Extension OID cache.
939 * @param q Query being rewritten (HAVING is cleared if present).
940 * @param prov_atts List of provenance @c Var nodes.
941 * @param aggregation True if the query contains aggregate functions.
942 * @param group_by_rewrite True if a GROUP BY requires the plus-aggregate wrapper.
943 * @param op Semiring operation to use for combining tokens.
944 * @param columns Per-RTE column-numbering array (for where-provenance).
945 * @param nbcols Total number of non-provenance output columns.
946 * @return The provenance @c Expr to be appended to the target list.
947 */
948static Expr *make_provenance_expression(const constants_t *constants, Query *q,
949 List *prov_atts, bool aggregation,
950 bool group_by_rewrite,
951 semiring_operation op, int **columns,
952 int nbcols) {
953 Expr *result;
954 ListCell *lc_v;
955
956 if (op == SR_PLUS) {
957 result = linitial(prov_atts);
958 } else {
959 if (my_lnext(prov_atts, list_head(prov_atts)) == NULL) {
960 result = linitial(prov_atts);
961 } else {
962 FuncExpr *expr = makeNode(FuncExpr);
963 if (op == SR_TIMES) {
964 ArrayExpr *array = makeNode(ArrayExpr);
965
966 expr->funcid = constants->OID_FUNCTION_PROVENANCE_TIMES;
967 expr->funcvariadic = true;
968
969 array->array_typeid = constants->OID_TYPE_UUID_ARRAY;
970 array->element_typeid = constants->OID_TYPE_UUID;
971 array->elements = prov_atts;
972 array->location = -1;
973
974 expr->args = list_make1(array);
975 } else { // SR_MONUS
976 expr->funcid = constants->OID_FUNCTION_PROVENANCE_MONUS;
977 expr->args = prov_atts;
978 }
979 expr->funcresulttype = constants->OID_TYPE_UUID;
980 expr->location = -1;
981
982 result = (Expr *)expr;
983 }
984
985 if (group_by_rewrite || aggregation) {
986 Aggref *agg = makeNode(Aggref);
987 FuncExpr *plus = makeNode(FuncExpr);
988 TargetEntry *te_inner = makeNode(TargetEntry);
989
990 q->hasAggs = true;
991
992 te_inner->resno = 1;
993 te_inner->expr = (Expr *)result;
994
995 agg->aggfnoid = constants->OID_FUNCTION_ARRAY_AGG;
996 agg->aggtype = constants->OID_TYPE_UUID_ARRAY;
997 agg->args = list_make1(te_inner);
998 agg->aggkind = AGGKIND_NORMAL;
999 agg->location = -1;
1000#if PG_VERSION_NUM >= 140000
1001 agg->aggno = agg->aggtransno = -1;
1002#endif
1003
1004 agg->aggargtypes = list_make1_oid(constants->OID_TYPE_UUID);
1005
1006 plus->funcid = constants->OID_FUNCTION_PROVENANCE_PLUS;
1007 plus->args = list_make1(agg);
1008 plus->funcresulttype = constants->OID_TYPE_UUID;
1009 plus->location = -1;
1010
1011 result = (Expr *)plus;
1012 }
1013
1014 if (aggregation && !q->havingQual) {
1015 FuncExpr *deltaExpr = makeNode(FuncExpr);
1016
1017 // adding the delta gate to the provenance circuit
1018 deltaExpr->funcid = constants->OID_FUNCTION_PROVENANCE_DELTA;
1019 deltaExpr->args = list_make1(result);
1020 deltaExpr->funcresulttype = constants->OID_TYPE_UUID;
1021 deltaExpr->location = -1;
1022
1023 result = (Expr *)deltaExpr;
1024 }
1025
1026 if (q->havingQual) {
1027 result = (Expr*) having_Expr_to_provenance_cmp((Expr*)q->havingQual, constants, false);
1028 q->havingQual = NULL;
1029 }
1030 }
1031
1032 /* Part to handle eq gates used for where-provenance.
1033 * Placed before projection gates because they need
1034 * to be deeper in the provenance tree. */
1035 if (provsql_where_provenance && q->jointree) {
1036 ListCell *lc;
1037 foreach (lc, q->jointree->fromlist) {
1038 if (IsA(lfirst(lc), JoinExpr)) {
1039 JoinExpr *je = (JoinExpr *)lfirst(lc);
1040 /* Study equalities coming from From clause */
1041 result =
1042 add_eq_from_Quals_to_Expr(constants, je->quals, result, columns);
1043 }
1044 }
1045 /* Study equalities coming from WHERE clause */
1046 result = add_eq_from_Quals_to_Expr(constants, q->jointree->quals, result,
1047 columns);
1048 }
1049
1051 ArrayExpr *array = makeNode(ArrayExpr);
1052 FuncExpr *fe = makeNode(FuncExpr);
1053 bool projection = false;
1054 int nb_column = 0;
1055
1056 fe->funcid = constants->OID_FUNCTION_PROVENANCE_PROJECT;
1057 fe->funcvariadic = true;
1058 fe->funcresulttype = constants->OID_TYPE_UUID;
1059 fe->location = -1;
1060
1061 array->array_typeid = constants->OID_TYPE_INT_ARRAY;
1062 array->element_typeid = constants->OID_TYPE_INT;
1063 array->elements = NIL;
1064 array->location = -1;
1065
1066 foreach (lc_v, q->targetList) {
1067 TargetEntry *te_v = (TargetEntry *)lfirst(lc_v);
1068 if (IsA(te_v->expr, Var)) {
1069 Var *vte_v = (Var *)te_v->expr;
1070 RangeTblEntry *rte_v =
1071 (RangeTblEntry *)lfirst(list_nth_cell(q->rtable, vte_v->varno - 1));
1072 int value_v;
1073#if PG_VERSION_NUM >= 180000
1074 if (rte_v->rtekind == RTE_GROUP) {
1075 Expr *ge = lfirst(list_nth_cell(rte_v->groupexprs, vte_v->varattno - 1));
1076 if(IsA(ge, Var)) {
1077 Var *v = (Var *) ge;
1078 value_v = columns[v->varno - 1][v->varattno - 1];
1079 } else {
1080 Const *ce = makeConst(constants->OID_TYPE_INT, -1, InvalidOid,
1081 sizeof(int32), Int32GetDatum(0), false, true);
1082
1083 array->elements = lappend(array->elements, ce);
1084 value_v = 0;
1085 }
1086 } else
1087#endif
1088 if (rte_v->rtekind != RTE_JOIN) { // Normal RTE
1089 value_v = columns[vte_v->varno - 1][vte_v->varattno - 1];
1090 } else { // Join RTE
1091 Var *jav_v = (Var *)lfirst(
1092 list_nth_cell(rte_v->joinaliasvars, vte_v->varattno - 1));
1093 value_v = columns[jav_v->varno - 1][jav_v->varattno - 1];
1094 }
1095
1096 /* If this is a valid column */
1097 if (value_v > 0) {
1098 Const *ce =
1099 makeConst(constants->OID_TYPE_INT, -1, InvalidOid, sizeof(int32),
1100 Int32GetDatum(value_v), false, true);
1101
1102 array->elements = lappend(array->elements, ce);
1103
1104 if (value_v != ++nb_column)
1105 projection = true;
1106 } else {
1107 if (value_v != -1)
1108 projection = true;
1109 }
1110 } else { // we have a function in target
1111 Const *ce = makeConst(constants->OID_TYPE_INT, -1, InvalidOid,
1112 sizeof(int32), Int32GetDatum(0), false, true);
1113
1114 array->elements = lappend(array->elements, ce);
1115 projection = true;
1116 }
1117 }
1118
1119 if (nb_column != nbcols)
1120 projection = true;
1121
1122 if (projection) {
1123 fe->args = list_make2(result, array);
1124 result = (Expr *)fe;
1125 } else {
1126 pfree(array);
1127 pfree(fe);
1128 }
1129 }
1130
1131 return result;
1132}
1133
1134/* -------------------------------------------------------------------------
1135 * Set-operation & DISTINCT rewriting
1136 * ------------------------------------------------------------------------- */
1137
#if PG_VERSION_NUM >= 180000
/* State for resolve_group_rte_vars_mutator: which range-table index is the
 * GROUP entry, and the expressions its columns stand for. */
typedef struct {
  Index group_rtindex;
  List *groupexprs;
} resolve_group_rte_ctx;

/*
 * Expression mutator replacing every Var that points at the GROUP range-table
 * entry (varno == ctx->group_rtindex) by a copy of the corresponding entry of
 * ctx->groupexprs. All other nodes are handled by expression_tree_mutator.
 */
static Node *
resolve_group_rte_vars_mutator(Node *node, resolve_group_rte_ctx *ctx) {
  Var *group_ref;
  Node *replacement;

  if (node == NULL)
    return NULL;

  if (!IsA(node, Var) || ((Var *)node)->varno != ctx->group_rtindex)
    return expression_tree_mutator(node, resolve_group_rte_vars_mutator,
                                   (void *)ctx);

  group_ref = (Var *)node;
  replacement = copyObject(list_nth(ctx->groupexprs, group_ref->varattno - 1));

  /* Clear varnullingrels: the group-step nulling bits reference the
   * group_rtindex RTE which does not exist in the fresh inner query.
   * Leaving them set causes the planner to access simple_rel_array at
   * group_rtindex (which has no RelOptInfo), triggering
   * "unrecognized RTE kind: 9". */
  if (IsA(replacement, Var))
    ((Var *)replacement)->varnullingrels = NULL;

  return replacement;
}
#endif
1166
1167/**
1168 * @brief Build the inner GROUP-BY subquery for one @c AGG(DISTINCT key).
1169 *
1170 * Produces:
1171 * @code
1172 * SELECT key_expr, gb_col1, gb_col2, ...
1173 * FROM <same tables as q>
1174 * GROUP BY key_expr, gb_col1, gb_col2, ...
1175 * @endcode
1176 *
1177 * @param q Original query (supplies FROM / WHERE).
1178 * @param key_expr The DISTINCT argument expression.
1179 * @param groupby_tes Non-aggregate target entries that are GROUP BY columns.
1180 * @return Fresh inner @c Query.
1181 */
1182static Query *build_inner_for_distinct_key(Query *q, Expr *key_expr,
1183 List *groupby_tes) {
1184 Query *inner;
1185 List *new_tl = NIL;
1186 List *new_gc = NIL;
1187 ListCell *lc;
1188 int resno = 1, sgref = 1;
1189
1190 inner = copyObject(q);
1191
1192 inner->hasAggs = false;
1193 inner->sortClause = NIL;
1194 inner->limitCount = NULL;
1195 inner->limitOffset = NULL;
1196 inner->distinctClause = NIL;
1197 inner->hasDistinctOn = false;
1198 inner->havingQual = NULL;
1199
1200 /* First column: the DISTINCT key */
1201 {
1202 TargetEntry *kte = makeNode(TargetEntry);
1203 SortGroupClause *sgc = makeNode(SortGroupClause);
1204
1205 kte->expr = copyObject(key_expr);
1206 kte->resno = resno++;
1207 kte->resname = "key";
1208 sgc->tleSortGroupRef = kte->ressortgroupref = sgref++;
1209 get_sort_group_operators(exprType((Node *)kte->expr), true, true, false,
1210 &sgc->sortop, &sgc->eqop, NULL, &sgc->hashable);
1211 new_gc = list_make1(sgc);
1212 new_tl = list_make1(kte);
1213 }
1214
1215 /* Remaining columns: GROUP BY columns from the original query */
1216 foreach (lc, groupby_tes) {
1217 TargetEntry *gyte = copyObject((TargetEntry *)lfirst(lc));
1218 SortGroupClause *sgc = makeNode(SortGroupClause);
1219
1220 gyte->resno = resno++;
1221 gyte->resjunk = false;
1222 sgc->tleSortGroupRef = gyte->ressortgroupref = sgref++;
1223 get_sort_group_operators(exprType((Node *)gyte->expr), true, true, false,
1224 &sgc->sortop, &sgc->eqop, NULL, &sgc->hashable);
1225 new_gc = lappend(new_gc, sgc);
1226 new_tl = lappend(new_tl, gyte);
1227 }
1228
1229 inner->targetList = new_tl;
1230 inner->groupClause = new_gc;
1231 return inner;
1232}
1233
1234/**
1235 * @brief Wrap @p inner in an outer query that applies the original aggregate.
1236 *
1237 * Produces:
1238 * @code
1239 * SELECT AGG(key_col), gb_col1, gb_col2, ...
1240 * FROM inner
1241 * GROUP BY gb_col1, gb_col2, ...
1242 * @endcode
1243 * The DISTINCT flag is cleared; @p inner provides exactly one row per
1244 * (key, group-by) combination, so the plain aggregate gives the right count.
1245 *
1246 * @param orig_agg_te Original @c TargetEntry containing @c AGG(DISTINCT key).
1247 * @param inner Inner query from @c build_inner_for_distinct_key.
1248 * @param n_gb Number of GROUP BY columns (trailing entries in @p inner).
1249 * @param constants Extension OID cache.
1250 * @return Fresh outer @c Query.
1251 */
1252static Query *build_outer_for_distinct_key(TargetEntry *orig_agg_te,
1253 Query *inner, int n_gb,
1254 const constants_t *constants) {
1255 Query *outer = makeNode(Query);
1256 RangeTblEntry *rte = makeNode(RangeTblEntry);
1257 Alias *alias = makeNode(Alias), *eref = makeNode(Alias);
1258 RangeTblRef *rtr = makeNode(RangeTblRef);
1259 FromExpr *jt = makeNode(FromExpr);
1260 List *new_tl = NIL, *new_gc = NIL;
1261 ListCell *lc;
1262 int resno = 1, sgref = 1;
1263 int inner_len = list_length(inner->targetList);
1264 int attno;
1265
1266 /* Wrap inner in a subquery RTE */
1267 alias->aliasname = eref->aliasname = "d";
1268 eref->colnames = NIL;
1269 foreach (lc, inner->targetList) {
1270 TargetEntry *te = lfirst(lc);
1271 eref->colnames = lappend(eref->colnames,
1272 makeString(te->resname ? pstrdup(te->resname) : ""));
1273 }
1274 rte->alias = alias;
1275 rte->eref = eref;
1276 rte->rtekind = RTE_SUBQUERY;
1277 rte->subquery = inner;
1278 rte->inFromCl = true;
1279#if PG_VERSION_NUM < 160000
1280 rte->requiredPerms = ACL_SELECT;
1281#endif
1282
1283 rtr->rtindex = 1;
1284 jt->fromlist = list_make1(rtr);
1285
1286 outer->commandType = CMD_SELECT;
1287 outer->canSetTag = true;
1288 outer->rtable = list_make1(rte);
1289 outer->jointree = jt;
1290 outer->hasAggs = true;
1291
1292 /* First output column: the aggregate over the key (col 1 of inner) */
1293 {
1294 TargetEntry *agg_te = copyObject(orig_agg_te);
1295 Aggref *ar = (Aggref *)agg_te->expr;
1296 Var *key_var = makeNode(Var);
1297 TargetEntry *arg_te = makeNode(TargetEntry);
1298
1299 key_var->varno = 1;
1300 key_var->varattno = 1; /* key is first column of inner */
1301 key_var->vartype = linitial_oid(ar->aggargtypes);
1302 key_var->varcollid = exprCollation((Node *)((TargetEntry *)linitial(ar->args))->expr);
1303 key_var->vartypmod = -1;
1304 key_var->location = -1;
1305 arg_te->resno = 1;
1306 arg_te->expr = (Expr *)key_var;
1307
1308 ar->args = list_make1(arg_te);
1309 ar->aggdistinct = NIL;
1310 agg_te->resno = resno++;
1311 new_tl = list_make1(agg_te);
1312 }
1313
1314 /* Remaining output columns: GROUP BY cols (trailing cols of inner) */
1315 for (attno = inner_len - n_gb + 1; attno <= inner_len; attno++) {
1316 TargetEntry *inner_te = list_nth(inner->targetList, attno - 1);
1317 Var *gb_var = makeNode(Var);
1318 TargetEntry *gb_te = makeNode(TargetEntry);
1319 SortGroupClause *sgc = makeNode(SortGroupClause);
1320
1321 gb_var->varno = 1;
1322 gb_var->varattno = attno;
1323 gb_var->vartype = exprType((Node *)inner_te->expr);
1324 gb_var->varcollid = exprCollation((Node *)inner_te->expr);
1325 gb_var->vartypmod = -1;
1326 gb_var->location = -1;
1327
1328 gb_te->resno = resno++;
1329 gb_te->expr = (Expr *)gb_var;
1330 gb_te->resname = inner_te->resname;
1331
1332 sgc->tleSortGroupRef = gb_te->ressortgroupref = sgref++;
1333 sgc->nulls_first = false;
1334 get_sort_group_operators(gb_var->vartype, true, true, false,
1335 &sgc->sortop, &sgc->eqop, NULL, &sgc->hashable);
1336 new_gc = lappend(new_gc, sgc);
1337 new_tl = lappend(new_tl, gb_te);
1338 }
1339
1340 outer->targetList = new_tl;
1341 outer->groupClause = new_gc;
1342 return outer;
1343}
1344
1345/**
1346 * @brief Rewrite every @c AGG(DISTINCT key) in @p q using independent subqueries.
1347 *
1348 * For a single DISTINCT aggregate, produces a subquery:
1349 * @code
1350 * SELECT AGG(key), gb... FROM (SELECT key, gb... FROM t GROUP BY key, gb...) GROUP BY gb...
1351 * @endcode
1352 * For multiple DISTINCT aggregates with different keys, produces an JOIN
1353 * of one such subquery per aggregate, joined on the GROUP BY columns.
1354 * Non-DISTINCT aggregates are left untouched.
1355 *
1356 * @param q Query to inspect and possibly rewrite.
1357 * @param constants Extension OID cache.
1358 * @return Rewritten query, or @c NULL if no @c AGG(DISTINCT) was found.
1359 */
1360static Query *rewrite_agg_distinct(Query *q, const constants_t *constants) {
1361 List *distinct_agg_tes = NIL;
1362 List *groupby_tes = NIL;
1363 ListCell *lc;
1364
1365#if PG_VERSION_NUM >= 180000
1366 /* In PostgreSQL 18, parseCheckAggregates() injects a virtual RTE_GROUP
1367 * entry at the END of the range table. GROUP BY column Vars in the
1368 * SELECT list point to this entry (varno == group_rtindex) instead of
1369 * the underlying base-table RTE.
1370 *
1371 * Strip that entry now, before we do any index arithmetic (fll, rtr->rtindex,
1372 * agg_idx) or copy q->targetList into groupby_tes. Once removed:
1373 * - q->rtable contains only real RTEs, so appending outer-subquery RTEs
1374 * lands at the correct indices.
1375 * - groupby_tes will carry resolved (base-table) Var expressions, so
1376 * the WHERE equalities and the inner-query target list are correct.
1377 * We also resolve the Var(group_rtindex) refs in q's own targetList and
1378 * WHERE clause so the final query doesn't reference the stripped entry. */
1379 if (q->hasGroupRTE) {
1380 resolve_group_rte_ctx grp_ctx;
1381 bool found = false;
1382 ListCell *lc2;
1383 Index idx = 1;
1384 int rte_len = 0;
1385
1386 foreach (lc2, q->rtable) {
1387 RangeTblEntry *r = (RangeTblEntry *)lfirst(lc2);
1388 if (r->rtekind == RTE_GROUP) {
1389 grp_ctx.group_rtindex = idx;
1390 grp_ctx.groupexprs = r->groupexprs;
1391 found = true;
1392 rte_len = idx - 1;
1393 break;
1394 }
1395 idx++;
1396 }
1397
1398 if (found) {
1399 /* Remove the RTE_GROUP (always last, so truncate is safe) */
1400 q->rtable = list_truncate(q->rtable, rte_len);
1401 q->hasGroupRTE = false;
1402
1403 /* Resolve Var(group_rtindex, i) → underlying base-table expression
1404 * throughout the parts of q we will touch below */
1405 foreach (lc2, q->targetList) {
1406 TargetEntry *te = (TargetEntry *)lfirst(lc2);
1407 te->expr = (Expr *)resolve_group_rte_vars_mutator(
1408 (Node *)te->expr, &grp_ctx);
1409 }
1410 if (q->jointree && q->jointree->quals)
1411 q->jointree->quals = resolve_group_rte_vars_mutator(
1412 q->jointree->quals, &grp_ctx);
1413 }
1414 }
1415#endif
1416
1417 /* Extract AGG(DISTINCT) and GROUP BY targets from the target list.
1418 * Regular AGG() aggregations are left untouched. */
1419 foreach (lc, q->targetList) {
1420 TargetEntry *te = lfirst(lc);
1421 if (IsA(te->expr, Aggref)) {
1422 Aggref *ar = (Aggref *)te->expr;
1423 if (list_length(ar->aggdistinct) > 0)
1424 distinct_agg_tes = lappend(distinct_agg_tes, te);
1425 } else {
1426 /* Non-aggregate column — treat as GROUP BY key */
1427 TargetEntry *te_copy = copyObject(te);
1428 te_copy->resjunk = false;
1429 groupby_tes = lappend(groupby_tes, te_copy);
1430 }
1431 }
1432
1433 if (distinct_agg_tes == NIL)
1434 return NULL;
1435
1436 {
1437 int n_aggs = list_length(distinct_agg_tes);
1438 int n_gb = list_length(groupby_tes);
1439 List *outer_queries = NIL;
1440
1441 /* -----------------------------------------------------------------------
1442 * For each DISTINCT aggregate, build:
1443 * inner_i: SELECT key_i, gb... FROM original... GROUP BY key_i, gb...
1444 * outer_i: SELECT AGG(key_i) ASS agg_i, gb... FROM inner_i GROUP BY gb...
1445 *
1446 * Then produce a final query:
1447 * SELECT gb..., agg_0, ..., agg_{N-1}
1448 * FROM original... JOIN outer_0 ON gb... = gb... [JOIN ...]
1449 * keeping the same order for the output columns.
1450 *
1451 * Column order in the final target list follows q->targetList:
1452 * - DISTINCT agg i → Var(n+i, 1) (agg col of outer_i)
1453 * ----------------------------------------------------------------------- */
1454
1455 /* Build one inner + one outer query per DISTINCT aggregate */
1456 foreach (lc, distinct_agg_tes) {
1457 TargetEntry *agg_te = lfirst(lc);
1458 Aggref *ar = (Aggref *)agg_te->expr;
1459 if(list_length(ar->args) != 1)
1460 provsql_error("AGG(DISTINCT) with more than one argument is not supported");
1461 else {
1462 Expr *key_expr = (Expr *)((TargetEntry *)linitial(ar->args))->expr;
1463 Query *inner = build_inner_for_distinct_key(q, key_expr, groupby_tes);
1464 Query *outer = build_outer_for_distinct_key(agg_te, inner, n_gb, constants);
1465 outer_queries = lappend(outer_queries, outer);
1466 }
1467 }
1468
1469 {
1470 /* One subquery RTE per outer query */
1471 int i = 0;
1472 foreach (lc, outer_queries) {
1473 Query *oq = lfirst(lc);
1474 RangeTblEntry *rte = makeNode(RangeTblEntry);
1475 Alias *alias = makeNode(Alias), *eref = makeNode(Alias);
1476 ListCell *lc2;
1477 char buf[16];
1478
1479 snprintf(buf, sizeof(buf), "d%d", i + 1);
1480 alias->aliasname = eref->aliasname = pstrdup(buf);
1481 eref->colnames = NIL;
1482 foreach (lc2, oq->targetList) {
1483 TargetEntry *te = lfirst(lc2);
1484 eref->colnames = lappend(eref->colnames,
1485 makeString(te->resname ? pstrdup(te->resname) : ""));
1486 }
1487 rte->alias = alias;
1488 rte->eref = eref;
1489 rte->rtekind = RTE_SUBQUERY;
1490 rte->subquery = oq;
1491 rte->inFromCl = true;
1492#if PG_VERSION_NUM < 160000
1493 rte->requiredPerms = ACL_SELECT;
1494#endif
1495 q->rtable = lappend(q->rtable, rte);
1496 i++;
1497 }
1498
1499 /* Build FROM list and WHERE conditions for the implicit join.
1500 * Use a simple FROM original..., outer_i, ... WHERE original.gb_j = outer_i.gb_j */
1501 {
1502 FromExpr *jt = q->jointree;
1503 List *from_list = jt->fromlist;
1504 unsigned fll = list_length(from_list);
1505 List *where_args = NIL;
1506
1507 for (i = fll+1; i <= fll+n_aggs; i++) {
1508 RangeTblRef *rtr = makeNode(RangeTblRef);
1509 ListCell *lc2;
1510 unsigned j=0;
1511
1512 rtr->rtindex = i;
1513 from_list = lappend(from_list, rtr);
1514
1515 /* outer_0.gb_j = outer_i.gb_j for each GROUP BY column j */
1516 foreach(lc2, groupby_tes) {
1517 TargetEntry *gb_te = lfirst(lc2);
1518 int gb_attno = ++j + 1; /* col 1 = agg, cols 2+ = GB */
1519 Oid ytype = exprType((Node *)gb_te->expr);
1520 Oid opno = find_equality_operator(ytype, ytype);
1521 Operator opInfo = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno));
1522 Form_pg_operator opform;
1523 OpExpr *oe = makeNode(OpExpr);
1524 Expr *le = copyObject(gb_te->expr);
1525 Var *rv = makeNode(Var);
1526 Oid collation=exprCollation((Node*) le);
1527
1528 if (!HeapTupleIsValid(opInfo))
1529 provsql_error("could not find equality operator for type %u",
1530 ytype);
1531 opform = (Form_pg_operator)GETSTRUCT(opInfo);
1532
1533 oe->opno = opno;
1534 oe->opfuncid = opform->oprcode;
1535 oe->opresulttype = opform->oprresult;
1536 oe->opcollid = InvalidOid;
1537 oe->inputcollid = collation;
1538 oe->location = -1;
1539 ReleaseSysCache(opInfo);
1540
1541 rv->varno = i; rv->varattno = gb_attno;
1542 rv->vartype = ytype; rv->varcollid = collation;
1543 rv->vartypmod = -1; rv->location = -1;
1544
1545 oe->args = list_make2(le, rv);
1546 where_args = lappend(where_args, oe);
1547 }
1548 }
1549
1550 if (list_length(where_args) == 0) {
1551 jt->quals = NULL;
1552 } else if (list_length(where_args) == 1) {
1553 jt->quals = linitial(where_args);
1554 } else {
1555 BoolExpr *be = makeNode(BoolExpr);
1556 be->boolop = AND_EXPR;
1557 be->args = where_args;
1558 be->location = -1;
1559 jt->quals = (Node *)be;
1560 }
1561 }
1562
1563 /* Build final target list in original column order.
1564 * DISTINCT agg i → Var(i+1, 1); GROUP BY col j → Var(1, 2+j). */
1565 {
1566 int agg_idx = list_length(q->jointree->fromlist) - n_aggs + 1;
1567 ListCell *lc2;
1568
1569 foreach (lc2, q->targetList) {
1570 TargetEntry *te = lfirst(lc2);
1571
1572 if (IsA(te->expr, Aggref) &&
1573 ((Aggref *)te->expr)->aggdistinct != NIL) {
1574 Var *v = makeNode(Var);
1575 v->varno = agg_idx++; /* outer_{agg_idx} RTE */
1576 v->varattno = 1; /* agg result is col 1 of each outer */
1577 v->vartypmod = -1;
1578 v->location = -1;
1579 te->expr = (Expr*)v;
1580 }
1581 }
1582 }
1583
1584 return q;
1585 }
1586 }
1587}
1588
1589/* -------------------------------------------------------------------------
1590 * Aggregation replacement mutator
1591 * ------------------------------------------------------------------------- */
1592
1593/** @brief Context for the @c aggregation_mutator tree walker. */
1595 List *prov_atts; ///< List of provenance Var nodes
1596 semiring_operation op; ///< Semiring operation for combining tokens
1597 const constants_t *constants; ///< Extension OID cache
1599
1600/**
1601 * @brief Tree-mutator that replaces Aggrefs with provenance-aware aggregates.
1602 * @param node Current expression tree node.
1603 * @param context Mutation context with prov_atts, op, and constants.
1604 * @return Possibly modified node.
1605 */
1606static Node *aggregation_mutator(Node *node,
1607 aggregation_mutator_context *context) {
1608 if (node == NULL)
1609 return NULL;
1610
1611 if (IsA(node, Aggref)) {
1612 Aggref *ar_v = (Aggref *)node;
1613 return (Node *)make_aggregation_expression(context->constants, ar_v,
1614 context->prov_atts, context->op);
1615 }
1616
1617 return expression_tree_mutator(node, aggregation_mutator, (void *)context);
1618}
1619
1620/**
1621 * @brief Replace every @c Aggref in @p q with a provenance-aware aggregate.
1622 *
1623 * Walks the query tree and substitutes each @c Aggref node with the result
1624 * of @c make_aggregation_expression, which wraps the original aggregate in
1625 * the semimodule machinery (@c provenance_semimod + @c array_agg +
1626 * @c provenance_aggregate).
1627 *
1628 * @param constants Extension OID cache.
1629 * @param q Query to mutate in place.
1630 * @param prov_atts List of provenance @c Var nodes.
1631 * @param op Semiring operation for combining tokens across rows.
1632 */
1633static void
1635 Query *q, List *prov_atts,
1636 semiring_operation op) {
1637
1638 aggregation_mutator_context context = {prov_atts, op, constants};
1639
1640 query_tree_mutator(q, aggregation_mutator, &context,
1641 QTW_DONT_COPY_QUERY | QTW_IGNORE_RT_SUBQUERIES);
1642}
1643
1644/**
1645 * @brief Append the provenance expression to @p q's target list.
1646 *
1647 * Inserts a new @c TargetEntry named @c provsql immediately before any
1648 * @c resjunk entries (which must remain last) and adjusts the @c resno
1649 * of subsequent entries accordingly.
1650 *
1651 * @param q Query to modify in place.
1652 * @param provenance Expression to add (becomes the @c provsql output column).
1653 */
1654static void add_to_select(Query *q, Expr *provenance) {
1655 TargetEntry *newte = makeNode(TargetEntry);
1656 bool inserted = false;
1657 unsigned resno = 0;
1658
1659 newte->expr = provenance;
1660 newte->resname = (char *)PROVSQL_COLUMN_NAME;
1661
1662 if (IsA(provenance, Var)) {
1663 RangeTblEntry *rte = list_nth(q->rtable, ((Var *)provenance)->varno - 1);
1664 newte->resorigtbl = rte->relid;
1665 newte->resorigcol = ((Var *)provenance)->varattno;
1666 }
1667
1668 /* Make sure to insert before all resjunk Target Entry */
1669 for (ListCell *cell = list_head(q->targetList); cell != NULL;) {
1670 TargetEntry *te = (TargetEntry *)lfirst(cell);
1671
1672 if (!inserted)
1673 ++resno;
1674
1675 if (te->resjunk) {
1676 if (!inserted) {
1677 newte->resno = resno;
1678 q->targetList = list_insert_nth(q->targetList, resno - 1, newte);
1679 cell = list_nth_cell(q->targetList, resno);
1680 te = (TargetEntry *)lfirst(cell);
1681 inserted = true;
1682 }
1683
1684 ++te->resno;
1685 }
1686
1687 cell = my_lnext(q->targetList, cell);
1688 }
1689
1690 if (!inserted) {
1691 newte->resno = resno + 1;
1692 q->targetList = lappend(q->targetList, newte);
1693 }
1694}
1695
1696/* -------------------------------------------------------------------------
1697 * Provenance function replacement
1698 * ------------------------------------------------------------------------- */
1699
1700/** @brief Context for the @c provenance_mutator tree walker. */
1702 Expr *provsql; ///< Provenance expression to substitute for provenance() calls
1703 const constants_t *constants; ///< Extension OID cache
1705
1706/**
1707 * @brief Tree-mutator that replaces provenance() calls with the actual provenance expression.
1708 * @param node Current expression tree node.
1709 * @param context Mutation context with the provenance expression and constants.
1710 * @return Possibly modified node.
1711 */
1712static Node *provenance_mutator(Node *node,
1713 provenance_mutator_context *context) {
1714 if (node == NULL)
1715 return NULL;
1716
1717 if (IsA(node, FuncExpr)) {
1718 FuncExpr *f = (FuncExpr *)node;
1719
1720 if (f->funcid == context->constants->OID_FUNCTION_PROVENANCE) {
1721 return (Node *)copyObject(context->provsql);
1722 }
1723 } else if (IsA(node, RangeTblEntry) || IsA(node, RangeTblFunction)) {
1724 // A provenance() expression in a From (not within a subquery) is
1725 // non-sensical
1726 return node;
1727 }
1728
1729 return expression_tree_mutator(node, provenance_mutator, (void *)context);
1730}
1731
1732/**
1733 * @brief Replace every explicit @c provenance() call in @p q with @p provsql.
1734 *
1735 * Users can write @c provenance() in the target list or WHERE to refer to the
1736 * provenance token of the current tuple. This mutator substitutes those calls
1737 * with the actual computed provenance expression.
1738 *
1739 * @param constants Extension OID cache.
1740 * @param q Query to mutate in place.
1741 * @param provsql Provenance expression to substitute.
1742 */
1743static void
1745 Query *q, Expr *provsql) {
1746 provenance_mutator_context context = {provsql, constants};
1747
1748 query_tree_mutator(q, provenance_mutator, &context,
1749 QTW_DONT_COPY_QUERY | QTW_IGNORE_RT_SUBQUERIES);
1750}
1751
1752/**
1753 * @brief Convert a SELECT DISTINCT into an equivalent GROUP BY.
1754 *
1755 * ProvSQL cannot handle DISTINCT directly (it would collapse provenance
1756 * tokens that should remain separate). This function moves every entry
1757 * from @p q->distinctClause into @p q->groupClause (skipping any that are
1758 * already there) and clears @p q->distinctClause.
1759 *
1760 * @param q Query to modify in place.
1761 */
1763 // First check which are already in the group by clause
1764 // Should be either none or all as "SELECT DISTINCT a, b ... GROUP BY a"
1765 // is invalid
1766 Bitmapset *already_in_group_by = NULL;
1767 ListCell *lc;
1768 foreach (lc, q->groupClause) {
1769 SortGroupClause *sgc = (SortGroupClause *)lfirst(lc);
1770 already_in_group_by =
1771 bms_add_member(already_in_group_by, sgc->tleSortGroupRef);
1772 }
1773
1774 foreach (lc, q->distinctClause) {
1775 SortGroupClause *sgc = (SortGroupClause *)lfirst(lc);
1776 if (!bms_is_member(sgc->tleSortGroupRef, already_in_group_by)) {
1777 q->groupClause = lappend(q->groupClause, sgc);
1778 }
1779 }
1780
1781 q->distinctClause = NULL;
1782}
1783
1784/**
1785 * @brief Remove sort/group references that belonged to removed provenance columns.
1786 *
1787 * After @c remove_provenance_attributes_select strips provenance entries from
1788 * the target list, any GROUP BY, ORDER BY, or DISTINCT clause that referenced
1789 * them by @c tleSortGroupRef must be cleaned up.
1790 *
1791 * @param q Query to modify in place.
1792 * @param removed_sortgrouprefs Bitmapset of @c ressortgroupref values to remove.
1793 */
1794static void
1796 const Bitmapset *removed_sortgrouprefs) {
1797 List **lists[3] = {&q->groupClause, &q->distinctClause, &q->sortClause};
1798 int i = 0;
1799
1800 for (i = 0; i < 3; ++i) {
1801 ListCell *cell, *prev;
1802
1803 for (cell = list_head(*lists[i]), prev = NULL; cell != NULL;) {
1804 SortGroupClause *sgc = (SortGroupClause *)lfirst(cell);
1805 if (bms_is_member(sgc->tleSortGroupRef, removed_sortgrouprefs)) {
1806 *lists[i] = my_list_delete_cell(*lists[i], cell, prev);
1807
1808 if (prev) {
1809 cell = my_lnext(*lists[i], prev);
1810 } else {
1811 cell = list_head(*lists[i]);
1812 }
1813 } else {
1814 prev = cell;
1815 cell = my_lnext(*lists[i], cell);
1816 }
1817 }
1818 }
1819}
1820
1821/**
1822 * @brief Strip the provenance column's type info from a set-operation node.
1823 *
1824 * When a provenance column is removed from a UNION/EXCEPT query's target list,
1825 * the matching entries in the @c SetOperationStmt's @c colTypes, @c colTypmods,
1826 * and @c colCollations lists must also be removed.
1827 *
1828 * @param q Query containing @c setOperations.
1829 * @param removed Boolean array (from @c remove_provenance_attributes_select)
1830 * indicating which columns were removed.
1831 */
1832static void remove_provenance_attribute_setoperations(Query *q, bool *removed) {
1833 SetOperationStmt *so = (SetOperationStmt *)q->setOperations;
1834 List **lists[3] = {&so->colTypes, &so->colTypmods, &so->colCollations};
1835 int i = 0;
1836
1837 for (i = 0; i < 3; ++i) {
1838 ListCell *cell, *prev;
1839 int j;
1840
1841 for (cell = list_head(*lists[i]), prev = NULL, j = 0; cell != NULL; ++j) {
1842 if (removed[j]) {
1843 *lists[i] = my_list_delete_cell(*lists[i], cell, prev);
1844
1845 if (prev) {
1846 cell = my_lnext(*lists[i], prev);
1847 } else {
1848 cell = list_head(*lists[i]);
1849 }
1850 } else {
1851 prev = cell;
1852 cell = my_lnext(*lists[i], cell);
1853 }
1854 }
1855 }
1856}
1857
1858/**
1859 * @brief Wrap a non-ALL set operation in an outer GROUP BY query.
1860 *
1861 * UNION / EXCEPT (without ALL) would deduplicate tuples before ProvSQL can
1862 * attach provenance tokens. To avoid this, the set operation is converted to
1863 * UNION ALL / EXCEPT ALL and a new outer query is built that groups the results
1864 * by all non-provenance columns, collecting tokens into an array for the
1865 * @c provenance_plus evaluation.
1866 *
1867 * After this rewrite the recursive call to @c process_query handles the
1868 * now-ALL inner set operation normally.
1869 *
1870 * @param q Query whose @c setOperations is non-ALL (modified to ALL in place).
1871 * @return New outer query that wraps @p q as a subquery RTE.
1872 */
1874 Query *new_query = makeNode(Query);
1875 RangeTblEntry *rte = makeNode(RangeTblEntry);
1876 FromExpr *jointree = makeNode(FromExpr);
1877 RangeTblRef *rtr = makeNode(RangeTblRef);
1878
1879 SetOperationStmt *stmt = (SetOperationStmt *)q->setOperations;
1880
1881 ListCell *lc;
1882 int sortgroupref = 0;
1883
1884 stmt->all = true;
1885 // we might leave sub nodes of the SetOperationsStmt tree with all = false
1886 // but only for recursive trees of operators and only union can be recursive
1887 // https://doxygen.postgresql.org/prepunion_8c_source.html#l00479
1888 // we will set therefore set them later in process_set_operation_union
1889
1890 rte->rtekind = RTE_SUBQUERY;
1891 rte->subquery = q;
1892 rte->eref = copyObject(((RangeTblEntry *)linitial(q->rtable))->eref);
1893 rte->inFromCl = true;
1894#if PG_VERSION_NUM < 160000
1895 // For PG_VERSION_NUM >= 160000, rte->perminfoindex==0 so no need to
1896 // care about permissions
1897 rte->requiredPerms = ACL_SELECT;
1898#endif
1899
1900 rtr->rtindex = 1;
1901 jointree->fromlist = list_make1(rtr);
1902
1903 new_query->commandType = CMD_SELECT;
1904 new_query->canSetTag = true;
1905 new_query->rtable = list_make1(rte);
1906 new_query->jointree = jointree;
1907 new_query->targetList = copyObject(q->targetList);
1908
1909 if (new_query->targetList) {
1910 foreach (lc, new_query->targetList) {
1911 TargetEntry *te = (TargetEntry *)lfirst(lc);
1912 SortGroupClause *sgc = makeNode(SortGroupClause);
1913
1914 sgc->tleSortGroupRef = te->ressortgroupref = ++sortgroupref;
1915
1916 get_sort_group_operators(exprType((Node *)te->expr), false, true, false,
1917 &sgc->sortop, &sgc->eqop, NULL, &sgc->hashable);
1918
1919 new_query->groupClause = lappend(new_query->groupClause, sgc);
1920 }
1921 } else {
1922 GroupingSet *gs = makeNode(GroupingSet);
1923 gs->kind = GROUPING_SET_EMPTY;
1924 gs->content = 0;
1925 gs->location = -1;
1926 new_query->groupingSets = list_make1(gs);
1927 }
1928
1929 return new_query;
1930}
1931
1932/* -------------------------------------------------------------------------
1933 * Detection walkers
1934 * ------------------------------------------------------------------------- */
1935
1936/**
1937 * @brief Tree walker that returns true if any @c provenance() call is found.
1938 *
1939 * Used to detect whether a query explicitly calls @c provenance(), which
1940 * triggers the substitution in @c replace_provenance_function_by_expression.
1941 * @param node Current expression tree node.
1942 * @param data Pointer to @c constants_t (cast from @c void*).
1943 * @return @c true if a @c provenance() call is found anywhere in @p node.
1944 */
1945static bool provenance_function_walker(Node *node, void *data) {
1946 const constants_t *constants = (const constants_t *)data;
1947 if (node == NULL)
1948 return false;
1949
1950 if (IsA(node, FuncExpr)) {
1951 FuncExpr *f = (FuncExpr *)node;
1952
1953 if (f->funcid == constants->OID_FUNCTION_PROVENANCE)
1954 return true;
1955 }
1956
1957 return expression_tree_walker(node, provenance_function_walker, data);
1958}
1959
1960/**
1961 * @brief Check whether a @c provenance() call appears in the GROUP BY list.
1962 *
1963 * When the user writes @c GROUP BY provenance(), ProvSQL must not add its own
1964 * group-by wrapper (the query is already grouping on the token).
1965 *
1966 * @param constants Extension OID cache.
1967 * @param q Query to inspect.
1968 * @return True if any GROUP BY key contains a @c provenance() call.
1969 */
1971 Query *q) {
1972 ListCell *lc;
1973 foreach (lc, q->targetList) {
1974 TargetEntry *te = (TargetEntry *)lfirst(lc);
1975 if (te->ressortgroupref > 0) {
1976 if(expression_tree_walker((Node *)te, provenance_function_walker,
1977 (void *)constants)) {
1978 return true;
1979 }
1980
1981#if PG_VERSION_NUM >= 180000
1982 // Starting from PostgreSQL 18, the content of the GROUP BY is not
1983 // in the groupClause but in an associated RTE_GROUP RangeTblEntry
1984 if(IsA(te->expr, Var)) {
1985 Var *v = (Var *) te->expr;
1986 RangeTblEntry *r = (RangeTblEntry *)list_nth(q->rtable, v->varno - 1);
1987 if(r->rtekind == RTE_GROUP)
1988 if(expression_tree_walker((Node *) r->groupexprs, provenance_function_walker,
1989 (void *)constants)) {
1990 return true;
1991 }
1992 }
1993#endif
1994 }
1995 }
1996
1997 return false;
1998}
1999
2000/**
2001 * @brief Tree walker that detects any provenance-bearing relation or provenance() call.
2002 * @param node Current expression tree node.
2003 * @param data Pointer to @c constants_t (cast from @c void*).
2004 * @return @c true if provenance rewriting is needed for this node.
2005 */
2006static bool has_provenance_walker(Node *node, void *data) {
2007 const constants_t *constants = (const constants_t *)data;
2008 if (node == NULL)
2009 return false;
2010
2011 if (IsA(node, Query)) {
2012 Query *q = (Query *)node;
2013 ListCell *rc;
2014
2015 if (query_tree_walker(q, has_provenance_walker, data, 0))
2016 return true;
2017
2018 foreach (rc, q->rtable) {
2019 RangeTblEntry *r = (RangeTblEntry *)lfirst(rc);
2020 if (r->rtekind == RTE_RELATION) {
2021 ListCell *lc;
2022 AttrNumber attid = 1;
2023
2024 foreach (lc, r->eref->colnames) {
2025 const char *v = strVal(lfirst(lc));
2026
2027 if (!strcmp(v, PROVSQL_COLUMN_NAME) &&
2028 get_atttype(r->relid, attid) == constants->OID_TYPE_UUID) {
2029 return true;
2030 }
2031
2032 ++attid;
2033 }
2034 } else if (r->rtekind == RTE_FUNCTION) {
2035 ListCell *lc;
2036 AttrNumber attid = 1;
2037
2038 foreach (lc, r->functions) {
2039 RangeTblFunction *func = (RangeTblFunction *)lfirst(lc);
2040
2041 if (func->funccolcount == 1) {
2042 FuncExpr *expr = (FuncExpr *)func->funcexpr;
2043 if (expr->funcresulttype == constants->OID_TYPE_UUID &&
2044 !strcmp(get_rte_attribute_name(r, attid),
2046 return true;
2047 }
2048 }
2049
2050 attid += func->funccolcount;
2051 }
2052 }
2053 }
2054 }
2055
2056 return expression_tree_walker(node, provenance_function_walker, data);
2057}
2058
2059/**
2060 * @brief Return true if @p q involves any provenance-bearing relation or
2061 * contains an explicit @c provenance() call.
2062 *
2063 * This is the gate condition checked by @c provsql_planner before doing any
2064 * rewriting: if neither condition holds the query is passed through unchanged.
2065 *
2066 * @param constants Extension OID cache.
2067 * @param q Query to inspect.
2068 * @return True if provenance rewriting is needed.
2069 */
2070static bool has_provenance(const constants_t *constants, Query *q) {
2071 return has_provenance_walker((Node *)q, (void *)constants);
2072}
2073
2074/**
2075 * @brief Tree walker that detects any Var of type agg_token.
2076 * @param node Current expression tree node.
2077 * @param constants Extension OID cache.
2078 * @return @c true if an agg_token Var is found in @p node.
2079 */
2080static bool aggtoken_walker(Node *node, const constants_t *constants) {
2081 if (node == NULL)
2082 return false;
2083
2084 if (IsA(node, Var)) {
2085 Var *v = (Var *) node;
2086 if(v->vartype == constants->OID_TYPE_AGG_TOKEN)
2087 return true;
2088 }
2089
2090 return expression_tree_walker(node, aggtoken_walker, (void*) constants);
2091}
2092
2093/**
2094 * @brief Return true if @p node contains a @c Var of type @c agg_token.
2095 *
2096 * Used to detect whether a WHERE clause references an aggregate result
2097 * (which must be moved to HAVING).
2098 *
2099 * @param node Expression tree to inspect.
2100 * @param constants Extension OID cache.
2101 * @return True if an @c agg_token @c Var is found anywhere in @p node.
2102 */
2103static bool has_aggtoken(Node *node, const constants_t *constants) {
2104 return expression_tree_walker(node, aggtoken_walker, (void*) constants);
2105}
2106
2107/**
2108 * @brief Rewrite an EXCEPT query into a LEFT JOIN with monus provenance.
2109 *
2110 * EXCEPT cannot be handled directly because it deduplicates. This function
2111 * transforms:
2112 * @code
2113 * SELECT … FROM A EXCEPT SELECT … FROM B
2114 * @endcode
2115 * into a LEFT JOIN of A and B on equality of all non-provenance columns,
2116 * clears @c setOperations, and leaves the monus token combination to
2117 * @c make_provenance_expression (which will see @c SR_MONUS).
2118 *
2119 * Only simple (non-chained) EXCEPT is supported; chained EXCEPT raises an
2120 * error.
2121 *
2122 * @param constants Extension OID cache.
2123 * @param q Query to rewrite in place.
2124 * @return Always true (errors out on unsupported cases).
2125 */
2126static bool transform_except_into_join(const constants_t *constants, Query *q) {
2127 SetOperationStmt *setOps = (SetOperationStmt *)q->setOperations;
2128 RangeTblEntry *rte = makeNode(RangeTblEntry);
2129 FromExpr *fe = makeNode(FromExpr);
2130 JoinExpr *je = makeNode(JoinExpr);
2131 BoolExpr *expr = makeNode(BoolExpr);
2132 ListCell *lc;
2133 int attno = 1;
2134
2135 if (!IsA(setOps->larg, RangeTblRef) || !IsA(setOps->rarg, RangeTblRef)) {
2136 provsql_error("Unsupported chain of EXCEPT operations");
2137 }
2138
2139 expr->boolop = AND_EXPR;
2140 expr->location = -1;
2141 expr->args = NIL;
2142
2143 foreach (lc, q->targetList) {
2144 TargetEntry *te = (TargetEntry *)lfirst(lc);
2145 Var *v;
2146
2147 if (!IsA(te->expr, Var))
2148 provsql_error("EXCEPT query format not supported");
2149
2150 v = (Var *)te->expr;
2151
2152 if (v->vartype != constants->OID_TYPE_UUID) {
2153 OpExpr *oe = makeNode(OpExpr);
2154 Oid opno = find_equality_operator(v->vartype, v->vartype);
2155 Operator opInfo = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno));
2156 Form_pg_operator opform;
2157 Var *leftArg, *rightArg;
2158
2159 if (!HeapTupleIsValid(opInfo))
2160 provsql_error("could not find operator with OID %u to compare variables of type %u",
2161 opno, v->vartype);
2162
2163 opform = (Form_pg_operator)GETSTRUCT(opInfo);
2164 leftArg = makeNode(Var);
2165 rightArg = makeNode(Var);
2166
2167 oe->opno = opno;
2168 oe->opfuncid = opform->oprcode;
2169 oe->opresulttype = opform->oprresult;
2170 oe->opcollid = InvalidOid;
2171 oe->inputcollid = DEFAULT_COLLATION_OID;
2172
2173 leftArg->varno = ((RangeTblRef *)setOps->larg)->rtindex;
2174 rightArg->varno = ((RangeTblRef *)setOps->rarg)->rtindex;
2175 leftArg->varattno = rightArg->varattno = attno;
2176
2177#if PG_VERSION_NUM >= 130000
2178 leftArg->varnosyn = rightArg->varnosyn = 0;
2179 leftArg->varattnosyn = rightArg->varattnosyn = 0;
2180#else
2181 leftArg->varnoold = leftArg->varno;
2182 rightArg->varnoold = rightArg->varno;
2183 leftArg->varoattno = rightArg->varoattno = attno;
2184#endif
2185
2186 leftArg->vartype = rightArg->vartype = v->vartype;
2187 leftArg->varcollid = rightArg->varcollid = InvalidOid;
2188 leftArg->vartypmod = rightArg->vartypmod = -1;
2189 leftArg->location = rightArg->location = -1;
2190
2191 oe->args = list_make2(leftArg, rightArg);
2192 oe->location = -1;
2193 expr->args = lappend(expr->args, oe);
2194
2195 ReleaseSysCache(opInfo);
2196 }
2197
2198 ++attno;
2199 }
2200
2201 rte->alias = NULL;
2202 rte->eref = NULL;
2203 rte->joinaliasvars = NULL;
2204
2205 rte->rtekind = RTE_JOIN;
2206 rte->jointype = JOIN_LEFT;
2207
2208 q->rtable = lappend(q->rtable, rte);
2209
2210 je->jointype = JOIN_LEFT;
2211
2212 je->larg = setOps->larg;
2213 je->rarg = setOps->rarg;
2214 je->quals = (Node *)expr;
2215 je->rtindex = list_length(q->rtable);
2216
2217 fe->fromlist = list_make1(je);
2218
2219 q->jointree = fe;
2220
2221 // TODO: Add group by in the right-side table
2222
2223 q->setOperations = 0;
2224
2225 return true;
2226}
2227
2228/**
2229 * @brief Recursively annotate a UNION tree with the provenance UUID type.
2230 *
2231 * Walks the @c SetOperationStmt tree of a UNION and appends the UUID type
2232 * to @c colTypes / @c colTypmods / @c colCollations on every node, and sets
2233 * @c all = true so that PostgreSQL does not deduplicate the combined stream.
2234 * The non-ALL deduplication has already been moved to an outer GROUP BY by
2235 * @c rewrite_non_all_into_external_group_by before this is called.
2236 *
2237 * @param constants Extension OID cache.
2238 * @param stmt Root (or subtree) of the UNION @c SetOperationStmt.
2239 */
2240static void process_set_operation_union(const constants_t *constants,
2241 SetOperationStmt *stmt) {
2242 if (stmt->op != SETOP_UNION) {
2243 provsql_error("Unsupported mixed set operations");
2244 }
2245 if (IsA(stmt->larg, SetOperationStmt)) {
2246 process_set_operation_union(constants, (SetOperationStmt *)(stmt->larg));
2247 }
2248 if (IsA(stmt->rarg, SetOperationStmt)) {
2249 process_set_operation_union(constants, (SetOperationStmt *)(stmt->rarg));
2250 }
2251 stmt->colTypes = lappend_oid(stmt->colTypes, constants->OID_TYPE_UUID);
2252 stmt->colTypmods = lappend_int(stmt->colTypmods, -1);
2253 stmt->colCollations = lappend_int(stmt->colCollations, 0);
2254 stmt->all = true;
2255}
2256
2257/**
2258 * @brief Add a WHERE condition filtering out zero-provenance tuples.
2259 *
2260 * For EXCEPT queries, tuples whose provenance evaluates to zero (i.e., the
2261 * right-hand side fully subsumes the left-hand side) must be excluded from
2262 * the result. This function appends @c provsql <> gate_zero() to
2263 * @p q->jointree->quals, ANDing with any existing WHERE condition.
2264 *
2265 * @param constants Extension OID cache.
2266 * @param q Query to modify in place.
2267 * @param provsql Provenance expression that was added to the SELECT list.
2268 */
2269static void add_select_non_zero(const constants_t *constants, Query *q,
2270 Expr *provsql) {
2271 FuncExpr *gate_zero = makeNode(FuncExpr);
2272 OpExpr *oe = makeNode(OpExpr);
2273
2274 gate_zero->funcid = constants->OID_FUNCTION_GATE_ZERO;
2275 gate_zero->funcresulttype = constants->OID_TYPE_UUID;
2276
2277 oe->opno = constants->OID_OPERATOR_NOT_EQUAL_UUID;
2278 oe->opfuncid = constants->OID_FUNCTION_NOT_EQUAL_UUID;
2279 oe->opresulttype = BOOLOID;
2280 oe->args = list_make2(provsql, gate_zero);
2281 oe->location = -1;
2282
2283 if (q->jointree->quals != NULL) {
2284 BoolExpr *be = makeNode(BoolExpr);
2285
2286 be->boolop = AND_EXPR;
2287 be->args = list_make2(oe, q->jointree->quals);
2288 be->location = -1;
2289
2290 q->jointree->quals = (Node *)be;
2291 } else
2292 q->jointree->quals = (Node *)oe;
2293}
2294
2295/**
2296 * @brief Append @p expr to @p havingQual with an AND, creating one if needed.
2297 *
2298 * If @p havingQual is NULL, returns @p expr directly. If it is already an
2299 * AND @c BoolExpr, appends to its argument list. Otherwise wraps both in a
2300 * new AND node.
2301 *
2302 * @param havingQual Existing HAVING qualifier, or NULL.
2303 * @param expr Expression to conjoin.
2304 * @return The updated HAVING qualifier.
2305 */
2306static Node *add_to_havingQual(Node *havingQual, Expr *expr)
2307{
2308 if(!havingQual) {
2309 havingQual = (Node*) expr;
2310 } else if(IsA(havingQual, BoolExpr) && ((BoolExpr*)havingQual)->boolop==AND_EXPR) {
2311 BoolExpr *be = (BoolExpr*)havingQual;
2312 be->args = lappend(be->args, expr);
2313 } else if(IsA(havingQual, OpExpr) || IsA(havingQual, BoolExpr)) {
2314 /* BoolExpr that is not an AND (OR/NOT): wrap with a new AND node. */
2315 BoolExpr *be = makeNode(BoolExpr);
2316 be->boolop=AND_EXPR;
2317 be->location=-1;
2318 be->args = list_make2(havingQual, expr);
2319 havingQual = (Node*) be;
2320 } else
2321 provsql_error("Unknown structure within Boolean expression");
2322
2323 return havingQual;
2324}
2325
2326/**
2327 * @brief Check whether @p op is a supported comparison on an aggregate result.
2328 *
2329 * Returns true iff @p op is a two-argument operator where at least one
2330 * argument is a @c Var of type @c agg_token (or an implicit-cast wrapper
2331 * thereof) and the other is a @c Const (possibly cast). This is the set
2332 * of WHERE-on-aggregate patterns that ProvSQL can safely move to a HAVING
2333 * clause.
2334 *
2335 * @param op The @c OpExpr to inspect.
2336 * @param constants Extension OID cache.
2337 * @return True if the pattern is supported, false otherwise.
2338 */
2339static bool check_selection_on_aggregate(OpExpr *op, const constants_t *constants)
2340{
2341 bool ok=true;
2342 bool found_agg_token=false;
2343
2344 if(op->args->length != 2)
2345 return false;
2346
2347 for(unsigned i=0; i<2; ++i) {
2348 Node *arg = lfirst(list_nth_cell(op->args, i));
2349
2350 // Check both arguments are either an aggtoken or a constant
2351 // (possibly after a cast)
2352 if((IsA(arg, Var) && ((Var*)arg)->vartype==constants->OID_TYPE_AGG_TOKEN)) {
2353 found_agg_token=true;
2354 } else if(IsA(arg, Const)) {
2355 } else if(IsA(arg, FuncExpr)) {
2356 FuncExpr *fe = (FuncExpr*) arg;
2357 if(fe->funcformat != COERCE_IMPLICIT_CAST && fe->funcformat != COERCE_EXPLICIT_CAST) {
2358 ok=false;
2359 break;
2360 }
2361 if(fe->args->length != 1) {
2362 ok=false;
2363 break;
2364 }
2365 if(!IsA(lfirst(list_head(fe->args)), Const)) {
2366 ok=false;
2367 break;
2368 }
2369 } else {
2370 ok=false;
2371 break;
2372 }
2373 }
2374
2375 return ok && found_agg_token;
2376}
2377
2378/**
2379 * @brief Check whether every leaf of a Boolean expression is a supported
2380 * comparison on an aggregate result.
2381 *
2382 * Recursively validates @c OpExpr leaves via @c check_selection_on_aggregate
2383 * and descends into nested @c BoolExpr nodes.
2384 *
2385 * @param be The Boolean expression to validate.
2386 * @param constants Extension OID cache.
2387 * @return True if all leaves are supported, false if any is not.
2388 */
2389static bool check_boolexpr_on_aggregate(BoolExpr *be, const constants_t *constants)
2390{
2391 ListCell *lc;
2392
2393 foreach (lc, be->args) {
2394 Node *n=lfirst(lc);
2395 if(IsA(n, OpExpr)) {
2396 if(!check_selection_on_aggregate((OpExpr*) n, constants))
2397 return false;
2398 } else if(IsA(n, BoolExpr)) {
2399 if(!check_boolexpr_on_aggregate((BoolExpr*) n, constants))
2400 return false;
2401 } else
2402 return false;
2403 }
2404
2405 return true;
2406}
2407
2408/**
2409 * @brief Top-level dispatcher for supported WHERE-on-aggregate patterns.
2410 *
2411 * @param expr Expression to validate (@c OpExpr or @c BoolExpr).
2412 * @param constants Extension OID cache.
2413 * @return True if ProvSQL can handle this expression.
2414 */
2415static bool check_expr_on_aggregate(Expr *expr, const constants_t *constants) {
2416 switch(expr->type) {
2417 case T_BoolExpr:
2418 return check_boolexpr_on_aggregate((BoolExpr*) expr, constants);
2419 case T_OpExpr:
2420 return check_selection_on_aggregate((OpExpr*) expr, constants);
2421 default:
2422 provsql_error("Unknown structure within Boolean expression");
2423 }
2424}
2425
2426/* -------------------------------------------------------------------------
2427 * Main query transformation
2428 * ------------------------------------------------------------------------- */
2429
2430/**
2431 * @brief Build the per-RTE column-numbering map used by where-provenance.
2432 *
2433 * Assigns a sequential position (1, 2, 3, …) to every non-provenance,
2434 * non-join, non-empty column across all RTEs in @p q->rtable. The
2435 * @c provsql column is assigned -1 so callers can detect it. Join-RTE
2436 * columns and empty-named columns (used for anonymous GROUP BY keys) are
2437 * assigned 0.
2438 *
2439 * @param q Query whose range table is mapped.
2440 * @param columns Pre-allocated array of length @p q->rtable->length.
2441 * Each element is allocated and filled by this function.
2442 * @param nbcols Out-param: total number of non-provenance output columns.
2443 */
2444static void build_column_map(Query *q, int **columns, int *nbcols) {
2445 unsigned i = 0;
2446 ListCell *l;
2447
2448 *nbcols = 0;
2449
2450 foreach (l, q->rtable) {
2451 RangeTblEntry *r = (RangeTblEntry *)lfirst(l);
2452 ListCell *lc;
2453
2454 columns[i] = 0;
2455 if (r->eref) {
2456 unsigned j = 0;
2457
2458 columns[i] = (int *)palloc(r->eref->colnames->length * sizeof(int));
2459
2460 foreach (lc, r->eref->colnames) {
2461 if (!lfirst(lc)) {
2462 /* Column without name — used e.g. when grouping by a discarded column */
2463 columns[i][j] = ++(*nbcols);
2464 } else {
2465 const char *v = strVal(lfirst(lc));
2466
2467 if (strcmp(v, "") && r->rtekind != RTE_JOIN) { /* join RTE columns ignored */
2468 if (!strcmp(v, PROVSQL_COLUMN_NAME))
2469 columns[i][j] = -1;
2470 else
2471 columns[i][j] = ++(*nbcols);
2472 } else {
2473 columns[i][j] = 0;
2474 }
2475 }
2476
2477 ++j;
2478 }
2479 }
2480
2481 ++i;
2482 }
2483}
2484
2485/**
2486 * @brief Move WHERE conditions on aggregate results (@c agg_token) to HAVING.
2487 *
2488 * Supported patterns (moved to HAVING):
2489 * - The entire WHERE is a supported agg comparison.
2490 * - The WHERE is a top-level AND where some conjuncts reference aggregates
2491 * (those are extracted individually) and the rest remain in WHERE.
2492 *
2493 * Unsupported patterns (e.g., "WHERE x=1 OR c>3") raise an error.
2494 *
2495 * @param constants Extension OID cache.
2496 * @param q Query to modify in place (@c jointree->quals and
2497 * @c havingQual may both be updated).
2498 */
2500 Query *q) {
2501 if (!q->jointree || !q->jointree->quals)
2502 return;
2503
2504 if (!has_aggtoken(q->jointree->quals, constants))
2505 return;
2506
2507 /*
2508 * We support WHERE clauses that are (possibly trivial) AND conjunctions of:
2509 * - Conditions that do not mention aggregates (kept in WHERE).
2510 * - Arbitrary Boolean combinations that all refer to aggregates and that
2511 * check_expr_on_aggregate accepts (moved to HAVING).
2512 * Other forms (e.g., "WHERE x=1 OR c>3") are not supported.
2513 */
2514 if (check_expr_on_aggregate((Expr *)q->jointree->quals, constants)) {
2515 /* Entire WHERE is an agg comparison — move it wholesale to HAVING */
2516 q->havingQual =
2517 add_to_havingQual(q->havingQual, (Expr *)q->jointree->quals);
2518 q->jointree->quals = NULL;
2519 } else if (IsA(q->jointree->quals, BoolExpr)) {
2520 BoolExpr *be = (BoolExpr *)q->jointree->quals;
2521 if (be->boolop == AND_EXPR) {
2522 /* Split the AND: move agg conjuncts to HAVING, leave the rest */
2523 ListCell *cell, *prev;
2524 for (cell = list_head(be->args), prev = NULL; cell != NULL;) {
2525 if (has_aggtoken(lfirst(cell), constants)) {
2526 Expr *expr = (Expr *)lfirst(cell);
2527
2528 if (check_expr_on_aggregate(expr, constants)) {
2529 be->args = my_list_delete_cell(be->args, cell, prev);
2530 if (prev)
2531 cell = my_lnext(be->args, prev);
2532 else
2533 cell = list_head(be->args);
2534
2535 q->havingQual = add_to_havingQual(q->havingQual, expr);
2536 } else {
2537 provsql_error("Complex selection on aggregation results not supported");
2538 }
2539 } else {
2540 prev = cell;
2541 cell = my_lnext(be->args, cell);
2542 }
2543 }
2544 } else {
2545 provsql_error("Complex selection on aggregation results not supported");
2546 }
2547 } else {
2548 provsql_error("Unknown structure within Boolean expression");
2549 }
2550}
2551
2552/**
2553 * @brief Rewrite a single SELECT query to carry provenance.
2554 *
2555 * This is the recursive entry point for the provenance rewriter. It is
2556 * called from @c provsql_planner for top-level queries and re-entered from
2557 * @c get_provenance_attributes for subqueries in FROM.
2558 *
2559 * High-level steps:
2560 * 1. Strip any @c provsql column propagated into this query's target list.
2561 * 2. Detect and rewrite structural forms requiring pre-processing:
2562 * non-ALL set operations (wrap in outer GROUP BY), AGG DISTINCT (push
2563 * into a subquery), DISTINCT (convert to GROUP BY).
2564 * 3. Collect provenance attributes via @c get_provenance_attributes.
2565 * 4. Build a column-numbering map for where-provenance (@c build_column_map).
2566 * 5. Handle aggregates, migrate WHERE-on-aggregate to HAVING, and set ops.
2567 * 6. Build and splice the combined provenance expression.
2568 *
2569 * @param constants Extension OID cache.
2570 * @param q Query to rewrite (modified in place).
2571 * @param removed Out-param: boolean array indicating which original target
2572 * list entries were provenance columns and were removed.
2573 * May be @c NULL if the caller does not need this info.
2574 * @return The (possibly restructured) rewritten query, or @c NULL if the
2575 * query has no FROM clause and can be skipped.
2576 */
2577static Query *process_query(const constants_t *constants, Query *q,
2578 bool **removed) {
2579 List *prov_atts;
2580 bool has_union = false;
2581 bool has_difference = false;
2582 bool supported = true;
2583 bool group_by_rewrite = false;
2584 int nbcols = 0;
2585 int **columns;
2586 unsigned i = 0;
2587 if (provsql_verbose >= 50)
2588 elog_node_display(NOTICE, "Before ProvSQL query rewriting", q, true);
2589
2590 if (q->rtable == NULL) {
2591 // No FROM clause, we can skip this query
2592 return NULL;
2593 }
2594
2595 {
2596 Bitmapset *removed_sortgrouprefs = NULL;
2597
2598 if (q->targetList) {
2599 removed_sortgrouprefs =
2600 remove_provenance_attributes_select(constants, q, removed);
2601 if (removed_sortgrouprefs != NULL)
2602 remove_provenance_attribute_groupref(q, removed_sortgrouprefs);
2603 if (q->setOperations)
2605 }
2606 }
2607
2608 if(provsql_active) {
2609 columns = (int **)palloc(q->rtable->length * sizeof(int *));
2610
2611 if (q->setOperations) {
2612 // TODO: Nest set operations as subqueries in FROM,
2613 // so that we only do set operations on base tables
2614
2615 SetOperationStmt *stmt = (SetOperationStmt *)q->setOperations;
2616 if (!stmt->all) {
2618 return process_query(constants, q, removed);
2619 }
2620 }
2621
2622 if (q->hasAggs) {
2623 Query *rewritten = rewrite_agg_distinct(q, constants);
2624 if (rewritten)
2625 return process_query(constants, rewritten, removed);
2626 }
2627
2628 // get_provenance_attributes will also recursively process subqueries
2629 // by calling process_query
2630 prov_atts = get_provenance_attributes(constants, q);
2631
2632 if (prov_atts == NIL)
2633 return q;
2634
2635 if (q->hasSubLinks) {
2636 provsql_error("Subqueries in WHERE clause not supported by provsql");
2637 supported = false;
2638 }
2639
2640 if (supported && q->distinctClause) {
2641 if (q->hasDistinctOn) {
2642 provsql_error("DISTINCT ON not supported by provsql");
2643 supported = false;
2644 } else if (list_length(q->distinctClause) < list_length(q->targetList)) {
2645 provsql_error("Inconsistent DISTINCT and GROUP BY clauses not "
2646 "supported by provsql");
2647 supported = false;
2648 } else {
2650 }
2651 }
2652
2653 if (supported && q->setOperations) {
2654 SetOperationStmt *stmt = (SetOperationStmt *)q->setOperations;
2655
2656 if (stmt->op == SETOP_UNION) {
2657 process_set_operation_union(constants, stmt);
2658 has_union = true;
2659 } else if (stmt->op == SETOP_EXCEPT) {
2660 if (!transform_except_into_join(constants, q))
2661 supported = false;
2662 has_difference = true;
2663 } else {
2664 provsql_error("Set operations other than UNION and EXCEPT not "
2665 "supported by provsql");
2666 supported = false;
2667 }
2668 }
2669
2670 if (supported && q->groupClause &&
2671 !provenance_function_in_group_by(constants, q)) {
2672 group_by_rewrite = true;
2673 }
2674
2675 if (supported && q->groupingSets) {
2676 if (q->groupClause || list_length(q->groupingSets) > 1 ||
2677 ((GroupingSet *)linitial(q->groupingSets))->kind !=
2678 GROUPING_SET_EMPTY) {
2679 provsql_error("GROUPING SETS, CUBE, and ROLLUP not supported by provsql");
2680 supported = false;
2681 } else {
2682 // Simple GROUP BY ()
2683 group_by_rewrite = true;
2684 }
2685 }
2686
2687 if (supported)
2688 build_column_map(q, columns, &nbcols);
2689
2690 if (supported) {
2691 Expr *provenance;
2692
2693 if (q->hasAggs) {
2694 ListCell *lc_sort;
2695
2696 // Compute aggregation expressions
2698 constants, q, prov_atts,
2699 has_union ? SR_PLUS : (has_difference ? SR_MONUS : SR_TIMES));
2700
2701 // If there are any sort clauses on something whose type is now
2702 // aggregate token, we throw an error: sorting aggregation values
2703 // when provenance is captured is ill-defined
2704 foreach (lc_sort, q->sortClause) {
2705 SortGroupClause *sort = (SortGroupClause *)lfirst(lc_sort);
2706 ListCell *lc_te;
2707 foreach (lc_te, q->targetList) {
2708 TargetEntry *te = (TargetEntry *)lfirst(lc_te);
2709 if (sort->tleSortGroupRef == te->ressortgroupref) {
2710 if (exprType((Node *)te->expr) == constants->OID_TYPE_AGG_TOKEN)
2711 provsql_error("ORDER BY on the result of an aggregate function is "
2712 "not supported by ProvSQL");
2713 break;
2714 }
2715 }
2716 }
2717 }
2718
2719 /* Move any WHERE comparisons on aggregate results to HAVING */
2721
2723 constants, q, prov_atts, q->hasAggs, group_by_rewrite,
2724 has_union ? SR_PLUS : (has_difference ? SR_MONUS : SR_TIMES), columns,
2725 nbcols);
2726
2729
2730 if (has_difference)
2731 add_select_non_zero(constants, q, provenance);
2732 }
2733
2734 for (i = 0; i < q->rtable->length; ++i) {
2735 if (columns[i])
2736 pfree(columns[i]);
2737 }
2738 }
2739
2740 if (provsql_verbose >= 50)
2741 elog_node_display(NOTICE, "After ProvSQL query rewriting", q, true);
2742
2743 return q;
2744}
2745
2746/* -------------------------------------------------------------------------
2747 * Planner hook & extension lifecycle
2748 * ------------------------------------------------------------------------- */
2749
2750/**
2751 * @brief PostgreSQL planner hook — entry point for provenance rewriting.
2752 *
2753 * Replaces (or chains after) the standard planner. For every CMD_SELECT
2754 * that involves at least one provenance-bearing relation or an explicit
2755 * @c provenance() call, rewrites the query via @c process_query before
2756 * handing the result to the standard planner. Non-SELECT commands and
2757 * queries without provenance are passed through unchanged.
2758 * @param q The query to plan.
2759 * @param cursorOptions Cursor options bitmask.
2760 * @param boundParams Pre-bound parameter values.
2761 * @return The planned statement.
2762 */
2763static PlannedStmt *provsql_planner(Query *q,
2764#if PG_VERSION_NUM >= 130000
2765 const char *query_string,
2766#endif
2767 int cursorOptions,
2768 ParamListInfo boundParams) {
2769 if (q->commandType == CMD_SELECT && q->rtable) {
2770 const constants_t constants = get_constants(false);
2771
2772 if (constants.ok && has_provenance(&constants, q)) {
2773 bool *removed = NULL;
2774 Query *new_query;
2775 clock_t begin = 0;
2776
2777#if PG_VERSION_NUM >= 150000
2778 if (provsql_verbose >= 20)
2779 provsql_notice("Main query before ProvSQL query rewriting:\n%s\n",
2780 pg_get_querydef(q, true));
2781#endif
2782
2783 if (provsql_verbose >= 40)
2784 begin = clock();
2785
2786 new_query = process_query(&constants, q, &removed);
2787
2788 if (provsql_verbose >= 40)
2789 provsql_notice("planner time spent=%f",
2790 (double)(clock() - begin) / CLOCKS_PER_SEC);
2791
2792#if PG_VERSION_NUM >= 150000
2793 if (provsql_verbose >= 20)
2794 provsql_notice("Main query after ProvSQL query rewriting:\n%s\n",
2795 pg_get_querydef(q, true));
2796#endif
2797
2798 if (new_query != NULL)
2799 q = new_query;
2800 }
2801 }
2802
2803 if (prev_planner)
2804 return prev_planner(q,
2805#if PG_VERSION_NUM >= 130000
2806 query_string,
2807#endif
2808 cursorOptions, boundParams);
2809 else
2810 return standard_planner(q,
2811#if PG_VERSION_NUM >= 130000
2812 query_string,
2813#endif
2814 cursorOptions, boundParams);
2815}
2816
2817/**
2818 * @brief Extension initialization — called once when the shared library is loaded.
2819 *
2820 * Registers the four GUC variables (@c provsql.active, @c where_provenance,
2821 * @c update_provenance, @c verbose_level), installs the planner hook and
2822 * shared-memory hooks, and launches the background MMap worker.
2823 *
2824 * Must be loaded via @c shared_preload_libraries; raises an error otherwise.
2825 */
2826void _PG_init(void) {
2827 if (!process_shared_preload_libraries_in_progress)
2828 provsql_error("provsql needs to be added to the shared_preload_libraries "
2829 "configuration variable");
2830
2831 DefineCustomBoolVariable("provsql.active",
2832 "Should ProvSQL track provenance?",
2833 "1 is standard ProvSQL behavior, 0 means provsql attributes will be dropped.",
2835 true,
2836 PGC_USERSET,
2837 0,
2838 NULL,
2839 NULL,
2840 NULL);
2841 DefineCustomBoolVariable("provsql.where_provenance",
2842 "Should ProvSQL track where-provenance?",
2843 "1 turns where-provenance on, 0 off.",
2845 false,
2846 PGC_USERSET,
2847 0,
2848 NULL,
2849 NULL,
2850 NULL);
2851 DefineCustomBoolVariable("provsql.update_provenance",
2852 "Should ProvSQL track update provenance?",
2853 "1 turns update provenance on, 0 off.",
2855 false,
2856 PGC_USERSET,
2857 0,
2858 NULL,
2859 NULL,
2860 NULL);
2861 DefineCustomIntVariable("provsql.verbose_level",
2862 "Level of verbosity for ProvSQL informational and debug messages",
2863 "0 for quiet (default), 1-9 for informational messages, 10-100 for debug information.",
2865 0,
2866 0,
2867 100,
2868 PGC_USERSET,
2869 1,
2870 NULL,
2871 NULL,
2872 NULL);
2873
2874 // Emit warnings for undeclared provsql.* configuration parameters
2875 EmitWarningsOnPlaceholders("provsql");
2876
2877 prev_planner = planner_hook;
2878 prev_shmem_startup = shmem_startup_hook;
2879#if (PG_VERSION_NUM >= 150000)
2880 prev_shmem_request = shmem_request_hook;
2881 shmem_request_hook = provsql_shmem_request;
2882#else
2884#endif
2885
2886 planner_hook = provsql_planner;
2887 shmem_startup_hook = provsql_shmem_startup;
2888
2890}
2891
2892/**
2893 * @brief Extension teardown — restores the planner and shmem hooks.
2894 */
2895void _PG_fini(void) {
2896 planner_hook = prev_planner;
2897 shmem_startup_hook = prev_shmem_startup;
2898}
List * list_insert_nth(List *list, int pos, void *datum)
Insert datum at position pos in list (PG < 13 backport).
PostgreSQL cross-version compatibility shims for ProvSQL.
#define F_SUM_INT4
OID of sum(int4) aggregate function (pre-PG 14).
static List * my_list_delete_cell(List *list, ListCell *cell, ListCell *prev)
Version-agnostic wrapper around list_delete_cell().
static ListCell * my_lnext(const List *l, const ListCell *c)
Version-agnostic wrapper around lnext().
#define F_COUNT_
OID of count() aggregate function (pre-PG 14).
#define F_COUNT_ANY
OID of count(*) / count(any) aggregate function (pre-PG 14).
Datum provenance(PG_FUNCTION_ARGS)
Error stub for provsql.provenance() on untracked tables.
Definition provenance.c:31
static void transform_distinct_into_group_by(Query *q)
Convert a SELECT DISTINCT into an equivalent GROUP BY.
Definition provsql.c:1762
static void remove_provenance_attribute_groupref(Query *q, const Bitmapset *removed_sortgrouprefs)
Remove sort/group references that belonged to removed provenance columns.
Definition provsql.c:1795
static FuncExpr * having_Expr_to_provenance_cmp(Expr *expr, const constants_t *constants, bool negated)
Dispatch a HAVING sub-expression to the appropriate converter.
Definition provsql.c:908
static bool transform_except_into_join(const constants_t *constants, Query *q)
Rewrite an EXCEPT query into a LEFT JOIN with monus provenance.
Definition provsql.c:2126
bool provsql_where_provenance
Global variable that indicates whether where-provenance support has been activated through the provsql.where_provenance configuration parameter.
Definition provsql.c:64
static bool has_provenance_walker(Node *node, void *data)
Tree walker that detects any provenance-bearing relation or provenance() call.
Definition provsql.c:2006
static Node * aggregation_type_mutator(Node *node, aggregation_type_mutator_context *context)
Tree-mutator that retypes a specific Var to agg_token.
Definition provsql.c:202
int provsql_verbose
Verbosity level; controlled by the provsql.verbose_level GUC.
Definition provsql.c:66
static bool check_selection_on_aggregate(OpExpr *op, const constants_t *constants)
Check whether op is a supported comparison on an aggregate result.
Definition provsql.c:2339
void _PG_init(void)
Extension initialization — called once when the shared library is loaded.
Definition provsql.c:2826
static FuncExpr * having_BoolExpr_to_provenance(BoolExpr *be, const constants_t *constants, bool negated)
Convert a Boolean combination of HAVING comparisons into provenance_times / provenance_plus gate expressions.
Definition provsql.c:858
static bool has_aggtoken(Node *node, const constants_t *constants)
Return true if node contains a Var of type agg_token.
Definition provsql.c:2103
static void replace_provenance_function_by_expression(const constants_t *constants, Query *q, Expr *provsql)
Replace every explicit provenance() call in q with provsql.
Definition provsql.c:1744
static List * get_provenance_attributes(const constants_t *constants, Query *q)
Collect all provenance Var nodes reachable from q's range table.
Definition provsql.c:273
PG_MODULE_MAGIC
Required PostgreSQL extension magic block.
Definition provsql.c:56
static void migrate_aggtoken_quals_to_having(const constants_t *constants, Query *q)
Move WHERE conditions on aggregate results (agg_token) to HAVING.
Definition provsql.c:2499
static Query * build_inner_for_distinct_key(Query *q, Expr *key_expr, List *groupby_tes)
Build the inner GROUP-BY subquery for one AGG(DISTINCT key).
Definition provsql.c:1182
static Query * process_query(const constants_t *constants, Query *q, bool **removed)
Rewrite a single SELECT query to carry provenance.
Definition provsql.c:2577
static Node * add_to_havingQual(Node *havingQual, Expr *expr)
Append expr to havingQual with an AND, creating one if needed.
Definition provsql.c:2306
static void fix_type_of_aggregation_result(const constants_t *constants, Query *q, Index rteid, List *targetList)
Retypes aggregation-result Vars in q from UUID to agg_token.
Definition provsql.c:231
static void replace_aggregations_by_provenance_aggregate(const constants_t *constants, Query *q, List *prov_atts, semiring_operation op)
Replace every Aggref in q with a provenance-aware aggregate.
Definition provsql.c:1634
static Var * make_provenance_attribute(const constants_t *constants, Query *q, RangeTblEntry *r, Index relid, AttrNumber attid)
Build a Var node that references the provenance column of a relation.
Definition provsql.c:96
static void remove_provenance_attribute_setoperations(Query *q, bool *removed)
Strip the provenance column's type info from a set-operation node.
Definition provsql.c:1832
static void add_to_select(Query *q, Expr *provenance)
Append the provenance expression to q's target list.
Definition provsql.c:1654
void _PG_fini(void)
Extension teardown — restores the planner and shmem hooks.
Definition provsql.c:2895
static Node * provenance_mutator(Node *node, provenance_mutator_context *context)
Tree-mutator that replaces provenance() calls with the actual provenance expression.
Definition provsql.c:1712
static Query * rewrite_agg_distinct(Query *q, const constants_t *constants)
Rewrite every AGG(DISTINCT key) in q using independent subqueries.
Definition provsql.c:1360
bool provsql_interrupted
Global variable that becomes true if this particular backend received an interrupt signal.
Definition provsql.c:62
static Expr * add_eq_from_Quals_to_Expr(const constants_t *constants, Node *quals, Expr *result, int **columns)
Walk a join-condition or WHERE quals node and add eq gates for every equality it contains.
Definition provsql.c:585
static Query * rewrite_non_all_into_external_group_by(Query *q)
Wrap a non-ALL set operation in an outer GROUP BY query.
Definition provsql.c:1873
static PlannedStmt * provsql_planner(Query *q, int cursorOptions, ParamListInfo boundParams)
PostgreSQL planner hook — entry point for provenance rewriting.
Definition provsql.c:2763
bool provsql_active
true while ProvSQL query rewriting is enabled
Definition provsql.c:63
static bool provenance_function_in_group_by(const constants_t *constants, Query *q)
Check whether a provenance() call appears in the GROUP BY list.
Definition provsql.c:1970
static bool aggtoken_walker(Node *node, const constants_t *constants)
Tree walker that detects any Var of type agg_token.
Definition provsql.c:2080
static Bitmapset * remove_provenance_attributes_select(const constants_t *constants, Query *q, bool **removed)
Strip provenance UUID columns from q's SELECT list.
Definition provsql.c:420
static Expr * make_aggregation_expression(const constants_t *constants, Aggref *agg_ref, List *prov_atts, semiring_operation op)
Build the provenance expression for a single aggregate function.
Definition provsql.c:636
static Expr * add_eq_from_OpExpr_to_Expr(const constants_t *constants, OpExpr *fromOpExpr, Expr *toExpr, int **columns)
Wrap toExpr in a provenance_eq gate if fromOpExpr is an equality between two tracked columns.
Definition provsql.c:513
static planner_hook_type prev_planner
Previous planner hook (chained)
Definition provsql.c:73
static bool check_boolexpr_on_aggregate(BoolExpr *be, const constants_t *constants)
Check whether every leaf of a Boolean expression is a supported comparison on an aggregate result.
Definition provsql.c:2389
static Node * aggregation_mutator(Node *node, aggregation_mutator_context *context)
Tree-mutator that replaces Aggrefs with provenance-aware aggregates.
Definition provsql.c:1606
static Query * build_outer_for_distinct_key(TargetEntry *orig_agg_te, Query *inner, int n_gb, const constants_t *constants)
Wrap inner in an outer query that applies the original aggregate.
Definition provsql.c:1252
static Node * reduce_varattno_mutator(Node *node, reduce_varattno_mutator_context *context)
Tree-mutator callback that adjusts Var attribute numbers.
Definition provsql.c:148
static void build_column_map(Query *q, int **columns, int *nbcols)
Build the per-RTE column-numbering map used by where-provenance.
Definition provsql.c:2444
bool provsql_update_provenance
true when provenance tracking for DML is enabled
Definition provsql.c:65
static bool check_expr_on_aggregate(Expr *expr, const constants_t *constants)
Top-level dispatcher for supported WHERE-on-aggregate patterns.
Definition provsql.c:2415
static bool provenance_function_walker(Node *node, void *data)
Tree walker that returns true if any provenance() call is found.
Definition provsql.c:1945
semiring_operation
Semiring operation used to combine provenance tokens.
Definition provsql.c:485
@ SR_PLUS
Semiring addition (UNION, SELECT DISTINCT)
Definition provsql.c:486
@ SR_TIMES
Semiring multiplication (JOIN, Cartesian product)
Definition provsql.c:488
@ SR_MONUS
Semiring monus / set difference (EXCEPT)
Definition provsql.c:487
static Expr * make_provenance_expression(const constants_t *constants, Query *q, List *prov_atts, bool aggregation, bool group_by_rewrite, semiring_operation op, int **columns, int nbcols)
Build the combined provenance expression to be added to the SELECT list.
Definition provsql.c:948
static const char * PROVSQL_COLUMN_NAME
Name of the provenance column added to tracked tables.
Definition provsql.c:68
static void process_set_operation_union(const constants_t *constants, SetOperationStmt *stmt)
Recursively annotate a UNION tree with the provenance UUID type.
Definition provsql.c:2240
static FuncExpr * having_OpExpr_to_provenance_cmp(OpExpr *opExpr, const constants_t *constants, bool negated)
Convert a comparison OpExpr on aggregate results into a provenance_cmp gate expression.
Definition provsql.c:754
static void add_select_non_zero(const constants_t *constants, Query *q, Expr *provsql)
Add a WHERE condition filtering out zero-provenance tuples.
Definition provsql.c:2269
static void reduce_varattno_by_offset(List *targetList, Index varno, int *offset)
Adjust Var attribute numbers in targetList after columns are removed.
Definition provsql.c:177
static bool has_provenance(const constants_t *constants, Query *q)
Return true if q involves any provenance-bearing relation or contains an explicit provenance() call.
Definition provsql.c:2070
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
#define provsql_notice(fmt,...)
Emit a ProvSQL informational notice (execution continues).
void RegisterProvSQLMMapWorker(void)
Register the ProvSQL mmap background worker with PostgreSQL.
Background worker and IPC primitives for mmap-backed circuit storage.
void provsql_shmem_request(void)
Request shared memory from PostgreSQL (PG ≥ 15).
shmem_startup_hook_type prev_shmem_startup
Saved pointer to the previous shmem_startup_hook, for chaining.
void provsql_shmem_startup(void)
Initialise the ProvSQL shared-memory segment.
Shared-memory segment and inter-process pipe management.
Oid find_equality_operator(Oid ltypeId, Oid rtypeId)
Find the equality operator OID for two given types.
constants_t get_constants(bool failure_if_not_possible)
Retrieve the cached OID constants for the current database.
Core types, constants, and utilities shared across ProvSQL.
@ gate_zero
Semiring zero.
Context for the aggregation_mutator tree walker.
Definition provsql.c:1594
semiring_operation op
Semiring operation for combining tokens.
Definition provsql.c:1596
const constants_t * constants
Extension OID cache.
Definition provsql.c:1597
List * prov_atts
List of provenance Var nodes.
Definition provsql.c:1595
Context for the aggregation_type_mutator tree walker.
Definition provsql.c:189
const constants_t * constants
Extension OID cache.
Definition provsql.c:192
Index varattno
Attribute number of the aggregate column.
Definition provsql.c:191
Index varno
Range-table entry index of the aggregate var.
Definition provsql.c:190
Structure to store the value of various constants.
Oid OID_FUNCTION_PROVENANCE_EQ
OID of the provenance_eq FUNCTION.
Oid OID_FUNCTION_PROVENANCE_AGGREGATE
OID of the provenance_aggregate FUNCTION.
Oid OID_FUNCTION_PROVENANCE_SEMIMOD
OID of the provenance_semimod FUNCTION.
Oid OID_FUNCTION_PROVENANCE
OID of the provenance FUNCTION.
Oid OID_FUNCTION_AGG_TOKEN_UUID
OID of the agg_token_uuid FUNCTION.
Oid OID_FUNCTION_GATE_ZERO
OID of the provenance_zero FUNCTION.
Oid OID_FUNCTION_PROVENANCE_PROJECT
OID of the provenance_project FUNCTION.
Oid OID_TYPE_AGG_TOKEN
OID of the agg_token TYPE.
Oid OID_FUNCTION_ARRAY_AGG
OID of the array_agg FUNCTION.
Oid OID_TYPE_INT
OID of the INT TYPE.
Oid OID_FUNCTION_PROVENANCE_PLUS
OID of the provenance_plus FUNCTION.
Oid OID_OPERATOR_NOT_EQUAL_UUID
OID of the <> operator on UUIDs.
Oid OID_TYPE_UUID
OID of the uuid TYPE.
bool ok
true if constants were loaded
Oid OID_TYPE_INT_ARRAY
OID of the INT[] TYPE.
Oid OID_FUNCTION_PROVENANCE_DELTA
OID of the provenance_delta FUNCTION.
Oid OID_FUNCTION_PROVENANCE_TIMES
OID of the provenance_times FUNCTION.
Oid OID_FUNCTION_PROVENANCE_MONUS
OID of the provenance_monus FUNCTION.
Oid OID_FUNCTION_NOT_EQUAL_UUID
OID of the FUNCTION implementing the <> operator on UUIDs.
Oid OID_FUNCTION_GATE_ONE
OID of the provenance_one FUNCTION.
Oid OID_TYPE_UUID_ARRAY
OID of the uuid[] TYPE.
Oid OID_FUNCTION_PROVENANCE_CMP
OID of the provenance_cmp FUNCTION.
Context for the provenance_mutator tree walker.
Definition provsql.c:1701
const constants_t * constants
Extension OID cache.
Definition provsql.c:1703
Expr * provsql
Provenance expression to substitute for provenance() calls.
Definition provsql.c:1702
Context for the reduce_varattno_mutator tree walker.
Definition provsql.c:137
Index varno
Range-table entry whose attribute numbers are being adjusted.
Definition provsql.c:138
int * offset
Per-attribute cumulative shift to apply.
Definition provsql.c:139