ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
MMappedTableInfo.h
Go to the documentation of this file.
1/**
2 * @file MMappedTableInfo.h
3 * @brief Per-table provenance metadata persisted alongside the circuit store.
4 *
5 * @c ProvenanceTableInfo records, one per relation tracked by ProvSQL,
6 * are stored in a fifth mmap-backed file (@c provsql_table_info.mmap)
7 * inside each database's @c $PGDATA/base/<db_oid>/ directory.
8 * They feed the safe-query optimisation: the planner-time hierarchy
9 * detector needs to know whether each base relation is TID
10 * (independent leaves; default after @c add_provenance) or BID
11 * (block-correlated leaves; produced by @c repair_key) before it can
12 * decide whether a query is safe to rewrite into read-once form.
13 *
14 * The file uses the same 16-byte header convention as every other
15 * ProvSQL mmap file (magic / version / elem_size / _reserved); records
16 * are fixed-stride so we can back it with @c MMappedVector directly,
17 * without introducing a second variable-length region.
18 *
19 * @warning ON-DISK ABI: the layout of @c ProvenanceTableInfo is
20 * serialised verbatim into @c provsql_table_info.mmap. Adding,
21 * removing, or resizing a field requires bumping the mmap file format
22 * version and providing a migration path, exactly as for
23 * @c GateInformation in @c MMappedCircuit.h.
24 */
25#ifndef MMAPPED_TABLE_INFO_H
26#define MMAPPED_TABLE_INFO_H
27
28#ifdef __cplusplus
29#include <cstdint>
30#else
31#include <stdint.h>
32#endif
33
34#include "postgres.h"
35#include "access/attnum.h"
36
/**
 * @brief Cap on the number of block-key columns recorded per relation.
 *
 * BID tables (produced by @c repair_key) can have multi-column keys.
 * The column numbers are stored inline in a fixed-size array so that
 * each record is fixed-stride and @c MMappedVector can back the file
 * directly.  Sixteen is generous in practice: provenance-tracked
 * tables rarely use composite keys wider than a handful of columns.
 * @c repair_key raises a clear error if a wider key is requested.
 *
 * @warning ON-DISK ABI: this value sizes the @c block_key array of
 * @c ProvenanceTableInfo, whose layout is serialised verbatim.
 * Changing it resizes a field of the persisted record.
 */
#define PROVSQL_TABLE_INFO_MAX_BLOCK_KEY 16
48
/**
 * @brief Cap on the number of base ancestors recorded per relation.
 *
 * The base-ancestor set lists the @c pg_class OIDs of the original
 * @c add_provenance / @c repair_key relations that a derived (CTAS /
 * @c SELECT @c INTO / @c CREATE @c MATERIALIZED @c VIEW) relation's
 * provenance ultimately reads from.  Base tables carry @c {self}; the
 * safe-query rewriter consults the set to enforce that joined FROM
 * entries have disjoint base ancestors before firing the read-once
 * factoring.  Sixty-four covers practical CTAS workloads (typical
 * derivations span 1-10 sources); set-ancestors raises a clear error
 * if a wider set is requested, in which case the relation should be
 * left untracked (the safe-query rewriter will then refuse it on the
 * missing-ancestry conservative path).
 *
 * @warning ON-DISK ABI: this value sizes the @c ancestors array of
 * @c ProvenanceTableInfo, whose layout is serialised verbatim.
 * Changing it resizes a field of the persisted record.
 */
#define PROVSQL_TABLE_INFO_MAX_ANCESTORS 64
65
66/**
67 * @brief How the provenance leaves of a tracked relation are correlated.
68 *
69 * Three cases need distinguishing for the safe-query rewriter:
70 *
71 * - @c PROVSQL_TABLE_TID -- independent input leaves; the
72 * post-@c add_provenance default. Each row's provenance token is a
73 * fresh @c gate_input with its own probability.
74 * - @c PROVSQL_TABLE_BID -- block-correlated leaves produced by
75 * @c repair_key. Rows sharing the same value of @c block_key are
76 * mutually exclusive (they originate from a single block
77 * @c gate_input via @c gate_mulinput children). An empty
78 * @c block_key means the whole table is one block.
79 * - @c PROVSQL_TABLE_OPAQUE -- correlations are unknown. Used for
80 * relations whose provenance is derived from a tracked source via
81 * @c CREATE @c TABLE @c AS @c SELECT, @c INSERT @c INTO @c SELECT,
82 * or @c UPDATE under @c provsql.update_provenance. The safe-query
83 * rewriter must bail on these.
84 *
85 * Stored as @c uint8_t in @c ProvenanceTableInfo so the on-disk size
86 * matches the previous @c bool field exactly.
87 *
88 * @warning ON-DISK ABI: these integer values are persisted in
89 * @c provsql_table_info.mmap. Do not reorder or renumber existing
90 * members; new kinds must be appended.
91 */
97
98/**
99 * @brief Per-relation metadata for the safe-query optimisation.
100 *
101 * One record per provenance-tracked relation. @c relid is the
102 * @c pg_class OID of the relation and acts as the primary key during
103 * linear lookup. @c kind discriminates between TID, BID, and OPAQUE
104 * (see @c provsql_table_kind). For BID, @c block_key[0..block_key_n-1]
105 * lists the column numbers whose tuples partition the table into
106 * mutually-exclusive blocks; an empty key means the whole table is
107 * one block. @c block_key is left empty for TID and OPAQUE.
108 * @c ancestors[0..ancestor_n-1] lists the @c pg_class OIDs of the
109 * original @c add_provenance / @c repair_key base relations this
110 * relation's atoms ultimately come from (a sorted, deduplicated set).
111 * Base tables have @c ancestor_n @c == @c 1 with @c ancestors[0]
112 * @c == @c relid; CTAS-derived tables inherit the union of their
113 * sources' ancestor sets. @c ancestor_n @c == @c 0 means the
114 * registry has no information for this relation -- the safe-query
115 * rewriter then conservatively refuses to fire when ancestry-based
116 * disjointness is required.
117 */
118typedef struct ProvenanceTableInfo {
119 Oid relid; ///< pg_class OID of the relation (primary key)
120 uint8_t kind; ///< One of @c provsql_table_kind
121 uint16_t block_key_n; ///< Number of valid entries in @c block_key
122 AttrNumber block_key[PROVSQL_TABLE_INFO_MAX_BLOCK_KEY]; ///< Block-key column numbers
123 uint16_t ancestor_n; ///< Number of valid entries in @c ancestors (0 = no registry info)
124 Oid ancestors[PROVSQL_TABLE_INFO_MAX_ANCESTORS]; ///< Sorted, deduplicated base-relation OIDs
126
127#endif /* MMAPPED_TABLE_INFO_H */
#define PROVSQL_TABLE_INFO_MAX_BLOCK_KEY
Cap on the number of block-key columns recorded per relation.
provsql_table_kind
How the provenance leaves of a tracked relation are correlated.
@ PROVSQL_TABLE_TID
@ PROVSQL_TABLE_BID
@ PROVSQL_TABLE_OPAQUE
#define PROVSQL_TABLE_INFO_MAX_ANCESTORS
Cap on the number of base ancestors recorded per relation.
Per-relation metadata for the safe-query optimisation.
Oid relid
pg_class OID of the relation (primary key)
AttrNumber block_key[PROVSQL_TABLE_INFO_MAX_BLOCK_KEY]
Block-key column numbers.
uint16_t block_key_n
Number of valid entries in block_key.
Oid ancestors[PROVSQL_TABLE_INFO_MAX_ANCESTORS]
Sorted, deduplicated base-relation OIDs.
uint8_t kind
One of provsql_table_kind.
uint16_t ancestor_n
Number of valid entries in ancestors (0 = no registry info).