ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
SimplifiedSubgraph.cpp
Go to the documentation of this file.
1/**
2 * @file SimplifiedSubgraph.cpp
3 * @brief SQL function `provsql.simplified_circuit_subgraph()`.
4 *
5 * Returns a BFS subgraph of the IN-MEMORY GenericCircuit rooted at a
6 * given UUID, so consumers see the result of `simplify_on_load` /
7 * RangeCheck / AnalyticEvaluator passes rather than the persisted
8 * mmap DAG.
9 *
10 * Output: jsonb array of objects, each
11 * {node, parent, child_pos, gate_type, info1, info2, extra, depth}.
12 * Same row shape as the recursive-CTE `circuit_subgraph`, with the
13 * `extra` column inlined so the caller doesn't have to round-trip
14 * through `get_extra` (which would hit the persisted DAG and miss
15 * extras introduced by the simplifier).
16 *
17 * Returning jsonb (rather than SETOF record) keeps the C++ side free
18 * of SRF / FuncCallContext mechanics; a single PG round-trip is the
19 * same cost as the recursive CTE in the persisted-DAG path.
20 */
21extern "C" {
22#include "postgres.h"
23#include "fmgr.h"
24#include "catalog/pg_type.h"
25#include "utils/jsonb.h"
26#include "utils/fmgrprotos.h"
27#include "utils/uuid.h"
28#include "provsql_utils.h"
29
30PG_FUNCTION_INFO_V1(simplified_circuit_subgraph);
31}
32
33#include "CircuitFromMMap.h"
34#include "GenericCircuit.h"
35#include "HybridEvaluator.h"
36#include "provsql_utils_cpp.h"
37
38#include <queue>
39#include <sstream>
40#include <string>
41#include <unordered_map>
42#include <vector>
43
44namespace {
45
46void
47json_escape(std::ostringstream &out, const std::string &s)
48{
49 for (char c : s) {
50 switch (c) {
51 case '"': out << "\\\""; break;
52 case '\\': out << "\\\\"; break;
53 case '\b': out << "\\b"; break;
54 case '\f': out << "\\f"; break;
55 case '\n': out << "\\n"; break;
56 case '\r': out << "\\r"; break;
57 case '\t': out << "\\t"; break;
58 default:
59 if (static_cast<unsigned char>(c) < 0x20) {
60 char buf[8];
61 snprintf(buf, sizeof buf, "\\u%04x", c);
62 out << buf;
63 } else {
64 out << c;
65 }
66 }
67 }
68}
69
70void
71emit_str(std::ostringstream &out, const std::string &s)
72{
73 out << '"';
74 json_escape(out, s);
75 out << '"';
76}
77
78/* Emit a single output row. Mirrors `circuit_subgraph`'s columns
79 * plus an inline `extra`; info1 / info2 are NULL for gate types that
80 * don't use them (input, zero, one), otherwise the integer is
81 * rendered as a string so jsonb_in keeps the same wire shape the
82 * caller expects (TEXT-typed columns in `circuit_subgraph`'s
83 * RETURNS TABLE). */
84void
85emit_node_row(std::ostringstream &out,
86 bool &first,
87 const GenericCircuit &gc,
88 gate_t g,
89 const std::string &node_uuid,
90 const std::string *parent_uuid,
91 int child_pos, /* 1-based; ignored if parent_uuid == nullptr */
92 int depth)
93{
94 if (!first) out << ',';
95 first = false;
96 const auto t = gc.getGateType(g);
97 const char *type_name = (t < nb_gate_types) ? gate_type_name[t] : "invalid";
98 /* GenericCircuit::getInfos returns {-1u, -1u} when the gate has no
99 * entry in the in-memory infos map (CircuitFromMMap only populates
100 * it for mulinput / eq / agg / cmp / arith). Map that sentinel to
101 * JSON null so the caller sees "no info" rather than 4294967295. */
102 const auto infos = gc.getInfos(g);
103 const unsigned NO_INFO = static_cast<unsigned>(-1);
104
105 out << '{';
106 out << "\"node\":"; emit_str(out, node_uuid);
107 out << ",\"parent\":";
108 if (parent_uuid) emit_str(out, *parent_uuid);
109 else out << "null";
110 out << ",\"child_pos\":";
111 if (parent_uuid) out << child_pos;
112 else out << "null";
113 out << ",\"gate_type\":"; emit_str(out, type_name);
114 out << ",\"info1\":";
115 if (infos.first == NO_INFO) { out << "null"; }
116 else { out << '"' << infos.first << '"'; }
117 out << ",\"info2\":";
118 if (infos.second == NO_INFO) { out << "null"; }
119 else { out << '"' << infos.second << '"'; }
120 out << ",\"extra\":";
121 try {
122 emit_str(out, gc.getExtra(g));
123 } catch (const CircuitException &) {
124 out << "null";
125 }
126 /* Emit prob inline for every input / mulinput gate. Consumers that
127 * need a per-gate probability (Studio's anonymous-input inline
128 * percentage; categorical-block introspection) get it without a
129 * separate provsql.get_prob round-trip. NaN is serialised as JSON
130 * null so jsonb_in does not choke on it. */
131 if (t == gate_input || t == gate_mulinput) {
132 const double p = gc.getProb(g);
133 out << ",\"prob\":";
134 if (p != p) /* NaN */ out << "null";
135 else out << p;
136 }
137 /* In-memory Boolean-assumption marker (set by
138 * foldBooleanIdentities) -- emit a JSON true so Studio can stamp
139 * the same B badge on a flag-marked gate as it does on a
140 * persistent gate_assumed_boolean wrapper. Default true is
141 * suppressed by only emitting the field when the flag is set, so
142 * the wire size stays unchanged for the common case. */
143 if (gc.isBooleanAssumed(g)) {
144 out << ",\"boolean_assumed\":true";
145 }
146 out << ",\"depth\":" << depth;
147 out << '}';
148}
149
150} // namespace
151
152extern "C" Datum
154{
155 pg_uuid_t *root_arg = (pg_uuid_t *) PG_GETARG_POINTER(0);
156 int max_depth = PG_GETARG_INT32(1);
157
158 std::ostringstream out;
159 out << '[';
160 bool first = true;
161
162 try {
163 GenericCircuit gc = getGenericCircuit(*root_arg);
164 gate_t root_gate;
165 try {
166 root_gate = gc.getGate(uuid2string(*root_arg));
167 } catch (const CircuitException &) {
168 /* Root not in the simplified circuit (shouldn't normally
169 * happen). Return an empty array so the caller can degrade
170 * gracefully rather than erroring. */
171 out << ']';
172 Datum json_datum = DirectFunctionCall1(
173 jsonb_in, CStringGetDatum(pstrdup(out.str().c_str())));
174 PG_RETURN_DATUM(json_datum);
175 }
176
177 /* getGenericCircuit applied foldSemiringIdentities for us when
178 * provsql.simplify_on_load is on, so the wires here already
179 * reflect identity / absorber collapses; no extra substitution
180 * needed at BFS time.
181 *
182 * Run the hybrid-evaluator simplifier too when the corresponding
183 * GUC is on (default), so consumers see arith folding /
184 * normal-family closures the way probability_evaluate would.
185 * Otherwise the persisted-DAG view and the in-memory simplified
186 * view drift on every introspection feature that depends on a
187 * structural rewrite. */
190 }
191
192 /* BFS to compute the canonical (shortest-path) depth of each
193 * reachable gate. */
194 std::unordered_map<gate_t, int> depth_of;
195 std::queue<gate_t> bfs;
196 depth_of[root_gate] = 0;
197 bfs.push(root_gate);
198 while (!bfs.empty()) {
199 gate_t g = bfs.front(); bfs.pop();
200 int d = depth_of[g];
201 if (d >= max_depth) continue;
202 for (gate_t c : gc.getWires(g)) {
203 auto it = depth_of.find(c);
204 if (it == depth_of.end()) {
205 depth_of[c] = d + 1;
206 bfs.push(c);
207 }
208 }
209 }
210
211 /* Emit one row per (parent, node, child_pos) edge plus a
212 * synthetic root row with parent=NULL. Matches the output shape
213 * of the SQL @c circuit_subgraph (one row per (parent, child)
214 * triple). */
215 const std::string root_uuid = gc.getUUID(root_gate);
216 emit_node_row(out, first, gc, root_gate, root_uuid,
217 /* parent */ nullptr, /* child_pos */ 0,
218 /* depth */ 0);
219
220 for (const auto &[g, d] : depth_of) {
221 if (d >= max_depth) continue;
222 const std::string parent_uuid = gc.getUUID(g);
223 const auto &wires = gc.getWires(g);
224 for (std::size_t i = 0; i < wires.size(); ++i) {
225 gate_t c = wires[i];
226 auto cit = depth_of.find(c);
227 if (cit == depth_of.end()) continue; /* unreachable in BFS */
228 if (cit->second > max_depth) continue;
229 const std::string child_uuid = gc.getUUID(c);
230 emit_node_row(out, first, gc, c, child_uuid,
231 &parent_uuid, static_cast<int>(i + 1),
232 cit->second);
233 }
234 }
235 } catch (const std::exception &e) {
236 provsql_error("simplified_circuit_subgraph: %s", e.what());
237 } catch (...) {
238 provsql_error("simplified_circuit_subgraph: unknown exception");
239 }
240
241 out << ']';
242
243 Datum json_datum = DirectFunctionCall1(
244 jsonb_in, CStringGetDatum(pstrdup(out.str().c_str())));
245 PG_RETURN_DATUM(json_datum);
246}
GenericCircuit getGenericCircuit(pg_uuid_t token)
Build a GenericCircuit from the mmap store rooted at token.
Build in-memory circuits from the mmap-backed persistent store.
gate_t
Strongly-typed gate identifier.
Definition Circuit.h:49
Semiring-agnostic in-memory provenance circuit.
Peephole simplifier for continuous gate_arith sub-circuits.
Datum simplified_circuit_subgraph(PG_FUNCTION_ARGS)
Exception type thrown by circuit operations on invalid input.
Definition Circuit.h:206
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
Definition Circuit.h:140
gateType getGateType(gate_t g) const
Return the type of gate g.
Definition Circuit.h:130
uuid getUUID(gate_t g) const
Return the UUID string associated with gate g.
Definition Circuit.hpp:46
gate_t getGate(const uuid &u)
Return (or create) the gate associated with UUID u.
Definition Circuit.hpp:33
In-memory provenance circuit with semiring-generic evaluation.
std::string getExtra(gate_t g) const
Return the string extra for gate g.
double getProb(gate_t g) const
Return the probability for gate g.
bool isBooleanAssumed(gate_t g) const
Report whether g carries the Boolean-assumption flag.
std::pair< unsigned, unsigned > getInfos(gate_t g) const
Return the integer annotation pair for gate g.
unsigned runHybridSimplifier(GenericCircuit &gc)
Run the peephole simplifier over gc.
bool provsql_hybrid_evaluation
Run the hybrid-evaluator simplifier inside probability_evaluate; controlled by the provsql....
Definition provsql.c:84
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
const char * gate_type_name[]
Names of gate types.
Core types, constants, and utilities shared across ProvSQL.
string uuid2string(pg_uuid_t uuid)
Format a pg_uuid_t as a std::string.
C++ utility functions for UUID manipulation.
UUID structure.