ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
tool_registry_sql.cpp
Go to the documentation of this file.
1/**
2 * @file tool_registry_sql.cpp
3 * @brief SQL surface for the external-tool registry (@ref ToolRegistry.h).
4 *
5 * Exposes the in-memory catalog to SQL:
6 *
7 * - @c tool_registry_list(): set-returning, backs the read-only
8 * @c provsql.tools view; reports each record plus an @c available flag
9 * computed with the same @c find_external_tool the dispatchers use.
10 * - @c tool_registry_register() / @c tool_registry_unregister() /
11 * @c tool_registry_set_enabled() / @c tool_registry_set_preference():
12 * mutators, **superuser-only**.
13 *
14 * @par Security
15 * A CLI tool record names an executable that ProvSQL runs as the PostgreSQL
16 * OS user, so editing a record is equivalent to OS-level trust on the
17 * server account (the same trust as setting @c provsql.tool_search_path or
18 * dropping a binary on it). The mutators therefore refuse non-superusers;
19 * the read-only listing is unrestricted, like @c tool_available.
20 *
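21 * @par Lifetime
 * The registry itself is an in-memory, process-local catalog; it is rebuilt
 * from the compiled seed plus the persisted overrides on every sync.
 *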
22 * @par Persistence
23 * The compiled-in defaults live in C (@ref ToolRegistry.h); admin changes are
24 * persisted in the @c provsql.tool_overrides table and overlaid on the seed
25 * by @ref provsql_sync_tool_registry, which every registry-consuming SQL
26 * function calls so changes are seen across sessions and backends. An empty
27 * overrides table is exactly the compiled defaults; the table being absent
28 * (an extension older than 1.8.0) is treated the same way.
29 */
30extern "C" {
31#include "postgres.h"
32#include "fmgr.h"
33#include "funcapi.h"
34#include "miscadmin.h"
35#include "executor/spi.h"
36#include "catalog/pg_type.h"
37#include "utils/array.h"
38#include "utils/builtins.h"
39#if PG_VERSION_NUM >= 160000
40#include "varatt.h"
41#endif
42
43#include "compatibility.h" /* TYPALIGN_INT fallback for PG < 11 */
44
45PG_FUNCTION_INFO_V1(tool_registry_list);
46PG_FUNCTION_INFO_V1(tool_registry_register);
47PG_FUNCTION_INFO_V1(tool_registry_unregister);
48PG_FUNCTION_INFO_V1(tool_registry_set_enabled);
49PG_FUNCTION_INFO_V1(tool_registry_set_preference);
50}
51
52#include "ToolRegistry.h"
53#include "tool_registry_sync.h"
54#include "external_tool.h"
55#include "provsql_error.h"
56
57#include <functional>
58#include <string>
59#include <vector>
60
61namespace {
62
63/// Read a SQL text into a std::string.
64std::string text_to_string(text *t)
65{
66 return std::string(VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
67}
68
69/// Build a text[] Datum from a vector of strings (never NULL elements).
70Datum string_vector_to_text_array(const std::vector<std::string> &v)
71{
72 if (v.empty())
73 return PointerGetDatum(construct_empty_array(TEXTOID));
74
75 std::vector<Datum> elems;
76 elems.reserve(v.size());
77 for (const auto &s : v)
78 elems.push_back(PointerGetDatum(cstring_to_text_with_len(s.data(),
79 s.size())));
80
81 ArrayType *arr = construct_array(elems.data(),
82 static_cast<int>(elems.size()),
83 TEXTOID, -1, false, TYPALIGN_INT);
84 return PointerGetDatum(arr);
85}
86
87/// Decode a (non-NULL) text[] argument into a vector of strings, dropping
88/// NULL elements.
89std::vector<std::string> text_array_to_string_vector(ArrayType *arr)
90{
91 std::vector<std::string> out;
92 Datum *elems;
93 bool *nulls;
94 int n;
95 deconstruct_array(arr, TEXTOID, -1, false, TYPALIGN_INT,
96 &elems, &nulls, &n);
97 for (int i = 0; i < n; ++i) {
98 if (nulls[i])
99 continue;
100 out.push_back(text_to_string(DatumGetTextPP(elems[i])));
101 }
102 return out;
103}
104
105/// Reject non-superusers from a registry mutator.
106void require_superuser(const char *fn)
107{
108 if (!superuser())
109 provsql_error("%s: must be superuser (a tool record can run arbitrary "
110 "commands as the PostgreSQL OS user)", fn);
111}
112
113// ---- provsql.tool_overrides persistence (caller manages SPI_connect) ----
114
115/// Read a text[] column of the current SPI tuple as a vector<string>.
116std::vector<std::string> spi_text_array(HeapTuple t, TupleDesc td, int col)
117{
118 bool isnull;
119 Datum d = SPI_getbinval(t, td, col, &isnull);
120 if (isnull)
121 return {};
122 return text_array_to_string_vector(DatumGetArrayTypeP(d));
123}
124
125/// Read a text column of the current SPI tuple ("" when NULL).
126std::string spi_text(HeapTuple t, TupleDesc td, int col)
127{
128 char *s = SPI_getvalue(t, td, col);
129 return s ? std::string(s) : std::string();
130}
131
132/// True iff provsql.tool_overrides exists (an extension < 1.8.0 lacks it).
133/// to_regclass returns NULL rather than erroring on a missing relation.
134bool overrides_table_exists()
135{
136 if (SPI_execute("SELECT to_regclass('provsql.tool_overrides') IS NOT NULL",
137 true, 1) != SPI_OK_SELECT || SPI_processed != 1)
138 return false;
139 bool isnull;
140 Datum d = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc,
141 1, &isnull);
142 return !isnull && DatumGetBool(d);
143}
144
145/// Upsert a complete record (removed=false) into provsql.tool_overrides.
146void upsert_override(const provsql::ToolRecord &rec)
147{
148 Oid types[13] = {TEXTOID, TEXTOID, TEXTOID, TEXTARRAYOID, TEXTARRAYOID,
149 TEXTOID, TEXTOID, INT4OID, BOOLOID, TEXTARRAYOID,
150 TEXTOID, TEXTOID, TEXTOID};
151 Datum vals[13] = {
152 CStringGetTextDatum(rec.name.c_str()),
153 CStringGetTextDatum(rec.kind.c_str()),
154 CStringGetTextDatum(rec.binary.c_str()),
155 string_vector_to_text_array(rec.operations),
156 string_vector_to_text_array(rec.input_formats),
157 CStringGetTextDatum(rec.output_format.c_str()),
158 CStringGetTextDatum(rec.parser.c_str()),
159 Int32GetDatum(rec.preference),
160 BoolGetDatum(rec.enabled),
161 string_vector_to_text_array(rec.dependencies),
162 CStringGetTextDatum(rec.argtpl.c_str()),
163 CStringGetTextDatum(rec.argtpl_circuit.c_str()),
164 CStringGetTextDatum(rec.endpoint.c_str()),
165 };
166 SPI_execute_with_args(
167 "INSERT INTO provsql.tool_overrides "
168 "(name, removed, kind, executable, operations, input_formats, "
169 " output_format, parser, preference, enabled, dependencies, argtpl, "
170 " argtpl_circuit, endpoint) "
171 "VALUES ($1, false, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) "
172 "ON CONFLICT (name) DO UPDATE SET "
173 " removed=false, kind=$2, executable=$3, operations=$4, "
174 " input_formats=$5, output_format=$6, parser=$7, preference=$8, "
175 " enabled=$9, dependencies=$10, argtpl=$11, argtpl_circuit=$12, "
176 " endpoint=$13",
177 13, types, vals, NULL, false, 0);
178}
179
180/// Tombstone a name (removed=true) so the seeded default, if any, is hidden.
181void tombstone_override(const std::string &name)
182{
183 Oid types[1] = {TEXTOID};
184 Datum vals[1] = {CStringGetTextDatum(name.c_str())};
185 SPI_execute_with_args(
186 "INSERT INTO provsql.tool_overrides (name, removed) VALUES ($1, true) "
187 "ON CONFLICT (name) DO UPDATE SET removed=true, kind=NULL, "
188 " executable=NULL, operations=NULL, input_formats=NULL, "
189 " output_format=NULL, parser=NULL, preference=NULL, enabled=NULL, "
190 " dependencies=NULL, argtpl=NULL, argtpl_circuit=NULL, endpoint=NULL",
191 1, types, vals, NULL, false, 0);
192}
193
194} // namespace
195
196/**
197 * @brief Rebuild the in-memory registry as "compiled seed overlaid with the
198 * provsql.tool_overrides rows". See @ref tool_registry_sync.h.
 *
 * The registry is first reset to the compiled-in defaults. If an SPI
 * connection can be opened and the overrides table exists, every row is then
 * replayed: a row whose @c removed flag is true removes the record of that
 * name, any other row is upserted as a full record. When SPI cannot connect,
 * the table is absent, or the SELECT does not succeed, the defaults are left
 * as they are and no error is raised.
 *
 * NOTE(review): listing lines 200, 202 and 223 are absent from this extract;
 * presumably they hold the function signature and the declarations of
 * @c reg and @c rec used below. Confirm against the original file.
199 */
201{
203 reg.reset(); // back to the compiled-in defaults
204
205 if (SPI_connect() != SPI_OK_CONNECT)
206 return; // cannot read; the seed stands
207 if (overrides_table_exists()) {
 // Column numbers passed to the SPI accessors below are 1-based and follow
 // the order of this SELECT list.
208 if (SPI_execute(
209 "SELECT name, removed, kind, executable, operations, input_formats, "
210 " output_format, parser, preference, enabled, dependencies, argtpl, "
211 " argtpl_circuit, endpoint FROM provsql.tool_overrides", true, 0)
212 == SPI_OK_SELECT) {
213 TupleDesc td = SPI_tuptable->tupdesc;
214 for (uint64 i = 0; i < SPI_processed; ++i) {
215 HeapTuple t = SPI_tuptable->vals[i];
216 std::string name = spi_text(t, td, 1);
217 bool isnull;
218 Datum rd = SPI_getbinval(t, td, 2, &isnull);
219 if (!isnull && DatumGetBool(rd)) { // tombstone
220 reg.remove(name);
221 continue;
222 }
 // Not a tombstone: copy every column into the record. The "executable"
 // column feeds the record's binary field.
224 rec.name = name;
225 rec.kind = spi_text(t, td, 3);
226 rec.binary = spi_text(t, td, 4);
227 rec.operations = spi_text_array(t, td, 5);
228 rec.input_formats = spi_text_array(t, td, 6);
229 rec.output_format = spi_text(t, td, 7);
230 rec.parser = spi_text(t, td, 8);
 // A NULL preference reads as 0 and a NULL enabled flag reads as true.
231 Datum pd = SPI_getbinval(t, td, 9, &isnull);
232 rec.preference = isnull ? 0 : DatumGetInt32(pd);
233 Datum ed = SPI_getbinval(t, td, 10, &isnull);
234 rec.enabled = isnull ? true : DatumGetBool(ed);
235 rec.dependencies = spi_text_array(t, td, 11);
236 rec.argtpl = spi_text(t, td, 12);
237 rec.argtpl_circuit = spi_text(t, td, 13);
238 rec.endpoint = spi_text(t, td, 14);
239 reg.upsert(rec);
240 }
241 }
242 }
243 SPI_finish();
244}
245
246/**
247 * @brief Set-returning listing of the registry, one row per record.
248 *
249 * Columns: name, kind, binary, operations (text[]), input_formats (text[]),
250 * output_format (text), parser (text), preference (int), enabled (bool),
251 * argtpl (text), argtpl_circuit (text), endpoint (text), available (bool).
252 * @c operations / @c input_formats /
253 * @c output_format use the KCMCP registry names; @c parser is the CLI-only
254 * decode tag. @c available is true iff @c binary (when set) and every
255 * dependency resolve via @c find_external_tool, so the view reflects what a
256 * subsequent dispatch would actually find on the backend's PATH.
 *
 * The rows are materialized into a tuplestore handed back through the
 * caller's ReturnSetInfo; the function's own return value is NULL.
 *
 * NOTE(review): listing line 262 is absent from this extract; given the
 * comment just before it, it is presumably the call that syncs the registry
 * with the persisted overrides. Confirm against the original file.
 * NOTE(review): @c rsinfo is dereferenced without first checking that the
 * caller supplied a ReturnSetInfo that allows materialize mode.
257 */
258extern "C" Datum
259tool_registry_list(PG_FUNCTION_ARGS)
260{
261 // Reflect any persisted overrides (from this or another backend).
263
264 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
265
 // The tuple descriptor and the tuplestore are created while the per-query
 // memory context is current; the previous context is restored on every exit.
266 MemoryContext per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
267 MemoryContext oldcontext = MemoryContextSwitchTo(per_query_ctx);
268
269 TupleDesc tupdesc;
270 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) {
271 MemoryContextSwitchTo(oldcontext);
272 provsql_error("tool_registry_list: function must return a row type");
273 }
274 tupdesc = BlessTupleDesc(tupdesc);
275
276 Tuplestorestate *tupstore = tuplestore_begin_heap(
277 rsinfo->allowedModes & SFRM_Materialize_Random, false, work_mem);
278 rsinfo->returnMode = SFRM_Materialize;
279 rsinfo->setResult = tupstore;
280 rsinfo->setDesc = tupdesc;
281
282 try {
283 for (const provsql::ToolRecord &rec : provsql::tool_registry().records()) {
 // Thirteen output columns, none of them ever NULL.
284 Datum values[13];
285 bool nulls[13] = {false, false, false, false, false, false, false,
286 false, false, false, false, false, false};
287
288 values[0] = PointerGetDatum(cstring_to_text_with_len(rec.name.data(),
289 rec.name.size()));
290 values[1] = PointerGetDatum(cstring_to_text_with_len(rec.kind.data(),
291 rec.kind.size()));
292 values[2] = PointerGetDatum(cstring_to_text_with_len(rec.binary.data(),
293 rec.binary.size()));
294 values[3] = string_vector_to_text_array(rec.operations);
295 values[4] = string_vector_to_text_array(rec.input_formats);
296 values[5] = PointerGetDatum(cstring_to_text_with_len(
297 rec.output_format.data(), rec.output_format.size()));
298 values[6] = PointerGetDatum(cstring_to_text_with_len(rec.parser.data(),
299 rec.parser.size()));
300 values[7] = Int32GetDatum(rec.preference);
301 values[8] = BoolGetDatum(rec.enabled);
302 values[9] = PointerGetDatum(cstring_to_text_with_len(rec.argtpl.data(),
303 rec.argtpl.size()));
304 values[10] = PointerGetDatum(cstring_to_text_with_len(
305 rec.argtpl_circuit.data(), rec.argtpl_circuit.size()));
306 values[11] = PointerGetDatum(cstring_to_text_with_len(
307 rec.endpoint.data(), rec.endpoint.size()));
308 values[12] = BoolGetDatum(toolAvailable(rec));
309
310 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
311 }
 // C++ exceptions are converted into ProvSQL errors rather than being
 // allowed to propagate out of this extern "C" function.
312 } catch (const std::exception &e) {
313 MemoryContextSwitchTo(oldcontext);
314 provsql_error("tool_registry_list: %s", e.what());
315 } catch (...) {
316 MemoryContextSwitchTo(oldcontext);
317 provsql_error("tool_registry_list: unknown exception");
318 }
319
320 MemoryContextSwitchTo(oldcontext);
 // The rows travel through rsinfo->setResult, not through the return value.
321 PG_RETURN_NULL();
322}
323
324/**
325 * @brief Register a tool, or replace the record with the same name.
326 *
327 * Args (in order): name text, executable text, kind text, operations text[],
328 * input_formats text[], output_format text, parser text, argtpl text,
329 * argtpl_circuit text, preference int, enabled bool, endpoint text. A NULL
330 * @c executable defaults to @c name; a NULL @c kind defaults to @c 'cli';
331 * NULL arrays are empty; NULL text fields default to empty; NULL
332 * @c preference is 0 and NULL @c enabled is true; @c endpoint is the KCMCP
333 * server address for a @c 'kcmcp' record. Superuser-only.
 *
 * A NULL or empty @c name is rejected with an error. The record is written
 * to provsql.tool_overrides only; this function does not itself modify the
 * in-memory registry.
 *
 * NOTE(review): listing line 344 is absent from this extract; presumably it
 * declares the @c rec record filled in below. Confirm against the original
 * file.
334 */
335extern "C" Datum
336tool_registry_register(PG_FUNCTION_ARGS)
337{
338 require_superuser("register_tool");
339
340 if (PG_ARGISNULL(0))
341 provsql_error("register_tool: name must not be NULL");
342
343 try {
 // Arguments are read by position; each NULL falls back to the default
 // described in the header comment.
345 rec.name = text_to_string(PG_GETARG_TEXT_PP(0));
346 rec.binary = PG_ARGISNULL(1) ? rec.name
347 : text_to_string(PG_GETARG_TEXT_PP(1));
348 rec.kind = PG_ARGISNULL(2) ? std::string("cli")
349 : text_to_string(PG_GETARG_TEXT_PP(2));
350 if (!PG_ARGISNULL(3))
351 rec.operations = text_array_to_string_vector(PG_GETARG_ARRAYTYPE_P(3));
352 if (!PG_ARGISNULL(4))
353 rec.input_formats = text_array_to_string_vector(PG_GETARG_ARRAYTYPE_P(4));
354 if (!PG_ARGISNULL(5))
355 rec.output_format = text_to_string(PG_GETARG_TEXT_PP(5));
356 if (!PG_ARGISNULL(6))
357 rec.parser = text_to_string(PG_GETARG_TEXT_PP(6));
358 if (!PG_ARGISNULL(7))
359 rec.argtpl = text_to_string(PG_GETARG_TEXT_PP(7));
360 if (!PG_ARGISNULL(8))
361 rec.argtpl_circuit = text_to_string(PG_GETARG_TEXT_PP(8));
362 rec.preference = PG_ARGISNULL(9) ? 0 : PG_GETARG_INT32(9);
363 rec.enabled = PG_ARGISNULL(10) ? true : PG_GETARG_BOOL(10);
364 if (!PG_ARGISNULL(11))
365 rec.endpoint = text_to_string(PG_GETARG_TEXT_PP(11));
366
367 if (rec.name.empty())
368 provsql_error("register_tool: name must not be empty");
369
370 // Persist the full record (create or replace) in the overrides table.
371 if (SPI_connect() != SPI_OK_CONNECT)
372 provsql_error("register_tool: SPI_connect failed");
373 upsert_override(rec);
374 SPI_finish();
 // NOTE(review): only std::exception is caught here, whereas
 // tool_registry_list also has a catch-all handler.
375 } catch (const std::exception &e) {
376 provsql_error("register_tool: %s", e.what());
377 }
378
379 PG_RETURN_VOID();
380}
381
382/**
383 * @brief Remove a tool record. Errors if no tool of that name is currently
384 * effective, so a typo fails loudly rather than silently doing nothing.
385 * A removed seeded default is recorded as a tombstone; the change persists.
 *
 * Superuser-only. Argument 0 is the tool name (text).
 *
 * NOTE(review): listing lines 388 and 393 are absent from this extract;
 * presumably they hold the function name line and a statement that runs
 * before the registry lookup. Confirm against the original file.
 * NOTE(review): argument 0 is read without a NULL check, which is only safe
 * if the SQL declaration is STRICT. Verify.
386 */
387extern "C" Datum
389{
390 require_superuser("unregister_tool");
391 std::string name = text_to_string(PG_GETARG_TEXT_PP(0));
392
 // Refuse names that are not in the registry before writing anything.
394 if (provsql::tool_registry().find(name) == nullptr)
395 provsql_error("unregister_tool: no tool named '%s' is registered",
396 name.c_str());
397
398 if (SPI_connect() != SPI_OK_CONNECT)
399 provsql_error("unregister_tool: SPI_connect failed");
400 tombstone_override(name);
401 SPI_finish();
402 PG_RETURN_VOID();
403}
404
405/// Persist a single-field change to an existing tool: load the effective
406/// record, apply @p mutate, and write the full record back. Errors on an
407/// unknown tool name.
///
/// @param fn      SQL-facing function name, used as the error-message prefix.
/// @param name    Logical name of the tool to change.
/// @param mutate  Callback applied to a copy of the record before it is saved.
///
/// NOTE(review): listing lines 411 and 412 are absent from this extract;
/// presumably they include the lookup that defines @c cur. Confirm against
/// the original file.
408static void persist_tool_change(const char *fn, const std::string &name,
409 const std::function<void(provsql::ToolRecord&)> &mutate)
410{
413 if (cur == nullptr)
414 provsql_error("%s: no tool named '%s' is registered", fn, name.c_str());
 // Mutate a copy; the change is written to the overrides table only.
415 provsql::ToolRecord rec = *cur;
416 mutate(rec);
417 if (SPI_connect() != SPI_OK_CONNECT)
418 provsql_error("%s: SPI_connect failed", fn);
419 upsert_override(rec);
420 SPI_finish();
421}
422
423/** @brief Enable or disable a tool. Errors on an unknown tool name. */
// Superuser-only. Argument 0 is the tool name (text), argument 1 the new
// enabled flag (bool); the change is saved through persist_tool_change.
// NOTE(review): the listing line carrying the function name (425) is absent
// from this extract, and neither argument is checked for NULL. Confirm the
// SQL declaration is STRICT.
424extern "C" Datum
426{
427 require_superuser("set_tool_enabled");
428 std::string name = text_to_string(PG_GETARG_TEXT_PP(0));
429 bool enabled = PG_GETARG_BOOL(1);
430 persist_tool_change("set_tool_enabled", name,
431 [enabled](provsql::ToolRecord &r) { r.enabled = enabled; });
432 PG_RETURN_VOID();
433}
434
435/** @brief Set a tool's preference. Errors on an unknown tool name. */
// Superuser-only. Argument 0 is the tool name (text), argument 1 the new
// preference (int); the change is saved through persist_tool_change.
// NOTE(review): the listing line carrying the function name (437) is absent
// from this extract, and neither argument is checked for NULL. Confirm the
// SQL declaration is STRICT.
436extern "C" Datum
438{
439 require_superuser("set_tool_preference");
440 std::string name = text_to_string(PG_GETARG_TEXT_PP(0));
441 int preference = PG_GETARG_INT32(1);
442 persist_tool_change("set_tool_preference", name,
443 [preference](provsql::ToolRecord &r) { r.preference = preference; });
444 PG_RETURN_VOID();
445}
In-memory catalog of the external tools ProvSQL can invoke.
The process-local registry singleton.
void upsert(const ToolRecord &rec)
Register a new tool or replace the record with the same name.
void reset()
Discard all records and re-seed the compiled-in defaults.
bool remove(const std::string &name)
Remove the record named name; returns false if none existed.
const ToolRecord * find(const std::string &name) const
Find a record by logical name, or nullptr if none is registered.
PostgreSQL cross-version compatibility shims for ProvSQL.
#define TYPALIGN_INT
int alignment code for the array routines (construct_array / deconstruct_array).
bool toolAvailable(const provsql::ToolRecord &rec)
True iff a registry tool can currently be used.
Helpers for invoking external command-line tools.
ToolRegistry & tool_registry()
Shorthand for ToolRegistry::instance().
Uniform error-reporting macros for ProvSQL.
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
One registered external tool.
std::string output_format
std::vector< std::string > dependencies
std::string argtpl_circuit
std::string kind
"cli" (spawn a binary) or "kcmcp" (talk to a socket server at endpoint).
std::string endpoint
KCMCP server address for kind "kcmcp": "unix:/path" or "host:port".
std::vector< std::string > input_formats
std::vector< std::string > operations
static void persist_tool_change(const char *fn, const std::string &name, const std::function< void(provsql::ToolRecord &)> &mutate)
Persist a single-field change to an existing tool: load the effective record, apply mutate,...
Datum tool_registry_set_enabled(PG_FUNCTION_ARGS)
Enable or disable a tool.
Datum tool_registry_list(PG_FUNCTION_ARGS)
Set-returning listing of the registry, one row per record.
Datum tool_registry_unregister(PG_FUNCTION_ARGS)
Remove a tool record.
void provsql_sync_tool_registry()
Rebuild the in-memory registry as "compiled seed overlaid with the provsql.tool_overrides rows"...
Datum tool_registry_set_preference(PG_FUNCTION_ARGS)
Set a tool's preference.
Datum tool_registry_register(PG_FUNCTION_ARGS)
Register a tool, or replace the record with the same name.
Reload the in-memory external-tool registry from its persistent overrides.