ProvSQL C/C++ API
Adding support for provenance and uncertainty management to PostgreSQL databases
Loading...
Searching...
No Matches
to_prov.cpp
Go to the documentation of this file.
1/**
2 * @file to_prov.cpp
3 * @brief SQL function @c provsql.to_provxml() – XML/JSON circuit export.
4 *
5 * Implements @c provsql.to_provxml(), which serialises the provenance
6 * circuit rooted at a given UUID token to an XML string. The XML
7 * representation can be used for export, interchange, or display.
8 *
9 * The implementation retrieves the circuit from the mmap store via SPI,
10 * traverses it breadth-first, and builds the XML document using string
11 * operations. Special XML characters in node labels are escaped by the
12 * local @c xmlEscape() helper.
13 */
14extern "C"
15{
16#include "postgres.h"
17#include "fmgr.h"
18#include "catalog/pg_type.h"
19#include "utils/uuid.h"
20#include "provsql_shmem.h"
21#include "provsql_utils.h"
22
23PG_FUNCTION_INFO_V1(to_provxml);
24}
25
26#include <set>
27#include <csignal>
28#include <utility>
29#include <sstream>
30#include <list>
31#include <algorithm>
32
34
35using namespace std;
36
/**
 * @brief Escape special XML characters in @p input.
 *
 * Replaces each of the five characters that have a predefined XML entity
 * (@c &, @c <, @c >, @c " and @c ') by that entity, so that the result can
 * be embedded both in element content and in single- or double-quoted
 * attribute values. All other characters are copied through unchanged.
 *
 * @param input String to escape.
 * @return Escaped string suitable for embedding in XML.
 */
static std::string xmlEscape(const std::string& input) {
  std::string output;
  // Lower bound only: every escaped character grows the result.
  output.reserve(input.size());
  for (const char c : input) {
    switch (c) {
      case '&':  output += "&amp;";  break;
      case '<':  output += "&lt;";   break;
      // '>' must not appear raw as part of "]]>" in character data.
      case '>':  output += "&gt;";   break;
      case '"':  output += "&quot;"; break;
      case '\'': output += "&apos;"; break;
      default:   output += c;        break;
    }
  }
  return output;
}
54
55/* From provenance_evaluate_compiled.cpp */
56extern const char *drop_temp_table;
57bool join_with_temp_uuids(Oid table, const std::vector<std::string> &uuids);
58
59/**
60 * @brief Build an XML provenance representation for a circuit token.
61 * @param tokenDatum Datum containing the root provenance gate UUID.
62 * @param table Datum with the OID of the provenance mapping table.
63 * @return XML string describing the provenance circuit.
64 */
65static string to_provxml_internal(Datum tokenDatum, Datum table)
66{
67 pg_uuid_t token = *DatumGetUUIDP(tokenDatum);
68
69 stringstream ss;
70
71 ss << "<?xml version='1.0' encoding='UTF-8'?>\n";
72 ss << "<prov:document xmlns:prov='http://www.w3.org/ns/prov#' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:provsql='https://provsql.org/'>\n";
73
75 auto root = c.getGate(uuid2string(token));
76
77 std::unordered_map<gate_t, std::string> provenance_mapping;
78 if(table) {
79 auto inputs = c.getInputs();
80 std::vector<std::string> inputs_uuid;
81 std::transform(inputs.begin(), inputs.end(), std::back_inserter(inputs_uuid), [&c](auto x) {
82 return c.getUUID(x);
83 });
84 bool drop_table = join_with_temp_uuids(table, inputs_uuid);
85 constants_t constants = get_constants(true);
86 initialize_provenance_mapping<std::string>(constants, c, provenance_mapping, [](const char *v) {
87 return std::string(v);
88 }, drop_table);
89 }
90
91 std::list<gate_t> to_process { root };
92 std::set<gate_t> seen;
93
94 while(!to_process.empty()) {
95 auto g = to_process.front();
96 to_process.pop_front();
97 auto uuid = c.getUUID(g);
98 gate_type type = c.getGateType(g);
99
100 ss << " <prov:entity prov:id='provsql:" + uuid + "'>\n";
101 ss << " <prov:type xsi:type='xsd:QName'>provsql:" + std::string(gate_type_name[type]) + "</prov:type>\n";
102 if (type == gate_input && table && provenance_mapping.find(g)!=provenance_mapping.end()) {
103 ss << " <prov:value>" + xmlEscape(provenance_mapping[g]) + "</prov:value>\n";
104 }
105 ss << " </prov:entity>\n";
106
107 bool first=true;
108 for(auto h: c.getWires(g)) {
109 ss << " <prov:wasDerivedFrom>\n";
110 ss << " <prov:generatedEntity prov:ref='provsql:" + uuid + "' />\n";
111 ss << " <prov:usedEntity prov:ref='provsql:" + c.getUUID(h) + "' />\n";
112 if(type == gate_monus) {
113 if(first)
114 ss << " <prov:label>left</prov:label>\n";
115 else
116 ss << " <prov:label>right</prov:label>\n";
117 }
118
119 ss << " </prov:wasDerivedFrom>\n";
120
121 if(seen.find(h) == seen.end()) {
122 to_process.push_back(h);
123 seen.insert(h);
124 }
125
126 first=false;
127 }
128 }
129
130 ss << "</prov:document>\n";
131
132 return ss.str();
133}
134
135/** @brief PostgreSQL-callable wrapper for to_provxml(). */
136Datum to_provxml(PG_FUNCTION_ARGS)
137{
138 try
139 {
140 Datum token = PG_GETARG_DATUM(0);
141 Datum table = PG_GETARG_DATUM(1);
142
143 std::string s = to_provxml_internal(token, table);
144
145 text *result = (text *) palloc(VARHDRSZ + s.size() + 1);
146 SET_VARSIZE(result, VARHDRSZ + s.size());
147
148 memcpy((void *) VARDATA(result),
149 s.c_str(),
150 s.size());
151 PG_RETURN_TEXT_P(result);
152 } catch(const exception &e) {
153 provsql_error("to_provxml: %s", e.what());
154 }
155 catch (...)
156 {
157 provsql_error("to_provxml: Unknown exception");
158 }
159
160 PG_RETURN_NULL();
161}
GenericCircuit getGenericCircuit(pg_uuid_t token)
Build a GenericCircuit from the mmap store rooted at token.
std::vector< gate_t > & getWires(gate_t g)
Return a mutable reference to the child-wire list of gate g.
Definition Circuit.h:139
gateType getGateType(gate_t g) const
Return the type of gate g.
Definition Circuit.h:129
uuid getUUID(gate_t g) const
Return the UUID string associated with gate g.
Definition Circuit.hpp:46
gate_t getGate(const uuid &u)
Return (or create) the gate associated with UUID u.
Definition Circuit.hpp:33
In-memory provenance circuit with semiring-generic evaluation.
const std::set< gate_t > & getInputs() const
Return the set of input (leaf) gates.
Template helper for populating provenance mappings from SPI results.
#define provsql_error(fmt,...)
Report a fatal ProvSQL error and abort the current transaction.
Shared-memory segment and inter-process pipe management.
const char * gate_type_name[]
Names of gate types.
constants_t get_constants(bool failure_if_not_possible)
Retrieve the cached OID constants for the current database.
Core types, constants, and utilities shared across ProvSQL.
gate_type
Possible gate type in the provenance circuit.
@ gate_monus
M-Semiring monus.
@ gate_input
Input (variable) gate of the circuit.
string uuid2string(pg_uuid_t uuid)
Format a pg_uuid_t as a std::string.
Structure to store the value of various constants.
UUID structure.
const char * drop_temp_table
DROP TABLE statement for the per-query temporary provenance mapping table.
bool join_with_temp_uuids(Oid table, const std::vector< std::string > &uuids)
Join a provenance mapping table with a set of UUIDs using SPI.
Datum to_provxml(PG_FUNCTION_ARGS)
PostgreSQL-callable wrapper for to_provxml().
Definition to_prov.cpp:136
static string to_provxml_internal(Datum tokenDatum, Datum table)
Build an XML provenance representation for a circuit token.
Definition to_prov.cpp:65
static string xmlEscape(const std::string &input)
Escape special XML characters in input.
Definition to_prov.cpp:42