doxygen-c/html/subset_8cpp_source.html

/**

 * @file subset.cpp

 * @brief Valid-world enumeration for aggregate HAVING predicates.

 *

 * Implements @c enumerate_valid_worlds() declared in @c subset.hpp.

 *

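 * For a list of @f$n@f$ tuples with individual values, the generic
 * fallback iterates over the @f$2^n-1@f$ non-empty possible worlds
 * (bitmasks), computes the aggregate of the present tuples' values using
 * the requested @c AggregationOperator, and tests the comparison
 * predicate.  COUNT predicates, and SUM predicates over non-negative
 * values, are handled by dedicated enumerators instead of the exhaustive
 * scan.  All valid worlds are collected and returned.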

 *

 * The @c upset output flag is set to @c true when the set of valid

 * worlds is upward-closed (every superset of a valid world is also

 * valid), which is the case for monotone aggregation predicates (e.g.

 * SUM ≥ k).  This information is used to optimise the evaluation of

 * monotone HAVING clauses.

 *

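 * Internal helpers in an anonymous namespace:
 * - @c increment(): advance a bitmask to the next possible world.
 * - @c all_worlds(): list every non-empty possible world.
 * - @c append_range() / @c sum_dp(): enumerate the valid worlds of a SUM
 *   predicate by dynamic programming.
 * - @c combinations() / @c count_enum(): enumerate the valid worlds of a
 *   COUNT predicate.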

 */

#include "subset.hpp"

#include <algorithm>

#include <cstddef>

#include <cstdint>

#include <limits>

#include <vector>

#include <stdexcept>

#include <cassert>


namespace {

/**
 * @brief Advance a bitmask to the next possible world.
 *
 * The mask is treated as a binary counter whose least-significant bit is
 * at index 0: the leading run of set bits is cleared and the first unset
 * bit is set.
 *
 * @param v  Mask to advance in place.
 * @return   @c true if a bit was set; @c false if every bit was set on
 *           entry (or the mask is empty), in which case the mask is left
 *           all-false.
 */
static bool increment(mask_t &v)
{
  const std::size_t width = v.size();
  std::size_t pos = 0;

  // Carry propagation: clear the set bits preceding the first unset bit.
  while (pos < width && v[pos]) {
    v[pos] = false;
    ++pos;
  }

  if (pos == width)
    return false; // wrapped around: no further world

  v[pos] = true;
  return true;
}


static std::vector<mask_t> all_worlds(const std::vector<long> &values)

{

  std::vector<mask_t> worlds;

  mask_t mask(values.size());

  // Skip empty world

  while(increment(mask))

    worlds.push_back(mask);

  return worlds;

}


static void append_range(std::vector<mask_t> &out,

                         const std::vector<std::vector<mask_t> > &dp,

                         int lo,

                         int hi)

{

  if (dp.empty()) return;

  const int J = static_cast<int>(dp.size())-1;

  lo = std::max(lo, 0);

  hi = std::min(hi, J);

  if (lo>hi) return;


  for (int j = lo; j <= hi; ++j) {

    out.insert(out.end(), dp[j].begin(), dp[j].end());

  }

}


/**
 * @brief Thrown by @c sum_dp() when it cannot process its input (for
 *        instance when a value is negative).
 *
 * @c enumerate_valid_worlds() catches it and falls back to
 * @c enumerate_exhaustive().
 */
class DPException : public std::exception {};


/**
 * @brief Return the minimum of two values.
 *
 * Function-like macro: the operands may have different arithmetic types,
 * but the selected operand is evaluated twice, so the arguments must be
 * free of side effects.
 */

#define MIN(x,y) ((x)<(y)?(x):(y))


/**
 * @brief Enumerate the non-empty worlds whose SUM satisfies @p op @p C,
 *        by dynamic programming over the achievable sums.
 *
 * @c dp[j] collects the masks of the subsets of the tuples processed so
 * far whose values add up to exactly @c j (a subset-sum table that stores
 * the subsets themselves).  @c dp[0] starts with the empty world, which is
 * never part of the result.
 *
 * @param values      Per-tuple values; every value must be non-negative.
 * @param C           Right-hand side constant of the comparison.
 * @param op          Comparison operator.
 * @param absorptive  When @c true and @p op is GT or GE, a subset that
 *                    already satisfies the predicate is not extended any
 *                    further.
 * @param upset       Set to @c true when such pruning took place (the
 *                    returned worlds are then generators of an
 *                    upward-closed set); never reset to @c false here.
 * @return            Masks of the valid worlds (only the generators when
 *                    pruning took place).
 * @throws DPException if a value is negative, or if the sums involved are
 *                     too large for the table to be indexed.
 */
static std::vector<mask_t> sum_dp(const std::vector<long> &values, int C, ComparisonOperator op, bool absorptive, bool &upset)
{
  const std::size_t n = values.size();

  std::vector<mask_t> R;

  // NE is handled as the union of LT and GT.  That union is not
  // upward-closed in general, so the GT half must neither be pruned down to
  // its minimal worlds nor flag the result as an upset: absorptive pruning
  // is disabled for both halves.
  if(op == ComparisonOperator::NE) {
    std::vector<mask_t> lt = sum_dp(values, C, ComparisonOperator::LT, false, upset);
    std::vector<mask_t> gt = sum_dp(values, C, ComparisonOperator::GT, false, upset);
    R.reserve(lt.size()+gt.size());
    R.insert(R.end(),lt.begin(),lt.end());
    R.insert(R.end(),gt.begin(),gt.end());
    return R;
  }

  // Total of all values, accumulated in a wide type: the values are long
  // and must not be narrowed to int.
  long long T = 0;
  for (const long w : values) {
    if (w < 0)
      throw DPException();
    if (w > std::numeric_limits<long long>::max() - T)
      throw DPException(); // total not representable
    T += w;
  }

  // No valid worlds
  if (op == ComparisonOperator::GT && C>=T) return {};
  if (op == ComparisonOperator::GE && C>T) return {};
  if (op == ComparisonOperator::LT && C<=0) return {};
  if (op == ComparisonOperator::LE && C<0) return {};
  if (op == ComparisonOperator::EQ && (C>T || C<0)) return {};

  // Tautologies: every non-empty world is valid
  if (op == ComparisonOperator::GT && C<0) return all_worlds(values);
  if (op == ComparisonOperator::GE && C<=0) return all_worlds(values);
  if (op == ComparisonOperator::LT && C>T) return all_worlds(values);
  if (op == ComparisonOperator::LE && C>=T) return all_worlds(values);

  // Largest sum the table has to represent
  const long long threshold = C;
  long long J = 0;
  if (op == ComparisonOperator::GT || op == ComparisonOperator::GE)
    J = T;
  else if (op == ComparisonOperator::LT)
    J = std::min(threshold - 1, T);
  else
    J = std::min(threshold, T);

  assert(J>=0);

  // append_range() works with int bucket indices; beyond that the table is
  // not usable, so let the caller fall back to another strategy.
  if (J > std::numeric_limits<int>::max())
    throw DPException();

  std::vector<std::vector<mask_t> > dp(static_cast<std::size_t>(J) + 1);
  dp[0].push_back(mask_t(n)); // dp[0] <- {emptyset}

  long long pref_sum = 0;

  for (std::size_t i=0; i<n; ++i)
  {
    const long long w = values[i];
    pref_sum += w; // bounded by T, cannot overflow
    const long long j_max = std::min(J, pref_sum);

    // Decreasing j so that tuple i is added at most once to each subset.
    for (long long j = j_max; j >= w; --j) {
      const long long p = j - w;
      if(absorptive && ((op==ComparisonOperator::GT && p>threshold) ||
                        (op==ComparisonOperator::GE && p>=threshold))) {
        // The subsets in dp[p] already satisfy the predicate: do not extend them.
        upset=true;
        continue;
      }

      const std::vector<mask_t> &source = dp[static_cast<std::size_t>(p)];
      // Size captured up front: when w == 0 the source and destination
      // buckets are the same vector.
      const std::size_t s = source.size();
      for(std::size_t k=0; k<s; ++k) {
        mask_t m = dp[static_cast<std::size_t>(p)][k];
        m[i] = true;
        dp[static_cast<std::size_t>(j)].push_back(m);
      }
    }
  }

  // dp[0] holds the empty world first, followed by the non-empty worlds
  // made only of zero-valued tuples.  The empty world is never reported,
  // but the others have sum 0 and must be.
  const auto append_zero_sum_worlds = [&R, &dp]() {
    R.insert(R.end(), dp[0].begin() + 1, dp[0].end());
  };

  const int top = static_cast<int>(J);

  switch(op){
  case ComparisonOperator::EQ:
    if (C == 0)
      append_zero_sum_worlds();
    else
      append_range(R,dp,C,C);
    break;

  case ComparisonOperator::GT:
    // C >= 0 and C < T == J here, so C+1 is a valid positive bucket index
    append_range(R,dp,static_cast<int>(threshold + 1),top);
    break;

  case ComparisonOperator::LT:
    append_zero_sum_worlds();
    append_range(R,dp,1,C-1); // C >= 1 here
    break;

  case ComparisonOperator::GE:
    append_range(R,dp,C,top); // C >= 1 here
    break;

  case ComparisonOperator::LE:
    append_zero_sum_worlds();
    append_range(R,dp,1,C);
    break;

  case ComparisonOperator::NE: // case already processed
    assert(false);
  }

  return R;
}


//generate k-subsets form an n-set

static void combinations(std::size_t start,

                         int k_left,

                         mask_t mask,

                         std::vector<mask_t> &out)

{

  const size_t n = mask.size();


  if (k_left == 0) {

    out.push_back(mask);

    return;

  }


  if (start >= n) return;


  const std::size_t remaining = n - start;

  if (remaining < static_cast<std::size_t>(k_left)) return;


  combinations(start + 1, k_left, mask, out);


  mask[start]=true;

  combinations(start + 1, k_left - 1, mask, out);

}


/**
 * @brief Enumerate the non-empty worlds whose tuple count satisfies
 *        COUNT @p op @p m.
 *
 * Only the number of entries in @p values is used.  The empty world is
 * never returned.
 *
 * @param values      Tuple values; only their count matters here.
 * @param m           Right-hand side constant of the comparison.
 * @param op          Comparison operator.
 * @param absorptive  When @c true and @p op is GT or GE, only the worlds of
 *                    the smallest valid size are returned.
 * @param upset       Set to @c true in that GT/GE absorptive case; never
 *                    reset to @c false here.
 * @return            Masks of the valid worlds (only the smallest ones in
 *                    the GT/GE absorptive case).
 */
static std::vector<mask_t> count_enum(const std::vector<long> &values, int m, ComparisonOperator op, bool absorptive, bool &upset)
{
  const int n = static_cast<int>(values.size());
  std::vector<mask_t> out;

  auto add_exact_k = [&](int k) {
                       if (k < 0 || k > n) return;
                       combinations(0, k, mask_t(n), out);
                     };

  // Widened copy of the threshold: the +/-1 adjustments for the strict
  // comparisons must not overflow when m is INT_MAX or INT_MIN.
  long long bound = m;

  switch (op)
  {
  case ComparisonOperator::EQ:
    if(m!=0) add_exact_k(m);
    break;

  case ComparisonOperator::GT:
    ++bound;
    [[fallthrough]];
  case ComparisonOperator::GE:
  {
    /* Skip the empty subset, as all_worlds and enumerate_exhaustive do: a
     * HAVING predicate on an empty group is undefined in SQL semantics
     * (the group does not exist, so HAVING is not evaluated), so the
     * count >= 0 / count > -K family must collapse to "group is
     * non-empty" rather than to a universal tautology.  Equivalently,
     * the smallest size considered is clamped to at least 1. */
    const long long first = std::max(bound, 1LL);
    if(absorptive) {
      upset=true;
      if (first <= n) add_exact_k(static_cast<int>(first));
    } else {
      for (long long k = first; k <= n; ++k) add_exact_k(static_cast<int>(k));
    }
    break;
  }

  case ComparisonOperator::LT:
    --bound;
    [[fallthrough]];
  case ComparisonOperator::LE:
  {
    // Sizes above n yield no world: stop at n instead of iterating up to
    // an arbitrarily large constant.
    const long long last = std::min<long long>(bound, n);
    for (long long k = 1; k <= last; ++k) add_exact_k(static_cast<int>(k));
    break;
  }

  case ComparisonOperator::NE:
    for (int k = 1; k <= n; ++k)
    {
      if (k != m) add_exact_k(k);
    }
    break;
  }

  return out;
}


}


/**
 * @brief Evaluate the comparison <tt>a op b</tt>.
 * @tparam I  Type of the left operand.
 * @tparam J  Type of the right operand.
 * @param a   Left operand.
 * @param op  Comparison operator to apply.
 * @param b   Right operand.
 * @return    Outcome of the comparison; @c false if @p op is none of the
 *            six known operators.
 */
template<typename I, typename J>
static bool compare(I a, ComparisonOperator op, J b) {
  bool holds = false;

  switch (op) {
  case ComparisonOperator::EQ:
    holds = (a == b);
    break;
  case ComparisonOperator::NE:
    holds = (a != b);
    break;
  case ComparisonOperator::GT:
    holds = (a > b);
    break;
  case ComparisonOperator::LT:
    holds = (a < b);
    break;
  case ComparisonOperator::GE:
    holds = (a >= b);
    break;
  case ComparisonOperator::LE:
    holds = (a <= b);
    break;
  }

  return holds;
}


/**
 * @brief Test whether the aggregate of the values selected by @p mask
 *        satisfies @p op @p constant.
 *
 * Every value whose mask bit is set is fed to @p aggregator; the finalized
 * result is then compared with @p constant according to the aggregator's
 * result type.
 *
 * @param values      Input values to aggregate.
 * @param mask        Boolean mask selecting which values to include;
 *                    indexed with the same positions as @p values.
 * @param constant    Right-hand side constant of the comparison.
 * @param op          Comparison operator.
 * @param aggregator  Aggregator applied to the selected values.
 * @return            @c true if the aggregate result satisfies the comparison.
 * @throws std::runtime_error if the result type is not INT, BOOLEAN or FLOAT.
 */
bool evaluate(const std::vector<long>& values,
              const std::vector<bool>& mask,
              int constant, ComparisonOperator op,
              std::unique_ptr<Aggregator> aggregator)
{
  const std::size_t count = values.size();
  for (std::size_t idx = 0; idx != count; ++idx) {
    if (!mask[idx])
      continue; // tuple absent from this world
    aggregator->add(AggValue {values[idx]});
  }

  auto result = aggregator->finalize();
  const auto kind = aggregator->resultType();

  if (kind == ValueType::INT)
    return compare(std::get<long>(result.v), op, constant);
  if (kind == ValueType::BOOLEAN)
    return compare(std::get<bool>(result.v), op, constant);
  if (kind == ValueType::FLOAT)
    return compare(std::get<double>(result.v), op, constant);

  throw std::runtime_error("Cannot compare this kind of value");
}


/**

 * @brief Enumerate all subsets satisfying a HAVING predicate by exhaustive search.

 * @param values      Input values.

 * @param constant    Constant for the comparison.

 * @param op          Comparison operator.

 * @param agg_kind    Aggregation function to apply.

 * @param absorptive  Whether the semiring is absorptive.

 * @param upset       Set to @c true if the result set forms an upset (monotone).

 * @return            Vector of satisfying subset masks.

 */


std::vector<mask_t> enumerate_exhaustive(

  const std::vector<long> &values,

  int constant,

  ComparisonOperator op,

  AggregationOperator agg_kind,

  bool absorptive,

  bool &upset)

{

  const size_t n = values.size();


  std::vector<mask_t> worlds;

  mask_t mask(n);


  bool all_worlds = true;


  while(increment(mask)) { // Skipping empty world

    auto aggregator = makeAggregator(agg_kind, ValueType::INT);


    if(evaluate(values, mask, constant, op, std::move(aggregator)))

      worlds.push_back(mask);

    else

      all_worlds=false;

  }


  if(all_worlds && absorptive)

  {

    worlds.clear();


    // In that case, the result is equivalent to the upset generated by

    // the single-tuple possible worlds

    combinations(0, 1, mask_t(n), worlds);

    upset=true;

  }


  return worlds;

}


std::vector<mask_t> enumerate_valid_worlds(

  const std::vector<long> &values,

  int constant,

  ComparisonOperator op,

  AggregationOperator agg_kind,

  bool absorptive,

  bool &upset

  )

{

  if (agg_kind == AggregationOperator::COUNT)

    return count_enum(values,constant,op, absorptive, upset);


  if(agg_kind == AggregationOperator::SUM)

    try {

      return sum_dp(values, constant, op, absorptive, upset);

    } catch(DPException &e) {

      // We will use the default implementation of the enumeration

    }


  return enumerate_exhaustive(values, constant, op, agg_kind, absorptive, upset);

}


makeAggregator
std::unique_ptr< Aggregator > makeAggregator(AggregationOperator op, ValueType t)
Create a concrete Aggregator for the given operator and value type.
Definition Aggregation.cpp:316

AggregationOperator
AggregationOperator
SQL aggregation functions tracked by ProvSQL.
Definition Aggregation.h:50

AggregationOperator::COUNT
@ COUNT
COUNT(*) or COUNT(expr) → integer.
Definition Aggregation.h:51

AggregationOperator::SUM
@ SUM
SUM → integer or float.
Definition Aggregation.h:52

ComparisonOperator
ComparisonOperator
SQL comparison operators used in gate_cmp circuit gates.
Definition Aggregation.h:38

ComparisonOperator::EQ
@ EQ
Equal (=).
Definition Aggregation.h:39

ComparisonOperator::LT
@ LT
Less than (<).
Definition Aggregation.h:42

ComparisonOperator::GT
@ GT
Greater than (>).
Definition Aggregation.h:44

ComparisonOperator::LE
@ LE
Less than or equal (<=).
Definition Aggregation.h:41

ComparisonOperator::NE
@ NE
Not equal (<>).
Definition Aggregation.h:40

ComparisonOperator::GE
@ GE
Greater than or equal (>=).
Definition Aggregation.h:43

ValueType::INT
@ INT
Signed 64-bit integer.
Definition Aggregation.h:67

ValueType::BOOLEAN
@ BOOLEAN
Boolean.
Definition Aggregation.h:69

ValueType::FLOAT
@ FLOAT
Double-precision float.
Definition Aggregation.h:68

AggValue
A dynamically-typed aggregate value.
Definition Aggregation.h:85

evaluate
bool evaluate(const std::vector< long > &values, const std::vector< bool > &mask, int constant, ComparisonOperator op, std::unique_ptr< Aggregator > aggregator)
Evaluate whether the aggregation of values masked by mask satisfies op constant.
Definition subset.cpp:288

enumerate_exhaustive
std::vector< mask_t > enumerate_exhaustive(const std::vector< long > &values, int constant, ComparisonOperator op, AggregationOperator agg_kind, bool absorptive, bool &upset)
Enumerate all subsets satisfying a HAVING predicate by exhaustive search.
Definition subset.cpp:319

MIN
#define MIN(x, y)
Return the minimum of two values.
Definition subset.cpp:73

enumerate_valid_worlds
std::vector< mask_t > enumerate_valid_worlds(const std::vector< long > &values, int constant, ComparisonOperator op, AggregationOperator agg_kind, bool absorptive, bool &upset)
Enumerate all subsets of n tuples satisfying an aggregate predicate.
Definition subset.cpp:356

compare
static bool compare(I a, ComparisonOperator op, J b)
Apply a comparison operator to two values.
Definition subset.cpp:267

subset.hpp
Enumerate tuple subsets satisfying an aggregate HAVING predicate.

mask_t
std::vector< bool > mask_t
A bitmask over  tuples representing one possible world.
Definition subset.hpp:28