# Source code for snsql.sql.privacy

from typing import List
from ._mechanisms import *
from enum import Enum
import numpy as np

class Stat(Enum):
    """Kinds of statistic that can each be assigned their own mechanism.

    The ``sum_large_*`` members stand for sums whose sensitivity exceeds the
    ``large`` cut-off configured on ``Mechanisms``.
    """

    # Row counts.
    count = 1

    # Sums over integer columns: ordinary, then large-sensitivity.
    sum_int = 2
    sum_large_int = 3

    # Sums over float columns: ordinary, then large-sensitivity.
    sum_float = 4
    sum_large_float = 5

    # Selected when the requested statistic name is 'threshold'.
    threshold = 6

class Mechanisms:
    """Property bag that picks a mechanism class for each kind of statistic.

    The attributes are plain and can be reassigned by callers to override
    the defaults:

    * ``classes`` maps each ``Mechanism`` member to the class implementing
      it (``geometric`` and ``discrete_laplace`` both resolve to
      ``DiscreteLaplace``).
    * ``large`` is the sensitivity above which an int or float sum is looked
      up under ``Stat.sum_large_int`` / ``Stat.sum_large_float`` instead.
    * ``map`` maps each ``Stat`` member to the ``Mechanism`` used for it.
    """

    def __init__(self):
        self.classes = {
            Mechanism.laplace: Laplace,
            Mechanism.geometric: DiscreteLaplace,
            Mechanism.discrete_laplace: DiscreteLaplace,
            Mechanism.discrete_gaussian: DiscreteGaussian
        }
        self.large = 1000
        # Stat.sum_large_float has no default entry, so large float sums keep
        # using the Stat.sum_float mechanism unless a caller adds one.
        self.map = {
            Stat.count: Mechanism.discrete_laplace,
            Stat.sum_int: Mechanism.discrete_laplace,
            Stat.sum_large_int: Mechanism.discrete_laplace,
            Stat.sum_float: Mechanism.laplace,
            Stat.threshold: Mechanism.discrete_laplace
        }

    def _get_stat(self, stat: str, t: str):
        """Translate a statistic name and column type into a ``Stat`` member.

        :param stat: ``'threshold'``, ``'count'`` or ``'sum'``.
        :param t: Column type. Only consulted for ``'sum'``, where it must
            be ``'int'`` or ``'float'``.
        :return: The matching ``Stat`` member, or ``None`` when the
            combination is not supported.
        """
        if stat == 'threshold':
            return Stat.threshold
        if stat == 'count':
            return Stat.count
        if stat == 'sum':
            if t == 'int':
                return Stat.sum_int
            if t == 'float':
                return Stat.sum_float
        return None

    def get_mechanism(self, sensitivity, stat: str, t: str):
        """Return the mechanism class to use for a statistic.

        :param sensitivity: Sensitivity of the statistic. Positive infinity
            selects ``Unbounded`` regardless of ``stat`` and ``t``.
        :param stat: Statistic name, as accepted by ``_get_stat``.
        :param t: Column type, as accepted by ``_get_stat``.
        :return: A class (not an instance) taken from ``self.classes``,
            ``Unbounded`` for infinite sensitivity, or ``None`` when the
            statistic/type combination is not supported.
        :raises ValueError: If the resolved ``Stat`` has no entry in
            ``self.map``.
        """
        # Compare by value. An identity test (``is np.inf``) only matches the
        # numpy constant object itself, so float('inf'), math.inf or a
        # computed infinity would wrongly be given a bounded mechanism.
        if sensitivity == np.inf:
            return Unbounded

        kind = self._get_stat(stat, t)
        if kind is None:
            return None

        # Promote to the "large" variant only when one is configured;
        # otherwise the ordinary sum mapping stays in effect.
        if kind is Stat.sum_int:
            if sensitivity > self.large and Stat.sum_large_int in self.map:
                kind = Stat.sum_large_int
        elif kind is Stat.sum_float:
            if sensitivity > self.large and Stat.sum_large_float in self.map:
                kind = Stat.sum_large_float

        if kind not in self.map:
            raise ValueError(f"Unable to determine which mechanism to use for {kind}")
        return self.classes[self.map[kind]]

    @property
    def safe(self):
        """List of mechanisms designated as safe (only ``Mechanism.geometric``).

        NOTE(review): how callers use this list is not visible in this
        module; confirm before relying on it.
        """
        return [Mechanism.geometric]

class Privacy:
    """Privacy parameters.

    The Privacy object is passed in when creating any private SQL connection,
    and applies to all queries executed against that connection.

    :param epsilon: The epsilon value for each statistic returned by the
        private SQL connection.
    :param delta: The delta value for each query processed by the private SQL
        connection. Most counts and sums will use delta of 0, but dimension
        censoring and Gaussian mechanism require delta. Set delta to
        something like 1/(n*sqrt(n)), where n is the approximate number of
        rows in the data source.
    :param alphas: A list of floats representing desired accuracy bounds.
        Only set this parameter if you plan to use execute_with_accuracy for
        row-based accuracy. For simple column accuracy bounds, you can pass
        an alpha directly to get_simple_accuracy, which ignores these alphas.
        Omitting it (or passing ``None``) yields a new empty list.
    :param mechanisms: A property bag specifying which mechanisms to use for
        which types of statistics. You will only set this parameter if you
        want to override default mechanism mapping.
    """

    def __init__(self, *ignore, epsilon: float = 1.0, delta: float = 10E-16,
                 alphas: List[float] = None, mechanisms: "Mechanisms" = None):
        """Privacy params.

        Positional arguments are collected into ``ignore`` and discarded, so
        every setting has to be passed by keyword.
        """
        self.epsilon = epsilon
        self.delta = delta
        # Build a fresh list per instance. A literal ``[]`` default would be
        # one shared object, so appending to one instance's alphas would leak
        # into every other instance created without an explicit list.
        self.alphas = [] if alphas is None else alphas
        # Any falsy value falls back to the default mechanism mapping.
        self.mechanisms = mechanisms or Mechanisms()