Source code for snsynth.transform.minmax

from snsynth.transform.definitions import ColumnType
from .base import CachingColumnTransformer
from snsql.sql._mechanisms.approx_bounds import approx_bounds
from snsql.sql.privacy import Privacy
import numpy as np

class MinMaxTransformer(CachingColumnTransformer):
    """Linearly rescales a numeric column into a fixed output interval.

    Input values are clamped to ``[lower, upper]`` and then mapped onto
    ``[-1.0, 1.0]`` (when ``negative`` is True) or ``[0.0, 1.0]`` (otherwise).
    When either bound is missing, both bounds are estimated during fit with
    ``approx_bounds`` using the supplied ``epsilon``.

    :param lower: The lower bound of the input range. Inputs below it are
        clamped to it. If None, bounds are inferred during fit.
    :param upper: The upper bound of the input range. Inputs above it are
        clamped to it. If None, bounds are inferred during fit.
    :param negative: If True, scale between -1.0 and 1.0.  Otherwise, scale
        between 0.0 and 1.0.
    :param epsilon: The privacy budget to use to infer bounds, if none provided.
    :param nullable: If null values are expected, a second output will be
        generated indicating null.
    :param odometer: The optional odometer to use to track privacy budget.
    """

    def __init__(self, *, lower=None, upper=None, negative=True, epsilon=0.0,
                 nullable=False, odometer=None):
        # All attributes are assigned before calling the base initializer,
        # exactly as in the original ordering.
        # NOTE(review): presumably the base __init__ invokes _clear_fit(),
        # which reads these attributes -- confirm against the base class.
        self.lower = lower
        self.upper = upper
        self.epsilon = epsilon
        self.negative = negative
        self.budget_spent = []  # one entry appended per bounds estimation
        self.nullable = nullable
        self.odometer = odometer
        super().__init__()

    @property
    def output_type(self):
        """The column type produced by this transformer (always continuous)."""
        return ColumnType.CONTINUOUS

    @property
    def needs_epsilon(self):
        """True when at least one bound is missing and must be inferred."""
        return self.lower is None or self.upper is None

    @property
    def cardinality(self):
        """Cardinality list: ``[None]``, or ``[None, 2]`` when nullable.

        NOTE(review): the entries presumably line up with the output columns
        (scaled value, then the binary null indicator) -- confirm with callers.
        """
        if self.nullable:
            return [None, 2]
        return [None]

    def allocate_privacy_budget(self, epsilon, odometer):
        """Store the privacy budget and odometer to use when inferring bounds."""
        self.epsilon = epsilon
        self.odometer = odometer

    def _fit_finish(self):
        """Finalize fitting by resolving ``fit_lower`` and ``fit_upper``.

        If a positive ``epsilon`` is available and a bound is missing, null
        and NaN values are dropped from the cached fit values, the budget is
        recorded on the odometer (if any), and both bounds are taken from
        ``approx_bounds`` -- even when one of ``lower``/``upper`` was supplied.
        If both bounds were supplied they are used directly.

        :raises ValueError: If bounds are missing and no positive epsilon is
            available, or if ``approx_bounds`` fails to produce bounds.
        """
        bounds_missing = self.lower is None or self.upper is None
        if self.epsilon is not None and self.epsilon > 0.0 and bounds_missing:
            # Drop nulls/NaNs so they do not take part in bounds estimation.
            self._fit_vals = [
                v for v in self._fit_vals
                if v is not None and not (isinstance(v, float) and np.isnan(v))
            ]
            if self.odometer is not None:
                self.odometer.spend(Privacy(epsilon=self.epsilon, delta=0.0))
            self.fit_lower, self.fit_upper = approx_bounds(self._fit_vals, self.epsilon)
            self.budget_spent.append(self.epsilon)
            if self.fit_lower is None or self.fit_upper is None:
                raise ValueError("MinMaxTransformer could not find bounds.")
        elif bounds_missing:
            raise ValueError("MinMaxTransformer requires either epsilon or min and max.")
        else:
            self.fit_lower = self.lower
            self.fit_upper = self.upper
        self._fit_complete = True
        self.output_width = 2 if self.nullable else 1

    def _clear_fit(self):
        """Reset fit state; mark as ready immediately if both bounds are known."""
        self._reset_fit()
        self.fit_lower = None
        self.fit_upper = None
        # If bounds were provided, we can immediately use them without fitting.
        # Compare against None explicitly: a bound of 0 / 0.0 is a valid value
        # and must not be mistaken for "not provided".
        if self.lower is not None and self.upper is not None:
            self._fit_complete = True
            self.output_width = 2 if self.nullable else 1
            self.fit_lower = self.lower
            self.fit_upper = self.upper

    def _transform(self, val):
        """Clamp ``val`` to the fitted bounds and scale it to the output range.

        :param val: The value to transform. None/NaN is only handled when the
            transformer is nullable.
        :return: The scaled value, or a ``(scaled, null_flag)`` tuple when
            nullable. Null inputs yield ``(0.0, 1)``.
        :raises ValueError: If the transformer has not been fit yet.
        """
        if not self.fit_complete:
            raise ValueError("MinMaxTransformer has not been fit yet.")
        if self.nullable and (val is None or (isinstance(val, float) and np.isnan(val))):
            # Placeholder value with the null indicator set.
            return (0.0, 1)
        # Clamp to the fitted range before scaling.
        val = self.fit_lower if val < self.fit_lower else val
        val = self.fit_upper if val > self.fit_upper else val
        # Map [fit_lower, fit_upper] onto [0, 1].
        val = (val - self.fit_lower) / (self.fit_upper - self.fit_lower)
        if self.negative:
            # Stretch [0, 1] onto [-1, 1].
            val = (val * 2) - 1
        if self.nullable:
            return (val, 0)
        return val

    def _inverse_transform(self, val):
        """Map a scaled value back to the original range.

        :param val: The scaled value, or a ``(scaled, null_flag)`` pair when
            the transformer is nullable.
        :return: None when the null flag equals 1; otherwise the value mapped
            back to ``[fit_lower, fit_upper]`` and clipped to that range.
        :raises ValueError: If the transformer has not been fit yet.
        """
        if not self.fit_complete:
            raise ValueError("MinMaxTransformer has not been fit yet.")
        if self.nullable:
            val, null_flag = val
            if null_flag == 1:
                return None
        if self.negative:
            # Undo the [-1, 1] stretch, back onto [0, 1].
            val = (1 + val) / 2
        val = val * (self.fit_upper - self.fit_lower) + self.fit_lower
        return np.clip(val, self.fit_lower, self.fit_upper)