import numpy class PercentRankTransform(object): """ Transform arbitrary values into percent ranks. """ def __init__(self, n_bins=1e5): self.n_bins = int(n_bins) self.cdf = None self.bin_edges = None def fit(self, values): """ Fit the transform using the given values, which are used to establish percentiles. """ assert self.cdf is None assert self.bin_edges is None assert len(values) > 0 (hist, self.bin_edges) = numpy.histogram(values, bins=self.n_bins) self.cdf = numpy.ones(len(hist) + 2) * numpy.nan self.cdf[0] = 0.0 self.cdf[-1] = 100.0 numpy.cumsum(hist / numpy.sum(hist) * 100.0, out=self.cdf[1:-1]) assert not numpy.isnan(self.cdf).any() def transform(self, values): """ Return percent ranks (range [0, 100]) for the given values. """ assert self.cdf is not None assert self.bin_edges is not None indices = numpy.searchsorted(self.bin_edges, values) result = self.cdf[indices] assert len(result) == len(values) return result