stream_test.go

package quantile

import (
	"math"
	"math/rand"
	"sort"
	"testing"
)

var (
	Targets = map[float64]float64{
		0.01: 0.001,
		0.10: 0.01,
		0.50: 0.05,
		0.90: 0.01,
		0.99: 0.001,
	}
	TargetsSmallEpsilon = map[float64]float64{
		0.01: 0.0001,
		0.10: 0.001,
		0.50: 0.005,
		0.90: 0.001,
		0.99: 0.0001,
	}
	LowQuantiles  = []float64{0.01, 0.1, 0.5}
	HighQuantiles = []float64{0.99, 0.9, 0.5}
)

const RelativeEpsilon = 0.01

func verifyPercsWithAbsoluteEpsilon(t *testing.T, a []float64, s *Stream) {
	sort.Float64s(a)
	for quantile, epsilon := range Targets {
		n := float64(len(a))
		k := int(quantile * n)
		if k < 1 {
			k = 1
		}
		lower := int((quantile - epsilon) * n)
		if lower < 1 {
			lower = 1
		}
		upper := int(math.Ceil((quantile + epsilon) * n))
		if upper > len(a) {
			upper = len(a)
		}
		w, min, max := a[k-1], a[lower-1], a[upper-1]
		if g := s.Query(quantile); g < min || g > max {
			t.Errorf("q=%f: want %v [%f,%f], got %v", quantile, w, min, max, g)
		}
	}
}

func verifyLowPercsWithRelativeEpsilon(t *testing.T, a []float64, s *Stream) {
	sort.Float64s(a)
	for _, qu := range LowQuantiles {
		n := float64(len(a))
		k := int(qu * n)

		lowerRank := int((1 - RelativeEpsilon) * qu * n)
		upperRank := int(math.Ceil((1 + RelativeEpsilon) * qu * n))
		w, min, max := a[k-1], a[lowerRank-1], a[upperRank-1]
		if g := s.Query(qu); g < min || g > max {
			t.Errorf("q=%f: want %v [%f,%f], got %v", qu, w, min, max, g)
		}
	}
}

func verifyHighPercsWithRelativeEpsilon(t *testing.T, a []float64, s *Stream) {
	sort.Float64s(a)
	for _, qu := range HighQuantiles {
		n := float64(len(a))
		k := int(qu * n)

		lowerRank := int((1 - (1+RelativeEpsilon)*(1-qu)) * n)
		upperRank := int(math.Ceil((1 - (1-RelativeEpsilon)*(1-qu)) * n))
		w, min, max := a[k-1], a[lowerRank-1], a[upperRank-1]
		if g := s.Query(qu); g < min || g > max {
			t.Errorf("q=%f: want %v [%f,%f], got %v", qu, w, min, max, g)
		}
	}
}

func populateStream(s *Stream) []float64 {
	a := make([]float64, 0, 1e5+100)
	for i := 0; i < cap(a); i++ {
		v := rand.NormFloat64()
		// Add 5% asymmetric outliers.
		if i%20 == 0 {
			v = v*v + 1
		}
		s.Insert(v)
		a = append(a, v)
	}
	return a
}

func TestTargetedQuery(t *testing.T) {
	rand.Seed(42)
	s := NewTargeted(Targets)
	a := populateStream(s)
	verifyPercsWithAbsoluteEpsilon(t, a, s)
}

func TestTargetedQuerySmallSampleSize(t *testing.T) {
	rand.Seed(42)
	s := NewTargeted(TargetsSmallEpsilon)
	a := []float64{1, 2, 3, 4, 5}
	for _, v := range a {
		s.Insert(v)
	}
	verifyPercsWithAbsoluteEpsilon(t, a, s)
	// If not yet flushed, results should be precise:
	if !s.flushed() {
		for φ, want := range map[float64]float64{
			0.01: 1,
			0.10: 1,
			0.50: 3,
			0.90: 5,
			0.99: 5,
		} {
			if got := s.Query(φ); got != want {
				t.Errorf("want %f for φ=%f, got %f", want, φ, got)
			}
		}
	}
}

func TestLowBiasedQuery(t *testing.T) {
	rand.Seed(42)
	s := NewLowBiased(RelativeEpsilon)
	a := populateStream(s)
	verifyLowPercsWithRelativeEpsilon(t, a, s)
}

func TestHighBiasedQuery(t *testing.T) {
	rand.Seed(42)
	s := NewHighBiased(RelativeEpsilon)
	a := populateStream(s)
	verifyHighPercsWithRelativeEpsilon(t, a, s)
}

// BrokenTestTargetedMerge is broken, see Merge doc comment.
func BrokenTestTargetedMerge(t *testing.T) {
	rand.Seed(42)
	s1 := NewTargeted(Targets)
	s2 := NewTargeted(Targets)
	a := populateStream(s1)
	a = append(a, populateStream(s2)...)
	s1.Merge(s2.Samples())
	verifyPercsWithAbsoluteEpsilon(t, a, s1)
}

// BrokenTestLowBiasedMerge is broken, see Merge doc comment.
func BrokenTestLowBiasedMerge(t *testing.T) {
	rand.Seed(42)
	s1 := NewLowBiased(RelativeEpsilon)
	s2 := NewLowBiased(RelativeEpsilon)
	a := populateStream(s1)
	a = append(a, populateStream(s2)...)
	s1.Merge(s2.Samples())
	verifyLowPercsWithRelativeEpsilon(t, a, s2)
}

// BrokenTestHighBiasedMerge is broken, see Merge doc comment.
func BrokenTestHighBiasedMerge(t *testing.T) {
	rand.Seed(42)
	s1 := NewHighBiased(RelativeEpsilon)
	s2 := NewHighBiased(RelativeEpsilon)
	a := populateStream(s1)
	a = append(a, populateStream(s2)...)
	s1.Merge(s2.Samples())
	verifyHighPercsWithRelativeEpsilon(t, a, s2)
}

func TestUncompressed(t *testing.T) {
	q := NewTargeted(Targets)
	for i := 100; i > 0; i-- {
		q.Insert(float64(i))
	}
	if g := q.Count(); g != 100 {
		t.Errorf("want count 100, got %d", g)
	}
	// Before compression, Query should have 100% accuracy.
	for quantile := range Targets {
		w := quantile * 100
		if g := q.Query(quantile); g != w {
			t.Errorf("want %f, got %f", w, g)
		}
	}
}

func TestUncompressedSamples(t *testing.T) {
	q := NewTargeted(map[float64]float64{0.99: 0.001})
	for i := 1; i <= 100; i++ {
		q.Insert(float64(i))
	}
	if g := q.Samples().Len(); g != 100 {
		t.Errorf("want count 100, got %d", g)
	}
}

func TestUncompressedOne(t *testing.T) {
	q := NewTargeted(map[float64]float64{0.99: 0.01})
	q.Insert(3.14)
	if g := q.Query(0.90); g != 3.14 {
		t.Error("want PI, got", g)
	}
}

func TestDefaults(t *testing.T) {
	if g := NewTargeted(map[float64]float64{0.99: 0.001}).Query(0.99); g != 0 {
		t.Errorf("want 0, got %f", g)
	}
}