Skip to content

Commit

Permalink
Made anomaly likelihood test deterministic
Browse files Browse the repository at this point in the history
  • Loading branch information
Zbysekz committed Jun 17, 2019
1 parent 2848fc5 commit bb42859
Showing 1 changed file with 56 additions and 31 deletions.
87 changes: 56 additions & 31 deletions py/tests/algorithms/anomaly_likelihood_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,27 @@
import mock

from htm.algorithms import anomaly_likelihood as an
from htm.bindings.math import Random
from unittest import TestCase as TestCaseBase

def _sampleDistribution(params, numSamples, verbosity=0):
def _sampleDistribution(params, numSamples, seed=0, verbosity=0):
"""
Given the parameters of a distribution, generate numSamples points from it.
This routine is mostly for testing.
:returns: A numpy array of samples.
"""
"""
rng = numpy.random.RandomState(seed = seed)

if "name" in params:
if params["name"] == "normal":
samples = numpy.random.normal(loc=params["mean"],
samples = rng.normal(loc=params["mean"],
scale=math.sqrt(params["variance"]),
size=numSamples)
elif params["name"] == "pareto":
samples = numpy.random.pareto(params["alpha"], size=numSamples)
samples = rng.pareto(params["alpha"], size=numSamples)
elif params["name"] == "beta":
samples = numpy.random.beta(a=params["alpha"], b=params["beta"],
samples = rng.beta(a=params["alpha"], b=params["beta"],
size=numSamples)
else:
raise ValueError("Undefined distribution: " + params["name"])
Expand All @@ -66,7 +69,7 @@ def _sampleDistribution(params, numSamples, verbosity=0):


def _generateSampleData(mean=0.2, variance=0.2, metricMean=0.2,
metricVariance=0.2):
metricVariance=0.2, seed=0):
"""
Generate 1440 samples of fake metrics data with a particular distribution
of anomaly scores and metric values. Here we generate values every minute.
Expand All @@ -76,12 +79,12 @@ def _generateSampleData(mean=0.2, variance=0.2, metricMean=0.2,
"name": "normal",
"stdev": math.sqrt(variance),
"variance": variance}
samples = _sampleDistribution(p, 1440)
samples = _sampleDistribution(p, 1440, seed)
p = {"mean": metricMean,
"name": "normal",
"stdev": math.sqrt(metricVariance),
"variance": metricVariance}
metricValues = _sampleDistribution(p, 1440)
metricValues = _sampleDistribution(p, 1440, seed)
for hour in range(0, 24):
for minute in range(0, 60):
data.append(
Expand All @@ -94,12 +97,10 @@ def _generateSampleData(mean=0.2, variance=0.2, metricMean=0.2,

return data




class AnomalyLikelihoodClassTest(TestCaseBase):
"""Tests the high-level AnomalyLikelihood class"""


def testCalcSkipRecords(self):

# numIngested is less than both learningPeriod and windowSize
Expand Down Expand Up @@ -329,7 +330,10 @@ def testSerialization(self):

class AnomalyLikelihoodAlgorithmTest(TestCaseBase):
"""Tests the low-level algorithm functions"""

# Seed passed (via Random) to _generateSampleData() by the tests in this class.
# Zero means a truly random seed, i.e. the tests are not deterministic.
GLOBAL_TEST_SEED = 1


def assertWithinEpsilon(self, a, b, epsilon=0.005):
self.assertLessEqual(abs(a - b), epsilon,
Expand Down Expand Up @@ -415,7 +419,7 @@ def testSampleDistribution(self):
"name": "normal",
"stdev": math.sqrt(0.1),
"variance": 0.1}
samples = _sampleDistribution(p, 1000)
samples = _sampleDistribution(p, 1000, seed=Random(self.GLOBAL_TEST_SEED).getUInt32())

# Ensure estimate is reasonable
np = an.estimateNormal(samples)
Expand All @@ -432,7 +436,7 @@ def testEstimateAnomalyLikelihoods(self):
"""

# Generate an estimate using fake distribution of anomaly scores.
data1 = _generateSampleData(mean=0.2)
data1 = _generateSampleData(mean=0.2,seed=Random(self.GLOBAL_TEST_SEED).getUInt32())

likelihoods, avgRecordList, estimatorParams = (
an.estimateAnomalyLikelihoods(data1[0:1000])
Expand Down Expand Up @@ -482,7 +486,7 @@ def testEstimateAnomalyLikelihoodsMalformedRecords(self):
"""

# Generate a fake distribution of anomaly scores, and add malformed records
data1 = _generateSampleData(mean=0.2)
data1 = _generateSampleData(mean=0.2,seed=Random(self.GLOBAL_TEST_SEED).getUInt32())
data1 = data1[0:1000] + [(2, 2)] + [(2, 2, 2, 2)] + [()] + [(2)]

likelihoods, avgRecordList, estimatorParams = (
Expand All @@ -509,10 +513,11 @@ def testSkipRecords(self):
"""
This calls estimateAnomalyLikelihoods with various values of skipRecords
"""

rnd = Random(self.GLOBAL_TEST_SEED)#we want different data for each generate

# Check happy path
data1 = _generateSampleData(mean=0.1)[0:200]
data1 = data1 + (_generateSampleData(mean=0.9)[0:200])
data1 = _generateSampleData(mean=0.1,seed=rnd.getUInt32())[0:200]
data1 = data1 + (_generateSampleData(mean=0.9,seed=rnd.getUInt32())[0:200])

likelihoods, _, estimatorParams = (
an.estimateAnomalyLikelihoods(data1, skipRecords=200)
Expand Down Expand Up @@ -545,11 +550,12 @@ def testUpdateAnomalyLikelihoods(self):
to estimate the distribution on fake data, followed by several calls
to updateAnomalyLikelihoods.
"""

rnd = Random(self.GLOBAL_TEST_SEED)#we want different data for each generate

#------------------------------------------
# Step 1. Generate an initial estimate using fake distribution of anomaly
# scores.
data1 = _generateSampleData(mean=0.2)[0:1000]
data1 = _generateSampleData(mean=0.2,seed=rnd.getUInt32())[0:1000]
_, _, estimatorParams = (
an.estimateAnomalyLikelihoods(data1, averagingWindow=5)
)
Expand All @@ -558,7 +564,7 @@ def testUpdateAnomalyLikelihoods(self):
# Step 2. Generate some new data with a higher average anomaly
# score. Using the estimator from step 1, to compute likelihoods. Now we
# should see a lot more anomalies.
data2 = _generateSampleData(mean=0.6)[0:300]
data2 = _generateSampleData(mean=0.6,seed=rnd.getUInt32())[0:300]
likelihoods2, avgRecordList2, estimatorParams2 = (
an.updateAnomalyLikelihoods(data2, estimatorParams)
)
Expand All @@ -577,7 +583,7 @@ def testUpdateAnomalyLikelihoods(self):
#------------------------------------------
# Step 3. Generate some new data with the expected average anomaly score. We
# should see fewer anomalies than in Step 2.
data3 = _generateSampleData(mean=0.2)[0:1000]
data3 = _generateSampleData(mean=0.2,seed=rnd.getUInt32())[0:1000]
likelihoods3, avgRecordList3, estimatorParams3 = (
an.updateAnomalyLikelihoods(data3, estimatorParams2)
)
Expand Down Expand Up @@ -620,8 +626,10 @@ def testFlatAnomalyScores(self):
ensures things don't crash.
"""

rnd = Random(self.GLOBAL_TEST_SEED)#we want different data for each generate

# Generate an estimate using fake distribution of anomaly scores.
data1 = _generateSampleData(mean=42.0, variance=1e-10)
data1 = _generateSampleData(mean=42.0, variance=1e-10,seed=rnd.getUInt32())

likelihoods, avgRecordList, estimatorParams = (
an.estimateAnomalyLikelihoods(data1[0:1000])
Expand All @@ -636,7 +644,7 @@ def testFlatAnomalyScores(self):

# If you deviate from the mean, you should get probability 0
# Test this by sending in just slightly different values.
data2 = _generateSampleData(mean=42.5, variance=1e-10)
data2 = _generateSampleData(mean=42.5, variance=1e-10,seed=rnd.getUInt32())
likelihoods2, _, _ = (
an.updateAnomalyLikelihoods(data2[0:10], estimatorParams)
)
Expand All @@ -649,21 +657,21 @@ def testFlatAnomalyScores(self):
# In this case we don't let likelihood to get too low. An average
# anomaly score of 0.1 should be essentially zero, but an average
# of 0.04 should be higher
data3 = _generateSampleData(mean=0.01, variance=1e-6)
data3 = _generateSampleData(mean=0.01, variance=1e-6,seed=rnd.getUInt32())

_, _, estimatorParams3 = (
an.estimateAnomalyLikelihoods(data3[0:1000])
)

data4 = _generateSampleData(mean=0.1, variance=1e-6)
data4 = _generateSampleData(mean=0.1, variance=1e-6,seed=rnd.getUInt32())
likelihoods4, _, estimatorParams4 = (
an.updateAnomalyLikelihoods(data4[0:20], estimatorParams3)
)

# Average of 0.1 should go to zero
self.assertLessEqual(likelihoods4[10:].mean(), 0.002)

data5 = _generateSampleData(mean=0.05, variance=1e-6)
data5 = _generateSampleData(mean=0.05, variance=1e-6,seed=rnd.getUInt32())
likelihoods5, _, _ = (
an.updateAnomalyLikelihoods(data5[0:20], estimatorParams4)
)
Expand All @@ -681,7 +689,7 @@ def testFlatMetricScores(self):
"""
# Generate samples with very flat metric values
data1 = _generateSampleData(
metricMean=42.0, metricVariance=1e-10)[0:1000]
metricMean=42.0, metricVariance=1e-10,seed=Random(self.GLOBAL_TEST_SEED).getUInt32())[0:1000]

likelihoods, _, estimatorParams = (
an.estimateAnomalyLikelihoods(data1)
Expand All @@ -702,7 +710,7 @@ def testVeryFewScores(self):
"""

# Generate an estimate using two data points
data1 = _generateSampleData(mean=42.0, variance=1e-10)
data1 = _generateSampleData(mean=42.0, variance=1e-10,seed=Random(self.GLOBAL_TEST_SEED).getUInt32())

_, _, estimatorParams = (
an.estimateAnomalyLikelihoods(data1[0:2])
Expand Down Expand Up @@ -730,7 +738,7 @@ def testBadParams(self):
"""

# Generate an estimate using one data point
data1 = _generateSampleData(mean=42.0, variance=1e-10)
data1 = _generateSampleData(mean=42.0, variance=1e-10,seed=Random(self.GLOBAL_TEST_SEED).getUInt32())

_, _, estimatorParams = (
an.estimateAnomalyLikelihoods(data1[0:1])
Expand Down Expand Up @@ -829,7 +837,24 @@ def testFilterLikelihoods(self):
self.assertFalse(numpy.array_equal(l3a, l3b),
msg="Failure in case (iii), list 3")

def testManySeeds():
  """
  Run the unittest suite with the algorithm tests pinned to seed 72.

  Despite the name, only a single seed is exercised: unittest.main() exits
  the interpreter when the run finishes, so nothing after it would execute.
  """
  algorithmTests = AnomalyLikelihoodAlgorithmTest
  algorithmTests.GLOBAL_TEST_SEED = 72

  # NOTE: sweeping a range of seeds here would require unittest.main(exit=False);
  # with the default exit=True the first run terminates the process.
  unittest.main()

def testDeeply():
  """
  Run the unittest suite repeatedly (999 times) with GLOBAL_TEST_SEED set to 0.

  Per the comment on GLOBAL_TEST_SEED, a seed of zero requests a truly random
  (non-deterministic) seed, so each pass should see different sample data.

  The process exits with status 1 as soon as one pass fails, so a flaky test
  is reported instead of being hidden by later successful passes.
  """
  import sys

  AnomalyLikelihoodAlgorithmTest.GLOBAL_TEST_SEED = 0
  for _ in range(1, 1000):
    # exit=False is essential: by default unittest.main() calls sys.exit()
    # after the first run, which would make this loop execute only once.
    program = unittest.main(exit=False)
    if not program.result.wasSuccessful():
      sys.exit(1)

if __name__ == "__main__":
unittest.main()
#unittest.main()
testManySeeds()

0 comments on commit bb42859

Please sign in to comment.