Skip to content

Commit d4e18f2

Browse files
committed
Loda now performs density estimation on the fly
1 parent deb2822 commit d4e18f2

File tree

5 files changed

+48
-57
lines changed

5 files changed

+48
-57
lines changed

CHANGELOG.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
1-
- Unreleased
1+
- 0.0.13-beta
22
- Added documentation site
33
- Added Regression and Classification Loss interfaces
44
- Robust Z Score is now a Ranking anomaly detector
5-
- LODA now defaults to auto detect bin count
5+
- Loda now defaults to auto detect bin count
66
- Removed tolerance param from Gradient Boost and AdaBoost
77
- Screen logger timestamp format now configurable
88
- Dropped Persistable contract between SVM-based learners
99
- Random Forest feature importances now serial
1010
- Removed Robust Z Score tolerance parameter
1111
- Added slice method to Dataset API
12+
- Loda now performs density estimation on the fly
1213

1314
- 0.0.12-beta
1415
- Added AdaMax neural network Optimizer
@@ -33,7 +34,7 @@
3334
- Removed Model Orchestra meta-estimator
3435
- Grid Search automatically retrains base estimator
3536
- Added neural net Parameter namespace and interface
36-
- Changed order of LODA hyperparameters
37+
- Changed order of Loda hyperparameters
3738
- Replaced F1 Score with F Beta metric
3839
- Removed ISRU and Gaussian activation functions
3940
- Fixed SELU derivative computation
@@ -69,7 +70,7 @@
6970
- Pipeline is now more verbose
7071

7172
- 0.0.10-beta
72-
- Added LODA online anomaly detector
73+
- Added Loda online anomaly detector
7374
- Added Radius Neighbors classifier and regressor
7475
- Added fast k-d LOF anomaly detector
7576
- Added base Ball Tree implementation
@@ -84,7 +85,7 @@
8485
- K Means is no longer Online
8586
- Removed Sign function
8687
- Added Binary Tree interface
87-
- Added bin count heuristic to LODA
88+
- Added bin count heuristic to Loda
8889
- Changed order of k-d neighbors hyperparameters
8990
- Removed Hamming distance kernel
9091

docs/anomaly-detectors/loda.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
<span style="float:right;"><a href="https://github.com/RubixML/RubixML/blob/master/src/AnomalyDetectors/LODA.php">Source</a></span>
1+
<span style="float:right;"><a href="https://github.com/RubixML/RubixML/blob/master/src/AnomalyDetectors/Loda.php">Source</a></span>
22

3-
# LODA
3+
# Loda
44
*Lightweight Online Detector of Anomalies* uses a sparse random projection matrix to produce an ensemble of unique one dimensional equi-width histograms able to estimate the probability density of an unknown sample. The decision function takes the anomaly score given by the negative log likelihood whose upper threshold can be set by the user.
55

66
**Interfaces:** [Estimator](../estimator.md), [Learner](../learner.md), [Online](../online.md), [Ranking](api.md#ranking), [Persistable](../persistable.md)
@@ -22,13 +22,13 @@ public static estimateBins($dataset) : int
2222

2323
### Example
2424
```php
25-
use Rubix\ML\AnomalyDetection\LODA;
25+
use Rubix\ML\AnomalyDetectors\Loda;
2626

27-
$bins = LODA::estimateBins(1000);
27+
$bins = Loda::estimateBins(1000);
2828

29-
$estimator = new LODA($bins); // Automatically choose bin count
29+
$estimator = new Loda($bins); // Automatically choose bin count
3030

31-
$estimator = new LODA(5, 250, 3.5); // Specifying bins
31+
$estimator = new Loda(5, 250, 3.5); // Specifying bins
3232
```
3333

3434
### References

mkdocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ nav:
3535
- API Reference: anomaly-detectors/api.md
3636
- Isolation Forest: anomaly-detectors/isolation-forest.md
3737
- K-d LOF: anomaly-detectors/kd-lof.md
38-
- LODA: anomaly-detectors/loda.md
38+
- Loda: anomaly-detectors/loda.md
3939
- Local Outlier Factor: anomaly-detectors/local-outlier-factor.md
4040
- One Class SVM: anomaly-detectors/one-class-svm.md
4141
- Robust Z Score: anomaly-detectors/robust-z-score.md

src/AnomalyDetectors/LODA.php renamed to src/AnomalyDetectors/Loda.php

Lines changed: 26 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@
1818
use const Rubix\ML\LOG_EPSILON;
1919

2020
/**
21-
* LODA
21+
* Loda
2222
*
23-
* Lightweight Online Detector of Anomalies uses sparse random projection vectors
24-
* to generate an ensemble of unique one dimensional equi-width histograms able
25-
* to estimate the probability density of an unknown sample. The anomaly score is
26-
* given by the negative log likelihood whose upper threshold can be set by the
27-
* user through the *contamination* hyper-parameter.
23+
* Lightweight Online Detector of Anomalies a.k.a. Loda uses sparse random
24+
* projection vectors to generate an ensemble of unique one dimensional
25+
* equi-width histograms able to estimate the probability density of an unknown
26+
* sample. The anomaly score is given by the negative log likelihood whose upper
27+
* threshold can be set by the user through the *contamination* hyper-parameter.
2828
*
2929
* References:
3030
* [1] T. Pevný. (2015). Loda: Lightweight on-line detector of anomalies.
@@ -34,7 +34,7 @@
3434
* @package Rubix/ML
3535
* @author Andrew DalPino
3636
*/
37-
class LODA implements Estimator, Learner, Online, Ranking, Persistable
37+
class Loda implements Estimator, Learner, Online, Ranking, Persistable
3838
{
3939
protected const MIN_SPARSE_DIMENSIONS = 3;
4040

@@ -74,7 +74,7 @@ class LODA implements Estimator, Learner, Online, Ranking, Persistable
7474
protected $r;
7575

7676
/**
77-
* The edges, counts, and precomputed probability densities of each histogram.
77+
* The edges and bin counts of each histogram.
7878
*
7979
* @var array[]|null
8080
*/
@@ -83,9 +83,9 @@ class LODA implements Estimator, Learner, Online, Ranking, Persistable
8383
/**
8484
* The number of samples that have been learned so far.
8585
*
86-
* @var int|null
86+
* @var int
8787
*/
88-
protected $n;
88+
protected $n = 0;
8989

9090
/**
9191
* Estimate the number of bins from the number of samples in a dataset.
@@ -212,15 +212,7 @@ public function train(Dataset $dataset) : void
212212
$counts[$this->bins]++;
213213
}
214214

215-
$densities = [];
216-
217-
foreach ($counts as $count) {
218-
$densities[] = $count > 0
219-
? -log($count / $m)
220-
: -LOG_EPSILON;
221-
}
222-
223-
$this->histograms[] = [$edges, $counts, $densities];
215+
$this->histograms[] = [$edges, $counts];
224216
}
225217

226218
$this->n = $m;
@@ -234,22 +226,20 @@ public function train(Dataset $dataset) : void
234226
*/
235227
public function partial(Dataset $dataset) : void
236228
{
237-
if (!$this->r or !$this->histograms or !$this->n) {
229+
if (!$this->r or !$this->histograms) {
238230
$this->train($dataset);
239231

240232
return;
241233
}
242234

243235
DatasetIsCompatibleWithEstimator::check($dataset, $this);
244236

245-
$this->n += $dataset->numRows();
246-
247237
$projections = Matrix::quick($dataset->samples())
248238
->matmul($this->r)
249239
->transpose();
250240

251241
foreach ($projections as $i => $values) {
252-
[$edges, $counts, $densities] = $this->histograms[$i];
242+
[$edges, $counts] = $this->histograms[$i];
253243

254244
$interior = array_slice($edges, 1, $this->bins, true);
255245

@@ -265,14 +255,10 @@ public function partial(Dataset $dataset) : void
265255
$counts[$this->bins]++;
266256
}
267257

268-
foreach ($counts as $j => $count) {
269-
$densities[$j] = $count > 0
270-
? -log($count / $this->n)
271-
: -LOG_EPSILON;
272-
}
273-
274-
$this->histograms[$i] = [$edges, $counts, $densities];
258+
$this->histograms[$i] = [$edges, $counts];
275259
}
260+
261+
$this->n += $dataset->numRows();
276262
}
277263

278264
/**
@@ -308,27 +294,31 @@ public function rank(Dataset $dataset) : array
308294
->matmul($this->r)
309295
->transpose();
310296

311-
$scores = array_fill(0, $projections->n(), 0.);
297+
$densities = array_fill(0, $projections->n(), 0.);
312298

313299
foreach ($projections as $i => $values) {
314-
[$edges, $counts, $densities] = $this->histograms[$i];
300+
[$edges, $counts] = $this->histograms[$i];
315301

316302
foreach ($values as $j => $value) {
317303
foreach ($edges as $k => $edge) {
318304
if ($value < $edge) {
319-
$scores[$j] += $densities[$k];
305+
$count = $counts[$k];
306+
307+
$densities[$j] += $count > 0
308+
? -log($counts[$k] / $this->n)
309+
: -LOG_EPSILON;
320310

321311
break 1;
322312
}
323313
}
324314
}
325315
}
326316

327-
foreach ($scores as &$score) {
328-
$score /= $this->estimators;
317+
foreach ($densities as &$density) {
318+
$density /= $this->estimators;
329319
}
330320

331-
return $scores;
321+
return $densities;
332322
}
333323

334324
/**

tests/AnomalyDetectors/LODATest.php renamed to tests/AnomalyDetectors/LodaTest.php

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
use Rubix\ML\Estimator;
88
use Rubix\ML\Persistable;
99
use Rubix\ML\Datasets\Unlabeled;
10-
use Rubix\ML\AnomalyDetectors\LODA;
10+
use Rubix\ML\AnomalyDetectors\Loda;
1111
use Rubix\ML\Other\Helpers\DataType;
1212
use Rubix\ML\AnomalyDetectors\Ranking;
1313
use Rubix\ML\Datasets\Generators\Blob;
@@ -18,7 +18,7 @@
1818
use InvalidArgumentException;
1919
use RuntimeException;
2020

21-
class LODATest extends TestCase
21+
class LodaTest extends TestCase
2222
{
2323
protected const TRAIN_SIZE = 400;
2424
protected const TEST_SIZE = 10;
@@ -39,7 +39,7 @@ public function setUp()
3939
1 => new Circle(0., 0., 8., 0.1),
4040
], [0.9, 0.1]);
4141

42-
$this->estimator = new LODA(null, 100, 10.);
42+
$this->estimator = new Loda(null, 100, 10.);
4343

4444
$this->metric = new FBeta();
4545

@@ -48,7 +48,7 @@ public function setUp()
4848

4949
public function test_build_detector()
5050
{
51-
$this->assertInstanceOf(LODA::class, $this->estimator);
51+
$this->assertInstanceOf(Loda::class, $this->estimator);
5252
$this->assertInstanceOf(Learner::class, $this->estimator);
5353
$this->assertInstanceOf(Online::class, $this->estimator);
5454
$this->assertInstanceOf(Ranking::class, $this->estimator);
@@ -65,11 +65,11 @@ public function test_build_detector()
6565

6666
public function test_estimate_bins()
6767
{
68-
$this->assertSame(4, LODA::estimateBins(10));
69-
$this->assertSame(8, LODA::estimateBins(100));
70-
$this->assertSame(11, LODA::estimateBins(1000));
71-
$this->assertSame(14, LODA::estimateBins(10000));
72-
$this->assertSame(18, LODA::estimateBins(100000));
68+
$this->assertSame(4, Loda::estimateBins(10));
69+
$this->assertSame(8, Loda::estimateBins(100));
70+
$this->assertSame(11, Loda::estimateBins(1000));
71+
$this->assertSame(14, Loda::estimateBins(10000));
72+
$this->assertSame(18, Loda::estimateBins(100000));
7373
}
7474

7575
public function test_train_partial_predict()

0 commit comments

Comments
 (0)