Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC][ENH] Outliers: Widget upgrade #4338

Merged
merged 10 commits into from
Jan 24, 2020
2 changes: 1 addition & 1 deletion Orange/classification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from .tree import *
from .simple_tree import *
from .simple_random_forest import *
from .elliptic_envelope import *
from .outlier_detection import *
from .rules import *
from .sgd import *
from .neural_network import *
Expand Down
41 changes: 0 additions & 41 deletions Orange/classification/elliptic_envelope.py

This file was deleted.

73 changes: 73 additions & 0 deletions Orange/classification/outlier_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# pylint: disable=unused-argument
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from Orange.base import SklLearner, SklModel
from Orange.data import Table, Domain

__all__ = ["LocalOutlierFactorLearner", "IsolationForestLearner",
"EllipticEnvelopeLearner"]


class _OutlierDetector(SklLearner):
    """Common base for the outlier-detection learners in this module.

    Before delegating to the parent ``__call__``, the input table is
    converted to a domain built from its attributes alone.
    """

    def __call__(self, data: Table):
        """Transform ``data`` to an attributes-only domain, then delegate.

        Parameters
        ----------
        data : Orange Table

        Returns
        -------
        Whatever the parent class' ``__call__`` returns for the
        transformed table.
        """
        attributes_only = Domain(data.domain.attributes)
        return super().__call__(data.transform(attributes_only))


class LocalOutlierFactorLearner(_OutlierDetector):
    """Learner that wraps scikit-learn's ``LocalOutlierFactor``.

    The constructor arguments are stored in ``self.params``; presumably the
    base learner forwards them to the wrapped estimator (confirm against
    ``SklLearner``). Note that ``novelty`` defaults to ``True`` here.
    """
    __wraps__ = LocalOutlierFactor
    name = "Local Outlier Factor"

    def __init__(self, n_neighbors=20, algorithm="auto", leaf_size=30,
                 metric="minkowski", p=2, metric_params=None,
                 contamination="auto", novelty=True, n_jobs=None,
                 preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        # vars() returns the current local namespace, which at this point
        # holds the constructor arguments. Adding or renaming locals above
        # this line would change what ends up in ``params``.
        self.params = vars()


class IsolationForestLearner(_OutlierDetector):
    """Learner that wraps scikit-learn's ``IsolationForest``.

    The constructor arguments are stored in ``self.params``; presumably the
    base learner forwards them to the wrapped estimator (confirm against
    ``SklLearner``).
    """
    __wraps__ = IsolationForest
    name = "Isolation Forest"

    # NOTE(review): ``behaviour`` defaults to the literal 'deprecated';
    # verify that the installed scikit-learn version still accepts this
    # argument.
    def __init__(self, n_estimators=100, max_samples='auto',
                 contamination='auto', max_features=1.0, bootstrap=False,
                 n_jobs=None, behaviour='deprecated', random_state=None,
                 verbose=0, warm_start=False, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        # vars() returns the current local namespace, which at this point
        # holds the constructor arguments. Adding or renaming locals above
        # this line would change what ends up in ``params``.
        self.params = vars()


class EllipticEnvelopeClassifier(SklModel):
    """Model that exposes the wrapped estimator's Mahalanobis distances."""

    def mahalanobis(self, observations):
        """Return squared Mahalanobis distances of the given observations.

        Parameters
        ----------
        observations : ndarray (n_samples, n_features) or Orange Table
            When a Table is given, its ``X`` array is used.

        Returns
        -------
        distances : ndarray (n_samples,)
            Squared Mahalanobis distances of the observations, as computed
            by ``self.skl_model.mahalanobis``.
        """
        points = (observations.X if isinstance(observations, Table)
                  else observations)
        return self.skl_model.mahalanobis(points)


class EllipticEnvelopeLearner(_OutlierDetector):
    """Learner that wraps scikit-learn's ``EllipticEnvelope``.

    ``EllipticEnvelopeClassifier`` is declared as the ``__returns__`` model
    class. The constructor arguments are stored in ``self.params``;
    presumably the base learner forwards them to the wrapped estimator
    (confirm against ``SklLearner``).

    The reduction of the input table to an attributes-only domain is
    inherited from ``_OutlierDetector.__call__``. This class deliberately
    does not override ``__call__``: an override repeating the same
    transform would only apply it twice.
    """
    __wraps__ = EllipticEnvelope
    __returns__ = EllipticEnvelopeClassifier
    name = "Covariance Estimator"

    def __init__(self, store_precision=True, assume_centered=False,
                 support_fraction=None, contamination=0.1,
                 random_state=None, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        # vars() returns the current local namespace, which at this point
        # holds the constructor arguments. Adding or renaming locals above
        # this line would change what ends up in ``params``.
        self.params = vars()
1 change: 1 addition & 0 deletions Orange/classification/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma="auto", coef0=0.0,


class OneClassSVMLearner(SklLearnerBase):
name = "One class SVM"
__wraps__ = skl_svm.OneClassSVM
preprocessors = svm_pps

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

import numpy as np
from Orange.data import Table, Domain, ContinuousVariable
from Orange.classification import EllipticEnvelopeLearner
from Orange.classification import EllipticEnvelopeLearner, \
IsolationForestLearner, LocalOutlierFactorLearner


class TestEllipticEnvelopeLearner(unittest.TestCase):
Expand Down Expand Up @@ -44,7 +45,7 @@ def test_mahalanobis(self):

def test_EllipticEnvelope_ignores_y(self):
domain = Domain((ContinuousVariable("x1"), ContinuousVariable("x2")),
class_vars=(ContinuousVariable("y1"), ContinuousVariable("y2")))
(ContinuousVariable("y1"), ContinuousVariable("y2")))
X = np.random.random((40, 2))
Y = np.random.random((40, 2))
table = Table(domain, X, Y)
Expand All @@ -60,3 +61,25 @@ def test_EllipticEnvelope_ignores_y(self):
np.testing.assert_array_equal(pred1, pred2)
np.testing.assert_array_equal(pred2, pred3)
np.testing.assert_array_equal(pred3, pred4)


class TestOutlierDetection(unittest.TestCase):
    """Checks how many iris rows each detector labels ``-1``."""

    @classmethod
    def setUpClass(cls):
        # Load the data set once and share it between the tests.
        cls.iris = Table("iris")

    def test_LocalOutlierFactorDetector(self):
        model = LocalOutlierFactorLearner(contamination=0.1)(self.iris)
        labels = model(self.iris)
        flagged = np.count_nonzero(labels == -1)
        self.assertEqual(flagged, 14)

    def test_IsolationForestDetector(self):
        model = IsolationForestLearner(contamination=0.1)(self.iris)
        labels = model(self.iris)
        flagged = np.count_nonzero(labels == -1)
        self.assertEqual(flagged, 15)


# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
Loading