From 413278e592d1e36584128455d7e1e4dedd4cd6ca Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 06:12:10 +0100 Subject: [PATCH 01/10] EllipticEnvelopeLearner: Move to outlier_detection module --- Orange/classification/__init__.py | 2 +- .../{elliptic_envelope.py => outlier_detection.py} | 0 .../tests/test_outlier_detection.py} | 6 +++++- 3 files changed, 6 insertions(+), 2 deletions(-) rename Orange/classification/{elliptic_envelope.py => outlier_detection.py} (100%) rename Orange/{tests/test_elliptic_envelope.py => classification/tests/test_outlier_detection.py} (95%) diff --git a/Orange/classification/__init__.py b/Orange/classification/__init__.py index 842518fca31..c32fe8ff2d7 100644 --- a/Orange/classification/__init__.py +++ b/Orange/classification/__init__.py @@ -15,7 +15,7 @@ from .tree import * from .simple_tree import * from .simple_random_forest import * -from .elliptic_envelope import * +from .outlier_detection import * from .rules import * from .sgd import * from .neural_network import * diff --git a/Orange/classification/elliptic_envelope.py b/Orange/classification/outlier_detection.py similarity index 100% rename from Orange/classification/elliptic_envelope.py rename to Orange/classification/outlier_detection.py diff --git a/Orange/tests/test_elliptic_envelope.py b/Orange/classification/tests/test_outlier_detection.py similarity index 95% rename from Orange/tests/test_elliptic_envelope.py rename to Orange/classification/tests/test_outlier_detection.py index 682e47f4abc..02a6b603aee 100644 --- a/Orange/tests/test_elliptic_envelope.py +++ b/Orange/classification/tests/test_outlier_detection.py @@ -44,7 +44,7 @@ def test_mahalanobis(self): def test_EllipticEnvelope_ignores_y(self): domain = Domain((ContinuousVariable("x1"), ContinuousVariable("x2")), - class_vars=(ContinuousVariable("y1"), ContinuousVariable("y2"))) + (ContinuousVariable("y1"), ContinuousVariable("y2"))) X = np.random.random((40, 2)) Y = np.random.random((40, 2)) table = Table(domain, X, Y) @@ -60,3 +60,7 @@ def test_EllipticEnvelope_ignores_y(self): np.testing.assert_array_equal(pred1, pred2) np.testing.assert_array_equal(pred2, pred3) np.testing.assert_array_equal(pred3, pred4) + + +if __name__ == "__main__": + unittest.main() From 8a868418749bb3d627d0df1f2c2e26626ce0956b Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 06:29:52 +0100 Subject: [PATCH 02/10] outlier_detection: Wrap sklearn classes --- Orange/classification/outlier_detection.py | 49 +++++++++++++++---- .../tests/test_outlier_detection.py | 21 +++++++- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/Orange/classification/outlier_detection.py b/Orange/classification/outlier_detection.py index 60b8e5357d7..a22a4c1a1fc 100644 --- a/Orange/classification/outlier_detection.py +++ b/Orange/classification/outlier_detection.py @@ -1,10 +1,40 @@ -import sklearn.covariance as skl_covariance - +# pylint: disable=unused-argument +from sklearn.covariance import EllipticEnvelope +from sklearn.ensemble import IsolationForest +from sklearn.neighbors import LocalOutlierFactor from Orange.base import SklLearner, SklModel from Orange.data import Table, Domain -from Orange.preprocess import Continuize, RemoveNaNColumns, SklImpute -__all__ = ["EllipticEnvelopeLearner"] +__all__ = ["LocalOutlierFactorLearner", "IsolationForestLearner", + "EllipticEnvelopeLearner"] + + +class _OutlierDetector(SklLearner): + def __call__(self, data: Table): + data = data.transform(Domain(data.domain.attributes)) + return super().__call__(data) + + +class LocalOutlierFactorLearner(_OutlierDetector): + __wraps__ = LocalOutlierFactor + + def __init__(self, n_neighbors=20, algorithm="auto", leaf_size=30, + metric="minkowski", p=2, metric_params=None, + contamination="auto", novelty=True, n_jobs=None, + preprocessors=None): + super().__init__(preprocessors=preprocessors) + self.params = vars() + + +class IsolationForestLearner(_OutlierDetector): + __wraps__ = IsolationForest + + def __init__(self, n_estimators=100, max_samples='auto', + contamination='auto', max_features=1.0, bootstrap=False, + n_jobs=None, behaviour='deprecated', random_state=None, + verbose=0, warm_start=False, preprocessors=None): + super().__init__(preprocessors=preprocessors) + self.params = vars() class EllipticEnvelopeClassifier(SklModel): @@ -25,10 +55,9 @@ def mahalanobis(self, observations): return self.skl_model.mahalanobis(observations) -class EllipticEnvelopeLearner(SklLearner): - __wraps__ = skl_covariance.EllipticEnvelope +class EllipticEnvelopeLearner(_OutlierDetector): + __wraps__ = EllipticEnvelope __returns__ = EllipticEnvelopeClassifier - preprocessors = [Continuize(), RemoveNaNColumns(), SklImpute()] def __init__(self, store_precision=True, assume_centered=False, support_fraction=None, contamination=0.1, @@ -36,6 +65,6 @@ def __init__(self, store_precision=True, assume_centered=False, super().__init__(preprocessors=preprocessors) self.params = vars() - def __call__(self, data): - classless_data = data.transform(Domain(data.domain.attributes)) - return super().__call__(classless_data) + def __call__(self, data: Table): + data = data.transform(Domain(data.domain.attributes)) + return super().__call__(data) diff --git a/Orange/classification/tests/test_outlier_detection.py b/Orange/classification/tests/test_outlier_detection.py index 02a6b603aee..1ac0b0c4ada 100644 --- a/Orange/classification/tests/test_outlier_detection.py +++ b/Orange/classification/tests/test_outlier_detection.py @@ -5,7 +5,8 @@ import numpy as np from Orange.data import Table, Domain, ContinuousVariable -from Orange.classification import EllipticEnvelopeLearner +from Orange.classification import EllipticEnvelopeLearner, \ + IsolationForestLearner, LocalOutlierFactorLearner class TestEllipticEnvelopeLearner(unittest.TestCase): @@ -62,5 +63,23 @@ def test_EllipticEnvelope_ignores_y(self): np.testing.assert_array_equal(pred3, pred4) +class TestOutlierDetection(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.iris = Table("iris") + + def test_LocalOutlierFactorDetector(self): + detector = LocalOutlierFactorLearner(contamination=0.1) + detect = detector(self.iris) + is_inlier = detect(self.iris) + self.assertEqual(len(np.where(is_inlier == -1)[0]), 14) + + def test_IsolationForestDetector(self): + detector = IsolationForestLearner(contamination=0.1) + detect = detector(self.iris) + is_inlier = detect(self.iris) + self.assertEqual(len(np.where(is_inlier == -1)[0]), 15) + + if __name__ == "__main__": unittest.main() From bfc6410a54f93976f6741ec7cdc8c5197e411371 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 06:59:32 +0100 Subject: [PATCH 03/10] Outliers: Replace info box with summary --- Orange/widgets/data/owoutliers.py | 27 ++++++-------------- Orange/widgets/data/tests/test_owoutliers.py | 15 ++++++++++- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 8ecc8321c3d..97ba07914c4 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -37,9 +37,6 @@ class Outputs: empirical_covariance = Setting(False) support_fraction = Setting(1) - data_info_default = 'No data on input.' - in_out_info_default = ' ' - class Error(widget.OWWidget.Error): singular_cov = Msg("Singular covariance matrix.") memory_error = Msg("Not enough memory") @@ -49,11 +46,6 @@ def __init__(self): self.data = None self.n_inliers = self.n_outliers = None - box = gui.vBox(self.controlArea, "Information") - self.data_info_label = gui.widgetLabel(box, self.data_info_default) - self.in_out_info_label = gui.widgetLabel(box, - self.in_out_info_default) - box = gui.vBox(self.controlArea, "Outlier Detection Method") detection = gui.radioButtons(box, self, "outlier_method") @@ -92,6 +84,9 @@ def __init__(self): callback=self.commit) self.layout().setSizeConstraint(QLayout.SetFixedSize) + self.info.set_input_summary(self.info.NoInput) + self.info.set_output_summary(self.info.NoOutput) + def nu_changed(self): self.outlier_method = self.OneClassSVM @@ -126,14 +121,9 @@ def enable_covariance(self): @Inputs.data @check_sql_input - def set_data(self, dataset): - self.data = dataset - if self.data is None: - self.data_info_label.setText(self.data_info_default) - self.in_out_info_label.setText(self.in_out_info_default) - else: - self.data_info_label.setText('%d instances' % len(self.data)) - self.in_out_info_label.setText(' ') + def set_data(self, data): + self.data = data + self.info.set_input_summary(len(data) if data else self.info.NoOutput) self.enable_covariance() if self.data and len(self.data.domain.attributes) > 1500: @@ -146,7 +136,6 @@ def _get_outliers(self): y_pred, amended_data = self.detect_outliers() except ValueError: self.Error.singular_cov() - self.in_out_info_label.setText(self.in_out_info_default) return None, None except MemoryError: self.Error.memory_error() @@ -156,8 +145,6 @@ def _get_outliers(self): outliers_ind = np.where(y_pred == -1)[0] inliers = amended_data[inliers_ind] outliers = amended_data[outliers_ind] - self.in_out_info_label.setText( - f"{len(inliers)} inliers, {len(outliers)} outliers") self.n_inliers = len(inliers) self.n_outliers = len(outliers) @@ -170,6 +157,8 @@ def commit(self): if self.data: inliers, outliers = self._get_outliers() + summary = len(inliers) if inliers else self.info.NoOutput + self.info.set_output_summary(summary) self.Outputs.inliers.send(inliers) self.Outputs.outliers.send(outliers) diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 6bfcb2c58f0..8d7db810dc0 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -1,5 +1,5 @@ # Test methods with long descriptive names can omit docstrings -# pylint: disable=missing-docstring +# pylint: disable=missing-docstring, protected-access import unittest @@ -46,6 +46,19 @@ def test_nans(self): self.send_signal(self.widget.Inputs.data, data) self.assertIsNot(self.get_output(self.widget.Outputs.inliers), None) + def test_in_out_summary(self): + info = self.widget.info + self.assertEqual(info._StateInfo__input_summary.brief, "") + self.assertEqual(info._StateInfo__output_summary.brief, "") + + self.send_signal(self.widget.Inputs.data, self.iris) + self.assertEqual(info._StateInfo__input_summary.brief, "150") + self.assertEqual(info._StateInfo__output_summary.brief, "76") + + self.send_signal(self.widget.Inputs.data, None) + self.assertEqual(info._StateInfo__input_summary.brief, "") + self.assertEqual(info._StateInfo__output_summary.brief, "") + if __name__ == "__main__": unittest.main() From a955e1dd0a320c50463e341c25935a7fefe0bea2 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 07:34:49 +0100 Subject: [PATCH 04/10] Outliers: New style warnings --- Orange/widgets/data/owoutliers.py | 52 ++++++++++---------- Orange/widgets/data/tests/test_owoutliers.py | 20 ++++++++ 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 97ba07914c4..e56a6011351 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -4,14 +4,14 @@ from Orange.base import SklLearner from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner from Orange.data import Table, Domain, ContinuousVariable -from Orange.widgets import widget, gui +from Orange.widgets import gui from Orange.widgets.settings import Setting -from Orange.widgets.utils.widgetpreview import WidgetPreview -from Orange.widgets.widget import Msg, Input, Output from Orange.widgets.utils.sql import check_sql_input +from Orange.widgets.utils.widgetpreview import WidgetPreview +from Orange.widgets.widget import Msg, Input, Output, OWWidget -class OWOutliers(widget.OWWidget): +class OWOutliers(OWWidget): name = "Outliers" description = "Detect outliers." icon = "icons/Outliers.svg" @@ -37,7 +37,12 @@ class Outputs: empirical_covariance = Setting(False) support_fraction = Setting(1) - class Error(widget.OWWidget.Error): + MAX_FEATURES = 1500 + + class Warning(OWWidget.Warning): + disabled_cov = Msg("Too many features for covariance estimation.") + + class Error(OWWidget.Error): singular_cov = Msg("Singular covariance matrix.") memory_error = Msg("Not enough memory") @@ -102,36 +107,32 @@ def support_fraction_changed(self): def empirical_changed(self): self.outlier_method = self.Covariance - def disable_covariance(self): - self.outlier_method = self.OneClassSVM - self.rb_cov.setDisabled(True) - self.l_cov.setDisabled(True) - self.cont_slider.setDisabled(True) - self.cb_emp_cov.setDisabled(True) - self.support_fraction_spin.setDisabled(True) - self.warning('Too many features for covariance estimation.') - - def enable_covariance(self): - self.rb_cov.setDisabled(False) - self.l_cov.setDisabled(False) - self.cont_slider.setDisabled(False) - self.cb_emp_cov.setDisabled(False) - self.support_fraction_spin.setDisabled(False) - self.warning() + def enable_covariance(self, enable=True): + self.rb_cov.setEnabled(enable) + self.l_cov.setEnabled(enable) + self.cont_slider.setEnabled(enable) + self.cb_emp_cov.setEnabled(enable) + self.support_fraction_spin.setEnabled(enable) @Inputs.data @check_sql_input def set_data(self, data): + self.clear_messages() self.data = data self.info.set_input_summary(len(data) if data else self.info.NoOutput) + self.enable_controls() + self.commit() + def enable_controls(self): self.enable_covariance() - if self.data and len(self.data.domain.attributes) > 1500: - self.disable_covariance() - - self.commit() + if self.data and len(self.data.domain.attributes) > self.MAX_FEATURES: + self.outlier_method = self.OneClassSVM + self.enable_covariance(False) + self.Warning.disabled_cov() def _get_outliers(self): + self.Error.singular_cov.clear() + self.Error.memory_error.clear() try: y_pred, amended_data = self.detect_outliers() except ValueError: @@ -151,7 +152,6 @@ def _get_outliers(self): return inliers, outliers def commit(self): - self.clear_messages() inliers = outliers = None self.n_inliers = self.n_outliers = None if self.data: diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 8d7db810dc0..469ad319e6b 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -2,6 +2,7 @@ # pylint: disable=missing-docstring, protected-access import unittest +from unittest.mock import patch, Mock import numpy as np @@ -59,6 +60,25 @@ def test_in_out_summary(self): self.assertEqual(info._StateInfo__input_summary.brief, "") self.assertEqual(info._StateInfo__output_summary.brief, "") + @patch("Orange.widgets.data.owoutliers.OWOutliers.MAX_FEATURES", 3) + @patch("Orange.widgets.data.owoutliers.OWOutliers.commit", Mock()) + def test_covariance_enabled(self): + self.send_signal(self.widget.Inputs.data, self.iris) + self.assertTrue(self.widget.Warning.disabled_cov.is_shown()) + self.assertFalse(self.widget.rb_cov.isEnabled()) + + self.send_signal(self.widget.Inputs.data, self.iris[:, :2]) + self.assertFalse(self.widget.Warning.disabled_cov.is_shown()) + self.assertTrue(self.widget.rb_cov.isEnabled()) + + self.send_signal(self.widget.Inputs.data, self.iris) + self.assertTrue(self.widget.Warning.disabled_cov.is_shown()) + self.assertFalse(self.widget.rb_cov.isEnabled()) + + self.send_signal(self.widget.Inputs.data, None) + self.assertFalse(self.widget.Warning.disabled_cov.is_shown()) + self.assertTrue(self.widget.rb_cov.isEnabled()) + if __name__ == "__main__": unittest.main() From 84f5e0feefb9fa316551ec3f587468c89f60a6fe Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 08:11:09 +0100 Subject: [PATCH 05/10] Outliers: Introduce unconditional commit --- Orange/widgets/data/owoutliers.py | 33 +++++++++++++------------------ 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index e56a6011351..16811f11862 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -36,6 +36,7 @@ class Outputs: cont = Setting(10) empirical_covariance = Setting(False) support_fraction = Setting(1) + auto_commit = Setting(True) MAX_FEATURES = 1500 @@ -62,10 +63,12 @@ def __init__(self): gui.widgetLabel(ibox, 'Nu:', tooltip=tooltip) self.nu_slider = gui.hSlider( ibox, self, "nu", minValue=1, maxValue=100, ticks=10, - labelFormat="%d %%", callback=self.nu_changed, tooltip=tooltip) + labelFormat="%d %%", callback=self.__svm_param_changed, + tooltip=tooltip) self.gamma_spin = gui.spin( ibox, self, "gamma", label="Kernel coefficient:", step=1e-2, - spinType=float, minv=0.01, maxv=10, callback=self.gamma_changed) + spinType=float, minv=0.01, maxv=10, + callback=self.__svm_param_changed) gui.separator(detection, 12) self.rb_cov = gui.appendRadioButton(detection, "Covariance estimator") @@ -73,39 +76,31 @@ def __init__(self): self.l_cov = gui.widgetLabel(ibox, 'Contamination:') self.cont_slider = gui.hSlider( ibox, self, "cont", minValue=0, maxValue=100, ticks=10, - labelFormat="%d %%", callback=self.cont_changed) + labelFormat="%d %%", callback=self.__cov_param_changed) ebox = gui.hBox(ibox) self.cb_emp_cov = gui.checkBox( ebox, self, "empirical_covariance", - "Support fraction:", callback=self.empirical_changed) + "Support fraction:", callback=self.__cov_param_changed) self.support_fraction_spin = gui.spin( ebox, self, "support_fraction", step=1e-1, spinType=float, - minv=0.1, maxv=10, callback=self.support_fraction_changed) + minv=0.1, maxv=10, callback=self.__cov_param_changed) gui.separator(detection, 12) - gui.button(self.buttonsArea, self, "Detect Outliers", - callback=self.commit) + gui.auto_send(self.controlArea, self, "auto_commit") self.layout().setSizeConstraint(QLayout.SetFixedSize) self.info.set_input_summary(self.info.NoInput) self.info.set_output_summary(self.info.NoOutput) - def nu_changed(self): + def __svm_param_changed(self): self.outlier_method = self.OneClassSVM + self.commit() - def gamma_changed(self): - self.outlier_method = self.OneClassSVM - - def cont_changed(self): - self.outlier_method = self.Covariance - - def support_fraction_changed(self): - self.outlier_method = self.Covariance - - def empirical_changed(self): + def __cov_param_changed(self): self.outlier_method = self.Covariance + self.commit() def enable_covariance(self, enable=True): self.rb_cov.setEnabled(enable) @@ -121,7 +116,7 @@ def set_data(self, data): self.data = data self.info.set_input_summary(len(data) if data else self.info.NoOutput) self.enable_controls() - self.commit() + self.unconditional_commit() def enable_controls(self): self.enable_covariance() From a590d07c76c88e504306aaa95d58785494a537d3 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 10:53:44 +0100 Subject: [PATCH 06/10] Outliers: Separate editor for each method --- Orange/classification/outlier_detection.py | 3 + Orange/classification/svm.py | 1 + Orange/widgets/data/owoutliers.py | 205 ++++++++++++------- Orange/widgets/data/tests/test_owoutliers.py | 23 ++- 4 files changed, 155 insertions(+), 77 deletions(-) diff --git a/Orange/classification/outlier_detection.py b/Orange/classification/outlier_detection.py index a22a4c1a1fc..6892a04d112 100644 --- a/Orange/classification/outlier_detection.py +++ b/Orange/classification/outlier_detection.py @@ -17,6 +17,7 @@ def __call__(self, data: Table): class LocalOutlierFactorLearner(_OutlierDetector): __wraps__ = LocalOutlierFactor + name = "Local Outlier Factor" def __init__(self, n_neighbors=20, algorithm="auto", leaf_size=30, metric="minkowski", p=2, metric_params=None, @@ -28,6 +29,7 @@ def __init__(self, n_neighbors=20, algorithm="auto", leaf_size=30, class IsolationForestLearner(_OutlierDetector): __wraps__ = IsolationForest + name = "Isolation Forest" def __init__(self, n_estimators=100, max_samples='auto', contamination='auto', max_features=1.0, bootstrap=False, @@ -58,6 +60,7 @@ def mahalanobis(self, observations): class EllipticEnvelopeLearner(_OutlierDetector): __wraps__ = EllipticEnvelope __returns__ = EllipticEnvelopeClassifier + name = "Covariance Estimator" def __init__(self, store_precision=True, assume_centered=False, support_fraction=None, contamination=0.1, diff --git a/Orange/classification/svm.py b/Orange/classification/svm.py index 689e654f0ef..c603161320c 100644 --- a/Orange/classification/svm.py +++ b/Orange/classification/svm.py @@ -69,6 +69,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma="auto", coef0=0.0, class OneClassSVMLearner(SklLearnerBase): + name = "One class SVM" __wraps__ = skl_svm.OneClassSVM preprocessors = svm_pps diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 16811f11862..c6b343a2711 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -1,7 +1,10 @@ import numpy as np -from AnyQt.QtWidgets import QLayout -from Orange.base import SklLearner +from AnyQt.QtCore import Signal +from AnyQt.QtWidgets import QWidget, QVBoxLayout + +from orangewidget.settings import SettingProvider + from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner from Orange.data import Table, Domain, ContinuousVariable from Orange.widgets import gui @@ -11,6 +14,73 @@ from Orange.widgets.widget import Msg, Input, Output, OWWidget +class ParametersEditor(QWidget, gui.OWComponent): + param_changed = Signal() + + def __init__(self, parent): + QWidget.__init__(self, parent) + gui.OWComponent.__init__(self, parent) + + self.setMinimumWidth(300) + layout = QVBoxLayout() + layout.setContentsMargins(0, 0, 0, 0) + self.setLayout(layout) + self.param_box = gui.vBox(self, spacing=0) + + def parameter_changed(self): + self.param_changed.emit() + + def get_parameters(self): + raise NotImplementedError + + +class SVMEditor(ParametersEditor): + nu = Setting(50) + gamma = Setting(0.01) + + def __init__(self, parent): + super().__init__(parent) + + tooltip = "An upper bound on the fraction of training errors and a " \ + "lower bound of the fraction of support vectors" + gui.widgetLabel(self.param_box, "Contamination:", tooltip=tooltip) + gui.hSlider(self.param_box, self, "nu", minValue=1, maxValue=100, + ticks=10, labelFormat="%d %%", tooltip=tooltip, + callback=self.parameter_changed) + gui.doubleSpin(self.param_box, self, "gamma", + label="Kernel coefficient:", step=1e-2, minv=0.01, + maxv=10, callback=self.parameter_changed) + + def get_parameters(self): + return {"nu": self.nu / 100, + "gamma": self.gamma} + + +class CovarianceEditor(ParametersEditor): + cont = Setting(10) + empirical_covariance = Setting(False) + support_fraction = Setting(1) + + def __init__(self, parent): + super().__init__(parent) + + gui.widgetLabel(self.param_box, "Contamination:") + gui.hSlider(self.param_box, self, "cont", minValue=0, + maxValue=100, ticks=10, labelFormat="%d %%", + callback=self.parameter_changed) + + ebox = gui.hBox(self.param_box) + gui.checkBox(ebox, self, "empirical_covariance", + "Support fraction:", callback=self.parameter_changed) + gui.doubleSpin(ebox, self, "support_fraction", step=1e-1, + minv=0.1, maxv=10, callback=self.parameter_changed) + + def get_parameters(self): + fraction = self.support_fraction if self.empirical_covariance else None + return {"contamination": self.cont / 100, + "support_fraction": fraction} + + class OWOutliers(OWWidget): name = "Outliers" description = "Detect outliers." @@ -27,15 +97,15 @@ class Outputs: outliers = Output("Outliers", Table) want_main_area = False + resizing_enabled = False OneClassSVM, Covariance = range(2) + METHODS = (OneClassSVMLearner, EllipticEnvelopeLearner) + svm_editor = SettingProvider(SVMEditor) + cov_editor = SettingProvider(CovarianceEditor) + settings_version = 2 outlier_method = Setting(OneClassSVM) - nu = Setting(50) - gamma = Setting(0.01) - cont = Setting(10) - empirical_covariance = Setting(False) - support_fraction = Setting(1) auto_commit = Setting(True) MAX_FEATURES = 1500 @@ -49,65 +119,50 @@ class Error(OWWidget.Error): def __init__(self): super().__init__() - self.data = None - self.n_inliers = self.n_outliers = None - - box = gui.vBox(self.controlArea, "Outlier Detection Method") - detection = gui.radioButtons(box, self, "outlier_method") - - gui.appendRadioButton(detection, - "One class SVM with non-linear kernel (RBF)") - ibox = gui.indentedBox(detection) - tooltip = "An upper bound on the fraction of training errors and a " \ - "lower bound of the fraction of support vectors" - gui.widgetLabel(ibox, 'Nu:', tooltip=tooltip) - self.nu_slider = gui.hSlider( - ibox, self, "nu", minValue=1, maxValue=100, ticks=10, - labelFormat="%d %%", callback=self.__svm_param_changed, - tooltip=tooltip) - self.gamma_spin = gui.spin( - ibox, self, "gamma", label="Kernel coefficient:", step=1e-2, - spinType=float, minv=0.01, maxv=10, - callback=self.__svm_param_changed) - gui.separator(detection, 12) - - self.rb_cov = gui.appendRadioButton(detection, "Covariance estimator") - ibox = gui.indentedBox(detection) - self.l_cov = gui.widgetLabel(ibox, 'Contamination:') - self.cont_slider = gui.hSlider( - ibox, self, "cont", minValue=0, maxValue=100, ticks=10, - labelFormat="%d %%", callback=self.__cov_param_changed) - - ebox = gui.hBox(ibox) - self.cb_emp_cov = gui.checkBox( - ebox, self, "empirical_covariance", - "Support fraction:", callback=self.__cov_param_changed) - self.support_fraction_spin = gui.spin( - ebox, self, "support_fraction", step=1e-1, spinType=float, - minv=0.1, maxv=10, callback=self.__cov_param_changed) - - gui.separator(detection, 12) + self.data = None # type: Table + self.n_inliers = None # type: int + self.n_outliers = None # type: int + self.editors = None # type: Tuple[ParametersEditor] + self.current_editor = None # type: ParametersEditor + self.method_combo = None # type: QComboBox + self.init_gui() + + def init_gui(self): + box = gui.vBox(self.controlArea, "Method") + self.method_combo = gui.comboBox(box, self, "outlier_method", + items=[m.name for m in self.METHODS], + callback=self.__method_changed) + + self._init_editors() gui.auto_send(self.controlArea, self, "auto_commit") - self.layout().setSizeConstraint(QLayout.SetFixedSize) self.info.set_input_summary(self.info.NoInput) self.info.set_output_summary(self.info.NoOutput) - def __svm_param_changed(self): - self.outlier_method = self.OneClassSVM - self.commit() + def _init_editors(self): + self.svm_editor = SVMEditor(self) + self.cov_editor = CovarianceEditor(self) + + box = gui.vBox(self.controlArea, "Parameters") + self.editors = (self.svm_editor, + self.cov_editor) + for editor in self.editors: + editor.param_changed.connect(lambda: self.commit()) + box.layout().addWidget(editor) + editor.hide() + + self.set_current_editor() - def __cov_param_changed(self): - self.outlier_method = self.Covariance + def __method_changed(self): + self.set_current_editor() self.commit() - def enable_covariance(self, enable=True): - self.rb_cov.setEnabled(enable) - self.l_cov.setEnabled(enable) - self.cont_slider.setEnabled(enable) - self.cb_emp_cov.setEnabled(enable) - self.support_fraction_spin.setEnabled(enable) + def set_current_editor(self): + if self.current_editor: + self.current_editor.hide() + self.current_editor = self.editors[self.outlier_method] + self.current_editor.show() @Inputs.data @check_sql_input @@ -119,10 +174,11 @@ def set_data(self, data): self.unconditional_commit() def enable_controls(self): - self.enable_covariance() + self.method_combo.model().item(self.Covariance).setEnabled(True) if self.data and len(self.data.domain.attributes) > self.MAX_FEATURES: self.outlier_method = self.OneClassSVM - self.enable_covariance(False) + self.set_current_editor() + self.method_combo.model().item(self.Covariance).setEnabled(False) self.Warning.disabled_cov() def _get_outliers(self): @@ -143,7 +199,6 @@ def _get_outliers(self): outliers = amended_data[outliers_ind] self.n_inliers = len(inliers) self.n_outliers = len(outliers) - return inliers, outliers def commit(self): @@ -158,15 +213,9 @@ def commit(self): self.Outputs.outliers.send(outliers) def detect_outliers(self): - if self.outlier_method == self.OneClassSVM: - learner = OneClassSVMLearner( - gamma=self.gamma, nu=self.nu / 100, - preprocessors=SklLearner.preprocessors) - else: - learner = EllipticEnvelopeLearner( - support_fraction=self.support_fraction - if self.empirical_covariance else None, - contamination=self.cont / 100.) + learner_class = self.METHODS[self.outlier_method] + kwargs = self.current_editor.get_parameters() + learner = learner_class(**kwargs) model = learner(self.data) y_pred = model(self.data) amended_data = self.amended_data(model) @@ -198,14 +247,24 @@ def send_report(self): "Detection", (("Detection method", "One class SVM with non-linear kernel (RBF)"), - ("Regularization (nu)", self.nu), - ("Kernel coefficient", self.gamma))) + ("Regularization (nu)", self.svm_editor.nu), + ("Kernel coefficient", self.svm_editor.gamma))) else: self.report_items( "Detection", (("Detection method", "Covariance estimator"), - ("Contamination", self.cont), - ("Support fraction", self.support_fraction))) + ("Contamination", self.cov_editor.cont), + ("Support fraction", self.cov_editor.support_fraction))) + + @classmethod + def migrate_settings(cls, settings, version): + if version is None or version < 2: + settings["svm_editor"] = {"nu": settings.get("nu", 50), + "gamma": settings.get("gamma", 0.01)} + ec, sf = "empirical_covariance", "support_fraction" + settings["cov_editor"] = {"cont": settings.get("cont", 10), + ec: settings.get(ec, False), + sf: settings.get(sf, 1)} if __name__ == "__main__": # pragma: no cover diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 469ad319e6b..7ee0b41dce4 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -63,21 +63,36 @@ def test_in_out_summary(self): @patch("Orange.widgets.data.owoutliers.OWOutliers.MAX_FEATURES", 3) @patch("Orange.widgets.data.owoutliers.OWOutliers.commit", Mock()) def test_covariance_enabled(self): + cov_item = self.widget.method_combo.model().item(self.widget.Covariance) self.send_signal(self.widget.Inputs.data, self.iris) self.assertTrue(self.widget.Warning.disabled_cov.is_shown()) - self.assertFalse(self.widget.rb_cov.isEnabled()) + self.assertFalse(cov_item.isEnabled()) self.send_signal(self.widget.Inputs.data, self.iris[:, :2]) self.assertFalse(self.widget.Warning.disabled_cov.is_shown()) - self.assertTrue(self.widget.rb_cov.isEnabled()) + self.assertTrue(cov_item.isEnabled()) self.send_signal(self.widget.Inputs.data, self.iris) self.assertTrue(self.widget.Warning.disabled_cov.is_shown()) - self.assertFalse(self.widget.rb_cov.isEnabled()) + self.assertFalse(cov_item.isEnabled()) self.send_signal(self.widget.Inputs.data, None) self.assertFalse(self.widget.Warning.disabled_cov.is_shown()) - self.assertTrue(self.widget.rb_cov.isEnabled()) + self.assertTrue(cov_item.isEnabled()) + + def test_migrate_settings(self): + settings = {"cont": 20, "empirical_covariance": True, + "gamma": 0.04, "nu": 30, "outlier_method": 0, + "support_fraction": 0.5, "__version__": 1} + + widget = self.create_widget(OWOutliers, stored_settings=settings) + self.send_signal(widget.Inputs.data, self.iris) + self.assertEqual(widget.svm_editor.nu, 30) + self.assertEqual(widget.svm_editor.gamma, 0.04) + + self.assertEqual(widget.cov_editor.cont, 20) + self.assertEqual(widget.cov_editor.empirical_covariance, True) + self.assertEqual(widget.cov_editor.support_fraction, 0.5) if __name__ == "__main__": From 8d4e4e6ae7737c38e998bf733190e39bdcb05266 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 12:38:01 +0100 Subject: [PATCH 07/10] Outliers: Include LocalOutlierFactor and IsolationForest --- Orange/widgets/data/owoutliers.py | 108 +++++++++++++++---- Orange/widgets/data/tests/test_owoutliers.py | 13 ++- 2 files changed, 102 insertions(+), 19 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index c6b343a2711..1b3ee5f43c5 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -1,11 +1,15 @@ +from typing import Dict, Tuple + import numpy as np -from AnyQt.QtCore import Signal +from AnyQt.QtCore import Signal, Qt from AnyQt.QtWidgets import QWidget, QVBoxLayout from orangewidget.settings import SettingProvider -from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner +from Orange.base import Model +from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner,\ + LocalOutlierFactorLearner, IsolationForestLearner from Orange.data import Table, Domain, ContinuousVariable from Orange.widgets import gui from Orange.widgets.settings import Setting @@ -30,7 +34,7 @@ def __init__(self, parent): def parameter_changed(self): self.param_changed.emit() - def get_parameters(self): + def get_parameters(self) -> Dict: raise NotImplementedError @@ -43,7 +47,7 @@ def __init__(self, parent): tooltip = "An upper bound on the fraction of training errors and a " \ "lower bound of the fraction of support vectors" - gui.widgetLabel(self.param_box, "Contamination:", tooltip=tooltip) + gui.widgetLabel(self.param_box, "Nu:", tooltip=tooltip) gui.hSlider(self.param_box, self, "nu", minValue=1, maxValue=100, ticks=10, labelFormat="%d %%", tooltip=tooltip, callback=self.parameter_changed) @@ -81,6 +85,53 @@ def get_parameters(self): "support_fraction": fraction} +class LocalOutlierFactorEditor(ParametersEditor): + METRICS = ("euclidean", "manhattan", "cosine", "jaccard", + "hamming", "minkowski") + + n_neighbors = Setting(20) + cont = Setting(10) + metric_index = Setting(0) + + def __init__(self, parent): + super().__init__(parent) + + gui.widgetLabel(self.param_box, "Contamination:") + gui.hSlider(self.param_box, self, "cont", minValue=1, + maxValue=50, ticks=5, labelFormat="%d %%", + callback=self.parameter_changed) + gui.spin(self.param_box, self, "n_neighbors", label="Neighbors:", + minv=1, maxv=100000, callback=self.parameter_changed) + gui.comboBox(self.param_box, self, "metric_index", label="Metric:", + orientation=Qt.Horizontal, + items=[m.capitalize() for m in self.METRICS], + callback=self.parameter_changed) + + def get_parameters(self): + return {"n_neighbors": self.n_neighbors, + "contamination": self.cont / 100, + "metric": self.METRICS[self.metric_index]} + + +class IsolationForestEditor(ParametersEditor): + cont = Setting(10) + replicable = Setting(False) + + def __init__(self, parent): + super().__init__(parent) + + gui.widgetLabel(self.param_box, "Contamination:") + gui.hSlider(self.param_box, self, "cont", minValue=0, + maxValue=100, ticks=10, labelFormat="%d %%", + callback=self.parameter_changed) + gui.checkBox(self.param_box, self, "replicable", + "Replicable training", callback=self.parameter_changed) + + def get_parameters(self): + return {"contamination": self.cont / 100, + "random_state": 42 if self.replicable else None} + + class OWOutliers(OWWidget): name = "Outliers" description = "Detect outliers." @@ -99,10 +150,13 @@ class Outputs: want_main_area = False resizing_enabled = False - OneClassSVM, Covariance = range(2) - METHODS = (OneClassSVMLearner, EllipticEnvelopeLearner) + OneClassSVM, Covariance, LOF, IsolationForest = range(4) + METHODS = (OneClassSVMLearner, EllipticEnvelopeLearner, + LocalOutlierFactorLearner, IsolationForestLearner) svm_editor = SettingProvider(SVMEditor) cov_editor = SettingProvider(CovarianceEditor) + lof_editor = SettingProvider(LocalOutlierFactorEditor) + isf_editor = SettingProvider(IsolationForestEditor) settings_version = 2 outlier_method = Setting(OneClassSVM) @@ -143,10 +197,12 @@ def init_gui(self): def _init_editors(self): self.svm_editor = SVMEditor(self) self.cov_editor = CovarianceEditor(self) + self.lof_editor = LocalOutlierFactorEditor(self) + self.isf_editor = IsolationForestEditor(self) box = gui.vBox(self.controlArea, "Parameters") - self.editors = (self.svm_editor, - self.cov_editor) + self.editors = (self.svm_editor, self.cov_editor, + self.lof_editor, self.isf_editor) for editor in self.editors: editor.param_changed.connect(lambda: self.commit()) box.layout().addWidget(editor) @@ -181,7 +237,7 @@ def enable_controls(self): self.method_combo.model().item(self.Covariance).setEnabled(False) self.Warning.disabled_cov() - def _get_outliers(self): + def _get_outliers(self) -> Tuple[Table, Table, Table]: self.Error.singular_cov.clear() self.Error.memory_error.clear() try: @@ -212,7 +268,7 @@ def commit(self): self.Outputs.inliers.send(inliers) self.Outputs.outliers.send(outliers) - def detect_outliers(self): + def detect_outliers(self) -> Tuple[np.ndarray, Table]: learner_class = self.METHODS[self.outlier_method] kwargs = self.current_editor.get_parameters() learner = learner_class(**kwargs) @@ -221,7 +277,7 @@ def detect_outliers(self): amended_data = self.amended_data(model) return np.array(y_pred), amended_data - def amended_data(self, model): + def amended_data(self, model: Model) -> Table: if self.outlier_method != self.Covariance: return self.data mahal = model.mahalanobis(self.data.X) @@ -242,22 +298,38 @@ def send_report(self): (("Input instances", len(self.data)), ("Inliers", self.n_inliers), ("Outliers", self.n_outliers))) - if self.outlier_method == 0: + + params = self.current_editor.get_parameters() + if self.outlier_method == self.OneClassSVM: self.report_items( "Detection", (("Detection method", "One class SVM with non-linear kernel (RBF)"), - ("Regularization (nu)", self.svm_editor.nu), - ("Kernel coefficient", self.svm_editor.gamma))) - else: + ("Regularization (nu)", params["nu"]), + ("Kernel coefficient", params["gamma"]))) + elif self.outlier_method == self.Covariance: self.report_items( "Detection", (("Detection method", "Covariance estimator"), - ("Contamination", self.cov_editor.cont), - ("Support fraction", self.cov_editor.support_fraction))) + ("Contamination", params["contamination"]), + ("Support fraction", params["support_fraction"]))) + elif self.outlier_method == self.LOF: + self.report_items( + "Detection", + (("Detection method", "Local Outlier Factor"), + ("Contamination", params["contamination"]), + ("Number of neighbors", params["n_neighbors"]), + ("Metric", params["metric"]))) + elif self.outlier_method == self.IsolationForest: + self.report_items( + "Detection", + (("Detection method", "Isolation Forest"), + ("Contamination", params["contamination"]))) + else: + raise NotImplementedError @classmethod - def migrate_settings(cls, settings, version): + def migrate_settings(cls, settings: Dict, version: int): if version is None or version < 2: settings["svm_editor"] = {"nu": settings.get("nu", 50), "gamma": settings.get("gamma", 0.01)} diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 7ee0b41dce4..30309f8f7d7 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -8,7 +8,7 @@ from Orange.data import Table from Orange.widgets.data.owoutliers import OWOutliers -from Orange.widgets.tests.base import WidgetTest +from Orange.widgets.tests.base import WidgetTest, simulate class TestOWOutliers(WidgetTest): @@ -25,6 +25,17 @@ def test_data(self): self.send_signal(self.widget.Inputs.data, None) self.assertEqual(self.widget.data, None) self.assertIsNone(self.get_output(self.widget.Outputs.inliers)) + self.assertIsNone(self.get_output(self.widget.Outputs.outliers)) + + def test_methods(self): + def callback(): + self.widget.send_report() + self.assertIsNotNone(self.get_output(self.widget.Outputs.inliers)) + self.assertIsNotNone(self.get_output(self.widget.Outputs.outliers)) + + self.send_signal(self.widget.Inputs.data, self.iris) + simulate.combobox_run_through_all(self.widget.method_combo, + callback=callback) def test_memory_error(self): """ From 970c4137fc26af14b7d1a8136d241bb8d92d1c10 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 12:40:42 +0100 Subject: [PATCH 08/10] Outliers: LocalOutlierFactor as default method --- Orange/widgets/data/owoutliers.py | 4 ++-- Orange/widgets/data/tests/test_owoutliers.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 1b3ee5f43c5..1f4ee35d371 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -159,7 +159,7 @@ class Outputs: isf_editor = SettingProvider(IsolationForestEditor) settings_version = 2 - outlier_method = Setting(OneClassSVM) + outlier_method = Setting(LOF) auto_commit = Setting(True) MAX_FEATURES = 1500 @@ -232,7 +232,7 @@ def set_data(self, data): def enable_controls(self): self.method_combo.model().item(self.Covariance).setEnabled(True) if self.data and len(self.data.domain.attributes) > self.MAX_FEATURES: - self.outlier_method = self.OneClassSVM + self.outlier_method = self.LOF self.set_current_editor() self.method_combo.model().item(self.Covariance).setEnabled(False) self.Warning.disabled_cov() diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 30309f8f7d7..349c9583ebd 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -20,8 +20,8 @@ def test_data(self): """Check widget's data and the output with data on the input""" self.send_signal(self.widget.Inputs.data, self.iris) self.assertEqual(self.widget.data, self.iris) - self.assertEqual(len(self.get_output(self.widget.Outputs.inliers)), 76) - self.assertEqual(len(self.get_output(self.widget.Outputs.outliers)), 74) + self.assertEqual(len(self.get_output(self.widget.Outputs.inliers)), 136) + self.assertEqual(len(self.get_output(self.widget.Outputs.outliers)), 14) self.send_signal(self.widget.Inputs.data, None) self.assertEqual(self.widget.data, None) self.assertIsNone(self.get_output(self.widget.Outputs.inliers)) @@ -65,7 +65,7 @@ def test_in_out_summary(self): self.send_signal(self.widget.Inputs.data, self.iris) self.assertEqual(info._StateInfo__input_summary.brief, "150") - self.assertEqual(info._StateInfo__output_summary.brief, "76") + self.assertEqual(info._StateInfo__output_summary.brief, "136") self.send_signal(self.widget.Inputs.data, None) self.assertEqual(info._StateInfo__input_summary.brief, "") From a923f4578cb5f125aa4c2573d1f844a5d8c9c21f Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 13:42:57 +0100 Subject: [PATCH 09/10] Outliers: Output annotated data --- Orange/widgets/data/owoutliers.py | 30 ++++++++++++++++---- Orange/widgets/data/tests/test_owoutliers.py | 3 ++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 1f4ee35d371..db0d244fad4 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -10,7 +10,8 @@ from Orange.base import Model from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner,\ LocalOutlierFactorLearner, IsolationForestLearner -from Orange.data import Table, Domain, ContinuousVariable +from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable +from Orange.data.util import get_unique_names from Orange.widgets import gui from Orange.widgets.settings import Setting from Orange.widgets.utils.sql import check_sql_input @@ -146,6 +147,7 @@ class Inputs: class Outputs: inliers = Output("Inliers", Table) outliers = Output("Outliers", Table) + data = Output("Data", Table) want_main_area = False resizing_enabled = False @@ -244,10 +246,10 @@ def _get_outliers(self) -> Tuple[Table, Table, Table]: y_pred, amended_data = self.detect_outliers() except ValueError: self.Error.singular_cov() - return None, None + return None, None, None except MemoryError: self.Error.memory_error() - return None, None + return None, None, None else: inliers_ind = np.where(y_pred == 1)[0] outliers_ind = np.where(y_pred == -1)[0] @@ -255,18 +257,19 @@ def _get_outliers(self) -> Tuple[Table, Table, Table]: outliers = amended_data[outliers_ind] self.n_inliers = len(inliers) self.n_outliers = len(outliers) - return inliers, outliers + return inliers, outliers, self.annotated_data(amended_data, y_pred) def commit(self): - inliers = outliers = None + inliers = outliers = data = None self.n_inliers = self.n_outliers = None if self.data: - inliers, outliers = self._get_outliers() + inliers, outliers, data = self._get_outliers() summary = len(inliers) if inliers else self.info.NoOutput self.info.set_output_summary(summary) self.Outputs.inliers.send(inliers) self.Outputs.outliers.send(outliers) + self.Outputs.data.send(data) def detect_outliers(self) -> Tuple[np.ndarray, Table]: learner_class = self.METHODS[self.outlier_method] @@ -291,6 +294,21 @@ def amended_data(self, model: Model) -> Table: amended_data.metas = np.hstack((self.data.metas, mahal)) return amended_data + @staticmethod + def annotated_data(data: Table, labels: np.ndarray) -> Table: + domain = data.domain + names = [v.name for v in domain.variables + domain.metas] + name = get_unique_names(names, "Outlier") + + outlier_var = DiscreteVariable(name, values=["Yes", "No"]) + metas = domain.metas + (outlier_var,) + domain = Domain(domain.attributes, domain.class_vars, metas) + data = data.transform(domain) + + labels[labels == -1] = 0 + data.metas[:, -1] = labels + return data + def send_report(self): if self.n_outliers is None or self.n_inliers is None: return diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 349c9583ebd..906aefd8d97 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -22,16 +22,19 @@ def test_data(self): self.assertEqual(self.widget.data, self.iris) self.assertEqual(len(self.get_output(self.widget.Outputs.inliers)), 136) self.assertEqual(len(self.get_output(self.widget.Outputs.outliers)), 14) + self.assertEqual(len(self.get_output(self.widget.Outputs.data)), 150) self.send_signal(self.widget.Inputs.data, None) self.assertEqual(self.widget.data, None) self.assertIsNone(self.get_output(self.widget.Outputs.inliers)) self.assertIsNone(self.get_output(self.widget.Outputs.outliers)) + self.assertIsNone(self.get_output(self.widget.Outputs.data)) def test_methods(self): def callback(): self.widget.send_report() self.assertIsNotNone(self.get_output(self.widget.Outputs.inliers)) self.assertIsNotNone(self.get_output(self.widget.Outputs.outliers)) + self.assertIsNotNone(self.get_output(self.widget.Outputs.data)) self.send_signal(self.widget.Inputs.data, self.iris) simulate.combobox_run_through_all(self.widget.method_combo, From 40f5521ea20da2cd4b7088981339a3d58b8c1720 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 16 Jan 2020 14:57:53 +0100 Subject: [PATCH 10/10] Outliers: LocalOutlierFactor speedup --- Orange/widgets/data/owoutliers.py | 1 + Orange/widgets/data/tests/test_owoutliers.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index db0d244fad4..99bc219aaae 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -111,6 +111,7 @@ def __init__(self, parent): def get_parameters(self): return {"n_neighbors": self.n_neighbors, "contamination": self.cont / 100, + "algorithm": "brute", # works faster for big datasets "metric": self.METRICS[self.metric_index]} diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 906aefd8d97..fa06dd6a976 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -20,8 +20,8 @@ def test_data(self): """Check widget's data and the output with data on the input""" self.send_signal(self.widget.Inputs.data, self.iris) self.assertEqual(self.widget.data, self.iris) - self.assertEqual(len(self.get_output(self.widget.Outputs.inliers)), 136) - self.assertEqual(len(self.get_output(self.widget.Outputs.outliers)), 14) + self.assertEqual(len(self.get_output(self.widget.Outputs.inliers)), 135) + self.assertEqual(len(self.get_output(self.widget.Outputs.outliers)), 15) self.assertEqual(len(self.get_output(self.widget.Outputs.data)), 150) self.send_signal(self.widget.Inputs.data, None) self.assertEqual(self.widget.data, None) @@ -68,7 +68,7 @@ def test_in_out_summary(self): self.send_signal(self.widget.Inputs.data, self.iris) self.assertEqual(info._StateInfo__input_summary.brief, "150") - self.assertEqual(info._StateInfo__output_summary.brief, "136") + self.assertEqual(info._StateInfo__output_summary.brief, "135") self.send_signal(self.widget.Inputs.data, None) self.assertEqual(info._StateInfo__input_summary.brief, "")