diff --git a/Orange/widgets/evaluate/owliftcurve.py b/Orange/widgets/evaluate/owliftcurve.py index 43f8fb9407c..a181984e1f6 100644 --- a/Orange/widgets/evaluate/owliftcurve.py +++ b/Orange/widgets/evaluate/owliftcurve.py @@ -1,12 +1,7 @@ -""" -Lift Curve Widget ------------------ - -""" -from collections import namedtuple +from enum import IntEnum +from typing import NamedTuple, Dict, Tuple import numpy as np -import sklearn.metrics as skl_metrics from AnyQt.QtWidgets import QListView, QFrame from AnyQt.QtGui import QColor, QPen, QPalette, QFont @@ -14,6 +9,8 @@ import pyqtgraph as pg +from orangewidget.widget import Msg + import Orange from Orange.widgets import widget, gui, settings from Orange.widgets.evaluate.contexthandlers import \ @@ -26,33 +23,24 @@ from Orange.widgets import report -CurvePoints = namedtuple( - "CurvePoints", - ["cases", "tpr", "thresholds"] +CurveData = NamedTuple( + "CurveData", + [("contacted", np.ndarray), # classified as positive + ("respondents", np.ndarray), # true positive rate + ("thresholds", np.ndarray)] ) -CurvePoints.is_valid = property(lambda self: self.cases.size > 0) - -LiftCurve = namedtuple( - "LiftCurve", - ["points", "hull"] -) -LiftCurve.is_valid = property(lambda self: self.points.is_valid) - +CurveData.is_valid = property(lambda self: self.contacted.size > 0) -def liftCurve_from_results(results, clf_index, target): - x, y, thresholds = lift_curve_from_results(results, target, clf_index) - points = CurvePoints(x, y, thresholds) - hull = CurvePoints(*convex_hull([(x, y, thresholds)])) - return LiftCurve(points, hull) +PointsAndHull = NamedTuple( + "PointsAndHull", + [("points", CurveData), + ("hull", CurveData)] +) -PlotCurve = namedtuple( - "PlotCurve", - ["curve", - "curve_item", - "hull_item"] -) +class CurveTypes(IntEnum): + LiftCurve, CumulativeGains = range(2) class OWLiftCurve(widget.OWWidget): @@ -61,49 +49,63 @@ class OWLiftCurve(widget.OWWidget): "from the evaluation of classifiers." 
icon = "icons/LiftCurve.svg" priority = 1020 - keywords = [] + keywords = ["lift", "cumulative gain"] class Inputs: - evaluation_results = Input("Evaluation Results", Orange.evaluation.Results) + evaluation_results = Input( + "Evaluation Results", Orange.evaluation.Results) + + class Warning(widget.OWWidget.Warning): + undefined_curves = Msg( + "Some curves are undefined; check models and data") + + class Error(widget.OWWidget.Error): + undefined_curves = Msg( + "No defined curves; check models and data") settingsHandler = EvaluationResultsContextHandler() target_index = settings.ContextSetting(0) selected_classifiers = settings.ContextSetting([]) - display_convex_hull = settings.Setting(False) - display_cost_func = settings.Setting(True) - - fp_cost = settings.Setting(500) - fn_cost = settings.Setting(500) - target_prior = settings.Setting(50.0) + display_convex_hull = settings.Setting(True) + curve_type = settings.Setting(CurveTypes.LiftCurve) graph_name = "plot" + YLabels = ("Lift", "TP Rate") + def __init__(self): super().__init__() self.results = None self.classifier_names = [] self.colors = [] - self._curve_data = {} - - box = gui.vBox(self.controlArea, "Plot") - tbox = gui.vBox(box, "Target Class") - tbox.setFlat(True) + self._points_hull: Dict[Tuple[int, int], PointsAndHull] = {} + box = gui.vBox(self.controlArea, box="Curve") self.target_cb = gui.comboBox( - tbox, self, "target_index", callback=self._on_target_changed, - contentsLength=8, searchable=True) + box, self, "target_index", + label="Target: ", orientation=Qt.Horizontal, + callback=self._on_target_changed, + contentsLength=8, searchable=True + ) + gui.radioButtons( + box, self, "curve_type", ("Lift Curve", "Cumulative Gains"), + callback=self._on_curve_type_changed + ) - cbox = gui.vBox(box, "Classifiers") - cbox.setFlat(True) self.classifiers_list_box = gui.listBox( - cbox, self, "selected_classifiers", "classifier_names", + self.controlArea, self, "selected_classifiers", "classifier_names", + 
box="Models", selectionMode=QListView.MultiSelection, - callback=self._on_classifiers_changed) + callback=self._on_classifiers_changed + ) + self.classifiers_list_box.setMaximumHeight(100) + + gui.checkBox(self.controlArea, self, "display_convex_hull", + "Show convex hull", box="Settings", callback=self._replot) - gui.checkBox(box, self, "display_convex_hull", - "Show lift convex hull", callback=self._replot) + gui.rubber(self.controlArea) self.plotview = pg.GraphicsView(background="w") self.plotview.setFrameStyle(QFrame.StyledPanel) @@ -117,25 +119,20 @@ def __init__(self): tickfont = QFont(self.font()) tickfont.setPixelSize(max(int(tickfont.pixelSize() * 2 // 3), 11)) - axis = self.plot.getAxis("bottom") - axis.setTickFont(tickfont) - axis.setPen(pen) - axis.setLabel("P Rate") - - axis = self.plot.getAxis("left") - axis.setTickFont(tickfont) - axis.setPen(pen) - axis.setLabel("TP Rate") + for pos, label in (("bottom", "P Rate"), ("left", "")): + axis = self.plot.getAxis(pos) + axis.setTickFont(tickfont) + axis.setPen(pen) + axis.setLabel(label) + self._set_left_label() self.plot.showGrid(True, True, alpha=0.1) - self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0), padding=0.05) self.plotview.setCentralItem(self.plot) self.mainArea.layout().addWidget(self.plotview) @Inputs.evaluation_results def set_results(self, results): - """Set the input evaluation results.""" self.closeContext() self.clear() self.results = check_results_adequacy(results, self.Error) @@ -146,91 +143,98 @@ def set_results(self, results): self._setup_plot() def clear(self): - """Clear the widget state.""" self.plot.clear() + self.Warning.clear() + self.Error.clear() self.results = None self.target_cb.clear() self.classifier_names = [] self.colors = [] - self._curve_data = {} + self._points_hull = {} def _initialize(self, results): - N = len(results.predicted) - - names = getattr(results, "learner_names", None) - if names is None: - names = ["#{}".format(i + 1) for i in range(N)] + n_models = 
len(results.predicted) - self.colors = colorpalettes.get_default_curve_colors(N) + self.classifier_names = getattr(results, "learner_names", None) \ + or [f"#{i}" for i in range(n_models)] + self.selected_classifiers = list(range(n_models)) - self.classifier_names = names - self.selected_classifiers = list(range(N)) - for i in range(N): + self.colors = colorpalettes.get_default_curve_colors(n_models) + for i, color in enumerate(self.colors): item = self.classifiers_list_box.item(i) - item.setIcon(colorpalettes.ColorIcon(self.colors[i])) - - self.target_cb.addItems(results.data.domain.class_var.values) - self.target_index = 0 - - def plot_curves(self, target, clf_idx): - if (target, clf_idx) not in self._curve_data: - curve = liftCurve_from_results(self.results, clf_idx, target) - color = self.colors[clf_idx] - pen = QPen(color, 1) - pen.setCosmetic(True) - shadow_pen = QPen(pen.color().lighter(160), 2.5) - shadow_pen.setCosmetic(True) - item = pg.PlotDataItem( - curve.points[0], curve.points[1], - pen=pen, shadowPen=shadow_pen, - symbol="+", symbolSize=3, symbolPen=shadow_pen, - antialias=True - ) - hull_item = pg.PlotDataItem( - curve.hull[0], curve.hull[1], - pen=pen, antialias=True - ) - self._curve_data[target, clf_idx] = \ - PlotCurve(curve, item, hull_item) - - return self._curve_data[target, clf_idx] - - def _setup_plot(self): - target = self.target_index - selected = self.selected_classifiers - curves = [self.plot_curves(target, clf_idx) for clf_idx in selected] - - for curve in curves: - self.plot.addItem(curve.curve_item) - - if self.display_convex_hull: - hull = convex_hull([c.curve.hull for c in curves]) - self.plot.plot(hull[0], hull[1], pen="y", antialias=True) + item.setIcon(colorpalettes.ColorIcon(color)) - pen = QPen(QColor(100, 100, 100, 100), 1, Qt.DashLine) - pen.setCosmetic(True) - self.plot.plot([0, 1], [0, 1], pen=pen, antialias=True) - - warning = "" - if not all(c.curve.is_valid for c in curves): - if any(c.curve.is_valid for c in curves): 
- warning = "Some lift curves are undefined" - else: - warning = "All lift curves are undefined" - - self.warning(warning) + class_values = results.data.domain.class_var.values + self.target_cb.addItems(class_values) + if class_values: + self.target_index = 0 def _replot(self): self.plot.clear() if self.results is not None: self._setup_plot() - def _on_target_changed(self): - self._replot() + _on_target_changed = _replot + _on_classifiers_changed = _replot - def _on_classifiers_changed(self): + def _on_curve_type_changed(self): + self._set_left_label() self._replot() + def _set_left_label(self): + self.plot.getAxis("left").setLabel(self.YLabels[self.curve_type]) + + def _setup_plot(self): + self._plot_default_line() + is_valid = [ + self._plot_curve(self.target_index, clf_idx) + for clf_idx in self.selected_classifiers + ] + self.plot.autoRange() + self._set_undefined_curves_err_warn(is_valid) + + def _plot_curve(self, target, clf_idx): + key = (target, clf_idx) + if key not in self._points_hull: + self._points_hull[key] = \ + points_from_results(self.results, target, clf_idx) + points, hull = self._points_hull[key] + + if not points.is_valid: + return False + + color = self.colors[clf_idx] + pen = QPen(color, 1) + pen.setCosmetic(True) + wide_pen = QPen(color, 3) + wide_pen.setCosmetic(True) + + def _plot(points, pen): + contacted, respondents, _ = points + if self.curve_type == CurveTypes.LiftCurve: + respondents = respondents / contacted + self.plot.plot(contacted, respondents, pen=pen, antialias=True) + + _plot(points, wide_pen if not self.display_convex_hull else pen) + if self.display_convex_hull: + _plot(hull, wide_pen) + return True + + def _plot_default_line(self): + pen = QPen(QColor(20, 20, 20), 1, Qt.DashLine) + pen.setCosmetic(True) + y0 = 1 if self.curve_type == CurveTypes.LiftCurve else 0 + self.plot.plot([0, 1], [y0, 1], pen=pen, antialias=True) + + def _set_undefined_curves_err_warn(self, is_valid): + self.Error.undefined_curves.clear() + 
self.Warning.undefined_curves.clear() + if not all(is_valid): + if any(is_valid): + self.Warning.undefined_curves() + else: + self.Error.undefined_curves() + def send_report(self): if self.results is None: return @@ -241,24 +245,40 @@ def send_report(self): self.report_caption(caption) -def lift_curve_from_results(results, target, clf_idx, subset=slice(0, -1)): - actual = results.actual[subset] - scores = results.probabilities[clf_idx][subset][:, target] - yrate, tpr, thresholds = lift_curve(actual, scores, target) +def points_from_results(results, target, clf_index): + x, y, thresholds = cumulative_gains_from_results(results, target, clf_index) + points = CurveData(x, y, thresholds) + hull = CurveData(*convex_hull([(x, y, thresholds)])) + return PointsAndHull(points, hull) + + +def cumulative_gains_from_results(results, target, clf_idx): + y_true = results.actual + scores = results.probabilities[clf_idx][:, target] + yrate, tpr, thresholds = cumulative_gains(y_true, scores, target) return yrate, tpr, thresholds -def lift_curve(ytrue, ypred, target=1): - P = np.sum(ytrue == target) - N = ytrue.size - P +def cumulative_gains(y_true, y_score, target=1): + if len(y_true) != len(y_score): + raise ValueError("array dimensions don't match") + + if not y_true.size: + return np.array([], dtype=int), np.array([], dtype=int), np.array([]) + + y_true = (y_true == target) + + desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] + y_score = y_score[desc_score_indices] + y_true = y_true[desc_score_indices] - if P == 0 or N == 0: - # Undefined TP and FP rate - return np.array([]), np.array([]), np.array([]) + distinct_value_indices = np.where(np.diff(y_score))[0] + threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] - fpr, tpr, thresholds = skl_metrics.roc_curve(ytrue, ypred, pos_label=target) - rpp = fpr * (N / (P + N)) + tpr * (P / (P + N)) - return rpp, tpr, thresholds + respondents = np.cumsum(y_true)[threshold_idxs] + respondents = respondents / 
respondents[-1] + contacted = (1 + threshold_idxs) / (1 + threshold_idxs[-1]) + return contacted, respondents, y_score[threshold_idxs] if __name__ == "__main__": # pragma: no cover diff --git a/Orange/widgets/evaluate/tests/test_owliftcurve.py b/Orange/widgets/evaluate/tests/test_owliftcurve.py index 165b986e6bc..9ba1ade2ece 100644 --- a/Orange/widgets/evaluate/tests/test_owliftcurve.py +++ b/Orange/widgets/evaluate/tests/test_owliftcurve.py @@ -1,4 +1,6 @@ import copy +import unittest +from unittest.mock import Mock import numpy as np @@ -9,7 +11,8 @@ from Orange.widgets.evaluate.tests.base import EvaluateTest from Orange.widgets.tests.base import WidgetTest from Orange.widgets.tests.utils import simulate -from Orange.widgets.evaluate.owliftcurve import OWLiftCurve +from Orange.widgets.evaluate.owliftcurve import OWLiftCurve, cumulative_gains, \ + cumulative_gains_from_results from Orange.tests import test_filename @@ -53,3 +56,66 @@ def test_nan_input(self): self.assertTrue(self.widget.Error.invalid_results.is_shown()) self.send_signal(self.widget.Inputs.evaluation_results, None) self.assertFalse(self.widget.Error.invalid_results.is_shown()) + + +class UtilsTest(unittest.TestCase): + @staticmethod + def test_cumulative_gains(): + shuffle = [1, 2, 0, 3, 5, 4] + y_true = np.array([1, 1, 0, 0, 1, 0])[shuffle] + y_scores = np.array([0.9, 0.6, 0.5, 0.4, 0.4, 0.2])[shuffle] + + assert_almost_equal = np.testing.assert_almost_equal + + contacted, respondents, thresholds = cumulative_gains(y_true, y_scores) + assert_almost_equal(contacted, np.array([1, 2, 3, 5, 6]) / 6) + assert_almost_equal(thresholds, [0.9, 0.6, 0.5, 0.4, 0.2]) + assert_almost_equal(respondents, np.array([1, 2, 2, 3, 3]) / 3) + + contacted, respondents, thresholds = cumulative_gains(y_true, 1 - y_scores, target=0) + assert_almost_equal(contacted, np.array([1, 3, 4, 5, 6]) / 6) + assert_almost_equal(thresholds, [0.8, 0.6, 0.5, 0.4, 0.1]) + assert_almost_equal(respondents, np.array([1, 2, 3, 3, 3]) / 3) + 
+ contacted, respondents, thresholds = \ + cumulative_gains(np.array([], dtype=int), np.array([])) + assert_almost_equal(contacted, []) + assert_almost_equal(respondents, []) + assert_almost_equal(thresholds, []) + + @staticmethod + def test_cumulative_gains_from_results(): + shuffle = [1, 2, 0, 3, 5, 4] + y_true = np.array([1, 1, 0, 0, 1, 0])[shuffle] + y_scores = np.array([0.9, 0.6, 0.5, 0.4, 0.4, 0.2])[shuffle] + + results = Mock() + results.actual = y_true + results.probabilities = \ + [Mock(), Mock(), np.vstack((1 - y_scores, y_scores)).T] + + assert_almost_equal = np.testing.assert_almost_equal + + contacted, respondents, thresholds = \ + cumulative_gains_from_results(results, 1, 2) + assert_almost_equal(thresholds, [0.9, 0.6, 0.5, 0.4, 0.2]) + assert_almost_equal(contacted, np.array([1, 2, 3, 5, 6]) / 6) + assert_almost_equal(respondents, np.array([1, 2, 2, 3, 3]) / 3) + + contacted, respondents, thresholds = \ + cumulative_gains_from_results(results, 0, 2) + assert_almost_equal(contacted, np.array([1, 3, 4, 5, 6]) / 6) + assert_almost_equal(thresholds, [0.8, 0.6, 0.5, 0.4, 0.1]) + assert_almost_equal(respondents, np.array([1, 2, 3, 3, 3]) / 3) + + results.actual = np.array([], dtype=int) + results.probabilities = np.empty((3, 0, 2)) + contacted, respondents, thresholds = \ + cumulative_gains(np.array([], dtype=int), np.array([])) + assert_almost_equal(contacted, []) + assert_almost_equal(respondents, []) + assert_almost_equal(thresholds, []) + + +if __name__ == "__main__": + unittest.main() diff --git a/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-cumulative-gain.png b/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-cumulative-gain.png new file mode 100644 index 00000000000..6450b7d7cf4 Binary files /dev/null and b/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-cumulative-gain.png differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-example.png 
b/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-example.png index ea4dcaae441..ec9e02e0f4c 100644 Binary files a/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-example.png and b/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-example.png differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-stamped.png b/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-stamped.png index 5384f68db2f..10241c49489 100644 Binary files a/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-stamped.png and b/doc/visual-programming/source/widgets/evaluate/images/LiftCurve-stamped.png differ diff --git a/doc/visual-programming/source/widgets/evaluate/images/LiftCurve.png b/doc/visual-programming/source/widgets/evaluate/images/LiftCurve.png index 40801016120..fd1d60d73cf 100644 Binary files a/doc/visual-programming/source/widgets/evaluate/images/LiftCurve.png and b/doc/visual-programming/source/widgets/evaluate/images/LiftCurve.png differ diff --git a/doc/visual-programming/source/widgets/evaluate/liftcurve.md b/doc/visual-programming/source/widgets/evaluate/liftcurve.md index c0f6ca4cef7..dd2bbd8e5d4 100644 --- a/doc/visual-programming/source/widgets/evaluate/liftcurve.md +++ b/doc/visual-programming/source/widgets/evaluate/liftcurve.md @@ -7,31 +7,28 @@ Measures the performance of a chosen classifier against a random classifier. - Evaluation Results: results of testing classification algorithms -The **Lift curve** shows the relation between the number of instances which were predicted positive and those that are indeed positive and thus measures the performance of a chosen classifier against a random classifier. The graph is constructed with the cumulative number of cases (in descending order of probability) on the x-axis and the cumulative number of true positives on the y-axis. 
Lift curve is often used in segmenting the population, e.g., plotting the number of responding customers against the number of all customers contacted. You can also determine the optimal classifier and its threshold from the graph. +The **Lift curve** shows the curves for analysing the proportion of true positive data instances in relation to the classifier's threshold or the number of instances that we classify as positive. -![](images/LiftCurve-stamped.png) +Cumulative gains chart shows the proportion of true positive instances (for example, the number of clients who accept the offer) as a function of the number of positive instances (the number of clients contacted), assuming the instances are ordered according to the model's probability of being positive (e.g. ranking of clients). -1. Choose the desired *Target class*. The default class is chosen alphabetically. -2. If test results contain more than one classifier, the user can choose which curves she or he wants to see plotted. Click on a classifier to select or deselect the curve. -3. *Show lift convex hull* plots a convex hull over lift curves for all classifiers (yellow curve). The curve shows the optimal classifier (or combination thereof) for each desired TP/P rate. -4. Press *Save Image* if you want to save the created image to your computer in a .svg or .png format. -5. Produce a report. -6. 2-D pane with **P rate** (population) as x-axis and **TP rate** (true positives) as a y-axis. The diagonal line represents the behavior of a random classifier. Click and drag to move the pane and scroll in or out to zoom. Click on the "*A*" sign at the bottom left corner to realign the pane. +![](images/LiftCurve-cumulative-gain.png) -**Note!** The perfect classifier would have a steep slope towards 1 until all -classes are guessed correctly and then run straight along 1 on y-axis to -(1,1). 
+Lift curve shows the ratio between the proportion of true positive instances in the selection and the proportion of customers contacted. See [a tutorial for more details](https://medium.com/analytics-vidhya/understanding-lift-curve-b674d21e426). -Example -------- +![](images/LiftCurve-stamped.png) -At the moment, the only widget which gives the right type of the signal needed by the **Lift Curve** is [Test & Score](../evaluate/testandscore.md). +1. Choose the desired *Target class*. The default is chosen alphabetically. +2. Choose whether to observe lift curve or cumulative gains. +3. If test results contain more than one classifier, the user can choose which curves she or he wants to see plotted. Click on a classifier to select or deselect the curve. +4. *Show lift convex hull* plots a convex hull over lift curves for all classifiers (yellow curve). The curve shows the optimal classifier (or combination thereof) for each desired lift or cumulative gain. +5. Press *Save Image* to save the created image in a .svg or .png format. +6. Produce a report. +7. A plot with **lift** or **cumulative gain** vs. **positive rate**. The dashed line represents the behavior of a random classifier. -In the example below, we try to see the prediction quality for the class 'survived' on the *Titanic* dataset. We compared three different classifiers in the Test Learners widget and sent them to Lift Curve to see their performance against a random model. We see the [Tree](../model/tree.md) classifier is the best out of the three, since it best aligns with *lift convex hull*. We also see that its performance is the best for the first 30% of the population (in order of descending probability), which we can set as the threshold for optimal classification. 
-![](images/LiftCurve-example.png) +Example +------- -References ----------- +The widgets that provide the right type of the signal needed by the **Lift Curve** (evaluation data) are [Test & Score](../evaluate/testandscore.md) and [Predictions](../evaluate/predictions.md). -Handouts of the University of Notre Dame on Data Mining - Lift Curve. Available [here](https://www3.nd.edu/~busiforc/handouts/DataMining/Lift%20Charts.html). +In the example below, we observe the lift curve and cumulative gain for the bank marketing data, where the classification goal is to predict whether the client will accept a term deposit offer based on his age, job, education, marital status and similar data. The data set is available in the Datasets widget. We run the learning algorithms in the Test and Score widget and send the results to Lift Curve to see their performance against a random model. Of the two algorithms tested, logistic regression outperforms the naive Bayesian classifier. The curve tells us that by picking the first 20 % of clients as ranked by the model, we are going to hit four times more positive instances than by selecting a random sample with 20 % of clients. diff --git a/doc/widgets.json b/doc/widgets.json index 2dd92063534..18dcea3fa67 100644 --- a/doc/widgets.json +++ b/doc/widgets.json @@ -638,7 +638,10 @@ "doc": "visual-programming/source/widgets/evaluate/liftcurve.md", "icon": "../Orange/widgets/evaluate/icons/LiftCurve.svg", "background": "#C3F3F3", - "keywords": [] + "keywords": [ + "lift", + "cumulative gain" + ] }, { "text": "Calibration Plot",