diff --git a/Orange/widgets/visualize/owdistributions.py b/Orange/widgets/visualize/owdistributions.py index 2f3c8d1e68c..0ce84eaf6eb 100644 --- a/Orange/widgets/visualize/owdistributions.py +++ b/Orange/widgets/visualize/owdistributions.py @@ -10,7 +10,7 @@ from AnyQt.QtCore import Qt, QRectF, QPointF, pyqtSignal as Signal import pyqtgraph as pg -from Orange.data import Table, DiscreteVariable, ContinuousVariable +from Orange.data import Table, DiscreteVariable, ContinuousVariable, Domain from Orange.preprocess.discretize import decimal_binnings, time_binnings, \ short_time_units from Orange.statistics import distribution, contingency @@ -66,7 +66,7 @@ def mouseReleaseEvent(event): class DistributionBarItem(pg.GraphicsObject): def __init__(self, x, width, padding, freqs, colors, stacked, expanded, - tooltip, hidden): + tooltip, desc, hidden): super().__init__() self.x = x self.width = width @@ -79,6 +79,7 @@ def __init__(self, x, width, padding, freqs, colors, stacked, expanded, self.polygon = None self.hovered = False self._tooltip = tooltip + self.desc = desc self.hidden = False self.setHidden(hidden) self.setAcceptHoverEvents(True) @@ -358,7 +359,7 @@ def __init__(self): callback=self._on_show_probabilities_changed) gui.checkBox( box, self, "cumulative_distr", "Show cumulative distribution", - callback=self.replot) + callback=self._on_show_cumulative) gui.auto_apply(self.controlArea, self, commit=self.apply) @@ -461,6 +462,10 @@ def _on_cvar_changed(self): self.replot() self.apply() + def _on_show_cumulative(self): + self.replot() + self.apply() + def _on_bins_changed(self): self.reset_select() self._set_bin_width_slider_label() @@ -596,10 +601,10 @@ def _call_plotting(self): self.plot.autoRange() def _add_bar(self, x, width, padding, freqs, colors, stacked, expanded, - tooltip, hidden=False): + tooltip, desc, hidden=False): item = DistributionBarItem( x, width, padding, freqs, colors, stacked, expanded, tooltip, - hidden) + desc, hidden) self.plot.addItem(item) self.bar_items.append(item) @@ -609,13 +614,14 @@ def _disc_plot(self): colors = [QColor(0, 128, 255)] dist = distribution.get_distribution(self.data, self.var) for i, freq in enumerate(dist): + desc = var.values[i] tooltip = \ "

" \ - f"{escape(var.values[i])}: {int(freq)} " \ + f"{escape(desc)}: {int(freq)} " \ f"({100 * freq / len(self.valid_data):.2f} %) " self._add_bar( i - 0.5, 1, 0.1, [freq], colors, - stacked=False, expanded=False, tooltip=tooltip) + stacked=False, expanded=False, tooltip=tooltip, desc=desc) def _disc_split_plot(self): var = self.var @@ -625,11 +631,13 @@ def _disc_split_plot(self): conts = contingency.get_contingency(self.data, self.cvar, self.var) total = len(self.data) for i, freqs in enumerate(conts): + desc = var.values[i] self._add_bar( i - 0.5, 1, 0.1, freqs, gcolors, stacked=self.stacked_columns, expanded=self.show_probs, tooltip=self._split_tooltip( - var.values[i], np.sum(freqs), total, gvalues, freqs)) + desc, np.sum(freqs), total, gvalues, freqs), + desc=desc) def _cont_plot(self): self._set_cont_ticks() @@ -645,14 +653,15 @@ def _cont_plot(self): lasti = len(y) - 1 for i, (x0, x1), freq in zip(count(), zip(x, x[1:]), y): tot_freq += freq + desc = self.str_int(x0, x1, not i, i == lasti) tooltip = \ "

" \ - f"{escape(self.str_int(x0, x1, not i, i == lasti))}: " \ + f"{escape(desc)}: " \ f"{freq} ({100 * freq / total:.2f} %)

" self._add_bar( x0, x1 - x0, 0, [tot_freq if self.cumulative_distr else freq], colors, stacked=False, expanded=False, tooltip=tooltip, - hidden=self.hide_bars) + desc=desc, hidden=self.hide_bars) if self.fitted_distribution: self._plot_approximations( @@ -688,13 +697,14 @@ def _cont_split_plot(self): for i, x0, x1, freqs in zip(count(), bins, bins[1:], zip(*ys)): tot_freqs += freqs plotfreqs = tot_freqs.copy() if self.cumulative_distr else freqs + desc = self.str_int(x0, x1, not i, i == lasti) self._add_bar( x0, x1 - x0, 0 if self.stacked_columns else 0.1, plotfreqs, gcolors, stacked=self.stacked_columns, expanded=self.show_probs, hidden=self.hide_bars, tooltip=self._split_tooltip( - self.str_int(x0, x1, not i, i == lasti), - np.sum(plotfreqs), total, gvalues, plotfreqs)) + desc, np.sum(plotfreqs), total, gvalues, plotfreqs), + desc=desc) if fitters: self._plot_approximations(bins[0], bins[-1], fitters, varcolors, @@ -1073,15 +1083,17 @@ def apply(self): group_indices, values = self._get_output_indices_disc() else: group_indices, values = self._get_output_indices_cont() - hist_indices, hist_values = self._get_histogram_indices() - histogram_data = create_groups_table( - data, hist_indices, values=hist_values) selected = np.nonzero(group_indices)[0] if selected.size: selected_data = create_groups_table( data, group_indices, include_unselected=False, values=values) - annotated_data = create_annotated_table(data, selected) + annotated_data = create_annotated_table(data, selected) + if self.var.is_continuous: # annotate with bins + hist_indices, hist_values = self._get_histogram_indices() + annotated_data = create_groups_table( + annotated_data, hist_indices, var_name="Bin", values=hist_values) + histogram_data = self._get_histogram_table() summary = len(selected_data) if selected_data else self.info.NoOutput details = format_summary_details(selected_data) if selected_data else "" @@ -1116,6 +1128,21 @@ def _get_output_indices_cont(self): self.str_int(x0, x1, not bar_idx, self._is_last_bar(bar_idx))) return group_indices, values + def _get_histogram_table(self): + var_bin = DiscreteVariable("Bin", [bar.desc for bar in self.bar_items]) + var_freq = ContinuousVariable("Count") + X = [] + if self.cvar: + domain = Domain([var_bin, self.cvar, var_freq]) + for i, bar in enumerate(self.bar_items): + for j, freq in enumerate(bar.freqs): + X.append([i, j, freq]) + else: + domain = Domain([var_bin, var_freq]) + for i, bar in enumerate(self.bar_items): + X.append([i, bar.freqs[0]]) + return Table.from_numpy(domain, X) + def _get_histogram_indices(self): group_indices = np.zeros(len(self.data), dtype=np.int32) col = self.data.get_column_view(self.var)[0].astype(float) diff --git a/Orange/widgets/visualize/tests/test_owdistributions.py b/Orange/widgets/visualize/tests/test_owdistributions.py index 318c81f6d51..3d4096732c1 100644 --- a/Orange/widgets/visualize/tests/test_owdistributions.py +++ b/Orange/widgets/visualize/tests/test_owdistributions.py @@ -70,9 +70,8 @@ def test_set_data(self): self.assertIs(widget.cvar, domain.class_var) np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0]) np.testing.assert_equal(widget.valid_group_data, self.iris.Y) - self.assertEqual( - len(self.get_output(widget.Outputs.histogram_data)), 150) - self.assertIsNone(self.get_output(widget.Outputs.annotated_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data)) self.assertIsNone(self.get_output(widget.Outputs.selected_data)) # Data gone: clean up @@ -108,9 +107,8 @@ def test_set_data_no_class_no_discrete(self): self.assertIs(widget.cvar, None) np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0]) self.assertIsNone(widget.valid_group_data) - self.assertEqual( - len(self.get_output(widget.Outputs.histogram_data)), 150) - self.assertIsNone(self.get_output(widget.Outputs.annotated_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data)) self.assertIsNone(self.get_output(widget.Outputs.selected_data)) def test_set_data_no_class(self): @@ -131,9 +129,8 @@ def test_set_data_no_class(self): self.assertIs(widget.cvar, None) np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0]) self.assertIsNone(widget.valid_group_data) - self.assertEqual( - len(self.get_output(widget.Outputs.histogram_data)), 150) - self.assertIsNone(self.get_output(widget.Outputs.annotated_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data)) self.assertIsNone(self.get_output(widget.Outputs.selected_data)) def test_set_data_reg_class(self): @@ -155,9 +152,8 @@ def test_set_data_reg_class(self): self.assertIs(widget.cvar, None) np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0]) self.assertIsNone(widget.valid_group_data) - self.assertEqual( - len(self.get_output(widget.Outputs.histogram_data)), 150) - self.assertIsNone(self.get_output(widget.Outputs.annotated_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data)) self.assertIsNone(self.get_output(widget.Outputs.selected_data)) def test_set_data_reg_class_no_discrete(self): @@ -177,11 +173,18 @@ def test_set_data_reg_class_no_discrete(self): self.assertIs(widget.cvar, None) np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0]) self.assertIsNone(widget.valid_group_data) - self.assertEqual( - len(self.get_output(widget.Outputs.histogram_data)), 150) - self.assertIsNone(self.get_output(widget.Outputs.annotated_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data)) + self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data)) self.assertIsNone(self.get_output(widget.Outputs.selected_data)) + def test_histogram_data(self): + widget = self.widget + self.send_signal(widget.Inputs.data, self.iris) + self._set_var(self.iris.domain["sepal length"]) + self._set_cvar(self.iris.domain["iris"]) + hist = self.get_output(widget.Outputs.histogram_data) + self.assertTrue(len(hist)>0 and len(hist)%3==0) + def test_switch_var(self): """Widget reset and recomputes when changing var""" widget = self.widget