Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Distributions: change Histogram Data output #4832

Merged
merged 4 commits into from
Jun 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 43 additions & 16 deletions Orange/widgets/visualize/owdistributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from AnyQt.QtCore import Qt, QRectF, QPointF, pyqtSignal as Signal
import pyqtgraph as pg

from Orange.data import Table, DiscreteVariable, ContinuousVariable
from Orange.data import Table, DiscreteVariable, ContinuousVariable, Domain
from Orange.preprocess.discretize import decimal_binnings, time_binnings, \
short_time_units
from Orange.statistics import distribution, contingency
Expand Down Expand Up @@ -66,7 +66,7 @@ def mouseReleaseEvent(event):

class DistributionBarItem(pg.GraphicsObject):
def __init__(self, x, width, padding, freqs, colors, stacked, expanded,
tooltip, hidden):
tooltip, desc, hidden):
super().__init__()
self.x = x
self.width = width
Expand All @@ -79,6 +79,7 @@ def __init__(self, x, width, padding, freqs, colors, stacked, expanded,
self.polygon = None
self.hovered = False
self._tooltip = tooltip
self.desc = desc
self.hidden = False
self.setHidden(hidden)
self.setAcceptHoverEvents(True)
Expand Down Expand Up @@ -358,7 +359,7 @@ def __init__(self):
callback=self._on_show_probabilities_changed)
gui.checkBox(
box, self, "cumulative_distr", "Show cumulative distribution",
callback=self.replot)
callback=self._on_show_cumulative)

gui.auto_apply(self.controlArea, self, commit=self.apply)

Expand Down Expand Up @@ -461,6 +462,10 @@ def _on_cvar_changed(self):
self.replot()
self.apply()

def _on_show_cumulative(self):
self.replot()
self.apply()

def _on_bins_changed(self):
self.reset_select()
self._set_bin_width_slider_label()
Expand Down Expand Up @@ -596,10 +601,10 @@ def _call_plotting(self):
self.plot.autoRange()

def _add_bar(self, x, width, padding, freqs, colors, stacked, expanded,
tooltip, hidden=False):
tooltip, desc, hidden=False):
item = DistributionBarItem(
x, width, padding, freqs, colors, stacked, expanded, tooltip,
hidden)
desc, hidden)
self.plot.addItem(item)
self.bar_items.append(item)

Expand All @@ -609,13 +614,14 @@ def _disc_plot(self):
colors = [QColor(0, 128, 255)]
dist = distribution.get_distribution(self.data, self.var)
for i, freq in enumerate(dist):
desc = var.values[i]
tooltip = \
"<p style='white-space:pre;'>" \
f"<b>{escape(var.values[i])}</b>: {int(freq)} " \
f"<b>{escape(desc)}</b>: {int(freq)} " \
f"({100 * freq / len(self.valid_data):.2f} %) "
self._add_bar(
i - 0.5, 1, 0.1, [freq], colors,
stacked=False, expanded=False, tooltip=tooltip)
stacked=False, expanded=False, tooltip=tooltip, desc=desc)

def _disc_split_plot(self):
var = self.var
Expand All @@ -625,11 +631,13 @@ def _disc_split_plot(self):
conts = contingency.get_contingency(self.data, self.cvar, self.var)
total = len(self.data)
for i, freqs in enumerate(conts):
desc = var.values[i]
self._add_bar(
i - 0.5, 1, 0.1, freqs, gcolors,
stacked=self.stacked_columns, expanded=self.show_probs,
tooltip=self._split_tooltip(
var.values[i], np.sum(freqs), total, gvalues, freqs))
desc, np.sum(freqs), total, gvalues, freqs),
desc=desc)

def _cont_plot(self):
self._set_cont_ticks()
Expand All @@ -645,14 +653,15 @@ def _cont_plot(self):
lasti = len(y) - 1
for i, (x0, x1), freq in zip(count(), zip(x, x[1:]), y):
tot_freq += freq
desc = self.str_int(x0, x1, not i, i == lasti)
tooltip = \
"<p style='white-space:pre;'>" \
f"<b>{escape(self.str_int(x0, x1, not i, i == lasti))}</b>: " \
f"<b>{escape(desc)}</b>: " \
f"{freq} ({100 * freq / total:.2f} %)</p>"
self._add_bar(
x0, x1 - x0, 0, [tot_freq if self.cumulative_distr else freq],
colors, stacked=False, expanded=False, tooltip=tooltip,
hidden=self.hide_bars)
desc=desc, hidden=self.hide_bars)

if self.fitted_distribution:
self._plot_approximations(
Expand Down Expand Up @@ -688,13 +697,14 @@ def _cont_split_plot(self):
for i, x0, x1, freqs in zip(count(), bins, bins[1:], zip(*ys)):
tot_freqs += freqs
plotfreqs = tot_freqs.copy() if self.cumulative_distr else freqs
desc = self.str_int(x0, x1, not i, i == lasti)
self._add_bar(
x0, x1 - x0, 0 if self.stacked_columns else 0.1, plotfreqs,
gcolors, stacked=self.stacked_columns, expanded=self.show_probs,
hidden=self.hide_bars,
tooltip=self._split_tooltip(
self.str_int(x0, x1, not i, i == lasti),
np.sum(plotfreqs), total, gvalues, plotfreqs))
desc, np.sum(plotfreqs), total, gvalues, plotfreqs),
desc=desc)

if fitters:
self._plot_approximations(bins[0], bins[-1], fitters, varcolors,
Expand Down Expand Up @@ -1073,15 +1083,17 @@ def apply(self):
group_indices, values = self._get_output_indices_disc()
else:
group_indices, values = self._get_output_indices_cont()
hist_indices, hist_values = self._get_histogram_indices()
histogram_data = create_groups_table(
data, hist_indices, values=hist_values)
selected = np.nonzero(group_indices)[0]
if selected.size:
selected_data = create_groups_table(
data, group_indices,
include_unselected=False, values=values)
annotated_data = create_annotated_table(data, selected)
annotated_data = create_annotated_table(data, selected)
if self.var.is_continuous: # annotate with bins
hist_indices, hist_values = self._get_histogram_indices()
annotated_data = create_groups_table(
annotated_data, hist_indices, var_name="Bin", values=hist_values)
histogram_data = self._get_histogram_table()

summary = len(selected_data) if selected_data else self.info.NoOutput
details = format_summary_details(selected_data) if selected_data else ""
Expand Down Expand Up @@ -1116,6 +1128,21 @@ def _get_output_indices_cont(self):
self.str_int(x0, x1, not bar_idx, self._is_last_bar(bar_idx)))
return group_indices, values

def _get_histogram_table(self):
    """Build the histogram output table from the currently plotted bars.

    The table has a discrete "Bin" column whose values are the bars'
    descriptions (``bar.desc``) and a continuous "Count" column holding
    the frequencies stored on each bar. When a split variable
    (``self.cvar``) is set, that variable is added as a middle column and
    every bar contributes one row per entry of ``bar.freqs``; otherwise
    each bar contributes a single row with ``bar.freqs[0]``.

    Note that "Count" carries whatever the bars were drawn with, i.e.
    cumulative frequencies when the cumulative option is enabled.

    Returns:
        A ``Table`` created with ``Table.from_numpy`` from the rows above.
    """
    bars = self.bar_items
    bin_var = DiscreteVariable("Bin", [bar.desc for bar in bars])
    count_var = ContinuousVariable("Count")
    if self.cvar:
        domain = Domain([bin_var, self.cvar, count_var])
        # The position inside bar.freqs is stored as the value index of
        # the split variable.
        # NOTE(review): assumes bar.freqs is ordered like self.cvar.values
        # -- confirm against the contingency computation.
        rows = [[bin_idx, group_idx, freq]
                for bin_idx, bar in enumerate(bars)
                for group_idx, freq in enumerate(bar.freqs)]
    else:
        domain = Domain([bin_var, count_var])
        rows = [[bin_idx, bar.freqs[0]] for bin_idx, bar in enumerate(bars)]
    return Table.from_numpy(domain, rows)

def _get_histogram_indices(self):
group_indices = np.zeros(len(self.data), dtype=np.int32)
col = self.data.get_column_view(self.var)[0].astype(float)
Expand Down
33 changes: 18 additions & 15 deletions Orange/widgets/visualize/tests/test_owdistributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,8 @@ def test_set_data(self):
self.assertIs(widget.cvar, domain.class_var)
np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0])
np.testing.assert_equal(widget.valid_group_data, self.iris.Y)
self.assertEqual(
len(self.get_output(widget.Outputs.histogram_data)), 150)
self.assertIsNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data))
self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNone(self.get_output(widget.Outputs.selected_data))

# Data gone: clean up
Expand Down Expand Up @@ -108,9 +107,8 @@ def test_set_data_no_class_no_discrete(self):
self.assertIs(widget.cvar, None)
np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0])
self.assertIsNone(widget.valid_group_data)
self.assertEqual(
len(self.get_output(widget.Outputs.histogram_data)), 150)
self.assertIsNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data))
self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNone(self.get_output(widget.Outputs.selected_data))

def test_set_data_no_class(self):
Expand All @@ -131,9 +129,8 @@ def test_set_data_no_class(self):
self.assertIs(widget.cvar, None)
np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0])
self.assertIsNone(widget.valid_group_data)
self.assertEqual(
len(self.get_output(widget.Outputs.histogram_data)), 150)
self.assertIsNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data))
self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNone(self.get_output(widget.Outputs.selected_data))

def test_set_data_reg_class(self):
Expand All @@ -155,9 +152,8 @@ def test_set_data_reg_class(self):
self.assertIs(widget.cvar, None)
np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0])
self.assertIsNone(widget.valid_group_data)
self.assertEqual(
len(self.get_output(widget.Outputs.histogram_data)), 150)
self.assertIsNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data))
self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNone(self.get_output(widget.Outputs.selected_data))

def test_set_data_reg_class_no_discrete(self):
Expand All @@ -177,11 +173,18 @@ def test_set_data_reg_class_no_discrete(self):
self.assertIs(widget.cvar, None)
np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0])
self.assertIsNone(widget.valid_group_data)
self.assertEqual(
len(self.get_output(widget.Outputs.histogram_data)), 150)
self.assertIsNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNotNone(self.get_output(widget.Outputs.histogram_data))
self.assertIsNotNone(self.get_output(widget.Outputs.annotated_data))
self.assertIsNone(self.get_output(widget.Outputs.selected_data))

def test_histogram_data(self):
    """Histogram output is non-empty and its row count is a multiple of 3.

    With a split variable set, the histogram table gets one row per
    (bar, frequency) pair, so the number of rows must be a multiple of
    the number of frequencies per bar.
    """
    widget = self.widget
    self.send_signal(widget.Inputs.data, self.iris)
    self._set_var(self.iris.domain["sepal length"])
    self._set_cvar(self.iris.domain["iris"])
    hist = self.get_output(widget.Outputs.histogram_data)
    # Separate assertions so that a failure reports which condition broke
    # and the offending values, instead of a bare "False is not true".
    self.assertIsNotNone(hist)
    self.assertGreater(len(hist), 0)
    # NOTE(review): 3 is presumably the number of values of the "iris"
    # split variable -- confirm.
    self.assertEqual(len(hist) % 3, 0)

def test_switch_var(self):
"""Widget reset and recomputes when changing var"""
widget = self.widget
Expand Down