Skip to content

Commit

Permalink
OWBoxPlot: Add sorting of groups
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed Nov 8, 2019
1 parent f6fe789 commit 5330135
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 37 deletions.
105 changes: 78 additions & 27 deletions Orange/widgets/visualize/owboxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from Orange.widgets import widget, gui
from Orange.widgets.settings import (Setting, DomainContextHandler,
ContextSetting)
from Orange.widgets.utils.itemmodels import DomainModel, VariableListModel
from Orange.widgets.utils.itemmodels import VariableListModel
from Orange.widgets.utils.annotated_data import (create_annotated_table,
ANNOTATED_DATA_SIGNAL_NAME)
from Orange.widgets.utils.widgetpreview import WidgetPreview
Expand Down Expand Up @@ -154,6 +154,7 @@ class Outputs:

attribute = ContextSetting(None)
order_by_importance = Setting(False)
order_grouping_by_importance = Setting(False)
group_var = ContextSetting(None)
show_annotations = Setting(True)
compare = Setting(CompareMeans)
Expand Down Expand Up @@ -213,19 +214,20 @@ def __init__(self):
# set the minimal height (see the penultimate paragraph of
# http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
gui.separator(view.box, 6, 6)
self.cb_order = gui.checkBox(
gui.checkBox(
view.box, self, "order_by_importance",
"Order by relevance",
"Order by relevance to subgroups",
tooltip="Order by 𝜒² or ANOVA over the subgroups",
callback=self.apply_sorting)
self.group_vars = DomainModel(
placeholder="None", separators=False,
valid_types=Orange.data.DiscreteVariable)
self.group_view = view = gui.listView(
callback=self.apply_attr_sorting)
self.group_vars = VariableListModel(placeholder="None")
view = gui.listView(
self.controlArea, self, "group_var", box="Subgroups",
model=self.group_vars, callback=self.grouping_changed)
view.setEnabled(False)
gui.checkBox(
view.box, self, "order_grouping_by_importance",
"Order by relevance to variable",
tooltip="Order by 𝜒² or ANOVA over the variable values",
callback=self.apply_group_sorting)
view.setMinimumSize(QSize(30, 30))
# See the comment above
view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
Expand Down Expand Up @@ -258,7 +260,6 @@ def __init__(self):
self.sort_cb = gui.checkBox(
box, self, 'sort_freqs', "Sort by subgroup frequencies",
callback=self.display_changed)
gui.rubber(box)

gui.vBox(self.mainArea, addSpace=True)
self.box_scene = QGraphicsScene()
Expand Down Expand Up @@ -290,12 +291,20 @@ def eventFilter(self, obj, event):

return super().eventFilter(obj, event)

def reset_attrs(self, domain):
def reset_attrs(self):
domain = self.dataset.domain
self.attrs[:] = [
var for var in chain(
domain.class_vars, domain.metas, domain.attributes)
if var.is_primitive()]

def reset_groups(self):
    """Rebuild the subgroup candidates in their default (unsorted) order.

    The list starts with the ``None`` placeholder, followed by every
    discrete variable of the current dataset's domain, taken in the order
    class variables, metas, attributes.
    """
    domain = self.dataset.domain
    candidates = chain(domain.class_vars, domain.metas, domain.attributes)
    discrete = [var for var in candidates if var.is_discrete]
    # Slice assignment replaces the contents of the existing model in place.
    self.group_vars[:] = [None] + discrete

# noinspection PyTypeChecker
@Inputs.data
def set_data(self, dataset):
Expand All @@ -309,19 +318,19 @@ def set_data(self, dataset):
self.group_var = None
self.attribute = None
if dataset:
domain = dataset.domain
self.group_vars.set_domain(domain)
self.group_view.setEnabled(len(self.group_vars) > 1)
self.reset_attrs(domain)
self.select_default_variables(domain)
self.reset_attrs()
self.reset_groups()
self.select_default_variables()
self.openContext(self.dataset)
self.grouping_changed()
self.attr_changed()
else:
self.reset_all_data()
self.commit()

def select_default_variables(self, domain):
def select_default_variables(self):
# visualize first non-class variable, group by class (if present)
domain = self.dataset.domain
if len(self.attrs) > len(domain.class_vars):
self.attribute = self.attrs[len(domain.class_vars)]
elif self.attrs:
Expand All @@ -332,7 +341,7 @@ def select_default_variables(self, domain):
else:
self.group_var = None # Reset to trigger selection via callback

def apply_sorting(self):
def apply_attr_sorting(self):
def compute_score(attr):
if attr is group_var:
return 3
Expand Down Expand Up @@ -362,8 +371,48 @@ def compute_score(attr):
include_class=True, include_metas=True) else None
self.attrs.sort(key=compute_score)
else:
self.reset_attrs(domain)
self.attribute = attribute
self.reset_attrs()
self.attribute = attribute # reset selection
self._ensure_selection_visible(self.controls.attribute)

def apply_group_sorting(self):
    """Order the subgroup candidates by relevance to the selected variable.

    When ``order_grouping_by_importance`` is set and a variable is
    selected, ``self.group_vars`` is sorted by ascending p-value: one-way
    ANOVA (``f_oneway``) if the selected variable is continuous, otherwise
    ``self._chi_square``.  The ``None`` placeholder stays first and the
    selected variable itself goes last.  In every other case the list is
    rebuilt in its default order with ``reset_groups``.  Afterwards the
    previous ``group_var`` is re-assigned and scrolled into view.

    Does nothing when there is no dataset.
    """
    def compute_stat(group):
        # Sort key.  The sentinels -1, 2 and 3 lie outside the [0, 1]
        # range of p-values, so they always sort before/after real scores.
        if group is attr:
            return 3  # grouping a variable by itself: put it last
        if group is None:
            return -1  # keep the "None" placeholder on top
        if attr.is_continuous:
            # Compare as floats: a missing group value (NaN) cannot be
            # cast to int, while as a float it simply equals no index and
            # the row is left out of every sample.
            group_col = data.get_column_view(group)[0].astype(float)
            samples = (attr_col[group_col == i]
                       for i in range(len(group.values)))
            samples = (col[~np.isnan(col)] for col in samples)
            samples = [col for col in samples if len(col)]
            # ANOVA needs at least two non-empty samples
            p = f_oneway(*samples)[1] if len(samples) > 1 else 2
        else:
            p = self._chi_square(group, attr)[1]
        if math.isnan(p):
            return 2
        return p

    data = self.dataset
    if data is None:
        return
    attr = self.attribute
    group_var = self.group_var
    # Without a selected variable there is nothing to score against, so
    # fall back to the default order instead of dereferencing None.
    if self.order_grouping_by_importance and attr is not None:
        if attr.is_continuous:
            attr_col = data.get_column_view(attr)[0].astype(float)
        self.group_vars.sort(key=compute_stat)
    else:
        self.reset_groups()
    self.group_var = group_var  # reset selection
    self._ensure_selection_visible(self.controls.group_var)

@staticmethod
def _ensure_selection_visible(view):
selection = view.selectedIndexes()
if len(selection) == 1:
view.scrollTo(selection[0])

def _chi_square(self, group_var, attr):
# Chi-square with the given distribution into groups
Expand All @@ -380,16 +429,14 @@ def _chi_square(self, group_var, attr):
def reset_all_data(self):
self.clear_scene()
self.stat_test = ""
self.attrs.clear()
self.group_vars.set_domain(None)
self.group_view.setEnabled(False)
self.attrs[:] = []
self.group_vars[:] = [None]
self.is_continuous = False
self.update_display_box()

def grouping_changed(self):
self.cb_order.setEnabled(self.group_var is not None)
self.apply_sorting()
self.attr_changed()
self.apply_attr_sorting()
self.update_graph()

def select_box_items(self):
temp_cond = self.conditions.copy()
Expand All @@ -399,6 +446,10 @@ def select_box_items(self):
[c.conditions for c in temp_cond])

def attr_changed(self):
    """Handle a change of the selected variable.

    The subgroup list is re-sorted first, then the graph is redrawn.
    """
    self.apply_group_sorting()
    self.update_graph()

def update_graph(self):
self.compute_box_data()
self.update_display_box()
self.layout_changed()
Expand Down
55 changes: 45 additions & 10 deletions Orange/widgets/visualize/tests/test_owboxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,9 @@ def test_input_data(self):
self.send_signal(self.widget.Inputs.data, None)
self.assertEqual(len(self.widget.attrs), 0)
self.assertEqual(len(self.widget.group_vars), 1)
self.assertFalse(self.widget.group_view.isEnabled())
self.assertTrue(self.widget.display_box.isHidden())
self.assertFalse(self.widget.stretching_box.isHidden())

self.send_signal(self.widget.Inputs.data, self.iris)
self.assertTrue(self.widget.group_view.isEnabled())

def test_primitive_metas(self):
new_domain = Domain(attributes=[], class_vars=[], metas=(
self.data.domain.attributes + self.data.domain.class_vars))
Expand Down Expand Up @@ -88,7 +84,6 @@ def test_input_data_missings_disc_no_group_var(self):
data.X[:, 1] = np.nan
data.domain.attributes[1].values = []
self.send_signal("Data", data)
self.widget.controls.order_by_importance.setChecked(True)
self._select_list_items(self.widget.controls.attribute)
self._select_list_items(self.widget.controls.group_var)

Expand All @@ -100,7 +95,7 @@ def test_attribute_combinations(self):
m.setCurrentIndex(group_list.model().index(i), m.ClearAndSelect)
self._select_list_items(self.widget.controls.attribute)

def test_apply_sorting(self):
def test_apply_sorting_group(self):
controls = self.widget.controls
group_list = controls.group_var
order_check = controls.order_by_importance
Expand All @@ -115,10 +110,7 @@ def select_group(i):
data = self.titanic
self.send_signal("Data", data)

select_group(0)
self.assertFalse(order_check.isEnabled())
select_group(2) # First attribute
self.assertTrue(order_check.isEnabled())

order_check.setChecked(False)
self.assertEqual(tuple(attributes),
Expand Down Expand Up @@ -150,6 +142,50 @@ def select_group(i):
'fasting blood sugar > 120',
'diameter narrowing'])

def test_apply_sorting_vars(self):
    """Subgroup candidates are reordered by relevance to the variable."""
    widget = self.widget
    controls = widget.controls
    attr_model = widget.attrs
    group_model = widget.group_vars
    order_box = controls.order_grouping_by_importance

    def select_attr(row):
        # Select the variable at `row` in the variable list view.
        selection = controls.attribute.selectionModel()
        selection.setCurrentIndex(
            attr_model.index(row), selection.ClearAndSelect)

    def group_names():
        # Names of the candidates that follow the leading placeholder.
        return [x.name for x in group_model[1:]]

    data = self.titanic
    self.send_signal("Data", data)

    select_attr(1)  # First attribute

    # Unsorted: placeholder, then class variables, then attributes
    order_box.setChecked(False)
    self.assertEqual(
        tuple(group_model),
        (None, ) + data.domain.class_vars + data.domain.attributes)

    order_box.setChecked(True)
    self.assertIsNone(group_model[0])
    self.assertEqual(group_names(),
                     ['sex', 'survived', 'age', 'status'])

    select_attr(0)  # Class
    self.assertIsNone(group_model[0])
    self.assertEqual(group_names(),
                     ['sex', 'status', 'age', 'survived'])

    # Sorting stays in effect when new data arrives
    data = self.heart
    self.send_signal("Data", data)
    select_attr(0)  # Class
    self.assertIsNone(group_model[0])
    self.assertEqual(group_names(),
                     ['thal',
                      'chest pain',
                      'exerc ind ang',
                      'slope peak exc ST',
                      'gender',
                      'rest ECG',
                      'fasting blood sugar > 120',
                      'diameter narrowing'])

def test_box_order_when_missing_stats(self):
self.widget.compare = 1
# The widget can't do anything smart here, but shouldn't crash
Expand All @@ -170,7 +206,6 @@ def test_continuous_metas(self):
domain = Domain([], domain.class_var, metas)
data = Table.from_table(domain, self.iris)
self.send_signal(self.widget.Inputs.data, data)
self.widget.controls.order_by_importance.setChecked(True)

def test_label_overlap(self):
self.send_signal(self.widget.Inputs.data, self.heart)
Expand Down

0 comments on commit 5330135

Please sign in to comment.