Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Box Plot: Add box for missing group values #4292

Merged
merged 2 commits into from
Jan 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Orange/statistics/contingency.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,18 @@ def from_data(self, data, col_variable, row_variable=None):
"Fallback method for computation of contingencies is not implemented yet"
)

@property
def array_with_unknowns(self):
    """
    Return the items produced by iterating over this object, followed by
    one extra entry describing the row unknowns.

    The extra (last) entry is a two-row array: the first row holds the
    entries of `self.values` at positions where `self.row_unknowns` is
    positive, the second row holds the corresponding `self.row_unknowns`
    entries.
    """
    # Collect the regular rows one by one through the object's own
    # iteration protocol.
    rows = []
    for row in self:
        rows.append(row)

    # Keep only the positions that have at least one unknown.
    has_unknowns = self.row_unknowns > 0
    missing_block = np.vstack(
        (self.values[has_unknowns], self.row_unknowns[has_unknowns]))

    rows.append(missing_block)
    return rows

def __eq__(self, other):
return (
np.array_equal(self.values, other.values) and
Expand Down
27 changes: 26 additions & 1 deletion Orange/tests/test_contingency.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,32 @@ def test_continuous_missing(self):
3., 4., 2., 1., 1., 1., 1.])
self.assertEqual(cont.unknowns, 1)

@staticmethod
def test_continuous_array_with_unknowns():
    """
    Check `array_with_unknowns` on a continuous contingency.

    The class values of the first 50 iris rows are replaced with NaN, so
    the contingency over "sepal width" must report them as row unknowns.
    """
    iris = data.Table("iris")
    iris.Y[:50] = np.nan
    cont = contingency.Continuous(iris, "sepal width")

    expected_unknowns = [
        0., 0., 1., 0., 0., 0., 0., 0., 1., 6., 5., 5.,
        2., 9., 6., 2., 3., 4., 2., 1., 1., 1., 1.]
    # Expected content of the appended "unknowns" entry: only positions
    # with a positive unknown count are kept.
    expected_counts = [
        count for count in expected_unknowns if count > 0]
    expected_values = [
        value
        for value, count in zip(cont.values, expected_unknowns)
        if count > 0]

    np.testing.assert_almost_equal(cont.row_unknowns, expected_unknowns)

    with_unknowns = cont.array_with_unknowns
    unknown_block = with_unknowns[-1]
    np.testing.assert_almost_equal(unknown_block[1], expected_counts)
    np.testing.assert_almost_equal(unknown_block[0], expected_values)

    # Every entry before the last must equal what plain iteration over
    # the contingency yields.
    for from_property, from_iteration in zip(with_unknowns[:-1], cont):
        np.testing.assert_almost_equal(from_property, from_iteration)

def test_mixedtype_metas(self):
import Orange
zoo = Orange.data.Table("zoo")
Expand Down Expand Up @@ -212,7 +238,6 @@ def _construct_sparse():
Y = np.array([[1, 2, 1, 0, 0]]).T
return data.Table.from_numpy(domain, X, Y)


def test_sparse(self):
d = self._construct_sparse()
cont = contingency.Discrete(d, 5)
Expand Down
17 changes: 9 additions & 8 deletions Orange/widgets/visualize/owboxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,20 +494,21 @@ def compute_box_data(self):
self.dist = []
self.conts = contingency.get_contingency(
dataset, attr, self.group_var)
group_var_labels = self.group_var.values + [
f"missing '{self.group_var.name}'"]
if self.is_continuous:
stats, label_texts = [], []
for i, cont in enumerate(self.conts):
for i, cont in enumerate(self.conts.array_with_unknowns):
if np.sum(cont[1]):
stats.append(BoxData(cont, attr, i, self.group_var))
label_texts.append(self.group_var.values[i])
label_texts.append(group_var_labels[i])
self.stats = stats
self.label_txts_all = label_texts
else:
self.label_txts_all = \
[v for v, c in zip(
self.group_var.values + ["Missing values"],
self.conts.array_with_unknowns)
if np.sum(c) > 0]
self.label_txts_all = [
v for v, c in zip(
group_var_labels, self.conts.array_with_unknowns)
if np.sum(c) > 0]
else:
self.dist = distribution.get_distribution(dataset, attr)
self.conts = []
Expand Down Expand Up @@ -1071,7 +1072,7 @@ def strudel(self, dist, group_val_index=None):
cond.append(FilterDiscrete(self.group_var, [group_val_index]))
box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10))
cum = 0
values = attr.values + ["Missing values"]
values = attr.values + [f"missing '{attr.name}'"]
colors = np.vstack((attr.colors, [128, 128, 128]))
for i, v in enumerate(dist):
if v < 1e-6:
Expand Down