Skip to content

Commit

Permalink
owlinearprojection: Improve NaN handling
Browse files Browse the repository at this point in the history
Fix an error when a column contains (all/some) NaN values.
  • Loading branch information
ales-erjavec committed Jan 20, 2017
1 parent 3c05254 commit e675d5f
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 29 deletions.
77 changes: 50 additions & 27 deletions Orange/widgets/visualize/owlinearprojection.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,10 +695,12 @@ def __deactivate_selection(self):

self.varmodel_other.extend(variables)

def _get_data(self, var):
"""Return the column data for variable `var`."""
def _get_data(self, var, dtype):
"""
Return the column data and mask for variable `var`
"""
X, _ = self.data.get_column_view(var)
return X.ravel()
return column_data(self.data, var, dtype)

def _setup_plot(self, reset_view=True):
self.__replot_requested = False
Expand All @@ -708,7 +710,7 @@ def _setup_plot(self, reset_view=True):
if not variables:
return

coords = [self._get_data(var) for var in variables]
coords = [self._get_data(var, dtype=float)[0] for var in variables]
coords = numpy.vstack(coords)
p, N = coords.shape
assert N == len(self.data), p == len(variables)
Expand All @@ -721,8 +723,9 @@ def _setup_plot(self, reset_view=True):
coords = coords[:, mask]

X, Y = numpy.dot(axes, coords)
X = plotutils.normalized(X)
Y = plotutils.normalized(Y)
if X.size and Y.size:
X = plotutils.normalized(X)
Y = plotutils.normalized(Y)

pen_data, brush_data = self._color_data(mask)
size_data = self._size_data(mask)
Expand Down Expand Up @@ -773,7 +776,7 @@ def _setup_plot(self, reset_view=True):
def _color_data(self, mask=None):
color_var = self.color_var()
if color_var is not None:
color_data = self._get_data(color_var)
color_data, _ = self._get_data(color_var, dtype=float)
if color_var.is_continuous:
color_data = plotutils.continuous_colors(
color_data, None, *color_var.colors)
Expand Down Expand Up @@ -866,14 +869,12 @@ def _shape_data(self, mask):
shape_data = numpy.array(["o"] * len(self.data))
else:
assert shape_var.is_discrete
max_symbol = len(ScatterPlotItem.Symbols) - 1
shape = self._get_data(shape_var)
shape_mask = numpy.isnan(shape)
shape %= max_symbol - 1
shape[shape_mask] = max_symbol

symbols = numpy.array(list(ScatterPlotItem.Symbols))
shape_data = symbols[numpy.asarray(shape, dtype=int)]
max_symbol = symbols.size - 1
shapeidx, shape_mask = column_data(self.data, shape_var, dtype=int)
shapeidx[shape_mask] = max_symbol
shapeidx[~shape_mask] %= max_symbol -1
shape_data = symbols[shapeidx]
if mask is None:
return shape_data
else:
Expand All @@ -892,12 +893,20 @@ def _size_data(self, mask=None):
size_data = numpy.full((len(self.data),), self.point_size,
dtype=float)
else:
size_data = plotutils.normalized(self._get_data(size_var))
size_data -= numpy.nanmin(size_data)
size_mask = numpy.isnan(size_data)
nan_size = OWLinearProjection.MinPointSize - 2
size_data, size_mask = self._get_data(size_var, dtype=float)
size_data_valid = size_data[~size_mask]
if size_data_valid.size:
smin, smax = numpy.min(size_data_valid), numpy.max(size_data_valid)
sspan = smax - smin
else:
sspan = smax = smin = 0
size_data[~size_mask] -= smin
if sspan > 0:
size_data[~size_mask] /= sspan
size_data = \
size_data * self.point_size + OWLinearProjection.MinPointSize
size_data[size_mask] = OWLinearProjection.MinPointSize - 2
size_data[size_mask] = nan_size
if mask is None:
return size_data
else:
Expand Down Expand Up @@ -1541,8 +1550,25 @@ def gestureEvent(self, event):
return False


def column_data(table, var, dtype):
    """
    Return a private copy of the column for `var` together with its NaN mask.

    Parameters
    ----------
    table
        Object providing ``get_column_view(var)``, which returns a
        ``(column, flag)`` pair. The flag is treated here as "the column
        is already a private copy" (presumably the sparse/densified flag
        of Orange's `Table.get_column_view` -- confirm against the caller).
    var
        Variable descriptor providing ``is_primitive()``.
    dtype
        Anything accepted by ``numpy.dtype``; the dtype of the returned
        column.

    Returns
    -------
    (col, mask)
        `col` is the column converted to `dtype`; it never aliases the
        table's storage, so callers may modify it in place. `mask` is a
        boolean array that is True where the column value was NaN; it is
        computed *before* the conversion to `dtype`, so it stays valid for
        integer targets. For such targets the entries of `col` under the
        mask are whatever the NaN cast produced and must not be relied on.
    """
    dtype = numpy.dtype(dtype)
    col, copy = table.get_column_view(var)
    # `issubdtype` (not `isinstance` on the scalar *type*, which is always
    # False) so that columns that are already floating point are not
    # converted a second time.
    if var.is_primitive() and not numpy.issubdtype(col.dtype, numpy.inexact):
        # e.g. an object column coming from a mixed metas domain
        col = col.astype(float)
        copy = True
    mask = numpy.isnan(col)
    if dtype != col.dtype:
        col = col.astype(dtype)
        copy = True

    if not copy:
        # Still the table's own storage: detach it before handing it out.
        col = col.copy()
    return col, mask


class plotutils:
@ staticmethod
@staticmethod
def continuous_colors(data, palette=None,
low=(220, 220, 220), high=(0,0,0),
through_black=False):
Expand All @@ -1552,14 +1578,7 @@ def continuous_colors(data, palette=None,
amin, amax = numpy.nanmin(data), numpy.nanmax(data)
span = amax - amin
data = (data - amin) / (span or 1)

mask = numpy.isnan(data)
# Unknown values as gray
# TODO: This should already be a part of palette
colors = numpy.empty((len(data), 3))
colors[mask] = (128, 128, 128)
colors[~mask] = [palette.getRGB(v) for v in data[~mask]]
return colors
return palette.getRGB(data)

@staticmethod
def discrete_colors(data, nvalues, palette=None, color_index=None):
Expand All @@ -1577,7 +1596,11 @@ def discrete_colors(data, nvalues, palette=None, color_index=None):

@staticmethod
def normalized(a):
    """
    Return `a` centered on its mean and scaled by its value range.

    NaN entries are ignored when computing the minimum, maximum and mean
    and stay NaN in the result. An empty or all-NaN array is returned as
    an unchanged copy. When the range is zero the values are only
    centered (the divisor falls back to 1).
    """
    if not a.size:
        return a.copy()
    # Test for "nothing but NaN" up front: the nan-reductions below would
    # otherwise emit an "All-NaN slice encountered" RuntimeWarning.
    if numpy.isnan(a).all():
        return a.copy()
    amin, amax = numpy.nanmin(a), numpy.nanmax(a)
    span = amax - amin
    mean = numpy.nanmean(a)
    return (a - mean) / (span or 1)
Expand Down
52 changes: 50 additions & 2 deletions Orange/widgets/visualize/tests/test_owlinearprojection.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
import random
import numpy as np

from AnyQt.QtCore import Qt, QModelIndex, QAbstractItemModel
from AnyQt.QtWidgets import QComboBox
from AnyQt.QtTest import QTest

from Orange.data import Table
from Orange.widgets.visualize.owlinearprojection import OWLinearProjection
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin, datasets
from Orange.widgets.tests.utils import EventSpy, excepthook_catch, simulate


class TestOWLinearProjection(WidgetTest, WidgetOutputsTestMixin):
Expand All @@ -17,7 +23,7 @@ def setUpClass(cls):
cls.signal_data = cls.data

def setUp(self):
self.widget = self.create_widget(OWLinearProjection)
self.widget = self.create_widget(OWLinearProjection) # type: OWLinearProjection

def _select_data(self):
random.seed(42)
Expand All @@ -28,3 +34,45 @@ def _select_data(self):
def test_no_data(self):
    """The widget must accept an empty table built from the iris domain."""
    iris_domain = Table("iris").domain
    empty_table = Table(iris_domain)
    self.send_signal("Data", empty_table)

def test_nan_plot(self):
# Smoke test for NaN handling: the widget is driven first with the data
# returned by `datasets.data_1()` and then with a copy in which the first
# X column and all of Y are NaN. There are no assertions on the plot
# itself; the test relies on `excepthook_catch` (presumably turning an
# exception raised inside a Qt handler into a test failure -- confirm
# against Orange.widgets.tests.utils).
data = datasets.data_1()
# Spy created before the data is sent so the replot request is not missed.
espy = EventSpy(self.widget, OWLinearProjection.ReplotRequest)
with excepthook_catch() as exclist:
self.send_signal("Data", data)
# ensure delayed replot request is processed
# (waits only if no ReplotRequest has been recorded yet; the argument
# 1000 is presumably a timeout in milliseconds -- confirm)
if not espy.events():
assert espy.wait(1000)

# Controls that select the color, size, shape and jitter settings.
cb_color = self.widget.controls.color_index
cb_size = self.widget.controls.size_index
cb_shape = self.widget.controls.shape_index
cb_jitter = self.widget.controls.jitter_value

# By their names these helpers step through every entry of each combo
# box -- see Orange.widgets.tests.utils.simulate.
simulate.combobox_run_through_all(cb_color)
simulate.combobox_run_through_all(cb_size)
simulate.combobox_run_through_all(cb_shape)
with excepthook_catch():
simulate.combobox_activate_index(cb_jitter, 1, delay=1)

# Second pass: work on a copy so the shared dataset is not modified,
# then blank out the first X column and the whole of Y.
data = data.copy()
data.X[:, 0] = np.nan
data.Y[:] = np.nan

# Fresh spy for the second round of signals; a one-row subset
# (row index 2) is sent as well.
spy = EventSpy(self.widget, OWLinearProjection.ReplotRequest)
self.send_signal("Data", data)
self.send_signal("Data Subset", data[2:3])
if not spy.events():
assert spy.wait()

# Select specific items by label for color, size and shape. "X1" is
# presumably the column set to NaN above and "D" a discrete variable of
# `data_1` -- confirm against the dataset definition.
with excepthook_catch():
simulate.combobox_activate_item(cb_color, "X1")

with excepthook_catch():
simulate.combobox_activate_item(cb_size, "X1")

with excepthook_catch():
simulate.combobox_activate_item(cb_shape, "D")

# Finally change the jitter setting on the NaN data.
with excepthook_catch():
simulate.combobox_activate_index(cb_jitter, 2, delay=1)

0 comments on commit e675d5f

Please sign in to comment.