diff --git a/Orange/widgets/unsupervised/owmds.py b/Orange/widgets/unsupervised/owmds.py index f6ee0ca5f11..5034cd1fd25 100644 --- a/Orange/widgets/unsupervised/owmds.py +++ b/Orange/widgets/unsupervised/owmds.py @@ -55,11 +55,6 @@ def update_data(self, attr_x, attr_y, reset_view=True): self.plot_widget.hideAxis(axis) self.plot_widget.setAspectLocked(True, 1) - def get_size_index(self): - if self.attr_size == "Stress": - return -2 - return super().get_size_index() - def compute_sizes(self): def scale(a): dmin, dmax = np.nanmin(a), np.nanmax(a) @@ -69,17 +64,16 @@ def scale(a): return np.zeros_like(a) self.master.Information.missing_size.clear() - size_index = self.get_size_index() - if size_index == -1: + if self.attr_size is None: size_data = np.full((self.n_points,), self.point_width, dtype=float) - elif size_index == -2: + elif self.attr_size == "Stress": size_data = scale(stress(self.master.embedding, self.master.effective_matrix)) size_data = self.MinShapeSize + size_data * self.point_width else: size_data = \ self.MinShapeSize + \ - self.scaled_data[size_index, self.valid_data] * \ + self.scaled_data.get_column_view(self.attr_size)[0][self.valid_data] * \ self.point_width nans = np.isnan(size_data) if np.any(nans): @@ -270,11 +264,6 @@ def update_regression_line(self): def init_attr_values(self): domain = self.data and len(self.data) and self.data.domain or None - if domain is not None: - domain = Domain( - attributes=domain.attributes, - class_vars=domain.class_vars, - metas=tuple(a for a in domain.metas if a.is_primitive())) for model in self.models: model.set_domain(domain) self.graph.attr_color = self.data.domain.class_var if domain else None @@ -653,21 +642,12 @@ def _setup_plot(self, new=False): coords = np.vstack((emb_x, emb_y)).T data = self.data - - primitive_metas = tuple(a for a in data.domain.metas if a.is_primitive()) - keys = [k for k, a in enumerate(data.domain.metas) if a.is_primitive()] - data_metas = data.metas[:, keys].astype(float) - - attributes = self.data.domain.attributes + (self.variable_x, self.variable_y) + \ - primitive_metas + attributes = data.domain.attributes + (self.variable_x, self.variable_y) domain = Domain(attributes=attributes, - class_vars=self.data.domain.class_vars) - if data_metas is not None: - data_x = (self.data.X, coords, data_metas) - else: - data_x = (self.data.X, coords) - data = Table.from_numpy(domain, X=hstack(data_x), - Y=self.data.Y) + class_vars=data.domain.class_vars, + metas=data.domain.metas) + data = Table.from_numpy(domain, X=hstack((data.X, coords)), + Y=data.Y, metas=data.metas) subset_data = data[self._subset_mask] if self._subset_mask is not None else None self.graph.new_data(data, subset_data=subset_data, new=new) self.graph.update_data(self.variable_x, self.variable_y, True) diff --git a/Orange/widgets/utils/scaling.py b/Orange/widgets/utils/scaling.py index 71110570731..f17ddfce820 100644 --- a/Orange/widgets/utils/scaling.py +++ b/Orange/widgets/utils/scaling.py @@ -1,5 +1,8 @@ +from itertools import chain + import numpy as np +from Orange.data import Domain from Orange.statistics.basic_stats import DomainBasicStats from Orange.widgets.settings import Setting from Orange.widgets.utils import checksum @@ -12,12 +15,11 @@ class ScaleData: def _reset_data(self): self.domain = None - self.data = None - self.original_data = None # as numpy array + self.data = None # as Orange Table self.scaled_data = None # in [0, 1] self.jittered_data = None self.attr_values = {} - self.domain_data_stat = [] + self.domain_data_stat = {} self.valid_data_array = None self.attribute_flip_info = {} # dictionary with attr: 0/1 if flipped self.jitter_seed = 0 @@ -30,53 +32,58 @@ def rescale_data(self): def _compute_domain_data_stat(self): stt = self.domain_data_stat = \ - getCached(self.data, DomainBasicStats, (self.data,)) - for index in range(len(self.domain)): - attr = self.domain[index] + getCached(self.data, DomainBasicStats, (self.data, True)) + domain = self.domain + for attr in chain(domain.variables, domain.metas): if attr.is_discrete: self.attr_values[attr] = [0, len(attr.values)] elif attr.is_continuous: - self.attr_values[attr] = [stt[index].min, stt[index].max] + self.attr_values[attr] = [stt[attr].min, stt[attr].max] def _compute_scaled_data(self): data = self.data # We cache scaled_data and validArray to share them between widgets cached = getCached(data, "visualizationData") if cached: - self.original_data, self.scaled_data, self.valid_data_array = cached + self.data, self.scaled_data, self.valid_data_array = cached return Y = data.Y if data.Y.ndim == 2 else np.atleast_2d(data.Y).T - self.original_data = np.hstack((data.X, Y)).T - self.scaled_data = no_jit = self.original_data.copy() - self.valid_data_array = np.isfinite(no_jit) - for index in range(len(data.domain)): - attr = data.domain[index] + if np.any(data.metas): + all_data = (data.X, Y, data.metas) + else: + all_data = (data.X, Y) + all_data = np.hstack(all_data).T + self.scaled_data = self.data.copy() + self.valid_data_array = np.isfinite(all_data) + domain = self.domain + for attr in chain(domain.attributes, domain.class_vars, domain.metas): + c = self.scaled_data.get_column_view(attr)[0] if attr.is_discrete: - no_jit[index] *= 2 - no_jit[index] += 1 - no_jit[index] /= 2 * len(attr.values) + c += 0.5 + c /= len(attr.values) else: - dstat = self.domain_data_stat[index] - no_jit[index] -= dstat.min + dstat = self.domain_data_stat[attr] + c -= dstat.min if dstat.max != dstat.min: - no_jit[index] /= dstat.max - dstat.min + c /= dstat.max - dstat.min setCached(data, "visualizationData", - (self.original_data, self.scaled_data, self.valid_data_array)) + (self.data, self.scaled_data, self.valid_data_array)) def _compute_jittered_data(self): data = self.data self.jittered_data = self.scaled_data.copy() random = np.random.RandomState(seed=self.jitter_seed) - for index, col in enumerate(self.jittered_data): + domain = self.domain + for attr in chain(domain.variables, domain.metas): # Need to use a different seed for each feature - attr = data.domain[index] if attr.is_discrete: off = self.jitter_size / (25 * max(1, len(attr.values))) elif attr.is_continuous and self.jitter_continuous: off = self.jitter_size / 25 else: continue + col = self.jittered_data.get_column_view(attr)[0] col += random.uniform(-off, off, len(data)) # fix values outside [0, 1] col = np.absolute(col) @@ -92,8 +99,13 @@ def set_data(self, data, skip_if_same=False, no_data=False): if data is None: return - self.domain = data.domain - self.data = data + domain = data.domain + new_domain = Domain(attributes=domain.attributes, + class_vars=domain.class_vars, + metas=tuple(v for v in domain.metas if v.is_primitive())) + self.data = data.transform(new_domain) + self.data.metas = self.data.metas.astype(float) + self.domain = self.data.domain self.attribute_flip_info = {} if not no_data: self._compute_domain_data_stat() @@ -103,67 +115,73 @@ def set_data(self, data, skip_if_same=False, no_data=False): def flip_attribute(self, attr): if attr.is_discrete: return 0 - index = self.domain.index(attr) self.attribute_flip_info[attr] = 1 - self.attribute_flip_info.get(attr, 0) if attr.is_continuous: self.attr_values[attr] = [-self.attr_values[attr][1], -self.attr_values[attr][0]] - - self.jittered_data[index] = 1 - self.jittered_data[index] - self.scaled_data[index] = 1 - self.scaled_data[index] + col = self.jittered_data.get_column_view(attr)[0] + col *= -1 + col += 1 + col = self.scaled_data.get_column_view(attr)[0] + col *= -1 + col += 1 return 1 - def get_valid_list(self, indices): + def get_valid_list(self, attrs): """ Get array of 0 and 1 of len = len(self.data). If there is a missing value at any attribute in indices return 0 for that instance. """ if self.valid_data_array is None or len(self.valid_data_array) == 0: return np.array([], np.bool) + domain = self.domain + indices = [] + for index, attr in enumerate(chain(domain.variables, domain.metas)): + if attr in attrs: + indices.append(index) return np.all(self.valid_data_array[indices], axis=0) - def get_valid_indices(self, indices): + def get_valid_indices(self, attrs): """ Get array with numbers that represent the instance indices that have a valid data value. """ - valid_list = self.get_valid_list(indices) + valid_list = self.get_valid_list(attrs) return np.nonzero(valid_list)[0] class ScaleScatterPlotData(ScaleData): - def get_xy_data_positions(self, xattr, yattr, filter_valid=False, + def get_xy_data_positions(self, attr_x, attr_y, filter_valid=False, copy=True): """ Create x-y projection of attributes in attrlist. """ - xattr_index = self.domain.index(xattr) - yattr_index = self.domain.index(yattr) + jit = self.jittered_data if filter_valid is True: - filter_valid = self.get_valid_list([xattr_index, yattr_index]) + filter_valid = self.get_valid_list([attr_x, attr_y]) if isinstance(filter_valid, np.ndarray): - xdata = self.jittered_data[xattr_index, filter_valid] - ydata = self.jittered_data[yattr_index, filter_valid] + data_x = jit.get_column_view(attr_x)[0][filter_valid] + data_y = jit.get_column_view(attr_y)[0][filter_valid] elif copy: - xdata = self.jittered_data[xattr_index].copy() - ydata = self.jittered_data[yattr_index].copy() + data_x = jit.get_column_view(attr_x)[0].copy() + data_y = jit.get_column_view(attr_y)[0].copy() else: - xdata = self.jittered_data[xattr_index] - ydata = self.jittered_data[yattr_index] + data_x = jit.get_column_view(attr_x)[0] + data_y = jit.get_column_view(attr_y)[0] - if self.domain[xattr_index].is_discrete: - xdata *= len(self.domain[xattr_index].values) - xdata -= 0.5 + if attr_x.is_discrete: + data_x *= len(attr_x.values) + data_x -= 0.5 else: - xdata *= self.attr_values[xattr][1] - self.attr_values[xattr][0] - xdata += float(self.attr_values[xattr][0]) - if self.domain[yattr_index].is_discrete: - ydata *= len(self.domain[yattr_index].values) - ydata -= 0.5 + data_x *= self.attr_values[attr_x][1] - self.attr_values[attr_x][0] + data_x += float(self.attr_values[attr_x][0]) + if attr_y.is_discrete: + data_y *= len(attr_y.values) + data_y -= 0.5 else: - ydata *= self.attr_values[yattr][1] - self.attr_values[yattr][0] - ydata += float(self.attr_values[yattr][0]) - return xdata, ydata + data_y *= self.attr_values[attr_y][1] - self.attr_values[attr_y][0] + data_y += float(self.attr_values[attr_y][0]) + return data_x, data_y getXYDataPositions = get_xy_data_positions diff --git a/Orange/widgets/visualize/owscatterplot.py b/Orange/widgets/visualize/owscatterplot.py index f3159881924..500b8c14208 100644 --- a/Orange/widgets/visualize/owscatterplot.py +++ b/Orange/widgets/visualize/owscatterplot.py @@ -47,9 +47,12 @@ def iterate_states(self, initial_state): def compute_score(self, state): graph = self.master.graph - ind12 = [graph.domain.index(self.attrs[x]) for x in state] - valid = graph.get_valid_list(ind12) - X = graph.jittered_data[ind12, :][:, valid].T + attrs = [self.attrs[x] for x in state] + valid = graph.get_valid_list(attrs) + cols = [] + for var in attrs: + cols.append(graph.jittered_data.get_column_view(var)[0][valid]) + X = np.column_stack(cols) Y = self.master.data.Y[valid] if X.shape[0] < self.minK: return @@ -66,7 +69,7 @@ def bar_length(self, score): return max(0, -score) def score_heuristic(self): - X = self.master.graph.jittered_data.T + X = self.master.graph.jittered_data.X Y = self.master.data.Y mdomain = self.master.data.domain dom = Domain([ContinuousVariable(str(i)) for i in range(X.shape[1])], @@ -139,7 +142,6 @@ def __init__(self): self.data = None # Orange.data.Table self.subset_data = None # Orange.data.Table - self.data_metas_X = None # self.data, where primitive metas are moved to X self.sql_data = None # Orange.data.sql.table.SqlTable self.attribute_selection_list = None # list of Orange.data.Variable self.__timer = QTimer(self, interval=1200) @@ -243,7 +245,6 @@ def set_data(self, data): same_domain = (self.data and data and data.domain.checksum() == self.data.domain.checksum()) self.data = data - self.data_metas_X = self.move_primitive_metas_to_X(data) if not same_domain: self.init_attr_values() @@ -295,7 +296,6 @@ def add_data(self, time=0.4): data_sample.download_data(2000, partial=True) data = Table(data_sample) self.data = Table.concatenate((self.data, data), axis=0) - self.data_metas_X = self.move_primitive_metas_to_X(self.data) self.handleNewSignals() def switch_sampling(self): @@ -304,15 +304,6 @@ def switch_sampling(self): self.add_data() self.__timer.start() - def move_primitive_metas_to_X(self, data): - if data is not None: - new_attrs = [a for a in data.domain.attributes + data.domain.metas - if a.is_primitive()] - new_metas = [m for m in data.domain.metas if not m.is_primitive()] - new_domain = Domain(new_attrs, data.domain.class_vars, new_metas) - data = data.transform(new_domain) - return data - @Inputs.data_subset def set_subset_data(self, subset_data): self.warning() @@ -322,12 +313,11 @@ def set_subset_data(self, subset_data): else: self.warning("Data subset does not support large Sql tables") subset_data = None - self.subset_data = self.move_primitive_metas_to_X(subset_data) self.controls.graph.alpha_value.setEnabled(subset_data is None) # called when all signals are received, so the graph is updated only once def handleNewSignals(self): - self.graph.new_data(self.data_metas_X, self.subset_data) + self.graph.new_data(self.data, self.subset_data) if self.attribute_selection_list and self.graph.domain and \ all(attr in self.graph.domain for attr in self.attribute_selection_list): diff --git a/Orange/widgets/visualize/owscatterplotgraph.py b/Orange/widgets/visualize/owscatterplotgraph.py index 5658636e6c5..b482983db5d 100644 --- a/Orange/widgets/visualize/owscatterplotgraph.py +++ b/Orange/widgets/visualize/owscatterplotgraph.py @@ -501,8 +501,6 @@ def __init__(self, scatter_widget, parent=None, _="None", view_box=InteractiveVi scene = self.plot_widget.scene() self._create_drag_tooltip(scene) self._data = None # Original Table as passed from widget to new_data before transformations - self.attr_x = None - self.attr_y = None self.replot = self.plot_widget.replot ScaleScatterPlotData.__init__(self) @@ -616,8 +614,8 @@ def sparse_to_dense(self): if data is None or not data.is_sparse(): return data - attrs = {self.attr_x, - self.attr_y, + attrs = {self.shown_x, + self.shown_y, self.attr_color, self.attr_shape, self.attr_size, @@ -655,12 +653,6 @@ def _clear_plot_widget(self): self.set_axis_title("left", "") def update_data(self, attr_x, attr_y, reset_view=True): - self.attr_x = attr_x - self.attr_y = attr_y - if attr_x not in self.data.domain or attr_y not in self.data.domain: - data = self.sparse_to_dense() - self.set_data(data) - self.master.Warning.missing_coords.clear() self.master.Information.missing_coords.clear() self._clear_plot_widget() @@ -672,13 +664,16 @@ def update_data(self, attr_x, attr_y, reset_view=True): yaxis.textWidth = 30 self.shown_x, self.shown_y = attr_x, attr_y + if attr_x not in self.data.domain or attr_y not in self.data.domain: + data = self.sparse_to_dense() + self.set_data(data) if self.jittered_data is None or not len(self.jittered_data): self.valid_data = None else: index_x = self.domain.index(attr_x) index_y = self.domain.index(attr_y) - self.valid_data = self.get_valid_list([index_x, index_y]) + self.valid_data = self.get_valid_list([attr_x, attr_y]) if not np.any(self.valid_data): self.valid_data = None if self.valid_data is None: @@ -724,7 +719,7 @@ def update_data(self, attr_x, attr_y, reset_view=True): self.plot_widget.addItem(self.density_img) self.data_indices = np.flatnonzero(self.valid_data) - if len(self.data_indices) != self.original_data.shape[1]: + if len(self.data_indices) != len(self.data): self.master.Information.missing_coords( self.shown_x.name, self.shown_y.name) @@ -799,14 +794,13 @@ def get_size_index(self): def compute_sizes(self): self.master.Information.missing_size.clear() - size_index = self.get_size_index() - if size_index == -1: + if self.attr_size is None: size_data = np.full((self.n_points,), self.point_width, dtype=float) else: size_data = \ self.MinShapeSize + \ - self.scaled_data[size_index, self.valid_data] * \ + self.scaled_data.get_column_view(self.attr_size)[0][self.valid_data] * \ self.point_width nans = np.isnan(size_data) if np.any(nans): @@ -824,16 +818,16 @@ def update_point_size(self): self.scatterplot_item.setSize(size_data) self.scatterplot_item_sel.setSize(size_data + SELECTION_WIDTH) - def get_color_index(self): + def get_color(self): if self.attr_color is None: - return -1 + return None colors = self.attr_color.colors if self.attr_color.is_discrete: self.discrete_palette = ColorPaletteGenerator( number_of_colors=len(colors), rgb_colors=colors) else: self.continuous_palette = ContinuousPaletteGenerator(*colors) - return self.domain.index(self.attr_color) + return self.attr_color def compute_colors_sel(self, keep_colors=False): if not keep_colors: @@ -868,7 +862,7 @@ def make_pen(color, width): def compute_colors(self, keep_colors=False): if not keep_colors: self.pen_colors = self.brush_colors = None - color_index = self.get_color_index() + self.get_color() def make_pen(color, width): p = QPen(color, width) @@ -880,7 +874,7 @@ def make_pen(color, width): subset = np.array([ex.id in self.subset_indices for ex in self.data[self.valid_data]]) - if color_index == -1: # same color + if self.attr_color is None: # same color color = self.plot_widget.palette().color(OWPalette.Data) pen = [make_pen(color, 1.5)] * self.n_points if subset is not None: @@ -892,8 +886,8 @@ def make_pen(color, width): * self.n_points return pen, brush - c_data = self.original_data[color_index, self.valid_data] - if self.domain[color_index].is_continuous: + c_data = self.data.get_column_view(self.attr_color)[0][self.valid_data] + if self.attr_color.is_continuous: if self.pen_colors is None: self.scale = DiscretizedScale(np.nanmin(c_data), np.nanmax(c_data)) c_data -= self.scale.offset @@ -1004,19 +998,18 @@ def update_labels(self): for label, text in zip(self.labels, label_data): label.setText(text, black) - def get_shape_index(self): + def get_shape(self): if self.attr_shape is None or \ len(self.attr_shape.values) > len(self.CurveSymbols): - return -1 - return self.domain.index(self.attr_shape) + return None + return self.attr_shape def compute_symbols(self): self.master.Information.missing_shape.clear() - shape_index = self.get_shape_index() - if shape_index == -1: + if self.get_shape() is None: shape_data = self.CurveSymbols[np.zeros(self.n_points, dtype=int)] else: - shape_data = self.original_data[shape_index, self.valid_data] + shape_data = self.data.get_column_view(self.attr_shape)[0][self.valid_data] nans = np.isnan(shape_data) if np.any(nans): shape_data[nans] = len(self.CurveSymbols) - 1 @@ -1068,16 +1061,14 @@ def make_legend(self): self.update_legend() def make_color_legend(self): - color_index = self.get_color_index() - if color_index == -1: + if self.attr_color is None: return - color_var = self.domain[color_index] - use_shape = self.get_shape_index() == color_index - if color_var.is_discrete: + use_shape = self.get_shape() == self.get_color() + if self.attr_color.is_discrete: if not self.legend: self.create_legend() palette = self.discrete_palette - for i, value in enumerate(color_var.values): + for i, value in enumerate(self.attr_color.values): color = QColor(*palette.getRGB(i)) brush = color.lighter(self.DarkerValue) self.legend.addItem( @@ -1095,15 +1086,14 @@ def make_color_legend(self): legend.setGeometry(label.boundingRect()) def make_shape_legend(self): - shape_index = self.get_shape_index() - if shape_index == -1 or shape_index == self.get_color_index(): + shape = self.get_shape() + if shape is None or shape == self.get_color(): return if not self.legend: self.create_legend() - shape_var = self.domain[shape_index] color = QColor(0, 0, 0) color.setAlpha(self.alpha_value) - for i, value in enumerate(shape_var.values): + for i, value in enumerate(self.attr_shape.values): self.legend.addItem( ScatterPlotItem(pen=color, brush=color, size=10, symbol=self.CurveSymbols[i]), escape(value)) diff --git a/Orange/widgets/visualize/tests/test_owscatterplot.py b/Orange/widgets/visualize/tests/test_owscatterplot.py index 2914a7391a4..83e6ffa60cd 100644 --- a/Orange/widgets/visualize/tests/test_owscatterplot.py +++ b/Orange/widgets/visualize/tests/test_owscatterplot.py @@ -459,6 +459,25 @@ def test_color_is_optional(self): self.assertEqual(attr_y.currentText(), breathes.name) self.assertEqual(attr_color.currentText(), type.name) + def test_handle_metas(self): + """ + Scatter Plot Graph can handle metas + GH-2699 + """ + w = self.widget + data = Table("iris") + domain = Domain( + attributes=data.domain.attributes[:2], + class_vars=data.domain.class_vars, + metas=data.domain.attributes[2:] + ) + data = data.transform(domain) + # Sometimes floats in metas are saved as objects + data.metas = data.metas.astype(object) + self.send_signal(w.Inputs.data, data) + simulate.combobox_activate_item(w.cb_attr_x, data.domain.metas[1].name) + simulate.combobox_activate_item(w.controls.graph.attr_color, data.domain.metas[0].name) + w.update_graph() if __name__ == "__main__": import unittest