From da1c3d237fcaf2b4fa220f5e8a081c6a551a9d57 Mon Sep 17 00:00:00 2001 From: astaric Date: Mon, 10 Nov 2014 14:51:45 +0100 Subject: [PATCH] Remove irrelevant widgets. --- Orange/widgets/__init__.py | 6 +- Orange/widgets/data/owconcatenate.py | 270 ----- Orange/widgets/data/owcontinuize.py | 423 ------- Orange/widgets/data/oweditdomain.py | 577 ---------- Orange/widgets/data/owfeatureconstructor.py | 845 -------------- Orange/widgets/data/owimpute.py | 1099 ------------------- Orange/widgets/data/owmergedata.py | 301 ----- Orange/widgets/data/owpurgedomain.py | 411 ------- Orange/widgets/data/owrank.py | 591 ---------- 9 files changed, 1 insertion(+), 4522 deletions(-) delete mode 100644 Orange/widgets/data/owconcatenate.py delete mode 100644 Orange/widgets/data/owcontinuize.py delete mode 100644 Orange/widgets/data/oweditdomain.py delete mode 100644 Orange/widgets/data/owfeatureconstructor.py delete mode 100644 Orange/widgets/data/owimpute.py delete mode 100644 Orange/widgets/data/owmergedata.py delete mode 100644 Orange/widgets/data/owpurgedomain.py delete mode 100644 Orange/widgets/data/owrank.py diff --git a/Orange/widgets/__init__.py b/Orange/widgets/__init__.py index 35f11d0cffe..d12b0e027ff 100644 --- a/Orange/widgets/__init__.py +++ b/Orange/widgets/__init__.py @@ -9,10 +9,6 @@ def widget_discovery(discovery): #from . import data dist = pkg_resources.get_distribution("Orange") pkgs = ["Orange.widgets.data", - "Orange.widgets.visualize", - "Orange.widgets.classify", - "Orange.widgets.regression", - "Orange.widgets.evaluate", - "Orange.widgets.unsupervised"] + "Orange.widgets.visualize",] for pkg in pkgs: discovery.process_category_package(pkg, distribution=dist) diff --git a/Orange/widgets/data/owconcatenate.py b/Orange/widgets/data/owconcatenate.py deleted file mode 100644 index 487630637b9..00000000000 --- a/Orange/widgets/data/owconcatenate.py +++ /dev/null @@ -1,270 +0,0 @@ -""" -Concatenate -=========== - -Concatenate (append) two or more data sets. - -""" - -from collections import OrderedDict -from functools import reduce -from itertools import chain, repeat -from operator import itemgetter - -from PyQt4 import QtGui, QtCore -from PyQt4.QtCore import Qt - -import numpy - -import Orange.data -from Orange.widgets import widget, gui, settings - - -class OWConcatenate(widget.OWWidget): - name = "Concatenate" - description = "Concatenate (append) two or more data sets." - priority = 1111 - icon = "icons/Concatenate.svg" - - inputs = [("Primary Data", Orange.data.Table, - "set_primary_data", widget.Default), - ("Additional Data", Orange.data.Table, - "set_more_data", widget.Multiple)] - outputs = [("Data", Orange.data.Table)] - - #: Domain merging operations - MergeUnion, MergeIntersection = 0, 1 - - #: Domain role of the "Source ID" attribute. - ClassRole, AttributeRole, MetaRole = 0, 1, 2 - - #: Selected domain merging operation - merge_type = settings.Setting(0) - #: Append source ID column - append_source_column = settings.Setting(False) - #: Selected "Source ID" domain role - source_column_role = settings.Setting(0) - #: User specified name for the "Source ID" attr - source_attr_name = settings.Setting("Source ID") - - want_main_area = False - - def __init__(self, parent=None): - super().__init__(parent) - - self.primary_data = None - self.more_data = OrderedDict() - - mergebox = gui.widgetBox(self.controlArea, "Domains merging") - box = gui.radioButtons( - mergebox, self, "merge_type", - callback=self._merge_type_changed) - - gui.widgetLabel( - box, self.tr("When there is no primary table, " + - "the domain should be:")) - - gui.appendRadioButton( - box, self.tr("Union of attributes appearing in all tables")) - - gui.appendRadioButton( - box, self.tr("Intersection of attributes in all tables")) - - gui.separator(box) - - label = gui.widgetLabel( - box, - self.tr("The resulting table will have class only if there " + - "is no conflict between input classes.")) - label.setWordWrap(True) - - ### - box = gui.widgetBox( - self.controlArea, self.tr("Source identification"), - addSpace=False) - - cb = gui.checkBox( - box, self, "append_source_column", - self.tr("Append data source IDs")) - - ibox = gui.indentedBox(box, sep=gui.checkButtonOffsetHint(cb)) - - form = QtGui.QFormLayout( - spacing=8, - labelAlignment=Qt.AlignLeft, - formAlignment=Qt.AlignLeft, - fieldGrowthPolicy=QtGui.QFormLayout.AllNonFixedFieldsGrow - ) - - form.addRow( - self.tr("Feature name"), - gui.lineEdit(ibox, self, "source_attr_name", valueType=str)) - - form.addRow( - self.tr("Place"), - gui.comboBox( - ibox, self, "source_column_role", - items=[self.tr("Class attribute"), - self.tr("Attribute"), - self.tr("Meta attribute")]) - ) - - ibox.layout().addLayout(form) - - cb.disables.append(ibox) - cb.makeConsistent() - - gui.button( - self.controlArea, self, self.tr("Apply Changes"), - callback=self.apply, default=True - ) - - gui.rubber(self.controlArea) - self.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed) - - def set_primary_data(self, data): - self.primary_data = data - - def set_more_data(self, data=None, id=None): - if data is None: - del self.more_data[id] - else: - self.more_data[id] = data - - def handleNewSignals(self): - self.apply() - - def apply(self): - tables = [] - if self.primary_data is not None: - tables = [self.primary_data] + list(self.more_data.values()) - domain = self.primary_data.domain - elif self.more_data: - tables = self.more_data.values() - if self.merge_type == OWConcatenate.MergeUnion: - domain = reduce(domain_union, - (table.domain for table in tables)) - else: - domain = reduce(domain_intersection, - (table.domain for table in tables)) - - tables = [Orange.data.Table.from_table(domain, table) - for table in tables] - - if tables: - data = concat(tables) - if self.append_source_column: - source_var = Orange.data.DiscreteVariable( - self.source_attr_name, - values=["{}".format(i) for i in range(len(tables))] - ) - source_values = list( - chain(*(repeat(i, len(table)) - for i, table in enumerate(tables))) - ) - places = ["class_vars", "attributes", "metas"] - place = places[self.source_column_role] - - data = append_columns( - data, **{place: [(source_var, source_values)]} - ) - else: - data = None - - self.send("Data", data) - - def _merge_type_changed(self, ): - if self.primary_data is None and self.more_data: - self.apply() - - -def concat(tables): - Xs = [table.X for table in tables] - Ys = [table.Y for table in tables] - metas = [table.metas for table in tables] - - domain = tables[0].domain - - X = numpy.vstack(Xs) - Y = numpy.vstack(Ys) - metas = numpy.vstack(metas) - return Orange.data.Table.from_numpy(domain, X, Y, metas) - - -def unique(seq): - seen_set = set() - for el in seq: - if el not in seen_set: - yield el - seen_set.add(el) - - -def domain_union(A, B): - union = Orange.data.Domain( - tuple(unique(A.attributes + B.attributes)), - tuple(unique(A.class_vars + B.class_vars)), - tuple(unique(A.metas + B.metas)) - ) - return union - - -def domain_intersection(A, B): - def tuple_intersection(t1, t2): - inters = set(t1) & set(t2) - return tuple(el for el in t1 + t2 if el in inters) - - intersection = Orange.data.Domain( - tuple_intersection(A.attributes, B.attributes), - tuple_intersection(A.class_vars, B.class_vars), - tuple_intersection(A.metas, B.metas), - ) - - return intersection - - -#:: (Table, **{place: [(Variable, values)]}) -> Table -def append_columns(data, attributes=(), class_vars=(), metas=()): - domain = data.domain - new_attributes = tuple(map(itemgetter(0), attributes)) - new_class_vars = tuple(map(itemgetter(0), class_vars)) - new_metas = tuple(map(itemgetter(0), metas)) - - new_domain = Orange.data.Domain( - domain.attributes + new_attributes, - domain.class_vars + new_class_vars, - domain.metas + new_metas - ) - - def ascolumn(array): - array = numpy.asarray(array) - if array.ndim < 2: - array = array.reshape((-1, 1)) - return array - - attr_cols = [ascolumn(col) for _, col in attributes] - class_cols = [ascolumn(col) for _, col in class_vars] - metas = [ascolumn(col) for _, col in metas] - - X = numpy.hstack((data.X,) + tuple(attr_cols)) - Y = numpy.hstack((data.Y,) + tuple(class_cols)) - metas = numpy.hstack((data.metas,) + tuple(metas)) - - new_data = Orange.data.Table.from_numpy(new_domain, X, Y, metas) - return new_data - - -def main(): - app = QtGui.QApplication([]) - w = OWConcatenate() - data_a = Orange.data.Table("iris") - data_b = Orange.data.Table("zoo") - w.set_more_data(data_a, 0) - w.set_more_data(data_b, 1) - w.handleNewSignals() - w.show() - - app.exec_() - - -if __name__ == "__main__": - main() diff --git a/Orange/widgets/data/owcontinuize.py b/Orange/widgets/data/owcontinuize.py deleted file mode 100644 index 4e1961666f0..00000000000 --- a/Orange/widgets/data/owcontinuize.py +++ /dev/null @@ -1,423 +0,0 @@ -#from orngWrap import PreprocessedLearner -from PyQt4 import QtCore -from PyQt4 import QtGui - -import Orange.data -from Orange.statistics import distribution -from Orange.data.continuizer import DomainContinuizer -from Orange.data.table import Table -from Orange.widgets import gui, widget -from Orange.widgets.settings import Setting - - -class OWContinuize(widget.OWWidget): - name = "Continuize" - description = ("Turns discrete attributes into continuous and, " + - "optionally, normalizes the continuous values.") - icon = "icons/Continuize.svg" - author = "Martin Frlin" - category = "Data" - keywords = ["data", "continuize"] - - inputs = [("Data", Orange.data.Table, "setData")] - outputs = [("Data", Orange.data.Table)] - - want_main_area = False - - multinomial_treatment = Setting(0) - zero_based = Setting(1) - continuous_treatment = Setting(0) - class_treatment = Setting(0) - - transform_class = Setting(False) - - autosend = Setting(0) - - multinomial_treats = ( - ("Target or First value as base", DomainContinuizer.LowestIsBase), - ("Most frequent value as base", DomainContinuizer.FrequentIsBase), - ("One attribute per value", DomainContinuizer.NValues), - ("Ignore multinomial attributes", DomainContinuizer.IgnoreMulti), - ("Ignore all discrete attributes", DomainContinuizer.Ignore), - ("Treat as ordinal", DomainContinuizer.AsOrdinal), - ("Divide by number of values", DomainContinuizer.AsNormalizedOrdinal)) - - continuous_treats = ( - ("Leave them as they are", DomainContinuizer.Leave), - ("Normalize by span", DomainContinuizer.NormalizeBySpan), - ("Normalize by standard deviation", DomainContinuizer.NormalizeBySD)) - - class_treats = ( - ("Leave it as it is", DomainContinuizer.Leave), - ("Treat as ordinal", DomainContinuizer.AsOrdinal), - ("Divide by number of values", DomainContinuizer.AsNormalizedOrdinal), - ("One class per value", DomainContinuizer.NValues), - ) - - value_ranges = ["from -1 to 1", "from 0 to 1"] - - def __init__(self, parent=None): - widget.OWWidget.__init__(self, parent) - - self.data_changed = False - - box = gui.widgetBox(self.controlArea, "Multinomial attributes") - gui.radioButtonsInBox( - box, self, "multinomial_treatment", - btnLabels=[x[0] for x in self.multinomial_treats], - callback=self.sendDataIf) - - box = gui.widgetBox(self.controlArea, "Continuous attributes") - gui.radioButtonsInBox( - box, self, "continuous_treatment", - btnLabels=[x[0] for x in self.continuous_treats], - callback=self.sendDataIf) - - box = gui.widgetBox(self.controlArea, "Discrete class attribute") - gui.radioButtonsInBox( - box, self, "class_treatment", - btnLabels=[t[0] for t in self.class_treats], - callback=self.sendDataIf - ) - - zbbox = gui.widgetBox(self.controlArea, "Value range") - - gui.radioButtonsInBox( - zbbox, self, "zero_based", - btnLabels=self.value_ranges, - callback=self.sendDataIf) - - snbox = gui.widgetBox(self.controlArea, "Send data") - gui.button(snbox, self, "Send data", callback=self.sendData, - default=True) - - gui.checkBox(snbox, self, "autosend", "Send automatically", - callback=self.enableAuto) - - self.data = None - self.resize(150, 300) - - def setData(self, data): - self.data = data - if data is None: - self.send("Data", None) - else: - self.sendData() - - def sendDataIf(self): - self.data_changed = True - if self.autosend: - self.sendData() - - def enableAuto(self): - if self.data_changed: - self.sendData() - - def constructContinuizer(self): - conzer = DomainContinuizer( - zero_based=self.zero_based, - multinomial_treatment=self.multinomial_treats[self.multinomial_treatment][1], - continuous_treatment=self.continuous_treats[self.continuous_treatment][1], - class_treatment=self.class_treats[self.class_treatment][1] - ) - - return conzer - - # def sendPreprocessor(self): - # continuizer = self.constructContinuizer() - # self.send("Preprocessor", PreprocessedLearner( - # lambda data, weightId=0, tc=(self.targetValue if self.classTreatment else -1): - # Table(continuizer(data, weightId, tc) - # if data.domain.class_var and isinstance(self.data.domain.class_var, DiscreteVariable) - # else continuizer(data, weightId), data))) - - def sendData(self): - continuizer = self.constructContinuizer() - if self.data is not None: - domain = continuizer(self.data) - data = Table.from_table(domain, self.data) - self.send("Data", data) - else: - self.sendData("Data", None) - self.data_changed = False - - def sendReport(self): - self.reportData(self.data, "Input data") - self.reportSettings( - "Settings", - [("Multinominal attributes", - self.multinomial_treats[self.multinomial_treatment][0]), - ("Continuous attributes", - self.continuous_treats[self.continuous_treatment][0]), - ("Class", self.class_treats[self.class_tereatment][0]), - ("Value range", self.value_ranges[self.zero_based])]) - - -from Orange.feature.transformation import \ - Identity, Indicator, Indicator_1, Normalizer - -from functools import partial, wraps, reduce - - -# flip:: (a * b -> c) -> (b * a -> c) -def flip(func): - "Flip parameter order" - return wraps(func)(lambda a, b: func(b, a)) - - -is_discrete = partial(flip(isinstance), Orange.data.DiscreteVariable) -is_continuous = partial(flip(isinstance), Orange.data.ContinuousVariable) - - -class WeightedIndicator(Indicator): - def __init__(self, variable, value, weight=1.0): - super().__init__(variable, value) - self.weight = weight - - def _transform(self, c): - t = super()._transform(c) * self.weight - if self.weight != 1.0: - t *= self.weight - return t - - -class WeightedIndicator_1(Indicator_1): - def __init__(self, variable, value, weight=1.0): - super().__init__(variable, value) - self.weight = weight - - def _transform(self, c): - t = super()._transform(c) * self.weight - if self.weight != 1.0: - t *= self.weight - return t - - -def make_indicator_var(source, value_ind, weight=None, zero_based=True): - var = Orange.data.ContinuousVariable( - "{}={}".format(source.name, source.values[value_ind]) - ) - if zero_based and weight is None: - indicator = Indicator(source, value=value_ind) - elif zero_based: - indicator = WeightedIndicator(source, value=value_ind, weight=weight) - elif weight is None: - indicator = Indicator_1(source, value=value_ind) - else: - indicator = WeightedIndicator_1(source, value=value_ind, weight=weight) - var.get_value_from = indicator - return var - - -def dummy_coding(var, base_value=-1, zero_based=True): - N = len(var.values) - if base_value == -1: - base_value = var.base_value if var.base_value >= 0 else 0 - assert 0 <= base_value < len(var.values) - return [make_indicator_var(var, i, zero_based=zero_based) - for i in range(N) if i != base_value] - - -def one_hot_coding(var, zero_based=True): - N = len(var.values) - return [make_indicator_var(var, i, zero_based=zero_based) - for i in range(N)] - - -def continuize_domain(data_or_domain, - multinomial_treatment=DomainContinuizer.NValues, - continuous_treatment=DomainContinuizer.Leave, - class_treatment=DomainContinuizer.Leave, - zero_based=True): - - if isinstance(data_or_domain, Orange.data.Domain): - data, domain = None, data_or_domain - else: - data, domain = data_or_domain, data_or_domain.domain - - def needs_dist(var, mtreat, ctreat): - "Does the `var` need a distribution given specified flags" - if isinstance(var, Orange.data.DiscreteVariable): - return mtreat == DomainContinuizer.FrequentIsBase - elif isinstance(var, Orange.data.ContinuousVariable): - return ctreat != DomainContinuizer.Leave - else: - raise ValueError - - # Compute the column indices which need a distribution. - attr_needs_dist = [needs_dist(var, multinomial_treatment, - continuous_treatment) - for var in domain.attributes] - cls_needs_dist = [needs_dist(var, class_treatment, DomainContinuizer.Leave) - for var in domain.class_vars] - - columns = [i for i, needs in enumerate(attr_needs_dist + cls_needs_dist) - if needs] - - if columns: - if data is None: - raise TypeError("continuizer requires data") - dist = distribution.get_distributions_for_columns(data, columns) - else: - dist = [] - - dist_iter = iter(dist) - - newattrs = [continuize_var(var, next(dist_iter) if needs_dist else None, - multinomial_treatment, continuous_treatment, - zero_based) - for var, needs_dist in zip(domain.attributes, attr_needs_dist)] - - newclass = [continuize_var(var, next(dist_iter) if needs_dist else None, - class_treatment, DomainContinuizer.Ignore, - zero_based) - for var, needs_dist in zip(domain.class_vars, cls_needs_dist)] - - newattrs = reduce(list.__iadd__, newattrs, []) - newclass = reduce(list.__iadd__, newclass, []) - return Orange.data.Domain(newattrs, newclass, domain.metas) - - -def continuize_var(var, - data_or_dist=None, - multinomial_treatment=DomainContinuizer.NValues, - continuous_treatment=DomainContinuizer.Leave, - zero_based=True): - - if isinstance(var, Orange.data.ContinuousVariable): - if continuous_treatment == DomainContinuizer.NormalizeBySpan: - return [normalize_by_span(var, data_or_dist, zero_based)] - elif continuous_treatment == DomainContinuizer.NormalizeBySD: - return [normalize_by_sd(var, data_or_dist)] - else: - return [var] - - elif isinstance(var, Orange.data.DiscreteVariable): - if len(var.values) > 2 and \ - multinomial_treatment == DomainContinuizer.ReportError: - raise ValueError("{0.name} is a multinomial variable".format(var)) - if len(var.values) < 2 or \ - multinomial_treatment == DomainContinuizer.Ignore or \ - (multinomial_treatment == DomainContinuizer.IgnoreMulti and \ - len(var.values) > 2): - return [] - elif multinomial_treatment == DomainContinuizer.AsOrdinal: - return [ordinal_to_continuous(var)] - elif multinomial_treatment == DomainContinuizer.AsNormalizedOrdinal: - return [ordinal_to_normalized_continuous(var, zero_based)] - elif multinomial_treatment == DomainContinuizer.NValues: - return one_hot_coding(var, zero_based) - elif multinomial_treatment == DomainContinuizer.LowestIsBase or \ - multinomial_treatment == DomainContinuizer.IgnoreMulti: - return dummy_coding(var, zero_based=zero_based) - elif multinomial_treatment == DomainContinuizer.FrequentIsBase: - dist = _ensure_dist(var, data_or_dist) - modus = dist.modus() - return dummy_coding(var, base_value=modus, zero_based=zero_based) - elif multinomial_treatment == DomainContinuizer.Leave: - return [var] - else: - raise NotImplementedError # ValueError?? - - -def _ensure_dist(var, data_or_dist): - if isinstance(data_or_dist, distribution.Discrete): - if not is_discrete(var): - raise TypeError - return data_or_dist - elif isinstance(data_or_dist, distribution.Continuous): - if not is_continuous(var): - raise TypeError - return data_or_dist - elif isinstance(data_or_dist, Orange.data.Storage): - return distribution.get_distribution(data_or_dist, var) - else: - raise ValueError("Need a distribution or data.") - - -def normalized_var(var, translate, scale): - new_var = Orange.data.ContinuousVariable(var.name) - norm = Normalizer(var, translate, scale) - new_var.get_value_from = norm - return new_var - - -def ordinal_to_continuous(var): - new_var = Orange.data.ContinuousVariable(var.name) - new_var.get_value_from = Identity(var) - return new_var - - -def ordinal_to_normalized_continuous(var, zero_based=True): - n_values = len(var.values) - if zero_based: - return normalized_var(var, 0, 1 / (n_values - 1)) - else: - return normalized_var(var, (n_values - 1) / 2, 2 / (n_values - 1)) - - -def normalize_by_span(var, data_or_dist, zero_based=True): - dist = _ensure_dist(var, data_or_dist) - v_max, v_min = dist.max(), dist.min() - span = v_max - v_min - if span < 1e-15: - span = 1 - - if zero_based: - return normalized_var(var, v_min, 1 / span) - else: - return normalized_var(var, (v_min + v_max) / 2, 2 / span) - - -def normalize_by_sd(var, data_or_dist): - dist = _ensure_dist(var, data_or_dist) - mean, sd = dist.mean(), dist.standard_deviation() - return normalized_var(var, mean, 1 / sd) - - -class DomainContinuizer: - (NValues, LowestIsBase, FrequentIsBase, Ignore, IgnoreMulti, - ReportError, AsOrdinal, AsNormalizedOrdinal, Leave, - NormalizeBySpan, NormalizeBySD) = DomainContinuizer.MultinomialTreatment - - def __new__(cls, data=None, zero_based=True, multinomial_treatment=NValues, - continuous_treatment=Leave, class_treatment=Leave): - self = super().__new__(cls) - self.zero_based = zero_based - self.multinomial_treatment = multinomial_treatment - self.continuous_treatment = continuous_treatment - self.class_treatment = class_treatment - return self if data is None else self(data) - - def __call__(self, data): - treat = self.multinomial_treatment - if isinstance(data, Orange.data.Domain): - domain, data = data, None - else: - domain = data.domain - - if treat == DomainContinuizer.ReportError and \ - any(isinstance(var, Orange.data.DiscreteVariable) and - len(var.values) > 2 - for var in domain): - raise ValueError("Domain has multinomial attributes") - - newdomain = continuize_domain( - data or domain, - self.multinomial_treatment, - self.continuous_treatment, - self.class_treatment, - self.zero_based - ) - return newdomain - - -if __name__ == "__main__": - import sys - a = QtGui.QApplication(sys.argv) - ow = OWContinuize() - data = Table("lenses") - ow.setData(data) - ow.show() - a.exec_() - ow.saveSettings() diff --git a/Orange/widgets/data/oweditdomain.py b/Orange/widgets/data/oweditdomain.py deleted file mode 100644 index e2e626256cc..00000000000 --- a/Orange/widgets/data/oweditdomain.py +++ /dev/null @@ -1,577 +0,0 @@ -""" -Edit Domain ------------ - -A widget for manual editing of a domain's attributes. - -""" -import unicodedata - -from PyQt4 import QtGui -from PyQt4.QtGui import ( - QWidget, QListView, QTreeView, QStandardItemModel, QStandardItem, - QVBoxLayout, QHBoxLayout, QFormLayout, QToolButton, QLineEdit, - QAction, QKeySequence -) - -from PyQt4.QtCore import Qt, QSize -from PyQt4.QtCore import pyqtSignal as Signal, pyqtSlot as Slot - -import Orange.data -import Orange.feature.transformation - -from Orange.widgets import widget, gui, settings -from Orange.widgets.utils import itemmodels - - -def is_discrete(var): - return isinstance(var, Orange.data.DiscreteVariable) - - -def is_continuous(var): - return isinstance(var, Orange.data.ContinuousVariable) - - -def get_qualified(module, name): - """Return a qualified module member ``name`` inside the named - ``module``. - - The module (or package) first gets imported and the name - is retrieved from the module's global namespace. - - """ - # see __import__.__doc__ for why 'fromlist' is used - module = __import__(module, fromlist=[name]) - return getattr(module, name) - - -def variable_description(var): - """Return a variable descriptor. - - A descriptor is a hashable tuple which should uniquely define - the variable i.e. (module, type_name, variable_name, - any_kwargs, sorted-attributes-items). - - """ - var_type = type(var) - if is_discrete(var): - return (var_type.__module__, - var_type.__name__, - var.name, - (("values", tuple(var.values)),), - tuple(sorted(var.attributes.items()))) - else: - return (var_type.__module__, - var_type.__name__, - var.name, - (), - tuple(sorted(var.attributes.items()))) - - -def variable_from_description(description): - """Construct a variable from its description (see - :func:`variable_description`). - - """ - module, type_name, name, kwargs, attrs = description - try: - constructor = get_qualified(module, type_name) - except (ImportError, AttributeError) as ex: - raise ValueError("Invalid descriptor type '{}.{}" - "".format(module, type_name)) - - var = constructor(name, **dict(list(kwargs))) - var.attributes.update(attrs) - return var - - -class DictItemsModel(QStandardItemModel): - """A Qt Item Model class displaying the contents of a python - dictionary. - - """ - # Implement a proper model with in-place editing. - # (Maybe it should be a TableModel with 2 columns) - def __init__(self, parent=None, dict={}): - QStandardItemModel.__init__(self, parent) - self.setHorizontalHeaderLabels(["Key", "Value"]) - self.set_dict(dict) - - def set_dict(self, dict): - self._dict = dict - self.clear() - self.setHorizontalHeaderLabels(["Key", "Value"]) - for key, value in sorted(dict.items()): - key_item = QStandardItem(str(key)) - value_item = QStandardItem(str(value)) - key_item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable) - value_item.setFlags(value_item.flags() | Qt.ItemIsEditable) - self.appendRow([key_item, value_item]) - - def get_dict(self): - dict = {} - for row in range(self.rowCount()): - key_item = self.item(row, 0) - value_item = self.item(row, 1) - dict[str(key_item.text())] = str(value_item.text()) - return dict - - -class VariableEditor(QWidget): - """An editor widget for a variable. - - Can edit the variable name, and its attributes dictionary. - - """ - variable_changed = Signal() - - def __init__(self, parent=None): - QWidget.__init__(self, parent) - self.setup_gui() - - def setup_gui(self): - layout = QVBoxLayout() - self.setLayout(layout) - - self.main_form = QFormLayout() - self.main_form.setFieldGrowthPolicy(QFormLayout.AllNonFixedFieldsGrow) - layout.addLayout(self.main_form) - - self._setup_gui_name() - self._setup_gui_labels() - - def _setup_gui_name(self): - self.name_edit = QLineEdit() - self.main_form.addRow("Name", self.name_edit) - self.name_edit.editingFinished.connect(self.on_name_changed) - - def _setup_gui_labels(self): - vlayout = QVBoxLayout() - vlayout.setContentsMargins(0, 0, 0, 0) - vlayout.setSpacing(1) - - self.labels_edit = QTreeView() - self.labels_edit.setEditTriggers(QTreeView.CurrentChanged) - self.labels_edit.setRootIsDecorated(False) - - self.labels_model = DictItemsModel() - self.labels_edit.setModel(self.labels_model) - - self.labels_edit.selectionModel().selectionChanged.connect( - self.on_label_selection_changed) - - # Necessary signals to know when the labels change - self.labels_model.dataChanged.connect(self.on_labels_changed) - self.labels_model.rowsInserted.connect(self.on_labels_changed) - self.labels_model.rowsRemoved.connect(self.on_labels_changed) - - vlayout.addWidget(self.labels_edit) - hlayout = QHBoxLayout() - hlayout.setContentsMargins(0, 0, 0, 0) - hlayout.setSpacing(1) - self.add_label_action = QAction( - "+", self, - toolTip="Add a new label.", - triggered=self.on_add_label, - enabled=False, - shortcut=QKeySequence(QKeySequence.New)) - - self.remove_label_action = QAction( - unicodedata.lookup("MINUS SIGN"), self, - toolTip="Remove selected label.", - triggered=self.on_remove_label, - enabled=False, - shortcut=QKeySequence(QKeySequence.Delete)) - - button_size = gui.toolButtonSizeHint() - button_size = QSize(button_size, button_size) - - button = QToolButton(self) - button.setFixedSize(button_size) - button.setDefaultAction(self.add_label_action) - hlayout.addWidget(button) - - button = QToolButton(self) - button.setFixedSize(button_size) - button.setDefaultAction(self.remove_label_action) - hlayout.addWidget(button) - hlayout.addStretch(10) - vlayout.addLayout(hlayout) - - self.main_form.addRow("Labels", vlayout) - - def set_data(self, var): - """Set the variable to edit. - """ - self.clear() - self.var = var - - if var is not None: - self.name_edit.setText(var.name) - self.labels_model.set_dict(dict(var.attributes)) - self.add_label_action.setEnabled(True) - else: - self.add_label_action.setEnabled(False) - self.remove_label_action.setEnabled(False) - - def get_data(self): - """Retrieve the modified variable. - """ - name = str(self.name_edit.text()) - labels = self.labels_model.get_dict() - - # Is the variable actually changed. - if not self.is_same(): - var = type(self.var)(name) - var.attributes.update(labels) - self.var = var - else: - var = self.var - - return var - - def is_same(self): - """Is the current model state the same as the input. - """ - name = str(self.name_edit.text()) - labels = self.labels_model.get_dict() - - return self.var and name == self.var.name and labels == self.var.attributes - - def clear(self): - """Clear the editor state. - """ - self.var = None - self.name_edit.setText("") - self.labels_model.set_dict({}) - - def maybe_commit(self): - if not self.is_same(): - self.commit() - - def commit(self): - """Emit a ``variable_changed()`` signal. - """ - self.variable_changed.emit() - - @Slot() - def on_name_changed(self): - self.maybe_commit() - - @Slot() - def on_labels_changed(self, *args): - self.maybe_commit() - - @Slot() - def on_add_label(self): - self.labels_model.appendRow([QStandardItem(""), QStandardItem("")]) - row = self.labels_model.rowCount() - 1 - index = self.labels_model.index(row, 0) - self.labels_edit.edit(index) - - @Slot() - def on_remove_label(self): - rows = self.labels_edit.selectionModel().selectedRows() - if rows: - row = rows[0] - self.labels_model.removeRow(row.row()) - - @Slot() - def on_label_selection_changed(self): - selected = self.labels_edit.selectionModel().selectedRows() - self.remove_label_action.setEnabled(bool(len(selected))) - - -class DiscreteVariableEditor(VariableEditor): - """An editor widget for editing a discrete variable. - - Extends the :class:`VariableEditor` to enable editing of - variables values. - - """ - def setup_gui(self): - layout = QVBoxLayout() - self.setLayout(layout) - - self.main_form = QFormLayout() - self.main_form.setFieldGrowthPolicy(QFormLayout.AllNonFixedFieldsGrow) - layout.addLayout(self.main_form) - - self._setup_gui_name() - self._setup_gui_values() - self._setup_gui_labels() - - def _setup_gui_values(self): - self.values_edit = QListView() - self.values_edit.setEditTriggers(QTreeView.CurrentChanged) - self.values_model = itemmodels.PyListModel(flags=Qt.ItemIsSelectable | \ - Qt.ItemIsEnabled | Qt.ItemIsEditable) - self.values_edit.setModel(self.values_model) - - self.values_model.dataChanged.connect(self.on_values_changed) - self.main_form.addRow("Values", self.values_edit) - - def set_data(self, var): - """Set the variable to edit - """ - VariableEditor.set_data(self, var) - self.values_model[:] = list(var.values) if var is not None else [] - - def get_data(self): - """Retrieve the modified variable - """ - name = str(self.name_edit.text()) - labels = self.labels_model.get_dict() - values = map(str, self.values_model) - - if not self.is_same(): - var = type(self.var)(name, values=values) - var.attributes.update(labels) - self.var = var - else: - var = self.var - - return var - - def is_same(self): - """Is the current model state the same as the input. - """ - values = map(str, self.values_model) - return VariableEditor.is_same(self) and self.var.values == values - - def clear(self): - """Clear the model state. - """ - VariableEditor.clear(self) - self.values_model.wrap([]) - - @Slot() - def on_values_changed(self): - self.maybe_commit() - - -class ContinuousVariableEditor(VariableEditor): - # TODO: enable editing of number_of_decimals, scientific format ... - pass - - -class OWEditDomain(widget.OWWidget): - name = "Edit Domain" - description = "Rename features and their values." - icon = "icons/EditDomain.svg" - priority = 3125 - - inputs = [("Data", Orange.data.Table, "set_data")] - outputs = [("Data", Orange.data.Table)] - - settingsHandler = settings.DomainContextHandler() - - domain_change_hints = settings.ContextSetting({}) - selected_index = settings.ContextSetting({}) - - autocommit = settings.Setting(False) - - def __init__(self, parent=None): - super().__init__(parent) - - self.data = None - self.input_vars = () - self._invalidated = False - - box = gui.widgetBox(self.controlArea, "Domain Features") - - self.domain_model = itemmodels.VariableListModel() - self.domain_view = QListView( - selectionMode=QListView.SingleSelection - ) - self.domain_view.setModel(self.domain_model) - self.domain_view.selectionModel().selectionChanged.connect( - self._on_selection_changed) - box.layout().addWidget(self.domain_view) - - box = gui.widgetBox(self.controlArea, "Reset") - gui.button(box, self, "Reset selected", callback=self.reset_selected) - gui.button(box, self, "Reset all", callback=self.reset_all) - - box = gui.widgetBox(self.controlArea, "Commit") - cb = gui.checkBox(box, self, "autocommit", "Commit on any change") - b = gui.button(box, self, "Commit", callback=self.commit, - default=True) - gui.setStopper(self, b, cb, "_invalidated", callback=self.commit) - - box = gui.widgetBox(self.mainArea, "Edit") - self.editor_stack = QtGui.QStackedWidget() - - self.editor_stack.addWidget(DiscreteVariableEditor()) - self.editor_stack.addWidget(ContinuousVariableEditor()) - self.editor_stack.addWidget(VariableEditor()) - - box.layout().addWidget(self.editor_stack) - - def set_data(self, data): - """Set input data set.""" - self.closeContext() - self.clear() - self.data = data - - if self.data is not None: - self._initialize() - self.openContext(self.data) - self._restore() - - self.commit() - - def clear(self): - """Clear the widget state.""" - self.data = None - self.domain_model[:] = [] - self.input_vars = [] - self.domain_change_hints = {} - self.selected_index = -1 - self._invalidated = False - - def reset_selected(self): - """Reset the currently selected variable to its original state.""" - ind = self.selected_var_index() - if ind >= 0: - var = self.input_vars[ind] - desc = variable_description(var) - if desc in self.domain_change_hints: - del self.domain_change_hints[desc] - - self.domain_model[ind] = var - self.editor_stack.currentWidget().set_data(var) - self._invalidate() - - def reset_all(self): - """Reset all variables to their original state.""" - self.domain_change_hints = {} - if self.data is not None: - # To invalidate stored hints - self.domain_model[:] = self.input_vars - itemmodels.select_row(self.domain_view, self.selected_index) - self._invalidate() - - def selected_var_index(self): - """Return the selected row in 'Domain Features' view.""" - rows = self.domain_view.selectedIndexes() - assert len(rows) <= 1 - return rows[0].row() if rows else -1 - - def _initialize(self): - domain = self.data.domain - self.input_vars = tuple(domain) + domain.metas - self.domain_model[:] = list(self.input_vars) - - def _restore(self): - # Restore the variable states from saved settings. - def transform(var): - vdesc = variable_description(var) - if vdesc in self.domain_change_hints: - newvar = variable_from_description( - self.domain_change_hints[vdesc] - ) - newvar.get_value_from = \ - Orange.feature.transformation.Identity(var) - return newvar - else: - return var - - self.domain_model[:] = map(transform, self.input_vars) - - # Restore the variable selection if possible - index = self.selected_index - if index >= len(self.input_vars): - index = 0 if len(self.input_vars) else -1 - if index >= 0: - itemmodels.select_row(self.domain_view, index) - - def _on_selection_changed(self): - self.selected_index = self.selected_var_index() - self.open_editor(self.selected_index) - - def open_editor(self, index): - self.clear_editor() - if index < 0: - return - - var = self.domain_model[index] - - editor_index = 2 - if is_discrete(var): - editor_index = 0 - elif is_continuous(var): - editor_index = 1 - editor = self.editor_stack.widget(editor_index) - self.editor_stack.setCurrentWidget(editor) - - editor.set_data(var) - editor.variable_changed.connect(self._on_variable_changed) - - def clear_editor(self): - current = self.editor_stack.currentWidget() - try: - current.variable_changed.disconnect(self._on_variable_changed) - except Exception: - pass - current.set_data(None) - - def _on_variable_changed(self): - """User edited the current variable in editor.""" - assert 0 <= self.selected_index <= len(self.domain_model) - editor = self.editor_stack.currentWidget() - new_var = editor.get_data() - - # Replace the variable in the 'Domain Features' view/model - self.domain_model[self.selected_index] = new_var - old_var = self.input_vars[self.selected_index] - - # Store the transformation hint. - self.domain_change_hints[variable_description(old_var)] = \ - variable_description(new_var) - - # Make orange's domain transformation work. - new_var.get_value_from = \ - Orange.feature.transformation.Identity(old_var) - - self._invalidate() - - def _invalidate(self): - """Invalidate the current output.""" - self._invalidated = True - if self.autocommit: - self.commit() - - def commit(self): - """Send the changed data to output.""" - new_data = None - if self.data is not None: - input_domain = self.data.domain - n_attrs = len(input_domain.attributes) - n_vars = len(input_domain.variables) - n_class_vars = len(input_domain.class_vars) - all_new_vars = list(self.domain_model) - attrs = all_new_vars[: n_attrs] - class_vars = all_new_vars[n_attrs: n_attrs + n_class_vars] - new_metas = all_new_vars[n_attrs + n_class_vars:] - new_domain = Orange.data.Domain(attrs, class_vars, new_metas) - new_data = Orange.data.Table.from_table(new_domain, self.data) - - self.send("Data", new_data) - self._invalidated = False - - -def main(): - from PyQt4.QtGui import QApplication - app = QApplication([]) - w = OWEditDomain() - data = Orange.data.Table("iris") - w.set_data(data) - w.show() - w.raise_() - - return app.exec_() - - -if __name__ == "__main__": - main() diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py deleted file mode 100644 index e21081b9510..00000000000 --- a/Orange/widgets/data/owfeatureconstructor.py +++ /dev/null @@ -1,845 +0,0 @@ -""" -Feature Constructor - -A widget for defining (constructing) new features from values -of other variables. - -""" -import sys -import re -import copy -import unicodedata -import functools -import builtins -import math -import random -from collections import namedtuple, Counter - - -from PyQt4 import QtGui, QtCore -from PyQt4.QtGui import QSizePolicy -from PyQt4.QtCore import Qt, QEvent, pyqtSignal as Signal, pyqtProperty as Property - -import Orange - -from Orange.widgets import widget, gui -from Orange.widgets.settings import DomainContextHandler, Setting, ContextSetting -from Orange.widgets.utils import itemmodels, vartype - -from .owpythonscript import PythonSyntaxHighlighter - - -FeatureDescriptor = \ - namedtuple("FeatureDescriptor", ["name", "expression"]) - -ContinuousDescriptor = \ - namedtuple("ContinuousDescriptor", - ["name", "expression", "number_of_decimals"]) -DiscreteDescriptor = \ - namedtuple("DiscreteDescriptor", - ["name", "expression", "values", "base_value", "ordered"]) - -StringDescriptor = namedtuple("StringDescriptor", ["name", "expression"]) - - -@functools.lru_cache(50) -def make_variable(descriptor): - - if descriptor.expression.strip(): - get_value_from = \ - lambda instance: eval(descriptor.expression, - {"instance": instance, "_": instance}) - else: - get_value_from = lambda _: float("nan") - - if isinstance(descriptor, ContinuousDescriptor): - var = Orange.data.ContinuousVariable(descriptor.name) - var.number_of_decimals = descriptor.number_of_decimals - var.get_value_from = get_value_from - return var - elif isinstance(descriptor, DiscreteDescriptor): - var = Orange.data.DiscreteVariable( - descriptor.name, - values=descriptor.values, - ordered=descriptor.ordered, - base_value=descriptor.base_value - ) - var.get_value_from = get_value_from - return var - elif isinstance(descriptor, StringDescriptor): - var = Orange.data.StringVariable( - descriptor.name, - ) - var.get_value_from = get_value_from - return var - else: - raise TypeError - - -def is_valid_expression(exp): - try: - ast.parse(exp, mode="eval") - return True - except Exception: - return False - - -class ActionToolBarButton(QtGui.QToolButton): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - -class ActionToolBar(QtGui.QFrame): - iconSizeChanged = Signal(QtCore.QSize) - actionTriggered = Signal(QtGui.QAction) - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - layout = QtGui.QHBoxLayout(spacing=1) - layout.setContentsMargins(0, 0, 0, 0) - - if "sizePolicy" not in kwargs: - self.setSizePolicy(QSizePolicy.MinimumExpanding, - QSizePolicy.Minimum) - - self.setLayout(layout) - layout.addStretch() - - self._actions = [] - - def clear(self): - for action in reversed(self.actions()): - self.removeAction(action) - - def iconSize(self): - if self._iconSize is None: - style = self.style() - pm = style.pixelMetric(QtGui.QStyle.PM_ToolBarIconSize) - return QtCore.QSize(pm, pm) - else: - return self._iconSize - - def setIconSize(self, size): - if self._iconSize != size: - changed = self.iconSize() != size - self._iconSize = size - if changed: - self.iconSizeChanged.emit(self.iconSize()) - - def buttonForAction(self, action): - for ac, button in self._actions: - if action is ac: - return button - return None - - def actionEvent(self, event): - super().actionEvent(event) - - if event.type() == QEvent.ActionAdded: - self._insertActionBefore(event.action(), event.before()) - elif event.type() == QEvent.ActionRemoved: - self._removeAction(event.action()) - elif event.type() == QEvent.ActionChanged: - self._updateAction(event.action()) - - def _insertActionBefore(self, action, before=None): - index = len(self._actions) - if action is not None: - actions = [a for a, _ in self._actions] - try: - index = actions.index(before) - except ValueError: - pass - - button = self._button(action) - self._actions.insert(index, (action, button)) - self.layout().insertWidget(index, button) - - button.triggered.connect(self.actionTriggered) - - def _removeAction(self, action): - actions = [a for a, _ in self._actions] - try: - index = actions.index(action) - except ValueError: - raise - else: - _, button = self._actions[index] - self.layout().takeAt(index) - button.hide() - button.deleteLater() - del self._actions[index] - - def _updateAction(self, action): - pass - - def _button(self, action): - b = ActionToolBarButton( - toolButtonStyle=Qt.ToolButtonIconOnly, - sizePolicy=QSizePolicy(QSizePolicy.Minimum, - QSizePolicy.Minimum) - ) - b.setDefaultAction(action) - b.setPopupMode(QtGui.QToolButton.InstantPopup) - return b - - -def selected_row(view): - if view.selectionMode() in [QtGui.QAbstractItemView.MultiSelection, - QtGui.QAbstractItemView.ExtendedSelection]: - raise ValueError("invalid 'selectionMode'") - - sel_model = view.selectionModel() - indexes = sel_model.selectedRows() - if indexes: - assert len(indexes) == 1 - return indexes[0].row() - else: - return None - - -class FeatureEditor(QtGui.QFrame): - featureChanged = Signal() - featureEdited = Signal() - - modifiedChanged = Signal([], [bool]) - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - layout = QtGui.QFormLayout( - fieldGrowthPolicy=QtGui.QFormLayout.ExpandingFieldsGrow - ) - self.nameedit = QtGui.QLineEdit( - sizePolicy=QSizePolicy(QSizePolicy.Minimum, - QSizePolicy.Fixed) - ) - self.expressionedit = QtGui.QPlainTextEdit( - tabChangesFocus=True, - ) - high = PythonSyntaxHighlighter(self.expressionedit.document()) - layout.addRow(self.tr("Name"), self.nameedit) - layout.addRow(self.tr("Expression"), self.expressionedit) - self.setLayout(layout) - - self.nameedit.editingFinished.connect(self._invalidate) - self.expressionedit.textChanged.connect(self._invalidate) - - self._modified = False - - def setModified(self, modified): - if not type(modified) is bool: - raise TypeError - - if self._modified != modified: - self._modified = modified - self.modifiedChanged.emit() - self.modifiedChanged[bool].emit(modified) - - def modified(self): - return self._modified - - modified = Property(bool, modified, setModified, - notify=modifiedChanged) - - def setEditorData(self, data): - self.nameedit.setText(data.name) - self.expressionedit.setPlainText(data.expression) - self.setModified(False) - self.featureChanged.emit() - - def editorData(self): - return FeatureDescriptor(name=self.nameedit.text(), - expression=self.nameedit.toPlainText()) - - def _invalidate(self): - self.setModified(True) - self.featureEdited.emit() - self.featureChanged.emit() - - -class ContinuousFeatureEditor(FeatureEditor): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.ndecimalsedit = QtGui.QSpinBox(minimum=1, maximum=9) - self.layout().insertRow(1, self.tr("# decimals"), self.ndecimalsedit) - self.ndecimalsedit.editingFinished.connect(self._invalidate) - - self.setTabOrder(self.nameedit, self.ndecimalsedit) - self.setTabOrder(self.ndecimalsedit, self.expressionedit) - - def setEditorData(self, data): - self.ndecimalsedit.setValue(data.number_of_decimals) - super().setEditorData(data) - - def editorData(self): - return ContinuousDescriptor( - name=self.nameedit.text(), - number_of_decimals=self.ndecimalsedit.value(), - expression=self.expressionedit.toPlainText() - ) - - -class DiscreteFeatureEditor(FeatureEditor): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - valueslayout = QtGui.QVBoxLayout(spacing=1) - valueslayout.setContentsMargins(0, 0, 0, 0) - - self.valuesmodel = itemmodels.PyListModel( - [], - flags=Qt.ItemIsSelectable | Qt.ItemIsEnabled | Qt.ItemIsEditable - ) - self.valuesedit = QtGui.QListView( - sizePolicy=QSizePolicy(QSizePolicy.Minimum, - QSizePolicy.MinimumExpanding) - ) - self.valuesedit.setModel(self.valuesmodel) - - toolbar = ActionToolBar() - - addaction = QtGui.QAction( - "+", toolbar, - toolTip="Add a value" - ) - addaction.triggered.connect(self.addValue) - - removeaction = QtGui.QAction( - unicodedata.lookup("MINUS SIGN"), toolbar, - toolTip="Remove selected value", -# shortcut=QtGui.QKeySequence.Delete, -# shortcutContext=Qt.WidgetShortcut - ) - removeaction.triggered.connect(self.removeValue) - - toolbar.addAction(addaction) - toolbar.addAction(removeaction) - - valueslayout.addWidget(self.valuesedit) - valueslayout.addWidget(toolbar) - - self.baseedit = QtGui.QComboBox() - self.baseedit.setModel(self.valuesmodel) - self.orderededit = QtGui.QCheckBox(text=self.tr("Ordered")) - - layout = self.layout() - layout.insertRow(1, self.tr("Values"), valueslayout) - layout.insertRow(2, self.tr("Base Value"), self.baseedit) - layout.insertRow(3, self.orderededit) - - self.valuesmodel.rowsInserted.connect(self._invalidate) - self.valuesmodel.rowsRemoved.connect(self._invalidate) - self.valuesmodel.dataChanged.connect(self._invalidate) - - self.baseedit.activated.connect(self._invalidate) - self.orderededit.clicked.connect(self._invalidate) - - self.setTabOrder(self.nameedit, self.valuesedit) - self.setTabOrder(self.valuesedit, toolbar) - self.setTabOrder(toolbar, self.baseedit) - self.setTabOrder(self.baseedit, self.orderededit) - self.setTabOrder(self.orderededit, self.expressionedit) - - def setEditorData(self, data): - self.valuesmodel[:] = data.values - self.baseedit.setCurrentIndex(data.base_value) - super().setEditorData(data) - - def editorData(self): - return DiscreteDescriptor( - name=self.nameedit.text(), - values=tuple(self.valuesmodel), - base_value=self.baseedit.currentIndex(), - ordered=self.orderededit.isChecked(), - expression=self.expressionedit.toPlainText() - ) - - def addValue(self, name=None): - if name is not None: - name = "%s" % self.valuesmodel.rowCount() - - self.valuesmodel.append(name) - index = self.valuesmodel.index(len(self.valuesmodel) - 1) - self.valuesedit.setCurrentIndex(index) - self.valuesedit.edit(index) - - def removeValue(self): - index = selected_row(self.valuesedit) - if index is not None: - del self.valuesmodel[index] - - -class StringFeatureEditor(FeatureEditor): - def editorData(self): - return StringDescriptor( - name=self.nameedit.text(), - expression=self.expressionedit.toPlainText() - ) - - -_VarMap = { - DiscreteDescriptor: vartype(Orange.data.DiscreteVariable()), - ContinuousDescriptor: vartype(Orange.data.ContinuousVariable()), - StringDescriptor: vartype(Orange.data.StringVariable()) -} - - -@functools.lru_cache(20) -def variable_icon(dtype): - vtype = _VarMap.get(dtype, dtype) - try: - return gui.attributeIconDict[vtype] - except Exception: - return QtGui.QIcon() - - -class FeatureItemDelegate(QtGui.QStyledItemDelegate): - - def displayText(self, value, locale): - return value.name + " := " + value.expression - - def _initStyleOption(self, option, index): - super().initStyleOption(option, index) - model = index.model() - data = model.data(index, Qt.DisplayRole) - icon = variable_icon(type(data)) - option.icon = icon - option.decorationSize = icon.actualSize( - option.decorationSize, QtGui.QIcon.Normal, QtGui.QIcon.Off) - - -class DescriptorModel(itemmodels.PyListModel): - def data(self, index, role=Qt.DisplayRole): - if role == Qt.DecorationRole: - value = self[index.row()] - return variable_icon(type(value)) - else: - return super().data(index, role) - - -class OWFeatureConstructor(widget.OWWidget): - name = "Feature Constructor" - icon = "icons/FeatureConstructor.svg" - inputs = [{"name": "Data", - "type": Orange.data.Table, - "handler": "setData"}] - outputs = [{"name": "Data", - "type": Orange.data.Table}] - want_main_area = False - - # Stored settings - settingsHandler = DomainContextHandler() - descriptors = ContextSetting([]) - currentIndex = ContextSetting(-1) - - EDITORS = [ - (ContinuousDescriptor, ContinuousFeatureEditor), - (DiscreteDescriptor, DiscreteFeatureEditor), - (StringDescriptor, StringFeatureEditor) - ] - - def __init__(self): - super().__init__() - - self.editors = {} - - box = QtGui.QGroupBox( - title=self.tr("Attribute Definitions") - ) - - box.setLayout(QtGui.QHBoxLayout()) - - self.controlArea.layout().addWidget(box) - - # Layout for the list view - layout = QtGui.QVBoxLayout(spacing=1, margin=0) - self.featuremodel = DescriptorModel() - - self.featuremodel.wrap(self.descriptors) - self.featureview = QtGui.QListView( - minimumWidth=200, - sizePolicy=QSizePolicy(QSizePolicy.Minimum, - QSizePolicy.MinimumExpanding) - ) - - self.featureview.setItemDelegate(FeatureItemDelegate()) - self.featureview.setModel(self.featuremodel) - self.featureview.selectionModel().selectionChanged.connect( - self._on_selectedVariableChanged - ) - - self.featuretoolbar = ActionToolBar() - self.addaction = QtGui.QAction( - "+", self, - toolTip="Create a new feature", - shortcut=QtGui.QKeySequence.New - ) - menu = QtGui.QMenu() - cont = menu.addAction("Continuous") - cont.triggered.connect( - lambda: self.addFeature(ContinuousDescriptor("Name", "", 2)) - ) - disc = menu.addAction("Discrete") - disc.triggered.connect( - lambda: self.addFeature( - DiscreteDescriptor("Name", "", ("0", "1"), -1, False)) - ) - - string = menu.addAction("String") - string.triggered.connect( - lambda: self.addFeature(StringDescriptor("Name", "")) - ) - - menu.addSeparator() - self.duplicateaction = menu.addAction("Duplicate selected feature") - self.duplicateaction.triggered.connect(self.duplicateFeature) - self.duplicateaction.setEnabled(False) - - self.addaction.setMenu(menu) - - self.removeaction = QtGui.QAction( - unicodedata.lookup("MINUS SIGN"), self, - toolTip="Remove selected feature", -# shortcut=QtGui.QKeySequence.Delete, -# shortcutContext=Qt.WidgetShortcut - ) - self.removeaction.triggered.connect(self.removeSelectedFeature) - self.featuretoolbar.addAction(self.addaction) - self.featuretoolbar.addAction(self.removeaction) - - layout.addWidget(self.featureview) - layout.addWidget(self.featuretoolbar) - - box.layout().addLayout(layout, 1) - - self.editorstack = QtGui.QStackedWidget( - sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding, - QSizePolicy.MinimumExpanding) - ) - - for descclass, editorclass in self.EDITORS: - editor = editorclass() - editor.featureChanged.connect(self._on_modified) - self.editors[descclass] = editor - self.editorstack.addWidget(editor) - - self.editorstack.setEnabled(False) - - box.layout().addWidget(self.editorstack, 3) - - gui.button(self.controlArea, self, "Commit", callback=self.apply, - default=True) - - def setCurrentIndex(self, index): - index = min(index, len(self.featuremodel) - 1) - self.currentIndex = index - if index >= 0: - itemmodels.select_row(self.featureview, index) - desc = self.featuremodel[min(index, len(self.featuremodel) - 1)] - editor = self.editors[type(desc)] - self.editorstack.setCurrentWidget(editor) - editor.setEditorData(desc) - - self.editorstack.setEnabled(index >= 0) - self.duplicateaction.setEnabled(index >= 0) - self.removeaction.setEnabled(index >= 0) - - def _on_selectedVariableChanged(self, selected, *_): - index = selected_row(self.featureview) - if index is not None: - self.setCurrentIndex(index) - else: - self.setCurrentIndex(-1) - - def _on_modified(self): - if self.currentIndex >= 0: - editor = self.editorstack.currentWidget() - self.featuremodel[self.currentIndex] = editor.editorData() - - def setDescriptors(self, descriptors): - """ - Set a list of variable descriptors to edit. - """ - self.descriptors = descriptors - self.featuremodel[:] = list(self.descriptors) - - def setData(self, data=None): - self.closeContext() - - self.featuremodel.wrap([]) - self.currentIndex = -1 - self.data = data - - if self.data is not None: - self.openContext(data) - self.featuremodel.wrap(self.descriptors) - self.setCurrentIndex(self.currentIndex) - - self.editorstack.setEnabled(len(self.featuremodel) > 0) - - self._invalidate() - - def handleNewSignals(self): - if self.data is not None: - self.apply() - - def _invalidate(self): - pass - - def addFeature(self, descriptor): - self.featuremodel.append(descriptor) - self.setCurrentIndex(len(self.featuremodel) - 1) - editor = self.editorstack.currentWidget() - editor.nameedit.setFocus() - editor.nameedit.selectAll() - self._invalidate() - - def removeFeature(self, index): - del self.featuremodel[index] - index = selected_row(self.featureview) - if index is not None: - self.setCurrentIndex(index) - elif index is None and len(self.featuremodel) > 0: - # Deleting the last item clears selection - self.setCurrentIndex(len(self.featuremodel) - 1) - - def removeSelectedFeature(self): - if self.currentIndex >= 0: - self.removeFeature(self.currentIndex) - - def duplicateFeature(self): - desc = self.featuremodel[self.currentIndex] - self.addFeature(copy.deepcopy(desc)) - - def apply(self): - desc = list(self.featuremodel) - - def remove_invalid_expression(desc): - return (desc if is_valid_expression(desc.expression) - else desc._replace(expression="")) - - desc = map(remove_invalid_expression, desc) - source_vars = tuple(self.data.domain) + self.data.domain.metas - new_variables = construct_variables(desc, source_vars) - - attrs = [var for var in new_variables if var.is_primitive()] - metas = [var for var in new_variables if not var.is_primitive()] - new_domain = Orange.data.Domain( - self.data.domain.attributes + tuple(attrs), - self.data.domain.class_vars, - metas=self.data.domain.metas + tuple(metas) - ) - data = Orange.data.Table(new_domain, self.data) - self.send("Data", data) - - -import ast - - -class RewriteNames(ast.NodeTransformer): - def __init__(self, names): - self.names = names - self.rewrites = [] - - def visit_Str(self, node): - if node.s in self.names: - new = ast.Subscript( - value=ast.Name(id="value", ctx=ast.Load()), - slice=ast.Index(value=ast.Str(s=node.s)), - ctx=ast.Load() - ) - self.rewrites.append((node.s)) - return ast.copy_location(new) - else: - return node - - -def bind_names(exp, domain): - names = [f.name for f in domain.features] - transf = RewriteNames(names) - return transf.visit(exp) - - -def freevars(exp, env): - etype = type(exp) - if etype in [ast.Expr, ast.Expression]: - return freevars(exp.body, env) - elif etype == ast.BoolOp: - return sum((freevars(v, env) for v in exp.values), []) - elif etype == ast.BinOp: - return freevars(exp.left, env) + freevars(exp.right, env) - elif etype == ast.UnaryOp: - return freevars(exp.operand, env) - elif etype == ast.IfExp: - return (freevars(exp.test, env) + freevars(exp.body, env) + - freevars(exp.orelse, env)) - elif etype == ast.Dict: - return sum((freevars(v, env) for v in exp.values), []) - elif etype == ast.Set: - return sum((freevars(v, env) for v in exp.elts), []) - elif etype in [ast.SetComp, ast.ListComp, ast.GeneratorExp]: - raise NotImplementedError - elif etype == ast.DictComp: - raise NotImplementedError - # Yield, YieldFrom??? - elif etype == ast.Compare: - return sum((freevars(v, env) for v in [exp.left] + exp.comparators), []) - elif etype == ast.Call: - return sum((freevars(e, env) - for e in [exp.func] + (exp.args or []) + - (exp.keywords or []) + - (exp.starargs or []) + - (exp.kwargs or [])), - []) - elif etype in [ast.Num, ast.Str, ast.Ellipsis]: -# elif etype in [ast.Num, ast.Str, ast.Ellipsis, ast.Bytes]: - return [] - elif etype == ast.Attribute: - return freevars(exp.value, env) - elif etype == ast.Subscript: - return freevars(exp.value, env) + freevars(exp.slice, env), - elif etype == ast.Name: - return [exp.id] if exp.id not in env else [] - elif etype == ast.List: - return sum((freevars(e, env) for e in exp.elts), []) - elif etype == ast.Tuple: - return sum((freevars(e, env) for e in exp.elts), []) - elif etype == ast.Slice: - return sum((freevars(e, env) - for e in filter(None, [exp.lower, exp.upper, exp.step])), - []) - elif etype == ast.ExtSlice: - return sum((freevars(e, env) for e in exp.dims), []) - elif etype == ast.Index: - return freevars(exp.value, env) - else: - raise ValueError(exp) - - -def construct_variables(descriptions, source_vars): - #subs - variables = [] - for desc in descriptions: - _, func = bind_variable(desc, source_vars) - var = make_variable(desc) - var.get_value_from = func - variables.append(var) - return variables - - -def sanitized_name(name): - return re.sub(r"\W", "_", name) - - -def bind_variable(descriptor, env): - """ - (descriptor, env) -> - (descriptor, (instance -> value) | (table -> value list)) - """ - if not descriptor.expression.strip(): - return (descriptor, lambda _: float("nan")) - - exp_ast = ast.parse(descriptor.expression, mode="eval") - freev = unique(freevars(exp_ast, [])) - variables = {sanitized_name(v.name): v for v in env} - source_vars = [(name, variables[name]) for name in freev - if name in variables] - - return (descriptor, FeatureFunc(exp_ast, source_vars)) - - -def make_lambda(expression, args): - def make_arg(name): - if sys.version_info >= (3, 0): - return ast.arg(arg=name, annotation=None) - else: - return ast.Name(id=arg, ctx=ast.Param(), lineno=1, col_offset=0) - - lambda_ = ast.Lambda( - args=ast.arguments( - args=[make_arg(arg) for arg in args], - varargs=None, - varargannotation=None, - kwonlyargs=[], - kwarg=None, - kwargannotation=None, - defaults=[], - kw_defaults=[]), - body=expression.body, - ) - lambda_ = ast.copy_location(lambda_, expression.body) - exp = ast.Expression(body=lambda_, lineno=1, col_offset=0) - ast.dump(exp) - return eval(compile(exp, "", "eval"), __GLOBALS) - - -__ALLOWED = [ - "Ellipsis", "False", "None", "True", "abs", "all", "any", "acsii", - "bin", "bool", "bytearray", "bytes", "chr", "complex", "dict", - "divmod", "enumerate", "filter", "float", "format", "frozenset", - "getattr", "hasattr", "hash", "hex", "id", "int", "iter", "len", - "list", "map", "max", "memoryview", "min", "next", "object", - "oct", "ord", "pow", "range", "repr", "reversed", "round", - "set", "slice", "sorted", "str", "sum", "tuple", "type", - "zip" -] - -__GLOBALS = {name: getattr(builtins, name) for name in __ALLOWED - if hasattr(builtins, name)} - - -__GLOBALS.update({name: getattr(math, name) for name in dir(math) - if not name.startswith("_")}) - -__GLOBALS.update({ - "normalvariate": random.normalvariate, - "gauss": random.gauss, - "expovariate": random.expovariate, - "gammavariate": random.gammavariate, - "betavariate": random.betavariate, - "lognormvariate": random.lognormvariate, - "paretovariate": random.paretovariate, - "vonmisesvariate": random.vonmisesvariate, - "weibullvariate": random.weibullvariate, - "triangular": random.triangular, - "uniform": random.uniform} -) - - -class FeatureFunc(object): - def __init__(self, expression, args): - self.expression = expression - self.args = args - self.func = make_lambda(expression, [name for name, _ in args]) - - def __call__(self, instance, *_): - if isinstance(instance, Orange.data.Table): - return [self(inst) for inst in instance] - else: - args = [instance[var] for _, var in self.args] - return self.func(*args) - - -def unique(seq): - seen = set() - unique_el = [] - for el in seq: - if el not in seen: - unique_el.append(el) - seen.add(el) - return unique_el - - -if __name__ == "__main__": - app = QtGui.QApplication([]) - w = OWFeatureConstructor() - w.show() - data = Orange.data.Table("iris") - w.setData(data) - w.handleNewSignals() - app.exec_() - w.setData(None) - w.saveSettings() diff --git a/Orange/widgets/data/owimpute.py b/Orange/widgets/data/owimpute.py deleted file mode 100644 index 99bd8c51d47..00000000000 --- a/Orange/widgets/data/owimpute.py +++ /dev/null @@ -1,1099 +0,0 @@ - -import collections -from collections import namedtuple - -from PyQt4 import QtGui -from PyQt4.QtGui import ( - QWidget, QGroupBox, QRadioButton, QPushButton, QHBoxLayout, - QVBoxLayout, QStackedLayout, QComboBox, QLineEdit, - QDoubleValidator, QButtonGroup -) - -from PyQt4.QtCore import Qt, QMargins - -import Orange.data -import Orange.classification -from Orange.data import filter as data_filter - -from Orange.widgets import gui, settings -from Orange.widgets.widget import OWWidget -from Orange.widgets.utils import itemmodels, vartype - - -def _margins(margins, container): - if isinstance(margins, tuple): - left, top, right, bottom = margins - elif isinstance(margins, int): - left = top = right = bottom = margins - elif isinstance(margins, QMargins): - left, top, right, bottom = \ - margins.left(), margins.top(), margins.right(), margins.bottom() - else: - raise TypeError - - container_margins = container.getContentsMargins() - margins = [c if m == -1 else m - for c, m in zip([left, top, right, bottom], - container_margins)] - return margins - - -def layout(orientation=Qt.Vertical, margins=None, spacing=None,): - if orientation == Qt.Vertical: - lay = QVBoxLayout() - else: - lay = QHBoxLayout() - - if margins is not None: - left, top, right, bottom = _margins(margins, lay) - lay.setContentsMargins(left, right, top, bottom) - return lay - - -def group_box(title=None, layout=None, margin=None, flat=False, ): - gb = QGroupBox(title=title, flat=flat) - if layout is not None: - gb.setLayout(layout) - return gb - - -def widget(layout=None, tooltip=None, objname=None, enabled=True,): - w = QWidget(toolTip=tooltip, objectName=objname, enabled=enabled) - if layout is not None: - w.setLayout(layout) - return w - - -def radio_button(text="", checked=False, group=None, group_id=None): - button = QRadioButton(text, checked=checked) - if group is not None: - group.addButton(button, ) - if group_id is not None: - group.setId(button, group_id) - return button - - -def push_button(text="", checked=False, checkable=False, - group=None, group_id=None, **kwargs): - button = QPushButton(text, checked=checked, checkable=checkable, **kwargs) - if group is not None: - group.addButton(button) - if group_id is not None: - group.setId(button, group_id) - return button - - -def commit_widget(button_text="Commit", button_default=True, - check_text="Commit on any change", checked=False, - modified=False, clicked=None): - w = widget(layout=layout(margins=0)) - button = push_button(button_text, clicked=clicked, - default=button_default) - button.setDefault(button_default) - - check = QtGui.QCheckBox(check_text, checked=checked) - action = QtGui.QAction( - button_text, w, - objectName="action-commit", - ) - button.clicked.connect(action.trigger) - w.commit_action = action - w.commit_button = button - w.auto_commit_check = check - - w.layout().addWidget(check) - w.layout().addWidget(button) - return w - - -class DisplayFormatDelegate(QtGui.QStyledItemDelegate): - def initStyleOption(self, option, index): - super().initStyleOption(option, index) - state = index.data(Qt.UserRole) - var = index.model()[index.row()] - if state: - fmt = state.method.format - text = fmt.format(var=var, params=state.params, - **state.method._asdict()) - option.text = text - - -METHODS = ( - {"name": "Default (above)", - "short": "", - "description": "As above so below", - "format": "{var.name}"}, - {"name": "Don't impute", - "short": "leave", - "description": "I", - "format": "{var.name} -> leave"}, - {"name": "Average/Most frequent", - "short": "avg", - "description": "Replace with average/modus for the column", - "format": "{var.name} -> avg"}, - {"name": "As a distinct value", - "short": "as_value", - "description": "", - "format": "{var.name} -> new value"}, - {"name": "Model-based imputer", - "short": "model", - "description": "", - "format": "{var.name} -> {params[0]!r}"}, - {"name": "Random values", - "short": "random", - "description": "Replace with a random value", - "format": "{var.name} -> random"}, - {"name": "Remove instances with unknown values", - "short": "drop", - "description": "", - "format": "{var.name} -> drop"}, - {"name": "Value", - "short": "value", - "description": "", - "format": "{var.name} -> {params[0]!s}"}, -) - - -Method = namedtuple( - "Method", - ["name", "short", "description", "format"] -) - - -class Method(Method): - pass - - -State = namedtuple("State", ["method", "params"]) - - -class State(State): - def __new__(cls, method, params=()): - return super().__new__(cls, method, params) - - def _asdict(self): - return {"method": self.method._asdict(), - "params": self.params} - -# state -# - selected default -# - for each variable (indexed by (vartype, name)): -# - selected method (method index, *params) - -# vartype * name -> method -# data method = Method(name) | Method2(name, (*params)) - - -METHODS = [Method(**m) for m in METHODS] - - -class OWImpute(OWWidget): - name = "Impute" - description = "Imputes missing values in the data table." - icon = "icons/Impute.svg" - priority = 2130 - - inputs = [("Data", Orange.data.Table, "set_data"), - ("Learner", Orange.classification.Fitter, "set_fitter")] - outputs = [("Data", Orange.data.Table)] - - METHODS = METHODS - - settingsHandler = settings.DomainContextHandler() - - default_method = settings.Setting(1) - variable_methods = settings.ContextSetting({}) - autocommit = settings.Setting(False) - - want_main_area = False - - def __init__(self, parent=None): - super().__init__(parent) - self.modified = False - - box = group_box(self.tr("Default method"), - layout=layout(Qt.Vertical)) - self.controlArea.layout().addWidget(box) - - bgroup = QButtonGroup() - - for i, m in enumerate(self.METHODS[1:-1], 1): - b = radio_button(m.name, checked=i == self.default_method, - group=bgroup, group_id=i) - box.layout().addWidget(b) - - self.defbggroup = bgroup - - bgroup.buttonClicked[int].connect(self.set_default_method) - box = group_box(self.tr("Individual attribute settings"), - layout=layout(Qt.Horizontal)) - self.controlArea.layout().addWidget(box) - - self.varview = QtGui.QListView( - selectionMode=QtGui.QListView.ExtendedSelection - ) - self.varview.setItemDelegate(DisplayFormatDelegate()) - self.varmodel = itemmodels.VariableListModel() - self.varview.setModel(self.varmodel) - self.varview.selectionModel().selectionChanged.connect( - self._on_var_selection_changed - ) - self.selection = self.varview.selectionModel() - - box.layout().addWidget(self.varview) - - method_layout = layout(Qt.Vertical, margins=0) - box.layout().addLayout(method_layout) - - methodbox = group_box(layout=layout(Qt.Vertical)) - - bgroup = QButtonGroup() - for i, m in enumerate(self.METHODS): - b = radio_button(m.name, group=bgroup, group_id=i) - methodbox.layout().addWidget(b) - - assert self.METHODS[-1].short == "value" - - self.value_stack = value_stack = QStackedLayout() - self.value_combo = QComboBox(activated=self._on_value_changed) - self.value_line = QLineEdit(editingFinished=self._on_value_changed) - self.value_line.setValidator(QDoubleValidator()) - value_stack.addWidget(self.value_combo) - value_stack.addWidget(self.value_line) - methodbox.layout().addLayout(value_stack) - - bgroup.buttonClicked[int].connect( - self.set_method_for_current_selection - ) - reset_button = push_button("Restore all to default", - clicked=self.reset_var_methods, - default=False, autoDefault=False) - - method_layout.addWidget(methodbox) - method_layout.addStretch(2) - method_layout.addWidget(reset_button) - self.varmethodbox = methodbox - self.varbgroup = bgroup - - commitbox = group_box("Commit", layout=layout(margins=0)) - - cwidget = commit_widget( - button_text="Commit", - button_default=True, - check_text="Commit on any change", - checked=self.autocommit, - clicked=self.commit - ) - - def toggle_auto_commit(b): - self.autocommit = b - if self.modified: - self.commit() - - cwidget.auto_commit_check.toggled[bool].connect(toggle_auto_commit) - commitbox.layout().addWidget(cwidget) - - self.addAction(cwidget.commit_action) - self.controlArea.layout().addWidget(commitbox) - - self.data = None - self.fitter = None - - def set_default_method(self, index): - """ - Set the current selected default imputation method. - """ - if self.default_method != index: - self.default_method = index - self.defbggroup.button(index).setChecked(True) - self._invalidate() - - def set_data(self, data): - self.closeContext() - self.clear() - self.data = data - if data is not None: - self.varmodel[:] = data.domain.variables - self.openContext(data.domain) - self.restore_state(self.variable_methods) - itemmodels.select_row(self.varview, 0) - - self.commit() - - def set_fitter(self, fitter): - self.fitter = fitter - - if self.data is not None and \ - any(state.model.short == "model" for state in - map(self.state_for_column, range(len(self.data.domain)))): - self.commit() - - def restore_state(self, state): - for i, var in enumerate(self.varmodel): - key = variable_key(var) - if key in state: - index = self.varmodel.index(i) - self.varmodel.setData(index, state[key], Qt.UserRole) - - def clear(self): - self.varmodel[:] = [] - self.variable_methods = {} - self.data = None - self.modified = False - - def state_for_column(self, column): - """ - #:: int -> State - Return the effective imputation state for `column`. - - :param int column: - :rtype State: - - """ - var = self.varmodel[column] - - state = self.variable_methods.get(variable_key(var), None) - if state is None or state.method == METHODS[0]: - state = State(METHODS[self.default_method], ()) - return state - - def imputer_for_column(self, column): - state = self.state_for_column(column) - data = self.data - var = data.domain[column] - method, params = state - if method.short == "leave": - return None - elif method.short == "avg": - return column_imputer_average(var, data) - elif method.short == "model": - fitter = self.fitter if self.fitter is not None else MeanFitter() - return column_imputer_by_model(var, data, fitter=fitter) - elif method.short == "random": - return column_imputer_random(var, data) - elif method.short == "value": - return column_imputer_defaults(var, data, float(params[0])) - elif method.short == "as_value": - return column_imputer_as_value(var, data) - else: - assert False - - def commit(self): - if self.data is not None: - states = [self.state_for_column(i) - for i in range(len(self.varmodel))] - - # Columns to filter unknowns by dropping rows. - filter_columns = [i for i, state in enumerate(states) - if state.method.short == "drop"] - - impute_columns = [i for i, state in enumerate(states) - if state.method.short not in ["drop", "leave"]] - - imputers = [(self.varmodel[i], self.imputer_for_column(i)) - for i in impute_columns] - - data = self.data - - if imputers: - table_imputer = ImputerModel(data.domain, dict(imputers)) - data = table_imputer(data) - - if filter_columns: - filter_ = data_filter.IsDefined(filter_columns) - data = filter_(data) - else: - data = None - - self.send("Data", data) - self.modified = False - - def _invalidate(self): - self.modified = True - if self.autocommit: - self.commit() - - def _on_var_selection_changed(self): - indexes = self.selection.selectedIndexes() - - vars = [self.varmodel[index.row()] for index in indexes] - defstate = State(METHODS[0], ()) - states = [self.variable_methods.get(variable_key(var), defstate) - for var in vars] - all_cont = all(isinstance(var, Orange.data.ContinuousVariable) - for var in vars) - states = list(unique(states)) - method = None - params = () - state = None - if len(states) == 1: - state = states[0] - method, params = state - mindex = METHODS.index(method) - self.varbgroup.button(mindex).setChecked(True) - elif self.varbgroup.checkedButton() is not None: - self.varbgroup.setExclusive(False) - self.varbgroup.checkedButton().setChecked(False) - self.varbgroup.setExclusive(True) - - values, enabled, stack_index = [], False, 0 - value, value_index = "0.0", 0 - if all_cont: - enabled, stack_index = True, 1 - if method is not None and method.short == "value": - value = params[0] - - elif len(vars) == 1 and \ - isinstance(vars[0], Orange.data.DiscreteVariable): - values, enabled, stack_index = vars[0].values, True, 0 - if method is not None and method.short == "value": - try: - value_index = values.index(params[0]) - except IndexError: - pass - - self.value_stack.setCurrentIndex(stack_index) - self.value_stack.setEnabled(enabled) - - if stack_index == 0: - self.value_combo.clear() - self.value_combo.addItems(values) - self.value_combo.setCurrentIndex(value_index) - else: - self.value_line.setText(value) - - def _on_value_changed(self): - # The "fixed" value in the widget has been changed by the user. - index = self.varbgroup.checkedId() - self.set_method_for_current_selection(index) - - def set_method_for_current_selection(self, methodindex): - indexes = self.selection.selectedIndexes() - self.set_method_for_indexes(indexes, methodindex) - - def set_method_for_indexes(self, indexes, methodindex): - method = METHODS[methodindex] - params = (None,) - if method.short == "value": - if self.value_stack.currentIndex() == 0: - value = self.value_combo.currentIndex() - else: - value = self.value_line.text() - params = (value, ) - elif method.short == "model": - params = ("model", ) - state = State(method, params) - - for index in indexes: - self.varmodel.setData(index, state, Qt.UserRole) - var = self.varmodel[index.row()] - self.variable_methods[variable_key(var)] = state - - self._invalidate() - - def reset_var_methods(self): - indexes = map(self.varmodel.index, range(len(self.varmodel))) - self.set_method_for_indexes(indexes, 0) - - -def variable_key(variable): - return (vartype(variable), variable.name) - - -def unique(iterable): - seen = set() - for el in iterable: - if el not in seen: - seen.add(el) - yield el - - -def translate_domain(X, domain): - if isinstance(domain, tuple): - domain = Orange.data.Domain(domain) - - if X.domain != domain: - if isinstance(X, Orange.data.Table): - X = Orange.data.Table.from_table(domain, X) - elif isinstance(X, Orange.data.Instance): - X = domain.convert(X) - else: - # Storage?? - raise TypeError - - return X - - -def column_imputer(variable, table): - """ - column_imputer :: Variable -> Table -> ColumnImputerModel - """ - pass - - -class ColumnImputerModel(object): - def __init__(self, domain, codomain, transformers): - if isinstance(domain, tuple): - domain = Orange.data.Domain(domain) - if isinstance(codomain, tuple): - codomain = Orange.data.Domain(codomain) - - self.domain = domain - self.codomain = codomain - self.transformers = transformers - - def __call__(self, data): - raise NotImplementedError() - - -def learn_model_for(fitter, variable, data): - """ - Learn a model for `variable` - """ - attrs = [attr for attr in data.domain.attributes - if attr is not variable] - domain = Orange.data.Domain(attrs, (variable,)) - data = Orange.data.Table.from_table(domain, data) - return fitter(data) - - -from Orange.classification.naive_bayes import BayesLearner - - -def column_imputer_by_model(variable, table, *, fitter=BayesLearner()): - model = learn_model_for(fitter, variable, table) - assert model.domain.class_vars == (variable,) - return ColumnImputerFromModel(table.domain, model.domain.class_vars, model) - - -class ColumnImputerFromModel(ColumnImputerModel): - def __init__(self, domain, codomain, model): - transform = ModelTransform(model.domain.class_var, model) - super().__init__(model.domain, codomain, (transform,)) - self.model = model - - def __call__(self, data): - trans = self.transformers[0] - filter_ = data_filter.IsDefined([trans.variable], negate=True) - data_with_unknowns = filter_(data) - values = trans(data_with_unknowns) - - domain = Orange.data.Domain([trans.variable]) - X = Orange.data.Table.from_table(domain, data) - X.X[numpy.isnan(X), :] = values - return X - - -from Orange.statistics import basic_stats -from Orange.statistics import distribution - - -def column_imputer_defaults(variable, table, default): - transform = ReplaceUnknowns(variable, default) - return ColumnImputerDefaults(table.domain, (variable,), - [transform], [default]) - - -def column_imputer_maximal(variable, table): - stats = basic_stats.BasicStats(table, variable) - return column_imputer_defaults(variable, table, stats.max) - - -def column_imputer_minimal(variable, table): - stats = basic_stats.BasicStats(table, variable) - return column_imputer_defaults(variable, table, stats.min) - - -def column_imputer_average(variable, table): - stats = basic_stats.BasicStats(table, variable) - return column_imputer_defaults(variable, table, stats.mean) - - -def column_imputer_modus(variable, table): - stat = distribution.get_distribution(table, variable) - column_imputer_defaults(variable, table, stat.modus()) - - -class ColumnImputerDefaults(ColumnImputerModel): - def __init__(self, domain, codomain, transformers, defaults): - super().__init__(domain, codomain, transformers) - self.defaults = defaults - - def __call__(self, data, weight=None): - data = translate_domain(data, self.codomain) - defaults, X = numpy.broadcast_arrays([self.defaults], data.X) - X = numpy.where(numpy.isnan(X), defaults, X) - return Orange.data.Table.from_numpy(self.codomain, X) - - -def column_imputer_as_value(variable, table): - if isinstance(variable, Orange.data.DiscreteVariable): - fmt = "{var.name}" - value = "N/A" - var = Orange.data.DiscreteVariable( - fmt.format(var=variable), - values=variable.values + [value], - base_value=variable.base_value - ) - var.get_value_from = Lookup( - variable, - numpy.arange(len(variable.values), dtype=int), - unknown=len(variable.values) - ) - codomain = [var] - transformers = [var.get_value_from] - elif isinstance(variable, Orange.data.ContinuousVariable): - fmt = "{var.name}_def" - var = Orange.data.DiscreteVariable( - fmt.format(var=variable), - values=("undef", "def"), - ) - var.get_value_from = IsDefined(variable) - codomain = [variable, var] - stats = basic_stats.BasicStats(table, variable) - transformers = [ReplaceUnknowns(variable, stats.mean), - var.get_value_from] - else: - raise TypeError(type(variable)) - - return ColumnImputerAsValue( - table.domain, Orange.data.Domain(codomain), transformers) - - -class ColumnImputerAsValue(ColumnImputerModel): - def __init__(self, domain, codomain, transformers): - super().__init__(domain, codomain, transformers) - - def __call__(self, data, ): - data = translate_domain(data, self.domain) - data = translate_domain(data, self.codomain) - - variable = self.codomain[0] - if isinstance(variable, Orange.data.ContinuousVariable): - tr = self.transformers[0] - assert isinstance(tr, ReplaceUnknowns) - c = tr(data[:, variable]) - cindex = data.domain.index(variable) - data.X[:, cindex] = c - return data - - -def column_imputer_random(variable, data): - if isinstance(variable, Orange.data.DiscreteVariable): - transformer = RandomTransform(variable) - elif isinstance(variable, Orange.data.ContinuousVariable): - transformer = RandomTransform(variable) - return RandomImputerModel((variable,), (variable,), (transformer,)) - - -class RandomImputerModel(ColumnImputerModel): - def __call__(self, data): - data = translate_domain(data, self.codomain) - trans = self.transformers[0] - values = trans(data).reshape((-1, 1)) - - X = data[:, trans.variable].X - values = numpy.where(numpy.isnan(X), values, X) - return Orange.data.Table.from_numpy(self.codomain, values) - - -# Why even need this? -class NullColumnImputer(ColumnImputerModel): - def __init__(self, domain, codomain, transformers): - super().__init__(domain, codomain, transformers) - - def __call__(self, data, weight=None): - data = translate_domain(data, self.codomain) - return data - - -from functools import reduce -import numpy -from Orange.feature.transformation import \ - ColumnTransformation, Lookup, Identity - - -class IsDefined(ColumnTransformation): - def _transform(self, c): - return ~numpy.isnan(c) - - -class Lookup(Lookup): - def __init__(self, variable, lookup_table, unknown=None): - super().__init__(variable, lookup_table) - self.unknown = unknown - - def _transform(self, column): - if self.unknown is None: - unknown = numpy.nan - else: - unknown = self.unknown - - mask = numpy.isnan(column) - column_valid = numpy.where(mask, 0, column) - values = self.lookup_table[numpy.array(column_valid, dtype=int)] - return numpy.where(mask, unknown, values) - - -class ReplaceUnknowns(ColumnTransformation): - def __init__(self, variable, value=0): - super().__init__(variable) - self.value = value - - def _transform(self, c): - return numpy.where(numpy.isnan(c), self.value, c) - - -class RandomTransform(ColumnTransformation): - def __init__(self, variable, dist=None): - super().__init__(variable) - self.dist = dist - - def _transform(self, c): - if isinstance(self.variable, Orange.data.DiscreteVariable): - if self.dist is not None: - pass - else: - c = numpy.random.randint(len(self.variable.values), - size=c.shape) - else: - if self.dist is not None: - pass - else: - c = numpy.random.normal(size=c.shape) - return c - - -class ModelTransform(ColumnTransformation): - def __init__(self, variable, model): - super().__init__(variable) - self.model = model - - def __call__(self, data): - return self.model(data) - - -# Rename to TableImputer (Model?) -class ImputerModel(object): - """ - A fitted Imputation model. - - :param domain: - Imputer domain. - :param columnimputers: - A mapping of columns in `domain` to a :class:`ColumnImputerModel`. - - """ - def __init__(self, domain, columnimputers={}): - self.columnimputers = columnimputers - self.domain = domain - - col_models = [(var, columnimputers.get(var, None)) - for var in domain.variables] - # variables for the codomain - codomain_attrs = [] - codomain_class_vars = [] - - # column imputers for all variables in the domain - col_imputers = [] - for i, (var, imp) in enumerate(col_models): - if isinstance(imp, ColumnImputerModel): - pass - elif isinstance(imp, Orange.classification.Model): - imp = ColumnImputerFromModel(domain, imp.class_vars, imp) - elif isinstance(imp, collections.Callable): - raise NotImplementedError - imp = ColumnImputerFromCallable(var, imp) - elif imp is None: - imp = NullColumnImputer(domain, (var,), (Identity(var),)) - - col_imputers.append((var, imp)) - - if i < len(domain.attributes): - codomain_attrs.extend(imp.codomain) - else: - codomain_class_vars.extend(imp.codomain) - - self.codomain = Orange.data.Domain( - codomain_attrs, codomain_class_vars, domain.metas - ) - - self.transformers = [] - self.columnimputers = dict(col_imputers) - for var, colimp in col_imputers: - self.transformers.append( - (var, tuple(zip(colimp.codomain, colimp.transformers))) - ) - - def __call__(self, X, weight=None): - X = translate_domain(X, self.domain) - Xp = translate_domain(X, self.codomain) - - if Xp is X: - Xp = Xp.copy() - - nattrs = len(Xp.domain.attributes) - for var in X.domain: - col_imputer = self.columnimputers[var] - if isinstance(col_imputer, NullColumnImputer): - continue - - if not self._is_var_transform(col_imputer): - cols = col_imputer(X) - for i, cv in enumerate(col_imputer.codomain): - cvindex = Xp.domain.index(cv) - if cvindex < len(Xp.domain.attributes): - Xp.X[:, cvindex] = cols.X[:, i] - else: - Xp.Y[:, nattrs - cvindex] = cols.X[:, i] - - return Xp - - def _is_var_transform(self, imputer): - """ - Is `imputer` implemented as a Varible.get_value_from. - - """ - for var, t in zip(imputer.codomain, imputer.transformers): - if var.get_value_from and var.get_value_from is t: - pass - else: - return False - return False - - -""" - -Imputation: - Should be a standard X -> X' transform. - i.e. Given an X of domain D returns the X' of a domain - D' where the number of instances in X' might not be the same - and the D' might not be the same. - - F = Imputer(X) - D' = F.codomain - rows = F.filter(X) - X' = F(X) - assert X'.domain == D' - assert X'.rowids == rows - - Issue 1: The filter might/should be a separate step. I.e. - - F = Imputer(X) o Filter(X, ...) # Imputer(X) | Filter(X) - - F.domain, F.codomain - reduce((+), map(F.transform, F.domain)) == F.codomain - - A ColumnImputer is a mapping [var] -> [var', [var1', ...]]. - The mapping is specified with either var'.get_value_from or - by ColumnImputer i.e. ColumnImputer must contain a Transformation for - each codomain variable - data transform = Transform of Variable * (Variable * Transformation) list - i.e. - source var -> [(new_var, Transformation), ...] - -""" - -from Orange.classification import Fitter, Model - - -class MeanFitter(Fitter): - def fit_storage(self, data): - dist = distribution.get_distribution(data, data.domain.class_var) - domain = Orange.data.Domain((), (data.domain.class_var,)) - return MeanPredictor(domain, dist) - - -class MeanPredictor(Model): - def __init__(self, domain, distribution): - super().__init__(domain) - self.distribution = distribution - self.mean = distribution.mean() - - def predict(self, X): - return numpy.zeros(len(X)) + self.mean - - -import unittest - - -class Test(unittest.TestCase): - def test_impute_defaults(self): - nan = numpy.nan - data = [ - [1.0, nan, 0.0], - [2.0, 1.0, 3.0], - [nan, nan, nan] - ] - data = Orange.data.Table.from_numpy(None, numpy.array(data)) - - cimp1 = column_imputer_average(data.domain[0], data) - self.assertIsInstance(cimp1.transformers[0], ReplaceUnknowns) - trans = cimp1.transformers[0] - self.assertEqual(trans.value, 1.5) - self.assertTrue((trans(data) == [1.0, 2.0, 1.5]).all()) - - cimp2 = column_imputer_maximal(data.domain[1], data) - trans = cimp2.transformers[0] - self.assertTrue((trans(data) == [1.0, 1.0, 1.0]).all()) - - cimp3 = column_imputer_minimal(data.domain[2], data) - trans = cimp3.transformers[0] - self.assertTrue((trans(data) == [0.0, 3.0, 0.0]).all()) - - imputer = ImputerModel( - data.domain, - {data.domain[0]: cimp1, - data.domain[1]: cimp2, - data.domain[2]: cimp3} - ) - idata = imputer(data) - self.assertClose(idata.X, - [[1.0, 1.0, 0.0], - [2.0, 1.0, 3.0], - [1.5, 1.0, 0.0]]) - - def test_impute_as_value(self): - nan = numpy.nan - data = [ - [1.0, nan, 0.0], - [2.0, 1.0, 3.0], - [nan, nan, nan] - ] - domain = Orange.data.Domain( - (Orange.data.DiscreteVariable("A", values=["0", "1", "2"]), - Orange.data.ContinuousVariable("B"), - Orange.data.ContinuousVariable("C")) - ) - data = Orange.data.Table.from_numpy(domain, numpy.array(data)) - - cimp1 = column_imputer_as_value(domain[0], data) - self.assertEqual(len(cimp1.codomain), 1) - self.assertEqual(cimp1.codomain[0].name, "A") - self.assertEqual(cimp1.codomain[0].values, ["0", "1", "2", "N/A"]) - self.assertEqual(len(cimp1.transformers), 1) - - trans = cimp1.transformers[0] - self.assertClose(trans(data), [1.0, 2.0, 3.0]) - self.assertEqual(list(inst[0] for inst in cimp1(data)), - ["1", "2", "N/A"]) - - cimp2 = column_imputer_as_value(domain[1], data) - self.assertEqual(len(cimp2.transformers), 2) - self.assertEqual(cimp2.codomain[0], domain[1]) - self.assertIsInstance(cimp2.codomain[1], Orange.data.DiscreteVariable) - self.assertEqual(cimp2.codomain[1].values, ["undef", "def"]) - - self.assertClose(cimp2.transformers[0](data), [1.0, 1.0, 1.0]) - self.assertClose(cimp2.transformers[1](data), [0, 1, 0]) - - idata = cimp2(data) - self.assertEqual(idata.domain, cimp2.codomain) - self.assertClose(idata.X, [[1, 0], [1, 1], [1, 0]]) - - cimp3 = column_imputer_as_value(domain[2], data) - imputer = ImputerModel( - domain, - {var: cimp for (var, cimp) in zip(domain, (cimp1, cimp2, cimp3))} - ) - idata = imputer(data) - self.assertEqual( - reduce(tuple.__add__, - (tuple(cimp.codomain) for cimp in (cimp1, cimp2, cimp3)), - ), - tuple(idata.domain) - ) - - self.assertClose( - idata.X, - [[1, 1.0, 0, 0.0, 1], - [2, 1.0, 1, 3.0, 1], - [3, 1.0, 0, 1.5, 0]] - ) - - def test_impute_by_model(self): - from Orange.classification.majority import MajorityFitter - - nan = numpy.nan - data = [ - [1.0, nan, 0.0], - [2.0, 1.0, 3.0], - [nan, nan, nan] - ] - domain = Orange.data.Domain( - (Orange.data.DiscreteVariable("A", values=["0", "1", "2"]), - Orange.data.ContinuousVariable("B"), - Orange.data.ContinuousVariable("C")) - ) - data = Orange.data.Table.from_numpy(domain, numpy.array(data)) - - cimp1 = column_imputer_by_model(domain[0], data, - fitter=MajorityFitter()) - self.assertEqual(tuple(cimp1.codomain), (domain[0],)) - - cimp2 = column_imputer_by_model(domain[1], data, fitter=MeanFitter()) - cimp3 = column_imputer_by_model(domain[2], data, fitter=MeanFitter()) - - imputer = ImputerModel( - data.domain, - {data.domain[0]: cimp1, - data.domain[1]: cimp2, - data.domain[2]: cimp3} - ) - idata = imputer(data) - self.assertClose(idata.X, - [[1.0, 1.0, 0.0], - [2.0, 1.0, 3.0], - [1.0, 1.0, 1.5]]) - - def test_impute_random(self): - nan = numpy.nan - data = [ - [1.0, nan, 0.0], - [2.0, 1.0, 3.0], - [nan, nan, nan] - ] - domain = Orange.data.Domain( - (Orange.data.DiscreteVariable("A", values=["0", "1", "2"]), - Orange.data.ContinuousVariable("B"), - Orange.data.ContinuousVariable("C")) - ) - data = Orange.data.Table.from_numpy(domain, numpy.array(data)) - - cimp1 = column_imputer_random(domain[0], data) - self.assertTrue(not numpy.any(numpy.isnan(cimp1(data).X))) - - cimp2 = column_imputer_random(domain[1], data) - self.assertTrue(not numpy.any(numpy.isnan(cimp2(data).X))) - - cimp3 = column_imputer_random(domain[2], data) - self.assertTrue(not numpy.any(numpy.isnan(cimp3(data).X))) - - imputer = ImputerModel( - data.domain, - {data.domain[0]: cimp1, - data.domain[1]: cimp2, - data.domain[2]: cimp3} - ) - idata = imputer(data) - self.assertTrue(not numpy.any(numpy.isnan(idata.X))) - - definedmask = ~numpy.isnan(data.X) - self.assertClose(data.X[definedmask], - idata.X[definedmask]) - - def assertClose(self, X, Y, delta=1e-9, msg=None): - X, Y = numpy.asarray(X), numpy.asarray(Y) - if not (numpy.abs(X - Y) <= delta).all(): - standardMsg = "%s != %s to within delta %f" % (X, Y, delta) - msg = self._formatMessage(msg, standardMsg) - raise self.failureException(msg) - - -if __name__ == "__main__": - app = QtGui.QApplication([]) - w = OWImpute() - w.show() - data = Orange.data.Table("brown-selected") - w.set_data(data) - app.exec_() diff --git a/Orange/widgets/data/owmergedata.py b/Orange/widgets/data/owmergedata.py deleted file mode 100644 index d19dc9d2bfc..00000000000 --- a/Orange/widgets/data/owmergedata.py +++ /dev/null @@ -1,301 +0,0 @@ -import math -import itertools -from collections import defaultdict - -from PyQt4 import QtGui, QtCore -import numpy - -import Orange - -from Orange.widgets import widget -from Orange.widgets import gui -from Orange.widgets.utils import itemmodels - - -class OWMergeData(widget.OWWidget): - name = "Merge Data" - description = "Merges data sets based on values of selected attributes." - icon = "icons/MergeData.svg" - priority = 1110 - - inputs = [("Data A", Orange.data.Table, "setDataA", widget.Default), - ("Data B", Orange.data.Table, "setDataB")] - outputs = [("Merged Data A+B", Orange.data.Table, ), - ("Merged Data B+A", Orange.data.Table, )] - - want_main_area = False - - def __init__(self, parent=None): - super().__init__(parent) - - # data - self.dataA = None - self.dataB = None - - # GUI - w = QtGui.QWidget(self) - self.controlArea.layout().addWidget(w) - grid = QtGui.QGridLayout() - grid.setMargin(0) - w.setLayout(grid) - - # attribute A selection - boxAttrA = gui.widgetBox( - self, self.tr("Attribute A"), addToLayout=False) - grid.addWidget(boxAttrA, 0, 0) - self.attrViewA = QtGui.QListView( - selectionMode=QtGui.QListView.SingleSelection - ) - - self.attrModelA = itemmodels.VariableListModel() - self.attrViewA.setModel(self.attrModelA) - self.attrViewA.selectionModel().selectionChanged.connect( - self._selectedAttrAChanged) - - boxAttrA.layout().addWidget(self.attrViewA) - - # attribute B selection - boxAttrB = gui.widgetBox( - self, self.tr("Attribute B"), addToLayout=False) - grid.addWidget(boxAttrB, 0, 1) - self.attrViewB = QtGui.QListView( - selectionMode=QtGui.QListView.SingleSelection - ) - - self.attrModelB = itemmodels.VariableListModel() - self.attrViewB.setModel(self.attrModelB) - self.attrViewB.selectionModel().selectionChanged.connect( - self._selectedAttrBChanged) - - boxAttrB.layout().addWidget(self.attrViewB) - - # info A - boxDataA = gui.widgetBox( - self, self.tr("Data A Input"), addToLayout=False) - grid.addWidget(boxDataA, 1, 0) - self.infoBoxDataA = gui.widgetLabel(boxDataA, self.dataInfoText(None)) - - # info B - boxDataB = gui.widgetBox( - self, self.tr("Data B Input"), addToLayout=False) - grid.addWidget(boxDataB, 1, 1) - self.infoBoxDataB = gui.widgetLabel(boxDataB, self.dataInfoText(None)) - - # resize - self.resize(400, 500) - - def setDataA(self, data): - #self.closeContext() - self.dataA = data - if data is not None: - self.attrModelA[:] = allvars(data) - else: - self.attrModelA[:] = [] - - self.infoBoxDataA.setText(self.dataInfoText(data)) - - def setDataB(self, data): - #self.closeContext() - self.dataB = data - if data is not None: - self.attrModelB[:] = allvars(data) - else: - self.attrModelB[:] = [] - - self.infoBoxDataB.setText(self.dataInfoText(data)) - - def handleNewSignals(self): - self._invalidate() - - def dataInfoText(self, data): - ninstances = 0 - nvariables = 0 - if data is not None: - ninstances = len(data) - nvariables = len(data.domain) - - instances = self.tr("%n instance(s)", None, ninstances) - attributes = self.tr("%n variable(s)", None, nvariables) - return "\n".join([instances, attributes]) - - def selectedIndexA(self): - return selected_row(self.attrViewA) - - def selectedIndexB(self): - return selected_row(self.attrViewB) - - def commit(self): - indexA = self.selectedIndexA() - indexB = self.selectedIndexB() - if indexA is None or indexB is None: - return - - varA = self.attrModelA[indexA] - varB = self.attrModelB[indexB] - - AB = merge(self.dataA, varA, self.dataB, varB) - BA = merge(self.dataB, varB, self.dataA, varA) - - self.send("Merged Data A+B", AB) - self.send("Merged Data B+A", BA) - - def _selectedAttrAChanged(self, *args): - self._invalidate() - - def _selectedAttrBChanged(self, *args): - self._invalidate() - - def _invalidate(self): - self.commit() - - -def selected_row(view): - rows = view.selectionModel().selectedRows() - if rows: - return rows[0].row() - else: - return None - - -def allvars(data): - return data.domain.attributes + data.domain.class_vars + data.domain.metas - - -def merge(A, varA, B, varB): - join_indices = left_join_indices(A, B, (varA,), (varB,)) - seen_set = set() - - def seen(val): - return val in seen_set or bool(seen_set.add(val)) - - merge_indices = [(i, j) for i, j in join_indices if not seen(i)] - - all_vars_A = set(A.domain.variables + A.domain.metas) - iter_vars_B = itertools.chain( - enumerate(B.domain.variables), - ((-i, m) for i, m in enumerate(B.domain.metas, start=1)) - ) - reduced_indices_B = [i for i, var in iter_vars_B if not var in all_vars_A] - reduced_B = B[:, list(reduced_indices_B)] - - return join_table_by_indices(A, reduced_B, merge_indices) - - -def group_table_indices(table, key_vars, exclude_unknown=False): - """ - Group table indices based on values of selected columns (`key_vars`). - - Return a dictionary mapping all unique value combinations (keys) - into a list of indices in the table where they are present. - - :param Orange.data.Table table: - :param list-of-Orange.data.FeatureDescriptor] key_vars: - :param bool exclude_unknown: - - """ - groups = defaultdict(list) - for i, inst in enumerate(table): - key = [inst[a] for a in key_vars] - if exclude_unknown and any(math.isnan(k) for k in key): - continue - key = tuple([str(k) for k in key]) - groups[key].append(i) - return groups - - -def left_join_indices(table1, table2, vars1, vars2): - key_map1 = group_table_indices(table1, vars1) - key_map2 = group_table_indices(table2, vars2) - indices = [] - for i, inst in enumerate(table1): - key = tuple([str(inst[v]) for v in vars1]) - if key in key_map1 and key in key_map2: - for j in key_map2[key]: - indices.append((i, j)) - else: - indices.append((i, None)) - return indices - - -def right_join_indices(table1, table2, vars1, vars2): - indices = left_join_indices(table2, table1, vars2, vars1) - return [(j, i) for i, j in indices] - - -def inner_join_indices(table1, table2, vars1, vars2): - indices = left_join_indices(table1, table2, vars1, vars2) - return [(i, j) for i, j in indices if j is not None] - - -def left_join(left, right, left_vars, right_vars): - """ - Left join `left` and `right` on values of `left/right_vars`. - """ - indices = left_join_indices(left, right, left_vars, right_vars) - return join_table_by_indices(left, right, indices) - - -def right_join(left, right, left_vars, right_vars): - """ - Right join left and right on attributes attr1 and attr2 - """ - indices = right_join_indices(left, right, left_vars, right_vars) - return join_table_by_indices(left, right, indices) - - -def inner_join(left, right, left_vars, right_vars): - indices = inner_join_indices(left, right, left_vars, right_vars) - return join_table_by_indices(left, right, indices) - - -def join_table_by_indices(left, right, indices): - domain = Orange.data.Domain( - left.domain.attributes + right.domain.attributes, - left.domain.class_vars + right.domain.class_vars, - left.domain.metas + right.domain.metas - ) - X = join_array_by_indices(left.X, right.X, indices) - Y = join_array_by_indices(left.Y, right.Y, indices) - metas = join_array_by_indices(left.metas, right.metas, indices) - - return Orange.data.Table.from_numpy(domain, X, Y, metas) - - -def join_array_by_indices(left, right, indices, masked=float("nan")): - left_masked = [masked] * left.shape[1] - right_masked = [masked] * right.shape[1] - - leftparts = [] - rightparts = [] - for i, j in indices: - if i is not None: - leftparts.append(left[i]) - else: - leftparts.append(left_masked) - if j is not None: - rightparts.append(right[j]) - else: - rightparts.append(right_masked) - - def hstack_blocks(blocks): - return numpy.hstack(list(map(numpy.vstack, blocks))) - - return hstack_blocks((leftparts, rightparts)) - - -def test(): - app = QtGui.QApplication([]) - - w = OWMergeData() - zoo = Orange.data.Table("zoo") - A = zoo[:, [0, 1, 2, "type", -1]] - B = zoo[:, [3, 4, 5, "type", -1]] - w.setDataA(A) - w.setDataB(B) - w.handleNewSignals() - w.show() - app.exec_() - - -if __name__ == "__main__": - test() diff --git a/Orange/widgets/data/owpurgedomain.py b/Orange/widgets/data/owpurgedomain.py deleted file mode 100644 index c03553f5cdc..00000000000 --- a/Orange/widgets/data/owpurgedomain.py +++ /dev/null @@ -1,411 +0,0 @@ -from PyQt4 import QtGui - -import Orange - -from Orange.widgets import gui, widget -from Orange.widgets.settings import Setting - -#: Purging flags -SortValues, RemoveConstant, RemoveUnusedValues = 1, 2, 4 - - -class OWPurgeDomain(widget.OWWidget): - name = "Purge Domain" - description = "Removes redundant values and attributes, sorts values." - icon = "icons/PurgeDomain.svg" - category = "Data" - keywords = ["data", "purge", "domain"] - - inputs = [("Data", Orange.data.Table, "setData")] - outputs = [("Data", Orange.data.Table)] - - removeValues = Setting(1) - removeAttributes = Setting(1) - removeClassAttribute = Setting(1) - removeClasses = Setting(1) - autoSend = Setting(False) - sortValues = Setting(True) - sortClasses = Setting(True) - - want_main_area = False - - def __init__(self, parent=None): - super().__init__(parent) - self.data = None - - self.preRemoveValues = 1 - self.preRemoveClasses = 1 - self.dataChanged = False - - self.removedAttrs = "-" - self.reducedAttrs = "-" - self.resortedAttrs = "-" - self.classAttr = "-" - - boxAt = gui.widgetBox(self.controlArea, "Attributes") - gui.checkBox(boxAt, self, 'sortValues', 'Sort attribute values', - callback=self.optionsChanged) - gui.separator(boxAt, 2) - rua = gui.checkBox( - boxAt, self, "removeAttributes", - "Remove attributes with less than two values", - callback=self.removeAttributesChanged) - ruv = gui.checkBox( - gui.indentedBox(boxAt, sep=gui.checkButtonOffsetHint(rua)), - self, - "removeValues", - "Remove unused attribute values", - callback=self.optionsChanged - ) - rua.disables = [ruv] - rua.makeConsistent() - - boxAt = gui.widgetBox(self.controlArea, "Classes", addSpace=True) - gui.checkBox(boxAt, self, 'sortClasses', 'Sort classes', - callback=self.optionsChanged) - gui.separator(boxAt, 2) - rua = gui.checkBox( - boxAt, self, "removeClassAttribute", - "Remove class attribute if there are less than two classes", - callback=self.removeClassesChanged - ) - ruv = gui.checkBox( - gui.indentedBox(boxAt, sep=gui.checkButtonOffsetHint(rua)), - self, - "removeClasses", - "Remove unused class values", - callback=self.optionsChanged - ) - rua.disables = [ruv] - rua.makeConsistent() - - box3 = gui.widgetBox(self.controlArea, 'Statistics', addSpace=True) - gui.label(box3, self, "Removed attributes: %(removedAttrs)s") - gui.label(box3, self, "Reduced attributes: %(reducedAttrs)s") - gui.label(box3, self, "Resorted attributes: %(resortedAttrs)s") - gui.label(box3, self, "Class attribute: %(classAttr)s") - - box2 = gui.widgetBox(self.controlArea, "Send") - btSend = gui.button(box2, self, "Send data", - callback=self.process, - default=True) - cbAutoSend = gui.checkBox(box2, self, "autoSend", "Send automatically") - - gui.setStopper(self, btSend, cbAutoSend, "dataChanged", self.process) - - gui.rubber(self.controlArea) - - def setData(self, dataset): - if dataset is not None: - self.data = dataset - self.process() - else: - self.removedAttrs = "-" - self.reducedAttrs = "-" - self.resortedAttrs = "-" - self.classAttr = "-" - self.send("Data", None) - self.data = None - self.dataChanged = False - - def removeAttributesChanged(self): - if not self.removeAttributes: - self.preRemoveValues = self.removeValues - self.removeValues = False - else: - self.removeValues = self.preRemoveValues - self.optionsChanged() - - def removeClassesChanged(self): - if not self.removeClassAttribute: - self.preRemoveClasses = self.removeClasses - self.removeClasses = False - else: - self.removeClasses = self.preRemoveClasses - self.optionsChanged() - - def optionsChanged(self): - if self.autoSend: - self.process() - else: - self.dataChanged = True - - def process(self): - if self.data is None: - return - - self.reducedAttrs = 0 - self.removedAttrs = 0 - self.resortedAttrs = 0 - - attr_flags = sum([SortValues * self.sortValues, - RemoveConstant * self.removeAttributes, - RemoveUnusedValues * self.removeValues]) - - class_flags = sum([SortValues * self.sortClasses, - RemoveConstant * self.removeClassAttribute, - RemoveUnusedValues * self.removeClasses]) - domain = self.data.domain - - attrs_state = [purge_var_M(var, self.data, attr_flags) - for var in domain.attributes] - class_vars_state = [purge_var_M(var, self.data, class_flags) - for var in domain.class_vars] - - nremoved = len([st for st in attrs_state if is_removed(st)]) - nreduced = len([st for st in attrs_state - if not is_removed(st) and is_reduced(st)]) - nsorted = len([st for st in attrs_state - if not is_removed(st) and is_sorted(st)]) - - self.removedAttrs = nremoved - self.reducedAttrs = nreduced - self.resortedAttrs = nsorted - - if class_vars_state: - # TODO: Extend the reporting for multi-class domains - st = class_vars_state[0] - if isinstance(st, Var): - self.classAttr = "Class is unchanged" - elif is_removed(st): - self.classAttr = "Class is removed" - else: - status = " and ".join( - [s for s, predicate in zip(["sorted", "reduced"], - [is_sorted, is_reduced]) - if predicate(st)] - ) - self.classAttr = "Class is " + status - - attrs = tuple(merge_transforms(st).var for st in attrs_state - if not is_removed(st)) - class_vars = tuple(merge_transforms(st).var for st in class_vars_state - if not is_removed(st)) - - newdomain = Orange.data.Domain(attrs, class_vars, domain.metas) - if newdomain.attributes != domain.attributes or \ - newdomain.class_vars != domain.class_vars: - data = Orange.data.Table.from_table(newdomain, self.data) - else: - data = self.data - - self.send("Data", data) - - self.dataChanged = False - - -import numpy -from collections import namedtuple - -# Define a simple Purge expression 'language'. -#: A input variable (leaf expression). -Var = namedtuple("Var", ["var"]) -#: Removed variable (can only ever be present as a root node). -Removed = namedtuple("Removed", ["sub", "var"]) -#: A reduced variable -Reduced = namedtuple("Reduced", ["sub", "var"]) -#: A sorted variable -Sorted = namedtuple("Sorted", ["sub", "var"]) -#: A general (lookup) transformed variable. -#: (this node is returned as a result of `merge` which joins consecutive -#: Removed/Reduced nodes into a single Transformed node) -Transformed = namedtuple("Transformed", ["sub", "var"]) - - -def is_var(exp): - """Is `exp` a `Var` node.""" - return isinstance(exp, Var) - -def is_removed(exp): - """Is `exp` a `Removed` node.""" - return isinstance(exp, Removed) - -def _contains(exp, cls): - """Does `node` contain a sub node of type `cls`""" - if isinstance(exp, cls): - return True - elif isinstance(exp, Var): - return False - else: - return _contains(exp.sub, cls) - -def is_reduced(exp): - """Does `exp` contain a `Reduced` node.""" - return _contains(exp, Reduced) - -def is_sorted(exp): - """Does `exp` contain a `Reduced` node.""" - return _contains(exp, Sorted) - - -def merge_transforms(exp): - """ - Merge consecutive Removed, Reduced or Transformed nodes. - - .. note:: Removed nodes are returned unchanged. - - """ - if isinstance(exp, (Var, Removed)): - return exp - elif isinstance(exp, (Reduced, Sorted, Transformed)): - prev = merge_transforms(exp.sub) - if isinstance(prev, (Reduced, Sorted, Transformed)): - B = exp.var.get_value_from - assert isinstance(B, Lookup) - A = B.variable.get_value_from - assert isinstance(A, Lookup) - - new_var = Orange.data.DiscreteVariable( - exp.var.name, - values=exp.var.values, - ordered=exp.var.ordered - ) - new_var.get_value_from = merge_lookup(A, B) - assert isinstance(prev.sub, Var) - return Transformed(prev.sub, new_var) - else: - assert prev is exp.sub - return exp - else: - raise TypeError - - -def purge_var_M(var, data, flags): - state = Var(var) - if flags & RemoveConstant: - var = remove_constant(state.var, data) - if var is None: - return Removed(state, state.var) - - if isinstance(state.var, Orange.data.DiscreteVariable): - if flags & RemoveUnusedValues: - newattr = remove_unused_values(state.var, data) - - if newattr is not state.var: - state = Reduced(state, newattr) - - if flags & RemoveConstant and len(state.var.values) < 2: - return Removed(state, state.var) - - if flags & SortValues: - newattr = sort_var_values(state.var) - if newattr is not state.var: - state = Sorted(state, newattr) - - return state - - -def purge_domain(data, attribute_flags=RemoveConstant | RemoveUnusedValues, - class_flags=RemoveConstant | RemoveUnusedValues): - - attrs = [purge_var_M(var, data, attribute_flags) - for var in data.domain.attributes] - class_vars = [purge_var_M(var, data, class_flags) - for var in data.domain.class_vars] - - attrs = [var for var in attrs if not is_removed(var)] - class_vars = [var for var in class_vars if not is_removed(var)] - attrs = [merge_transforms(var).var for var in attrs] - class_vars = [merge_transforms(var).var for var in class_vars] - - return Orange.data.Domain(attrs, class_vars, data.domain.metas) - - -def has_at_least_two_values(data, var): - ((dist, _), ) = data._compute_distributions([var]) - if isinstance(var, Orange.data.ContinuousVariable): - dist = dist[1, :] - return numpy.sum(dist > 0.0) > 1 - - -def remove_constant(var, data): - if isinstance(var, Orange.data.ContinuousVariable): - if not has_at_least_two_values(data, var): - return None - else: - return var - elif isinstance(var, Orange.data.DiscreteVariable): - if len(var.values) < 2: - return None - else: - return var - else: - return var - - -def remove_unused_values(var, data): - column_data = Orange.data.Table.from_table( - Orange.data.Domain([var]), - data - ) - array = column_data.X.ravel() - mask = numpy.isfinite(array) - unique = numpy.array(numpy.unique(array[mask]), dtype=int) - - if len(unique) == len(var.values): - return var - - used_values = [var.values[i] for i in unique] - new_var = Orange.data.DiscreteVariable( - "R_{}".format(var.name), - values=used_values - ) - translation_table = numpy.array([numpy.NaN] * len(var.values)) - translation_table[unique] = range(len(new_var.values)) - - if 0 >= var.base_value < len(var.values): - base = translation_table[var.base_value] - if numpy.isfinite(base): - new_var.base_value = int(base) - - new_var.get_value_from = Lookup(var, translation_table) - return new_var - - -def sort_var_values(var): - newvalues = list(sorted(var.values)) - - if newvalues == list(var.values): - return var - - translation_table = numpy.array( - [float(newvalues.index(value)) for value in var.values] - ) - - newvar = Orange.data.DiscreteVariable(var.name, values=newvalues) - newvar.get_value_from = Lookup(var, translation_table) - return newvar - -from Orange.feature.transformation import Lookup - - -class Lookup(Lookup): - def _transform(self, column): - mask = numpy.isnan(column) - column_valid = numpy.where(mask, 0, column) - values = self.lookup_table[numpy.array(column_valid, dtype=int)] - return numpy.where(mask, numpy.nan, values) - - -def merge_lookup(A, B): - """ - Merge two consecutive Lookup transforms into one. - """ - lookup_table = numpy.array(A.lookup_table) - mask = numpy.isfinite(lookup_table) - indices = numpy.array(lookup_table[mask], dtype=int) - lookup_table[mask] = B.lookup_table[indices] - return Lookup(A.variable, lookup_table) - -if __name__ == "__main__": - appl = QtGui.QApplication([]) - ow = OWPurgeDomain() - data = Orange.data.Table("car.tab") - subset = [inst for inst in data - if inst["buying"] == "v-high"] - subset = Orange.data.Table(data.domain, subset) - # The "buying" should be removed and the class "y" reduced - ow.setData(subset) - ow.show() - appl.exec_() - ow.saveSettings() diff --git a/Orange/widgets/data/owrank.py b/Orange/widgets/data/owrank.py deleted file mode 100644 index 9a53c85f866..00000000000 --- a/Orange/widgets/data/owrank.py +++ /dev/null @@ -1,591 +0,0 @@ -""" -Rank -==== - -Rank (score) features for prediction. - -""" - - -from collections import namedtuple - -from PyQt4 import QtGui, QtCore -from PyQt4.QtCore import Qt - -import Orange -from Orange.feature import scoring - -from Orange.widgets import widget, settings, gui - - -def is_discrete(var): - return isinstance(var, Orange.data.DiscreteVariable) - - -def is_continuous(var): - return isinstance(var, Orange.data.ContinuousVariable) - - -def is_class_discrete(data): - return is_discrete(data.domain.class_var) - - -def is_class_continuous(data): - return is_continuous(data.domain.class_var) - - -def table(shape, fill=None): - """ Return a 2D table with shape filed with ``fill`` - """ - return [[fill for j in range(shape[1])] for i in range(shape[0])] - - -_score_meta = namedtuple( - "_score_meta", - ["name", - "shortname", - "score", - "supports_regression", - "supports_classification", - "handles_discrete", - "handles_continuous"] -) - - -class score_meta(_score_meta): - # Add sensible defaults to __new__ - def __new__(cls, name, shortname, score, - supports_regression=True, supports_classification=True, - handles_continuous=True, handles_discrete=True): - return _score_meta.__new__( - cls, name, shortname, score, - supports_regression, supports_classification, - handles_discrete, handles_continuous - ) - -# Default scores. -SCORES = [ - score_meta( - "Information Gain", "Inf. gain", scoring.InfoGain, - supports_regression=False, - supports_classification=True, - handles_continuous=False, - handles_discrete=True), - score_meta( - "Gain Ratio", "Gain Ratio", scoring.GainRatio, - supports_regression=False, - handles_continuous=False, - handles_discrete=True), - score_meta( - "Gini Gain", "Gini", scoring.Gini, - supports_regression=False, - supports_classification=True, - handles_continuous=False), -] - -_DEFAULT_SELECTED = set(m.name for m in SCORES) - - -class OWRank(widget.OWWidget): - name = "Rank" - description = "Ranks and filters data features by their relevance." - icon = "icons/Rank.svg" - priority = 1102 - - inputs = [("Data", Orange.data.Table, "setData")] - outputs = [("Reduced Data", Orange.data.Table)] - - SelectNone, SelectAll, SelectManual, SelectNBest = range(4) - - selectMethod = settings.Setting(SelectNBest) - nSelected = settings.Setting(5) - autoApply = settings.Setting(True) - - # Header state for discrete/continuous scores - headerState = settings.Setting((None, None)) - - def __init__(self): - super().__init__() - - self.all_measures = SCORES - - self.selectedMeasures = dict( - [(name, True) for name in _DEFAULT_SELECTED] + - [(m.name, False) - for m in self.all_measures[len(_DEFAULT_SELECTED):]] - ) - # Discrete (0) or continuous (1) class mode - self.rankMode = 0 - - self.data = None - - self.discMeasures = [m for m in self.all_measures - if m.supports_classification] - self.contMeasures = [m for m in self.all_measures - if m.supports_regression] - - selMethBox = gui.widgetBox( - self.controlArea, "Select attributes", addSpace=True) - - grid = QtGui.QGridLayout() - grid.setContentsMargins(0, 0, 0, 0) - self.selectButtons = QtGui.QButtonGroup() - self.selectButtons.buttonClicked[int].connect(self.setSelectMethod) - - def button(text, buttonid, toolTip=None): - b = QtGui.QRadioButton(text) - self.selectButtons.addButton(b, buttonid) - if toolTip is not None: - b.setToolTip(toolTip) - return b - - b1 = button(self.tr("None"), OWRank.SelectNone) - b2 = button(self.tr("All"), OWRank.SelectAll) - b3 = button(self.tr("Manual"), OWRank.SelectManual) - b4 = button(self.tr("Best ranked"), OWRank.SelectNBest) - - s = gui.spin(selMethBox, self, "nSelected", 1, 100, - callback=self.nSelectedChanged) - - grid.addWidget(b1, 0, 0) - grid.addWidget(b2, 1, 0) - grid.addWidget(b3, 2, 0) - grid.addWidget(b4, 3, 0) - grid.addWidget(s, 3, 1) - - self.selectButtons.button(self.selectMethod).setChecked(True) - - selMethBox.layout().addLayout(grid) - - applyButton = gui.button( - selMethBox, self, "Commit", callback=self.apply, default=True, - addSpace=4) - autoApplyCB = gui.checkBox( - selMethBox, self, "autoApply", "Commit automatically") - gui.setStopper( - self, applyButton, autoApplyCB, "dataChanged", self.apply) - - gui.rubber(self.controlArea) - - # Discrete and continuous table views are stacked - self.ranksViewStack = QtGui.QStackedLayout() - self.mainArea.layout().addLayout(self.ranksViewStack) - - self.discRanksView = QtGui.QTableView() - self.ranksViewStack.addWidget(self.discRanksView) - self.discRanksView.setSelectionBehavior(QtGui.QTableView.SelectRows) - self.discRanksView.setSelectionMode(QtGui.QTableView.MultiSelection) - self.discRanksView.setSortingEnabled(True) - - self.discRanksModel = QtGui.QStandardItemModel(self) - self.discRanksModel.setHorizontalHeaderLabels( - ["#"] + [m.shortname for m in self.discMeasures] - ) - - self.discRanksProxyModel = MySortProxyModel(self) - self.discRanksProxyModel.setSourceModel(self.discRanksModel) - self.discRanksView.setModel(self.discRanksProxyModel) - - self.discRanksView.setColumnWidth(0, 20) - self.discRanksView.sortByColumn(1, Qt.DescendingOrder) - self.discRanksView.selectionModel().selectionChanged.connect( - self.onSelectionChanged - ) - self.discRanksView.pressed.connect(self.onSelectItem) - self.discRanksView.horizontalHeader().sectionClicked.connect( - self.headerClick - ) - - if self.headerState[0] is not None: - self.discRanksView.horizontalHeader().restoreState( - self.headerState[0] - ) - - self.contRanksView = QtGui.QTableView() - self.ranksViewStack.addWidget(self.contRanksView) - self.contRanksView.setSelectionBehavior(QtGui.QTableView.SelectRows) - self.contRanksView.setSelectionMode(QtGui.QTableView.MultiSelection) - self.contRanksView.setSortingEnabled(True) - - self.contRanksModel = QtGui.QStandardItemModel(self) - self.contRanksModel.setHorizontalHeaderLabels( - ["#"] + [m.shortname for m in self.contMeasures] - ) - - self.contRanksProxyModel = MySortProxyModel(self) - self.contRanksProxyModel.setSourceModel(self.contRanksModel) - self.contRanksView.setModel(self.contRanksProxyModel) - - self.discRanksView.setColumnWidth(0, 20) - self.contRanksView.sortByColumn(1, Qt.DescendingOrder) - self.contRanksView.selectionModel().selectionChanged.connect( - self.onSelectionChanged - ) - self.contRanksView.pressed.connect(self.onSelectItem) - self.contRanksView.horizontalHeader().sectionClicked.connect( - self.headerClick - ) - if self.headerState[1] is not None: - self.contRanksView.horizontalHeader().restoreState( - self.headerState[1] - ) - - # Switch the current view to Discrete - self.switchRanksMode(0) - self.resetInternals() - self.updateDelegates() - self.updateVisibleScoreColumns() - - self.resize(690, 500) - - self.measure_scores = table((len(self.measures), 0), None) - - def switchRanksMode(self, index): - """ - Switch between discrete/continuous mode - """ - self.rankMode = index - self.ranksViewStack.setCurrentIndex(index) - - if index == 0: - self.ranksView = self.discRanksView - self.ranksModel = self.discRanksModel - self.ranksProxyModel = self.discRanksProxyModel - self.measures = self.discMeasures - else: - self.ranksView = self.contRanksView - self.ranksModel = self.contRanksModel - self.ranksProxyModel = self.contRanksProxyModel - self.measures = self.contMeasures - - self.updateVisibleScoreColumns() - - def setData(self, data): - self.error() - self.resetInternals() - - if data is not None and not data.domain.class_var: - data = None - self.error(100, "") - - self.data = data - if self.data is not None: - attrs = self.data.domain.attributes - self.usefulAttributes = \ - [attr for attr in attrs - if is_discrete(attr) or is_continuous(attr)] - - if is_class_continuous(self.data): - self.switchRanksMode(1) - elif is_class_discrete(self.data): - self.switchRanksMode(0) - else: - # String or other. - self.error(0, "Cannot handle class variable type %r" % - type(self.data.domain.class_var).__name__) - - self.ranksModel.setRowCount(len(attrs)) - for i, a in enumerate(attrs): - if is_discrete(a): - v = len(a.values) - else: - v = "C" - item = ScoreValueItem() - item.setData(v, Qt.DisplayRole) - self.ranksModel.setItem(i, 0, item) - item = QtGui.QStandardItem(a.name) - item.setData(gui.attributeIconDict[a], Qt.DecorationRole) - self.ranksModel.setVerticalHeaderItem(i, item) - - self.measure_scores = table((len(self.measures), - len(attrs)), None) - self.updateScores() - - self.applyIf() - - def updateScores(self, measuresMask=None): - """ - Update the current computed scores. - - If `measuresMask` is given it must be an list of bool values - indicating what measures should be recomputed. - - """ - if not self.data: - return - - measures = self.measures - # Invalidate all warnings - self.warning(range(max(len(self.discMeasures), - len(self.contMeasures)))) - - if measuresMask is None: - # Update all selected measures - measuresMask = [self.selectedMeasures.get(m.name) - for m in measures] - - data = self.data - - for index, (meas, mask) in enumerate(zip(measures, measuresMask)): - if not mask: - continue - estimator = meas.score() - - if not meas.handles_continuous: - data = self.getDiscretizedData() - attr_map = data.attrDict - data = self.data - else: - attr_map, data = {}, self.data - - attr_scores = [] - for attr in data.domain.attributes: - attr = attr_map.get(attr, attr) - s = None - if attr is not None: - try: - s = float(estimator(attr, data)) - except Exception as ex: - self.warning(index, "Error evaluating %r: %r" % - (meas.name, str(ex))) - attr_scores.append(s) - self.measure_scores[index] = attr_scores - - self.updateRankModel(measuresMask) - self.ranksProxyModel.invalidate() - - if self.selectMethod in [0, 2]: - self.autoSelection() - - def updateRankModel(self, measuresMask=None): - """ - Update the rankModel. - """ - values = [] - for i, scores in enumerate(self.measure_scores): - values_one = [] - for j, score in enumerate(scores): - values_one.append(score) - item = self.ranksModel.item(j, i + 1) - if not item: - item = ScoreValueItem() - self.ranksModel.setItem(j, i + 1, item) - item.setData(score, Qt.DisplayRole) - values.append(values_one) - - for i, vals in enumerate(values): - valid_vals = [v for v in vals if v is not None] - if valid_vals: - vmin, vmax = min(valid_vals), max(valid_vals) - for j, v in enumerate(vals): - if v is not None: - # Set the bar ratio role for i-th measure. - ratio = float((v - vmin) / ((vmax - vmin) or 1)) - item = self.ranksModel.item(j, i + 1) - item.setData(ratio, gui.BarRatioRole) - - self.ranksView.setColumnWidth(0, 20) - self.ranksView.resizeRowsToContents() - - def resetInternals(self): - self.data = None - self.discretizedData = None - self.usefulAttributes = [] - self.dataChanged = False - self.ranksModel.setRowCount(0) - - def onSelectionChanged(self, *args): - """ - Called when the ranks view selection changes. - """ - self.applyIf() - - def onSelectItem(self, index): - """ - Called when the user selects/unselects an item in the table view. - """ - self.selectMethod = OWRank.SelectManual # Manual - self.selectButtons.button(self.selectMethod).setChecked(True) - self.applyIf() - - def setSelectMethod(self, method): - if self.selectMethod != method: - self.selectMethod = method - self.selectButtons.button(method).setChecked(True) - self.selectMethodChanged() - - def selectMethodChanged(self): - if self.selectMethod in [OWRank.SelectNone, OWRank.SelectAll, - OWRank.SelectNBest]: - self.autoSelection() - - def nSelectedChanged(self): - self.selectMethod = OWRank.SelectNBest - self.selectButtons.button(self.selectMethod).setChecked(True) - self.selectMethodChanged() - - def getDiscretizedData(self): - if not self.discretizedData: - discretizer = Orange.feature.discretization.EqualFreq(n=4) - contAttrs = [attr for attr in self.data.domain.attributes - if is_continuous(attr)] - at = [] - attrDict = {} - for attri in contAttrs: - try: - nattr = discretizer(attri, self.data) - at.append(nattr) - attrDict[attri] = nattr - except: - pass - domain = Orange.data.Domain(at, self.data.domain.class_var) - self.discretizedData = Orange.data.Table(domain, self.data) - self.discretizedData.attrDict = attrDict - return self.discretizedData - - def autoSelection(self): - selModel = self.ranksView.selectionModel() - rowCount = self.ranksModel.rowCount() - columnCount = self.ranksModel.columnCount() - model = self.ranksProxyModel - - if self.selectMethod == OWRank.SelectNone: - selection = QtGui.QItemSelection() - elif self.selectMethod == OWRank.SelectAll: - selection = QtGui.QItemSelection( - model.index(0, 0), - model.index(rowCount - 1, columnCount - 1) - ) - selModel.select(selection, - QtGui.QItemSelectionModel.ClearAndSelect) - elif self.selectMethod == OWRank.SelectNBest: - nSelected = min(self.nSelected, rowCount) - selection = QtGui.QItemSelection( - model.index(0, 0), - model.index(nSelected - 1, columnCount - 1) - ) - else: - selection = QtGui.QItemSelection() - - selModel.select(selection, QtGui.QItemSelectionModel.ClearAndSelect) - - def headerClick(self, index): - if index >= 1 and self.selectMethod == OWRank.SelectNBest: - # Reselect the top ranked attributes - self.autoSelection() - - # Store the header states - disc = bytes(self.discRanksView.horizontalHeader().saveState()) - cont = bytes(self.contRanksView.horizontalHeader().saveState()) - self.headerState = (disc, cont) - - def measuresSelectionChanged(self, measure=None): - """Measure selection has changed. Update column visibility. - """ - if measure is None: - # Update all scores - measuresMask = None - else: - # Update scores for shown column if they are not yet computed. - shown = self.selectedMeasures.get(measure.name, False) - index = self.measures.index(measure) - if all(s is None for s in self.measure_scores[index]) and shown: - measuresMask = [m == measure for m in self.measures] - else: - measuresMask = [False] * len(self.measures) - self.updateScores(measuresMask) - - self.updateVisibleScoreColumns() - - def updateVisibleScoreColumns(self): - """ - Update the visible columns of the scores view. - """ - for i, measure in enumerate(self.measures): - shown = self.selectedMeasures.get(measure.name) - self.ranksView.setColumnHidden(i + 1, not shown) - - def updateDelegates(self): - self.contRanksView.setItemDelegate( - gui.ColoredBarItemDelegate(self) - ) - - self.discRanksView.setItemDelegate( - gui.ColoredBarItemDelegate(self) - ) - - def sendReport(self): - self.reportData(self.data) - self.reportRaw(gui.reportTable(self.ranksView)) - - def applyIf(self): - if self.autoApply: - self.apply() - else: - self.dataChanged = True - - def apply(self): - selected = self.selectedAttrs() - if not self.data or not selected: - self.send("Reduced Data", None) - else: - - domain = Orange.data.Domain(selected, self.data.domain.class_var, - metas=self.data.domain.metas) - data = Orange.data.Table(domain, self.data) - self.send("Reduced Data", data) - self.dataChanged = False - - def selectedAttrs(self): - if self.data: - inds = self.ranksView.selectionModel().selectedRows(0) - source = self.ranksProxyModel.mapToSource - inds = map(source, inds) - inds = [ind.row() for ind in inds] - return [self.data.domain.attributes[i] for i in inds] - else: - return [] - - -class ScoreValueItem(QtGui.QStandardItem): - """A StandardItem subclass for python objects. - """ - def __init__(self, *args): - super().__init__(*args) - self.setFlags(Qt.ItemIsSelectable | Qt.ItemIsEnabled) - - def __lt__(self, other): - model = self.model() - if model is not None: - role = model.sortRole() - else: - role = Qt.DisplayRole - my = self.data(role) - other = other.data(role) - if my is None: - return True - return my < other - - -class MySortProxyModel(QtGui.QSortFilterProxyModel): - def lessThan(self, left, right): - role = self.sortRole() - left_data = left.data(role) - right_data = right.data(role) - try: - return left_data < right_data - except TypeError: - return left < right - - -if __name__ == "__main__": - a = QtGui.QApplication([]) - ow = OWRank() - ow.setData(Orange.data.Table("wine.tab")) - ow.setData(Orange.data.Table("zoo.tab")) -# ow.setData(Orange.data.Table("servo.tab")) -# ow.setData(Orange.data.Table("iris.tab")) -# ow.setData(orange.ExampleTable("auto-mpg.tab")) - ow.show() - a.exec_() - ow.saveSettings()