From 6cd75630530bd1cd4898d7a8e967e3998a89c1ad Mon Sep 17 00:00:00 2001
From: Vesna Tanko
Date: Wed, 4 Jan 2017 15:58:18 +0100
Subject: [PATCH] OWRandomize: Add a new widget

---
 Orange/widgets/data/owrandomize.py            | 129 ++++++++++++++++++
 Orange/widgets/data/tests/test_owrandomize.py |  74 ++++++++++
 doc/visual-programming/source/index.rst       |   1 +
 .../source/widgets/data/icons/randomize.png   | Bin 0 -> 831 bytes
 .../source/widgets/data/randomize.rst         |  26 ++++
 5 files changed, 230 insertions(+)
 create mode 100644 Orange/widgets/data/owrandomize.py
 create mode 100644 Orange/widgets/data/tests/test_owrandomize.py
 create mode 100644 doc/visual-programming/source/widgets/data/icons/randomize.png
 create mode 100644 doc/visual-programming/source/widgets/data/randomize.rst

diff --git a/Orange/widgets/data/owrandomize.py b/Orange/widgets/data/owrandomize.py
new file mode 100644
index 00000000000..3294df8bf81
--- /dev/null
+++ b/Orange/widgets/data/owrandomize.py
@@ -0,0 +1,129 @@
+import random
+
+from AnyQt.QtCore import Qt
+from AnyQt.QtWidgets import QSizePolicy
+
+from Orange.data import Table
+from Orange.preprocess import Randomize
+from Orange.widgets.settings import Setting
+from Orange.widgets.widget import OWWidget
+from Orange.widgets import gui
+
+
+class OWRandomize(OWWidget):
+    """Shuffle classes, features and/or metas in a share of the rows."""
+    name = "Randomize"
+    description = "Randomize features, class and/or metas in data table."
+    icon = "icons/Random.svg"
+    priority = 2100
+
+    inputs = [("Data", Table, "set_data")]
+    outputs = [("Data", Table)]
+
+    resizing_enabled = False
+    want_main_area = False
+
+    shuffle_class = Setting(1)
+    shuffle_attrs = Setting(0)
+    shuffle_metas = Setting(0)
+    scope_prop = Setting(80)
+    random_seed = Setting(0)
+    auto_apply = Setting(True)
+
+    def __init__(self):
+        super().__init__()
+        self.data = None
+
+        # GUI
+        box = gui.vBox(self.controlArea, "Shuffle")
+        self.class_check = gui.checkBox(
+            box, self, "shuffle_class", "Classes",
+            callback=self._shuffle_check_changed)
+        self.attrs_check = gui.checkBox(
+            box, self, "shuffle_attrs", "Features",
+            callback=self._shuffle_check_changed)
+        self.metas_check = gui.checkBox(
+            box, self, "shuffle_metas", "Metas",
+            callback=self._shuffle_check_changed)
+        gui.separator(box, 10, 10)
+        self.replicable_check = gui.checkBox(
+            box, self, "random_seed", "Replicable shuffling",
+            callback=self._shuffle_check_changed)
+
+        box = gui.vBox(self.controlArea, "Scope")
+        hbox = gui.hBox(box)
+        gui.widgetLabel(hbox, "No data")
+        self.scope_slider = gui.hSlider(
+            hbox, self, "scope_prop", minValue=0, maxValue=100, width=140,
+            createLabel=False, callback=self._scope_slider_changed)
+        gui.widgetLabel(hbox, "All data")
+        self.scope_label = gui.widgetLabel(
+            box, "", alignment=Qt.AlignCenter,
+            sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))
+        self._set_scope_label()
+
+        self.apply_button = gui.auto_commit(
+            self.controlArea, self, "auto_apply", "&Apply",
+            box=False, commit=self.apply)
+
+    def _shuffle_check_changed(self):
+        self.apply()
+
+    def _scope_slider_changed(self):
+        self._set_scope_label()
+        self.apply()
+
+    def _set_scope_label(self):
+        self.scope_label.setText("{}%".format(self.scope_prop))
+
+    def set_data(self, data):
+        """Store the input data and call ``apply``."""
+        self.data = data
+        self.apply()
+
+    def apply(self):
+        """Send the data with ``Randomize`` applied to a sample of rows.
+
+        ``scope_prop`` percent of the rows are drawn with a private
+        ``random.Random`` instance, so the state of the module-level
+        ``random`` generator is left untouched. ``None`` is sent when
+        ``self.data`` is falsy.
+        """
+        data = None
+        if self.data:
+            rand_seed = self.random_seed or None
+            _type = Randomize.RandomizeClasses * self.shuffle_class | \
+                Randomize.RandomizeAttributes * self.shuffle_attrs | \
+                Randomize.RandomizeMetas * self.shuffle_metas
+            size = int(len(self.data) * self.scope_prop / 100)
+            rng = random.Random(rand_seed)
+            indices = sorted(rng.sample(range(len(self.data)), size))
+            randomized = Randomize(_type, rand_seed)(self.data[indices])
+            data = self.data.copy()
+            for i, instance in zip(indices, randomized):
+                data[i] = instance
+        self.send("Data", data)
+
+    def send_report(self):
+        """Report the shuffled parts, the scope and the replicable flag."""
+        text = "No shuffling"
+        labels = ["Classes", "Features", "Metas"]
+        include = [self.shuffle_class, self.shuffle_attrs, self.shuffle_metas]
+        if sum(include):
+            text = ", ".join([l for i, l in zip(include, labels) if i])
+        self.report_items("Settings",
+                          [("Shuffle", text),
+                           ("Scope", "{}% of data".format(self.scope_prop)),
+                           ("Replicable", ["no", "yes"][self.random_seed])])
+
+
+if __name__ == "__main__":
+    from AnyQt.QtWidgets import QApplication
+
+    app = QApplication([])
+    ow = OWRandomize()
+    d = Table("iris")
+    ow.set_data(d)
+    ow.show()
+    app.exec_()
+    ow.saveSettings()
diff --git a/Orange/widgets/data/tests/test_owrandomize.py b/Orange/widgets/data/tests/test_owrandomize.py
new file mode 100644
index 00000000000..d1cc1460dd3
--- /dev/null
+++ b/Orange/widgets/data/tests/test_owrandomize.py
@@ -0,0 +1,74 @@
+# Test methods with long descriptive names can omit docstrings
+# pylint: disable=missing-docstring
+
+import numpy as np
+
+from Orange.data import Table
+from Orange.widgets.data.owrandomize import OWRandomize
+from Orange.widgets.tests.base import WidgetTest
+
+
+class TestOWRandomize(WidgetTest):
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.zoo = Table("zoo")
+
+    def setUp(self):
+        self.widget = self.create_widget(OWRandomize)
+
+    def test_data(self):
+        """Check widget's data and output with data on the input"""
+        self.assertEqual(self.widget.data, None)
+        self.send_signal("Data", self.zoo)
+        self.assertEqual(self.widget.data, self.zoo)
+        output = self.get_output("Data")
+        np.testing.assert_array_equal(output.X, self.zoo.X)
+        np.testing.assert_array_equal(output.metas, self.zoo.metas)
+        self.assertTrue((output.Y != self.zoo.Y).any())
+        self.assertTrue((np.sort(output.Y, axis=0) ==
+                         np.sort(self.zoo.Y, axis=0)).all())
+        self.send_signal("Data", None)
+        self.assertEqual(self.widget.data, None)
+        self.assertIsNone(self.get_output("Data"))
+
+    def test_shuffling(self):
+        """Check widget's output for all types of shuffling"""
+        self.send_signal("Data", self.zoo)
+        self.widget.class_check.setChecked(True)
+        self.widget.attrs_check.setChecked(True)
+        self.widget.metas_check.setChecked(True)
+        output = self.get_output("Data")
+        self.assertTrue((output.X != self.zoo.X).any())
+        self.assertTrue((np.sort(output.X, axis=0) ==
+                         np.sort(self.zoo.X, axis=0)).all())
+        self.assertTrue((output.Y != self.zoo.Y).any())
+        self.assertTrue((np.sort(output.Y, axis=0) ==
+                         np.sort(self.zoo.Y, axis=0)).all())
+        self.assertTrue((output.metas != self.zoo.metas).any())
+        self.assertTrue((np.sort(output.metas, axis=0) ==
+                         np.sort(self.zoo.metas, axis=0)).all())
+
+    def test_scope(self):
+        self.send_signal("Data", self.zoo)
+        output = self.get_output("Data")
+        n_zoo = len(self.zoo)
+        s = int(self.widget.scope_prop / 100 * n_zoo)
+        self.assertGreater(sum((output.Y == self.zoo.Y).astype(int)), n_zoo - s)
+        self.assertLessEqual(sum((output.Y != self.zoo.Y).astype(int)), s)
+
+    def test_replicable_shuffling(self):
+        """Check widget's output for replicable shuffling """
+        self.send_signal("Data", self.zoo)
+        self.widget.replicable_check.setChecked(True)
+        output = self.get_output("Data")
+        np.testing.assert_array_equal(output.X, self.zoo.X)
+        np.testing.assert_array_equal(output.metas, self.zoo.metas)
+        self.assertTrue((output.Y != self.zoo.Y).any())
+        self.assertTrue((np.sort(output.Y, axis=0) ==
+                         np.sort(self.zoo.Y, axis=0)).all())
+        self.widget.apply()
+        output2 = self.get_output("Data")
+        np.testing.assert_array_equal(output.X, output2.X)
+        np.testing.assert_array_equal(output.Y, output2.Y)
+        np.testing.assert_array_equal(output.metas, output2.metas)
diff --git a/doc/visual-programming/source/index.rst b/doc/visual-programming/source/index.rst
index 912fa0fd4e3..ef72856ee18 100644
--- a/doc/visual-programming/source/index.rst
+++ b/doc/visual-programming/source/index.rst
@@ -32,6 +32,7 @@ Data
    widgets/data/datasampler
    widgets/data/discretize
    widgets/data/continuize
+   widgets/data/randomize
    widgets/data/concatenate
    widgets/data/transpose
    widgets/data/paintdata
diff --git a/doc/visual-programming/source/widgets/data/icons/randomize.png b/doc/visual-programming/source/widgets/data/icons/randomize.png
new file mode 100644
index 0000000000000000000000000000000000000000..2353b918c888693a7f97b6b975011da423951c6d
GIT binary patch
literal 831
zcmV-F1Hk-=P)%H&UidCbM82E
z=L+Ww2hNvrvV|WjHa4gl7;DYGL;#Lhj*<@VyoWR>y+GF7(coBy>_|H1A9m8b}&BjVR
zh_%>&tvHBtg+{)>vda`cOAN?g1(Cnu6KuqrO+G)zP2D=X8joRblcRs}surWsh6bL%
z?@etT#%5e!uw8{+xT{OurQCt9o1A}xc@^tU#|KSyf593IwXyFD4LpwHsgNUL0k&fn
zzLakiZ(v5h1Xg7}z7?WQVYG$ah*(PrPGa?hsH?~bcFPyw4TbVye3h0T!)+6)t|BwV
zxsl*U(W#6N<=gP`iI-NLIrvG4{Y)&(=hNS3aQEP(Rp(axBSgQ8v+4W#L5dnLSxAyS
z+XgGAJB|qP=df_VTE@+o{P<6>Q|p6j($TyI-ooGM`wN2><5mO0kK=4KHqCD@_=
z*gAJ1RDUxdpp=w!VBO=|*GQMXpwOxZN@Vyv~GX=rTw-FDkHs7l_

literal 0
HcmV?d00001

diff --git a/doc/visual-programming/source/widgets/data/randomize.rst b/doc/visual-programming/source/widgets/data/randomize.rst
new file mode 100644
index 00000000000..0411f0d082b
--- /dev/null
+++ b/doc/visual-programming/source/widgets/data/randomize.rst
@@ -0,0 +1,26 @@
+Randomize
+=========
+
+.. figure:: icons/randomize.png
+
+Shuffles classes, features and/or metas of data.
+
+Signals
+-------
+
+**Inputs**:
+
+- **Data**
+
+  Data set.
+
+**Outputs**:
+
+- **Data**
+
+  Randomized data set.
+
+Description
+-----------
+
+A simple widget that shuffles classes, features and/or metas of data.