Skip to content

Commit

Permalink
OWRandomize: Add a new widget
Browse files Browse the repository at this point in the history
  • Loading branch information
VesnaT committed Jan 4, 2017
1 parent 4b03728 commit 6cd7563
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 0 deletions.
119 changes: 119 additions & 0 deletions Orange/widgets/data/owrandomize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import random

from AnyQt.QtCore import Qt
from AnyQt.QtWidgets import QSizePolicy

from Orange.data import Table
from Orange.preprocess import Randomize
from Orange.widgets.settings import Setting
from Orange.widgets.widget import OWWidget
from Orange.widgets import gui


class OWRandomize(OWWidget):
    """Widget that shuffles classes, features and/or metas of a data table.

    The user picks which parts of the table are shuffled, what percentage
    of the rows takes part in the shuffling ("Scope") and whether the
    shuffling is replicable. The input table itself is not modified; a
    copy with the randomized rows written back is sent to the "Data"
    output.
    """
    name = "Randomize"
    description = "Randomize features, class and/or metas in data table."
    icon = "icons/Random.svg"
    priority = 2100

    inputs = [("Data", Table, "set_data")]
    outputs = [("Data", Table)]

    resizing_enabled = False
    want_main_area = False

    # Check box states (0/1) selecting which parts of the table to shuffle.
    shuffle_class = Setting(1)
    shuffle_attrs = Setting(0)
    shuffle_metas = Setting(0)
    # Percentage (0-100, the slider range) of rows taking part in shuffling.
    scope_prop = Setting(80)
    # State of the "Replicable shuffling" check box; doubles as the seed.
    random_seed = Setting(0)
    auto_apply = Setting(True)

    def __init__(self):
        """Build the control area: shuffle options, scope slider, apply."""
        super().__init__()
        self.data = None

        # GUI
        box = gui.vBox(self.controlArea, "Shuffle")
        self.class_check = gui.checkBox(
            box, self, "shuffle_class", "Classes",
            callback=self._shuffle_check_changed)
        self.attrs_check = gui.checkBox(
            box, self, "shuffle_attrs", "Features",
            callback=self._shuffle_check_changed)
        self.metas_check = gui.checkBox(
            box, self, "shuffle_metas", "Metas",
            callback=self._shuffle_check_changed)
        gui.separator(box, 10, 10)
        self.replicable_check = gui.checkBox(
            box, self, "random_seed", "Replicable shuffling",
            callback=self._shuffle_check_changed)

        box = gui.vBox(self.controlArea, "Scope")
        hbox = gui.hBox(box)
        gui.widgetLabel(hbox, "No data")
        self.scope_slider = gui.hSlider(
            hbox, self, "scope_prop", minValue=0, maxValue=100, width=140,
            createLabel=False, callback=self._scope_slider_changed)
        gui.widgetLabel(hbox, "All data")
        self.scope_label = gui.widgetLabel(
            box, "", alignment=Qt.AlignCenter,
            sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))
        self._set_scope_label()

        # NOTE(review): gui.auto_commit presumably ties `apply` to the
        # `auto_apply` setting -- confirm against Orange.widgets.gui.
        self.apply_button = gui.auto_commit(
            self.controlArea, self, "auto_apply", "&Apply",
            box=False, commit=self.apply)

    def _shuffle_check_changed(self):
        """Re-run the randomization after any check box is toggled."""
        self.apply()

    def _scope_slider_changed(self):
        """Refresh the percentage label and re-run the randomization."""
        self._set_scope_label()
        self.apply()

    def _set_scope_label(self):
        """Show the current scope as a percentage below the slider."""
        self.scope_label.setText("{}%".format(self.scope_prop))

    def set_data(self, data):
        """Input handler: store the new table (or None) and re-apply."""
        self.data = data
        self.apply()

    def apply(self):
        """Randomize the selected share of rows and send the result.

        Sends None when there is no input data. Otherwise a random
        subset of `scope_prop` percent of the rows is drawn, passed
        through `Randomize` with the selected parts, and written back
        into a copy of the input at the same row positions.
        """
        data = None
        # Truthiness check: skips None (and presumably an empty table --
        # confirm Table's truth semantics).
        if self.data:
            # An unchecked "Replicable shuffling" box gives None, i.e. an
            # unseeded (non-replicable) generator.
            rand_seed = self.random_seed or None
            # Each flag is multiplied by its 0/1 setting, so only the
            # flags of the checked parts are combined.
            _type = Randomize.RandomizeClasses * self.shuffle_class | \
                Randomize.RandomizeAttributes * self.shuffle_attrs | \
                Randomize.RandomizeMetas * self.shuffle_metas
            size = int(len(self.data) * self.scope_prop / 100)
            # Use a private generator rather than reseeding the global
            # `random` module, which would alter the random state of
            # every other user of that module in the process. For a given
            # seed it yields the same sample as the seeded global one.
            rng = random.Random(rand_seed)
            indices = sorted(rng.sample(range(len(self.data)), size))
            randomized = Randomize(_type, rand_seed)(self.data[indices])
            data = self.data.copy()
            # Put the randomized rows back at their original positions.
            for i, instance in zip(indices, randomized):
                data[i] = instance
        self.send("Data", data)

    def send_report(self):
        """Report which parts are shuffled, the scope and replicability."""
        text = "No shuffling"
        labels = ["Classes", "Features", "Metas"]
        include = [self.shuffle_class, self.shuffle_attrs, self.shuffle_metas]
        if any(include):
            text = ", ".join([l for i, l in zip(include, labels) if i])
        self.report_items("Settings",
                          [("Shuffle", text),
                           ("Scope", "{}% of data".format(self.scope_prop)),
                           ("Replicable", ["no", "yes"][self.random_seed])])


if __name__ == "__main__":
    from AnyQt.QtWidgets import QApplication

    # Manual smoke run: open the widget fed with the "iris" data set and
    # store its settings once the event loop exits.
    application = QApplication([])
    widget = OWRandomize()
    widget.set_data(Table("iris"))
    widget.show()
    application.exec_()
    widget.saveSettings()
74 changes: 74 additions & 0 deletions Orange/widgets/data/tests/test_owrandomize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring

import numpy as np

from Orange.data import Table
from Orange.widgets.data.owrandomize import OWRandomize
from Orange.widgets.tests.base import WidgetTest


class TestOWRandomize(WidgetTest):
    """Tests for the OWRandomize widget, run on the "zoo" data set."""

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.zoo = Table("zoo")

    def setUp(self):
        self.widget = self.create_widget(OWRandomize)

    def test_data(self):
        """Check widget's data and output with data on the input"""
        self.assertIsNone(self.widget.data)
        self.send_signal("Data", self.zoo)
        self.assertEqual(self.widget.data, self.zoo)
        output = self.get_output("Data")
        # With default settings only the class column is shuffled.
        np.testing.assert_array_equal(output.X, self.zoo.X)
        np.testing.assert_array_equal(output.metas, self.zoo.metas)
        self.assertTrue((output.Y != self.zoo.Y).any())
        # Shuffling permutes the values, so the sorted columns must match.
        self.assertTrue((np.sort(output.Y, axis=0) ==
                         np.sort(self.zoo.Y, axis=0)).all())
        self.send_signal("Data", None)
        self.assertIsNone(self.widget.data)
        self.assertIsNone(self.get_output("Data"))

    def test_shuffling(self):
        """Check widget's output for all types of shuffling"""
        self.send_signal("Data", self.zoo)
        self.widget.class_check.setChecked(True)
        self.widget.attrs_check.setChecked(True)
        self.widget.metas_check.setChecked(True)
        output = self.get_output("Data")
        self.assertTrue((output.X != self.zoo.X).any())
        self.assertTrue((np.sort(output.X, axis=0) ==
                         np.sort(self.zoo.X, axis=0)).all())
        self.assertTrue((output.Y != self.zoo.Y).any())
        self.assertTrue((np.sort(output.Y, axis=0) ==
                         np.sort(self.zoo.Y, axis=0)).all())
        self.assertTrue((output.metas != self.zoo.metas).any())
        self.assertTrue((np.sort(output.metas, axis=0) ==
                         np.sort(self.zoo.metas, axis=0)).all())

    def test_scope(self):
        """Check that at most the scoped share of rows changes class"""
        self.send_signal("Data", self.zoo)
        output = self.get_output("Data")
        n_zoo = len(self.zoo)
        s = int(self.widget.scope_prop / 100 * n_zoo)
        n_same = int(np.sum(output.Y == self.zoo.Y))
        n_changed = int(np.sum(output.Y != self.zoo.Y))
        # Rows outside the scope are untouched, so at least n_zoo - s rows
        # keep their class. The bound is not strict: it is reached when
        # every shuffled row happens to receive a different class.
        self.assertGreaterEqual(n_same, n_zoo - s)
        self.assertLessEqual(n_changed, s)

    def test_replicable_shuffling(self):
        """Check widget's output for replicable shuffling """
        self.send_signal("Data", self.zoo)
        self.widget.replicable_check.setChecked(True)
        output = self.get_output("Data")
        np.testing.assert_array_equal(output.X, self.zoo.X)
        np.testing.assert_array_equal(output.metas, self.zoo.metas)
        self.assertTrue((output.Y != self.zoo.Y).any())
        self.assertTrue((np.sort(output.Y, axis=0) ==
                         np.sort(self.zoo.Y, axis=0)).all())
        # Applying again with the same settings must give identical output.
        self.widget.apply()
        output2 = self.get_output("Data")
        np.testing.assert_array_equal(output.X, output2.X)
        np.testing.assert_array_equal(output.Y, output2.Y)
        np.testing.assert_array_equal(output.metas, output2.metas)
1 change: 1 addition & 0 deletions doc/visual-programming/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Data
widgets/data/datasampler
widgets/data/discretize
widgets/data/continuize
widgets/data/randomize
widgets/data/concatenate
widgets/data/transpose
widgets/data/paintdata
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 26 additions & 0 deletions doc/visual-programming/source/widgets/data/randomize.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Randomize
=========

.. figure:: icons/randomize.png

Shuffles classes, features and/or metas of data.

Signals
-------

**Inputs**:

- **Data**

Data set.

**Outputs**:

- **Data**

Randomized data set.

Description
-----------

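A simple widget that shuffles the classes, features and/or metas of the input data. The shuffling can be limited to a chosen percentage of the data (*Scope*) and can be made repeatable (*Replicable shuffling*).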

0 comments on commit 6cd7563

Please sign in to comment.