Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] OWRandomize: Add a new widget #1863

Merged
merged 2 commits into from
Jan 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions Orange/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def __call__(self, data):
class Randomize(Preprocess):
"""
Construct a preprocessor for randomization of classes,
attributes or metas.
attributes and/or metas.
Given a data table, preprocessor returns a new table in
which the data is shuffled.
Expand All @@ -326,8 +326,8 @@ class Randomize(Preprocess):
>>> randomizer = Randomize(Randomize.RandomizeClasses)
>>> randomized_data = randomizer(data)
"""
Type = Enum("Randomize",
"RandomizeClasses, RandomizeAttributes, RandomizeMetas")
Type = Enum("Randomize", dict(RandomizeClasses=1, RandomizeAttributes=2,
RandomizeMetas=4), type=int)
RandomizeClasses, RandomizeAttributes, RandomizeMetas = Type

def __init__(self, rand_type=RandomizeClasses, rand_seed=None):
Expand All @@ -352,15 +352,12 @@ def __call__(self, data):
new_data = Table(data)
new_data.ensure_copy()

if self.rand_type == Randomize.RandomizeClasses:
if self.rand_type & Randomize.RandomizeClasses:
self.randomize(new_data.Y)
elif self.rand_type == Randomize.RandomizeAttributes:
if self.rand_type & Randomize.RandomizeAttributes:
self.randomize(new_data.X)
elif self.rand_type == Randomize.RandomizeMetas:
if self.rand_type & Randomize.RandomizeMetas:
self.randomize(new_data.metas)
else:
raise TypeError('Unsupported type')

return new_data

def randomize(self, table):
Expand Down
16 changes: 16 additions & 0 deletions Orange/tests/test_randomize.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ def test_randomize_metas(self):
self.assertTrue((np.sort(data.metas, axis=0) == np.sort(
data_rand.metas, axis=0)).all())

def test_randomize_all(self):
    """Shuffling classes, attributes and metas together permutes each part.

    For every part the values must move (at least one cell differs) while
    the column-wise sorted contents stay identical to the original.
    """
    everything = (Randomize.RandomizeClasses
                  | Randomize.RandomizeAttributes
                  | Randomize.RandomizeMetas)
    original = self.zoo
    shuffled = Randomize(rand_type=everything)(original)
    # Same order of checks as the flags above: classes, attributes, metas.
    for part in ("Y", "X", "metas"):
        before = getattr(original, part)
        after = getattr(shuffled, part)
        self.assertTrue((before != after).any())
        self.assertTrue(
            (np.sort(before, axis=0) == np.sort(after, axis=0)).all())

def test_randomize_keep_original_data(self):
data_orig = self.zoo
data = Table("zoo")
Expand Down
122 changes: 122 additions & 0 deletions Orange/widgets/data/owrandomize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import random

from AnyQt.QtCore import Qt
from AnyQt.QtWidgets import QSizePolicy

from Orange.data import Table
from Orange.preprocess import Randomize
from Orange.widgets.settings import Setting
from Orange.widgets.widget import OWWidget
from Orange.widgets import gui


class OWRandomize(OWWidget):
    """Widget that shuffles chosen column groups in a share of the rows.

    The user picks which parts of the table to shuffle (classes, features,
    metas), what percentage of rows takes part in the shuffle, and whether
    the shuffle should be replicable. The result is sent on the "Data"
    output; ``None`` is sent when there is no (or empty) input data.
    """
    name = "Randomize"
    description = "Randomize features, class and/or metas in data table."
    icon = "icons/Random.svg"
    priority = 2100

    inputs = [("Data", Table, "set_data")]
    outputs = [("Data", Table)]

    resizing_enabled = False
    want_main_area = False

    # Which column groups are shuffled.
    shuffle_class = Setting(True)
    shuffle_attrs = Setting(False)
    shuffle_metas = Setting(False)
    # Percentage (0-100) of rows that take part in the shuffle.
    scope_prop = Setting(80)
    # Bound to the "Replicable shuffling" check box; a truthy value is used
    # as the seed, a falsy one means "unseeded".
    random_seed = Setting(0)
    auto_apply = Setting(True)

    def __init__(self):
        super().__init__()
        self.data = None

        # GUI
        box = gui.hBox(self.controlArea, "Shuffled columns")
        box.layout().setSpacing(20)
        self.class_check = gui.checkBox(
            box, self, "shuffle_class", "Classes",
            callback=self._shuffle_check_changed)
        self.attrs_check = gui.checkBox(
            box, self, "shuffle_attrs", "Features",
            callback=self._shuffle_check_changed)
        self.metas_check = gui.checkBox(
            box, self, "shuffle_metas", "Metas",
            callback=self._shuffle_check_changed)

        box = gui.vBox(self.controlArea, "Shuffled rows")
        hbox = gui.hBox(box)
        gui.widgetLabel(hbox, "None")
        self.scope_slider = gui.hSlider(
            hbox, self, "scope_prop", minValue=0, maxValue=100, width=140,
            createLabel=False, callback=self._scope_slider_changed)
        gui.widgetLabel(hbox, "All")
        self.scope_label = gui.widgetLabel(
            box, "", alignment=Qt.AlignCenter,
            sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))
        self._set_scope_label()
        gui.separator(box, 10, 10)
        self.replicable_check = gui.checkBox(
            box, self, "random_seed", "Replicable shuffling",
            callback=self._shuffle_check_changed)

        self.apply_button = gui.auto_commit(
            self.controlArea, self, "auto_apply", "&Apply",
            box=False, commit=self.apply)

    @property
    def parts(self):
        """Return the three shuffle flags as [classes, features, metas]."""
        return [self.shuffle_class, self.shuffle_attrs, self.shuffle_metas]

    def _shuffle_check_changed(self):
        """Recompute the output after any check box is toggled."""
        self.apply()

    def _scope_slider_changed(self):
        """Refresh the percentage label and recompute the output."""
        self._set_scope_label()
        self.apply()

    def _set_scope_label(self):
        """Show the current slider value as a percentage."""
        self.scope_label.setText("{}%".format(self.scope_prop))

    def set_data(self, data):
        """Input handler: store the incoming table and recompute the output."""
        self.data = data
        self.apply()

    def apply(self):
        """Shuffle the selected parts in a random subset of rows and send it.

        ``scope_prop`` percent of the rows (rounded down) are drawn at
        random, the selected column groups are shuffled among those rows,
        and the shuffled rows are written back at their original positions
        in a copy of the input. ``None`` is sent when the input is missing
        or empty.
        """
        data = None
        if self.data:
            rand_seed = self.random_seed or None
            size = int(len(self.data) * self.scope_prop / 100)
            # Use a private generator instead of random.seed(): reseeding
            # the module-level RNG would change random state for every
            # other user of `random` in the process. For a given seed the
            # drawn sample is the same as with the global generator.
            rng = random.Random(rand_seed)
            indices = sorted(rng.sample(range(len(self.data)), size))
            # Randomize.Type iterates in the order classes, attributes,
            # metas, which matches the order of `parts`; the members are
            # distinct bit flags, so their sum is the combined mask.
            type_ = sum(t for t, p in zip(Randomize.Type, self.parts) if p)
            randomized = Randomize(type_, rand_seed)(self.data[indices])
            data = self.data.copy()
            for i, instance in zip(indices, randomized):
                data[i] = instance
        self.send("Data", data)

    def send_report(self):
        """Add the current shuffle settings to the report."""
        labels = ["classes", "features", "metas"]
        include = [label for label, i in zip(labels, self.parts) if i]
        # Produces "a", "a and b" or "a, b and c" depending on the count.
        text = "none" if not include else \
            " and ".join(filter(None, (", ".join(include[:-1]), include[-1])))
        self.report_items(
            "Settings",
            [("Shuffled columns", text),
             ("Proportion of shuffled rows", "{}%".format(self.scope_prop)),
             # Truthiness test rather than list indexing, so an unexpected
             # stored setting value cannot raise IndexError.
             ("Replicable", "yes" if self.random_seed else "no")])


if __name__ == "__main__":
    from AnyQt.QtWidgets import QApplication

    # Manual smoke test: open the widget on the iris data set and persist
    # its settings once the event loop finishes.
    app = QApplication([])
    widget = OWRandomize()
    widget.set_data(Table("iris"))
    widget.show()
    app.exec_()
    widget.saveSettings()
74 changes: 74 additions & 0 deletions Orange/widgets/data/tests/test_owrandomize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring

import numpy as np

from Orange.data import Table
from Orange.widgets.data.owrandomize import OWRandomize
from Orange.widgets.tests.base import WidgetTest


class TestOWRandomize(WidgetTest):
    """Widget-level tests for OWRandomize, run against the zoo data set."""

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.zoo = Table("zoo")

    def setUp(self):
        self.widget = self.create_widget(OWRandomize)

    def _assert_shuffled(self, shuffled, original):
        """Assert that `shuffled` is a row permutation of `original`.

        At least one cell must differ, and the column-wise sorted contents
        must be identical.
        """
        self.assertTrue((shuffled != original).any())
        self.assertTrue((np.sort(shuffled, axis=0) ==
                         np.sort(original, axis=0)).all())

    def test_data(self):
        """Check widget's data and output with data on the input"""
        self.assertIsNone(self.widget.data)
        self.send_signal("Data", self.zoo)
        self.assertEqual(self.widget.data, self.zoo)
        output = self.get_output("Data")
        # With default settings only the class column is shuffled.
        np.testing.assert_array_equal(output.X, self.zoo.X)
        np.testing.assert_array_equal(output.metas, self.zoo.metas)
        self._assert_shuffled(output.Y, self.zoo.Y)
        self.send_signal("Data", None)
        self.assertIsNone(self.widget.data)
        self.assertIsNone(self.get_output("Data"))

    def test_shuffling(self):
        """Check widget's output for all types of shuffling"""
        self.send_signal("Data", self.zoo)
        self.widget.class_check.setChecked(True)
        self.widget.attrs_check.setChecked(True)
        self.widget.metas_check.setChecked(True)
        output = self.get_output("Data")
        self._assert_shuffled(output.X, self.zoo.X)
        self._assert_shuffled(output.Y, self.zoo.Y)
        self._assert_shuffled(output.metas, self.zoo.metas)

    def test_scope(self):
        """Check that at most the selected share of rows is changed"""
        self.send_signal("Data", self.zoo)
        output = self.get_output("Data")
        n_zoo = len(self.zoo)
        s = int(self.widget.scope_prop / 100 * n_zoo)
        # NOTE(review): the strict inequality assumes that at least one
        # shuffled row ends up with its original class value; this holds in
        # practice for zoo but is not guaranteed by construction.
        self.assertGreater(sum((output.Y == self.zoo.Y).astype(int)), n_zoo - s)
        self.assertLessEqual(sum((output.Y != self.zoo.Y).astype(int)), s)

    def test_replicable_shuffling(self):
        """Check widget's output for replicable shuffling """
        self.send_signal("Data", self.zoo)
        self.widget.replicable_check.setChecked(True)
        output = self.get_output("Data")
        np.testing.assert_array_equal(output.X, self.zoo.X)
        np.testing.assert_array_equal(output.metas, self.zoo.metas)
        self._assert_shuffled(output.Y, self.zoo.Y)
        # Applying again with the same seed must reproduce the same output.
        self.widget.apply()
        output2 = self.get_output("Data")
        np.testing.assert_array_equal(output.X, output2.X)
        np.testing.assert_array_equal(output.Y, output2.Y)
        np.testing.assert_array_equal(output.metas, output2.metas)
1 change: 1 addition & 0 deletions doc/visual-programming/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Data
widgets/data/transpose
widgets/data/discretize
widgets/data/continuize
widgets/data/randomize
widgets/data/concatenate
widgets/data/paintdata
widgets/data/pythonscript
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 26 additions & 0 deletions doc/visual-programming/source/widgets/data/randomize.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Randomize
=========

.. figure:: icons/randomize.png

Shuffles classes, features and/or metas of data.

Signals
-------

**Inputs**:

- **Data**

Data set.

**Outputs**:

- **Data**

Randomized data set.

Description
-----------

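A simple widget that shuffles classes, features and/or metas of data. The proportion of rows included in the shuffle can be set from none to all, and the shuffling can be made replicable so that the same input always yields the same output.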