Skip to content

Commit

Permalink
Merge pull request #1863 from VesnaT/owrandomize
Browse files Browse the repository at this point in the history
[ENH] OWRandomize: Add a new widget
  • Loading branch information
janezd authored Jan 20, 2017
2 parents 2add4f6 + 3052123 commit 70fd197
Show file tree
Hide file tree
Showing 7 changed files with 245 additions and 9 deletions.
15 changes: 6 additions & 9 deletions Orange/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def __call__(self, data):
class Randomize(Preprocess):
"""
Construct a preprocessor for randomization of classes,
attributes or metas.
attributes and/or metas.
Given a data table, preprocessor returns a new table in
which the data is shuffled.
Expand All @@ -326,8 +326,8 @@ class Randomize(Preprocess):
>>> randomizer = Randomize(Randomize.RandomizeClasses)
>>> randomized_data = randomizer(data)
"""
Type = Enum("Randomize",
"RandomizeClasses, RandomizeAttributes, RandomizeMetas")
Type = Enum("Randomize", dict(RandomizeClasses=1, RandomizeAttributes=2,
RandomizeMetas=4), type=int)
RandomizeClasses, RandomizeAttributes, RandomizeMetas = Type

def __init__(self, rand_type=RandomizeClasses, rand_seed=None):
Expand All @@ -352,15 +352,12 @@ def __call__(self, data):
new_data = Table(data)
new_data.ensure_copy()

if self.rand_type == Randomize.RandomizeClasses:
if self.rand_type & Randomize.RandomizeClasses:
self.randomize(new_data.Y)
elif self.rand_type == Randomize.RandomizeAttributes:
if self.rand_type & Randomize.RandomizeAttributes:
self.randomize(new_data.X)
elif self.rand_type == Randomize.RandomizeMetas:
if self.rand_type & Randomize.RandomizeMetas:
self.randomize(new_data.metas)
else:
raise TypeError('Unsupported type')

return new_data

def randomize(self, table):
Expand Down
16 changes: 16 additions & 0 deletions Orange/tests/test_randomize.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ def test_randomize_metas(self):
self.assertTrue((np.sort(data.metas, axis=0) == np.sort(
data_rand.metas, axis=0)).all())

def test_randomize_all(self):
    """Randomizing classes, attributes and metas at once shuffles each part.

    For every part (Y, X, metas) the randomized table must differ from the
    original somewhere, while the column-wise sorted values stay equal.
    """
    data = self.zoo
    everything = (Randomize.RandomizeClasses
                  | Randomize.RandomizeAttributes
                  | Randomize.RandomizeMetas)
    data_rand = Randomize(rand_type=everything)(data)
    for part in ("Y", "X", "metas"):
        original = getattr(data, part)
        shuffled = getattr(data_rand, part)
        # At least one value must have moved ...
        self.assertTrue((original != shuffled).any())
        # ... but each column still holds the same values.
        same_values = np.sort(original, axis=0) == np.sort(shuffled, axis=0)
        self.assertTrue(same_values.all())

def test_randomize_keep_original_data(self):
data_orig = self.zoo
data = Table("zoo")
Expand Down
122 changes: 122 additions & 0 deletions Orange/widgets/data/owrandomize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import random

from AnyQt.QtCore import Qt
from AnyQt.QtWidgets import QSizePolicy

from Orange.data import Table
from Orange.preprocess import Randomize
from Orange.widgets.settings import Setting
from Orange.widgets.widget import OWWidget
from Orange.widgets import gui


class OWRandomize(OWWidget):
    """Widget that shuffles classes, features and/or metas of a data table.

    The user picks which column groups to shuffle and what proportion of
    rows takes part in the shuffling. Only the sampled rows are passed
    through the ``Randomize`` preprocessor; the remaining rows are sent
    to the output unchanged.
    """
    name = "Randomize"
    description = "Randomize features, class and/or metas in data table."
    icon = "icons/Random.svg"
    priority = 2100

    inputs = [("Data", Table, "set_data")]
    outputs = [("Data", Table)]

    resizing_enabled = False
    want_main_area = False

    # Widget settings and their defaults.
    shuffle_class = Setting(True)
    shuffle_attrs = Setting(False)
    shuffle_metas = Setting(False)
    # Percentage (0-100) of rows that take part in the shuffling.
    scope_prop = Setting(80)
    # Bound to the "Replicable shuffling" check box, so it acts as an
    # on/off flag rather than as an arbitrary seed value.
    random_seed = Setting(0)
    auto_apply = Setting(True)

    def __init__(self):
        """Build the control area: column check boxes, row slider, apply."""
        super().__init__()
        self.data = None

        # GUI
        box = gui.hBox(self.controlArea, "Shuffled columns")
        box.layout().setSpacing(20)
        self.class_check = gui.checkBox(
            box, self, "shuffle_class", "Classes",
            callback=self._shuffle_check_changed)
        self.attrs_check = gui.checkBox(
            box, self, "shuffle_attrs", "Features",
            callback=self._shuffle_check_changed)
        self.metas_check = gui.checkBox(
            box, self, "shuffle_metas", "Metas",
            callback=self._shuffle_check_changed)

        box = gui.vBox(self.controlArea, "Shuffled rows")
        hbox = gui.hBox(box)
        gui.widgetLabel(hbox, "None")
        self.scope_slider = gui.hSlider(
            hbox, self, "scope_prop", minValue=0, maxValue=100, width=140,
            createLabel=False, callback=self._scope_slider_changed)
        gui.widgetLabel(hbox, "All")
        self.scope_label = gui.widgetLabel(
            box, "", alignment=Qt.AlignCenter,
            sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))
        self._set_scope_label()
        gui.separator(box, 10, 10)
        self.replicable_check = gui.checkBox(
            box, self, "random_seed", "Replicable shuffling",
            callback=self._shuffle_check_changed)

        self.apply_button = gui.auto_commit(
            self.controlArea, self, "auto_apply", "&Apply",
            box=False, commit=self.apply)

    @property
    def parts(self):
        """Return the shuffle flags ordered as [classes, features, metas].

        The order is relied upon by ``apply`` (zipped with
        ``Randomize.Type``) and by ``send_report`` (zipped with labels).
        """
        return [self.shuffle_class, self.shuffle_attrs, self.shuffle_metas]

    def _shuffle_check_changed(self):
        """Re-apply the randomization after a check box was toggled."""
        self.apply()

    def _scope_slider_changed(self):
        """Refresh the percentage label and re-apply the randomization."""
        self._set_scope_label()
        self.apply()

    def _set_scope_label(self):
        """Show the current row proportion as a percentage."""
        self.scope_label.setText("{}%".format(self.scope_prop))

    def set_data(self, data):
        """Store the input table (or None) and recompute the output."""
        self.data = data
        self.apply()

    def apply(self):
        """Shuffle the selected parts of the sampled rows and send the result.

        Sends None when there is no input data. Otherwise a copy of the
        input is sent in which ``scope_prop`` percent of the rows (chosen
        at random) have been passed through ``Randomize``.
        """
        data = None
        if self.data:
            rand_seed = self.random_seed or None
            size = int(len(self.data) * self.scope_prop / 100)
            # Use a private generator instead of random.seed(): it draws
            # the same sample for a given seed but leaves the module-level
            # RNG, shared with the rest of the process, untouched.
            rng = random.Random(rand_seed)
            indices = sorted(rng.sample(range(len(self.data)), size))
            # Combine the flags of all checked parts into one value.
            type_ = sum(t for t, p in zip(Randomize.Type, self.parts) if p)
            randomized = Randomize(type_, rand_seed)(self.data[indices])
            data = self.data.copy()
            # Write the randomized rows back to their original positions.
            for i, instance in zip(indices, randomized):
                data[i] = instance
        self.send("Data", data)

    def send_report(self):
        """Report the shuffled columns, row proportion and replicability."""
        labels = ["classes", "features", "metas"]
        include = [label for label, i in zip(labels, self.parts) if i]
        # Produces "a", "a and b" or "a, b and c"; "none" when empty.
        text = "none" if not include else \
            " and ".join(filter(None, (", ".join(include[:-1]), include[-1])))
        # Truthiness test rather than list indexing, so an unexpected
        # stored value cannot raise IndexError.
        replicable = "yes" if self.random_seed else "no"
        self.report_items(
            "Settings",
            [("Shuffled columns", text),
             ("Proportion of shuffled rows", "{}%".format(self.scope_prop)),
             ("Replicable", replicable)])


if __name__ == "__main__":
    from AnyQt.QtWidgets import QApplication

    # Manual run: show the widget fed with the iris data set and save
    # its settings once the event loop exits.
    application = QApplication([])
    widget = OWRandomize()
    widget.set_data(Table("iris"))
    widget.show()
    application.exec_()
    widget.saveSettings()
74 changes: 74 additions & 0 deletions Orange/widgets/data/tests/test_owrandomize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring

import numpy as np

from Orange.data import Table
from Orange.widgets.data.owrandomize import OWRandomize
from Orange.widgets.tests.base import WidgetTest


class TestOWRandomize(WidgetTest):
    """Tests for the OWRandomize widget, run against the zoo data set."""

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.zoo = Table("zoo")

    def setUp(self):
        self.widget = self.create_widget(OWRandomize)

    def _assert_shuffled(self, shuffled, original):
        """Assert the array differs somewhere but holds the same values."""
        self.assertTrue((shuffled != original).any())
        same_values = np.sort(shuffled, axis=0) == np.sort(original, axis=0)
        self.assertTrue(same_values.all())

    def _assert_only_classes_shuffled(self, output):
        """Assert X and metas equal the input and only Y is shuffled."""
        np.testing.assert_array_equal(output.X, self.zoo.X)
        np.testing.assert_array_equal(output.metas, self.zoo.metas)
        self._assert_shuffled(output.Y, self.zoo.Y)

    def test_data(self):
        """Check widget's data and output with data on the input"""
        self.assertEqual(self.widget.data, None)

        self.send_signal("Data", self.zoo)
        self.assertEqual(self.widget.data, self.zoo)
        self._assert_only_classes_shuffled(self.get_output("Data"))

        # Removing the input must clear both the stored data and the output.
        self.send_signal("Data", None)
        self.assertEqual(self.widget.data, None)
        self.assertIsNone(self.get_output("Data"))

    def test_shuffling(self):
        """Check widget's output for all types of shuffling"""
        self.send_signal("Data", self.zoo)
        for check in (self.widget.class_check,
                      self.widget.attrs_check,
                      self.widget.metas_check):
            check.setChecked(True)
        output = self.get_output("Data")
        self._assert_shuffled(output.X, self.zoo.X)
        self._assert_shuffled(output.Y, self.zoo.Y)
        self._assert_shuffled(output.metas, self.zoo.metas)

    def test_scope(self):
        """Check that no more rows change than the selected proportion"""
        self.send_signal("Data", self.zoo)
        output = self.get_output("Data")
        total = len(self.zoo)
        in_scope = int(self.widget.scope_prop / 100 * total)
        unchanged = sum((output.Y == self.zoo.Y).astype(int))
        changed = sum((output.Y != self.zoo.Y).astype(int))
        self.assertGreater(unchanged, total - in_scope)
        self.assertLessEqual(changed, in_scope)

    def test_replicable_shuffling(self):
        """Check widget's output for replicable shuffling """
        self.send_signal("Data", self.zoo)
        self.widget.replicable_check.setChecked(True)
        first = self.get_output("Data")
        self._assert_only_classes_shuffled(first)

        # Applying again must reproduce exactly the same output.
        self.widget.apply()
        second = self.get_output("Data")
        np.testing.assert_array_equal(first.X, second.X)
        np.testing.assert_array_equal(first.Y, second.Y)
        np.testing.assert_array_equal(first.metas, second.metas)
1 change: 1 addition & 0 deletions doc/visual-programming/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Data
widgets/data/transpose
widgets/data/discretize
widgets/data/continuize
widgets/data/randomize
widgets/data/concatenate
widgets/data/paintdata
widgets/data/pythonscript
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 26 additions & 0 deletions doc/visual-programming/source/widgets/data/randomize.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Randomize
=========

.. figure:: icons/randomize.png

Shuffles classes, features and/or metas of data.

Signals
-------

**Inputs**:

- **Data**

Data set.

**Outputs**:

- **Data**

Randomized data set.

Description
-----------

The widget shuffles the classes, features and/or metas of the input data.
Shuffling can be limited to a chosen proportion of the rows and can be made replicable.

0 comments on commit 70fd197

Please sign in to comment.