Skip to content

Commit

Permalink
Merge pull request #2064 from jerneju/zerodivisionerror-owpreprocess
Browse files Browse the repository at this point in the history
[FIX] owpreprocess: Handle columns with only NaN values
  • Loading branch information
lanzagar authored Mar 3, 2017
2 parents c3e2f7a + 08c7255 commit 34b5e94
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 18 deletions.
25 changes: 9 additions & 16 deletions Orange/widgets/data/owpreprocess.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,22 @@
import sys
import bisect
import contextlib
import warnings
from collections import OrderedDict
import pkg_resources

import numpy

from AnyQt.QtWidgets import (
QWidget, QButtonGroup, QGroupBox, QRadioButton, QSlider, QFocusFrame,
QDoubleSpinBox, QComboBox, QSpinBox, QListView, QDockWidget, QLabel,
QScrollArea, QVBoxLayout, QHBoxLayout, QFormLayout, QSpacerItem,
QSizePolicy, QStyle, QStylePainter, QAction, QLabel,
QApplication, QCheckBox
QWidget, QButtonGroup, QGroupBox, QRadioButton, QSlider,
QDoubleSpinBox, QComboBox, QSpinBox, QListView, QLabel,
QScrollArea, QVBoxLayout, QHBoxLayout, QFormLayout,
QSizePolicy, QApplication, QCheckBox
)

from AnyQt.QtGui import (
QCursor, QIcon, QPainter, QPixmap, QStandardItemModel, QStandardItem,
QDrag, QKeySequence
QIcon, QStandardItemModel, QStandardItem
)

from AnyQt.QtCore import (
Qt, QObject, QEvent, QSize, QModelIndex, QMimeData, QTimer
Qt, QEvent, QSize, QMimeData, QTimer
)

from AnyQt.QtCore import pyqtSignal as Signal, pyqtSlot as Slot
Expand All @@ -34,7 +29,6 @@
from Orange.widgets import widget, gui, settings
from Orange.widgets.utils.overlay import OverlayWidget
from Orange.widgets.utils.sql import check_sql_input
from Orange.util import Reprable

from Orange.widgets.data.utils.preprocess import (
BaseEditor, blocked, StandardItemModel, DescriptionRole,
Expand Down Expand Up @@ -271,9 +265,9 @@ class ImputeEditor(BaseEditor):

Imputers = {
NoImputation: (None, {}),
# Constant: (None, {"value": 0})
# Constant: (None, {"value": 0})
Average: (preprocess.impute.Average(), {}),
# Model: (preprocess.impute.Model, {}),
# Model: (preprocess.impute.Model, {}),
Random: (preprocess.impute.Random(), {}),
DropRows: (None, {})
}
Expand Down Expand Up @@ -1175,7 +1169,7 @@ def apply(self):
self.error()
try:
data = preprocessor(self.data)
except ValueError as e:
except (ValueError, ZeroDivisionError) as e:
self.error(str(e))
return
else:
Expand Down Expand Up @@ -1258,4 +1252,3 @@ def test_main(argv=sys.argv):

if __name__ == "__main__":
sys.exit(test_main())

16 changes: 15 additions & 1 deletion Orange/widgets/data/tests/test_owpreprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from Orange.preprocess import discretize, impute, fss, score
from Orange.widgets.data import owpreprocess
from Orange.widgets.data.owpreprocess import OWPreprocess
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.base import WidgetTest, datasets


class TestOWPreprocess(WidgetTest):
Expand Down Expand Up @@ -44,6 +44,20 @@ def test_normalize(self):
np.testing.assert_allclose(output.X.mean(0), 0, atol=1e-7)
np.testing.assert_allclose(output.X.std(0), 1, atol=1e-7)

def test_data_column_nans(self):
    """
    Scaling data with an all-NaN column must not crash the widget.

    When every row of a column is NaN, normalization raised
    ZeroDivisionError ("Weights sum to zero, can't be normalized").
    GH-2064
    """
    # Scale preprocessor configured to center by mean and scale by std.
    scale_params = {
        "center": Scale.CenteringType.Mean,
        "scale": Scale.ScalingType.Std,
    }
    stored = {"preprocessors": [("orange.preprocess.scale", scale_params)]}
    self.widget.set_model(self.widget.load(stored))
    # Sending the data triggers preprocessing; the test passes if nothing
    # propagates out of the widget.
    self.send_signal("Data", datasets.data_one_column_nans())


# Test for editors
class TestDiscretizeEditor(WidgetTest):
Expand Down
25 changes: 24 additions & 1 deletion Orange/widgets/tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from Orange.classification.base_classification import (
LearnerClassification, ModelClassification
)
from Orange.data import Table
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
from Orange.modelling import Fitter
from Orange.preprocess import RemoveNaNColumns, Randomize
from Orange.preprocess.preprocess import PreprocessorList
Expand Down Expand Up @@ -709,3 +709,26 @@ def missing_data_3(cls):
data : Orange.data.Table
"""
return Table(cls.path("missing_data_3.tab"))

@classmethod
def data_one_column_nans(cls):
    """
    Data set with two continuous features and one discrete feature.

    The continuous column "b" is filled with empty strings in every row,
    which are intended to be read as missing values (NaN).

    Returns
    -------
    data : Orange.data.Table
    """
    domain = Domain([
        ContinuousVariable("a"),
        ContinuousVariable("b"),
        DiscreteVariable("c", values=["y", "n"]),
    ])
    column_a = [42.48, 16.84, 15.23, 23.8]
    column_b = [""] * 4  # no value in any row of "b"
    column_c = "ynyn"    # iterated character by character: one label per row
    rows = list(zip(column_a, column_b, column_c))
    return Table(domain, rows)

0 comments on commit 34b5e94

Please sign in to comment.