Skip to content

Commit

Permalink
Discrete variable: remove ordered attribute
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed May 29, 2020
1 parent afa99a2 commit 029071f
Show file tree
Hide file tree
Showing 19 changed files with 81 additions and 148 deletions.
11 changes: 7 additions & 4 deletions Orange/data/io_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,7 @@ def _disc_with_vals_column(data: np.ndarray, col: int,
values="", **_) -> _ColumnProperties:
vals, coltype = _TableBuilder._disc_column(data, col)
return _ColumnProperties(valuemap=Flags.split(values), values=vals,
coltype=coltype, orig_values=vals,
coltype_kwargs={"ordered": True})
coltype=coltype, orig_values=vals)

@staticmethod
def _unknown_column(data: np.ndarray, col: int, **_) -> _ColumnProperties:
Expand Down Expand Up @@ -607,8 +606,12 @@ def _vartype(var):
if var.is_continuous or var.is_string:
return var.TYPE_HEADERS[0]
elif var.is_discrete:
return Flags.join(var.values) if var.ordered else \
var.TYPE_HEADERS[0]
# With a single value the order is irrelevant; with two or more values,
# write them out so that their order is preserved in the file.
return (
Flags.join(var.values) if len(var.values) >= 2
else var.TYPE_HEADERS[0]
)
raise NotImplementedError

return ['continuous'] * data.has_weights() + \
Expand Down
5 changes: 3 additions & 2 deletions Orange/data/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,9 @@ def _column_to_series(col, vals):
result = ()
if col.is_discrete:
codes = pd.Series(vals).fillna(-1).astype(int)
result = (col.name, pd.Categorical.from_codes(codes=codes, categories=col.values,
ordered=col.ordered))
result = (col.name, pd.Categorical.from_codes(
codes=codes, categories=col.values, ordered=True
))
elif col.is_time:
result = (col.name, pd.to_datetime(vals, unit='s').to_series().reset_index()[0])
elif col.is_continuous:
Expand Down
2 changes: 1 addition & 1 deletion Orange/data/tests/test_io_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_column_parts_discrete_values(self):
np.testing.assert_array_equal(column.orig_values,
["red", "red", "green"])
self.assertEqual(column.coltype, DiscreteVariable)
self.assertDictEqual(column.coltype_kwargs, {'ordered': True})
self.assertDictEqual(column.coltype_kwargs, {})

def test_unknown_type_column(self):
data = np.array(self.header0)
Expand Down
25 changes: 12 additions & 13 deletions Orange/data/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,21 +252,17 @@ def test_val_from_str_add(self):
self.assertEqual(var.val_from_str_add("F"), 0)
self.assertEqual(var.val_from_str_add("N"), 2)


def test_repr(self):
var = DiscreteVariable.make("a", values=("F", "M"))
self.assertEqual(
repr(var),
"DiscreteVariable(name='a', values=('F', 'M'))")
var.ordered = True
self.assertEqual(
repr(var),
"DiscreteVariable(name='a', values=('F', 'M'), ordered=True)")

var = DiscreteVariable.make("a", values="1234567")
self.assertEqual(
repr(var),
"DiscreteVariable(name='a', values=('1', '2', '3', '4', '5', '6', '7'))")
"DiscreteVariable(name='a', values=('1', '2', '3', '4', '5', '6', '7'))"
)

def test_no_nonstringvalues(self):
self.assertRaises(TypeError, DiscreteVariable, "foo", values=("a", 42))
Expand Down Expand Up @@ -467,7 +463,6 @@ def varcls_modified(self, name):
var = super().varcls_modified(name)
var.add_value("A")
var.add_value("B")
var.ordered = True
return var

def test_copy_checks_len_values(self):
Expand All @@ -487,6 +482,13 @@ def test_copy_checks_len_values(self):
var2 = var.copy(values=("W", "M"))
self.assertEqual(var2.values, ("W", "M"))

def test_remove_ordered(self):
    """
    Reminder to remove the deprecated ``ordered`` parameter.

    ``ordered`` is deprecated. When this test starts to fail (Orange has
    reached version 3.28), remove the ``ordered`` parameter and then
    remove this test as well.
    """
    # Compare numerically rather than as strings: lexicographically
    # "3.100.0" sorts before "3.28.0", so a plain string comparison would
    # keep passing for such versions and the reminder would never fire.
    major, minor = (int(part) for part in Orange.__version__.split(".")[:2])
    self.assertLess((major, minor), (3, 28))


@variabletest(ContinuousVariable)
class TestContinuousVariable(VariableTest):
Expand Down Expand Up @@ -676,10 +678,7 @@ def varcls_modified(self, name):
"PickleDiscreteVariable",
("with_name", lambda: DiscreteVariable(name="Feature 0")),
("with_str_value", lambda: DiscreteVariable(name="Feature 0",
values=("F", "M"))),
("ordered", lambda: DiscreteVariable(name="Feature 0",
values=("F", "M"),
ordered=True)),
values=("F", "M")))
)


Expand All @@ -691,7 +690,7 @@ def varcls_modified(self, name):

class VariableTestMakeProxy(unittest.TestCase):
def test_make_proxy_disc(self):
abc = DiscreteVariable("abc", values="abc", ordered=True)
abc = DiscreteVariable("abc", values="abc")
abc1 = abc.make_proxy()
abc2 = abc1.make_proxy()
self.assertEqual(abc, abc1)
Expand All @@ -700,7 +699,7 @@ def test_make_proxy_disc(self):
self.assertEqual(hash(abc), hash(abc1))
self.assertEqual(hash(abc1), hash(abc2))

abcx = DiscreteVariable("abc", values="abc", ordered=True)
abcx = DiscreteVariable("abc", values="abc")
self.assertEqual(abc, abcx)
self.assertIsNot(abc, abcx)

Expand Down
29 changes: 16 additions & 13 deletions Orange/data/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,21 +632,16 @@ class DiscreteVariable(Variable):
.. attribute:: values
A list of variable's values.
.. attribute:: ordered
Some algorithms (and, in particular, visualizations) may
sometime reorder the values of the variable, e.g. alphabetically.
This flag hints that the given order of values is "natural"
(e.g. "small", "middle", "large") and should not be changed.
"""

TYPE_HEADERS = ('discrete', 'd', 'categorical')

presorted_values = []

def __init__(self, name="", values=(), ordered=False, compute_value=None,
*, sparse=False):
def __init__(
self, name="", values=(), compute_value=None, sparse=False,
**kwargs
):
""" Construct a discrete variable descriptor with the given values. """
values = TupleList(values) # some people (including me) pass a generator
if not all(isinstance(value, str) for value in values):
Expand All @@ -655,7 +650,13 @@ def __init__(self, name="", values=(), ordered=False, compute_value=None,
super().__init__(name, compute_value, sparse=sparse)
self._values = values
self._value_index = {value: i for i, value in enumerate(values)}
self.ordered = ordered

if "ordered" in kwargs:
warnings.warn(
"ordered is deprecated and does not have effect. It will be "
"removed in future version.",
OrangeDeprecationWarning
)

@property
def values(self):
Expand Down Expand Up @@ -822,17 +823,19 @@ def __reduce__(self):
raise PickleError("Variables without names cannot be pickled")
__dict__ = dict(self.__dict__)
__dict__.pop("_values")
return make_variable, (self.__class__, self._compute_value, self.name,
self.values, self.ordered), \
return (
make_variable,
(self.__class__, self._compute_value, self.name, self.values),
__dict__
)

def copy(self, compute_value=None, *, name=None, values=None, **_):
# pylint: disable=arguments-differ
if values is not None and len(values) != len(self.values):
raise ValueError(
"number of values must match the number of original values")
return super().copy(compute_value=compute_value, name=name,
values=values or self.values, ordered=self.ordered)
values=values or self.values)


class StringVariable(Variable):
Expand Down
1 change: 0 additions & 1 deletion Orange/preprocess/remove.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ def merge_transforms(exp):
new_var = DiscreteVariable(
exp.var.name,
values=exp.var.values,
ordered=exp.var.ordered,
compute_value=merge_lookup(A, B),
sparse=exp.var.sparse,
)
Expand Down
6 changes: 2 additions & 4 deletions Orange/statistics/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,10 @@ def sample(self, size=None, replace=True):
return data.Value(self.variable, value_indices)

def min(self):
if self.variable.ordered:
return self.variable.values[0]
return None

def max(self):
if self.variable.ordered:
return self.variable.values[-1]
return None

def sum(self, *args, **kwargs):
res = super().sum(*args, **kwargs)
Expand Down
6 changes: 3 additions & 3 deletions Orange/tests/test_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def setUp(self):
data.Domain(
attributes=[
data.DiscreteVariable('rgb', values=('r', 'g', 'b', 'a')),
data.DiscreteVariable('num', values=('1', '2', '3'), ordered=True),
data.DiscreteVariable('num', values=('1', '2', '3')),
]
),
X=np.array([
Expand Down Expand Up @@ -201,8 +201,8 @@ def test_min_max(self):
self.assertEqual(self.rgb.min(), None)
self.assertEqual(self.rgb.max(), None)
# Min and max are undefined (None) for discrete variables, regardless of value order
self.assertEqual(self.num.min(), '1')
self.assertEqual(self.num.max(), '3')
self.assertEqual(self.num.min(), None)
self.assertEqual(self.num.max(), None)

def test_array_with_unknowns(self):
d = data.Table("zoo")
Expand Down
3 changes: 2 additions & 1 deletion Orange/tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,8 @@ def test_reprs(self):
flid = IsDefined(negate=True)
flhc = HasClass()
flr = Random()
fld = FilterDiscrete(self.attr_disc, None)
fld = \
FilterDiscrete(self.attr_disc, None)
flsv = SameValue(self.attr_disc, self.value_disc, negate=True)
flc = FilterContinuous(self.vs[0], FilterContinuous.Less, 5)
flc2 = FilterContinuous(self.vs[1], FilterContinuous.Greater, 3)
Expand Down
4 changes: 4 additions & 0 deletions Orange/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,7 @@ def test_pickle_version(self):
self.assertGreaterEqual(PICKLE_PROTOCOL, pickle.DEFAULT_PROTOCOL)
# we should not use a version that is not supported
self.assertLessEqual(PICKLE_PROTOCOL, pickle.HIGHEST_PROTOCOL)


if __name__ == "__main__":
unittest.main()
6 changes: 2 additions & 4 deletions Orange/widgets/data/owcsvimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,12 +1461,10 @@ def pandas_to_table(df):
coldata = series.values # type: pd.Categorical
categories = [str(c) for c in coldata.categories]
var = Orange.data.DiscreteVariable.make(
str(header), values=categories, ordered=coldata.ordered
str(header), values=categories
)
# Remap the coldata into the var.values order/set
coldata = pd.Categorical(
coldata, categories=var.values, ordered=coldata.ordered
)
coldata = pd.Categorical(coldata, categories=var.values)
codes = coldata.codes
assert np.issubdtype(codes.dtype, np.integer)
orangecol = np.array(codes, dtype=np.float)
Expand Down
Loading

0 comments on commit 029071f

Please sign in to comment.