Skip to content

Commit

Permalink
Merge pull request #4601 from robertcv/enh/editdomain_time
Browse files Browse the repository at this point in the history
[FIX] Edit Domain: Improve Text/Categorical to Time conversion
  • Loading branch information
lanzagar authored Apr 5, 2020
2 parents c999ab9 + fcf24c6 commit 3f4c786
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 8 deletions.
28 changes: 21 additions & 7 deletions Orange/widgets/data/oweditdomain.py
Original file line number Diff line number Diff line change
Expand Up @@ -2527,8 +2527,16 @@ def mapper(arr, out=None, dtype=dtype, **kwargs):
def time_parse(values: Sequence[str], name="__"):
    """
    Parse strings into a new TimeVariable and the corresponding values.

    Each string is first passed to the variable's own `parse` (wrapped with
    `ftry`, presumably substituting NaN when `parse` raises ValueError).
    If that yields nothing but NaN, the whole sequence is re-parsed with
    `pd.to_datetime(..., errors="coerce")` and converted with
    `datetime_to_epoch`, like ReparseTimeTransform does.

    Parameters
    ----------
    values : Sequence[str]
        Strings to parse.
    name : str
        Name given to the created variable.

    Returns
    -------
    tvar : Orange.data.TimeVariable
        The new variable; `have_date`/`have_time` are set here only when
        the pandas fallback parsed at least one value.
    values : list or np.ndarray
        Parsed values: a list from the first pass, or the array returned
        by `datetime_to_epoch` when the pandas fallback was used.
    """
    tvar = Orange.data.TimeVariable(name)
    parse_time = ftry(tvar.parse, ValueError, np.nan)
    _values = [parse_time(v) for v in values]
    if np.all(np.isnan(_values)):
        # try parsing it with pandas (like in transform)
        dti = pd.to_datetime(values, errors="coerce")
        _values = datetime_to_epoch(dti)
        # Private pandas attribute; fall back to "has a time part" if it
        # is ever missing.
        date_only = getattr(dti, "_is_dates_only", False)
        # NaT compares unequal to everything (itself included), so the
        # former `dti != pd.NaT` test was always true. Test for parsed
        # values explicitly; `any` keeps the flags for partially parsed
        # columns and only skips them when pandas parsed nothing at all.
        if dti.notna().any():
            tvar.have_date = True
            tvar.have_time = not date_only
    return tvar, _values


# Element-wise `str`: a NumPy ufunc built from `str` with one input and
# one output.
as_string = np.frompyfunc(str, 1, 1)
Expand Down Expand Up @@ -2734,17 +2742,23 @@ def transform(self, c):
raise TypeError


def datetime_to_epoch(dti: pd.DatetimeIndex) -> np.ndarray:
    """
    Convert datetimes to float seconds since the epoch.

    The values are taken at microsecond resolution and divided by 1e6;
    missing entries (NaT) become NaN in the result.
    """
    micros = dti.values.astype("M8[us]")
    seconds = micros.astype(float) / 1e6
    # NaT would otherwise surface as a huge negative number after the
    # float cast, so replace it explicitly.
    return np.where(np.isnat(micros), np.nan, seconds)


class ReparseTimeTransform(Transformation):
    """
    Re-parse the string representation of a column as datetime values.
    """
    def transform(self, c):
        # Render the column as strings, let pandas parse them (anything it
        # cannot parse is coerced to NaT) and convert the result with
        # datetime_to_epoch.
        text = column_str_repr(self.variable, c)
        parsed = pd.to_datetime(text, errors="coerce")
        return datetime_to_epoch(parsed)


class LookupMappingTransform(Transformation):
Expand Down
16 changes: 15 additions & 1 deletion Orange/widgets/data/tests/test_oweditdomain.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import numpy as np
from numpy.testing import assert_array_equal
import pandas as pd

from AnyQt.QtCore import QItemSelectionModel, Qt, QItemSelection
from AnyQt.QtWidgets import QAction, QComboBox, QLineEdit, \
Expand All @@ -33,7 +34,7 @@
table_column_data, ReinterpretVariableEditor, CategoricalVector,
VariableEditDelegate, TransformRole,
RealVector, TimeVector, StringVector, make_dict_mapper, DictMissingConst,
LookupMappingTransform, as_float_or_nan, column_str_repr,
LookupMappingTransform, as_float_or_nan, column_str_repr, time_parse,
GroupItemsDialog)
from Orange.widgets.data.owcolor import OWColor, ColorRole
from Orange.widgets.tests.base import WidgetTest, GuiTest
Expand Down Expand Up @@ -917,6 +918,19 @@ def test_column_str_repr(self):
d = column_str_repr(v, np.array([0., np.nan, 1.0]))
assert_array_equal(d, ["00:00:00", "?", "00:00:01"])

def test_time_parse(self):
    """time_parse handles m/d/yy dates and flags them as date-only."""
    date = ["1/22/20", "1/23/20", "1/24/20"]
    # time_parse reads a private pandas attribute; check that it still
    # exists. A unittest assertion is used instead of a bare `assert`,
    # which is stripped when Python runs with -O.
    self.assertTrue(hasattr(pd.DatetimeIndex, '_is_dates_only'))

    tval, values = time_parse(date)

    self.assertTrue(tval.have_date)
    self.assertFalse(tval.have_time)
    self.assertListEqual(list(values),
                         [1579651200.0, 1579737600.0, 1579824000.0])


class TestLookupMappingTransform(TestCase):
def setUp(self) -> None:
Expand Down

0 comments on commit 3f4c786

Please sign in to comment.