Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] SqlTable: Automatically recognize date/time fields #1424

Merged
merged 2 commits into from
Jul 8, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions Orange/data/sql/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
psycopg2.pool = Orange.misc.import_late_warning("psycopg2.pool")

from .. import domain, variable, table, instance, filter,\
DiscreteVariable, ContinuousVariable, StringVariable
DiscreteVariable, ContinuousVariable, StringVariable, TimeVariable
from Orange.data.sql import filter as sql_filter


Expand Down Expand Up @@ -104,8 +104,12 @@ def get_domain(self, type_hints=None, guess_values=False):

def add_to_sql(var, field_name):
if var.is_continuous:
var.to_sql = ToSql("({})::double precision".format(
self.quote_identifier(field_name)))
if isinstance(var, TimeVariable):
var.to_sql = ToSql("extract(epoch from {})".format(
self.quote_identifier(field_name)))
else:
var.to_sql = ToSql("({})::double precision".format(
self.quote_identifier(field_name)))
elif var.is_discrete:
var.to_sql = ToSql("({})::text".format(
self.quote_identifier(field_name)))
Expand Down Expand Up @@ -137,10 +141,19 @@ def get_variable(self, field_name, type_code, inspect_values=False):
INT_TYPES = (20, 21, 23) # bigint, smallint, int
CHAR_TYPES = (25, 1042, 1043,) # text, char, varchar
BOOLEAN_TYPES = (16,) # bool
DATE_TYPES = (1082, 1114, 1184, ) # date, timestamp, timestamptz
# time, timestamp, timestamptz, timetz
TIME_TYPES = (1083, 1114, 1184, 1266,)

if type_code in FLOATISH_TYPES:
return ContinuousVariable(field_name)

if type_code in TIME_TYPES + DATE_TYPES:
tv = TimeVariable(field_name)
tv.have_date |= type_code in DATE_TYPES
tv.have_time |= type_code in TIME_TYPES
return tv

if type_code in INT_TYPES: # bigint, smallint, int
if inspect_values:
values = self.get_distinct_values(field_name)
Expand Down
9 changes: 9 additions & 0 deletions Orange/data/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,3 +986,12 @@ def parse(self, datestr):
try: return dt.timestamp()
except OverflowError:
return -(self.UNIX_EPOCH - dt).total_seconds()

def to_val(self, s):
    """
    Convert a value of arbitrary type to a float.

    Strings are handed to ``self.parse``; every other input is delegated
    to the parent class' ``to_val``.
    """
    if not isinstance(s, str):
        return super().to_val(s)
    return self.parse(s)
59 changes: 55 additions & 4 deletions Orange/tests/sql/test_sql_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
# pylint: disable=missing-docstring

import unittest
import unittest.mock

import numpy as np
from numpy.testing import assert_almost_equal

from Orange.data import filter, ContinuousVariable, DiscreteVariable, \
StringVariable, Table, Domain
StringVariable, TimeVariable, Table, Domain
from Orange.data.sql.table import SqlTable
from Orange.preprocess.discretize import EqualWidth
from Orange.statistics.basic_stats import BasicStats, DomainBasicStats
Expand Down Expand Up @@ -367,16 +368,66 @@ def test_meta_varchar(self):
sql_table = SqlTable(conn, table_name, inspect_values=True)
self.assertFirstMetaIsInstance(sql_table, StringVariable)

def test_date(self):
def test_time_date(self):
table = np.array(['2014-04-12', '2014-04-13', '2014-04-14',
'2014-04-15', '2014-04-16']).reshape(-1, 1)
conn, table_name = self.create_sql_table(table, ['date'])

sql_table = SqlTable(conn, table_name, inspect_values=False)
self.assertFirstMetaIsInstance(sql_table, StringVariable)
self.assertFirstAttrIsInstance(sql_table, TimeVariable)

sql_table = SqlTable(conn, table_name, inspect_values=True)
self.assertFirstMetaIsInstance(sql_table, StringVariable)
self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_time_time(self):
    # Time-of-day strings, with and without fractional seconds.
    values = ['17:39:51', '11:51:48.46', '05:20:21.492149',
              '21:47:06', '04:47:35.8']
    table = np.array(values).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['time'])

    # The same variable type is expected with and without value inspection.
    for inspect in (False, True):
        sql_table = SqlTable(conn, table_name, inspect_values=inspect)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_time_timetz(self):
    # Time-of-day strings, most of them carrying a UTC offset.
    values = ['17:39:51+0200', '11:51:48.46+01', '05:20:21.4921',
              '21:47:06-0600', '04:47:35.8+0330']
    table = np.array(values).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['timetz'])

    # The same variable type is expected with and without value inspection.
    for inspect in (False, True):
        sql_table = SqlTable(conn, table_name, inspect_values=inspect)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_time_timestamp(self):
    # Full date-and-time strings without any UTC offset.
    values = ['2014-07-15 17:39:51.348149',
              '2008-10-05 11:51:48.468149',
              '2008-11-03 05:20:21.492149',
              '2015-01-02 21:47:06.228149',
              '2016-04-16 04:47:35.892149']
    table = np.array(values).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['timestamp'])

    # The same variable type is expected with and without value inspection.
    for inspect in (False, True):
        sql_table = SqlTable(conn, table_name, inspect_values=inspect)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_time_timestamptz(self):
    # Full date-and-time strings, each with a UTC offset.
    values = ['2014-07-15 17:39:51.348149+0200',
              '2008-10-05 11:51:48.468149+02',
              '2008-11-03 05:20:21.492149+01',
              '2015-01-02 21:47:06.228149+0100',
              '2016-04-16 04:47:35.892149+0330']
    table = np.array(values).reshape(-1, 1)
    conn, table_name = self.create_sql_table(table, ['timestamptz'])

    # The same variable type is expected with and without value inspection.
    for inspect in (False, True):
        sql_table = SqlTable(conn, table_name, inspect_values=inspect)
        self.assertFirstAttrIsInstance(sql_table, TimeVariable)

def test_double_precision(self):
table = np.arange(25).reshape((-1, 1))
Expand Down
4 changes: 3 additions & 1 deletion Orange/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,12 +312,14 @@ class TestTimeVariable(VariableTest):
('1969-12-31 23:59:59', -1, '1969-12-31 23:59:59'),
('1900-01-01', -2208988800, '1900-01-01'),
('nan', np.nan, '?'),
('1444651991.81', 1444651991.81, '2015-10-12 12:13:11.810000'),
(1444651991.81, 1444651991.81, '2015-10-12 12:13:11.810000'),
]

def test_parse_repr(self):
for datestr, timestamp, outstr in self.TESTS:
var = TimeVariable('time')
ts = var.parse(datestr)
ts = var.to_val(datestr) # calls parse for strings
if not np.isnan(ts):
self.assertEqual(ts, timestamp, msg=datestr)
self.assertEqual(var.repr_val(ts), outstr, msg=datestr)
Expand Down