From ae9b5b0b700085f6bcd46f225387d5f56dc81771 Mon Sep 17 00:00:00 2001 From: Andreja Date: Fri, 7 Dec 2018 12:39:12 +0100 Subject: [PATCH 1/5] append data instead of create new --- Orange/widgets/data/owmergedata.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Orange/widgets/data/owmergedata.py b/Orange/widgets/data/owmergedata.py index 677f9de7d25..f5f78bfc3de 100644 --- a/Orange/widgets/data/owmergedata.py +++ b/Orange/widgets/data/owmergedata.py @@ -360,16 +360,16 @@ def _join_table_by_indices(self, reduced_extra, indices): of rows given in indices""" if not len(indices): return None - domain = Orange.data.Domain( + self.data.domain = Orange.data.Domain( *(getattr(self.data.domain, x) + getattr(reduced_extra.domain, x) for x in ("attributes", "class_vars", "metas"))) - X = self._join_array_by_indices(self.data.X, reduced_extra.X, indices) - Y = self._join_array_by_indices( + self.data.X = self._join_array_by_indices(self.data.X, reduced_extra.X, indices) + self.data.Y = self._join_array_by_indices( np.c_[self.data.Y], np.c_[reduced_extra.Y], indices) - string_cols = [i for i, var in enumerate(domain.metas) if var.is_string] - metas = self._join_array_by_indices( + string_cols = [i for i, var in enumerate(self.data.domain.metas) if var.is_string] + self.data.metas = self._join_array_by_indices( self.data.metas, reduced_extra.metas, indices, string_cols) - return Orange.data.Table.from_numpy(domain, X, Y, metas) + return self.data @staticmethod def _join_array_by_indices(left, right, indices, string_cols=None): From 70231fb3a7e708bf279cc5940101245ba0f06a3b Mon Sep 17 00:00:00 2001 From: Andreja Date: Fri, 14 Dec 2018 11:38:05 +0100 Subject: [PATCH 2/5] copy table, add test for id --- Orange/widgets/data/owmergedata.py | 11 ++++++----- Orange/widgets/data/tests/test_owmergedata.py | 5 ++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Orange/widgets/data/owmergedata.py b/Orange/widgets/data/owmergedata.py index f5f78bfc3de..40a1ca05169 100644 --- a/Orange/widgets/data/owmergedata.py +++ b/Orange/widgets/data/owmergedata.py @@ -360,16 +360,17 @@ def _join_table_by_indices(self, reduced_extra, indices): of rows given in indices""" if not len(indices): return None - self.data.domain = Orange.data.Domain( + merged = self.data.copy() + merged.domain = Orange.data.Domain( *(getattr(self.data.domain, x) + getattr(reduced_extra.domain, x) for x in ("attributes", "class_vars", "metas"))) - self.data.X = self._join_array_by_indices(self.data.X, reduced_extra.X, indices) - self.data.Y = self._join_array_by_indices( + merged.X = self._join_array_by_indices(self.data.X, reduced_extra.X, indices) + merged.Y = self._join_array_by_indices( np.c_[self.data.Y], np.c_[reduced_extra.Y], indices) string_cols = [i for i, var in enumerate(self.data.domain.metas) if var.is_string] - self.data.metas = self._join_array_by_indices( + merged.metas = self._join_array_by_indices( self.data.metas, reduced_extra.metas, indices, string_cols) - return self.data + return merged @staticmethod def _join_array_by_indices(left, right, indices, string_cols=None): diff --git a/Orange/widgets/data/tests/test_owmergedata.py b/Orange/widgets/data/tests/test_owmergedata.py index 5ab9c7875f0..1e48e7fcc57 100644 --- a/Orange/widgets/data/tests/test_owmergedata.py +++ b/Orange/widgets/data/tests/test_owmergedata.py @@ -231,7 +231,10 @@ def test_output_merge_by_attribute_left(self): self.widget.attr_augment_data = domainA[0] self.widget.attr_augment_extra = domainB[0] self.widget.commit() - self.assertTablesEqual(self.get_output(self.widget.Outputs.data), result) + output = self.get_output(self.widget.Outputs.data) + self.assertTablesEqual(output, result) + self.assertNotEqual(id(output), id(self.dataA)) + self.assertNotEqual(id(output), id(self.dataB)) def test_output_merge_by_attribute_inner(self): """Check output for merging option 'Find matching rows' by attribute""" From 5a55dfc656dcc7d059f0e61d4d2fa9921a19ff47 Mon Sep 17 00:00:00 2001 From: Andreja Date: Fri, 21 Dec 2018 10:20:17 +0100 Subject: [PATCH 3/5] switch to from_numpy --- Orange/widgets/data/owmergedata.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Orange/widgets/data/owmergedata.py b/Orange/widgets/data/owmergedata.py index 40a1ca05169..ec4441fecf0 100644 --- a/Orange/widgets/data/owmergedata.py +++ b/Orange/widgets/data/owmergedata.py @@ -360,18 +360,21 @@ def _join_table_by_indices(self, reduced_extra, indices): of rows given in indices""" if not len(indices): return None - merged = self.data.copy() - merged.domain = Orange.data.Domain( + domain = Orange.data.Domain( *(getattr(self.data.domain, x) + getattr(reduced_extra.domain, x) for x in ("attributes", "class_vars", "metas"))) - merged.X = self._join_array_by_indices(self.data.X, reduced_extra.X, indices) - merged.Y = self._join_array_by_indices( + X = self._join_array_by_indices(self.data.X, reduced_extra.X, indices) + Y = self._join_array_by_indices( np.c_[self.data.Y], np.c_[reduced_extra.Y], indices) string_cols = [i for i, var in enumerate(self.data.domain.metas) if var.is_string] - merged.metas = self._join_array_by_indices( + metas = self._join_array_by_indices( self.data.metas, reduced_extra.metas, indices, string_cols) - return merged - + table = Orange.data.Table.from_numpy(domain, X, Y, metas) + table.name = getattr(self.data, 'name') + table.attributes = getattr(self.data, 'attributes') + table.ids = getattr(self.data, 'ids') + return Orange.data.Table.from_numpy(domain, X, Y, metas) + @staticmethod def _join_array_by_indices(left, right, indices, string_cols=None): """Join (horizontally) two arrays, taking pairs of rows given in indices From 0a3c7c4bfbc0e3db00bcadb9bef245826c684c40 Mon Sep 17 00:00:00 2001 From: Andreja Date: Fri, 21 Dec 2018 10:50:18 +0100 Subject: [PATCH 4/5] added tests --- Orange/widgets/data/owmergedata.py | 4 ++-- Orange/widgets/data/tests/test_owmergedata.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Orange/widgets/data/owmergedata.py b/Orange/widgets/data/owmergedata.py index ec4441fecf0..6d13a59dd1d 100644 --- a/Orange/widgets/data/owmergedata.py +++ b/Orange/widgets/data/owmergedata.py @@ -366,14 +366,14 @@ def _join_table_by_indices(self, reduced_extra, indices): X = self._join_array_by_indices(self.data.X, reduced_extra.X, indices) Y = self._join_array_by_indices( np.c_[self.data.Y], np.c_[reduced_extra.Y], indices) - string_cols = [i for i, var in enumerate(self.data.domain.metas) if var.is_string] + string_cols = [i for i, var in enumerate(domain.metas) if var.is_string] metas = self._join_array_by_indices( self.data.metas, reduced_extra.metas, indices, string_cols) table = Orange.data.Table.from_numpy(domain, X, Y, metas) table.name = getattr(self.data, 'name') table.attributes = getattr(self.data, 'attributes') table.ids = getattr(self.data, 'ids') - return Orange.data.Table.from_numpy(domain, X, Y, metas) + return table @staticmethod def _join_array_by_indices(left, right, indices, string_cols=None): diff --git a/Orange/widgets/data/tests/test_owmergedata.py b/Orange/widgets/data/tests/test_owmergedata.py index 1e48e7fcc57..dc3120704e9 100644 --- a/Orange/widgets/data/tests/test_owmergedata.py +++ b/Orange/widgets/data/tests/test_owmergedata.py @@ -32,7 +32,11 @@ def setUpClass(cls): yB = np.array([np.nan, 1, 0]) metasB = np.array([[np.nan], [1], [0]]).astype(object) cls.dataA = Table(domainA, XA, yA, metasA) + cls.dataA.name = 'dataA' + cls.dataA.attributes = 'dataA attributes' cls.dataB = Table(domainB, XB, yB, metasB) + cls.dataB.name = 'dataB' + cls.dataB.attributes = 'dataB attributes' def setUp(self): self.widget = self.create_widget(OWMergeData) @@ -233,8 +237,9 @@ def test_output_merge_by_attribute_left(self): self.widget.commit() output = self.get_output(self.widget.Outputs.data) self.assertTablesEqual(output, result) - self.assertNotEqual(id(output), id(self.dataA)) - self.assertNotEqual(id(output), id(self.dataB)) + self.assertEqual(output.name, self.dataA.name) + np.testing.assert_array_equal(output.ids, self.dataA.ids) + self.assertEqual(output.attributes, self.dataA.attributes) def test_output_merge_by_attribute_inner(self): """Check output for merging option 'Find matching rows' by attribute""" From 383c2aa281f24aa3605425fb26479671e5da48a8 Mon Sep 17 00:00:00 2001 From: Andreja Date: Fri, 21 Dec 2018 22:46:31 +0100 Subject: [PATCH 5/5] add default val --- Orange/widgets/data/owmergedata.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Orange/widgets/data/owmergedata.py b/Orange/widgets/data/owmergedata.py index 6d13a59dd1d..2b1796f2d12 100644 --- a/Orange/widgets/data/owmergedata.py +++ b/Orange/widgets/data/owmergedata.py @@ -370,11 +370,11 @@ def _join_table_by_indices(self, reduced_extra, indices): metas = self._join_array_by_indices( self.data.metas, reduced_extra.metas, indices, string_cols) table = Orange.data.Table.from_numpy(domain, X, Y, metas) - table.name = getattr(self.data, 'name') - table.attributes = getattr(self.data, 'attributes') - table.ids = getattr(self.data, 'ids') - return table - + table.name = getattr(self.data, 'name', '') + table.attributes = getattr(self.data, 'attributes', {}) + table.ids = self.data.ids + return table + @staticmethod def _join_array_by_indices(left, right, indices, string_cols=None): """Join (horizontally) two arrays, taking pairs of rows given in indices