biolab · markotoplak · Nov 18, 2022 · Nov 15, 2022 · Nov 15, 2022
diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py
@@ -342,20 +342,14 @@ def stats(X, weights=None, compute_variance=False):
     is_numeric = np.issubdtype(X.dtype, np.number)
     is_sparse = sp.issparse(X)
     weighted = weights is not None and X.dtype != object
-
-    def weighted_mean():
-        if is_sparse:
-            w_X = X.multiply(sp.csr_matrix(np.c_[weights] / sum(weights)))
-            return np.asarray(w_X.sum(axis=0)).ravel()
-        else:
-            return bn.nansum(X * np.c_[weights] / sum(weights), axis=0)
+    weights = weights if weighted else None
 
     if X.size and is_numeric and not is_sparse:
         nans = np.isnan(X).sum(axis=0)
         return np.column_stack((
             np.nanmin(X, axis=0),
             np.nanmax(X, axis=0),
-            nanmean(X, axis=0) if not weighted else weighted_mean(),
+            nanmean(X, axis=0, weights=weights),
             nanvar(X, axis=0) if compute_variance else \
                 np.zeros(X.shape[1] if X.ndim == 2 else 1),
             nans,
@@ -369,7 +363,7 @@ def weighted_mean():
         return np.column_stack((
             nanmin(X, axis=0),
             nanmax(X, axis=0),
-            nanmean(X, axis=0) if not weighted else weighted_mean(),
+            nanmean(X, axis=0, weights=weights),
             np.zeros(X.shape[1]),      # variance not supported
             X.shape[0] - non_zero,
             non_zero))
@@ -453,15 +447,54 @@ def nansum_sparse(x):
     return _apply_func(x, np.nansum, nansum_sparse, axis=axis)
 
 
-def nanmean(x, axis=None):
-    """ Equivalent of np.nanmean that supports sparse or dense matrices. """
+def nanmean(x, axis=None, weights=None):
+    """
+    Equivalent of np.nanmean that supports sparse or dense matrices.
+
+    Parameters
+    ----------
+    x : array-like or scipy.sparse matrix
+        Data to average.
+    axis : int, optional
+        Axis along which the means are computed.
+    weights : array-like, optional
+        One weight per element along `axis`; requires `axis` to be given.
+        For dense `x` only axes 0 and 1 are supported, and the weights of
+        NaN entries are left out of the normalization.
+
+    Raises
+    ------
+    NotImplementedError
+        If `weights` is given with ``axis=None``, or with an axis other
+        than 0 or 1 for dense `x`.
+    """
+    if axis is None and weights is not None:
+        raise NotImplementedError("weights are only supported if axis is defined")
+
     if not sp.issparse(x):
-        means = bn.nanmean(x, axis=axis)
+        if weights is None:
+            means = bn.nanmean(x, axis=axis)
+        else:
+            if axis not in (0, 1):
+                raise NotImplementedError(
+                    "weights are only supported for axis 0 or 1")
+            weights = np.asarray(weights)  # also accept lists and tuples
+            if np.ndim(x) > 1:
+                # shape the weights so they broadcast along `axis`;
+                # for 1-D x they already line up element-wise
+                shape = [1] * np.ndim(x)
+                shape[axis] = -1
+                weights = weights.reshape(shape)
+            nanw = ~np.isnan(x) * weights  # do not divide by non-used weights
+            means = bn.nansum(x * weights, axis=axis) / np.sum(nanw, axis=axis)
     elif axis is None:
         means, _ = mean_variance_axis(x, axis=0)
         means = np.nanmean(means)
     else:
-        means, _ = mean_variance_axis(x, axis=axis)
+        # mean_variance_axis is picky regarding the input type
+        if weights is not None:
+            weights = np.asarray(weights, dtype=float)
+        means, _ = mean_variance_axis(x, axis=axis, weights=weights)
 
     return means
 

diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py
@@ -128,6 +128,17 @@ def test_stats_weights(self):
         X = np.arange(4).reshape(2, 2).astype(object)
         np.testing.assert_equal(stats(X, weights), stats(X))
 
+    def test_stats_nans_neutral_weights(self):
+        X = np.arange(4).reshape(2, 2).astype(float)
+        X[0, 0] = np.nan  # one missing value in the first column
+        np.testing.assert_equal(stats(X, weights=np.array([1, 1])), stats(X))  # all-ones weights == unweighted
+
+    def test_stats_nans_neutral_weights_sparse(self):
+        X = np.arange(4).reshape(2, 2).astype(float)
+        X = csr_matrix(X)  # exercise the sparse code path of stats
+        X[0, 0] = np.nan  # NaN replaces the zero at position (0, 0)
+        np.testing.assert_equal(stats(X, weights=np.array([1, 1])), stats(X))  # all-ones weights == unweighted
+
     def test_stats_weights_sparse(self):
         X = np.arange(4).reshape(2, 2).astype(float)
         X = csr_matrix(X)
@@ -342,6 +353,16 @@ def setUp(self):
         self.random_state = check_random_state(42)
         self.x = self.random_state.uniform(size=(10, 5))
         np.fill_diagonal(self.x, np.nan)
+        self.y = np.array([[0, 1, 5],
+                           [3, 4, np.nan],
+                           [2, np.nan, np.nan],
+                           [np.nan, np.nan, np.nan]])
+        self.r0 = [5/3, 5/2, 5/1]
+        self.r1 = [6/3, 7/2, 2/1, np.nan]
+        self.w0 = np.array([4, 3, 2, 1])
+        self.w1 = np.array([1, 2, 3])
+        self.r0w = [13/9, 16/7, 20/4]
+        self.r1w = [17/6, 11/3, 2/1, np.nan]
 
     @dense_sparse
     def test_axis_none(self, array):
@@ -361,6 +382,41 @@ def test_axis_1(self, array):
             np.nanmean(self.x, axis=1), nanmean(array(self.x), axis=1)
         )
 
+    @dense_sparse
+    def test_weights_axis_none(self, array):
+        with self.assertRaises(NotImplementedError):  # weights require an explicit axis
+            nanmean(array(self.x), weights=1)
+
+    @dense_sparse
+    def test_weights_axis_0(self, array):
+        np.testing.assert_almost_equal(  # unweighted: column means of y
+            self.r0, nanmean(array(self.y), axis=0)
+        )
+        np.testing.assert_almost_equal(  # unweighted: column means of y.T, i.e. row means of y
+            self.r1, nanmean(array(self.y.T), axis=0)
+        )
+        np.testing.assert_almost_equal(  # weighted: w0 holds one weight per row of y
+            self.r0w, nanmean(array(self.y), axis=0, weights=self.w0)
+        )
+        np.testing.assert_almost_equal(  # weighted: w1 holds one weight per row of y.T
+            self.r1w, nanmean(array(self.y.T), axis=0, weights=self.w1)
+        )
+
+    @dense_sparse
+    def test_weights_axis_1(self, array):
+        np.testing.assert_almost_equal(  # unweighted: row means of y
+            self.r1, nanmean(array(self.y), axis=1)
+        )
+        np.testing.assert_almost_equal(  # unweighted: row means of y.T, i.e. column means of y
+            self.r0, nanmean(array(self.y.T), axis=1)
+        )
+        np.testing.assert_almost_equal(  # weighted: w1 holds one weight per column of y
+            self.r1w, nanmean(array(self.y), axis=1, weights=self.w1)
+        )
+        np.testing.assert_almost_equal(  # weighted: w0 holds one weight per column of y.T
+            self.r0w, nanmean(array(self.y.T), axis=1, weights=self.w0)
+        )
+
 
 class TestDigitize(unittest.TestCase):
     def setUp(self):