Skip to content

Commit

Permalink
Orange.statistics.util.nanmean extended with weights
Browse files Browse the repository at this point in the history
  • Loading branch information
markotoplak committed Nov 16, 2022
1 parent 4c9885f commit 9c69c4d
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 12 deletions.
34 changes: 22 additions & 12 deletions Orange/statistics/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,20 +342,14 @@ def stats(X, weights=None, compute_variance=False):
is_numeric = np.issubdtype(X.dtype, np.number)
is_sparse = sp.issparse(X)
weighted = weights is not None and X.dtype != object

def weighted_mean():
if is_sparse:
w_X = X.multiply(sp.csr_matrix(np.c_[weights] / sum(weights)))
return np.asarray(w_X.sum(axis=0)).ravel()
else:
return bn.nansum(X * np.c_[weights] / sum(weights), axis=0)
weights = weights if weighted else None

if X.size and is_numeric and not is_sparse:
nans = np.isnan(X).sum(axis=0)
return np.column_stack((
np.nanmin(X, axis=0),
np.nanmax(X, axis=0),
nanmean(X, axis=0) if not weighted else weighted_mean(),
nanmean(X, axis=0, weights=weights),
nanvar(X, axis=0) if compute_variance else \
np.zeros(X.shape[1] if X.ndim == 2 else 1),
nans,
Expand All @@ -369,7 +363,7 @@ def weighted_mean():
return np.column_stack((
nanmin(X, axis=0),
nanmax(X, axis=0),
nanmean(X, axis=0) if not weighted else weighted_mean(),
nanmean(X, axis=0, weights=weights),
np.zeros(X.shape[1]), # variance not supported
X.shape[0] - non_zero,
non_zero))
Expand Down Expand Up @@ -453,15 +447,31 @@ def nansum_sparse(x):
return _apply_func(x, np.nansum, nansum_sparse, axis=axis)


def nanmean(x, axis=None, weights=None):
    """Equivalent of np.nanmean that supports sparse or dense matrices.

    Parameters
    ----------
    x : array-like or scipy sparse matrix
        Data to average. NaN entries are ignored on the dense path.
    axis : int or None
        Axis along which the mean is computed. With weights, only 0 and 1
        are supported.
    weights : array-like or None
        One weight per element along `axis` (per row for axis=0, per
        column for axis=1). Any array-like is accepted; it is converted
        to an ndarray internally.

    Returns
    -------
    The mean(s) of `x`. On the dense weighted path, a slice whose usable
    weights sum to zero (e.g. all values are NaN) yields NaN.

    Raises
    ------
    NotImplementedError
        If weights are given with axis=None, or (dense input) with an axis
        other than 0 or 1.
    """
    if axis is None and weights is not None:
        raise NotImplementedError("weights are only supported if axis is defined")

    if not sp.issparse(x):
        if weights is None:
            means = bn.nanmean(x, axis=axis)
        else:
            # Accept lists/tuples as well as ndarrays; .reshape below
            # would otherwise fail on plain Python sequences.
            weights = np.asarray(weights)
            if axis == 0:
                weights = weights.reshape(-1, 1)
            elif axis == 1:
                weights = weights.reshape(1, -1)
            else:
                raise NotImplementedError(
                    "weights are only supported for axis 0 or 1")
            # Zero out the weights of NaN entries so that the normalizer
            # only counts weights that actually contributed to the sum.
            nanw = ~np.isnan(x) * weights
            means = bn.nansum(x * weights, axis=axis) / np.sum(nanw, axis=axis)
    elif axis is None:
        # weights is necessarily None here (checked above): average the
        # per-column means of the sparse matrix.
        means, _ = mean_variance_axis(x, axis=0)
        means = np.nanmean(means)
    else:
        # mean_variance_axis is picky regarding the input type
        if weights is not None:
            weights = np.asarray(weights, dtype=float)
        means, _ = mean_variance_axis(x, axis=axis, weights=weights)

    return means

Expand Down
45 changes: 45 additions & 0 deletions Orange/tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,16 @@ def setUp(self):
self.random_state = check_random_state(42)
self.x = self.random_state.uniform(size=(10, 5))
np.fill_diagonal(self.x, np.nan)
self.y = np.array([[0, 1, 5],
[3, 4, np.nan],
[2, np.nan, np.nan],
[np.nan, np.nan, np.nan]])
self.r0 = [5/3, 5/2, 5/1]
self.r1 = [6/3, 7/2, 2/1, np.nan]
self.w0 = np.array([4, 3, 2, 1])
self.w1 = np.array([1, 2, 3])
self.r0w = [13/9, 16/7, 20/4]
self.r1w = [17/6, 11/3, 2/1, np.nan]

@dense_sparse
def test_axis_none(self, array):
Expand All @@ -372,6 +382,41 @@ def test_axis_1(self, array):
np.nanmean(self.x, axis=1), nanmean(array(self.x), axis=1)
)

@dense_sparse
def test_weights_axis_none(self, array):
    """Passing weights without an axis must raise NotImplementedError."""
    self.assertRaises(
        NotImplementedError,
        lambda: nanmean(array(self.x), weights=1),
    )

@dense_sparse
def test_weights_axis_0(self, array):
    """Check axis=0 means, first unweighted and then weighted, against
    the expected values prepared in setUp."""
    checks = (
        (self.r0, self.y, {}),
        (self.r1, self.y.T, {}),
        (self.r0w, self.y, {"weights": self.w0}),
        (self.r1w, self.y.T, {"weights": self.w1}),
    )
    for expected, data, extra in checks:
        np.testing.assert_almost_equal(
            expected, nanmean(array(data), axis=0, **extra)
        )

@dense_sparse
def test_weights_axis_1(self, array):
    """Check axis=1 means, first unweighted and then weighted, against
    the expected values prepared in setUp."""
    checks = (
        (self.r1, self.y, {}),
        (self.r0, self.y.T, {}),
        (self.r1w, self.y, {"weights": self.w1}),
        (self.r0w, self.y.T, {"weights": self.w0}),
    )
    for expected, data, extra in checks:
        np.testing.assert_almost_equal(
            expected, nanmean(array(data), axis=1, **extra)
        )


class TestDigitize(unittest.TestCase):
def setUp(self):
Expand Down

0 comments on commit 9c69c4d

Please sign in to comment.