Skip to content

Commit

Permalink
Merge pull request #6542 from janezd/som-output-columns
Browse files Browse the repository at this point in the history
[ENH] SOM: output columns with coordinates and errors
  • Loading branch information
markotoplak authored Sep 1, 2023
2 parents 972405f + e3f2371 commit 3778aee
Show file tree
Hide file tree
Showing 6 changed files with 381 additions and 56 deletions.
10 changes: 8 additions & 2 deletions Orange/projection/_som.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def get_winners(np.float64_t[:, :, :] weights, np.float64_t[:, :] X, int hex):
np.float64_t[:] row
np.ndarray[np.int16_t, ndim=2] winners = \
np.empty((X.shape[0], 2), dtype=np.int16)
np.ndarray[np.float64_t, ndim=1] distances = \
np.empty((X.shape[0]), dtype=np.float64)
int nrows = X.shape[0]

with nogil:
Expand All @@ -40,8 +42,9 @@ def get_winners(np.float64_t[:, :, :] weights, np.float64_t[:, :] X, int hex):
min_diff = diff
winners[rowi, 0] = win_x
winners[rowi, 1] = win_y
distances[rowi] = min_diff

return winners
return winners, distances


def update(np.float64_t[:, :, :] weights,
Expand Down Expand Up @@ -127,6 +130,8 @@ def get_winners_sparse(np.float64_t[:, :, :] weights,
np.float64_t[:] row,
np.ndarray[np.int16_t, ndim=2] winners = \
np.empty((X.shape[0], 2), dtype=np.int16)
np.ndarray[np.float64_t, ndim=1] distances = \
np.empty((X.shape[0]), dtype=np.float64)
int nrows = X.shape[0]

with nogil:
Expand All @@ -149,7 +154,8 @@ def get_winners_sparse(np.float64_t[:, :, :] weights,

winners[rowi, 0] = win_x
winners[rowi, 1] = win_y
return winners
distances[rowi] = min_diff
return winners, distances


def update_sparse(np.ndarray[np.float64_t, ndim=3] weights,
Expand Down
35 changes: 35 additions & 0 deletions Orange/projection/som.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Union, Optional

import numpy as np
import scipy.sparse as sp

Expand All @@ -14,6 +16,39 @@ def __init__(self, dim_x, dim_y,
self.pca_init = pca_init
self.random_seed = random_seed

@staticmethod
def prepare_data(x: Union[np.ndarray, sp.spmatrix],
                 offsets: Optional[np.ndarray] = None,
                 scales: Optional[np.ndarray] = None) \
        -> (Union[np.ndarray, sp.spmatrix],
            np.ndarray,
            Union[np.ndarray, None],
            Union[np.ndarray, None]):
    """
    Select usable rows of `x` and rescale dense data column-wise.

    Dense data: rows containing any non-finite value are masked out; the
    remaining rows are copied, shifted by `offsets` and divided by
    `scales`. Missing `offsets`/`scales` are computed from the data as the
    column minimum and the column maximum after shifting (zero scales are
    replaced by 1 to avoid division by zero). The input is never modified.

    Sparse data: if `offsets` is None, the data is only converted to CSR
    and returned unscaled with an all-True mask; if `offsets` is given,
    the data is converted to a dense array and handled as dense data.

    Args:
        x (np.ndarray or sp.spmatrix): two-dimensional data
        offsets (np.ndarray, optional): per-column offsets to subtract
        scales (np.ndarray, optional): per-column divisors

    Returns:
        data (np.ndarray or sp.spmatrix): prepared rows; the original `x`
            if no row of dense data is usable
        mask (np.ndarray): boolean, one element per row of `x`; True for
            rows that are included in `data`
        offsets (np.ndarray or None): offsets that were used (None if
            none were given and none were computed)
        scales (np.ndarray or None): scales that were used (None if none
            were given and none were computed)
    """
    if sp.issparse(x) and offsets is not None:
        # This is used in compute_value, by any widget, hence there is no
        # way to prevent it or report an error. We go dense...
        # `toarray` rather than `todense`: the latter gives `np.matrix`,
        # for which the row mask below would be two-dimensional.
        x = x.toarray()

    if sp.issparse(x):
        cont_x = x.tocsr()
        mask = np.ones(cont_x.shape[0], bool)
        return cont_x, mask, offsets, scales

    mask = np.all(np.isfinite(x), axis=1)
    if not mask.any():
        # Nothing to scale (this includes data without rows)
        return x, mask, offsets, scales

    # Both branches give a new array, so in-place operations are safe
    cont_x = x.copy() if mask.all() else x[mask]
    if cont_x.dtype.kind in "biu":
        # In-place subtraction and division below can't store float
        # results into boolean or integer arrays
        cont_x = cont_x.astype(float)

    if offsets is None:
        offsets = np.min(cont_x, axis=0)
    cont_x -= offsets[None, :]
    if scales is None:
        scales = np.max(cont_x, axis=0)
        scales[scales == 0] = 1  # constant columns stay at 0
    cont_x /= scales[None, :]
    return cont_x, mask, offsets, scales

def init_weights_random(self, x):
random = (np.random if self.random_seed is None
else np.random.RandomState(self.random_seed))
Expand Down
Loading

0 comments on commit 3778aee

Please sign in to comment.