Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Variable.make #3925

Merged
merged 18 commits into from
Sep 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 145 additions & 39 deletions Orange/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def __call__(self, data):
self.__class__.__name__)

model = self._fit_model(data)
model.used_vals = [np.unique(y) for y in data.Y[:, None].T]
model.used_vals = [np.unique(y).astype(int) for y in data.Y[:, None].T]
model.domain = data.domain
model.supports_multiclass = self.supports_multiclass
model.name = self.name
Expand Down Expand Up @@ -210,40 +210,152 @@ def predict_storage(self, data):
raise TypeError("Unrecognized argument (instance of '{}')"
.format(type(data).__name__))

def get_backmappers(self, data):
    """Return `(backmappers, n_values)` for translating this model's
    class predictions into `data`'s class variables.

    For each class variable, `n_values` holds `len(values)` when the
    data's variable is discrete and `False` otherwise; `backmappers`
    holds a mapper obtained from `get_mapper_from` when the data's
    variable is a distinct (but equal) discrete variable, else `None`.
    If no variable needs mapping, `backmappers` is `None`.

    Raises `DomainTransformationError` when the numbers of class
    variables differ or any pair of class variables is incompatible.
    """
    model_classes = self.domain.class_vars
    data_classes = data.domain.class_vars
    if not (model_classes and data_classes):
        # Classless model or classless data: nothing to map.
        return None, []
    if len(data_classes) != len(model_classes):
        raise DomainTransformationError(
            "Mismatching number of model's classes and data classes")

    backmappers = []
    n_values = []
    for data_class, model_class in zip(data_classes, model_classes):
        if data_class != model_class:
            if data_class.name != model_class.name:
                raise DomainTransformationError(
                    f"Model for '{model_class.name}' "
                    f"cannot predict '{data_class.name}'")
            # Same name, but the variables themselves are incompatible.
            raise DomainTransformationError(
                f"Variables '{model_class.name}' in the model is "
                "incompatible with the variable of the same name "
                "in the data.")
        n_values.append(data_class.is_discrete and len(data_class.values))
        # Equal but non-identical discrete variables may still order their
        # values differently, hence the identity (`is not`) check.
        needs_mapping = data_class is not model_class and data_class.is_discrete
        backmappers.append(
            data_class.get_mapper_from(model_class) if needs_mapping else None)
    if not any(mapper is not None for mapper in backmappers):
        backmappers = None
    return backmappers, n_values

def backmap_value(self, value, mapped_probs, n_values, backmappers):
    """Map predicted values from the model's value space back into the
    data's, filling unmappable predictions from probabilities or, failing
    that, a deterministic (seeded) random draw.

    `backmappers is None` means no mapping is needed and `value` is
    returned unchanged. `mapped_probs`, when given, are probabilities
    already in the data's space and are used to pick the most probable
    class for values the backmapper could not translate.
    """
    if backmappers is None:
        return value

    if value.ndim == 2:  # multitarget: recurse column by column
        new_value = np.zeros(value.shape)
        for i, (n_value, backmapper) in enumerate(zip(n_values, backmappers)):
            new_value[:, i] = self.backmap_value(
                value[:, i], mapped_probs[:, i, :], [n_value], [backmapper])
        return new_value

    backmapper = backmappers[0]
    if backmapper is None:
        return value

    value = backmapper(value)
    nans = np.isnan(value)
    if not np.any(nans):
        return value
    if mapped_probs is not None:
        # Fall back to the most probable (already backmapped) class.
        value[nans] = np.argmax(mapped_probs[nans], axis=1)
    else:
        # No probabilities: draw deterministically from mappable values.
        # BUG FIX: the original called the nonexistent `np.RandomState`
        # (it lives in `np.random`) and omitted the comma before the size
        # tuple, so the population array was being *called* with it.
        # NOTE(review): `arange(0, n_values[0] - 1)` excludes the last
        # value index — possible off-by-one, preserved from the original.
        value[nans] = np.random.RandomState(0).choice(
            backmapper(np.arange(0, n_values[0] - 1)),
            (np.sum(nans), ))
    return value

def backmap_probs(self, probs, n_values, backmappers):
    """Map predicted probabilities from the model's class order back to
    the data's class order.

    Each source column is moved to the position its backmapper assigns;
    columns mapping to nan (values unknown to the data's variable) are
    dropped and each row is renormalized. Rows left with no mass become
    uniform distributions. `backmappers is None` means no mapping is
    needed and `probs` is returned unchanged.
    """
    if backmappers is None:
        return probs

    # Multitarget: recurse per target, padding to the widest class.
    if probs.ndim == 3:
        padded = np.zeros((len(probs), len(n_values), max(n_values)),
                          dtype=probs.dtype)
        for target, (card, mapper) in enumerate(zip(n_values, backmappers)):
            padded[:, target, :card] = self.backmap_probs(
                probs[:, target, :], [card], [mapper])
        return padded

    mapper = backmappers[0]
    if mapper is None:
        return probs

    card = n_values[0]
    remapped = np.zeros((len(probs), card), dtype=probs.dtype)
    for src in range(probs.shape[1]):
        dst = mapper(src)
        if not np.isnan(dst):
            remapped[:, int(dst)] = probs[:, src]
    # Renormalize; rows that lost all their mass become uniform.
    totals = np.sum(remapped, axis=1)
    empty = totals == 0
    remapped[empty] = 1
    totals[empty] = card
    return remapped / totals[:, None]

def __call__(self, data, ret=Value):
multitarget = len(self.domain.class_vars) > 1

def one_hot_probs(value):
if not multitarget:
return one_hot(value)

max_card = max(len(c.values) for c in self.domain.class_vars)
probs = np.zeros(value.shape + (max_card,), float)
for i in range(len(self.domain.class_vars)):
probs[:, i, :] = one_hot(value[:, i])
return probs

def fix_dim(x):
return x[0] if one_d else x

def data_to_model_domain():
if data.domain == self.domain:
return data

if self.original_domain.attributes != data.domain.attributes \
and data.X.size \
and not np.isnan(data.X).all():
new_data = data.transform(self.original_domain)
if np.isnan(new_data.X).all():
raise DomainTransformationError(
"domain transformation produced no defined values")
return new_data.transform(self.domain)

return data.transform(self.domain)

if not 0 <= ret <= 2:
raise ValueError("invalid value of argument 'ret'")
if ret > 0 and any(v.is_continuous for v in self.domain.class_vars):
raise ValueError("cannot predict continuous distributions")

# Convert 1d structures to 2d and remember doing it
one_d = True
if isinstance(data, Instance):
data = Table(data.domain, [data])
elif isinstance(data, (list, tuple)) \
and not isinstance(data[0], (list, tuple)):
data = [data]
elif isinstance(data, np.ndarray) and data.ndim == 1:
data = np.atleast_2d(data)
else:
one_d = False

# Call the predictor
one_d = False
if isinstance(data, np.ndarray):
one_d = data.ndim == 1
prediction = self.predict(np.atleast_2d(data))
elif isinstance(data, scipy.sparse.csr.csr_matrix):
backmappers = None
n_values = []
if isinstance(data, (np.ndarray, scipy.sparse.csr.csr_matrix)):
prediction = self.predict(data)
elif isinstance(data, (Table, Instance)):
if isinstance(data, Instance):
data = Table(data.domain, [data])
one_d = True
if data.domain != self.domain:
if self.original_domain.attributes != data.domain.attributes \
and data.X.size \
and not np.isnan(data.X).all():
data = data.transform(self.original_domain)
if np.isnan(data.X).all():
raise DomainTransformationError(
"domain transformation produced no defined values")
data = data.transform(self.domain)
elif isinstance(data, Table):
backmappers, n_values = self.get_backmappers(data)
data = data_to_model_domain()
prediction = self.predict_storage(data)
elif isinstance(data, (list, tuple)):
if not isinstance(data[0], (list, tuple)):
data = [data]
one_d = True
data = Table.from_list(self.original_domain, data)
data = data.transform(self.domain)
prediction = self.predict_storage(data)
Expand All @@ -252,7 +364,6 @@ def fix_dim(x):
.format(type(data).__name__))

# Parse the result into value and probs
multitarget = len(self.domain.class_vars) > 1
if isinstance(prediction, tuple):
value, probs = prediction
elif prediction.ndim == 1 + multitarget:
Expand All @@ -263,28 +374,23 @@ def fix_dim(x):
raise TypeError("model returned a %i-dimensional array",
prediction.ndim)

# Ensure that we have what we need to return
if ret != Model.Probs and value is None:
value = np.argmax(probs, axis=-1)
if ret != Model.Value and probs is None:
if multitarget:
max_card = max(len(c.values)
for c in self.domain.class_vars)
probs = np.zeros(value.shape + (max_card,), float)
for i in range(len(self.domain.class_vars)):
probs[:, i, :] = one_hot(value[:, i])
else:
probs = one_hot(value)
if ret == Model.ValueProbs:
return fix_dim(value), fix_dim(probs)
# Ensure that we have what we need to return; backmapp everything
if probs is None and (ret != Model.Value or backmappers is not None):
probs = one_hot_probs(value)
if probs is not None:
probs = self.backmap_probs(probs, n_values, backmappers)
if ret != Model.Probs:
if value is None:
value = np.argmax(probs, axis=-1)
# probs are already backmapped
else:
return fix_dim(probs)
value = self.backmap_value(value, probs, n_values, backmappers)

# Return what we need to
if ret == Model.Probs:
return fix_dim(probs)
if isinstance(data, Instance) and not multitarget:
value = Value(self.domain.class_var, value[0])
value = [Value(self.domain.class_var, value[0])]
if ret == Model.Value:
return fix_dim(value)
else: # ret == Model.ValueProbs
Expand Down
55 changes: 19 additions & 36 deletions Orange/classification/base_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,43 +18,26 @@ class ModelClassification(Model):


class SklModelClassification(SklModel, ModelClassification):
def __call__(self, data, ret=Model.Value):
prediction = super().__call__(data, ret=ret)

if ret == Model.Value:
def predict(self, X):
    """Predict with the wrapped sklearn model and widen the probability
    array to one column per declared class value.

    sklearn emits probability columns only for class values seen during
    fitting (recorded in ``self.used_vals``); values absent from training
    receive zero-probability columns so the output width matches the
    domain's class variables.

    NOTE(review): this span of the scraped diff interleaved lines of the
    removed ``__call__`` implementation with the added ``predict``; only
    the added implementation is kept here.
    """
    prediction = super().predict(X)
    if not isinstance(prediction, tuple):
        return prediction
    values, probs = prediction

    class_vars = self.domain.class_vars
    max_values = max(len(cv.values) for cv in class_vars)
    if max_values == probs.shape[-1]:
        return values, probs

    # Work on an (n, n_targets, n_columns) view even for a single target.
    if not self.supports_multiclass:
        probs = probs[:, np.newaxis, :]
    probs_ext = np.zeros((len(probs), len(class_vars), max_values))
    for c, used_vals in enumerate(self.used_vals):
        for i, cv in enumerate(used_vals):
            probs_ext[:, c, cv] = probs[:, c, i]
    if not self.supports_multiclass:
        probs_ext = probs_ext[:, 0, :]
    return values, probs_ext


class SklLearnerClassification(SklLearner, LearnerClassification):
Expand Down
Loading