Skip to content

Commit

Permalink
squash this commit somewhere
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed May 15, 2021
1 parent b028b25 commit 2223f50
Show file tree
Hide file tree
Showing 87 changed files with 1,063 additions and 581 deletions.
51 changes: 32 additions & 19 deletions Orange/classification/_tree_scorers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ cdef extern from "numpy/npy_math.h":
cpdef enum:
NULL_BRANCH = -1

def contingency(double[:] x, int nx, double[:] y, int ny):
def contingency(const double[:] x, int nx, const double[:] y, int ny):
cdef:
np.ndarray[np.uint32_t, ndim=2] cont = np.zeros((ny, nx), dtype=np.uint32)
int n = len(x), yi, xi
Expand All @@ -28,7 +28,8 @@ def contingency(double[:] x, int nx, double[:] y, int ny):
cont[yi, xi] += 1
return cont

def find_threshold_entropy(double[:] x, double[:] y, np.intp_t[:] idx,
def find_threshold_entropy(const double[:] x, const double[:] y,
const np.intp_t[:] idx,
int n_classes, int min_leaf):
"""
Find the threshold for continuous attribute values that maximizes
Expand Down Expand Up @@ -89,8 +90,9 @@ def find_threshold_entropy(double[:] x, double[:] y, np.intp_t[:] idx,
return (class_entro - best_entro) / N / log(2), x[idx[best_idx]]


def find_binarization_entropy(double[:, :] cont, double[:] class_distr,
double[:] val_distr, int min_leaf):
def find_binarization_entropy(const double[:, :] cont,
const double[:] class_distr,
const double[:] val_distr, int min_leaf):
"""
Find the split of discrete values into two groups that optimizes information
gain.
Expand Down Expand Up @@ -187,7 +189,9 @@ def find_binarization_entropy(double[:, :] cont, double[:] class_distr,
return (class_entro - best_entro) / N / log(2), best_mapping


def find_threshold_MSE(double[:] x, double[:] y, np.intp_t[:] idx, int min_leaf):
def find_threshold_MSE(const double[:] x,
const double[:] y,
const np.intp_t[:] idx, int min_leaf):
"""
Find the threshold for continuous attribute values that minimizes MSE.
Expand Down Expand Up @@ -232,7 +236,8 @@ def find_threshold_MSE(double[:] x, double[:] y, np.intp_t[:] idx, int min_leaf)
return (best_inter - (sum * sum) / N) / N, x[idx[best_idx]]


def find_binarization_MSE(double[:] x, double[:] y, int n_values, int min_leaf):
def find_binarization_MSE(const double[:] x,
const double[:] y, int n_values, int min_leaf):
"""
Find the split of discrete values into two groups that minimizes the MSE.
Expand Down Expand Up @@ -315,7 +320,9 @@ def find_binarization_MSE(double[:] x, double[:] y, int n_values, int min_leaf):
return (best_inter - start_inter) / x.shape[0], best_mapping


def compute_grouped_MSE(double[:] x, double[:] y, int n_values, int min_leaf):
def compute_grouped_MSE(const double[:] x,
const double[:] y,
int n_values, int min_leaf):
"""
Compute the MSE decrease of the given split into groups.
Expand Down Expand Up @@ -371,8 +378,10 @@ def compute_grouped_MSE(double[:] x, double[:] y, int n_values, int min_leaf):
return (inter - sum * sum / n) / x.shape[0]


def compute_predictions(double[:, :] X, int[:] code,
double[:, :] values, double[:] thresholds):
def compute_predictions(const double[:, :] X,
const int[:] code,
const double[:, :] values,
const double[:] thresholds):
"""
Return the values (distributions, means and variances) stored in the nodes
to which the tree classifies the rows in X.
Expand Down Expand Up @@ -419,8 +428,10 @@ def compute_predictions(double[:, :] X, int[:] code,
return np.asarray(predictions)


def compute_predictions_csr(X, int[:] code,
double[:, :] values, double[:] thresholds):
def compute_predictions_csr(X,
const int[:] code,
const double[:, :] values,
const double[:] thresholds):
"""
Same as compute_predictions except for sparse data
"""
Expand All @@ -431,9 +442,9 @@ def compute_predictions_csr(X, int[:] code,
double[: ,:] predictions = np.empty(
(X.shape[0], values.shape[1]), dtype=np.float64)

double[:] data = X.data
np.int32_t[:] indptr = X.indptr
np.int32_t[:] indices = X.indices
const double[:] data = X.data
const np.int32_t[:] indptr = X.indptr
const np.int32_t[:] indices = X.indices
int ind, attr, n_rows

n_rows = X.shape[0]
Expand Down Expand Up @@ -463,8 +474,10 @@ def compute_predictions_csr(X, int[:] code,
predictions[i, j] = values[node_idx, j]
return np.asarray(predictions)

def compute_predictions_csc(X, int[:] code,
double[:, :] values, double[:] thresholds):
def compute_predictions_csc(X,
const int[:] code,
const double[:, :] values,
const double[:] thresholds):
"""
Same as compute_predictions except for sparse data
"""
Expand All @@ -475,9 +488,9 @@ def compute_predictions_csc(X, int[:] code,
double[: ,:] predictions = np.empty(
(X.shape[0], values.shape[1]), dtype=np.float64)

double[:] data = X.data
np.int32_t[:] indptr = X.indptr
np.int32_t[:] indices = X.indices
const double[:] data = X.data
const np.int32_t[:] indptr = X.indptr
const np.int32_t[:] indices = X.indices
int ind, attr, n_rows

n_rows = X.shape[0]
Expand Down
2 changes: 1 addition & 1 deletion Orange/classification/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _score_disc():
cont_entr = np.sum(cont * np.log(cont))
score = (class_entr - attr_entr + cont_entr) / n / np.log(2)
score *= n / len(data) # punishment for missing values
branches = col_x
branches = col_x.copy()
branches[np.isnan(branches)] = -1
if score == 0:
return REJECT_ATTRIBUTE
Expand Down
8 changes: 6 additions & 2 deletions Orange/data/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ def __init__(self, domain, data=None, id=None):
self._weight = 1
elif isinstance(data, Instance) and data.domain == domain:
self._x = np.array(data._x)
self._y = np.array(data._y)
self._y = np.atleast_1d(np.array(data._y))
self._metas = np.array(data._metas)
self._weight = data._weight
else:
self._x, self._y, self._metas = domain.convert(data)
self._y = np.atleast_1d(self._y)
self._weight = 1

if id is not None:
Expand Down Expand Up @@ -116,7 +117,10 @@ def __getitem__(self, key):
if 0 <= idx < len(self._domain.attributes):
value = self._x[idx]
elif idx >= len(self._domain.attributes):
value = self._y[idx - len(self.domain.attributes)]
if self._y.ndim == 0:
value = self._y
else:
value = self._y[idx - len(self.domain.attributes)]
else:
value = self._metas[-1 - idx]
var = self._domain[idx]
Expand Down
4 changes: 4 additions & 0 deletions Orange/data/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,10 @@ def vars_from_df(df, role=None, force_nominal=False):
for var, col, expr in zip(Avars, Acols, Aexpr)]).T
XYM.append(A)

# Let the table share memory with the pandas frame
if XYM[1] is not None and XYM[1].ndim == 2 and XYM[1].shape[1] == 1:
XYM[1] = XYM[1][:, 0]

return XYM, Domain(attrs, class_vars, metas)


Expand Down
2 changes: 1 addition & 1 deletion Orange/data/sql/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def _filter_values(self, f):
return t2

@classmethod
def from_table(cls, domain, source, row_indices=...):
def from_table(cls, domain, source, row_indices=..., copy=None):
assert row_indices is ...

table = source.copy()
Expand Down
Loading

0 comments on commit 2223f50

Please sign in to comment.