From da0f0be01671aff211410227b4bbbad048ddf93e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 21 Apr 2023 19:28:04 +0200 Subject: [PATCH 01/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 8417f9897..5770e0786 100644 --- a/flox/core.py +++ b/flox/core.py @@ -77,7 +77,7 @@ # This dummy axis is inserted using np.expand_dims # and then reduced over during the combine stage by # _simple_combine. -DUMMY_AXIS = -2 +DUMMY_AXIS = -2 + 0 def _is_arg_reduction(func: T_Agg) -> bool: From fd6531179c3f4780a4d1b752e864ee1de0b379e6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 23 Apr 2023 18:57:08 +0200 Subject: [PATCH 02/96] Update xarray.py --- flox/xarray.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 6b7c174b0..610b635c1 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -23,6 +23,7 @@ if TYPE_CHECKING: from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset + from .core import T_ExpectedGroupsOpt Dims = Union[str, Iterable[Hashable], None] @@ -65,7 +66,7 @@ def xarray_reduce( obj: T_Dataset | T_DataArray, *by: T_DataArray | Hashable, func: str | Aggregation, - expected_groups=None, + expected_groups: T_ExpectedGroupsOpt = None, isbin: bool | Sequence[bool] = False, sort: bool = True, dim: Dims | ellipsis = None, From e9997bf49b10ce0ded090aa75022d9e5dd951f6f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 23 Apr 2023 16:57:21 +0000 Subject: [PATCH 03/96] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flox/xarray.py b/flox/xarray.py index 610b635c1..423d1f270 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -23,6 +23,7 @@ if TYPE_CHECKING: from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset + from .core import T_ExpectedGroupsOpt Dims = Union[str, Iterable[Hashable], None] From 8a63dce775ea73ff65ce088656c4dbe6461f4fbb Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 23 Apr 2023 19:15:16 +0200 Subject: [PATCH 04/96] avoid renaming --- flox/xarray.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 610b635c1..7f5d10a05 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -218,7 +218,7 @@ def xarray_reduce( else: isbins = (isbin,) * nby - expected_groups = _validate_expected_groups(nby, expected_groups) + expected_groups_valid = _validate_expected_groups(nby, expected_groups) if not sort: raise NotImplementedError("sort must be True for xarray_reduce") @@ -310,10 +310,10 @@ def xarray_reduce( # Set expected_groups and convert to index since we need coords, sizes # for output xarray objects - expected_groups = list(expected_groups) + expected_groups_valid_list = list(expected_groups_valid) group_names: tuple[Any, ...] = () group_sizes: dict[Any, int] = {} - for idx, (b_, expect, isbin_) in enumerate(zip(by_da, expected_groups, isbins)): + for idx, (b_, expect, isbin_) in enumerate(zip(by_da, expected_groups_valid_list, isbins)): group_name = ( f"{b_.name}_bins" if isbin_ or isinstance(expect, pd.IntervalIndex) else b_.name ) @@ -337,7 +337,7 @@ def xarray_reduce( # The if-check is for type hinting mainly, it narrows down the return # type of _convert_expected_groups_to_index to pure pd.Index: if expect_index is not None: - expected_groups[idx] = expect_index + expected_groups_valid_list[idx] = expect_index group_sizes[group_name] = len(expect_index) else: # This will never be reached @@ -423,7 +423,7 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): "skipna": skipna, "engine": engine, "reindex": reindex, - "expected_groups": tuple(expected_groups), + "expected_groups": tuple(expected_groups_valid_list), "isbin": isbins, "finalize_kwargs": finalize_kwargs, "dtype": dtype, @@ -437,7 +437,7 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): if all(d not in ds_broad[var].dims for d in dim_tuple): actual[var] = ds_broad[var] - for name, expect, by_ in zip(group_names, expected_groups, by_da): + for name, expect, by_ in zip(group_names, expected_groups_valid_list, by_da): # Can't remove this till xarray handles IntervalIndex if isinstance(expect, pd.IntervalIndex): expect = expect.to_numpy() From a935ca997d17da950165f8d375cad329fc2795e8 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 24 Apr 2023 20:36:22 +0200 Subject: [PATCH 05/96] Update xarray.py --- flox/xarray.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 7563c310e..0a4fe464a 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -22,7 +22,7 @@ if TYPE_CHECKING: from xarray.core.resample import Resample - from xarray.core.types import T_DataArray, T_Dataset + from xarray.core.types import T_DataArray, T_Dataset, T_Expect from .core import T_ExpectedGroupsOpt @@ -311,10 +311,10 @@ def xarray_reduce( # Set expected_groups and convert to index since we need coords, sizes # for output xarray objects - expected_groups_valid_list = list(expected_groups_valid) + expected_groups_valid_list: list[T_Expect] = [] group_names: tuple[Any, ...] = () group_sizes: dict[Any, int] = {} - for idx, (b_, expect, isbin_) in enumerate(zip(by_da, expected_groups_valid_list, isbins)): + for idx, (b_, expect, isbin_) in enumerate(zip(by_da, expected_groups_valid, isbins)): group_name = ( f"{b_.name}_bins" if isbin_ or isinstance(expect, pd.IntervalIndex) else b_.name ) @@ -337,12 +337,11 @@ def xarray_reduce( # The if-check is for type hinting mainly, it narrows down the return # type of _convert_expected_groups_to_index to pure pd.Index: - if expect_index is not None: - expected_groups_valid_list[idx] = expect_index - group_sizes[group_name] = len(expect_index) - else: + if expect_index is None: # This will never be reached raise ValueError("expect_index cannot be None") + expected_groups_valid_list.append(expect_index) + group_sizes[group_name] = len(expect_index) def wrapper(array, *by, func, skipna, core_dims, **kwargs): array, *by = _broadcast_size_one_dims(array, *by, core_dims=core_dims) From 1c82e0fc652543e284ad716c19150cbdb3948fad Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 24 Apr 2023 20:49:52 +0200 Subject: [PATCH 06/96] Update xarray.py --- flox/xarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 0a4fe464a..dade7d92e 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -22,9 +22,9 @@ if TYPE_CHECKING: from xarray.core.resample import Resample - from xarray.core.types import T_DataArray, T_Dataset, T_Expect + from xarray.core.types import T_DataArray, T_Dataset - from .core import T_ExpectedGroupsOpt + from .core import T_ExpectedGroupsOpt, T_Expect Dims = Union[str, Iterable[Hashable], None] From 4695dcdab73d686ef363d46abcec3f5f67cc4f4b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Apr 2023 18:50:08 +0000 Subject: [PATCH 07/96] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index dade7d92e..6aab97345 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -24,7 +24,7 @@ from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset - from .core import T_ExpectedGroupsOpt, T_Expect + from .core import T_Expect, T_ExpectedGroupsOpt Dims = Union[str, Iterable[Hashable], None] From 46f429cc5a267fb8878d945d40766f9c69075063 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 24 Apr 2023 21:05:12 +0200 Subject: [PATCH 08/96] Update xarray.py --- flox/xarray.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flox/xarray.py b/flox/xarray.py index 6aab97345..696b8584d 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -324,6 +324,8 @@ def xarray_reduce( raise NotImplementedError( "flox does not support binning into an integer number of bins yet." ) + + expect_: T_Expect if expect is None: if isbin_: raise ValueError( From c3c7828dcb5b2d28d5d91f6da114a74244e8631a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 24 Apr 2023 21:27:30 +0200 Subject: [PATCH 09/96] Update xarray.py --- flox/xarray.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 696b8584d..ca9ecd8c8 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -339,11 +339,12 @@ def xarray_reduce( # The if-check is for type hinting mainly, it narrows down the return # type of _convert_expected_groups_to_index to pure pd.Index: - if expect_index is None: + if expect_index is not None: + expected_groups_valid_list.append(expect_index) + group_sizes[group_name] = len(expect_index) + else: # This will never be reached raise ValueError("expect_index cannot be None") - expected_groups_valid_list.append(expect_index) - group_sizes[group_name] = len(expect_index) def wrapper(array, *by, func, skipna, core_dims, **kwargs): array, *by = _broadcast_size_one_dims(array, *by, core_dims=core_dims) From eff861d748456bf2b5fbd9317c9a5769224b29cf Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 24 Apr 2023 22:07:46 +0200 Subject: [PATCH 10/96] Update xarray.py --- flox/xarray.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index ca9ecd8c8..9d005c415 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -440,22 +440,22 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): if all(d not in ds_broad[var].dims for d in dim_tuple): actual[var] = ds_broad[var] - for name, expect, by_ in zip(group_names, expected_groups_valid_list, by_da): + for name, expct, by_ in zip(group_names, expected_groups_valid_list, by_da): # Can't remove this till xarray handles IntervalIndex - if isinstance(expect, pd.IntervalIndex): - expect = expect.to_numpy() + if isinstance(expct, pd.IntervalIndex): + expct = expct.to_numpy() if isinstance(actual, xr.Dataset) and name in actual: actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping # an object array of tuples if name in ds_broad.indexes and isinstance(ds_broad.indexes[name], pd.MultiIndex): levelnames = ds_broad.indexes[name].names - expect = pd.MultiIndex.from_tuples(expect.values, names=levelnames) - actual[name] = expect + expct = pd.MultiIndex.from_tuples(expct.values, names=levelnames) + actual[name] = expct if Version(xr.__version__) > Version("2022.03.0"): actual = actual.set_coords(levelnames) else: - actual[name] = expect + actual[name] = expct if keep_attrs: actual[name].attrs = by_.attrs From ad51845c1b2543aef3b77fac93ebb6fff6beb264 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 20:17:58 +0200 Subject: [PATCH 11/96] Update xarray.py --- flox/xarray.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 3534bd81a..b48c92f79 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -342,10 +342,12 @@ def xarray_reduce( else: expect_ = expect expect_index = _convert_expected_groups_to_index((expect_,), (isbin_,), sort=sort)[0] + reveal_type(expect_index) # The if-check is for type hinting mainly, it narrows down the return # type of _convert_expected_groups_to_index to pure pd.Index: if expect_index is not None: + reveal_type(expect_index) expected_groups_valid_list.append(expect_index) group_sizes[group_name] = len(expect_index) else: @@ -446,22 +448,22 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): if all(d not in ds_broad[var].dims for d in dim_tuple): actual[var] = ds_broad[var] - for name, expct, by_ in zip(group_names, expected_groups_valid_list, by_da): + for name, expect, by_ in zip(group_names, expected_groups_valid_list, by_da): # Can't remove this till xarray handles IntervalIndex - if isinstance(expct, pd.IntervalIndex): - expct = expct.to_numpy() + if isinstance(expect, pd.IntervalIndex): + expect = expect.to_numpy() if isinstance(actual, xr.Dataset) and name in actual: actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping # an object array of tuples if name in ds_broad.indexes and isinstance(ds_broad.indexes[name], pd.MultiIndex): levelnames = ds_broad.indexes[name].names - expct = pd.MultiIndex.from_tuples(expct.values, names=levelnames) - actual[name] = expct + expect = pd.MultiIndex.from_tuples(expect.values, names=levelnames) + actual[name] = expect if Version(xr.__version__) > Version("2022.03.0"): actual = actual.set_coords(levelnames) else: - actual[name] = expct + actual[name] = expect if keep_attrs: actual[name].attrs = by_.attrs From aee3e6cc75b1dc5f227cfe2415571cbf3c0b2cee Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 20:19:24 +0200 Subject: [PATCH 12/96] Update xarray.py --- flox/xarray.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index b48c92f79..4b50b6dce 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -342,12 +342,10 @@ def xarray_reduce( else: expect_ = expect expect_index = _convert_expected_groups_to_index((expect_,), (isbin_,), sort=sort)[0] - reveal_type(expect_index) # The if-check is for type hinting mainly, it narrows down the return # type of _convert_expected_groups_to_index to pure pd.Index: if expect_index is not None: - reveal_type(expect_index) expected_groups_valid_list.append(expect_index) group_sizes[group_name] = len(expect_index) else: From 52027eaf67318dd33976a72771937b704ca8709d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 21:03:14 +0200 Subject: [PATCH 13/96] split to optional --- flox/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flox/core.py b/flox/core.py index 3e1097419..6e5a7e03e 100644 --- a/flox/core.py +++ b/flox/core.py @@ -51,9 +51,10 @@ T_DuckArray = Union[np.ndarray, DaskArray] # Any ? T_By = T_DuckArray T_Bys = tuple[T_By, ...] - T_ExpectIndex = Union[pd.Index, None] - T_Expect = Union[Sequence, np.ndarray, T_ExpectIndex] - T_ExpectIndexTuple = tuple[T_ExpectIndex, ...] + T_ExpectIndex = pd.Index + T_ExpectIndexOpt = Union[T_ExpectIndex, None] + T_Expect = Union[Sequence, np.ndarray, T_ExpectIndexOpt] + T_ExpectIndexTuple = tuple[T_ExpectIndexOpt, ...] T_ExpectTuple = tuple[T_Expect, ...] T_ExpectedGroups = Union[T_Expect, T_ExpectTuple] T_ExpectedGroupsOpt = Union[T_ExpectedGroups, None] From c4a734719602179d4079fff7f2cd98abaf0aa4a8 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 21:03:28 +0200 Subject: [PATCH 14/96] Update xarray.py --- flox/xarray.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 4b50b6dce..755c4757e 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -24,7 +24,7 @@ from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset - from .core import T_Expect, T_ExpectedGroupsOpt + from .core import T_Expect, T_ExpectIndex, T_ExpectedGroupsOpt Dims = Union[str, Iterable[Hashable], None] @@ -248,7 +248,7 @@ def xarray_reduce( try: from xarray.indexes import PandasMultiIndex except ImportError: - PandasMultiIndex = tuple() # type: ignore + PandasMultiIndex = tuple() more_drop = set() for var in maybe_drop: @@ -317,7 +317,7 @@ def xarray_reduce( # Set expected_groups and convert to index since we need coords, sizes # for output xarray objects - expected_groups_valid_list: list[T_Expect] = [] + expected_groups_valid_list: list[T_ExpectIndex] = [] group_names: tuple[Any, ...] = () group_sizes: dict[Any, int] = {} for idx, (b_, expect, isbin_) in enumerate(zip(by_da, expected_groups_valid, isbins)): @@ -446,22 +446,22 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): if all(d not in ds_broad[var].dims for d in dim_tuple): actual[var] = ds_broad[var] - for name, expect, by_ in zip(group_names, expected_groups_valid_list, by_da): + for name, expect__, by_ in zip(group_names, expected_groups_valid_list, by_da): # Can't remove this till xarray handles IntervalIndex - if isinstance(expect, pd.IntervalIndex): - expect = expect.to_numpy() + if isinstance(expect__, pd.IntervalIndex): + expect__ = expect__.to_numpy() if isinstance(actual, xr.Dataset) and name in actual: actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping # an object array of tuples if name in ds_broad.indexes and isinstance(ds_broad.indexes[name], pd.MultiIndex): levelnames = ds_broad.indexes[name].names - expect = pd.MultiIndex.from_tuples(expect.values, names=levelnames) - actual[name] = expect + expect__ = pd.MultiIndex.from_tuples(expect__.values, names=levelnames) + actual[name] = expect__ if Version(xr.__version__) > Version("2022.03.0"): actual = actual.set_coords(levelnames) else: - actual[name] = expect + actual[name] = expect__ if keep_attrs: actual[name].attrs = by_.attrs From 69419669c936f7d5b0567ad0ad64b861234abdab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 19:03:42 +0000 Subject: [PATCH 15/96] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 755c4757e..cc5d87bea 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -24,7 +24,7 @@ from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset - from .core import T_Expect, T_ExpectIndex, T_ExpectedGroupsOpt + from .core import T_Expect, T_ExpectedGroupsOpt, T_ExpectIndex Dims = Union[str, Iterable[Hashable], None] From 63a413de34b8666db459b59dffa2c3c9ea1098a5 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 21:10:37 +0200 Subject: [PATCH 16/96] Update xarray.py --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index cc5d87bea..6b1b3800f 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -248,7 +248,7 @@ def xarray_reduce( try: from xarray.indexes import PandasMultiIndex except ImportError: - PandasMultiIndex = tuple() + PandasMultiIndex = tuple() # type: ignore more_drop = set() for var in maybe_drop: From 836214f43ee3ca07a038a549c051f289a5640aa7 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 21:18:01 +0200 Subject: [PATCH 17/96] convert to pd.Index instead of ndarray --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 6b1b3800f..038e2fe87 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -449,7 +449,7 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): for name, expect__, by_ in zip(group_names, expected_groups_valid_list, by_da): # Can't remove this till xarray handles IntervalIndex if isinstance(expect__, pd.IntervalIndex): - expect__ = expect__.to_numpy() + expect__ = expect__.to_tuples() if isinstance(actual, xr.Dataset) and name in actual: actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping From 80169df96ea7425304ec017fed765753357b4b88 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 21:33:39 +0200 Subject: [PATCH 18/96] Handled different slicer types? --- flox/xarray.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 038e2fe87..fb04d2262 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -605,10 +605,15 @@ def resample_reduce( # this creates a label DataArray since resample doesn't do that somehow tostack = [] for idx, slicer in enumerate(resampler._group_indices): - if slicer.stop is None: - stop = resampler._obj.sizes[dim] + if isinstance(slicer, slice): + if slicer.stop is None: + stop = resampler._obj.sizes[dim] + else: + stop = slicer.stop + elif isinstance(slicer, list): + stop = slicer[-1] else: - stop = slicer.stop + stop = slicer tostack.append(idx * np.ones((stop - slicer.start,), dtype=np.int32)) by = xr.DataArray(np.hstack(tostack), dims=(dim,), name="__resample_dim__") From b96de2438305e6b1e7636241ee6cbb00d9df0a89 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 21:38:30 +0200 Subject: [PATCH 19/96] not supported instead? --- flox/xarray.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index fb04d2262..3a8575fec 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -610,11 +610,10 @@ def resample_reduce( stop = resampler._obj.sizes[dim] else: stop = slicer.stop - elif isinstance(slicer, list): - stop = slicer[-1] + tostack.append(idx * np.ones((stop - slicer.start,), dtype=np.int32)) else: - stop = slicer - tostack.append(idx * np.ones((stop - slicer.start,), dtype=np.int32)) + raise NotImplementedError(f"Only slice type is supported, got {type(slicer)=}.") + by = xr.DataArray(np.hstack(tostack), dims=(dim,), name="__resample_dim__") result = ( From bc5a404e0172e3ba066da65a4855273730d05e90 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 22:13:22 +0200 Subject: [PATCH 20/96] specify type for simple_combine --- flox/aggregations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/aggregations.py b/flox/aggregations.py index 13b23fafe..11438410b 100644 --- a/flox/aggregations.py +++ b/flox/aggregations.py @@ -185,7 +185,7 @@ def __init__( # how to aggregate results after first round of reduction self.combine: FuncTuple = _atleast_1d(combine) # simpler reductions used with the "simple combine" algorithm - self.simple_combine = None + self.simple_combine: tuple[Callable, ...] = () # final aggregation self.aggregate: Callable | str = aggregate if aggregate else self.combine[0] # finalize results (see mean) @@ -579,7 +579,7 @@ def _initialize_aggregation( else: agg.min_count = 0 - simple_combine = [] + simple_combine: list[Callable] = [] for combine in agg.combine: if isinstance(combine, str): if combine in ["nanfirst", "nanlast"]: From 8e20f163ff7a84133ccf1f6ac6fe4238972055e1 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 22:14:08 +0200 Subject: [PATCH 21/96] Handle None in agg.min_count --- flox/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/flox/core.py b/flox/core.py index 6e5a7e03e..b4441f652 100644 --- a/flox/core.py +++ b/flox/core.py @@ -51,7 +51,7 @@ T_DuckArray = Union[np.ndarray, DaskArray] # Any ? T_By = T_DuckArray T_Bys = tuple[T_By, ...] - T_ExpectIndex = pd.Index + T_ExpectIndex: pd.Index T_ExpectIndexOpt = Union[T_ExpectIndex, None] T_Expect = Union[Sequence, np.ndarray, T_ExpectIndexOpt] T_ExpectIndexTuple = tuple[T_ExpectIndexOpt, ...] @@ -99,7 +99,7 @@ def _is_first_last_reduction(func: T_Agg) -> bool: return isinstance(func, str) and func in ["nanfirst", "nanlast", "first", "last"] -def _get_expected_groups(by: T_By, sort: bool) -> pd.Index: +def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndex: if is_duck_dask_array(by): raise ValueError("Please provide expected_groups if not grouping by a numpy array.") flatby = by.reshape(-1) @@ -854,7 +854,8 @@ def _finalize_results( """ squeezed = _squeeze_results(results, axis) - if agg.min_count > 0: + min_count = agg.min_count if agg.min_count is not None else 0 + if min_count > 0: counts = squeezed["intermediates"][-1] squeezed["intermediates"] = squeezed["intermediates"][:-1] @@ -865,8 +866,8 @@ def _finalize_results( else: finalized[agg.name] = agg.finalize(*squeezed["intermediates"], **agg.finalize_kwargs) - if agg.min_count > 0: - count_mask = counts < agg.min_count + if min_count > 0: + count_mask = counts < min_count if count_mask.any(): # For one count_mask.any() prevents promoting bool to dtype(fill_value) unless # necessary From dd6cafe28431b9b6afcd1d18e7cfb9f649d5eaa7 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 22:18:32 +0200 Subject: [PATCH 22/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index b4441f652..052bc7d79 100644 --- a/flox/core.py +++ b/flox/core.py @@ -51,7 +51,7 @@ T_DuckArray = Union[np.ndarray, DaskArray] # Any ? T_By = T_DuckArray T_Bys = tuple[T_By, ...] - T_ExpectIndex: pd.Index + T_ExpectIndex = pd.Index T_ExpectIndexOpt = Union[T_ExpectIndex, None] T_Expect = Union[Sequence, np.ndarray, T_ExpectIndexOpt] T_ExpectIndexTuple = tuple[T_ExpectIndexOpt, ...] From 97c30a5b9a5c9288b980d681dbf7501dd8ab78c8 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 22:51:36 +0200 Subject: [PATCH 23/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 052bc7d79..b4441f652 100644 --- a/flox/core.py +++ b/flox/core.py @@ -51,7 +51,7 @@ T_DuckArray = Union[np.ndarray, DaskArray] # Any ? T_By = T_DuckArray T_Bys = tuple[T_By, ...] - T_ExpectIndex = pd.Index + T_ExpectIndex: pd.Index T_ExpectIndexOpt = Union[T_ExpectIndex, None] T_Expect = Union[Sequence, np.ndarray, T_ExpectIndexOpt] T_ExpectIndexTuple = tuple[T_ExpectIndexOpt, ...] From f3e10adc352e80b493e90aa71b5c03b279513752 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 22:55:52 +0200 Subject: [PATCH 24/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index b4441f652..a00d88bec 100644 --- a/flox/core.py +++ b/flox/core.py @@ -51,7 +51,7 @@ T_DuckArray = Union[np.ndarray, DaskArray] # Any ? T_By = T_DuckArray T_Bys = tuple[T_By, ...] - T_ExpectIndex: pd.Index + T_ExpectIndex: type[pd.Index] = pd.Index T_ExpectIndexOpt = Union[T_ExpectIndex, None] T_Expect = Union[Sequence, np.ndarray, T_ExpectIndexOpt] T_ExpectIndexTuple = tuple[T_ExpectIndexOpt, ...] From 2359be91c07aa1fb61ebc4b47a5e8d4e04da5f38 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 23:01:06 +0200 Subject: [PATCH 25/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index a00d88bec..ee5a8f661 100644 --- a/flox/core.py +++ b/flox/core.py @@ -51,7 +51,7 @@ T_DuckArray = Union[np.ndarray, DaskArray] # Any ? T_By = T_DuckArray T_Bys = tuple[T_By, ...] - T_ExpectIndex: type[pd.Index] = pd.Index + T_ExpectIndex = Union[pd.Index] T_ExpectIndexOpt = Union[T_ExpectIndex, None] T_Expect = Union[Sequence, np.ndarray, T_ExpectIndexOpt] T_ExpectIndexTuple = tuple[T_ExpectIndexOpt, ...] From d2378de5893f8e14249d355dcc25b0adf73334ee Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 23:13:14 +0200 Subject: [PATCH 26/96] Update core.py --- flox/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flox/core.py b/flox/core.py index ee5a8f661..ca1a88101 100644 --- a/flox/core.py +++ b/flox/core.py @@ -220,6 +220,9 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: raveled = labels.reshape(-1) # these are chunks where a label is present label_chunks = pd.Series(which_chunk).groupby(raveled).unique() + reveal_type(label_chunks) + x = 1 + reveal_type(tuple(label_chunks.get(x))) # These invert the label_chunks mapping so we know which labels occur together. chunks_cohorts = tlz.groupby(lambda x: tuple(label_chunks.get(x)), label_chunks.keys()) From 0a7d7c105329d3d9f365a62777b890d12100e2d0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 23:14:35 +0200 Subject: [PATCH 27/96] Update core.py --- flox/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flox/core.py b/flox/core.py index ca1a88101..42c8ede9a 100644 --- a/flox/core.py +++ b/flox/core.py @@ -223,6 +223,7 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: reveal_type(label_chunks) x = 1 reveal_type(tuple(label_chunks.get(x))) + reveal_type(label_chunks.keys()) # These invert the label_chunks mapping so we know which labels occur together. chunks_cohorts = tlz.groupby(lambda x: tuple(label_chunks.get(x)), label_chunks.keys()) From 57a3ce7a2682bb1ca13df53b10e2683a87c0b233 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 23:16:43 +0200 Subject: [PATCH 28/96] Update core.py --- flox/core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flox/core.py b/flox/core.py index 42c8ede9a..a19024cdc 100644 --- a/flox/core.py +++ b/flox/core.py @@ -220,6 +220,10 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: raveled = labels.reshape(-1) # these are chunks where a label is present label_chunks = pd.Series(which_chunk).groupby(raveled).unique() + try: + from mypy import reveal_type + except: + pass reveal_type(label_chunks) x = 1 reveal_type(tuple(label_chunks.get(x))) From 305cec918bd2dece5d28998b62a0a5156d503c4b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 23:30:27 +0200 Subject: [PATCH 29/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index a19024cdc..20ead2954 100644 --- a/flox/core.py +++ b/flox/core.py @@ -226,7 +226,7 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: pass reveal_type(label_chunks) x = 1 - reveal_type(tuple(label_chunks.get(x))) + reveal_type((label_chunks.get(x),)) reveal_type(label_chunks.keys()) # These invert the label_chunks mapping so we know which labels occur together. chunks_cohorts = tlz.groupby(lambda x: tuple(label_chunks.get(x)), label_chunks.keys()) From 8e5413fd6487eb170893f63b70ab2cb9e4177f69 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 23:38:19 +0200 Subject: [PATCH 30/96] Update core.py --- flox/core.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/flox/core.py b/flox/core.py index 20ead2954..5af9e765f 100644 --- a/flox/core.py +++ b/flox/core.py @@ -220,16 +220,8 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: raveled = labels.reshape(-1) # these are chunks where a label is present label_chunks = pd.Series(which_chunk).groupby(raveled).unique() - try: - from mypy import reveal_type - except: - pass - reveal_type(label_chunks) - x = 1 - reveal_type((label_chunks.get(x),)) - reveal_type(label_chunks.keys()) # These invert the label_chunks mapping so we know which labels occur together. - chunks_cohorts = tlz.groupby(lambda x: tuple(label_chunks.get(x)), label_chunks.keys()) + chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) if merge: # First sort by number of chunks occupied by cohort From 9f5bf4a1d3e3cd71b5b03eedb6a208b0cc90479a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 23:43:42 +0200 Subject: [PATCH 31/96] Update core.py --- flox/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index 5af9e765f..4a3529366 100644 --- a/flox/core.py +++ b/flox/core.py @@ -460,7 +460,7 @@ def factorize_( axes: T_Axes, *, fastpath: Literal[True], - expected_groups: tuple[pd.Index, ...] | None = None, + expected_groups: T_ExpectIndexTuple | None = None, reindex: bool = False, sort: bool = True, ) -> tuple[np.ndarray, tuple[np.ndarray, ...], tuple[int, ...], int, int, None]: @@ -472,7 +472,7 @@ def factorize_( by: T_Bys, axes: T_Axes, *, - expected_groups: tuple[pd.Index, ...] | None = None, + expected_groups: T_ExpectIndexTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: Literal[False] = False, @@ -485,7 +485,7 @@ def factorize_( by: T_Bys, axes: T_Axes, *, - expected_groups: tuple[pd.Index, ...] | None = None, + expected_groups: T_ExpectIndexTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: bool = False, @@ -497,7 +497,7 @@ def factorize_( by: T_Bys, axes: T_Axes, *, - expected_groups: tuple[pd.Index, ...] | None = None, + expected_groups: T_ExpectIndexTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: bool = False, From 51af4cca216135780e71919129c52da8226ed35d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 30 May 2023 23:51:48 +0200 Subject: [PATCH 32/96] Update core.py --- flox/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/core.py b/flox/core.py index 4a3529366..3b10e6bb2 100644 --- a/flox/core.py +++ b/flox/core.py @@ -99,7 +99,7 @@ def _is_first_last_reduction(func: T_Agg) -> bool: return isinstance(func, str) and func in ["nanfirst", "nanlast", "first", "last"] -def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndex: +def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndexTuple: if is_duck_dask_array(by): raise ValueError("Please provide expected_groups if not grouping by a numpy array.") flatby = by.reshape(-1) @@ -1589,7 +1589,7 @@ def _assert_by_is_aligned(shape: tuple[int, ...], by: T_Bys): def _convert_expected_groups_to_index( expected_groups: T_ExpectTuple, isbin: Sequence[bool], sort: bool ) -> T_ExpectIndexTuple: - out: list[pd.Index | None] = [] + out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): if sort: From f8699809df3f79f3363c4a11e2aa6ac7be6b29ad Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 31 May 2023 00:11:26 +0200 Subject: [PATCH 33/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 3b10e6bb2..ff8790184 100644 --- a/flox/core.py +++ b/flox/core.py @@ -547,7 +547,7 @@ def factorize_( else: idx = np.zeros_like(flat, dtype=np.intp) - 1 - found_groups.append(expect) + found_groups.append(np.array(expect)) else: if expect is not None and reindex: sorter = np.argsort(expect) From 3234096a54667f10c663e6a20bbc0cda32fa58e6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 31 May 2023 19:32:33 +0200 Subject: [PATCH 34/96] Update core.py --- flox/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index ff8790184..960cb1002 100644 --- a/flox/core.py +++ b/flox/core.py @@ -221,7 +221,8 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: # these are chunks where a label is present label_chunks = pd.Series(which_chunk).groupby(raveled).unique() # These invert the label_chunks mapping so we know which labels occur together. - chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) + # chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) + chunks_cohorts = tlz.groupby(lambda x: tuple(label_chunks.get(x)), label_chunks.keys()) if merge: # First sort by number of chunks occupied by cohort From 9aee5a0819cfdcd93cf0af8a9cee133b7800c7f8 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 20:45:38 +0200 Subject: [PATCH 35/96] Update core.py --- flox/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flox/core.py b/flox/core.py index 960cb1002..bf1337034 100644 --- a/flox/core.py +++ b/flox/core.py @@ -99,7 +99,7 @@ def _is_first_last_reduction(func: T_Agg) -> bool: return isinstance(func, str) and func in ["nanfirst", "nanlast", "first", "last"] -def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndexTuple: +def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndexOpt: if is_duck_dask_array(by): raise ValueError("Please provide expected_groups if not grouping by a numpy array.") flatby = by.reshape(-1) @@ -1286,7 +1286,7 @@ def dask_groupby_agg( array: DaskArray, by: T_By, agg: Aggregation, - expected_groups: pd.Index | None, + expected_groups: T_ExpectIndexOpt, axis: T_Axes = (), fill_value: Any = None, method: T_Method = "map-reduce", @@ -1572,7 +1572,7 @@ def _validate_reindex( return reindex -def _assert_by_is_aligned(shape: tuple[int, ...], by: T_Bys): +def _assert_by_is_aligned(shape: tuple[int, ...], by: T_Bys) -> None: assert all(b.ndim == by[0].ndim for b in by[1:]) for idx, b in enumerate(by): if not all(j in [i, 1] for i, j in zip(shape[-b.ndim :], b.shape)): From 9fab7cf9176b1047a8857b72afdc2f86fd4bf6af Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 20:53:47 +0200 Subject: [PATCH 36/96] Update core.py --- flox/core.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flox/core.py b/flox/core.py index bf1337034..bb2004564 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1426,9 +1426,11 @@ def dask_groupby_agg( group_chunks = ((np.nan,),) else: if expected_groups is None: - expected_groups = _get_expected_groups(by_input, sort=sort) - groups = (expected_groups.to_numpy(),) - group_chunks = ((len(expected_groups),),) + expected_groups_ = _get_expected_groups(by_input, sort=sort) + else: + expected_groups_ = expected_groups + groups = (expected_groups_.to_numpy(),) + group_chunks = ((len(expected_groups_),),) elif method == "cohorts": chunks_cohorts = find_group_cohorts( From ee7c042e2f2f41e1674357741c17a52f3b27fbc0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 21:41:58 +0200 Subject: [PATCH 37/96] add overloads and rename --- flox/core.py | 37 +++++++++++++++++++++++++++---------- flox/xarray.py | 4 ++-- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/flox/core.py b/flox/core.py index bb2004564..9ea1df355 100644 --- a/flox/core.py +++ b/flox/core.py @@ -52,11 +52,14 @@ T_By = T_DuckArray T_Bys = tuple[T_By, ...] T_ExpectIndex = Union[pd.Index] + T_ExpectIndexTuple = tuple[T_ExpectIndex, ...] T_ExpectIndexOpt = Union[T_ExpectIndex, None] - T_Expect = Union[Sequence, np.ndarray, T_ExpectIndexOpt] - T_ExpectIndexTuple = tuple[T_ExpectIndexOpt, ...] + T_ExpectIndexOptTuple = tuple[T_ExpectIndexOpt, ...] + T_Expect = Union[Sequence, np.ndarray, T_ExpectIndex] T_ExpectTuple = tuple[T_Expect, ...] - T_ExpectedGroups = Union[T_Expect, T_ExpectTuple] + T_ExpectOpt = Union[Sequence, np.ndarray, T_ExpectIndexOpt] + T_ExpectOptTuple = tuple[T_ExpectOpt, ...] + T_ExpectedGroups = Union[T_Expect, T_ExpectOptTuple] T_ExpectedGroupsOpt = Union[T_ExpectedGroups, None] T_Func = Union[str, Callable] T_Funcs = Union[T_Func, Sequence[T_Func]] @@ -99,7 +102,7 @@ def _is_first_last_reduction(func: T_Agg) -> bool: return isinstance(func, str) and func in ["nanfirst", "nanlast", "first", "last"] -def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndexOpt: +def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndex: if is_duck_dask_array(by): raise ValueError("Please provide expected_groups if not grouping by a numpy array.") flatby = by.reshape(-1) @@ -461,7 +464,7 @@ def factorize_( axes: T_Axes, *, fastpath: Literal[True], - expected_groups: T_ExpectIndexTuple | None = None, + expected_groups: T_ExpectIndexOptTuple | None = None, reindex: bool = False, sort: bool = True, ) -> tuple[np.ndarray, tuple[np.ndarray, ...], tuple[int, ...], int, int, None]: @@ -473,7 +476,7 @@ def factorize_( by: T_Bys, axes: T_Axes, *, - expected_groups: T_ExpectIndexTuple | None = None, + expected_groups: T_ExpectIndexOptTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: Literal[False] = False, @@ -486,7 +489,7 @@ def factorize_( by: T_Bys, axes: T_Axes, *, - expected_groups: T_ExpectIndexTuple | None = None, + expected_groups: T_ExpectIndexOptTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: bool = False, @@ -498,7 +501,7 @@ def factorize_( by: T_Bys, axes: T_Axes, *, - expected_groups: T_ExpectIndexTuple | None = None, + expected_groups: T_ExpectIndexOptTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: bool = False, @@ -1589,9 +1592,23 @@ def _assert_by_is_aligned(shape: tuple[int, ...], by: T_Bys) -> None: ) +@overload +def _convert_expected_groups_to_index( + expected_groups: tuple[None, ...], isbin: Sequence[bool], sort: bool +) -> tuple[None, ...]: + ... + + +@overload def _convert_expected_groups_to_index( expected_groups: T_ExpectTuple, isbin: Sequence[bool], sort: bool ) -> T_ExpectIndexTuple: + ... + + +def _convert_expected_groups_to_index( + expected_groups: T_ExpectOptTuple, isbin: Sequence[bool], sort: bool +) -> T_ExpectIndexOptTuple: out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): @@ -1618,7 +1635,7 @@ def _lazy_factorize_wrapper(*by: T_By, **kwargs) -> np.ndarray: def _factorize_multiple( by: T_Bys, - expected_groups: T_ExpectIndexTuple, + expected_groups: T_ExpectIndexOptTuple, any_by_dask: bool, reindex: bool, sort: bool = True, @@ -1673,7 +1690,7 @@ def _factorize_multiple( return (group_idx,), found_groups, grp_shape -def _validate_expected_groups(nby: int, expected_groups: T_ExpectedGroupsOpt) -> T_ExpectTuple: +def _validate_expected_groups(nby: int, expected_groups: T_ExpectedGroupsOpt) -> T_ExpectOptTuple: if expected_groups is None: return (None,) * nby diff --git a/flox/xarray.py b/flox/xarray.py index 3a8575fec..3074f9eb8 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -24,7 +24,7 @@ from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset - from .core import T_Expect, T_ExpectedGroupsOpt, T_ExpectIndex + from .core import T_ExpectOpt, T_ExpectedGroupsOpt, T_ExpectIndex Dims = Union[str, Iterable[Hashable], None] @@ -331,7 +331,7 @@ def xarray_reduce( "flox does not support binning into an integer number of bins yet." ) - expect_: T_Expect + expect_: T_ExpectOpt if expect is None: if isbin_: raise ValueError( From c2d5d157025c77cf49eda2cd6e20d8ffa1b55acf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:42:13 +0000 Subject: [PATCH 38/96] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 3074f9eb8..6255b0905 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -24,7 +24,7 @@ from xarray.core.resample import Resample from xarray.core.types import T_DataArray, T_Dataset - from .core import T_ExpectOpt, T_ExpectedGroupsOpt, T_ExpectIndex + from .core import T_ExpectedGroupsOpt, T_ExpectIndex, T_ExpectOpt Dims = Union[str, Iterable[Hashable], None] From 4a8a926731c35b3f39ef38d77aea68837de7838c Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:10:15 +0200 Subject: [PATCH 39/96] more overloads --- flox/core.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/flox/core.py b/flox/core.py index 9ea1df355..2dc036c99 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1690,6 +1690,16 @@ def _factorize_multiple( return (group_idx,), found_groups, grp_shape +@overload +def _validate_expected_groups(nby: int, expected_groups: None) -> tuple[None, ...]: + ... + + +@overload +def _validate_expected_groups(nby: int, expected_groups: T_ExpectedGroups) -> T_ExpectTuple: + ... + + def _validate_expected_groups(nby: int, expected_groups: T_ExpectedGroupsOpt) -> T_ExpectOptTuple: if expected_groups is None: return (None,) * nby From 630e1bf4de1210d92a1ab1e0d80b7b91c2480915 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:32:54 +0200 Subject: [PATCH 40/96] ignore --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 2dc036c99..c3edf2c07 100644 --- a/flox/core.py +++ b/flox/core.py @@ -565,7 +565,7 @@ def factorize_( idx = sorter[(idx,)] idx[mask] = -1 else: - idx, groups = pd.factorize(flat, sort=sort) + idx, groups = pd.factorize(flat, sort=sort) # type: ignore [arg-type] # pandas issue? found_groups.append(np.array(groups)) factorized.append(idx.reshape(groupvar.shape)) From 4d4a697471364470d736cb652a2ae770de1fcd40 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:36:58 +0200 Subject: [PATCH 41/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index c3edf2c07..a2e54c350 100644 --- a/flox/core.py +++ b/flox/core.py @@ -565,7 +565,7 @@ def factorize_( idx = sorter[(idx,)] idx[mask] = -1 else: - idx, groups = pd.factorize(flat, sort=sort) # type: ignore [arg-type] # pandas issue? + idx, groups = pd.factorize(flat, sort=sort) # type: ignore # pandas issue? found_groups.append(np.array(groups)) factorized.append(idx.reshape(groupvar.shape)) From cba746aa073d3021fabd6a5047b79d87a040cde4 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:41:14 +0200 Subject: [PATCH 42/96] Update xarray.py --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 6255b0905..a1a9ea610 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -628,5 +628,5 @@ def resample_reduce( .rename({"__resample_dim__": dim}) .transpose(dim, ...) ) - result[dim] = resampler._unique_coord.data + result[dim] = resampler._unique_coord.data # type: ignore [attr-defined] # TODO: check if real return result From b346137069d55103f239cbcc7b4a654774a56d20 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:04:18 +0200 Subject: [PATCH 43/96] Update core.py --- flox/core.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index a2e54c350..ce8de069d 100644 --- a/flox/core.py +++ b/flox/core.py @@ -224,8 +224,13 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: # these are chunks where a label is present label_chunks = pd.Series(which_chunk).groupby(raveled).unique() # These invert the label_chunks mapping so we know which labels occur together. + def test(x) -> tuple: + a = label_chunks.get(x) + reveal_type(a) + return tuple(a) + # chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) - chunks_cohorts = tlz.groupby(lambda x: tuple(label_chunks.get(x)), label_chunks.keys()) + chunks_cohorts = tlz.groupby(test), label_chunks.keys()) if merge: # First sort by number of chunks occupied by cohort From d86795e2913121edac19cbb10ca77d7e26fab79e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:06:57 +0200 Subject: [PATCH 44/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index ce8de069d..01e1e492c 100644 --- a/flox/core.py +++ b/flox/core.py @@ -230,7 +230,7 @@ def test(x) -> tuple: return tuple(a) # chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) - chunks_cohorts = tlz.groupby(test), label_chunks.keys()) + chunks_cohorts = tlz.groupby(test, label_chunks.keys()) if merge: # First sort by number of chunks occupied by cohort From edf5dea97b634fb8a0ce9c96fc1096d70b78d92c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 21:07:15 +0000 Subject: [PATCH 45/96] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flox/core.py b/flox/core.py index 01e1e492c..b24bbcddf 100644 --- a/flox/core.py +++ b/flox/core.py @@ -223,6 +223,7 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: raveled = labels.reshape(-1) # these are chunks where a label is present label_chunks = pd.Series(which_chunk).groupby(raveled).unique() + # These invert the label_chunks mapping so we know which labels occur together. def test(x) -> tuple: a = label_chunks.get(x) From 1fc3d7cb74911f306ddfe8a19bb8a773891d26e2 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:13:03 +0200 Subject: [PATCH 46/96] Update core.py --- flox/core.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/flox/core.py b/flox/core.py index b24bbcddf..dd74be9c9 100644 --- a/flox/core.py +++ b/flox/core.py @@ -225,13 +225,12 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: label_chunks = pd.Series(which_chunk).groupby(raveled).unique() # These invert the label_chunks mapping so we know which labels occur together. - def test(x) -> tuple: - a = label_chunks.get(x) - reveal_type(a) - return tuple(a) + def invert(x) -> tuple(np.ndarray, ...): + arr = label_chunks.get(x) # type: ignore [arg-type] # pandas issue? + return tuple(arr) # chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) - chunks_cohorts = tlz.groupby(test, label_chunks.keys()) + chunks_cohorts = tlz.groupby(invert, label_chunks.keys()) if merge: # First sort by number of chunks occupied by cohort From b11631b3bc931a861482f3d492ad3f04afaa8296 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:19:02 +0200 Subject: [PATCH 47/96] Update core.py --- flox/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/core.py b/flox/core.py index dd74be9c9..997e3f2e8 100644 --- a/flox/core.py +++ b/flox/core.py @@ -225,8 +225,8 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: label_chunks = pd.Series(which_chunk).groupby(raveled).unique() # These invert the label_chunks mapping so we know which labels occur together. - def invert(x) -> tuple(np.ndarray, ...): - arr = label_chunks.get(x) # type: ignore [arg-type] # pandas issue? + def invert(x) -> tuple[np.ndarray, ...]: + arr = label_chunks.get(x) # type: ignore # pandas issue? return tuple(arr) # chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) From 27d041806d149eb44dd241267ae4665409692494 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:24:02 +0200 Subject: [PATCH 48/96] Update core.py --- flox/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/core.py b/flox/core.py index 997e3f2e8..9cac3abbc 100644 --- a/flox/core.py +++ b/flox/core.py @@ -226,8 +226,8 @@ def find_group_cohorts(labels, chunks, merge: bool = True) -> dict: # These invert the label_chunks mapping so we know which labels occur together. def invert(x) -> tuple[np.ndarray, ...]: - arr = label_chunks.get(x) # type: ignore # pandas issue? - return tuple(arr) + arr = label_chunks.get(x) + return tuple(arr) # type: ignore [arg-type] # pandas issue? # chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) chunks_cohorts = tlz.groupby(invert, label_chunks.keys()) From 1c6dd9533a824fa586522c37562bbb9ad7872e91 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:31:48 +0200 Subject: [PATCH 49/96] Update core.py --- flox/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 9cac3abbc..309b79dac 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1616,7 +1616,9 @@ def _convert_expected_groups_to_index( ) -> T_ExpectIndexOptTuple: out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): - if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): + # if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): + if isinstance(ex, pd.Index) and (isinstance(ex, pd.IntervalIndex) or not isbin_): + if sort: ex = ex.sort_values() out.append(ex) From 87b2e9f39988e8dd3ad9ff9169577faf7fff432a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:32:14 +0000 Subject: [PATCH 50/96] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 309b79dac..1bdea9632 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1618,7 +1618,6 @@ def _convert_expected_groups_to_index( for ex, isbin_ in zip(expected_groups, isbin): # if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): if isinstance(ex, pd.Index) and (isinstance(ex, pd.IntervalIndex) or not isbin_): - if sort: ex = ex.sort_values() out.append(ex) From 1f3e561b8ae42ed7943d0e64ad9e64c7fb8c4f2a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:36:24 +0200 Subject: [PATCH 51/96] Update core.py --- flox/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 309b79dac..0c36a0052 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1616,9 +1616,10 @@ def _convert_expected_groups_to_index( ) -> T_ExpectIndexOptTuple: out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): + reveal_type(ex) # if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): if isinstance(ex, pd.Index) and (isinstance(ex, pd.IntervalIndex) or not isbin_): - + reveal_type(ex) if sort: ex = ex.sort_values() out.append(ex) From 17af0ca6132918614bd3fd9bc7f23564879c7ad1 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:42:52 +0200 Subject: [PATCH 52/96] Update core.py --- flox/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/core.py b/flox/core.py index 0c36a0052..fefd17fcb 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1617,8 +1617,8 @@ def _convert_expected_groups_to_index( out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): reveal_type(ex) - # if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): - if isinstance(ex, pd.Index) and (isinstance(ex, pd.IntervalIndex) or not isbin_): + if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): + # if isinstance(ex, pd.Index) and (isinstance(ex, pd.IntervalIndex) or not isbin_): reveal_type(ex) if sort: ex = ex.sort_values() From d02983e73b1c2c88bfd6f9af55b984ab36bcbcf1 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:48:28 +0200 Subject: [PATCH 53/96] Update core.py --- flox/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flox/core.py b/flox/core.py index fefd17fcb..a0ff3a5b6 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1620,9 +1620,11 @@ def _convert_expected_groups_to_index( if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): # if isinstance(ex, pd.Index) and (isinstance(ex, pd.IntervalIndex) or not isbin_): reveal_type(ex) + e = ex + reveal_type(e) if sort: - ex = ex.sort_values() - out.append(ex) + e = e.sort_values() + out.append(e) elif ex is not None: if isbin_: out.append(pd.IntervalIndex.from_breaks(ex)) From 7808b7546a456129141032fc444d85a4b3b4afc1 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:51:54 +0200 Subject: [PATCH 54/96] Update core.py --- flox/core.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/flox/core.py b/flox/core.py index a0ff3a5b6..fb9afa51c 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1616,22 +1616,19 @@ def _convert_expected_groups_to_index( ) -> T_ExpectIndexOptTuple: out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): - reveal_type(ex) if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): - # if isinstance(ex, pd.Index) and (isinstance(ex, pd.IntervalIndex) or not isbin_): - reveal_type(ex) e = ex - reveal_type(e) if sort: e = e.sort_values() out.append(e) elif ex is not None: + e = ex if isbin_: - out.append(pd.IntervalIndex.from_breaks(ex)) + out.append(pd.IntervalIndex.from_breaks(e)) else: if sort: - ex = np.sort(ex) - out.append(pd.Index(ex)) + e = np.sort(e) + out.append(pd.Index(e)) else: assert ex is None out.append(None) From 269ca374c3e87e438a7a7ba84c18657a2f84207a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 00:56:14 +0200 Subject: [PATCH 55/96] Update core.py --- flox/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index fb9afa51c..938c2cf62 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1622,13 +1622,13 @@ def _convert_expected_groups_to_index( e = e.sort_values() out.append(e) elif ex is not None: - e = ex + e_ = ex if isbin_: - out.append(pd.IntervalIndex.from_breaks(e)) + out.append(pd.IntervalIndex.from_breaks(e_)) else: if sort: - e = np.sort(e) - out.append(pd.Index(e)) + e_ = np.sort(e_) + out.append(pd.Index(e_)) else: assert ex is None out.append(None) From f7bdad0c54b475798b67357a46974352277dc097 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 07:17:21 +0200 Subject: [PATCH 56/96] Update flox/core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 938c2cf62..9e2ec32e3 100644 --- a/flox/core.py +++ b/flox/core.py @@ -81,7 +81,7 @@ # This dummy axis is inserted using np.expand_dims # and then reduced over during the combine stage by # _simple_combine. -DUMMY_AXIS = -2 + 0 +DUMMY_AXIS = -2 def _is_arg_reduction(func: T_Agg) -> bool: From 83d2612ea6201ec22b39d85cce695f5785dbdd64 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 2 Jun 2023 07:18:23 +0200 Subject: [PATCH 57/96] Update flox/core.py --- flox/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 9e2ec32e3..35f5e89f3 100644 --- a/flox/core.py +++ b/flox/core.py @@ -229,7 +229,6 @@ def invert(x) -> tuple[np.ndarray, ...]: arr = label_chunks.get(x) return tuple(arr) # type: ignore [arg-type] # pandas issue? - # chunks_cohorts = tlz.groupby(lambda x: (label_chunks.get(x),), label_chunks.keys()) chunks_cohorts = tlz.groupby(invert, label_chunks.keys()) if merge: From 3a3b87195909282d6ca9c50ce23bd459c324881d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 00:58:03 +0200 Subject: [PATCH 58/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 938c2cf62..fc82e2b6f 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1623,7 +1623,7 @@ def _convert_expected_groups_to_index( out.append(e) elif ex is not None: e_ = ex - if isbin_: + if isbin_ and not isinstance(e_, pd.Index): # test out.append(pd.IntervalIndex.from_breaks(e_)) else: if sort: From ce9e071dc72da7e490c0cafe48149f266d699da4 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:00:56 +0200 Subject: [PATCH 59/96] Update core.py --- flox/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 0519e926e..c262f403c 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1623,7 +1623,9 @@ def _convert_expected_groups_to_index( elif ex is not None: e_ = ex if isbin_ and not isinstance(e_, pd.Index): # test - out.append(pd.IntervalIndex.from_breaks(e_)) + e__ = e_ + reveal_type(e__) + out.append(pd.IntervalIndex.from_breaks(e__)) else: if sort: e_ = np.sort(e_) From e5ca125dab94afd557a07f34bcaf21b988873faa Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:25:37 +0200 Subject: [PATCH 60/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index c262f403c..aba73edad 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1622,7 +1622,7 @@ def _convert_expected_groups_to_index( out.append(e) elif ex is not None: e_ = ex - if isbin_ and not isinstance(e_, pd.Index): # test + if not isinstance(e_, pd.Index) and isbin_: # test e__ = e_ reveal_type(e__) out.append(pd.IntervalIndex.from_breaks(e__)) From 9ad0df3f5ca71a6c3c07449ee7ade604bbd9c120 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:29:36 +0200 Subject: [PATCH 61/96] Update core.py --- flox/core.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index aba73edad..4bc27278a 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1622,10 +1622,11 @@ def _convert_expected_groups_to_index( out.append(e) elif ex is not None: e_ = ex - if not isinstance(e_, pd.Index) and isbin_: # test - e__ = e_ - reveal_type(e__) - out.append(pd.IntervalIndex.from_breaks(e__)) + if isbin_: # test + if not isinstance(e_, pd.Index): + e__ = e_ + reveal_type(e__) + out.append(pd.IntervalIndex.from_breaks(e__)) else: if sort: e_ = np.sort(e_) From 0864db63108bd2846162d1d86afd469ee2b694c4 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:35:40 +0200 Subject: [PATCH 62/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 4bc27278a..6757c649a 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1623,7 +1623,7 @@ def _convert_expected_groups_to_index( elif ex is not None: e_ = ex if isbin_: # test - if not isinstance(e_, pd.Index): + if isinstance(e_, pd.Index): e__ = e_ reveal_type(e__) out.append(pd.IntervalIndex.from_breaks(e__)) From 7a41ed9d8e9c0eb987b6a552b188391e906ec2a1 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:55:30 +0200 Subject: [PATCH 63/96] Update core.py --- flox/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flox/core.py b/flox/core.py index 6757c649a..8485eddd6 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1623,6 +1623,7 @@ def _convert_expected_groups_to_index( elif ex is not None: e_ = ex if isbin_: # test + reveal_type(e_) if isinstance(e_, pd.Index): e__ = e_ reveal_type(e__) From 67c6864d43a5a4445eb0c1763f6a7d430e0be59e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 11:48:18 +0200 Subject: [PATCH 64/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 8485eddd6..6146186dd 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1624,7 +1624,7 @@ def _convert_expected_groups_to_index( e_ = ex if isbin_: # test reveal_type(e_) - if isinstance(e_, pd.Index): + if isinstance(e_, (np.ndarray, pd.Index)): e__ = e_ reveal_type(e__) out.append(pd.IntervalIndex.from_breaks(e__)) From bb42b437ffcba908daa5ea7849eb87ef5a9caf96 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 12:04:37 +0200 Subject: [PATCH 65/96] Update core.py --- flox/core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/flox/core.py b/flox/core.py index 6146186dd..07b053ac9 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1621,13 +1621,13 @@ def _convert_expected_groups_to_index( e = e.sort_values() out.append(e) elif ex is not None: - e_ = ex - if isbin_: # test - reveal_type(e_) - if isinstance(e_, (np.ndarray, pd.Index)): - e__ = e_ - reveal_type(e__) - out.append(pd.IntervalIndex.from_breaks(e__)) + if not isinstance(ex, (np.ndarray, pd.Index)): + e_: np.ndarray | pd.Index = pd.Index(ex) + else: + e_ = ex + + if isbin_: + out.append(pd.IntervalIndex.from_breaks(e_)) else: if sort: e_ = np.sort(e_) From 979c66a1f354ad1052b68573e548876d072d297a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 12:09:29 +0200 Subject: [PATCH 66/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 07b053ac9..beabc313f 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1622,7 +1622,7 @@ def _convert_expected_groups_to_index( out.append(e) elif ex is not None: if not isinstance(ex, (np.ndarray, pd.Index)): - e_: np.ndarray | pd.Index = pd.Index(ex) + e_ = pd.Index(ex) else: e_ = ex From d8b555f1dc9ae78ec5e064372343111e7b44193d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 5 Jun 2023 13:02:08 +0200 Subject: [PATCH 67/96] Update core.py --- flox/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index beabc313f..83549c1c8 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1621,17 +1621,17 @@ def _convert_expected_groups_to_index( e = e.sort_values() out.append(e) elif ex is not None: - if not isinstance(ex, (np.ndarray, pd.Index)): - e_ = pd.Index(ex) - else: + if isinstance(ex, pd.Index): e_ = ex + else: + e_ = pd.Index(ex) if isbin_: out.append(pd.IntervalIndex.from_breaks(e_)) else: if sort: e_ = np.sort(e_) - out.append(pd.Index(e_)) + out.append(e_) else: assert ex is None out.append(None) From 179a51b83f5353a4339f196082f50d6b726bbbe9 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 6 Jun 2023 16:40:37 +0200 Subject: [PATCH 68/96] Update core.py --- flox/core.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/flox/core.py b/flox/core.py index 83549c1c8..5828c90e7 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1621,11 +1621,7 @@ def _convert_expected_groups_to_index( e = e.sort_values() out.append(e) elif ex is not None: - if isinstance(ex, pd.Index): - e_ = ex - else: - e_ = pd.Index(ex) - + e_ = ex if isbin_: out.append(pd.IntervalIndex.from_breaks(e_)) else: From 34eb030a4dc7db9d5aa6ee84f530f2f133a32e71 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 6 Jun 2023 16:46:33 +0200 Subject: [PATCH 69/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 5828c90e7..d04df1cae 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1623,7 +1623,7 @@ def _convert_expected_groups_to_index( elif ex is not None: e_ = ex if isbin_: - out.append(pd.IntervalIndex.from_breaks(e_)) + out.append(pd.IntervalIndex.from_breaks(e_)) # type: ignore [arg-type] # TODO: what do we want here? else: if sort: e_ = np.sort(e_) From f520b4685c17a442c3709e5732ad169371099596 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 6 Jun 2023 16:50:10 +0200 Subject: [PATCH 70/96] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index d04df1cae..8b16a83a6 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1626,7 +1626,7 @@ def _convert_expected_groups_to_index( out.append(pd.IntervalIndex.from_breaks(e_)) # type: ignore [arg-type] # TODO: what do we want here? else: if sort: - e_ = np.sort(e_) + e_ = np.sort(pd.Index(e_)) out.append(e_) else: assert ex is None From e51a7eacbff9236056b369616bbe10bb4dade00d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 6 Jun 2023 16:53:51 +0200 Subject: [PATCH 71/96] Update core.py --- flox/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/core.py b/flox/core.py index 8b16a83a6..7a90149a3 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1626,8 +1626,8 @@ def _convert_expected_groups_to_index( out.append(pd.IntervalIndex.from_breaks(e_)) # type: ignore [arg-type] # TODO: what do we want here? else: if sort: - e_ = np.sort(pd.Index(e_)) - out.append(e_) + e_ = np.sort(e_) + out.append(pd.Index(e_)) else: assert ex is None out.append(None) From 5eaeb1195099839e09e7a97864de4414eaf4fbae Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 6 Jun 2023 16:55:32 +0200 Subject: [PATCH 72/96] Update core.py --- flox/core.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index 7a90149a3..e4e199da2 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1621,13 +1621,12 @@ def _convert_expected_groups_to_index( e = e.sort_values() out.append(e) elif ex is not None: - e_ = ex if isbin_: - out.append(pd.IntervalIndex.from_breaks(e_)) # type: ignore [arg-type] # TODO: what do we want here? + out.append(pd.IntervalIndex.from_breaks(ex)) # type: ignore [arg-type] # TODO: what do we want here? else: if sort: - e_ = np.sort(e_) - out.append(pd.Index(e_)) + ex = np.sort(ex) + out.append(pd.Index(ex)) else: assert ex is None out.append(None) From 78aebe78dde00ef8087ad22626e869a90d15d226 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Jun 2023 18:07:05 +0000 Subject: [PATCH 73/96] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index f2dbd4b00..c7516fdc7 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -587,4 +587,4 @@ def _rechunk(func, obj, dim, labels, **kwargs): data=func(obj.data, axis=obj.get_axis_num(dim), labels=labels.data, **kwargs) ) - return obj \ No newline at end of file + return obj From 197b924ac6b190759979fb7700dd3a7bef0f713a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 17 Jun 2023 11:01:39 +0200 Subject: [PATCH 74/96] Update xarray.py --- flox/xarray.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index c7516fdc7..878d55eff 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -447,7 +447,8 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): for name, expect__, by_ in zip(group_names, expected_groups_valid_list, by_da): # Can't remove this till xarray handles IntervalIndex if isinstance(expect__, pd.IntervalIndex): - expect__ = expect__.to_tuples() + # expect__ = expect__.to_tuples() + expect__ = expect__.to_numpy() if isinstance(actual, xr.Dataset) and name in actual: actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping From fb5e6ecca1eff3cbd73d134a04949f91cf33dfac Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 17 Jun 2023 19:19:13 +0200 Subject: [PATCH 75/96] Have to add another type here because of xarray not supporting IntervalIndex --- flox/xarray.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 878d55eff..cd62a4591 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -329,17 +329,17 @@ def xarray_reduce( "flox does not support binning into an integer number of bins yet." ) - expect_: T_ExpectOpt + expect1: T_ExpectOpt if expect is None: if isbin_: raise ValueError( f"Please provided bin edges for group variable {idx} " f"named {group_name} in expected_groups." ) - expect_ = _get_expected_groups(b_.data, sort=sort) + expect1 = _get_expected_groups(b_.data, sort=sort) else: - expect_ = expect - expect_index = _convert_expected_groups_to_index((expect_,), (isbin_,), sort=sort)[0] + expect1 = expect + expect_index = _convert_expected_groups_to_index((expect1,), (isbin_,), sort=sort)[0] # The if-check is for type hinting mainly, it narrows down the return # type of _convert_expected_groups_to_index to pure pd.Index: @@ -444,11 +444,15 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): if all(d not in ds_broad[var].dims for d in dim_tuple): actual[var] = ds_broad[var] - for name, expect__, by_ in zip(group_names, expected_groups_valid_list, by_da): - # Can't remove this till xarray handles IntervalIndex - if isinstance(expect__, pd.IntervalIndex): - # expect__ = expect__.to_tuples() - expect__ = expect__.to_numpy() + expect3: T_ExpectIndex | np.ndarray + for name, expect2, by_ in zip(group_names, expected_groups_valid_list, by_da): + # Can't remove this until xarray handles IntervalIndex: + if isinstance(expect2, pd.IntervalIndex): + # TODO: Only place where expect3 is an ndarray, remove the type if xarray + # starts supporting IntervalIndex. + expect3 = expect2.to_numpy() + else: + expect3 = expect2 if isinstance(actual, xr.Dataset) and name in actual: actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping @@ -456,15 +460,15 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): if ( name in ds_broad.indexes and isinstance(ds_broad.indexes[name], pd.MultiIndex) - and not isinstance(expect, pd.RangeIndex) + and not isinstance(expect3, pd.RangeIndex) ): levelnames = ds_broad.indexes[name].names - expect__ = pd.MultiIndex.from_tuples(expect__.values, names=levelnames) - actual[name] = expect__ + expect3 = pd.MultiIndex.from_tuples(expect3.values, names=levelnames) + actual[name] = expect3 if Version(xr.__version__) > Version("2022.03.0"): actual = actual.set_coords(levelnames) else: - actual[name] = expect__ + actual[name] = expect3 if keep_attrs: actual[name].attrs = by_.attrs From fb03ef0acd42c2319d77854e2d637205340a8bb5 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 17 Jun 2023 19:26:23 +0200 Subject: [PATCH 76/96] Update xarray.py --- flox/xarray.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flox/xarray.py b/flox/xarray.py index cd62a4591..acd3f2d6c 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -463,6 +463,9 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): and not isinstance(expect3, pd.RangeIndex) ): levelnames = ds_broad.indexes[name].names + if isinstance(expect3, np.ndarray): + # TODO: workaoround for IntervalIndex issue. + raise NotImplementedError expect3 = pd.MultiIndex.from_tuples(expect3.values, names=levelnames) actual[name] = expect3 if Version(xr.__version__) > Version("2022.03.0"): From 8e55d3a62181153bcd011d99fc73af5b3a01eb6b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 23 Jun 2023 07:31:03 +0200 Subject: [PATCH 77/96] test ex instead of e --- flox/core.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/flox/core.py b/flox/core.py index e4e199da2..b0cfd917f 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1616,10 +1616,13 @@ def _convert_expected_groups_to_index( out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): - e = ex if sort: - e = e.sort_values() - out.append(e) + ex = ex.sort_values() + out.append(ex) + # e = ex + # if sort: + # e = e.sort_values() + # out.append(e) elif ex is not None: if isbin_: out.append(pd.IntervalIndex.from_breaks(ex)) # type: ignore [arg-type] # TODO: what do we want here? From 8b441cbb476929094ab66e3b07598565e3622e11 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 23 Jun 2023 07:37:07 +0200 Subject: [PATCH 78/96] Revert "test ex instead of e" This reverts commit 8e55d3a62181153bcd011d99fc73af5b3a01eb6b. --- flox/core.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/flox/core.py b/flox/core.py index b0cfd917f..e4e199da2 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1616,13 +1616,10 @@ def _convert_expected_groups_to_index( out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): + e = ex if sort: - ex = ex.sort_values() - out.append(ex) - # e = ex - # if sort: - # e = e.sort_values() - # out.append(e) + e = e.sort_values() + out.append(e) elif ex is not None: if isbin_: out.append(pd.IntervalIndex.from_breaks(ex)) # type: ignore [arg-type] # TODO: what do we want here? From bce7c73265c759e4c64baebbb5ebd6f19f0eed5a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 24 Jun 2023 08:18:58 +0200 Subject: [PATCH 79/96] check reveal_type --- flox/core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flox/core.py b/flox/core.py index e4e199da2..47b0b0ccc 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1615,10 +1615,14 @@ def _convert_expected_groups_to_index( ) -> T_ExpectIndexOptTuple: out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): + reveal_type(ex) if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): + reveal_type(ex) e = ex + reveal_type(e) if sort: e = e.sort_values() + reveal_type(e) out.append(e) elif ex is not None: if isbin_: From 36f872f41d537ab636f72cb79d58de430f5fb81b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 24 Jun 2023 08:36:07 +0200 Subject: [PATCH 80/96] without e --- flox/core.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/flox/core.py b/flox/core.py index 47b0b0ccc..21cbd9149 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1618,12 +1618,14 @@ def _convert_expected_groups_to_index( reveal_type(ex) if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): reveal_type(ex) - e = ex - reveal_type(e) if sort: - e = e.sort_values() - reveal_type(e) - out.append(e) + reveal_type(ex) + ex = ex.sort_values() + reveal_type(ex) + reveal_type(ex) + + out.append(ex) + elif ex is not None: if isbin_: out.append(pd.IntervalIndex.from_breaks(ex)) # type: ignore [arg-type] # TODO: what do we want here? From ad2037a5769090a8ff510761ba811dd50bace9d5 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 24 Jun 2023 08:46:19 +0200 Subject: [PATCH 81/96] try no redefinition --- flox/core.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index 21cbd9149..882a9850c 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1620,11 +1620,10 @@ def _convert_expected_groups_to_index( reveal_type(ex) if sort: reveal_type(ex) - ex = ex.sort_values() + out.append(ex.sort_values()) + else: reveal_type(ex) - reveal_type(ex) - - out.append(ex) + out.append(ex) elif ex is not None: if isbin_: From 5122703b40a76978b8653bd58ef043c82f3a0296 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 24 Jun 2023 09:00:07 +0200 Subject: [PATCH 82/96] IF redefining ex, mypy always takes the first definition of ex. even if it has been narrowed down. --- flox/core.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/flox/core.py b/flox/core.py index 882a9850c..4e3e1a3b3 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1615,16 +1615,11 @@ def _convert_expected_groups_to_index( ) -> T_ExpectIndexOptTuple: out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): - reveal_type(ex) if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): - reveal_type(ex) if sort: - reveal_type(ex) out.append(ex.sort_values()) else: - reveal_type(ex) out.append(ex) - elif ex is not None: if isbin_: out.append(pd.IntervalIndex.from_breaks(ex)) # type: ignore [arg-type] # TODO: what do we want here? From 12c91e19e08a516f3d179a0f7d7039e29d2e36de Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 07:17:45 +0200 Subject: [PATCH 83/96] test min_count=0 --- flox/aggregations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/aggregations.py b/flox/aggregations.py index 11438410b..1c5071fc2 100644 --- a/flox/aggregations.py +++ b/flox/aggregations.py @@ -207,7 +207,7 @@ def __init__( # The following are set by _initialize_aggregation self.finalize_kwargs: dict[Any, Any] = {} - self.min_count: int | None = None + self.min_count: int = 0 def _normalize_dtype_fill_value(self, value, name): value = _atleast_1d(value) From b0f415492ed32917814a46311951b05e06ad2be5 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 07:34:28 +0200 Subject: [PATCH 84/96] test min_count=0 --- flox/aggregations.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/flox/aggregations.py b/flox/aggregations.py index 1c5071fc2..e5013032a 100644 --- a/flox/aggregations.py +++ b/flox/aggregations.py @@ -504,7 +504,7 @@ def _initialize_aggregation( dtype, array_dtype, fill_value, - min_count: int | None, + min_count: int, finalize_kwargs: dict[Any, Any] | None, ) -> Aggregation: if not isinstance(func, Aggregation): @@ -559,9 +559,6 @@ def _initialize_aggregation( assert isinstance(finalize_kwargs, dict) agg.finalize_kwargs = finalize_kwargs - if min_count is None: - min_count = 0 - # This is needed for the dask pathway. # Because we use intermediate fill_value since a group could be # absent in one block, but present in another block From 1e11f8f49f519c42836214e2e53dacb9cca275e9 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 07:49:46 +0200 Subject: [PATCH 85/96] test min_count=0 --- flox/xarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index acd3f2d6c..14e1fda7e 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -75,7 +75,7 @@ def xarray_reduce( engine: str = "numpy", keep_attrs: bool | None = True, skipna: bool | None = None, - min_count: int | None = None, + min_count: int = 0, reindex: bool | None = None, **finalize_kwargs, ): @@ -151,7 +151,7 @@ def xarray_reduce( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - min_count : int, default: None + min_count : int: 0 The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the From 173facd3229813133efc85cd27f6bb3712efb497 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 08:02:43 +0200 Subject: [PATCH 86/96] test min_count=0 --- flox/core.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/flox/core.py b/flox/core.py index 4e3e1a3b3..554680a9f 100644 --- a/flox/core.py +++ b/flox/core.py @@ -862,7 +862,7 @@ def _finalize_results( """ squeezed = _squeeze_results(results, axis) - min_count = agg.min_count if agg.min_count is not None else 0 + min_count = agg.min_count if min_count > 0: counts = squeezed["intermediates"][-1] squeezed["intermediates"] = squeezed["intermediates"][:-1] @@ -1753,7 +1753,7 @@ def groupby_reduce( axis: T_AxesOpt = None, fill_value=None, dtype: np.typing.DTypeLike = None, - min_count: int | None = None, + min_count: int = 0, method: T_Method = "map-reduce", engine: T_Engine = "numpy", reindex: bool | None = None, @@ -1787,7 +1787,7 @@ def groupby_reduce( Value to assign when a label in ``expected_groups`` is not present. dtype : data-type , optional DType for the output. Can be anything that is accepted by ``np.dtype``. - min_count : int, default: None + min_count : int, default: 0 The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the @@ -1970,17 +1970,11 @@ def groupby_reduce( # fill_value applies to all-NaN groups as well as labels in expected_groups that are not found. # The only way to do this consistently is mask out using min_count # Consider np.sum([np.nan]) = np.nan, np.nansum([np.nan]) = 0 - if min_count is None: - if nax < by_.ndim or fill_value is not None: - min_count = 1 + if nax < by_.ndim or fill_value is not None: + min_count = 1 # TODO: set in xarray? - if ( - min_count is not None - and min_count > 0 - and func in ["nansum", "nanprod"] - and fill_value is None - ): + if min_count > 0 and func in ["nansum", "nanprod"] and fill_value is None: # nansum, nanprod have fill_value=0, 1 # overwrite than when min_count is set fill_value = np.nan From f11542b109fc0993ec3422dae28e899b89b71b23 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 08:13:52 +0200 Subject: [PATCH 87/96] test min_count = 0 --- flox/core.py | 12 ++++++++---- flox/xarray.py | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/flox/core.py b/flox/core.py index 554680a9f..c4f5f4606 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1753,7 +1753,7 @@ def groupby_reduce( axis: T_AxesOpt = None, fill_value=None, dtype: np.typing.DTypeLike = None, - min_count: int = 0, + min_count: int | None = 0, method: T_Method = "map-reduce", engine: T_Engine = "numpy", reindex: bool | None = None, @@ -1970,17 +1970,21 @@ def groupby_reduce( # fill_value applies to all-NaN groups as well as labels in expected_groups that are not found. # The only way to do this consistently is mask out using min_count # Consider np.sum([np.nan]) = np.nan, np.nansum([np.nan]) = 0 + if min_count is None: + min_count_ = 0 + else: + min_count_ = min_count if nax < by_.ndim or fill_value is not None: - min_count = 1 + min_count_ = 1 # TODO: set in xarray? - if min_count > 0 and func in ["nansum", "nanprod"] and fill_value is None: + if min_count_ > 0 and func in ["nansum", "nanprod"] and fill_value is None: # nansum, nanprod have fill_value=0, 1 # overwrite than when min_count is set fill_value = np.nan kwargs = dict(axis=axis_, fill_value=fill_value, engine=engine) - agg = _initialize_aggregation(func, dtype, array.dtype, fill_value, min_count, finalize_kwargs) + agg = _initialize_aggregation(func, dtype, array.dtype, fill_value, min_count_, finalize_kwargs) groups: tuple[np.ndarray | DaskArray, ...] if not has_dask: diff --git a/flox/xarray.py b/flox/xarray.py index 14e1fda7e..e0736e638 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -75,7 +75,7 @@ def xarray_reduce( engine: str = "numpy", keep_attrs: bool | None = True, skipna: bool | None = None, - min_count: int = 0, + min_count: int | None = 0, reindex: bool | None = None, **finalize_kwargs, ): @@ -426,7 +426,7 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): "sort": sort, "fill_value": fill_value, "method": method, - "min_count": min_count, + "min_count": min_count if min_count is not None else 0, "skipna": skipna, "engine": engine, "reindex": reindex, From dff70e6c8c56d2668be20f3765977b887c100a12 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 19:47:01 +0200 Subject: [PATCH 88/96] test min_count=0 --- flox/core.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/flox/core.py b/flox/core.py index c4f5f4606..54235964c 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1971,11 +1971,10 @@ def groupby_reduce( # The only way to do this consistently is mask out using min_count # Consider np.sum([np.nan]) = np.nan, np.nansum([np.nan]) = 0 if min_count is None: - min_count_ = 0 - else: - min_count_ = min_count - if nax < by_.ndim or fill_value is not None: - min_count_ = 1 + if nax < by_.ndim or fill_value is not None: + min_count_: int = 1 + else: + min_count_ = 0 # TODO: set in xarray? if min_count_ > 0 and func in ["nansum", "nanprod"] and fill_value is None: From 20c026945c25916880ada13f141285fe0dc6a2a8 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 19:49:43 +0200 Subject: [PATCH 89/96] test min_count=0 --- flox/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flox/core.py b/flox/core.py index 54235964c..bcffa55b6 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1975,6 +1975,8 @@ def groupby_reduce( min_count_: int = 1 else: min_count_ = 0 + else: + min_count_ = 0 # TODO: set in xarray? if min_count_ > 0 and func in ["nansum", "nanprod"] and fill_value is None: From 7c0720b02cca95d22d9f39c9a5af2d7063e4ede6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 19:54:51 +0200 Subject: [PATCH 90/96] test min_count=0 --- flox/core.py | 2 +- flox/xarray.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/core.py b/flox/core.py index bcffa55b6..2ddbf0ad3 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1753,7 +1753,7 @@ def groupby_reduce( axis: T_AxesOpt = None, fill_value=None, dtype: np.typing.DTypeLike = None, - min_count: int | None = 0, + min_count: int | None = None, method: T_Method = "map-reduce", engine: T_Engine = "numpy", reindex: bool | None = None, diff --git a/flox/xarray.py b/flox/xarray.py index e0736e638..9b7ca9834 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -75,7 +75,7 @@ def xarray_reduce( engine: str = "numpy", keep_attrs: bool | None = True, skipna: bool | None = None, - min_count: int | None = 0, + min_count: int | None = None, reindex: bool | None = None, **finalize_kwargs, ): From 29890809438e4dbd362f208af597031247704b61 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 19:59:47 +0200 Subject: [PATCH 91/96] test min_count=0 --- flox/xarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 9b7ca9834..acd3f2d6c 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -151,7 +151,7 @@ def xarray_reduce( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). - min_count : int: 0 + min_count : int, default: None The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the @@ -426,7 +426,7 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs): "sort": sort, "fill_value": fill_value, "method": method, - "min_count": min_count if min_count is not None else 0, + "min_count": min_count, "skipna": skipna, "engine": engine, "reindex": reindex, From 0573a9ae1c3c180ec0dd301dbfe4efd14fce261e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 20:12:52 +0200 Subject: [PATCH 92/96] test min_count=0 --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 2ddbf0ad3..6387ae83f 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1976,7 +1976,7 @@ def groupby_reduce( else: min_count_ = 0 else: - min_count_ = 0 + min_count_ = min_count # TODO: set in xarray? if min_count_ > 0 and func in ["nansum", "nanprod"] and fill_value is None: From 45a9bcf7e4de21f8418d483c2bdb918d3526cc08 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 20:19:20 +0200 Subject: [PATCH 93/96] test min_count=0 --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 6387ae83f..d0854a42e 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1787,7 +1787,7 @@ def groupby_reduce( Value to assign when a label in ``expected_groups`` is not present. dtype : data-type , optional DType for the output. Can be anything that is accepted by ``np.dtype``. - min_count : int, default: 0 + min_count : int, default: None The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the From 90bb6dccf837b8be75ac45d6dc3bb086358d4031 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 20:34:57 +0200 Subject: [PATCH 94/96] test min_count=0 --- asv_bench/benchmarks/combine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/combine.py b/asv_bench/benchmarks/combine.py index 286746c75..d6d7cda6e 100644 --- a/asv_bench/benchmarks/combine.py +++ b/asv_bench/benchmarks/combine.py @@ -43,8 +43,8 @@ class Combine1d(Combine): this is for reducting along a single dimension """ - def setup(self, *args, **kwargs): - def construct_member(groups): + def setup(self, *args, **kwargs) -> None: + def construct_member(groups) -> dict[str, Any]: return { "groups": groups, "intermediates": [ @@ -69,7 +69,7 @@ def construct_member(groups): ] self.kwargs = { "agg": flox.aggregations._initialize_aggregation( - "sum", "float64", np.float64, 0, None, {} + "sum", "float64", np.float64, 0, 0, {} ), "axis": (3,), } From 0380f97dea38fa870e61e7acd758ff91aaa58634 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 27 Jun 2023 20:36:18 +0200 Subject: [PATCH 95/96] test min_count=0 --- asv_bench/benchmarks/combine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asv_bench/benchmarks/combine.py b/asv_bench/benchmarks/combine.py index d6d7cda6e..27600685f 100644 --- a/asv_bench/benchmarks/combine.py +++ b/asv_bench/benchmarks/combine.py @@ -1,4 +1,5 @@ from functools import partial +from typing import Any import numpy as np From 8a6d04a32962317748ad9f24fd1809d1ead17c53 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 3 Jul 2023 09:46:09 -0600 Subject: [PATCH 96/96] Update asv_bench/benchmarks/combine.py --- asv_bench/benchmarks/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/combine.py b/asv_bench/benchmarks/combine.py index 27600685f..dd3d7a178 100644 --- a/asv_bench/benchmarks/combine.py +++ b/asv_bench/benchmarks/combine.py @@ -70,7 +70,7 @@ def construct_member(groups) -> dict[str, Any]: ] self.kwargs = { "agg": flox.aggregations._initialize_aggregation( - "sum", "float64", np.float64, 0, 0, {} + "sum", "float64", np.float64, 0, None, {} ), "axis": (3,), }