Skip to content

Commit 3a94b39

Browse files
committed
Incorporated new functions, have to add tests
1 parent 2412106 commit 3a94b39

File tree

2 files changed

+159
-6
lines changed

2 files changed

+159
-6
lines changed

src/blosc2/lazyexpr.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,9 @@ def ne_evaluate(expression, local_dict=None, **kwargs):
206206
"hypot",
207207
"maximum",
208208
"minimum",
209+
"isin",
210+
"startswith",
211+
"endswith",
209212
)
210213

211214

@@ -2567,12 +2570,11 @@ def result_type(
25672570
# Follow NumPy rules for scalar-array operations
25682571
# Create small arrays with the same dtypes and let NumPy's type promotion determine the result type
25692572
arrs = [
2570-
value
2573+
(np.array(value).dtype if isinstance(value, str) else value)
25712574
if (np.isscalar(value) or not hasattr(value, "dtype"))
25722575
else np.array([0], dtype=_convert_dtype(value.dtype))
25732576
for value in arrays_and_dtypes
25742577
]
2575-
arrs = [np.array(a).dtype if isinstance(a, str) else a for a in arrs]
25762578
return np.result_type(*arrs)
25772579

25782580

src/blosc2/ndarray.py

Lines changed: 155 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,15 @@ def is_inside_new_expr() -> bool:
159159
return builtins.any(frame_info.function in {"_new_expr", "_open_lazyarray"} for frame_info in stack)
160160

161161

162+
def is_inside_ne_evaluate() -> bool:
    """
    Whether the current code is being executed from an ne_evaluate call.

    Returns
    -------
    bool
        True if any frame on the current call stack belongs to a function
        named ``ne_evaluate``; False otherwise.
    """
    # Walk the raw frame chain instead of calling inspect.stack(): the latter
    # builds a FrameInfo (with source-context lookup) for every frame, while
    # only the function names are needed here.
    frame = inspect.currentframe()
    while frame is not None:
        if frame.f_code.co_name == "ne_evaluate":
            return True
        frame = frame.f_back
    return False
169+
170+
162171
def make_key_hashable(key):
163172
if isinstance(key, slice):
164173
return (key.start, key.stop, key.step)
@@ -3014,10 +3023,11 @@ def clip(
30143023

30153024
def chunkwise_clip(inputs, output, offset):
30163025
x, min, max = inputs
3017-
output[:] = np.clip(x, min, max)
3026+
output[...] = np.clip(x, min, max)
30183027

30193028
dtype = blosc2.result_type(x)
3020-
return blosc2.lazyudf(chunkwise_clip, (x, min, max), dtype=dtype, shape=x.shape, **kwargs)
3029+
shape = () if np.isscalar(x) else None
3030+
return blosc2.lazyudf(chunkwise_clip, (x, min, max), dtype=dtype, shape=shape, **kwargs)
30213031

30223032

30233033
def logaddexp(x1: int | float | blosc2.Array, x2: int | float | blosc2.Array, **kwargs: Any) -> NDArray:
@@ -3047,15 +3057,16 @@ def logaddexp(x1: int | float | blosc2.Array, x2: int | float | blosc2.Array, **
30473057

30483058
def chunkwise_logaddexp(inputs, output, offset):
30493059
x1, x2 = inputs
3050-
output[:] = np.logaddexp(x1, x2)
3060+
output[...] = np.logaddexp(x1, x2)
30513061

30523062
dtype = blosc2.result_type(x1, x2)
30533063
if dtype == blosc2.bool_:
30543064
raise TypeError("logaddexp doesn't accept boolean arguments.")
30553065

30563066
if np.issubdtype(dtype, np.integer):
30573067
dtype = blosc2.float32
3058-
return blosc2.lazyudf(chunkwise_logaddexp, (x1, x2), dtype=dtype, shape=x1.shape, **kwargs)
3068+
shape = () if np.isscalar(x1) and np.isscalar(x2) else None
3069+
return blosc2.lazyudf(chunkwise_logaddexp, (x1, x2), dtype=dtype, shape=shape, **kwargs)
30593070

30603071

30613072
# implemented in python-blosc2
@@ -4825,6 +4836,146 @@ def where(
48254836
return condition.where(x, y)
48264837

48274838

4839+
def isin(
4840+
element: str | int | float | blosc2.Array,
4841+
test_elements: str | int | float | blosc2.Array,
4842+
assume_unique=False,
4843+
invert=False,
4844+
kind=None,
4845+
**kwargs,
4846+
) -> NDArray:
4847+
"""
4848+
Copy-pasted from numpy documentation: https://numpy.org/doc/stable/reference/generated/numpy.isin.html
4849+
Calculates element in test_elements, broadcasting over element only. Returns a boolean array of the same shape as element that is True where an element of element is in test_elements and False otherwise.
4850+
4851+
Parameters
4852+
----------
4853+
element : blosc2.Array
4854+
Input array.
4855+
4856+
test_elements : blosc2.Array
4857+
The values against which to test each value of element. This argument is flattened if it is an array or array_like.
4858+
4859+
assume_unique: bool, optional
4860+
If True, the input arrays are both assumed to be unique, which can speed up the calculation. Default is False.
4861+
4862+
invert: bool, optional
4863+
If True, the values in the returned array are inverted, as if calculating element not in test_elements. Default is False. np.isin(a, b, invert=True) is equivalent to (but faster than) np.invert(np.isin(a, b)).
4864+
4865+
kind: {None, 'sort', 'table'}, optional
4866+
The algorithm to use. This will not affect the final result, but will affect the speed and memory use. The default, None, will select automatically based on memory considerations.
4867+
If 'sort', will use a mergesort-based approach. This will have a memory usage of roughly 6 times the sum of the sizes of element and test_elements, not accounting for size of dtypes.
4868+
If 'table', will use a lookup table approach similar to a counting sort. This is only available for boolean and integer arrays. This will have a memory usage of the size of element plus the max-min value of test_elements. assume_unique has no effect when the 'table' option is used.
4869+
If None, will automatically choose 'table' if the required memory allocation is less than or equal to 6 times the sum of the sizes of element and test_elements, otherwise will use 'sort'. This is done to not use a large amount of memory by default, even though 'table' may be faster in most cases. If 'table' is chosen, assume_unique will have no effect.
4870+
4871+
kwargs: Any
4872+
kwargs accepted by the :func:`empty` constructor
4873+
4874+
Returns
4875+
-------
4876+
isin: blosc2.Array, bool
4877+
Has the same shape as element. The values element[isin] are in test_elements.
4878+
4879+
"""
4880+
4881+
def chunkwise_isin(inputs, output, offset):
4882+
x1, x2 = inputs
4883+
# output[...] = np.isin(x1, x2, assume_unique=assume_unique, invert=invert, kind=kind)
4884+
output[...] = np.isin(x1, x2)
4885+
4886+
if is_inside_ne_evaluate(): # haven't been able to use miniexpr
4887+
shape = () if np.isscalar(element) else None
4888+
return blosc2.lazyudf(chunkwise_isin, (element, test_elements), dtype=blosc2.bool_, shape=shape)
4889+
4890+
return blosc2.LazyExpr(new_op=(element, "isin", test_elements))
4891+
4892+
4893+
def startswith(a: str | blosc2.Array, prefix: str | blosc2.Array) -> NDArray:
    """
    Test, element-wise, whether the strings in `a` start with `prefix`.

    Modelled on :func:`numpy.char.startswith`. The ``start`` / ``end``
    arguments of the NumPy function are not supported by this signature.

    Parameters
    ----------
    a : str | blosc2.Array
        Input string or array of bytes_ or str_ dtype.

    prefix : str | blosc2.Array
        Prefix string or array of bytes_ or str_ dtype.

    Returns
    -------
    out: blosc2.Array, bool
        True where the string element in `a` starts with `prefix`, otherwise
        False. When called from within ``ne_evaluate`` this is the object
        returned by :func:`blosc2.lazyudf`; otherwise it is a
        :class:`blosc2.LazyExpr`.
    """

    def chunkwise_startswith(inputs, output, offset):
        x1, x2 = inputs
        output[...] = np.char.startswith(x1, x2)

    if is_inside_ne_evaluate():  # haven't been able to use miniexpr
        # np.char.startswith broadcasts `a` against `prefix`, so the result is
        # 0-d only when *both* operands are scalars (same rule as logaddexp).
        shape = () if np.isscalar(a) and np.isscalar(prefix) else None
        return blosc2.lazyudf(chunkwise_startswith, (a, prefix), dtype=blosc2.bool_, shape=shape)

    return blosc2.LazyExpr(new_op=(a, "startswith", prefix))
4934+
4935+
4936+
def endswith(a: str | blosc2.Array, suffix: str | blosc2.Array) -> NDArray:
    """
    Test, element-wise, whether the strings in `a` end with `suffix`.

    Modelled on :func:`numpy.char.endswith`. The ``start`` / ``end``
    arguments of the NumPy function are not supported by this signature.

    Parameters
    ----------
    a : str | blosc2.Array
        Input string or array of bytes_ or str_ dtype.

    suffix : str | blosc2.Array
        Suffix string or array of bytes_ or str_ dtype.

    Returns
    -------
    out: blosc2.Array, bool
        True where the string element in `a` ends with `suffix`, otherwise
        False. When called from within ``ne_evaluate`` this is the object
        returned by :func:`blosc2.lazyudf`; otherwise it is a
        :class:`blosc2.LazyExpr`.
    """

    def chunkwise_endswith(inputs, output, offset):
        x1, x2 = inputs
        output[...] = np.char.endswith(x1, x2)

    if is_inside_ne_evaluate():  # haven't been able to use miniexpr
        # np.char.endswith broadcasts `a` against `suffix`, so the result is
        # 0-d only when *both* operands are scalars (same rule as logaddexp).
        shape = () if np.isscalar(a) and np.isscalar(suffix) else None
        return blosc2.lazyudf(chunkwise_endswith, (a, suffix), dtype=blosc2.bool_, shape=shape)

    return blosc2.LazyExpr(new_op=(a, "endswith", suffix))
4977+
4978+
48284979
def lazywhere(value1=None, value2=None):
48294980
"""Decorator to apply a where condition to a LazyExpr."""
48304981

0 commit comments

Comments
 (0)