Speed up categorical regressor with numba #3353

Intron7 · 2024-11-11T14:06:20Z

Use numba to create the regressor for categorical regression

codecov · 2024-11-11T14:21:13Z

❌ 7 Tests Failed:

Tests completed	Failed	Passed	Skipped
2140	7	2133	96

View the top 3 failed test(s) by shortest run time

tests/test_preprocessing.py::test_regress_out_constants

Stack Traces | 0.013s run time

    def test_regress_out_constants():
        adata = AnnData(np.hstack((np.full((10, 1), 0.0), np.full((10, 1), 1.0))))
        adata.obs["percent_mito"] = np.random.rand(adata.X.shape[0])
        adata.obs["n_counts"] = adata.X.sum(axis=1)
        adata_copy = adata.copy()

>       sc.pp.regress_out(adata, keys=["n_counts", "percent_mito"])

tests/test_preprocessing.py:436: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../site-packages/legacy_api_wrap/__init__.py:82: in fn_compatible
    return fn(*args_all, **kw)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scanpy/preprocessing/_simple.py:793: in regress_out
    res = Parallel(n_jobs=n_jobs)(
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13....../site-packages/joblib/parallel.py:1985: in __call__
    return output if self.return_generator else list(output)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13....../site-packages/joblib/parallel.py:1913: in _get_sequential_output
    res = func(*args, **kwargs)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scanpy/preprocessing/_simple.py:815: in _regress_out_chunk
    import statsmodels.api as sm
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../site-packages/statsmodels/api.py:76: in <module>
    from . import datasets, distributions, iolib, regression, robust, tools
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../statsmodels/distributions/__init__.py:7: in <module>
    from .discrete import (
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

    import numpy as np

    from scipy.stats import rv_discrete, poisson, nbinom
    from scipy.special import gammaln
>   from scipy._lib._util import _lazywhere
E   ImportError: cannot import name '_lazywhere' from 'scipy._lib._util' (.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scipy/_lib/_util.py)

.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../statsmodels/distributions/discrete.py:5: ImportError

tests/test_preprocessing.py::test_regress_out_int[float64]

Stack Traces | 0.104s run time

dtype = dtype('float64')

    @pytest.mark.parametrize("dtype", [np.uint32, np.float64, np.uint64])
    def test_regress_out_int(dtype):
        adata = pbmc3k()[:200, :200].copy()
        adata.X = adata.X.astype(np.float64 if dtype != np.uint32 else np.float32)
        dtype = adata.X.dtype
        adata.obs["labels"] = pd.Categorical(
            (["A"] * (adata.X.shape[0] - 100)) + (["B"] * 100)
        )
        adata_other = adata.copy()
        adata_other.X = adata_other.X.astype(dtype)
        # results using only one processor
>       sc.pp.regress_out(adata, keys=["labels"])

tests/test_preprocessing.py:366: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../site-packages/legacy_api_wrap/__init__.py:82: in fn_compatible
    return fn(*args_all, **kw)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scanpy/preprocessing/_simple.py:793: in regress_out
    res = Parallel(n_jobs=n_jobs)(
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13....../site-packages/joblib/parallel.py:1985: in __call__
    return output if self.return_generator else list(output)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13....../site-packages/joblib/parallel.py:1913: in _get_sequential_output
    res = func(*args, **kwargs)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scanpy/preprocessing/_simple.py:815: in _regress_out_chunk
    import statsmodels.api as sm
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../site-packages/statsmodels/api.py:76: in <module>
    from . import datasets, distributions, iolib, regression, robust, tools
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../statsmodels/distributions/__init__.py:7: in <module>
    from .discrete import (
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

    import numpy as np

    from scipy.stats import rv_discrete, poisson, nbinom
    from scipy.special import gammaln
>   from scipy._lib._util import _lazywhere
E   ImportError: cannot import name '_lazywhere' from 'scipy._lib._util' (.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scipy/_lib/_util.py)

.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../statsmodels/distributions/discrete.py:5: ImportError

tests/test_preprocessing.py::test_regress_out_int[uint32]

Stack Traces | 0.108s run time

dtype = dtype('float32')

    @pytest.mark.parametrize("dtype", [np.uint32, np.float64, np.uint64])
    def test_regress_out_int(dtype):
        adata = pbmc3k()[:200, :200].copy()
        adata.X = adata.X.astype(np.float64 if dtype != np.uint32 else np.float32)
        dtype = adata.X.dtype
        adata.obs["labels"] = pd.Categorical(
            (["A"] * (adata.X.shape[0] - 100)) + (["B"] * 100)
        )
        adata_other = adata.copy()
        adata_other.X = adata_other.X.astype(dtype)
        # results using only one processor
>       sc.pp.regress_out(adata, keys=["labels"])

tests/test_preprocessing.py:366: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../site-packages/legacy_api_wrap/__init__.py:82: in fn_compatible
    return fn(*args_all, **kw)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scanpy/preprocessing/_simple.py:793: in regress_out
    res = Parallel(n_jobs=n_jobs)(
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13....../site-packages/joblib/parallel.py:1985: in __call__
    return output if self.return_generator else list(output)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13....../site-packages/joblib/parallel.py:1913: in _get_sequential_output
    res = func(*args, **kwargs)
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scanpy/preprocessing/_simple.py:815: in _regress_out_chunk
    import statsmodels.api as sm
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../site-packages/statsmodels/api.py:76: in <module>
    from . import datasets, distributions, iolib, regression, robust, tools
.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../statsmodels/distributions/__init__.py:7: in <module>
    from .discrete import (
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

    import numpy as np

    from scipy.stats import rv_discrete, poisson, nbinom
    from scipy.special import gammaln
>   from scipy._lib._util import _lazywhere
E   ImportError: cannot import name '_lazywhere' from 'scipy._lib._util' (.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../scipy/_lib/_util.py)

.../hostedtoolcache/Python/3.13.3.........................../x64/lib/python3.13.../statsmodels/distributions/discrete.py:5: ImportError

To view more test analytics, go to the Test Analytics Dashboard
_{📋 Got 3 mins? Take this short survey to help us improve Test Analytics.}

tests/test_preprocessing.py

src/scanpy/preprocessing/_simple.py

ilan-gold · 2024-11-11T15:13:59Z

tests/test_preprocessing.py

+    np.testing.assert_array_almost_equal(adata.X, tester)
+
+
+def test_regressor_categorical():


I would

explain why this test exists (to test against a previous implementation? I am impartial whether it's necessary TBH since we are already testing for reproducibility, could see getting rid of this)

refactor the "Create org regressors" into a helper function like create_original

I can see your point here

Do you have an opinion on the first point? Is this test necessary? If so, perhaps a comment then?

tests/test_preprocessing.py

ilan-gold

I think this is missing: #3353 (comment) and the first part of https://github.com/scverse/scanpy/pull/3353/files#r1836830351

tests/test_preprocessing.py

src/scanpy/preprocessing/_simple.py

ilan-gold · 2024-11-12T13:32:38Z

src/scanpy/preprocessing/_simple.py

@@ -722,13 +737,13 @@ def regress_out(
                "we regress on the mean for each category."
            )
        logg.debug("... regressing on per-gene means within categories")
-        regressors = np.zeros(X.shape, dtype="float32")
+        # Create numpy array's from categorical variable
+        cats = np.int64(len(adata.obs[keys[0]].cat.categories))


Also comment why np.int64

Because it has to be done, due to weird typing from pandas. This ensures that it works within the kernel.

so len doesn’t return a Python int? That’s a pandas bug.

Co-authored-by: Ilan Gold <[email protected]>

tests/test_preprocessing.py

src/scanpy/preprocessing/_simple.py

ilan-gold · 2024-11-12T15:53:37Z

tests/test_preprocessing.py

+    np.testing.assert_array_almost_equal(adata.X, tester)
+
+
+def test_regressor_categorical():


Do you have an opinion on the first point? Is this test necessary? If so, perhaps a comment then?

src/scanpy/preprocessing/_simple.py

tests/test_preprocessing.py

Intron7 · 2025-02-10T16:17:59Z

I renamed one variable to make it clearer what it is. Added some comments that should add more context about what the code is doing.

src/scanpy/preprocessing/_simple.py

tests/test_preprocessing.py

src/scanpy/preprocessing/_simple.py

Co-authored-by: Ilan Gold <[email protected]>

ilan-gold · 2025-02-11T10:38:05Z

src/scanpy/preprocessing/_simple.py

+    X: np.ndarray, number_categories: int, cat_array: np.ndarray
+) -> np.ndarray:
+    # create regressor matrix for categorical variables
+    regressors = np.zeros(X.shape, dtype=X.dtype)


check dtype for behavior with integer dtype i.e., need to ensure this is a floating point matrix

Yes! Discovered a bug! regressors needs to be float32 to match old behavior, not sure why it was hardcoded

ilan-gold

Why no test for the dtype if we're also fixing that bug here? or in #3461?

tests/test_preprocessing.py

src/scanpy/preprocessing/_simple.py

docs/release-notes/3353.performance.md

Co-authored-by: Philipp A. <[email protected]>

Co-authored-by: Ilan Gold <[email protected]>

… create_cat_regressor

Intron7 added 3 commits November 11, 2024 14:35

add function and test

086f70d

add test

37244a9

add test for regressor

b4ecb0a

Intron7 added this to the 1.11.0 milestone Nov 11, 2024

Intron7 and others added 2 commits November 11, 2024 15:54

add release note

36858d9

Merge branch 'main' into create_cat_regressor

be1bccc

Intron7 requested review from flying-sheep and ilan-gold November 11, 2024 14:56

ilan-gold requested changes Nov 11, 2024

View reviewed changes

Intron7 added 2 commits November 11, 2024 16:25

update typing

a1a59ae

update test

7b41bc8

Intron7 requested a review from ilan-gold November 11, 2024 15:36

ilan-gold requested changes Nov 12, 2024

View reviewed changes

Intron7 added 2 commits November 12, 2024 13:45

update test

119a142

update dtype

d77fa9c

ilan-gold requested changes Nov 12, 2024

View reviewed changes

Intron7 and others added 4 commits November 12, 2024 14:44

rename cats

236e356

Update tests/test_preprocessing.py

bb9cde4

Co-authored-by: Ilan Gold <[email protected]>

Update tests/test_preprocessing.py

bbb5035

Co-authored-by: Ilan Gold <[email protected]>

Update tests/test_preprocessing.py

2a92193

Co-authored-by: Ilan Gold <[email protected]>

Intron7 requested a review from ilan-gold November 12, 2024 15:18

ilan-gold requested changes Nov 12, 2024

View reviewed changes

ilan-gold and others added 4 commits November 12, 2024 16:53

Update tests/test_preprocessing.py

c7b78c0

remove test

b001c0e

update kernel

c3ce03e

remove test

c50226a

Intron7 requested a review from ilan-gold November 13, 2024 10:55

flying-sheep requested changes Nov 14, 2024

View reviewed changes

src/scanpy/preprocessing/_simple.py Outdated Show resolved Hide resolved

src/scanpy/preprocessing/_simple.py Outdated Show resolved Hide resolved

tests/test_preprocessing.py Outdated Show resolved Hide resolved

make test together

c6665f4

Intron7 requested a review from ilan-gold February 10, 2025 16:16

ilan-gold reviewed Feb 11, 2025

View reviewed changes

src/scanpy/preprocessing/_simple.py Outdated Show resolved Hide resolved

tests/test_preprocessing.py Show resolved Hide resolved

src/scanpy/preprocessing/_simple.py Outdated Show resolved Hide resolved

Update src/scanpy/preprocessing/_simple.py

104a0f3

Co-authored-by: Ilan Gold <[email protected]>

ilan-gold requested changes Feb 11, 2025

View reviewed changes

Intron7 added 3 commits February 11, 2025 13:11

update dtypes

f9b13be

update atol for test

6eafd04

remove int fix

1dae8f4

Intron7 requested a review from ilan-gold February 13, 2025 14:47

ilan-gold requested changes Feb 13, 2025

View reviewed changes

tests/test_preprocessing.py Show resolved Hide resolved

src/scanpy/preprocessing/_simple.py Outdated Show resolved Hide resolved

flying-sheep reviewed Feb 18, 2025

View reviewed changes

docs/release-notes/3353.performance.md Outdated Show resolved Hide resolved

flying-sheep modified the milestones: 1.12.0, 1.11.1 Feb 18, 2025

flying-sheep added the Area – Performance 🐌 label Feb 18, 2025

flying-sheep changed the title ~~Create cat regressor~~ Speed up categorical regressor with numba Feb 18, 2025

flying-sheep assigned Intron7 Feb 18, 2025

flying-sheep modified the milestones: 1.11.1, 1.11.2 Mar 31, 2025

Intron7 and others added 4 commits April 14, 2025 10:02

Update docs/release-notes/3353.performance.md

39ad1c0

Co-authored-by: Philipp A. <[email protected]>

Merge branch 'main' into create_cat_regressor

4f3db86

Update src/scanpy/preprocessing/_simple.py

2d578c8

Co-authored-by: Ilan Gold <[email protected]>

Fix sparse check

eedb314

flying-sheep assigned ilan-gold May 15, 2025

ilan-gold added 7 commits May 21, 2025 12:36

Merge branch 'main' into create_cat_regressor

6a19bc6

(fix): correct dtype check

0053bce

(fix): regress_out with int tested

75be7e2

Merge branch 'main' into create_cat_regressor

2fdd0bf

(fix): float32 regress data type

2b3f1f1

Merge branch 'create_cat_regressor' of github.com:scverse/scanpy into…

0ab9fee

… create_cat_regressor

(fix): atol/rtol

832981b

		np.testing.assert_array_almost_equal(adata.X, tester)


		def test_regressor_categorical():

Speed up categorical regressor with numba #3353

Are you sure you want to change the base?

Speed up categorical regressor with numba #3353

Uh oh!

Conversation

Intron7 commented Nov 11, 2024

Uh oh!

codecov bot commented Nov 11, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

❌ 7 Tests Failed:

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

ilan-gold left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Intron7 commented Feb 10, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

ilan-gold left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

codecov bot commented Nov 11, 2024 •

edited

Loading