Files

135 lines
3.1 KiB
Nix

# Nix packaging expression for dask-ml.
#
# A function from its dependencies (the argument set below) to the result of
# `buildPythonPackage`. The source is fetched from the upstream GitHub tag,
# built through pyproject metadata with hatchling + hatch-vcs, and checked
# with pytest via `pytestCheckHook`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  hatch-vcs,
  hatchling,

  # dependencies
  dask-glm,
  distributed,
  multipledispatch,
  numba,
  numpy,
  packaging,
  pandas,
  scikit-learn,
  scipy,
  # `dask` itself is not listed in `dependencies`; it is only consulted for
  # its `optional-dependencies.array` / `.dataframe` lists further down.
  dask,

  # tests
  pytest-mock,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "dask-ml";
  version = "2025.1.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "dask";
    repo = "dask-ml";
    # `rec` makes `version` visible here, so the tag follows version bumps.
    tag = "v${version}";
    # Hash of the fetched source tree; must be refreshed whenever `version` changes.
    hash = "sha256-DHxx0LFuJmGWYuG/WGHj+a5XHAEekBmlHUUb90rl2IY=";
  };

  # Rewrite pytest's `addopts` in pyproject.toml from a single string into a
  # list of arguments. `--replace-fail` aborts the build if the original line
  # is no longer present, so an upstream change cannot go unnoticed.
  # NOTE(review): presumably required because the pytest in use only accepts
  # the list form in this config table — confirm against the pytest version.
  postPatch = ''
    substituteInPlace pyproject.toml \
    --replace-fail 'addopts = "-rsx -v --durations=10 --color=yes"' \
    'addopts = ["-rsx", "-v", "--durations=10", "--color=yes"]'
  '';

  build-system = [
    hatch-vcs
    hatchling
  ];

  dependencies = [
    dask-glm
    distributed
    multipledispatch
    numba
    numpy
    packaging
    pandas
    scikit-learn
    scipy
  ]
  # Pull in dask's "array" and "dataframe" extras alongside the direct deps.
  ++ dask.optional-dependencies.array
  ++ dask.optional-dependencies.dataframe;

  # Modules that must import successfully from the built package.
  pythonImportsCheck = [
    "dask_ml"
    "dask_ml.naive_bayes"
    "dask_ml.wrappers"
    "dask_ml.utils"
  ];

  nativeCheckInputs = [
    pytest-mock
    pytestCheckHook
  ];

  # Whole test files excluded from the check phase; reasons are given per group.
  disabledTestPaths = [
    # RuntimeError: Attempting to use an asynchronous Client in a synchronous context of `dask.compute`
    # https://github.com/dask/dask-ml/issues/1016
    "tests/model_selection/test_hyperband.py"
    "tests/model_selection/test_incremental.py"
    "tests/model_selection/test_incremental_warns.py"
    "tests/model_selection/test_successive_halving.py"

    # MockClassifier predates sklearn 1.6 __sklearn_tags__
    "tests/model_selection/dask_searchcv/test_model_selection.py"
    "tests/model_selection/dask_searchcv/test_model_selection_sklearn.py"
  ];

  # Individual tests excluded by name; reasons are given per group.
  disabledTests = [
    # AssertionError: Regex pattern did not match.
    "test_unknown_category_transform_array"

    # ValueError: cannot broadcast shape (nan,) to shape (nan,)
    # https://github.com/dask/dask-ml/issues/1012
    "test_fit_array"
    "test_fit_frame"
    "test_fit_transform_frame"
    "test_laziness"
    "test_lr_score"
    "test_ok"
    "test_scoring_string"

    # sklearn 1.7+: PCA / IncrementalPCA missing power_iteration_normalizer
    "test_pca_randomized_solver"
    "test_pca_check_projection"
    "test_pca_inverse"
    "test_pca_score"
    "test_pca_score2"
    "test_randomized_pca_check_projection"
    "test_randomized_pca_inverse"
    "test_pca_int_dtype_upcast_to_double"
    "test_incremental_pca"
    "test_incremental_pca_against_pca_iris"
    "test_incremental_pca_against_pca_random_data"
    "test_singular_values"
    "test_whitening"

    # sklearn 1.8 API drift in LogisticRegression/ParallelPostFit
    "test_predict"
    "test_multiclass"

    # XPASS becomes failure under xfail_strict (now honored on pytest 9)
    "test_soft_voting_frame"
  ];

  # Permit local network access inside the Darwin build sandbox.
  # NOTE(review): presumably needed by tests that start a local `distributed`
  # cluster — confirm.
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "Scalable Machine Learning with Dask";
    homepage = "https://github.com/dask/dask-ml";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ GaetanLepage ];
  };
}