# Nix package expression for cuTile Python (`cuda-tile`), built with the CUDA
# backend stdenv. The C extension is compiled through the project's own
# setuptools-driven CMake invocation, so the cmake configure hook is disabled.
{
  lib,
  config,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  typing-extensions,

  # nativeBuildInputs
  cmake,
  cudaPackages,

  # buildInputs
  dlpack,

  # tests
  pytestCheckHook,
  torch,

  # passthru
  cuda-tile,
}:

let
  # XLA source tree at the revision pinned by upstream, handed to the build
  # through CUDA_TILE_CMAKE_XLA_PATH below. See:
  # https://github.com/NVIDIA/cutile-python/blob/v1.4.0/cmake/FetchXLAHeaders.cmake#L5-L6
  xlaHeaders = fetchFromGitHub {
    owner = "openxla";
    repo = "xla";
    rev = "b6f37ab7767f428fd6f993de5e211643d47d4deb";
    hash = "sha256-U4e3k4nm9gB1x5hahXwycWSryBQuxIPmOzVf6kuahY0=";
  };
in
buildPythonPackage.override { stdenv = cudaPackages.backendStdenv; } (finalAttrs: {
  pname = "cuda-tile";
  version = "1.4.0";
  pyproject = true;
  __structuredAttrs = true;

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "cutile-python";
    tag = "v${finalAttrs.version}";
    hash = "sha256-R5V69nJLQ3/1995ezH1/WuueA6cm1vhKZdOECqbwPbU=";
  };

  postPatch =
    # Relax the exact setuptools pin in pyproject.toml
    ''
      substituteInPlace pyproject.toml \
        --replace-fail "setuptools==80.10.2" "setuptools"
    ''
    # Link the shared extension against libc explicitly; without it the link fails with:
    # xla_ffi.cpp:(.text+0x308): undefined reference to `__stack_chk_fail'
    + ''
      substituteInPlace cext/CMakeLists.txt \
        --replace-fail \
          "target_link_libraries(_cext_shared _cext_static \''${Python_LIBRARIES} \''${asan_library})" \
          "target_link_libraries(_cext_shared _cext_static \''${Python_LIBRARIES} \''${asan_library} c)"
    ''
    # Drop the vendored (broken) CUDA toolkit discovery module
    + ''
      rm cmake/FindCUDAToolkit.cmake
    ''
    # Write the package version into the VERSION file by hand
    + ''
      echo "${finalAttrs.version}" >src/cuda/tile/VERSION
    '';

  build-system = [ setuptools ];

  # Source paths for dlpack and XLA, passed to the build via environment variables
  env.CUDA_TILE_CMAKE_DLPACK_PATH = dlpack;
  env.CUDA_TILE_CMAKE_XLA_PATH = xlaHeaders;

  nativeBuildInputs = [
    cmake
    cudaPackages.cuda_nvcc
  ];

  # cmake is only needed as a tool here; skip the cmake configure hook
  dontUseCmakeConfigure = true;

  buildInputs = [
    cudaPackages.cuda_cudart # cuda.h
  ];

  dependencies = [ typing-extensions ];

  optional-dependencies.tileiras = [
    # unpackaged as it doesn't make sense to have it in nixpkgs
    # cuda-toolkit
  ];

  pythonImportsCheck = [ "cuda.tile" ];

  nativeCheckInputs = [
    pytestCheckHook
    torch
  ];

  # The test suite needs a physical GPU, so it is disabled in the default build
  # and exposed through `passthru.gpuCheck` instead.
  doCheck = false;

  passthru = {
    gpuCheck = cuda-tile.overridePythonAttrs {
      requiredSystemFeatures = [ "cuda" ];
      doCheck = true;
    };
  };

  meta = {
    description = "Programming model for writing parallel kernels for NVIDIA GPUs";
    homepage = "https://docs.nvidia.com/cuda/cutile-python/";
    downloadPage = "https://github.com/NVIDIA/cutile-python";
    changelog = "https://docs.nvidia.com/cuda/cutile-python/generated/release_notes.html";
    license = lib.licenses.asl20;
    maintainers = [
      lib.maintainers.GaetanLepage
      lib.maintainers.prince213
    ];
    # Marked broken unless nixpkgs is configured with CUDA support
    broken = !config.cudaSupport;
  };
})