Files
Bad3r aff8c2ae29 python3Packages.markitdown: fix passthru.updateScript
gitUpdater {} never selected a version: list-git-tags returns v-prefixed
tags (v0.1.6, matching src.tag = "v${version}"), and generic-updater's
`grep "^[0-9]"` filter drops every tag that does not start with a digit,
so the script always printed [] and made no change.

Set rev-prefix = "v" so versions are compared as 0.1.6, and add
ignoredVersions = "(a|b|rc)[0-9]+$" to skip the PEP 440 pre-release tags
(v0.1.5b1, v0.1.0a1, ...) that version_is_unstable's odd-minor and
patchlevel>=90 heuristics do not recognise.

Validation:
- nix-shell maintainers/scripts/update.nix --argstr package
  python3Packages.markitdown --argstr skip-prompt true
  (0.1.4 -> 0.1.6 selected, pre-releases skipped, correct hash resolved).
- nix fmt -- pkgs/development/python-modules/markitdown/default.nix
- git diff --check
2026-05-29 21:58:55 +03:00

123 lines
2.7 KiB
Nix

# markitdown, built from the packages/markitdown subdirectory of the
# microsoft/markitdown GitHub repository.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  hatchling,

  # dependencies
  azure-ai-documentintelligence,
  azure-identity,
  beautifulsoup4,
  charset-normalizer,
  defusedxml,
  lxml,
  magika,
  mammoth,
  markdownify,
  olefile,
  openpyxl,
  pandas,
  pdfminer-six,
  pdfplumber,
  pydub,
  python-pptx,
  requests,
  speechrecognition,
  xlrd,
  youtube-transcript-api,

  # tests
  pytestCheckHook,

  # passthru
  gitUpdater,
}:

let
  inherit (stdenv.hostPlatform) isAarch64 isLinux;

  # The import check and the test suite are switched off on this platform;
  # the reason is given next to `pythonImportsCheck` below.
  isAarch64Linux = isLinux && isAarch64;
in
buildPythonPackage (finalAttrs: {
  pname = "markitdown";
  version = "0.1.6";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "microsoft";
    repo = "markitdown";
    # Upstream release tags carry a "v" prefix.
    tag = "v${finalAttrs.version}";
    hash = "sha256-pLL44w2jVj5X5/TmPqSveQe/9WLj0ddDUYPoSQlz+9E=";
  };

  # The package is built from a subdirectory of the fetched source tree.
  sourceRoot = "${finalAttrs.src.name}/packages/markitdown";

  build-system = [ hatchling ];

  pythonRelaxDeps = [
    "magika"
    "mammoth"
    "youtube-transcript-api"
  ];

  dependencies = [
    azure-ai-documentintelligence
    azure-identity
    beautifulsoup4
    charset-normalizer
    defusedxml
    lxml
    magika
    mammoth
    markdownify
    olefile
    openpyxl
    pandas
    pdfminer-six
    pdfplumber
    pydub
    python-pptx
    requests
    speechrecognition
    xlrd
    youtube-transcript-api
  ];

  # On aarch64-linux the cpuinfo test fails because /sys/devices/system/cpu/
  # does not exist in the sandbox:
  #   terminate called after throwing an instance of 'onnxruntime::OnnxRuntimeException'
  #
  # Everything that needs to import markitdown is therefore skipped there:
  # both the import check and the whole test phase.
  pythonImportsCheck = lib.optionals (!isAarch64Linux) [ "markitdown" ];
  doCheck = !isAarch64Linux;

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # These need network access.
    "test_markitdown_remote"
    "test_module_vectors"
    "test_cli_vectors"
    "test_module_misc"

    # These need the optional azure-ai-contentunderstanding package, which is
    # unavailable in nixpkgs; the fallback stubs then fail with
    # `UserAgentPolicy() takes no arguments`.
    "test_nonexistent_analyzer_raises_value_error"
    "test_cu_registered_before_docintel"
  ];

  passthru = {
    updateScript = gitUpdater {
      # Tags look like "v0.1.6"; strip the "v" so versions compare as "0.1.6".
      rev-prefix = "v";
      # Leave out PEP 440 pre-release tags (alpha, beta, release candidate).
      ignoredVersions = "(a|b|rc)[0-9]+$";
    };
  };

  meta = {
    description = "Python tool for converting files and office documents to Markdown";
    homepage = "https://github.com/microsoft/markitdown";
    changelog = "https://github.com/microsoft/markitdown/releases/tag/${finalAttrs.src.tag}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.malik ];
  };
})