nixos: completely remove systemd-udev-settle.service

This service is essentially a random sleep() call: old software that
cannot properly handle the asynchronous nature of modern hardware
initialisation uses it to *mostly* get by.

Over the years it has caused so many issues in NixOS, like introducing
long delays in the boot process[1], complete deadlocks[2] and even failures
when reloading services or activating a new configuration[3].

systemd has been discouraging its use since 2011 (15 years ago!); it
officially deprecated it in 2018, and since 2020 it has been showing on
every boot a huge wall of text calling out all the offending services
that still use it.

Around 2021 we managed to fix about 15 NixOS modules[4] that were
relying on systemd-udev-settle, leaving ZFS[5] as practically the only
remaining user. However, since then, people have actually started to
bring it back with new services[6][7][8]. This is not acceptable.

So, to prevent any more (lazy) uses of systemd-udev-settle, we stop
providing the systemd-udev-settle.service unit entirely. For existing
modules that unfortunately still need it, we replace it with the command
`udevadm settle --timeout=180`, which is all that the service does.
Hopefully this will also increase the awareness that it's bad and
something to be fixed.

Note: I tested this change using

 - `nixosTests.zfs`,
 - `nixosTests.ifstate`,
 - `nixosTests.misc`,
 - `nixosTests.openvswitch`,
 - `nixosTests.predictable-interface-names`,
 - `nixosTests.nvidia-container-toolkit`

and by making sure that none of the 165 packages that provide upstream
units (via the `systemd.packages` option) have a dependency on
systemd-udev-settle.service.

[1]: https://github.com/NixOS/nixpkgs/pull/25311
[2]: https://github.com/NixOS/nixpkgs/issues/107341
[3]: https://github.com/NixOS/nixpkgs/pull/113804
[4]: https://github.com/NixOS/nixpkgs/issues/73095
[5]: https://github.com/openzfs/zfs/issues/10891
[6]: https://github.com/NixOS/nixpkgs/pull/257525#discussion_r1442702970
[7]: https://github.com/NixOS/nixpkgs/pull/460075
[8]: https://github.com/NixOS/nixpkgs/pull/284507
This commit is contained in:
rnhmjoj
2026-03-28 21:35:49 +01:00
parent 0134737fb0
commit 1e4eb2df3f
8 changed files with 22 additions and 20 deletions

View File

@@ -320,7 +320,6 @@
systemd.services.nvidia-container-toolkit-cdi-generator = {
description = "Container Device Interface (CDI) for Nvidia generator";
after = [ "systemd-udev-settle.service" ];
requiredBy = lib.mkMerge [
(lib.mkIf config.virtualisation.docker.enable [ "docker.service" ])
(lib.mkIf config.virtualisation.podman.enable [ "podman.service" ])
@@ -329,6 +328,11 @@
serviceConfig = {
RuntimeDirectory = "cdi";
RemainAfterExit = true;
# We wait for the udev events queue to empty in the *hope* that the
# devices needed here become available. This is terribly broken and
# essentially no better than a random sleep(). See PR #452645 for
# an attempt to fix this issue.
ExecStartPre = "-${lib.getExe' pkgs.systemd "udevadm"} settle --timeout=180";
ExecStart =
let
script = pkgs.callPackage ./cdi-generate.nix {

View File

@@ -455,7 +455,6 @@ in
"systemd-udevd-control.socket"
"systemd-udevd-kernel.socket"
"systemd-udevd.service"
"systemd-udev-settle.service"
"systemd-udev-trigger.service"
];
boot.initrd.systemd.storePaths = [

View File

@@ -69,7 +69,6 @@ let
# https://github.com/systemd/systemd/blob/main/units/systemd-networkd.service.in
commonServiceConfig = {
after = [
"systemd-udev-settle.service"
"network-pre.target"
"systemd-sysusers.service"
"systemd-sysctl.service"
@@ -88,6 +87,12 @@ let
"network.target"
];
# We wait for the udev events queue to empty in the *hope* that the
# devices needed here become available. This is terribly broken and
# essentially no better than a random sleep().
# FIXME: use .device unit dependencies instead.
serviceConfig.ExecStartPre = "-${lib.getExe' pkgs.systemd "udevadm"} settle --timeout=180";
unitConfig = {
# Avoid default dependencies like "basic.target", which prevents ifstate from starting before luks is unlocked.
DefaultDependencies = "no";
@@ -173,7 +178,7 @@ in
etc."ifstate/ifstate.yaml".source = settingsFormat.generate "ifstate.yaml" cfg.settings cfg.package;
};
systemd.services.ifstate = commonServiceConfig // {
systemd.services.ifstate = lib.recursiveUpdate commonServiceConfig {
description = "IfState";
wantedBy = [
@@ -263,7 +268,7 @@ in
"remote-fs.target"
];
services.ifstate-initrd = commonServiceConfig // {
services.ifstate-initrd = lib.recursiveUpdate commonServiceConfig {
description = "IfState initrd";
wantedBy = [

View File

@@ -65,7 +65,6 @@ let
"systemd-udevd-control.socket"
"systemd-udevd-kernel.socket"
"systemd-udevd.service"
"systemd-udev-settle.service"
]
++ (optional (!config.boot.isContainer) "systemd-udev-trigger.service")
++ [
@@ -799,7 +798,6 @@ in
systemd.services.systemd-random-seed.restartIfChanged = false;
systemd.services.systemd-remount-fs.restartIfChanged = false;
systemd.services.systemd-update-utmp.restartIfChanged = false;
systemd.services.systemd-udev-settle.restartIfChanged = false; # Causes long delays in nixos-rebuild
systemd.targets.local-fs.unitConfig.X-StopOnReconfiguration = true;
systemd.targets.remote-fs.unitConfig.X-StopOnReconfiguration = true;
systemd.services.systemd-importd.environment = proxy_env;

View File

@@ -158,16 +158,8 @@ let
}:
lib.nameValuePair "zfs-import-${pool}" {
description = "Import ZFS pool \"${pool}\"";
# We wait for systemd-udev-settle to ensure devices are available,
# but don't *require* it, because mounts shouldn't be killed if it's stopped.
# In the future, hopefully someone will complete this:
# https://github.com/zfsonlinux/zfs/pull/4943
wants = [
"systemd-udev-settle.service"
]
++ lib.optional (config.boot.initrd.clevis.useTang) "network-online.target";
wants = lib.optional (config.boot.initrd.clevis.useTang) "network-online.target";
after = [
"systemd-udev-settle.service"
"systemd-modules-load.service"
"systemd-ask-password-console.service"
]
@@ -189,6 +181,11 @@ let
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
# We wait for the udev events queue to empty in the *hope* that the
# devices needed by the pool become available. This is terribly broken
# and essentially no better than a random sleep(), but we can't do any
# better, see upstream issue https://github.com/openzfs/zfs/issues/10891
#
# The leading `-` tells systemd to ignore a non-zero exit status of this
# command (e.g. when the 180s timeout expires), so the pool import is
# still attempted afterwards. The wait is best-effort only: this unit
# used to merely *want* systemd-udev-settle.service rather than require
# it, and a failed settle must not prevent the import from running.
ExecStartPre = "-${lib.getExe' pkgs.systemd "udevadm"} settle --timeout=180";
};
environment.ZFS_FORCE = lib.optionalString force "-f";
script =

View File

@@ -67,7 +67,6 @@ in
systemd.services.ovsdb = {
description = "Open_vSwitch Database Server";
wantedBy = [ "multi-user.target" ];
after = [ "systemd-udev-settle.service" ];
path = [ cfg.package ];
restartTriggers = [
db

View File

@@ -106,8 +106,7 @@ in
assert "machine" == machine.succeed("hostname -s").strip()
with subtest("whether systemd-udevd automatically loads modules for our hardware"):
machine.succeed("systemctl start systemd-udev-settle.service")
machine.wait_for_unit("systemd-udev-settle.service")
machine.succeed("udevadm settle --timeout=180")
assert "mousedev" in machine.succeed("lsmod")
with subtest("whether systemd-tmpfiles-clean works"):

View File

@@ -43,9 +43,10 @@ pkgs.lib.listToAttrs (
meta = { };
nodes.machine =
{ lib, ... }:
{ pkgs, lib, ... }:
let
script = ''
${lib.getExe' pkgs.systemd "udevadm"} settle --timeout=180
ip link
if ${lib.optionalString predictable "!"} ip link show eth0; then
echo Success