From 270601f6c466bf0cf5bc76197254492ef31a7aaa Mon Sep 17 00:00:00 2001 From: aszlig Date: Mon, 24 Nov 2025 19:16:34 +0100 Subject: [PATCH] nixos/systemd-confinement: Fix with template units Quoting from : > When using confinement.enable = true for an instanced systemd service, > the 2nd instance will fail to start if the 1st instance is still > running. > > This only happens with confinement.enable = true;. Removing this > option causes both service instances to succeed. Maybe this happens > because the /run/confinement/fortune directory is shared between the > instances. The reason why this happens is that the root directory is set to "/run/confinement/${mkPathSafeName name}", which is the non-expanded unit name rather than the full unit name with the instance in case of a template unit. So when a template unit "foo@.service" is involved, the root directory is then "/run/confinement/foo_" regardless of instance, so foo@bar.service uses the same directory as foo@baz.service and when the first unit cleans up the root directory, it also makes it inaccessible for the unit started afterwards. I added a small property test to test concurrent invocations, so we cover this case and other issues that might come up with template units in a future refactor. Signed-off-by: aszlig --- .../modules/security/systemd-confinement.nix | 8 +- .../systemd-confinement/concurrent-runner.py | 131 +++++++++ nixos/tests/systemd-confinement/default.nix | 263 +++++++++++------- 3 files changed, 290 insertions(+), 112 deletions(-) create mode 100644 nixos/tests/systemd-confinement/concurrent-runner.py diff --git a/nixos/modules/security/systemd-confinement.nix b/nixos/modules/security/systemd-confinement.nix index c3c0a1a4975d..89f96a2fbb59 100644 --- a/nixos/modules/security/systemd-confinement.nix +++ b/nixos/modules/security/systemd-confinement.nix @@ -128,11 +128,9 @@ in lib.mkIf config.confinement.enable { serviceConfig = { ReadOnlyPaths = [ "+/" ]; - RuntimeDirectory = [ "confinement/${mkPathSafeName name}" ]; - RootDirectory = "/run/confinement/${mkPathSafeName name}"; - InaccessiblePaths = [ - "-+/run/confinement/${mkPathSafeName name}" - ]; + RuntimeDirectory = [ "confinement/%n" ]; + RootDirectory = "/run/confinement/%n"; + InaccessiblePaths = [ "-+/run/confinement/%n" ]; PrivateMounts = lib.mkDefault true; # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt diff --git a/nixos/tests/systemd-confinement/concurrent-runner.py b/nixos/tests/systemd-confinement/concurrent-runner.py new file mode 100644 index 000000000000..58cf9f7ff4d9 --- /dev/null +++ b/nixos/tests/systemd-confinement/concurrent-runner.py @@ -0,0 +1,131 @@ +import click +import socket +import sys + +from hypothesis import given, settings, strategies as st +from subprocess import run +from time import sleep + + +@st.composite +def client_actions(draw, size: int = 10): + """ + Generate a string describing a set of actions to perform. + + This is specifically "stringly-typed" so that when looking at the output of + a failed test run, it's easy to visually identify what's wrong. + + The string may consist of the following characters: + + ' ' - Sleep for one tick (0.1s) + '[' - Start the client + ']' - Stop the client + 'R' - Run a subprocess in the client + + So for example the string " [ R ] " would mean: + + * Sleep for two ticks (" ") + * Start the client ("[") + * Sleep for two ticks (" ") + * Run the subprocess ("R") + * Sleep for one tick (" ") + * Stop the client ("]") + * Sleep for two ticks (" ") + + Exactly the same encoding as above is used for the network protocol, so for + debugging issues, all you need to know is the representation above. + """ + assert size > 1 + start = None + stop = None + runs = set() + + if draw(st.booleans()): + start = draw(st.integers(min_value=0, max_value=size - 2)) + stop = draw(st.integers(min_value=start + 1, max_value=size - 1)) + if start + 1 < stop: + runs = draw(st.sets( + st.integers(min_value=start + 1, max_value=stop - 1), + max_size=stop - start, + )) + + out = '' + for index in range(size): + if start is not None and index == start: + out += '[' + elif stop is not None and index == stop: + out += ']' + elif index in runs: + out += 'R' + else: + out += ' ' + return out + + +@click.group() +def cli() -> None: + pass + + +@cli.command('driver') +@settings(deadline=None, max_examples=20) +@given(st.lists(client_actions(), max_size=5)) +def test_driver(client_actions: list[str]) -> None: + clients: list[None | socket.socket] = [None] * len(client_actions) + for index in range(max(map(len, client_actions), default=0)): + for n, actions in enumerate(client_actions): + client = clients[n] + try: + action = actions[index] + except IndexError: + continue + match action: + case '[': + client = socket.socket(socket.AF_INET6) + client.settimeout(60) + client.connect(('::1', 12345)) + client.send(b'[') + clients[n] = client + case ']': + assert client is not None + client.send(b']') + # At this point if we get ']' back from the client, we know + # that everything went smoothly up to this point because + # otherwise the client would have just thrown an exception + # and the connection would be closed. + assert client.recv(1) == b']' + assert not client.recv(1) + client.close() + clients[n] = None + case 'R': + assert client is not None + client.send(b'R') + case ' ': + if client is not None: + client.send(b' ') + sleep(0.1) + assert all(c is None for c in clients), \ + f'clients still running: {clients!r}' + + +@cli.command('client') +@click.argument('executable') +def test_client(executable: str) -> None: + if not (action := sys.stdin.read(1)): + raise SystemExit(1) + assert action == '[', f'{action!r} != "["' + while action := sys.stdin.read(1): + match action: + case 'R': + run([executable], check=True, stdout=sys.stderr) + case ']': + sys.stdout.write(']') + return + case ' ': + sleep(0.1) + case '': + raise SystemExit(1) + + +if __name__ == '__main__': + cli() diff --git a/nixos/tests/systemd-confinement/default.nix b/nixos/tests/systemd-confinement/default.nix index 90cfcb8ab8af..0f8a0a326e7d 100644 --- a/nixos/tests/systemd-confinement/default.nix +++ b/nixos/tests/systemd-confinement/default.nix @@ -2,7 +2,12 @@ import ../make-test-python.nix { name = "systemd-confinement"; nodes.machine = - { pkgs, lib, ... }: + { + pkgs, + lib, + utils, + ... + }: let testLib = pkgs.python3Packages.buildPythonPackage { name = "confinement-testlib"; @@ -215,122 +220,166 @@ import ../make-test-python.nix { } ); + concurrentRunner = pkgs.writers.writePython3 "concurrent-runner" { + libraries = [ + pkgs.python3Packages.click + pkgs.python3Packages.hypothesis + ]; + } ./concurrent-runner.py; + + concurrentTest = { + systemd.services.concurrent-driver = { + description = "Driver for orchestrating concurrent processes"; + requiredBy = [ "multi-user.target" ]; + after = [ + "network.target" + "concurrent-client.socket" + ]; + serviceConfig.Type = "oneshot"; + serviceConfig.ExecStart = utils.escapeSystemdExecArgs [ + concurrentRunner + "driver" + ]; + }; + + systemd.sockets.concurrent-client = { + description = "Socket for concurrent processes"; + requiredBy = [ "sockets.target" ]; + socketConfig.ListenStream = 12345; + socketConfig.Accept = true; + }; + + systemd.services."concurrent-client@" = { + description = "Process %I running concurrently with others"; + confinement.enable = true; + serviceConfig.StandardInput = "socket"; + serviceConfig.StandardError = "journal"; + serviceConfig.ExecStart = utils.escapeSystemdExecArgs [ + concurrentRunner + "client" + "${pkgs.fortune}/bin/fortune" + ]; + }; + }; + in { - imports = lib.imap1 mkTestStep ( - parametrisedTests - ++ [ - { - description = "existence of bind-mounted /etc"; - config.serviceConfig.BindReadOnlyPaths = [ "/etc" ]; - testScript = '' - assert Path('/etc/passwd').read_text() - ''; - } - ( - let - symlink = pkgs.runCommand "symlink" { - target = pkgs.writeText "symlink-target" "got me"; - } "ln -s \"$target\" \"$out\""; - in + imports = + lib.imap1 mkTestStep ( + parametrisedTests + ++ [ { - description = "check if symlinks are properly bind-mounted"; - config.confinement.packages = lib.singleton symlink; + description = "existence of bind-mounted /etc"; + config.serviceConfig.BindReadOnlyPaths = [ "/etc" ]; testScript = '' - assert Path('${symlink}').read_text() == 'got me' + assert Path('/etc/passwd').read_text() ''; } - ) - { - description = "check if StateDirectory works"; - config.serviceConfig.User = "chroot-testuser"; - config.serviceConfig.Group = "chroot-testgroup"; - config.serviceConfig.StateDirectory = "testme"; + ( + let + symlink = pkgs.runCommand "symlink" { + target = pkgs.writeText "symlink-target" "got me"; + } "ln -s \"$target\" \"$out\""; + in + { + description = "check if symlinks are properly bind-mounted"; + config.confinement.packages = lib.singleton symlink; + testScript = '' + assert Path('${symlink}').read_text() == 'got me' + ''; + } + ) + { + description = "check if StateDirectory works"; + config.serviceConfig.User = "chroot-testuser"; + config.serviceConfig.Group = "chroot-testgroup"; + config.serviceConfig.StateDirectory = "testme"; - # We restart on purpose here since we want to check whether the state - # directory actually persists. - config.serviceConfig.Restart = "on-failure"; - config.serviceConfig.RestartMode = "direct"; + # We restart on purpose here since we want to check whether the state + # directory actually persists. + config.serviceConfig.Restart = "on-failure"; + config.serviceConfig.RestartMode = "direct"; - testScript = '' - assert not Path('/tmp/canary').exists() - Path('/tmp/canary').touch() + testScript = '' + assert not Path('/tmp/canary').exists() + Path('/tmp/canary').touch() - if (foo := Path('/var/lib/testme/foo')).exists(): - assert Path('/var/lib/testme/foo').read_text() == 'works' - else: - Path('/var/lib/testme/foo').write_text('works') - print('<4>Exiting with failure to check persistence on restart.') - raise SystemExit(1) - ''; - } - { - description = "check if /bin/sh works"; - testScript = '' - assert Path('/bin/sh').exists() + if (foo := Path('/var/lib/testme/foo')).exists(): + assert Path('/var/lib/testme/foo').read_text() == 'works' + else: + Path('/var/lib/testme/foo').write_text('works') + print('<4>Exiting with failure to check persistence on restart.') + raise SystemExit(1) + ''; + } + { + description = "check if /bin/sh works"; + testScript = '' + assert Path('/bin/sh').exists() - result = run( - ['/bin/sh', '-c', 'echo -n bar'], - capture_output=True, - check=True, - ) - assert result.stdout == b'bar' - ''; - } - { - description = "check if suppressing /bin/sh works"; - config.confinement.binSh = null; - testScript = '' - assert not Path('/bin/sh').exists() - with pytest.raises(FileNotFoundError): - run(['/bin/sh', '-c', 'echo foo']) - ''; - } - { - description = "check if we can set /bin/sh to something different"; - config.confinement.binSh = "${pkgs.hello}/bin/hello"; - testScript = '' - assert Path('/bin/sh').exists() - result = run( - ['/bin/sh', '-g', 'foo'], - capture_output=True, - check=True, - ) - assert result.stdout == b'foo\n' - ''; - } - { - description = "check if only Exec* dependencies are included"; - config.environment.FOOBAR = pkgs.writeText "foobar" "eek"; - testScript = '' - with pytest.raises(FileNotFoundError): - Path(os.environ['FOOBAR']).read_text() - ''; - } - { - description = "check if fullUnit includes all dependencies"; - config.environment.FOOBAR = pkgs.writeText "foobar" "eek"; - config.confinement.fullUnit = true; - testScript = '' - assert Path(os.environ['FOOBAR']).read_text() == 'eek' - ''; - } - { - description = "check if shipped unit file still works"; - config.confinement.mode = "chroot-only"; - rawUnit = '' - [Service] - SystemCallFilter=~kill - SystemCallErrorNumber=ELOOP - ''; - testScript = '' - with pytest.raises(OSError) as excinfo: - os.kill(os.getpid(), signal.SIGKILL) - assert excinfo.value.errno == errno.ELOOP - ''; - } - ] - ); + result = run( + ['/bin/sh', '-c', 'echo -n bar'], + capture_output=True, + check=True, + ) + assert result.stdout == b'bar' + ''; + } + { + description = "check if suppressing /bin/sh works"; + config.confinement.binSh = null; + testScript = '' + assert not Path('/bin/sh').exists() + with pytest.raises(FileNotFoundError): + run(['/bin/sh', '-c', 'echo foo']) + ''; + } + { + description = "check if we can set /bin/sh to something different"; + config.confinement.binSh = "${pkgs.hello}/bin/hello"; + testScript = '' + assert Path('/bin/sh').exists() + result = run( + ['/bin/sh', '-g', 'foo'], + capture_output=True, + check=True, + ) + assert result.stdout == b'foo\n' + ''; + } + { + description = "check if only Exec* dependencies are included"; + config.environment.FOOBAR = pkgs.writeText "foobar" "eek"; + testScript = '' + with pytest.raises(FileNotFoundError): + Path(os.environ['FOOBAR']).read_text() + ''; + } + { + description = "check if fullUnit includes all dependencies"; + config.environment.FOOBAR = pkgs.writeText "foobar" "eek"; + config.confinement.fullUnit = true; + testScript = '' + assert Path(os.environ['FOOBAR']).read_text() == 'eek' + ''; + } + { + description = "check if shipped unit file still works"; + config.confinement.mode = "chroot-only"; + rawUnit = '' + [Service] + SystemCallFilter=~kill + SystemCallErrorNumber=ELOOP + ''; + testScript = '' + with pytest.raises(OSError) as excinfo: + os.kill(os.getpid(), signal.SIGKILL) + assert excinfo.value.errno == errno.ELOOP + ''; + } + ] + ) + ++ [ concurrentTest ]; config.users.groups.chroot-testgroup = { }; config.users.users.chroot-testuser = {