nixos/systemd-confinement: Fix with template units

Quoting from <https://github.com/NixOS/nixpkgs/issues/464323>: > When using confinement.enable = true for an instanced systemd service, > the 2nd instance will fail to start if the 1st instance is still > running. > > This only happens with confinement.enable = true;. Removing this > option causes both service instances to succeed. Maybe this happens > because the /run/confinement/fortune directory is shared between the > instances. The reason why this happens is that the root directory is set to "/run/confinement/${mkPathSafeName name}", which is the non-expanded unit name rather than the full unit name with the instance in case of a template unit. So when a template unit "foo@.service" is involved, the root directory is then "/run/confinement/foo_" regardless of instance, so foo@bar.service uses the same directory as foo@baz.service and when the first unit cleans up the root directory, it also makes it inaccessible for the unit started afterwards. I added a small property test to test concurrent invocations, so we cover this case and other issues that might come up with template units in a future refactor. Signed-off-by: aszlig <aszlig@nix.build>
2026-06-05 21:03:40 +00:00 · 2025-11-24 19:16:34 +01:00
parent 52be777193
commit 270601f6c4
3 changed files with 290 additions and 112 deletions
--- a/nixos/modules/security/systemd-confinement.nix
+++ b/nixos/modules/security/systemd-confinement.nix
@@ -128,11 +128,9 @@ in
            lib.mkIf config.confinement.enable {
              serviceConfig = {
                ReadOnlyPaths = [ "+/" ];
-                RuntimeDirectory = [ "confinement/${mkPathSafeName name}" ];
-                RootDirectory = "/run/confinement/${mkPathSafeName name}";
-                InaccessiblePaths = [
-                  "-+/run/confinement/${mkPathSafeName name}"
-                ];
+                RuntimeDirectory = [ "confinement/%n" ];
+                RootDirectory = "/run/confinement/%n";
+                InaccessiblePaths = [ "-+/run/confinement/%n" ];
                PrivateMounts = lib.mkDefault true;

                # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt
--- a/nixos/tests/systemd-confinement/concurrent-runner.py
+++ b/nixos/tests/systemd-confinement/concurrent-runner.py
@@ -0,0 +1,131 @@
+import click
+import socket
+import sys
+
+from hypothesis import given, settings, strategies as st
+from subprocess import run
+from time import sleep
+
+
+@st.composite
+def client_actions(draw, size: int = 10):
+    """
+    Generate a string describing a set of actions to perform.
+
+    This is specifically "stringly-typed" so that when looking at the output of
+    a failed test run, it's easy to visually identify what's wrong.
+
+    The string may consist of the following characters:
+
+      ' ' - Sleep for one tick (0.1s)
+      '[' - Start the client
+      ']' - Stop the client
+      'R' - Run a subprocess in the client
+
+    So for example the string "  [  R ]  " would mean:
+
+       * Sleep for two ticks ("  ")
+       * Start the client ("[")
+       * Sleep for two ticks ("  ")
+       * Run the subprocess ("R")
+       * Sleep for one tick (" ")
+       * Stop the client ("]")
+       * Sleep for two ticks ("  ")
+
+    Exactly the same encoding as above is used for the network protocol, so for
+    debugging issues, all you need to know is the representation above.
+    """
+    assert size > 1
+    start = None
+    stop = None
+    runs = set()
+
+    if draw(st.booleans()):
+        start = draw(st.integers(min_value=0, max_value=size - 2))
+        stop = draw(st.integers(min_value=start + 1, max_value=size - 1))
+        if start + 1 < stop:
+            runs = draw(st.sets(
+                st.integers(min_value=start + 1, max_value=stop - 1),
+                max_size=stop - start,
+            ))
+
+    out = ''
+    for index in range(size):
+        if start is not None and index == start:
+            out += '['
+        elif stop is not None and index == stop:
+            out += ']'
+        elif index in runs:
+            out += 'R'
+        else:
+            out += ' '
+    return out
+
+
+@click.group()
+def cli() -> None:
+    pass
+
+
+@cli.command('driver')
+@settings(deadline=None, max_examples=20)
+@given(st.lists(client_actions(), max_size=5))
+def test_driver(client_actions: list[str]) -> None:
+    clients: list[None | socket.socket] = [None] * len(client_actions)
+    for index in range(max(map(len, client_actions), default=0)):
+        for n, actions in enumerate(client_actions):
+            client = clients[n]
+            try:
+                action = actions[index]
+            except IndexError:
+                continue
+            match action:
+                case '[':
+                    client = socket.socket(socket.AF_INET6)
+                    client.settimeout(60)
+                    client.connect(('::1', 12345))
+                    client.send(b'[')
+                    clients[n] = client
+                case ']':
+                    assert client is not None
+                    client.send(b']')
+                    # At this point if we get ']' back from the client, we know
+                    # that everything went smoothly up to this point because
+                    # otherwise the client would have just thrown an exception
+                    # and the connection would be closed.
+                    assert client.recv(1) == b']'
+                    assert not client.recv(1)
+                    client.close()
+                    clients[n] = None
+                case 'R':
+                    assert client is not None
+                    client.send(b'R')
+                case ' ':
+                    if client is not None:
+                        client.send(b' ')
+        sleep(0.1)
+    assert all(c is None for c in clients), \
+           f'clients still running: {clients!r}'
+
+
+@cli.command('client')
+@click.argument('executable')
+def test_client(executable: str) -> None:
+    if not (action := sys.stdin.read(1)):
+        raise SystemExit(1)
+    assert action == '[', f'{action!r} != "["'
+    while action := sys.stdin.read(1):
+        match action:
+            case 'R':
+                run([executable], check=True, stdout=sys.stderr)
+            case ']':
+                sys.stdout.write(']')
+                return
+            case ' ':
+                sleep(0.1)
+            case '':
+                raise SystemExit(1)
+
+
+if __name__ == '__main__':
+    cli()
--- a/nixos/tests/systemd-confinement/default.nix
+++ b/nixos/tests/systemd-confinement/default.nix
@@ -2,7 +2,12 @@ import ../make-test-python.nix {
  name = "systemd-confinement";

  nodes.machine =
-    { pkgs, lib, ... }:
+    {
+      pkgs,
+      lib,
+      utils,
+      ...
+    }:
    let
      testLib = pkgs.python3Packages.buildPythonPackage {
        name = "confinement-testlib";
@@ -215,122 +220,166 @@ import ../make-test-python.nix {
            }
          );

+      concurrentRunner = pkgs.writers.writePython3 "concurrent-runner" {
+        libraries = [
+          pkgs.python3Packages.click
+          pkgs.python3Packages.hypothesis
+        ];
+      } ./concurrent-runner.py;
+
+      concurrentTest = {
+        systemd.services.concurrent-driver = {
+          description = "Driver for orchestrating concurrent processes";
+          requiredBy = [ "multi-user.target" ];
+          after = [
+            "network.target"
+            "concurrent-client.socket"
+          ];
+          serviceConfig.Type = "oneshot";
+          serviceConfig.ExecStart = utils.escapeSystemdExecArgs [
+            concurrentRunner
+            "driver"
+          ];
+        };
+
+        systemd.sockets.concurrent-client = {
+          description = "Socket for concurrent processes";
+          requiredBy = [ "sockets.target" ];
+          socketConfig.ListenStream = 12345;
+          socketConfig.Accept = true;
+        };
+
+        systemd.services."concurrent-client@" = {
+          description = "Process %I running concurrently with others";
+          confinement.enable = true;
+          serviceConfig.StandardInput = "socket";
+          serviceConfig.StandardError = "journal";
+          serviceConfig.ExecStart = utils.escapeSystemdExecArgs [
+            concurrentRunner
+            "client"
+            "${pkgs.fortune}/bin/fortune"
+          ];
+        };
+      };
+
    in
    {
-      imports = lib.imap1 mkTestStep (
-        parametrisedTests
-        ++ [
-          {
-            description = "existence of bind-mounted /etc";
-            config.serviceConfig.BindReadOnlyPaths = [ "/etc" ];
-            testScript = ''
-              assert Path('/etc/passwd').read_text()
-            '';
-          }
-          (
-            let
-              symlink = pkgs.runCommand "symlink" {
-                target = pkgs.writeText "symlink-target" "got me";
-              } "ln -s \"$target\" \"$out\"";
-            in
+      imports =
+        lib.imap1 mkTestStep (
+          parametrisedTests
+          ++ [
            {
-              description = "check if symlinks are properly bind-mounted";
-              config.confinement.packages = lib.singleton symlink;
+              description = "existence of bind-mounted /etc";
+              config.serviceConfig.BindReadOnlyPaths = [ "/etc" ];
              testScript = ''
-                assert Path('${symlink}').read_text() == 'got me'
+                assert Path('/etc/passwd').read_text()
              '';
            }
-          )
-          {
-            description = "check if StateDirectory works";
-            config.serviceConfig.User = "chroot-testuser";
-            config.serviceConfig.Group = "chroot-testgroup";
-            config.serviceConfig.StateDirectory = "testme";
+            (
+              let
+                symlink = pkgs.runCommand "symlink" {
+                  target = pkgs.writeText "symlink-target" "got me";
+                } "ln -s \"$target\" \"$out\"";
+              in
+              {
+                description = "check if symlinks are properly bind-mounted";
+                config.confinement.packages = lib.singleton symlink;
+                testScript = ''
+                  assert Path('${symlink}').read_text() == 'got me'
+                '';
+              }
+            )
+            {
+              description = "check if StateDirectory works";
+              config.serviceConfig.User = "chroot-testuser";
+              config.serviceConfig.Group = "chroot-testgroup";
+              config.serviceConfig.StateDirectory = "testme";

-            # We restart on purpose here since we want to check whether the state
-            # directory actually persists.
-            config.serviceConfig.Restart = "on-failure";
-            config.serviceConfig.RestartMode = "direct";
+              # We restart on purpose here since we want to check whether the state
+              # directory actually persists.
+              config.serviceConfig.Restart = "on-failure";
+              config.serviceConfig.RestartMode = "direct";

-            testScript = ''
-              assert not Path('/tmp/canary').exists()
-              Path('/tmp/canary').touch()
+              testScript = ''
+                assert not Path('/tmp/canary').exists()
+                Path('/tmp/canary').touch()

-              if (foo := Path('/var/lib/testme/foo')).exists():
-                assert Path('/var/lib/testme/foo').read_text() == 'works'
-              else:
-                Path('/var/lib/testme/foo').write_text('works')
-                print('<4>Exiting with failure to check persistence on restart.')
-                raise SystemExit(1)
-            '';
-          }
-          {
-            description = "check if /bin/sh works";
-            testScript = ''
-              assert Path('/bin/sh').exists()
+                if (foo := Path('/var/lib/testme/foo')).exists():
+                  assert Path('/var/lib/testme/foo').read_text() == 'works'
+                else:
+                  Path('/var/lib/testme/foo').write_text('works')
+                  print('<4>Exiting with failure to check persistence on restart.')
+                  raise SystemExit(1)
+              '';
+            }
+            {
+              description = "check if /bin/sh works";
+              testScript = ''
+                assert Path('/bin/sh').exists()

-              result = run(
-                ['/bin/sh', '-c', 'echo -n bar'],
-                capture_output=True,
-                check=True,
-              )
-              assert result.stdout == b'bar'
-            '';
-          }
-          {
-            description = "check if suppressing /bin/sh works";
-            config.confinement.binSh = null;
-            testScript = ''
-              assert not Path('/bin/sh').exists()
-              with pytest.raises(FileNotFoundError):
-                run(['/bin/sh', '-c', 'echo foo'])
-            '';
-          }
-          {
-            description = "check if we can set /bin/sh to something different";
-            config.confinement.binSh = "${pkgs.hello}/bin/hello";
-            testScript = ''
-              assert Path('/bin/sh').exists()
-              result = run(
-                ['/bin/sh', '-g', 'foo'],
-                capture_output=True,
-                check=True,
-              )
-              assert result.stdout == b'foo\n'
-            '';
-          }
-          {
-            description = "check if only Exec* dependencies are included";
-            config.environment.FOOBAR = pkgs.writeText "foobar" "eek";
-            testScript = ''
-              with pytest.raises(FileNotFoundError):
-                Path(os.environ['FOOBAR']).read_text()
-            '';
-          }
-          {
-            description = "check if fullUnit includes all dependencies";
-            config.environment.FOOBAR = pkgs.writeText "foobar" "eek";
-            config.confinement.fullUnit = true;
-            testScript = ''
-              assert Path(os.environ['FOOBAR']).read_text() == 'eek'
-            '';
-          }
-          {
-            description = "check if shipped unit file still works";
-            config.confinement.mode = "chroot-only";
-            rawUnit = ''
-              [Service]
-              SystemCallFilter=~kill
-              SystemCallErrorNumber=ELOOP
-            '';
-            testScript = ''
-              with pytest.raises(OSError) as excinfo:
-                os.kill(os.getpid(), signal.SIGKILL)
-              assert excinfo.value.errno == errno.ELOOP
-            '';
-          }
-        ]
-      );
+                result = run(
+                  ['/bin/sh', '-c', 'echo -n bar'],
+                  capture_output=True,
+                  check=True,
+                )
+                assert result.stdout == b'bar'
+              '';
+            }
+            {
+              description = "check if suppressing /bin/sh works";
+              config.confinement.binSh = null;
+              testScript = ''
+                assert not Path('/bin/sh').exists()
+                with pytest.raises(FileNotFoundError):
+                  run(['/bin/sh', '-c', 'echo foo'])
+              '';
+            }
+            {
+              description = "check if we can set /bin/sh to something different";
+              config.confinement.binSh = "${pkgs.hello}/bin/hello";
+              testScript = ''
+                assert Path('/bin/sh').exists()
+                result = run(
+                  ['/bin/sh', '-g', 'foo'],
+                  capture_output=True,
+                  check=True,
+                )
+                assert result.stdout == b'foo\n'
+              '';
+            }
+            {
+              description = "check if only Exec* dependencies are included";
+              config.environment.FOOBAR = pkgs.writeText "foobar" "eek";
+              testScript = ''
+                with pytest.raises(FileNotFoundError):
+                  Path(os.environ['FOOBAR']).read_text()
+              '';
+            }
+            {
+              description = "check if fullUnit includes all dependencies";
+              config.environment.FOOBAR = pkgs.writeText "foobar" "eek";
+              config.confinement.fullUnit = true;
+              testScript = ''
+                assert Path(os.environ['FOOBAR']).read_text() == 'eek'
+              '';
+            }
+            {
+              description = "check if shipped unit file still works";
+              config.confinement.mode = "chroot-only";
+              rawUnit = ''
+                [Service]
+                SystemCallFilter=~kill
+                SystemCallErrorNumber=ELOOP
+              '';
+              testScript = ''
+                with pytest.raises(OSError) as excinfo:
+                  os.kill(os.getpid(), signal.SIGKILL)
+                assert excinfo.value.errno == errno.ELOOP
+              '';
+            }
+          ]
+        )
+        ++ [ concurrentTest ];

      config.users.groups.chroot-testgroup = { };
      config.users.users.chroot-testuser = {