nixos/systemd-confinement: Fix with template units

Quoting from <https://github.com/NixOS/nixpkgs/issues/464323>:

> When using confinement.enable = true for an instanced systemd service,
> the 2nd instance will fail to start if the 1st instance is still
> running.
>
> This only happens with confinement.enable = true;. Removing this
> option causes both service instances to succeed. Maybe this happens
> because the /run/confinement/fortune directory is shared between the
> instances.

The reason why this happens is that the root directory is set to
"/run/confinement/${mkPathSafeName name}", which is the non-expanded
unit name rather than the full unit name with the instance in case of a
template unit.

So when a template unit "foo@.service" is involved, the root directory
is then "/run/confinement/foo_" regardless of instance, so
foo@bar.service uses the same directory as foo@baz.service and when the
first unit cleans up the root directory, it also makes it inaccessible
for the unit started afterwards.

I added a small property test to test concurrent invocations, so we
cover this case and other issues that might come up with template units
in a future refactor.

Signed-off-by: aszlig <aszlig@nix.build>
This commit is contained in:
aszlig
2025-11-24 19:16:34 +01:00
parent 52be777193
commit 270601f6c4
3 changed files with 290 additions and 112 deletions

View File

@@ -128,11 +128,9 @@ in
lib.mkIf config.confinement.enable {
serviceConfig = {
ReadOnlyPaths = [ "+/" ];
RuntimeDirectory = [ "confinement/${mkPathSafeName name}" ];
RootDirectory = "/run/confinement/${mkPathSafeName name}";
InaccessiblePaths = [
"-+/run/confinement/${mkPathSafeName name}"
];
RuntimeDirectory = [ "confinement/%n" ];
RootDirectory = "/run/confinement/%n";
InaccessiblePaths = [ "-+/run/confinement/%n" ];
PrivateMounts = lib.mkDefault true;
# https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt

View File

@@ -0,0 +1,131 @@
import click
import socket
import sys
from hypothesis import given, settings, strategies as st
from subprocess import run
from time import sleep
@st.composite
def client_actions(draw, size: int = 10):
"""
Generate a string describing a set of actions to perform.
This is specifically "stringly-typed" so that when looking at the output of
a failed test run, it's easy to visually identify what's wrong.
The string may consist of the following characters:
' ' - Sleep for one tick (0.1s)
'[' - Start the client
']' - Stop the client
'R' - Run a subprocess in the client
So for example the string " [ R ] " would mean:
* Sleep for two ticks (" ")
* Start the client ("[")
* Sleep for two ticks (" ")
* Run the subprocess ("R")
* Sleep for one tick (" ")
* Stop the client ("]")
* Sleep for two ticks (" ")
Exactly the same encoding as above is used for the network protocol, so for
debugging issues, all you need to know is the representation above.
"""
assert size > 1
start = None
stop = None
runs = set()
if draw(st.booleans()):
start = draw(st.integers(min_value=0, max_value=size - 2))
stop = draw(st.integers(min_value=start + 1, max_value=size - 1))
if start + 1 < stop:
runs = draw(st.sets(
st.integers(min_value=start + 1, max_value=stop - 1),
max_size=stop - start,
))
out = ''
for index in range(size):
if start is not None and index == start:
out += '['
elif stop is not None and index == stop:
out += ']'
elif index in runs:
out += 'R'
else:
out += ' '
return out
@click.group()
def cli() -> None:
pass
@cli.command('driver')
@settings(deadline=None, max_examples=20)
@given(st.lists(client_actions(), max_size=5))
def test_driver(client_actions: list[str]) -> None:
clients: list[None | socket.socket] = [None] * len(client_actions)
for index in range(max(map(len, client_actions), default=0)):
for n, actions in enumerate(client_actions):
client = clients[n]
try:
action = actions[index]
except IndexError:
continue
match action:
case '[':
client = socket.socket(socket.AF_INET6)
client.settimeout(60)
client.connect(('::1', 12345))
client.send(b'[')
clients[n] = client
case ']':
assert client is not None
client.send(b']')
# At this point if we get ']' back from the client, we know
# that everything went smoothly up to this point because
# otherwise the client would have just thrown an exception
# and the connection would be closed.
assert client.recv(1) == b']'
assert not client.recv(1)
client.close()
clients[n] = None
case 'R':
assert client is not None
client.send(b'R')
case ' ':
if client is not None:
client.send(b' ')
sleep(0.1)
assert all(c is None for c in clients), \
f'clients still running: {clients!r}'
@cli.command('client')
@click.argument('executable')
def test_client(executable: str) -> None:
if not (action := sys.stdin.read(1)):
raise SystemExit(1)
assert action == '[', f'{action!r} != "["'
while action := sys.stdin.read(1):
match action:
case 'R':
run([executable], check=True, stdout=sys.stderr)
case ']':
sys.stdout.write(']')
return
case ' ':
sleep(0.1)
case '':
raise SystemExit(1)
if __name__ == '__main__':
cli()

View File

@@ -2,7 +2,12 @@ import ../make-test-python.nix {
name = "systemd-confinement";
nodes.machine =
{ pkgs, lib, ... }:
{
pkgs,
lib,
utils,
...
}:
let
testLib = pkgs.python3Packages.buildPythonPackage {
name = "confinement-testlib";
@@ -215,122 +220,166 @@ import ../make-test-python.nix {
}
);
concurrentRunner = pkgs.writers.writePython3 "concurrent-runner" {
libraries = [
pkgs.python3Packages.click
pkgs.python3Packages.hypothesis
];
} ./concurrent-runner.py;
concurrentTest = {
systemd.services.concurrent-driver = {
description = "Driver for orchestrating concurrent processes";
requiredBy = [ "multi-user.target" ];
after = [
"network.target"
"concurrent-client.socket"
];
serviceConfig.Type = "oneshot";
serviceConfig.ExecStart = utils.escapeSystemdExecArgs [
concurrentRunner
"driver"
];
};
systemd.sockets.concurrent-client = {
description = "Socket for concurrent processes";
requiredBy = [ "sockets.target" ];
socketConfig.ListenStream = 12345;
socketConfig.Accept = true;
};
systemd.services."concurrent-client@" = {
description = "Process %I running concurrently with others";
confinement.enable = true;
serviceConfig.StandardInput = "socket";
serviceConfig.StandardError = "journal";
serviceConfig.ExecStart = utils.escapeSystemdExecArgs [
concurrentRunner
"client"
"${pkgs.fortune}/bin/fortune"
];
};
};
in
{
imports = lib.imap1 mkTestStep (
parametrisedTests
++ [
{
description = "existence of bind-mounted /etc";
config.serviceConfig.BindReadOnlyPaths = [ "/etc" ];
testScript = ''
assert Path('/etc/passwd').read_text()
'';
}
(
let
symlink = pkgs.runCommand "symlink" {
target = pkgs.writeText "symlink-target" "got me";
} "ln -s \"$target\" \"$out\"";
in
imports =
lib.imap1 mkTestStep (
parametrisedTests
++ [
{
description = "check if symlinks are properly bind-mounted";
config.confinement.packages = lib.singleton symlink;
description = "existence of bind-mounted /etc";
config.serviceConfig.BindReadOnlyPaths = [ "/etc" ];
testScript = ''
assert Path('${symlink}').read_text() == 'got me'
assert Path('/etc/passwd').read_text()
'';
}
)
{
description = "check if StateDirectory works";
config.serviceConfig.User = "chroot-testuser";
config.serviceConfig.Group = "chroot-testgroup";
config.serviceConfig.StateDirectory = "testme";
(
let
symlink = pkgs.runCommand "symlink" {
target = pkgs.writeText "symlink-target" "got me";
} "ln -s \"$target\" \"$out\"";
in
{
description = "check if symlinks are properly bind-mounted";
config.confinement.packages = lib.singleton symlink;
testScript = ''
assert Path('${symlink}').read_text() == 'got me'
'';
}
)
{
description = "check if StateDirectory works";
config.serviceConfig.User = "chroot-testuser";
config.serviceConfig.Group = "chroot-testgroup";
config.serviceConfig.StateDirectory = "testme";
# We restart on purpose here since we want to check whether the state
# directory actually persists.
config.serviceConfig.Restart = "on-failure";
config.serviceConfig.RestartMode = "direct";
# We restart on purpose here since we want to check whether the state
# directory actually persists.
config.serviceConfig.Restart = "on-failure";
config.serviceConfig.RestartMode = "direct";
testScript = ''
assert not Path('/tmp/canary').exists()
Path('/tmp/canary').touch()
testScript = ''
assert not Path('/tmp/canary').exists()
Path('/tmp/canary').touch()
if (foo := Path('/var/lib/testme/foo')).exists():
assert Path('/var/lib/testme/foo').read_text() == 'works'
else:
Path('/var/lib/testme/foo').write_text('works')
print('<4>Exiting with failure to check persistence on restart.')
raise SystemExit(1)
'';
}
{
description = "check if /bin/sh works";
testScript = ''
assert Path('/bin/sh').exists()
if (foo := Path('/var/lib/testme/foo')).exists():
assert Path('/var/lib/testme/foo').read_text() == 'works'
else:
Path('/var/lib/testme/foo').write_text('works')
print('<4>Exiting with failure to check persistence on restart.')
raise SystemExit(1)
'';
}
{
description = "check if /bin/sh works";
testScript = ''
assert Path('/bin/sh').exists()
result = run(
['/bin/sh', '-c', 'echo -n bar'],
capture_output=True,
check=True,
)
assert result.stdout == b'bar'
'';
}
{
description = "check if suppressing /bin/sh works";
config.confinement.binSh = null;
testScript = ''
assert not Path('/bin/sh').exists()
with pytest.raises(FileNotFoundError):
run(['/bin/sh', '-c', 'echo foo'])
'';
}
{
description = "check if we can set /bin/sh to something different";
config.confinement.binSh = "${pkgs.hello}/bin/hello";
testScript = ''
assert Path('/bin/sh').exists()
result = run(
['/bin/sh', '-g', 'foo'],
capture_output=True,
check=True,
)
assert result.stdout == b'foo\n'
'';
}
{
description = "check if only Exec* dependencies are included";
config.environment.FOOBAR = pkgs.writeText "foobar" "eek";
testScript = ''
with pytest.raises(FileNotFoundError):
Path(os.environ['FOOBAR']).read_text()
'';
}
{
description = "check if fullUnit includes all dependencies";
config.environment.FOOBAR = pkgs.writeText "foobar" "eek";
config.confinement.fullUnit = true;
testScript = ''
assert Path(os.environ['FOOBAR']).read_text() == 'eek'
'';
}
{
description = "check if shipped unit file still works";
config.confinement.mode = "chroot-only";
rawUnit = ''
[Service]
SystemCallFilter=~kill
SystemCallErrorNumber=ELOOP
'';
testScript = ''
with pytest.raises(OSError) as excinfo:
os.kill(os.getpid(), signal.SIGKILL)
assert excinfo.value.errno == errno.ELOOP
'';
}
]
);
result = run(
['/bin/sh', '-c', 'echo -n bar'],
capture_output=True,
check=True,
)
assert result.stdout == b'bar'
'';
}
{
description = "check if suppressing /bin/sh works";
config.confinement.binSh = null;
testScript = ''
assert not Path('/bin/sh').exists()
with pytest.raises(FileNotFoundError):
run(['/bin/sh', '-c', 'echo foo'])
'';
}
{
description = "check if we can set /bin/sh to something different";
config.confinement.binSh = "${pkgs.hello}/bin/hello";
testScript = ''
assert Path('/bin/sh').exists()
result = run(
['/bin/sh', '-g', 'foo'],
capture_output=True,
check=True,
)
assert result.stdout == b'foo\n'
'';
}
{
description = "check if only Exec* dependencies are included";
config.environment.FOOBAR = pkgs.writeText "foobar" "eek";
testScript = ''
with pytest.raises(FileNotFoundError):
Path(os.environ['FOOBAR']).read_text()
'';
}
{
description = "check if fullUnit includes all dependencies";
config.environment.FOOBAR = pkgs.writeText "foobar" "eek";
config.confinement.fullUnit = true;
testScript = ''
assert Path(os.environ['FOOBAR']).read_text() == 'eek'
'';
}
{
description = "check if shipped unit file still works";
config.confinement.mode = "chroot-only";
rawUnit = ''
[Service]
SystemCallFilter=~kill
SystemCallErrorNumber=ELOOP
'';
testScript = ''
with pytest.raises(OSError) as excinfo:
os.kill(os.getpid(), signal.SIGKILL)
assert excinfo.value.errno == errno.ELOOP
'';
}
]
)
++ [ concurrentTest ];
config.users.groups.chroot-testgroup = { };
config.users.users.chroot-testuser = {