nspawn: actually mask certain files under /proc/

/run/systemd/inaccessible/ exists only on the host; in the container we have
/run/host/inaccessible/ instead. Since all the inaccessible mounts have
MOUNT_IN_USERNS set, we need to use the latter path, otherwise the masking
gets silently skipped:

~# SYSTEMD_LOG_LEVEL=debug systemd-nspawn -q --directory=foo ls -la /proc/kallsyms
...
Bind-mounting /run/systemd/inaccessible/reg on /proc/kallsyms (MS_BIND "")...
Failed to mount /run/systemd/inaccessible/reg (type n/a) on /proc/kallsyms (MS_BIND ""): No such file or directory
Changing mount flags /proc/kallsyms (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND "")...
Failed to mount n/a (type n/a) on /proc/kallsyms (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND ""): Invalid argument
Bind-mounting /run/systemd/inaccessible/reg on /proc/kcore (MS_BIND "")...
Failed to mount /run/systemd/inaccessible/reg (type n/a) on /proc/kcore (MS_BIND ""): No such file or directory
Changing mount flags /proc/kcore (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND "")...
Failed to mount n/a (type n/a) on /proc/kcore (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND ""): Invalid argument
...
Inner child finished, invoking payload.
-r--r--r--. 1 root root 0 Feb 25 13:19 /proc/kallsyms
This commit is contained in:
Frantisek Sumsal
2026-02-25 19:13:37 +01:00
committed by Zbigniew Jędrzejewski-Szmek
parent 6e59d22380
commit 83b8daa032
2 changed files with 41 additions and 1 deletions

View File

@@ -534,7 +534,7 @@ int mount_all(const char *dest,
const char *selinux_apifs_context) {
#define PROC_INACCESSIBLE_REG(path) \
{ "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
{ "/run/host/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
{ NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */

View File

@@ -396,6 +396,46 @@ EOF
(! systemd-nspawn --rlimit==)
}
# Check that systemd-nspawn masks a fixed set of /proc/ files inside a container.
#
# A throwaway container tree is created under /var/lib/machines/ and, for every
# path in the list below, systemd-nspawn is invoked three times:
#   1. with default settings                        -> mode inside must be 0
#   2. with SYSTEMD_NSPAWN_API_VFS_WRITABLE=yes     -> mode inside must equal the
#      mode of the same path as seen by this script (outside the container)
#   3. with SYSTEMD_NSPAWN_API_VFS_WRITABLE=network -> mode inside must be 0
#
# Depends on create_dummy_container(), which is defined outside this function.
# The container tree is removed at the end of the function.
testcase_check_default_inaccessible_paths() {
    local root inaccessible_paths path exp

    # Taken from src/nspawn/nspawn-mount.c:mount_all()
    inaccessible_paths=(
        "/proc/kallsyms"
        "/proc/kcore"
        "/proc/keys"
        "/proc/sysrq-trigger"
        "/proc/timer_list"
    )

    root="$(mktemp -d /var/lib/machines/TEST-13-NSPAWN.default_inaccessible_paths.XXX)"
    create_dummy_container "$root"

    # Each inaccessible path should have zeroed permissions, which stat's %a reports as a single 0
    for path in "${inaccessible_paths[@]}"; do
        systemd-nspawn --directory="$root" \
                       bash -xec "ls -l $path; [[ \$(stat --format=%a $path) -eq 0 ]]"
    done

    # SYSTEMD_NSPAWN_API_VFS_WRITABLE=yes mounts certain API directories under /sys/ and /proc/sys/
    # as writable, and it also skips the path masking (by dropping the MOUNT_APPLY_APIVFS_RO flag)
    for path in "${inaccessible_paths[@]}"; do
        # Expected mode is read outside the container and interpolated into the
        # command string that runs inside it.
        exp="$(stat --format=%a "$path")"
        SYSTEMD_NSPAWN_API_VFS_WRITABLE=yes systemd-nspawn --directory="$root" \
                       bash -xec "ls -l $path; [[ \$(stat --format=%a $path) -eq $exp ]]"
    done

    # SYSTEMD_NSPAWN_API_VFS_WRITABLE=network mounts only /proc/sys/net/ as writable but doesn't
    # drop the MOUNT_APPLY_APIVFS_RO flag, so the masking should still apply
    for path in "${inaccessible_paths[@]}"; do
        SYSTEMD_NSPAWN_API_VFS_WRITABLE=network systemd-nspawn --directory="$root" \
                       bash -xec "ls -l $path; [[ \$(stat --format=%a $path) -eq 0 ]]"
    done

    rm -fr "$root"
}
nspawn_settings_cleanup() {
for dev in sd-host-only sd-shared{1,2,3} sd-macvlan{1,2} sd-ipvlan{1,2}; do
ip link del "$dev" || :