homectl: retry DeactivateHome on transient busy errors

When 'homectl deactivate' is called immediately after a preceding
operation, the umount inside systemd-homework can fail with EBUSY
because something briefly holds a reference to the home mount (e.g. a
concurrent inspect). systemd-homed already handles this gracefully
by moving the home into the 'lingering' state and retrying deactivation
after 15 seconds, but the bus reply for the original DeactivateHome
call returns the org.freedesktop.home1.HomeBusy error immediately,
which makes TEST-46-HOMED flaky.

Fix homectl to follow homed's lead: when DeactivateHome fails with
HomeBusy, retry the call for up to 30 seconds (twice homed's 15-second
retry interval). Also add a test case that reproduces the issue
deterministically.
This commit is contained in:
Luca Boccassi
2026-05-08 14:21:33 +01:00
parent 80c92c896a
commit 39863e2b1e
4 changed files with 80 additions and 14 deletions

View File

@@ -35,5 +35,8 @@ int bus_message_append_secret(sd_bus_message *m, UserRecord *secret);
* operations permit a *very* long timeout */
#define HOME_SLOW_BUS_CALL_TIMEOUT_USEC (2*USEC_PER_MINUTE)
/* Retry to deactivate home directories again and again every 15s until it works */
#define HOME_RETRY_DEACTIVATE_USEC (15U * USEC_PER_SEC)
const char* home_record_dir(void);
const char* home_system_blob_dir(void);

View File

@@ -2062,22 +2062,41 @@ static int verb_deactivate_home(int argc, char *argv[], uintptr_t _data, void *u
return r;
STRV_FOREACH(i, strv_skip(argv, 1)) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
/* The home directory might still be busy for a brief moment after a preceding operation
* (e.g. a concurrent inspect/deactivate, or a stray reference holding the mount busy at
* unmount time). homed will transition the home into "lingering" state and retry
* deactivation internally after some time, but rather than failing immediately let's just
* retry the bus call here for a while, so callers don't need to deal with this transient
* condition themselves. Use double the time homed waits to avoid racing with it. */
usec_t end = usec_add(now(CLOCK_MONOTONIC), 2 * HOME_RETRY_DEACTIVATE_USEC);
r = bus_message_new_method_call(bus, &m, bus_mgr, "DeactivateHome");
if (r < 0)
return bus_log_create_error(r);
for (;;) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
r = sd_bus_message_append(m, "s", *i);
if (r < 0)
return bus_log_create_error(r);
r = bus_message_new_method_call(bus, &m, bus_mgr, "DeactivateHome");
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_message_append(m, "s", *i);
if (r < 0)
return bus_log_create_error(r);
r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, /* ret_reply= */ NULL);
if (r >= 0)
break;
if (sd_bus_error_has_name(&error, BUS_ERROR_HOME_BUSY) &&
now(CLOCK_MONOTONIC) < end) {
log_info("Home of user %s is currently busy, retrying deactivation.", *i);
(void) usleep_safe(1 * USEC_PER_SEC);
continue;
}
r = sd_bus_call(bus, m, HOME_SLOW_BUS_CALL_TIMEOUT_USEC, &error, NULL);
if (r < 0) {
log_error_errno(r, "Failed to deactivate user home: %s", bus_error_message(&error, r));
if (ret == 0)
ret = r;
break;
}
}

View File

@@ -49,9 +49,6 @@
#include "user-record-util.h"
#include "user-util.h"
/* Retry to deactivate home directories again and again every 15s until it works */
#define RETRY_DEACTIVATE_USEC (15U * USEC_PER_SEC)
#define HOME_USERS_MAX 500
#define PENDING_OPERATIONS_MAX 100
@@ -514,7 +511,7 @@ static void home_start_retry_deactivate(Home *h) {
h->manager->event,
&h->retry_deactivate_event_source,
CLOCK_MONOTONIC,
RETRY_DEACTIVATE_USEC,
HOME_RETRY_DEACTIVATE_USEC,
1*USEC_PER_MINUTE,
home_on_retry_deactivate,
h);

View File

@@ -1062,4 +1062,51 @@ testcase_fscrypt() {
homectl remove fscrypttest
}
testcase_deactivate_busy() {
    # Regression test: "homectl deactivate" has to cope with the umount()
    # in systemd-homework transiently failing with EBUSY. A short-lived
    # reference to the home mount at unmount time used to make
    # TEST-46-HOMED fail sporadically.
    #
    # The busy condition is forced here by a background process that uses
    # the home directory as its working directory. The kernel's cwd
    # reference keeps the mount busy for as long as that process lives, so
    # the first umount2() attempted by homework fails with EBUSY. homectl
    # is expected to keep retrying the bus call on its own until the holder
    # has exited and the call goes through.
    local busy_pid

    NEWPASSWORD=hunter2 homectl create --storage=directory --enforce-password-policy=no busytest
    PASSWORD=hunter2 homectl activate busytest
    inspect busytest

    # Bail out early if the home is not mounted: without a mount the
    # subshell below would not end up holding the reference we rely on.
    mountpoint /home/busytest

    # Start the holder. Because `cd` is a builtin, the subshell itself
    # takes the cwd reference, and `exec sleep` keeps it in the same
    # process for the duration of the sleep.
    ( cd /home/busytest && exec sleep 10 ) &
    busy_pid=$!

    # Do not race the holder: poll /proc until its cwd resolves to the home
    # directory, so the busy reference is known to exist before the
    # deactivation is attempted.
    timeout 5 bash -c "until [[ \"\$(readlink /proc/${busy_pid}/cwd 2>/dev/null)\" == /home/busytest ]]; do sleep 0.1; done"

    # This call has to succeed in the end. The initial umount2() hits
    # EBUSY, but homectl keeps retrying for up to 30 seconds, which is
    # longer than the holder's 10 second lifetime.
    homectl deactivate busytest
    wait_for_state busytest inactive

    # Reap the holder; its exit status is irrelevant for the test.
    wait "$busy_pid" || true

    homectl remove busytest
}
run_testcases