aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorLibravatar sommerfeld <sommerfeld@sommerfeld.dev>2026-05-29 11:18:15 +0100
committerLibravatar sommerfeld <sommerfeld@sommerfeld.dev>2026-05-29 11:18:15 +0100
commit3848d890979bd5fafae92054f85061edf10edff3 (patch)
treec2bdac9348f2505246b9b4edc6ad32a88b5090a2
parent6e0c5c33438e5e898bd075c33a45b3abf9d1b26b (diff)
downloaddotfiles-3848d890979bd5fafae92054f85061edf10edff3.tar.gz
dotfiles-3848d890979bd5fafae92054f85061edf10edff3.tar.bz2
dotfiles-3848d890979bd5fafae92054f85061edf10edff3.zip
fix(suspend): make zellij inhibit watcher resilient to local-only sessions
The previous watcher exited immediately whenever no SSH-spawned zellij was present. That caused a start-rate-limit storm: .path triggers service (zellij dir non-empty) -> watcher exits because no SSH zellij -> service stops -> .path retriggers (zellij dir still non-empty) -> ... 5 starts in 10s, systemd stops the path unit -> no inhibitor ever again, even after you SSH in.

Restructure so the watcher stays alive for the entire zellij socket directory lifetime and acquires/releases its own systemd-inhibit lock dynamically based on SSH-zellij presence:

* Watcher now polls and exits only when the zellij socket dir is empty, matching the .path's trigger condition so it never re-fires while zellij is alive.
* systemd-inhibit removed from ExecStart - watcher self-inhibits via a child 'systemd-inhibit ... sleep infinity' it can terminate on demand.
* StartLimitIntervalSec=0 on the service as belt-and-braces against any future regression of the cycle.

Recovery from the rate-limit hit:

    systemctl --user reset-failed zellij-inhibit-suspend.service zellij-inhibit-suspend.path
    systemctl --user daemon-reload
    systemctl --user restart zellij-inhibit-suspend.path
-rw-r--r--dot_config/systemd/user/zellij-inhibit-suspend.service18
-rwxr-xr-xdot_local/bin/executable_zellij-inhibit-watcher56
2 files changed, 59 insertions, 15 deletions
diff --git a/dot_config/systemd/user/zellij-inhibit-suspend.service b/dot_config/systemd/user/zellij-inhibit-suspend.service
index ed15fff..7c73c64 100644
--- a/dot_config/systemd/user/zellij-inhibit-suspend.service
+++ b/dot_config/systemd/user/zellij-inhibit-suspend.service
@@ -1,19 +1,21 @@
[Unit]
-Description=Hold a systemd-inhibit lock while SSH-spawned zellij sessions exist
+Description=Stay alive while any zellij session exists; inhibit suspend if SSH-spawned
Documentation=man:systemd-inhibit(1) man:zellij(1)
# Independent of any graphical session: this is meant to run on
# headless SSH-attached hosts too. The watcher itself decides whether
# the current zellij activity warrants inhibiting (SSH-spawned only),
-# so a local zellij session won't keep the laptop awake.
+# and acquires/releases its own systemd-inhibit lock dynamically. It
+# stays alive for the whole zellij dir lifetime so the .path unit does
+# not retrigger us in a busy loop when only local zellij sessions are
+# active.
+# Disable systemd's default start-rate limiter: even though the
+# refactored watcher should not cycle anymore, a zero rate-limit makes
+# this unit resilient if the user kills it manually.
+StartLimitIntervalSec=0
[Service]
Type=simple
-ExecStart=systemd-inhibit \
- --what=sleep:idle:handle-lid-switch \
- --who=zellij \
- --why=Active\x20zellij\x20sessions \
- --mode=block \
- %h/.local/bin/zellij-inhibit-watcher
+ExecStart=%h/.local/bin/zellij-inhibit-watcher
# Don't auto-restart: the .path unit reactivates us on the next session.
Restart=no
diff --git a/dot_local/bin/executable_zellij-inhibit-watcher b/dot_local/bin/executable_zellij-inhibit-watcher
index 6af7032..7537b36 100755
--- a/dot_local/bin/executable_zellij-inhibit-watcher
+++ b/dot_local/bin/executable_zellij-inhibit-watcher
@@ -1,6 +1,7 @@
#!/bin/sh
-# Block while at least one zellij server process was spawned from an
-# SSH context, exit cleanly once none remain.
+# Stay alive while any zellij session exists; hold a systemd-inhibit
+# lock only while at least one of those zellij sessions was spawned from
+# an SSH context.
#
# Rationale: a zellij session started locally (e.g. from a sway terminal)
# is the user actively sitting in front of the laptop — that should NOT
@@ -14,13 +15,17 @@
# after the original SSH session is gone. So an "ssh-spawned" zellij is
# one whose environ contains SSH_CONNECTION=.
#
-# This script is the ExecStart payload of zellij-inhibit-suspend.service,
-# which wraps it in systemd-inhibit. When this script exits, the lock is
-# released. The .path unit re-fires the service on the next zellij socket
-# transition.
+# Lifecycle: the .path unit starts this script when the zellij socket
+# directory becomes non-empty. The script then polls and stays alive as
+# long as any zellij socket exists, so the .path unit never re-triggers
+# the service while zellij is up (which previously caused a start-rate
+# limit storm when only local zellij was around). When the last zellij
+# exits, this script exits, the service stops, and the .path resumes
+# watching for the next session.
set -eu
poll=${ZELLIJ_INHIBIT_POLL:-15}
+sock_dir="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}/zellij"
has_ssh_zellij() {
pids=$(pgrep -x zellij 2>/dev/null) || return 1
@@ -34,6 +39,43 @@ has_ssh_zellij() {
return 1
}
-while has_ssh_zellij; do
+# Succeed when the zellij socket directory exists and contains at least
+# one entry (dotfiles included, via `ls -A`); fail otherwise.
+any_zellij_socket() {
+  if [ ! -d "$sock_dir" ]; then
+    return 1
+  fi
+  # `|| true` keeps a failing ls from tripping `set -e`; an unreadable
+  # directory is simply treated as empty.
+  found=$(ls -A "$sock_dir" 2>/dev/null || true)
+  test -n "$found"
+}
+
+# PID of the background systemd-inhibit process; empty while no lock is
+# held by this script.
+inhibit_pid=
+
+# Drop the inhibit lock if one is held; a no-op otherwise.
+# inhibit_pid is cleared before signalling so that a second invocation
+# (e.g. from the EXIT trap after a signal-triggered exit) does nothing.
+release_inhibit() {
+  pid=$inhibit_pid
+  inhibit_pid=
+  [ -n "$pid" ] || return 0
+  # Best effort: the process may already be gone.
+  kill "$pid" 2>/dev/null || true
+  wait "$pid" 2>/dev/null || true
+}
+
+# Cleanup runs from the EXIT trap only. INT/TERM must call `exit`
+# explicitly: a signal trap whose action merely returns lets the shell
+# resume where it was interrupted, so the watcher would release the lock
+# and then keep looping (and re-acquire it) instead of terminating.
+# Exit statuses follow the 128+signal-number convention.
+trap release_inhibit EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# Succeed when an inhibitor PID is recorded and that process can still
+# be signalled (`kill -0` probes without delivering a signal).
+inhibit_alive() {
+  if [ -z "$inhibit_pid" ]; then
+    return 1
+  fi
+  kill -0 "$inhibit_pid" 2>/dev/null
+}
+
+# Take the inhibit lock unless one is already held. The lock lives as
+# long as the backgrounded `systemd-inhibit ... sleep infinity` process,
+# whose PID is recorded in inhibit_pid.
+acquire_inhibit() {
+  inhibit_alive && return 0
+  systemd-inhibit --what=sleep:idle:handle-lid-switch --who=zellij \
+    --why='Active SSH-spawned zellij sessions' --mode=block \
+    sleep infinity &
+  inhibit_pid=$!
+}
+
+# Poll for as long as the zellij socket directory has entries: hold the
+# inhibit lock while an SSH-spawned zellij exists, drop it otherwise.
+while any_zellij_socket; do
+  if ! has_ssh_zellij; then
+    release_inhibit
+  else
+    acquire_inhibit
+  fi
sleep "$poll"
done