Harden LA publishers and add multi-relay guide
Some checks are pending
ci-gates / checks (push) Waiting to run
deploy-cloudflare / checks (push) Waiting to run
deploy-cloudflare / deploy (push) Blocked by required conditions

This commit is contained in:
Conrad Kramer 2026-06-10 01:28:15 -07:00
parent 5d6f77f868
commit cfc4902016
No known key found for this signature in database
13 changed files with 1430 additions and 402 deletions

View file

@ -452,6 +452,7 @@ in
systemd.tmpfiles.rules =
[
"d /run/every-channel 1777 root root - -"
"d /run/every-channel/source-locks 1777 root root - -"
]
++ lib.optionals cfg.nbc.enable [
"d /var/lib/every-channel 0750 every-channel every-channel - -"
@ -487,6 +488,7 @@ in
pkgs.findutils
pkgs.gawk
pkgs.iproute2
pkgs.util-linux
cfg.package
]
++ lib.optionals (isNbc && cfg.nbc.requireMullvad) [ pkgs.mullvad-vpn ]
@ -580,8 +582,36 @@ in
return "$status"
}
# Run a publisher command while holding the per-source lock for this node.
#
# Globals:
#   source_lock (read) - path of the lock file; when empty or unset the
#                        command runs without any locking.
#   source_id   (read) - source identifier, used only in log messages.
# Arguments:
#   "$@" - the command (and its arguments) to execute.
# Returns:
#   0 when the lock is already held elsewhere (the command is skipped),
#   1 when the lock file cannot be opened (the command is not run),
#   otherwise the exit status of the command.
run_source_command() {
  local status source_lock_fd
  status=0
  source_lock_fd=""
  if [[ -n "''${source_lock:-}" ]]; then
    # Opening the lock file can fail (missing directory, permissions). Report
    # that as its own error instead of falling through to flock with an empty
    # descriptor, which would be misreported as "source already active".
    if ! exec {source_lock_fd}>"$source_lock"; then
      echo "ec-node: cannot open source lock $source_lock, not starting publisher: $source_id" >&2
      return 1
    fi
    # Non-blocking attempt: a held lock means a publisher for this source is
    # already running on this node, so this duplicate is skipped quietly.
    if ! flock -n "$source_lock_fd"; then
      echo "ec-node: source already active on this node, skipping duplicate publisher: $source_id" >&2
      exec {source_lock_fd}>&-
      return 0
    fi
  fi
  # Capture the command status without letting errexit abort the runner.
  set +e
  "$@"
  status=$?
  set -e
  if [[ -n "$source_lock_fd" ]]; then
    # Best-effort unlock; closing the descriptor drops this shell's reference.
    flock -u "$source_lock_fd" 2>/dev/null || true
    exec {source_lock_fd}>&-
  fi
  return "$status"
}
nbc_url=${lib.escapeShellArg nbcUrlStr}
input=""
source_id=""
source_lock=""
if [[ -z "$nbc_url" ]]; then
explicit_input=${lib.escapeShellArg explicitInputStr}
if [[ -n "$explicit_input" ]]; then
@ -676,9 +706,11 @@ in
host="''${hostport%%:*}"
input="http://$host:5004/auto/v$ch"
fi
source_id="$input"
fi
if [[ -n "$nbc_url" ]]; then
source_id="$nbc_url"
cmd=(
${lib.escapeShellArg "${cfg.package}/bin/ec-node"}
nbc-wt-publish
@ -715,6 +747,11 @@ in
''}
${extraArgsLine}
if [[ -n "$source_id" ]]; then
source_key="$(printf '%s' "$source_id" | tr -c 'A-Za-z0-9_.-' '_')"
source_lock="/run/every-channel/source-locks/$source_key.lock"
fi
# Keep the unit alive even if the relay is temporarily unreachable.
# This avoids `switch-to-configuration test` failing due to a unit that exits
# quickly during activation.
@ -726,9 +763,9 @@ in
continue
fi
''}
${lib.optionalString (isNbc && cfg.nbc.isolateWithUserNetns) "run_in_user_netns || true"}
${lib.optionalString (isNbc && cfg.nbc.isolateWithUserNetns) "run_source_command run_in_user_netns || true"}
${lib.optionalString (!isNbc || !cfg.nbc.isolateWithUserNetns) ''
"''${cmd[@]}" || true
run_source_command "''${cmd[@]}" || true
''}
sleep 2
done
@ -763,6 +800,9 @@ in
ExecStart = "${runner}/bin/${unit}";
Restart = "always";
RestartSec = 2;
KillMode = "control-group";
TimeoutStopSec = "10s";
SendSIGKILL = true;
DynamicUser = !isNbc;
User = lib.mkIf isNbc "every-channel";
@ -949,14 +989,24 @@ in
poll_secs="$(awk 'BEGIN { printf "%.3f", ${toString cfg.archive.pollIntervalMs} / 1000.0 }')"
# Stop every child process recorded in "$pids_dir"/*.pid and remove the PID
# files.
#
# Live children get SIGTERM first, then up to about one second to exit, and
# anything still running after that is sent SIGKILL.
#
# Globals:
#   pids_dir (read) - directory holding one "<name>.pid" file per child.
# Arguments:
#   none (any arguments are ignored).
# Returns:
#   0 always; signalling failures are deliberately ignored (best effort).
cleanup_children() {
  local pid_file pid alive attempt
  # The positional parameters of this function serve as the list of live PIDs.
  set --
  for pid_file in "$pids_dir"/*.pid; do
    [[ -e "$pid_file" ]] || continue
    pid="$(cat "$pid_file" 2>/dev/null || true)"
    rm -f "$pid_file"
    # Accept only a positive decimal PID: values such as 0 or -1 would make
    # kill target a whole process group or every process.
    if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
      set -- "$@" "$pid"
    fi
  done
  if [[ "$#" -eq 0 ]]; then
    return 0
  fi

  kill -TERM "$@" 2>/dev/null || true

  # Poll for up to ~1s so shutdown finishes as soon as the children are gone.
  for attempt in 1 2 3 4 5 6 7 8 9 10; do
    alive=0
    for pid in "$@"; do
      if kill -0 "$pid" 2>/dev/null; then
        alive=1
        break
      fi
    done
    if [[ "$alive" -eq 0 ]]; then
      return 0
    fi
    sleep 0.1
  done

  # Grace period is over: force-kill whatever is still running.
  for pid in "$@"; do
    if kill -0 "$pid" 2>/dev/null; then
      kill -KILL "$pid" 2>/dev/null || true
    fi
  done
  return 0
}
trap cleanup_children INT TERM EXIT
@ -970,7 +1020,7 @@ in
while IFS= read -r entry; do
name="$(printf '%s\n' "$entry" | jq -r '.broadcast_name // empty')"
relay="$(printf '%s\n' "$entry" | jq -r '.relay_url // empty')"
relay="$(printf '%s\n' "$entry" | jq -r '(.relay_url // .relays[0].relay_url // empty)')"
if [[ -z "$name" ]]; then
continue
fi
@ -1039,6 +1089,9 @@ in
ExecStart = "${archiveRunner}/bin/${archiveUnit}";
Restart = "always";
RestartSec = 2;
KillMode = "control-group";
TimeoutStopSec = "10s";
SendSIGKILL = true;
NoNewPrivileges = true;
PrivateTmp = true;