fix: now fixed ssh unstable

This commit is contained in:
robcholz
2026-02-06 21:37:55 -05:00
parent 27e5733c93
commit 2c08e299ac

View File

@@ -5,13 +5,56 @@ SSH_USER="__SSH_USER__"
PROJECT_NAME="__PROJECT_NAME__"
KEY_PATH="__KEY_PATH__"
diag() { echo "[vibebox][diag] $*" >&2; }
# Extract default route facts (no hardcode)
default_route_line() { ip -4 route show default 2>/dev/null | head -n 1 || true; }
default_dev() {
default_route_line | awk '{for(i=1;i<=NF;i++) if($i=="dev"){print $(i+1); exit}}'
}
default_gw() {
default_route_line | awk '{for(i=1;i<=NF;i++) if($i=="via"){print $(i+1); exit}}'
}
dump_diag() {
diag "=== default route ==="
default_route_line >&2 || true
diag "=== systemctl status ssh ==="
systemctl status ssh --no-pager >&2 || true
diag "=== journalctl -u ssh (tail) ==="
journalctl -u ssh -n 120 --no-pager >&2 || true
diag "=== sshd -t ==="
sshd -t >&2 || true
diag "=== sshd -T (listen/addressfamily) ==="
sshd -T 2>/dev/null | egrep -i '^(listenaddress|addressfamily|port|permitrootlogin|allowusers)\b' >&2 || true
diag "=== listeners on :22 ==="
ss -lntp 2>/dev/null | awk 'NR==1 || $4 ~ /:22$/' >&2 || true
diag "=== ip -br addr ==="
ip -br addr >&2 || true
diag "=== ip route ==="
ip route >&2 || true
gw="$(default_gw || true)"
if [ -n "$gw" ]; then
diag "=== ping default gateway ($gw) ==="
ping -c1 -W1 "$gw" >/dev/null 2>&1 && diag "ping gw OK" || diag "ping gw FAIL"
fi
}
# 1) tmpfs mount
TARGET="/root/${PROJECT_NAME}/.vibebox"
if [ -d "$TARGET" ] && ! mountpoint -q "$TARGET"; then
mount -t tmpfs tmpfs "$TARGET"
fi
# 2)
# 2) user + authorized_keys
if ! id -u "$SSH_USER" >/dev/null 2>&1; then
useradd -m -s /bin/bash -U "$SSH_USER"
usermod -aG sudo "$SSH_USER" || true
@@ -20,41 +63,71 @@ fi
install -d -m 700 -o "$SSH_USER" -g "$SSH_USER" "/home/${SSH_USER}/.ssh"
install -m 600 -o "$SSH_USER" -g "$SSH_USER" "$KEY_PATH" "/home/${SSH_USER}/.ssh/authorized_keys"
# 3)
systemctl start ssh >/dev/null 2>&1 || true
# 4)
i=0
while :; do
if ss -lnt 2>/dev/null | awk '{print $4}' | grep -qE '(:22)$'; then
break
# 3) start ssh (don't swallow failures)
# If ssh is already active, don't force start/restart.
if ! systemctl is-active --quiet ssh; then
if ! systemctl start ssh; then
diag "systemctl start ssh failed"
dump_diag
exit 1
fi
i=$((i+1))
[ "$i" -ge 40 ] && break # ~4s
sleep 0.1
fi
# 4) obtain stable IPv4 on the default-route interface (wait up to ~30s)
ip_on_dev() {
dev="$1"
ip -4 -o addr show dev "$dev" scope global 2>/dev/null \
| awk '{print $4}' | cut -d/ -f1 | head -n 1 || true
}
ip=""
dev=""
gw=""
t=0
while [ "$t" -lt 60 ]; do
dev="$(default_dev || true)"
gw="$(default_gw || true)"
if [ -n "$dev" ]; then
ip="$(ip_on_dev "$dev")"
else
ip=""
fi
if [ -n "$dev" ] && [ -n "$ip" ]; then
# optional: if a gateway exists, require it to answer to avoid "ip exists but link dead"
if [ -z "$gw" ] || ping -c1 -W1 "$gw" >/dev/null 2>&1; then
break
fi
fi
t=$((t+1))
sleep 0.5
done
echo VIBEBOX_SSH_READY
if [ -z "$dev" ] || [ -z "$ip" ]; then
diag "no stable IPv4 on default route interface"
dump_diag
exit 1
fi
find_ip() {
if command -v ip >/dev/null 2>&1; then
ip -4 -o addr show scope global 2>/dev/null | awk '{print $4}' | cut -d/ -f1 | head -n 1
return
fi
if command -v hostname >/dev/null 2>&1; then
hostname -I 2>/dev/null | awk '{print $1}'
return
fi
# 5) strong verify: ssh must listen externally (0.0.0.0:22 or $ip:22 or [::]:22)
listens_ok() {
ss -lnt 2>/dev/null \
| awk 'NR>1 {print $4}' \
| grep -Eq "^(0\.0\.0\.0:22|\\[::\\]:22|${ip}:22)$"
}
i=0
while :; do
ip="$(find_ip || true)"
if [ -n "$ip" ]; then
echo VIBEBOX_IPV4=$ip
break
fi
while ! listens_ok && [ "$i" -lt 80 ]; do # ~8s
i=$((i+1))
[ "$i" -ge 60 ] && break
sleep 0.5
sleep 0.1
done
if ! listens_ok; then
diag "sshd not listening on 0.0.0.0:22 / ${ip}:22"
dump_diag
exit 1
fi
echo VIBEBOX_SSH_READY
echo "VIBEBOX_IPV4=$ip"