From 2c08e299acfae5029f59dafbbf12419cffd13b89 Mon Sep 17 00:00:00 2001 From: robcholz <84130577+robcholz@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:37:55 -0500 Subject: [PATCH] fix: now fixed ssh unstable --- src/ssh.sh | 133 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 103 insertions(+), 30 deletions(-) diff --git a/src/ssh.sh b/src/ssh.sh index 8f8c131..4fb9c26 100644 --- a/src/ssh.sh +++ b/src/ssh.sh @@ -5,13 +5,56 @@ SSH_USER="__SSH_USER__" PROJECT_NAME="__PROJECT_NAME__" KEY_PATH="__KEY_PATH__" +diag() { echo "[vibebox][diag] $*" >&2; } + +# Extract default route facts (no hardcode) +default_route_line() { ip -4 route show default 2>/dev/null | head -n 1 || true; } +default_dev() { + default_route_line | awk '{for(i=1;i<=NF;i++) if($i=="dev"){print $(i+1); exit}}' +} +default_gw() { + default_route_line | awk '{for(i=1;i<=NF;i++) if($i=="via"){print $(i+1); exit}}' +} + +dump_diag() { + diag "=== default route ===" + default_route_line >&2 || true + + diag "=== systemctl status ssh ===" + systemctl status ssh --no-pager >&2 || true + + diag "=== journalctl -u ssh (tail) ===" + journalctl -u ssh -n 120 --no-pager >&2 || true + + diag "=== sshd -t ===" + sshd -t >&2 || true + + diag "=== sshd -T (listen/addressfamily) ===" + sshd -T 2>/dev/null | egrep -i '^(listenaddress|addressfamily|port|permitrootlogin|allowusers)\b' >&2 || true + + diag "=== listeners on :22 ===" + ss -lntp 2>/dev/null | awk 'NR==1 || $4 ~ /:22$/' >&2 || true + + diag "=== ip -br addr ===" + ip -br addr >&2 || true + + diag "=== ip route ===" + ip route >&2 || true + + gw="$(default_gw || true)" + if [ -n "$gw" ]; then + diag "=== ping default gateway ($gw) ===" + ping -c1 -W1 "$gw" >/dev/null 2>&1 && diag "ping gw OK" || diag "ping gw FAIL" + fi +} + # 1) tmpfs mount TARGET="/root/${PROJECT_NAME}/.vibebox" if [ -d "$TARGET" ] && ! mountpoint -q "$TARGET"; then mount -t tmpfs tmpfs "$TARGET" fi -# 2) +# 2) user + authorized_keys if ! id -u "$SSH_USER" >/dev/null 2>&1; then useradd -m -s /bin/bash -U "$SSH_USER" usermod -aG sudo "$SSH_USER" || true @@ -20,41 +63,71 @@ fi install -d -m 700 -o "$SSH_USER" -g "$SSH_USER" "/home/${SSH_USER}/.ssh" install -m 600 -o "$SSH_USER" -g "$SSH_USER" "$KEY_PATH" "/home/${SSH_USER}/.ssh/authorized_keys" -# 3) -systemctl start ssh >/dev/null 2>&1 || true - -# 4) -i=0 -while :; do - if ss -lnt 2>/dev/null | awk '{print $4}' | grep -qE '(:22)$'; then - break +# 3) start ssh (don't swallow failures) +# If ssh is already active, don't force start/restart. +if ! systemctl is-active --quiet ssh; then + if ! systemctl start ssh; then + diag "systemctl start ssh failed" + dump_diag + exit 1 fi - i=$((i+1)) - [ "$i" -ge 40 ] && break # ~4s - sleep 0.1 +fi + +# 4) obtain stable IPv4 on the default-route interface (wait up to ~30s) +ip_on_dev() { + dev="$1" + ip -4 -o addr show dev "$dev" scope global 2>/dev/null \ + | awk '{print $4}' | cut -d/ -f1 | head -n 1 || true +} + +ip="" +dev="" +gw="" +t=0 +while [ "$t" -lt 60 ]; do + dev="$(default_dev || true)" + gw="$(default_gw || true)" + if [ -n "$dev" ]; then + ip="$(ip_on_dev "$dev")" + else + ip="" + fi + + if [ -n "$dev" ] && [ -n "$ip" ]; then + # optional: if a gateway exists, require it to answer to avoid "ip exists but link dead" + if [ -z "$gw" ] || ping -c1 -W1 "$gw" >/dev/null 2>&1; then + break + fi + fi + + t=$((t+1)) + sleep 0.5 done -echo VIBEBOX_SSH_READY +if [ -z "$dev" ] || [ -z "$ip" ]; then + diag "no stable IPv4 on default route interface" + dump_diag + exit 1 +fi -find_ip() { - if command -v ip >/dev/null 2>&1; then - ip -4 -o addr show scope global 2>/dev/null | awk '{print $4}' | cut -d/ -f1 | head -n 1 - return - fi - if command -v hostname >/dev/null 2>&1; then - hostname -I 2>/dev/null | awk '{print $1}' - return - fi +# 5) strong verify: ssh must listen externally (0.0.0.0:22 or $ip:22 or [::]:22) +listens_ok() { + ss -lnt 2>/dev/null \ + | awk 'NR>1 {print $4}' \ + | grep -Eq "^(0\.0\.0\.0:22|\\[::\\]:22|${ip}:22)$" } i=0 -while :; do - ip="$(find_ip || true)" - if [ -n "$ip" ]; then - echo VIBEBOX_IPV4=$ip - break - fi +while ! listens_ok && [ "$i" -lt 80 ]; do # ~8s i=$((i+1)) - [ "$i" -ge 60 ] && break - sleep 0.5 + sleep 0.1 done + +if ! listens_ok; then + diag "sshd not listening on 0.0.0.0:22 / ${ip}:22" + dump_diag + exit 1 +fi + +echo VIBEBOX_SSH_READY +echo "VIBEBOX_IPV4=$ip"