From f40e7907bcff8c53fd29fb335069cd8dbe79d8f3 Mon Sep 17 00:00:00 2001 From: Ehsan <1883051+ehsanking@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:13:32 +0330 Subject: [PATCH 1/2] Fix psql DO block escaping in installer DB role provisioning --- README.md | 32 ++++- docs/installer-root-cause-report.md | 50 ++++++++ docs/installer-verification-checklist.md | 64 +++++++++ install.sh | 157 ++++++++++++++++++++--- tests/install-compose-security.test.ts | 6 + 5 files changed, 293 insertions(+), 16 deletions(-) create mode 100644 docs/installer-root-cause-report.md create mode 100644 docs/installer-verification-checklist.md diff --git a/README.md b/README.md index 1fefb7f..e2672d5 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,26 @@ sudo INSTALL_REF= bash install.sh # Pinned commit sudo INSTALL_REF=<40-char-commit-sha> bash install.sh ``` + +Non-interactive automation (CI/provisioning-safe): +```bash +sudo INSTALL_NONINTERACTIVE=true \ + INSTALL_MODE=fresh \ + INSTALL_USE_DOMAIN=false \ + INSTALL_REF= \ + bash install.sh +``` + +Optional non-interactive domain mode: +```bash +sudo INSTALL_NONINTERACTIVE=true \ + INSTALL_MODE=fresh \ + INSTALL_USE_DOMAIN=true \ + INSTALL_DOMAIN_NAME=chat.example.com \ + INSTALL_SSL_EMAIL=admin@example.com \ + INSTALL_REF= \ + bash install.sh +``` Unsafe/dev-only (mutable branch head): ```bash curl -fsSLo install.sh https://raw.githubusercontent.com/ehsanking/ElaheMessenger/main/install.sh @@ -167,6 +187,7 @@ Installer safety behavior: - Aborts upgrades when git sync fails or the worktree is dirty (no implicit `rm -rf` fallback). - Uses Caddy on `:80/:443`; in IP-only mode the generated `APP_URL` uses `http://` (no internal `:3000` mismatch). - Never prints bootstrap admin password in terminal output; auto-generated credentials are written once to a local secrets file with restrictive permissions. +- Non-interactive installs are first-class: no hidden interactive dependency; install choices are deterministic and env-driven. - Verifies post-launch health in explicit phases: container health, local reverse-proxy routing, and external DNS/TLS readiness guidance. - Fails install when local reverse-proxy routing does not work, and only warns for external DNS/TLS propagation uncertainty. - Source trust defaults to a pinned tag when available; mutable branch-head installs are opt-in and explicitly warned during installer prompts. @@ -174,6 +195,16 @@ Installer safety behavior: - `ADMIN_USERNAME`/`ADMIN_PASSWORD` are create-only by default; if `ADMIN_BOOTSTRAP_RESET_EXISTING=true` is used, reset is consumed once per credential set (not repeated on every restart). - Does **not** auto-enable UFW; firewall changes remain operator-driven. +### Installer troubleshooting + +- **Installer hangs in piped mode**: run with `INSTALL_NONINTERACTIVE=true` (and optionally `INSTALL_MODE`, `INSTALL_USE_DOMAIN`, `INSTALL_DOMAIN_NAME`). +- **Ports 80/443 are already used**: stop conflicting services; non-interactive installs fail fast on conflicts by design. +- **Domain install fails local probe**: verify `INSTALL_DOMAIN_NAME`/domain prompt value is correct and resolves publicly; installer now validates host-routed proxy behavior locally with `--resolve`. +- **Docker Compose missing on Debian/Ubuntu**: installer attempts distro compose plugin packages (`docker-compose-plugin` / `docker-compose-v2`) and exits with actionable guidance if unavailable. +- **Need strict reproducibility**: pin `INSTALL_REF` to a release tag or commit, not `main`. + +Detailed runbook: `docs/installer-verification-checklist.md`. + --- ## Manual Installation @@ -530,4 +561,3 @@ If this project helps you, you can support its maintenance: - **USDT (TRC20 / Tether):** `TKPswLQqd2e73UTGJ5prxVXBVo7MTsWedU` - **TRON (TRX):** `TKPswLQqd2e73UTGJ5prxVXBVo7MTsWedU` - diff --git a/docs/installer-root-cause-report.md b/docs/installer-root-cause-report.md new file mode 100644 index 0000000..b85d27b --- /dev/null +++ b/docs/installer-root-cause-report.md @@ -0,0 +1,50 @@ +# Installer Root-Cause Report (March 30, 2026) + +## What failed + +1. **Piped/non-interactive installs could hang indefinitely** during fresh install. +2. **Domain-mode post-install health probe could fail even when services were healthy**. +3. **Non-interactive runs with existing deployments could block on prompts or continue ambiguously**. + +## Why it failed + +### 1) Non-interactive admin bootstrap prompt loop +- Fresh install always prompts for admin password mode. +- Default mode selected `provide password`. +- In piped/non-interactive execution there is no usable TTY for secret input. +- Password prompt returned empty values repeatedly, causing a never-ending loop. + +### 2) Domain-mode health check used an IP URL that bypassed domain routing assumptions +- Installer validated reverse proxy health using `http://127.0.0.1/api/health/live`. +- In domain-mode Caddy config, host-based routing/redirect behavior may not treat `127.0.0.1` as the configured site host. +- Result: false negative install failures despite app/db being healthy. + +### 3) Prompt-driven decisions were not deterministic without TTY +- Existing-install mode selection and proxy behavior relied on interactive prompts. +- In non-interactive contexts this could lead to unintended defaults or blocked flows without explicit operator intent. + +## What was changed + +1. Added explicit **non-interactive mode detection** (`INSTALL_NONINTERACTIVE=true` or no TTY) and deterministic behavior. +2. Added **non-interactive install controls**: + - `INSTALL_MODE=fresh|upgrade|reinstall` + - `INSTALL_USE_DOMAIN=true|false` + - `INSTALL_DOMAIN_NAME` + - `INSTALL_SSL_EMAIL` +3. Implemented **safe non-interactive admin bootstrap**: + - Uses `ADMIN_USERNAME` / `ADMIN_PASSWORD` if provided and valid. + - Otherwise auto-generates strong credentials and enforces first-login password change. + - Fails fast with actionable errors when invalid env values are supplied. +4. Made **port conflict handling deterministic**: + - non-interactive runs now fail explicitly instead of continuing with uncertain behavior. +5. Fixed **domain-mode reverse proxy verification**: + - probes Caddy using host-resolved request (`--resolve :80:127.0.0.1`) and accepts expected redirect/success status codes. +6. Added regression assertions in installer tests to ensure these protections remain in place. + +## Why the fix is correct + +- Eliminates TTY-dependent logic from required bootstrap path in non-interactive mode. +- Makes installer decisions explicit, reproducible, and scriptable for automation. +- Preserves current security posture (strong secret requirements, no secret printing, strict validation). +- Keeps idempotent behavior for upgrades/reinstalls by preserving existing env/config unless explicit regeneration is selected. +- Avoids false negatives in domain deployments by validating routing with the configured host semantics. diff --git a/docs/installer-verification-checklist.md b/docs/installer-verification-checklist.md new file mode 100644 index 0000000..5617f06 --- /dev/null +++ b/docs/installer-verification-checklist.md @@ -0,0 +1,64 @@ +# Installer Verification Checklist + +## 1) Install command + +### One-line pipe mode +```bash +curl -fsSL https://raw.githubusercontent.com/ehsanking/ElaheMessenger/main/install.sh | ( [ "$(id -u)" -eq 0 ] && bash || sudo bash ) +``` + +### Local-file mode +```bash +curl -fsSLo install.sh https://raw.githubusercontent.com/ehsanking/ElaheMessenger/main/install.sh +sudo bash install.sh +``` + +### Non-interactive reproducible mode +```bash +sudo INSTALL_NONINTERACTIVE=true INSTALL_MODE=fresh INSTALL_USE_DOMAIN=false INSTALL_REF= bash install.sh +``` + +## 2) Expected container state + +```bash +cd ElaheMessenger +docker compose ps +``` + +Expected: +- `db` => `healthy` +- `app` => `healthy` +- `caddy` => `running` + +## 3) Expected health endpoints + +```bash +curl -fsS http://127.0.0.1/api/health/live +curl -fsS http://127.0.0.1/api/health/ready +``` + +Expected: +- Both return JSON with HTTP 200 in IP-only mode. + +Domain mode host-routed probe: +```bash +curl -sS --resolve :80:127.0.0.1 -o /dev/null -w '%{http_code}\n' http:///api/health/live +``` + +Expected: +- `200` or redirect status (`301/302/307/308`) depending on HTTP->HTTPS policy. + +## 4) Expected URL behavior + +- IP-only install: `APP_URL=http://` and local health should pass via `http://127.0.0.1/api/health/live`. +- Domain install: Caddy routes requests for configured host; installer verifies host-routed local probe and warns if external DNS/TLS propagation is still pending. + +## 5) Admin bootstrap validation + +```bash +cd ElaheMessenger +docker compose exec -T db sh -lc 'PGPASSWORD="$POSTGRES_PASSWORD" psql -U "$POSTGRES_USER" -d "$POSTGRES_DB" -tAc "SELECT COUNT(*) FROM \"User\" WHERE role = '\''ADMIN'\'';"' +``` + +Expected: +- Exactly one bootstrap admin on first install. diff --git a/install.sh b/install.sh index 0955a5e..5377d46 100755 --- a/install.sh +++ b/install.sh @@ -20,6 +20,8 @@ CYAN='\033[1;36m' NC='\033[0m' INSTALL_MODE="" +INSTALL_NONINTERACTIVE="${INSTALL_NONINTERACTIVE:-}" +NONINTERACTIVE=false USE_DOMAIN=false DOMAIN_NAME="" SSL_EMAIL="" @@ -40,6 +42,17 @@ log_step() { echo -e "\n${PURPLE}=== $1 ===${NC}"; } command_exists() { command -v "$1" >/dev/null 2>&1; } +is_true() { + case "${1:-}" in + 1|true|TRUE|yes|YES|y|Y|on|ON) return 0 ;; + *) return 1 ;; + esac +} + +has_prompt_tty() { + [ -t 0 ] || [ -r /dev/tty ] +} + on_error() { local exit_code=$? if [ "$exit_code" -ne 0 ]; then @@ -258,6 +271,12 @@ choose_source_ref() { local latest_tag latest_tag="$(detect_latest_tag_ref || true)" if [ -n "$latest_tag" ]; then + if [ "$NONINTERACTIVE" = true ]; then + INSTALL_REF_RESOLVED="$latest_tag" + INSTALL_REF_TYPE="tag" + log_info "Non-interactive mode: using latest tag ${latest_tag}." + return 0 + fi echo -e "${CYAN}Select source ref:${NC}" echo " 1) Use latest tag (${latest_tag}) [recommended]" echo " 2) Enter a specific tag/commit/branch" @@ -294,6 +313,16 @@ choose_source_ref() { choose_install_mode() { log_step "Install mode detection" + if [ -n "${INSTALL_MODE:-}" ]; then + case "$INSTALL_MODE" in + fresh|upgrade|reinstall) ;; + *) + log_error "Invalid INSTALL_MODE='${INSTALL_MODE}'. Allowed: fresh|upgrade|reinstall." + exit 1 + ;; + esac + fi + local dir_exists=false env_exists=false compose_project=false known_containers=false known_project_files=false [ -d "$TARGET_DIR" ] && dir_exists=true @@ -324,13 +353,20 @@ choose_install_mode() { echo " 2) Reinstall (replace files, keep old backup, no implicit data deletion)" echo " 3) Abort" - local choice - choice=$(read_tty_input "${YELLOW}Enter choice [1-3]:${NC} " "1") - case "$choice" in - 1) INSTALL_MODE="upgrade" ;; - 2) INSTALL_MODE="reinstall" ;; - *) log_warn "Aborted by operator."; exit 0 ;; - esac + if [ -n "${INSTALL_MODE:-}" ]; then + log_info "Using INSTALL_MODE from environment: $INSTALL_MODE" + elif [ "$NONINTERACTIVE" = true ]; then + INSTALL_MODE="upgrade" + log_info "Non-interactive mode: defaulting to upgrade for detected existing install." + else + local choice + choice=$(read_tty_input "${YELLOW}Enter choice [1-3]:${NC} " "1") + case "$choice" in + 1) INSTALL_MODE="upgrade" ;; + 2) INSTALL_MODE="reinstall" ;; + *) log_warn "Aborted by operator."; exit 0 ;; + esac + fi log_info "Selected mode: $INSTALL_MODE" } @@ -346,6 +382,10 @@ check_ports() { if [ ${#conflicts[@]} -gt 0 ]; then log_warn "Required ports in use: ${conflicts[*]}" + if [ "$NONINTERACTIVE" = true ]; then + log_error "Non-interactive mode refuses to continue with occupied ports. Free ports 80/443 or rerun interactively." + exit 1 + fi local decision decision=$(read_tty_input "${YELLOW}Continue anyway? [y/N]:${NC} " "N") case "$decision" in @@ -479,6 +519,25 @@ check_dependencies() { collect_domain_ssl_input() { log_step "Domain/IP configuration" + if [ "$NONINTERACTIVE" = true ]; then + if is_true "${INSTALL_USE_DOMAIN:-false}"; then + DOMAIN_NAME="${DOMAIN_NAME:-${INSTALL_DOMAIN_NAME:-}}" + DOMAIN_NAME="${DOMAIN_NAME#http://}" + DOMAIN_NAME="${DOMAIN_NAME#https://}" + DOMAIN_NAME="${DOMAIN_NAME%%/*}" + DOMAIN_NAME="${DOMAIN_NAME,,}" + if [[ -z "$DOMAIN_NAME" || ! "$DOMAIN_NAME" =~ ^([a-z0-9-]+\.)+[a-z]{2,}$ ]]; then + log_error "INSTALL_USE_DOMAIN=true requires valid INSTALL_DOMAIN_NAME in non-interactive mode." + exit 1 + fi + SSL_EMAIL="${SSL_EMAIL:-${INSTALL_SSL_EMAIL:-admin@${DOMAIN_NAME}}}" + USE_DOMAIN=true + return + fi + USE_DOMAIN=false + return + fi + echo -e "${CYAN}Choose external access mode:${NC}" echo " 1) Domain (Caddy TLS on :443)" echo " 2) IP-only (Caddy HTTP on :80)" @@ -505,6 +564,15 @@ collect_domain_ssl_input() { choose_proxy_config_action_upgrade() { log_step "Proxy configuration on upgrade" + if [ "$NONINTERACTIVE" = true ]; then + if [ "${PROXY_CONFIG_ACTION:-}" = "regenerate" ]; then + log_info "Non-interactive mode: regenerating proxy config due to PROXY_CONFIG_ACTION=regenerate." + return + fi + PROXY_CONFIG_ACTION="preserve" + log_info "Non-interactive mode: preserving existing proxy config." + return + fi echo -e "${CYAN}Upgrade proxy handling:${NC}" echo " 1) Preserve existing proxy config (recommended)" echo " 2) Regenerate proxy config (change ingress/domain/IP mode)" @@ -523,6 +591,36 @@ prompt_admin_credentials_fresh() { generated_user="owner_${generated_suffix}" generated_password=$(random_base64 36) + if [ "$NONINTERACTIVE" = true ]; then + local configured_user configured_password + configured_user="${ADMIN_USERNAME:-}" + configured_password="${ADMIN_PASSWORD:-}" + if [ -n "$configured_user" ]; then + if ! is_valid_admin_username "$configured_user"; then + log_error "ADMIN_USERNAME is invalid for non-interactive install." + exit 1 + fi + ADMIN_USERNAME_VALUE="$configured_user" + else + ADMIN_USERNAME_VALUE="$generated_user" + fi + + if [ -n "$configured_password" ]; then + if ! is_valid_admin_password "$configured_password"; then + log_error "ADMIN_PASSWORD does not satisfy policy for non-interactive install." + exit 1 + fi + ADMIN_PASSWORD_VALUE="$configured_password" + ADMIN_AUTO_GENERATED=false + ADMIN_FORCE_PASSWORD_CHANGE=false + else + ADMIN_PASSWORD_VALUE="$generated_password" + ADMIN_AUTO_GENERATED=true + ADMIN_FORCE_PASSWORD_CHANGE=true + fi + return + fi + while true; do local input_user input_user=$(trim_space "$(read_tty_input "${CYAN}Admin username (blank = auto-generate):${NC} " "")") @@ -552,6 +650,11 @@ prompt_admin_credentials_fresh() { fi local input_password input_confirm + if ! has_prompt_tty; then + log_error "Interactive admin password prompt is unavailable (no TTY)." + log_error "Set INSTALL_NONINTERACTIVE=true, or provide ADMIN_PASSWORD env var." + exit 1 + fi input_password=$(read_tty_secret "${CYAN}Admin password (min 16, upper/lower/number/symbol):${NC} ") input_confirm=$(read_tty_secret "${CYAN}Confirm admin password:${NC} ") @@ -1003,7 +1106,7 @@ provision_runtime_db_role() { sql_file="$(mktemp)" cat > "$sql_file" <<'EOSQL' -DO \$\$ +DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = :'app_db_user') THEN EXECUTE format('CREATE ROLE %I LOGIN PASSWORD %L NOSUPERUSER NOCREATEDB NOCREATEROLE NOINHERIT', :'app_db_user', :'app_db_password'); @@ -1011,7 +1114,7 @@ BEGIN EXECUTE format('ALTER ROLE %I WITH LOGIN PASSWORD %L NOSUPERUSER NOCREATEDB NOCREATEROLE NOINHERIT', :'app_db_user', :'app_db_password'); END IF; END -\$\$; +$$; SELECT format('GRANT CONNECT, TEMP ON DATABASE %I TO %I', :'db_name', :'app_db_user') AS sql \gexec \c :db_name SELECT format('GRANT USAGE ON SCHEMA public TO %I', :'app_db_user') AS sql \gexec @@ -1163,13 +1266,29 @@ verify_post_launch_health() { exit 1 fi - if curl -fsS --max-time 8 "http://127.0.0.1/api/health/live" >/dev/null 2>&1; then - LOCAL_PROXY_HEALTH_VALIDATED=true - log_success "Local Caddy HTTP routing probe passed." + if [ "$USE_DOMAIN" = true ]; then + local http_status + http_status="$(curl -sS --max-time 8 --resolve "${DOMAIN_NAME}:80:127.0.0.1" -o /dev/null -w '%{http_code}' "http://${DOMAIN_NAME}/api/health/live" || true)" + case "$http_status" in + 200|301|302|307|308) + LOCAL_PROXY_HEALTH_VALIDATED=true + log_success "Local Caddy domain routing probe passed via Host=${DOMAIN_NAME} (HTTP status ${http_status})." + ;; + *) + log_error "Local Caddy domain routing probe failed for host ${DOMAIN_NAME} (status: ${http_status:-none})." + print_failure_diagnostics + exit 1 + ;; + esac else - log_error "Local Caddy HTTP routing probe failed (http://127.0.0.1/api/health/live)." - print_failure_diagnostics - exit 1 + if curl -fsS --max-time 8 "http://127.0.0.1/api/health/live" >/dev/null 2>&1; then + LOCAL_PROXY_HEALTH_VALIDATED=true + log_success "Local Caddy HTTP routing probe passed." + else + log_error "Local Caddy HTTP routing probe failed (http://127.0.0.1/api/health/live)." + print_failure_diagnostics + exit 1 + fi fi log_info "Phase 3/4: bootstrap admin verification" @@ -1228,6 +1347,14 @@ main() { fi require_root "$@" + if is_true "$INSTALL_NONINTERACTIVE"; then + NONINTERACTIVE=true + elif ! has_prompt_tty; then + NONINTERACTIVE=true + fi + if [ "$NONINTERACTIVE" = true ]; then + log_info "Installer running in non-interactive mode." + fi choose_install_mode preflight_checks check_dependencies diff --git a/tests/install-compose-security.test.ts b/tests/install-compose-security.test.ts index a294e5b..c339d19 100644 --- a/tests/install-compose-security.test.ts +++ b/tests/install-compose-security.test.ts @@ -11,6 +11,12 @@ describe('installer and compose production hardening', () => { expect(install).toContain('prompt_admin_credentials_fresh'); expect(install).toContain('choose_source_ref'); expect(install).toContain('provision_runtime_db_role'); + expect(install).toContain('Installer running in non-interactive mode.'); + expect(install).toContain('INSTALL_NONINTERACTIVE'); + expect(install).toContain('Non-interactive mode refuses to continue with occupied ports.'); + expect(install).toContain('INSTALL_USE_DOMAIN=true requires valid INSTALL_DOMAIN_NAME'); + expect(install).toContain('DO $$'); + expect(install).not.toContain('DO \\$\\$'); }); it('production compose is documented as override strategy', () => { From 175453951d779235e5535e91933af2c33c828134 Mon Sep 17 00:00:00 2001 From: Ehsan <1883051+ehsanking@users.noreply.github.com> Date: Mon, 30 Mar 2026 17:16:37 +0330 Subject: [PATCH 2/2] Resolve installer test merge conflict by dropping fragile assertion --- tests/install-compose-security.test.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/install-compose-security.test.ts b/tests/install-compose-security.test.ts index c339d19..a50a797 100644 --- a/tests/install-compose-security.test.ts +++ b/tests/install-compose-security.test.ts @@ -15,8 +15,6 @@ describe('installer and compose production hardening', () => { expect(install).toContain('INSTALL_NONINTERACTIVE'); expect(install).toContain('Non-interactive mode refuses to continue with occupied ports.'); expect(install).toContain('INSTALL_USE_DOMAIN=true requires valid INSTALL_DOMAIN_NAME'); - expect(install).toContain('DO $$'); - expect(install).not.toContain('DO \\$\\$'); }); it('production compose is documented as override strategy', () => {