# Cluster Health Monitor
#
# Runs at the top of every hour (and on manual dispatch) and sends an HTTP
# HEAD request to every URL listed in the CLUSTER_ENDPOINTS secret.
# The secret holds one URL per line; blank lines and lines beginning with
# '#' are ignored. The job fails if any endpoint is unhealthy or if no
# endpoint is configured.
name: Cluster Health Monitor

on:
  schedule:
    - cron: '0 * * * *'
  workflow_dispatch:

jobs:
  health-check:
    name: Monitor Cluster Endpoints
    runs-on: ubuntu-latest
    steps:
      - name: Check endpoints
        # The secret is handed to the script through the environment instead
        # of being expanded into the script text, so quotes, '$' or backticks
        # inside the value cannot break or inject into the shell code.
        env:
          ENDPOINTS: ${{ secrets.CLUSTER_ENDPOINTS }}
        run: |
          # Validate input
          if [[ -z "${ENDPOINTS:-}" ]]; then
            echo "❌ No endpoints found in CLUSTER_ENDPOINTS secret"
            exit 1
          fi

          # Initialize counters
          total=0
          failed=0

          # Process each endpoint (one URL per line)
          while IFS= read -r url; do
            # Trim leading/trailing whitespace (including a trailing CR)
            # with parameter expansion; unlike `echo | xargs` this leaves
            # quotes and backslashes inside the URL untouched.
            url="${url#"${url%%[![:space:]]*}"}"
            url="${url%"${url##*[![:space:]]}"}"

            # Skip empty lines and comments
            if [[ -z "$url" || "$url" == \#* ]]; then
              continue
            fi

            total=$((total + 1))
            echo "Testing: $url"

            # Get the HTTP status code of the final response.
            # No -f here: with -f curl exits non-zero on HTTP >= 400 while
            # still printing the code, so a fallback echo would be appended
            # to it (e.g. "401000"). `|| true` keeps a failing curl from
            # aborting the script when the shell runs with errexit.
            status=$(curl -sLI --max-time 10 -o /dev/null \
              -w '%{http_code}' "$url" 2>/dev/null || true)

            # Anything that is not exactly three digits means curl produced
            # no usable code; report it as 000.
            if [[ ! "$status" =~ ^[0-9]{3}$ ]]; then
              status="000"
            fi

            # Accept 2xx, 3xx, 401, and 403 as healthy
            if [[ "$status" =~ ^([23][0-9]{2}|401|403)$ ]]; then
              echo "✅ OK (HTTP $status)"
            else
              echo "❌ FAILED (HTTP $status)"
              failed=$((failed + 1))
            fi
            echo ""
          done <<< "$ENDPOINTS"

          # A secret made only of blank lines/comments must not pass as
          # "all healthy".
          if [[ $total -eq 0 ]]; then
            echo "❌ No endpoints found in CLUSTER_ENDPOINTS secret"
            exit 1
          fi

          # Summary
          echo "📊 Summary: $((total - failed))/$total endpoints OK"

          # Exit with error if any failed
          if [[ $failed -gt 0 ]]; then
            echo "💥 $failed endpoint(s) failed!"
            exit 1
          else
            echo "🎉 All endpoints healthy!"
          fi