# Viewer residue from the page this file was copied from (not part of the workflow):
# Files · dev-cluster/.github/workflows/health_monitor.yaml · 61 lines · 1.6 KiB · YAML

# Workflow display name.
name: Cluster Health Status

# Events that start this workflow. Every trigger below must be nested under
# `on:`; written flush-left they become unrelated top-level keys and `on:`
# itself is left empty.
on:
  # At minute 0 of every hour.
  schedule:
    - cron: '0 * * * *'
  # On pushes that change a file directly under .github/workflows/.
  push:
    paths:
      - '.github/workflows/*'
  # Allow the workflow to be started manually.
  workflow_dispatch:
jobs:
  health-check:
    name: Monitor Cluster Endpoints
    runs-on: ubuntu-latest
    # The job never checks out code or calls the GitHub API, so the
    # automatically provided token is granted no scopes.
    permissions: {}
    steps:
      # Probes every URL listed in the CLUSTER_ENDPOINTS secret (one URL per
      # line; blank lines and lines starting with '#' are ignored) and fails
      # the step if any probe fails or if there was nothing to probe.
      - name: Check endpoints
        # Pass the secret through the environment instead of expanding a
        # workflow expression inside the script text: an expanded value is
        # pasted into the script verbatim, so quotes, '$' or backticks in it
        # would be interpreted by the shell.
        env:
          CLUSTER_ENDPOINTS: ${{ secrets.CLUSTER_ENDPOINTS }}
        run: |
          # Validate input
          if [[ -z "${CLUSTER_ENDPOINTS:-}" ]]; then
            echo "❌ No endpoints found in CLUSTER_ENDPOINTS secret"
            exit 1
          fi

          # Initialize counters
          total=0
          failed=0

          # Process each endpoint
          while IFS= read -r url; do
            # Trim leading and trailing whitespace (including a stray CR from
            # CRLF input) with parameter expansion only. Unlike piping through
            # xargs, this never interprets quotes or backslashes in the URL.
            url="${url#"${url%%[![:space:]]*}"}"
            url="${url%"${url##*[![:space:]]}"}"

            # Skip empty lines and comments
            if [[ -z "$url" || "$url" == \#* ]]; then
              continue
            fi

            total=$((total + 1))
            echo "Testing: $url"

            # HEAD request following redirects; fails on HTTP status >= 400,
            # on connection errors, or after 10 seconds in total.
            if curl -sfLI --max-time 10 "$url" >/dev/null 2>&1; then
              echo "✅ OK"
            else
              echo "❌ FAILED"
              failed=$((failed + 1))
            fi
            echo ""
          done <<< "$CLUSTER_ENDPOINTS"

          # A secret holding only comments or blank lines must not be reported
          # as a healthy cluster.
          if [[ $total -eq 0 ]]; then
            echo "❌ No endpoints found in CLUSTER_ENDPOINTS secret"
            exit 1
          fi

          # Summary
          echo "📊 Summary: $((total - failed))/$total endpoints OK"

          # Exit with error if any failed
          if [[ $failed -gt 0 ]]; then
            echo "💥 $failed endpoint(s) failed!"
            exit 1
          else
            echo "🎉 All endpoints healthy!"
          fi