Update health_monitor.yaml

This commit is contained in:
2025-10-23 15:29:48 +02:00
committed by GitHub
parent fa2bc3e843
commit f94fdd5b27

View File

@@ -2,132 +2,57 @@ name: Cluster Health Monitor
on: on:
schedule: schedule:
# Run every hour
- cron: '0 * * * *' - cron: '0 * * * *'
workflow_dispatch: # Allow manual triggering workflow_dispatch:
env:
# Timeout for each health check request (in seconds)
REQUEST_TIMEOUT: 30
# Expected HTTP status codes (comma-separated)
EXPECTED_STATUS_CODES: "200,301,302,401,403"
jobs: jobs:
health-check: health-check:
name: Monitor Cluster Endpoints name: Monitor Cluster Endpoints
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Set up endpoint monitoring - name: Check endpoints
run: | run: |
# Create endpoints file from secrets # Simple and clean endpoint checker
cat << 'EOF' > endpoints.txt ENDPOINTS="${{ secrets.CLUSTER_ENDPOINTS }}"
${{ secrets.CLUSTER_ENDPOINTS }}
EOF
# Validate endpoints file # Validate input
if [ ! -s endpoints.txt ]; then if [[ -z "$ENDPOINTS" ]]; then
echo "❌ No endpoints found in CLUSTER_ENDPOINTS secret" echo "❌ No endpoints found in CLUSTER_ENDPOINTS secret"
exit 1 exit 1
fi fi
endpoint_count=$(grep -v '^[[:space:]]*$' endpoints.txt | grep -v '^[[:space:]]*#' | wc -l) # Initialize counters
echo "📋 Found $endpoint_count endpoints to monitor" total=0
echo "" failed=0
- name: Monitor endpoints # Process each endpoint
run: | while IFS= read -r url; do
#!/bin/bash
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Counters
total_endpoints=0
successful_checks=0
failed_checks=0
failed_endpoints=()
endpoint_index=0
echo -e "${BLUE}🔍 Starting cluster health monitoring...${NC}"
echo ""
# Read endpoints and perform health checks
while IFS= read -r endpoint || [ -n "$endpoint" ]; do
# Skip empty lines and comments # Skip empty lines and comments
if [[ -z "$endpoint" || "$endpoint" =~ ^[[:space:]]*# ]]; then [[ -z "$url" || "$url" =~ ^[[:space:]]*# ]] && continue
continue url=$(echo "$url" | xargs)
fi [[ -z "$url" ]] && continue
# Trim whitespace total=$((total + 1))
endpoint=$(echo "$endpoint" | xargs) echo "Testing: $url"
if [[ -z "$endpoint" ]]; then # Simple curl check - fail if HTTP code >= 400 or connection fails
continue if curl -sfLI --max-time 10 "$url" >/dev/null 2>&1; then
fi echo "✅ OK"
total_endpoints=$((total_endpoints + 1))
endpoint_index=$((endpoint_index + 1))
echo -e "${BLUE}Testing endpoint #${endpoint_index}:${NC}"
# Perform health check with curl
response=$(curl -s -w "\n%{http_code}\n%{time_total}" \
--max-time $REQUEST_TIMEOUT \
--connect-timeout 10 \
--retry 2 \
--retry-delay 1 \
--user-agent "GitHub-Actions-Health-Monitor/1.0" \
--location \
--insecure \
"$endpoint" 2>/dev/null || echo -e "\nERROR\n0")
# Parse response
http_code=$(echo "$response" | tail -n 2 | head -n 1)
response_time=$(echo "$response" | tail -n 1)
# Check if request was successful
if [[ "$http_code" == "ERROR" || -z "$http_code" ]]; then
echo -e " ${RED}❌ Connection failed${NC}"
failed_checks=$((failed_checks + 1))
failed_endpoints+=("Endpoint #$endpoint_index (Connection failed)")
else else
# Check if status code is expected echo "❌ FAILED"
if [[ ",$EXPECTED_STATUS_CODES," == *",$http_code,"* ]]; then failed=$((failed + 1))
echo -e " ${GREEN}✅ HTTP $http_code${NC} (${response_time}s)"
successful_checks=$((successful_checks + 1))
else
echo -e " ${RED}❌ HTTP $http_code${NC} (${response_time}s) - Unexpected status code"
failed_checks=$((failed_checks + 1))
failed_endpoints+=("Endpoint #$endpoint_index (HTTP $http_code)")
fi
fi fi
echo "" echo ""
done < endpoints.txt done <<< "$ENDPOINTS"
# Summary # Summary
echo -e "${BLUE}📊 Health Check Summary:${NC}" echo "📊 Summary: $((total - failed))/$total endpoints OK"
echo -e " Total endpoints: $total_endpoints"
echo -e " ${GREEN}Successful: $successful_checks${NC}"
echo -e " ${RED}Failed: $failed_checks${NC}"
echo ""
# Report failed endpoints
if [ $failed_checks -gt 0 ]; then
echo -e "${RED}💥 Failed endpoints:${NC}"
for failed_endpoint in "${failed_endpoints[@]}"; do
echo -e " ${RED}• $failed_endpoint${NC}"
done
echo ""
# Exit with error if any failed
if [[ $failed -gt 0 ]]; then
echo "💥 $failed endpoint(s) failed!"
exit 1 exit 1
else else
echo -e "${GREEN}🎉 All endpoints are healthy!${NC}" echo "🎉 All endpoints healthy!"
fi fi