Review notes (text before the first "diff --git" line is not part of the patch):
* Reflowed from a whitespace-collapsed copy; indentation is conventional 2-space and
  the hunk counts are recomputed from the lines shown - verify against the real file.
* The post-image blob id on the index line predates the edits to the added lines.
diff --git a/.github/workflows/health_monitor.yaml b/.github/workflows/health_monitor.yaml
index acfe6a0..2b2e16f 100644
--- a/.github/workflows/health_monitor.yaml
+++ b/.github/workflows/health_monitor.yaml
@@ -2,130 +2,65 @@ name: Cluster Health Monitor

 on:
   schedule:
-    # Run every hour
     - cron: '0 * * * *'
-  workflow_dispatch: # Allow manual triggering
-
-env:
-  # Timeout for each health check request (in seconds)
-  REQUEST_TIMEOUT: 30
-  # Expected HTTP status codes (comma-separated)
-  EXPECTED_STATUS_CODES: "200,301,302,401,403"
+  workflow_dispatch:

 jobs:
   health-check:
     name: Monitor Cluster Endpoints
     runs-on: ubuntu-latest
-
     steps:
-      - name: Set up endpoint monitoring
+      - name: Check endpoints
+        env:
+          # Pass the secret through the environment instead of expanding it
+          # inside the script text, where quotes, backticks or $(...) in the
+          # value would be parsed as shell code.
+          CLUSTER_ENDPOINTS: ${{ secrets.CLUSTER_ENDPOINTS }}
         run: |
-          # Create endpoints file from secrets
-          cat << 'EOF' > endpoints.txt
-          ${{ secrets.CLUSTER_ENDPOINTS }}
-          EOF
+          # Endpoint checker: one URL per line, blank lines and # comments ignored
+          ENDPOINTS="${CLUSTER_ENDPOINTS:-}"

-          # Validate endpoints file
-          if [ ! -s endpoints.txt ]; then
+          # Validate input
+          if [[ -z "$ENDPOINTS" ]]; then
             echo "❌ No endpoints found in CLUSTER_ENDPOINTS secret"
             exit 1
           fi

-          endpoint_count=$(grep -v '^[[:space:]]*$' endpoints.txt | grep -v '^[[:space:]]*#' | wc -l)
-          echo "📋 Found $endpoint_count endpoints to monitor"
-          echo ""
-
-      - name: Monitor endpoints
-        run: |
-          #!/bin/bash
-          set -e
+          # Initialize counters
+          total=0
+          failed=0

-          # Colors for output
-          RED='\033[0;31m'
-          GREEN='\033[0;32m'
-          YELLOW='\033[1;33m'
-          BLUE='\033[0;34m'
-          NC='\033[0m' # No Color
-
-          # Counters
-          total_endpoints=0
-          successful_checks=0
-          failed_checks=0
-          failed_endpoints=()
-          endpoint_index=0
-
-          echo -e "${BLUE}🔍 Starting cluster health monitoring...${NC}"
-          echo ""
-
-          # Read endpoints and perform health checks
-          while IFS= read -r endpoint || [ -n "$endpoint" ]; do
+          # Process each endpoint
+          while IFS= read -r url; do
             # Skip empty lines and comments
-            if [[ -z "$endpoint" || "$endpoint" =~ ^[[:space:]]*# ]]; then
-              continue
-            fi
+            [[ -z "$url" || "$url" =~ ^[[:space:]]*# ]] && continue
+            url=$(echo "$url" | xargs)
+            [[ -z "$url" ]] && continue

-            # Trim whitespace
-            endpoint=$(echo "$endpoint" | xargs)
+            total=$((total + 1))
+            # Log the position only: the URLs come from a secret and should not
+            # appear in the job log.
+            echo "Testing endpoint #$total"

-            if [[ -z "$endpoint" ]]; then
-              continue
-            fi
-
-            total_endpoints=$((total_endpoints + 1))
-            endpoint_index=$((endpoint_index + 1))
-
-            echo -e "${BLUE}Testing endpoint #${endpoint_index}:${NC}"
-
-            # Perform health check with curl
-            response=$(curl -s -w "\n%{http_code}\n%{time_total}" \
-              --max-time $REQUEST_TIMEOUT \
-              --connect-timeout 10 \
-              --retry 2 \
-              --retry-delay 1 \
-              --user-agent "GitHub-Actions-Health-Monitor/1.0" \
-              --location \
-              --insecure \
-              "$endpoint" 2>/dev/null || echo -e "\nERROR\n0")
-
-            # Parse response
-            http_code=$(echo "$response" | tail -n 2 | head -n 1)
-            response_time=$(echo "$response" | tail -n 1)
-
-            # Check if request was successful
-            if [[ "$http_code" == "ERROR" || -z "$http_code" ]]; then
-              echo -e " ${RED}❌ Connection failed${NC}"
-              failed_checks=$((failed_checks + 1))
-              failed_endpoints+=("Endpoint #$endpoint_index (Connection failed)")
+            # HEAD request following redirects; --fail turns HTTP >= 400 into a
+            # non-zero exit, as do connection errors and the 10 s timeout.
+            # NOTE(review): the previous checker accepted 401/403 and skipped TLS
+            # verification (--insecure); both now count as failures - confirm.
+            if curl -sfLI --max-time 10 "$url" >/dev/null 2>&1; then
+              echo "✅ OK"
             else
-              # Check if status code is expected
-              if [[ ",$EXPECTED_STATUS_CODES," == *",$http_code,"* ]]; then
-                echo -e " ${GREEN}✅ HTTP $http_code${NC} (${response_time}s)"
-                successful_checks=$((successful_checks + 1))
-              else
-                echo -e " ${RED}❌ HTTP $http_code${NC} (${response_time}s) - Unexpected status code"
-                failed_checks=$((failed_checks + 1))
-                failed_endpoints+=("Endpoint #$endpoint_index (HTTP $http_code)")
-              fi
+              echo "❌ FAILED"
+              failed=$((failed + 1))
             fi
-            echo ""
-          done < endpoints.txt
+          done <<< "$ENDPOINTS"

           # Summary
-          echo -e "${BLUE}📊 Health Check Summary:${NC}"
-          echo -e " Total endpoints: $total_endpoints"
-          echo -e " ${GREEN}Successful: $successful_checks${NC}"
-          echo -e " ${RED}Failed: $failed_checks${NC}"
-          echo ""
+          echo "📊 Summary: $((total - failed))/$total endpoints OK"

-          # Report failed endpoints
-          if [ $failed_checks -gt 0 ]; then
-            echo -e "${RED}💥 Failed endpoints:${NC}"
-            for failed_endpoint in "${failed_endpoints[@]}"; do
-              echo -e " ${RED}• $failed_endpoint${NC}"
-            done
-            echo ""
-
+          # Exit with error if any failed
+          if [[ $failed -gt 0 ]]; then
+            echo "💥 $failed endpoint(s) failed!"
             exit 1
           else
-            echo -e "${GREEN}🎉 All endpoints are healthy!${NC}"
+            echo "🎉 All endpoints healthy!"
           fi