# guardia-itsm/routers/health_scheduler.py
#
# 140 lines
# 5.1 KiB
# Python

"""건강검진 — 정기 테스트 스케줄러 + 자가 수복"""
from __future__ import annotations
import json, logging, subprocess, time
from datetime import datetime, timedelta
from typing import Optional
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy import select, desc, func
from sqlalchemy.ext.asyncio import AsyncSession
from core.auth import get_current_user, require_admin_role
from database import get_db
from models import User, HealthCheckResult
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for every endpoint in this file; all paths are mounted under
# /api/health-schedule.
router = APIRouter(prefix="/api/health-schedule", tags=["건강검진"])
# Script that _run_health_check executes with python3 to run the test suite.
TEST_SCRIPT = "/opt/guardia/scripts/run_full_test.py"
# Unit names that get_status passes to `systemctl is-active`.
SERVICES = ["guardia","guardia-manager","zioinfo","zioinfo-mail","ollama"]
# Markers the test script is expected to print once per test result.
# NOTE(review): the literals here used to be empty strings (most likely emoji
# lost in transit). str.count("") returns len(text) + 1, so every run was
# recorded as failed. "✅"/"❌" are assumed from the "❌" used in the failure
# alert below — confirm against the actual output of run_full_test.py.
_PASS_MARKER = "✅"
_FAIL_MARKER = "❌"
# Total reported when the output contains no marker at all.
_DEFAULT_TEST_COUNT = 69
# Upper bound, in seconds, for one full run of the test script.
_TEST_TIMEOUT_SEC = 120


def _tally_output(output: str, returncode: Optional[int]) -> tuple:
    """Derive ``(passed, failed, total, success)`` from the script output.

    An empty marker is never counted, because ``str.count("")`` matches at
    every position. A run is successful only when no failure marker was seen
    AND the script exited with status 0, so a script that crashes before
    printing any marker is not mistaken for a clean run.
    """
    passed = output.count(_PASS_MARKER) if _PASS_MARKER else 0
    failed = output.count(_FAIL_MARKER) if _FAIL_MARKER else 0
    total = (passed + failed) or _DEFAULT_TEST_COUNT
    # NOTE(review): assumes run_full_test.py exits 0 when all tests pass.
    success = failed == 0 and returncode == 0
    return passed, failed, total, success


async def _run_health_check(triggered_by: str = "schedule"):
    """Run the full test script, store the outcome and alert on failure.

    Intended to run in the background. The script's combined stdout/stderr is
    scanned for pass/fail markers, a ``HealthCheckResult`` row is written
    through a dedicated session, and a webhook is posted to the messenger
    when the run did not succeed.

    Args:
        triggered_by: Free-form label stored with the result
            (e.g. ``"schedule"`` or ``"manual:<username>"``).

    Returns:
        Tuple ``(success, passed, failed)``.

    Raises:
        Whatever the database layer raises if the result cannot be saved;
        failures of the test run itself and of the webhook are handled here.
    """
    import asyncio

    started = time.monotonic()  # monotonic: unaffected by wall-clock changes
    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            "python3", TEST_SCRIPT,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        stdout, _ = await asyncio.wait_for(
            proc.communicate(), timeout=_TEST_TIMEOUT_SEC
        )
        output = stdout.decode("utf-8", "replace")
        passed, failed, total, success = _tally_output(output, proc.returncode)
    except Exception as exc:
        # wait_for() only cancels communicate(); the child itself keeps
        # running unless it is killed and reaped explicitly.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await proc.wait()
            except ProcessLookupError:
                pass  # exited on its own in the meantime
        logger.warning("health check run failed: %r", exc)
        # str() of a timeout error is empty; fall back to repr so the stored
        # summary always says what went wrong.
        output = str(exc) or repr(exc)
        passed, failed, total, success = 0, 1, 1, False
    elapsed = round(time.monotonic() - started, 1)

    # Persist the result through a separate session: this coroutine runs
    # outside any request, so no request-scoped session is available.
    from database import AsyncSessionLocal
    async with AsyncSessionLocal() as db:
        db.add(HealthCheckResult(
            triggered_by=triggered_by,
            total=total, passed=passed, failed=failed,
            success=success, duration_sec=elapsed,
            output_summary=output[-500:] if output else "",
            created_at=datetime.utcnow(),
        ))
        await db.commit()

    # Notify the messenger on failure. Best effort: a broken webhook must not
    # fail the health check, but it is logged rather than silently dropped.
    if not success:
        try:
            import httpx
            async with httpx.AsyncClient(timeout=5) as client:
                await client.post("http://127.0.0.1:9001/api/messenger/webhook", json={
                    "event": "health_check_failed",
                    "room": "ops",
                    "success": False,
                    "result_summary": f"❌ 건강검진 실패 {failed}/{total} 테스트 실패 — 즉시 확인 필요",
                })
        except Exception:
            logger.warning("health check failure alert could not be sent", exc_info=True)
    return success, passed, failed
@router.post("/run")
async def run_health_check(
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """Queue a full health-check run and return immediately.

    The run is scheduled as a background task and labelled
    ``manual:<username>`` after the requesting user; the response only
    acknowledges that the task was queued.
    """
    trigger_label = "manual:{}".format(user.username)
    background_tasks.add_task(_run_health_check, trigger_label)
    acknowledgement = dict(
        ok=True,
        message="건강검진 시작됨 (백그라운드). 결과는 /history에서 확인",
    )
    return acknowledgement
def _service_state(unit: str) -> str:
    """Return what ``systemctl is-active`` prints for *unit*.

    Falls back to ``"unknown"`` when systemctl cannot be run, times out
    (3 s) or prints nothing.
    """
    try:
        done = subprocess.run(
            ["systemctl", "is-active", unit],
            capture_output=True, text=True, timeout=3,
        )
    except (OSError, subprocess.SubprocessError):
        return "unknown"
    return done.stdout.strip() or "unknown"


@router.get("/status")
async def get_status(db: AsyncSession = Depends(get_db), user: User = Depends(get_current_user)):
    """Return the latest stored health-check result plus live service states.

    Returns:
        Dict with the timestamp and counters of the most recent
        ``HealthCheckResult`` (defaults when none exists), an ``overall``
        verdict, and the ``systemctl is-active`` state of every entry in
        ``SERVICES``.
    """
    import asyncio

    row = await db.execute(
        select(HealthCheckResult).order_by(desc(HealthCheckResult.created_at)).limit(1)
    )
    latest = row.scalar_one_or_none()

    # Live service states. subprocess.run blocks, so the probes are pushed to
    # the default executor (and run in parallel) instead of stalling the
    # event loop for up to 3 s per service.
    loop = asyncio.get_running_loop()
    states = await asyncio.gather(
        *(loop.run_in_executor(None, _service_state, svc) for svc in SERVICES)
    )
    svc_status = dict(zip(SERVICES, states))

    # "UNKNOWN" is reserved for "no check has been recorded yet"; a recorded
    # failure is reported as such rather than hidden behind "UNKNOWN".
    if latest is None:
        overall = "UNKNOWN"
    elif latest.success:
        overall = "HEALTHY"
    else:
        overall = "UNHEALTHY"

    return {
        "last_check": latest.created_at if latest else None,
        "overall": overall,
        "passed": latest.passed if latest else 0,
        "failed": latest.failed if latest else 0,
        "total": latest.total if latest else 69,
        "services": svc_status,
        "next_scheduled": "매일 03:00 KST",
    }
# Hard cap on rows returned by /history, whatever the client asks for.
_HISTORY_MAX_LIMIT = 200


@router.get("/history")
async def get_history(limit: int = 20, db: AsyncSession = Depends(get_db),
                      user: User = Depends(get_current_user)):
    """Return the most recent health-check results, newest first.

    Args:
        limit: Maximum number of rows. It comes straight from the query
            string, so it is clamped to ``0.._HISTORY_MAX_LIMIT``: a negative
            value would otherwise reach the database as a negative LIMIT and
            a huge one would load the whole table.

    Returns:
        List of dicts with id, trigger label, verdict, counters, duration
        and creation time of each stored result.
    """
    limit = max(0, min(limit, _HISTORY_MAX_LIMIT))
    rows = await db.execute(
        select(HealthCheckResult).order_by(desc(HealthCheckResult.created_at)).limit(limit)
    )
    return [{
        "id": r.id, "triggered_by": r.triggered_by,
        "success": r.success, "passed": r.passed, "failed": r.failed,
        "total": r.total, "duration_sec": r.duration_sec,
        "created_at": r.created_at,
    } for r in rows.scalars().all()]
@router.get("/config")
async def get_config(user: User = Depends(get_current_user)):
    """Return the health-check configuration.

    Every value is a constant defined in this function; nothing is read
    from storage.
    """
    schedule = {
        "daily_test": "03:00 KST (매일)",
        "service_check": "매시간 정각",
        "security_audit": "매주 월요일 02:00",
    }
    config = {
        "schedule": schedule,
        "on_failure": "SR 자동 생성 + 메신저 알림 + 자가 수복 시도",
        "test_count": 69,
        "auto_heal": True,
    }
    return config
@router.put("/config")
async def update_config(schedule_hour: int = 3, user: User = Depends(require_admin_role)):
    """Acknowledge a new daily health-check hour (admin only).

    Args:
        schedule_hour: Hour of day for the daily run, 0-23.

    Raises:
        HTTPException: 422 when ``schedule_hour`` is outside 0-23.
    """
    if not 0 <= schedule_hour <= 23:
        raise HTTPException(
            status_code=422,
            detail=f"schedule_hour는 0~23 범위여야 합니다 (입력값: {schedule_hour})",
        )
    # NOTE(review): the value is only echoed back — nothing in this function
    # stores it or reschedules a job. Confirm whether persistence is handled
    # elsewhere or still missing.
    return {"ok": True, "message": f"건강검진 시간 {schedule_hour:02d}:00으로 변경됨"}