guardia-itsm/routers/auto_remediation.py
2026-06-02 18:48:18 +09:00

177 lines
5.9 KiB
Python

"""
자동 교정 실행 — 승인 기반 (PAM 패턴 재사용)
드리프트 교정 명령을 관리자 승인 후 SSH 경유로 실행.
롤백 명령 포함.
엔드포인트:
GET /api/remediation/jobs — 교정 작업 목록
GET /api/remediation/jobs/{id} — 교정 작업 상세
POST /api/remediation/approve/{id} — 승인 후 실행
POST /api/remediation/reject/{id} — 거부
POST /api/remediation/rollback/{id} — 롤백 실행
"""
from __future__ import annotations
import logging
from datetime import datetime
import paramiko
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select, desc
from sqlalchemy.ext.asyncio import AsyncSession
from core.auth import get_current_user, require_admin_role
from core.ssh_exec import _decrypt_password as decrypt_password
from database import get_db
from models import User, Server, AutoRemediationJob, AuditLog
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that groups the remediation endpoints defined below under the
# /api/remediation prefix.
router = APIRouter(prefix="/api/remediation", tags=["자동 교정"])
async def _run_fix(server: Server, cmd: str) -> tuple[bool, str]:
    """Run a remediation command on *server* over SSH (agentless).

    The Paramiko calls are blocking, so they are executed in the event loop's
    default thread-pool executor rather than directly in the coroutine.

    Args:
        server: Target server; ``ip_addr``, ``ssh_user`` and ``os_pw_enc`` are read.
        cmd: Command line to execute on the remote host.

    Returns:
        ``(True, stdout)`` when the command exits with status 0, otherwise
        ``(False, message)`` where the message is either
        ``"exit=<code>: <stderr, first 200 chars>"`` or the first 200 characters
        of the exception text. This function never raises ``Exception``
        subclasses; every failure is reported through the return value.
    """
    import asyncio  # local import: keeps this block self-contained

    def _exec_blocking(host: str, username: str, password: str) -> tuple[bool, str]:
        # Runs in a worker thread; only plain values are passed in so that no
        # ORM object is touched outside the event-loop thread.
        ssh = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification, which leaves the connection open to MITM. Left
        # unchanged because no known_hosts source is visible here.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            ssh.connect(host, username=username, password=password, timeout=10)
            _, stdout, stderr = ssh.exec_command(cmd, timeout=30)
            out = stdout.read().decode('utf-8', 'replace').strip()
            err = stderr.read().decode('utf-8', 'replace').strip()
            exit_code = stdout.channel.recv_exit_status()
        finally:
            # Always release the connection, including when connect/exec fails.
            ssh.close()
        if exit_code == 0:
            return True, out
        return False, f"exit={exit_code}: {err[:200]}"

    try:
        # Resolve everything that needs the ORM object before leaving the loop thread.
        host = server.ip_addr
        username = server.ssh_user
        password = decrypt_password(server.os_pw_enc)
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _exec_blocking, host, username, password)
    except Exception as e:
        # Best-effort contract: report the failure to the caller, but keep the
        # traceback in the log instead of discarding it.
        logger.warning("remediation command failed: %s", e, exc_info=True)
        return False, str(e)[:200]
@router.get("/jobs")
async def list_jobs(
    status: str = None,
    limit: int = 50,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """List remediation jobs, newest first.

    Args:
        status: When given (and non-empty), only jobs with this status are returned.
        limit: Maximum number of rows to return.
        db: Database session dependency.
        user: Authenticated user dependency (not otherwise used here).

    Returns:
        A list of job summaries. ``fix_cmd`` is shortened to its first 80
        characters followed by ``"..."`` when it is longer than 80 characters.
    """

    def _summarize(job):
        # Shorten long commands for the list view; short or missing ones pass through.
        command = job.fix_cmd
        if len(command or "") > 80:
            command = command[:80] + "..."
        return {
            "id": job.id,
            "server_id": job.server_id,
            "item_key": job.item_key,
            "fix_cmd": command,
            "status": job.status,
            "result": job.result_message,
            "created_at": job.created_at,
            "executed_at": job.executed_at,
        }

    stmt = select(AutoRemediationJob)
    if status:
        stmt = stmt.where(AutoRemediationJob.status == status)
    stmt = stmt.order_by(desc(AutoRemediationJob.created_at)).limit(limit)

    result = await db.execute(stmt)
    return [_summarize(job) for job in result.scalars().all()]
@router.get("/jobs/{job_id}")
async def get_job(
    job_id: int,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """Return the full detail of one remediation job.

    Args:
        job_id: Primary key of the job.
        db: Database session dependency.
        user: Authenticated user dependency (not otherwise used here).

    Returns:
        A dict with the job's fields, including the untruncated ``fix_cmd``.

    Raises:
        HTTPException: 404 when no job has the given id.
    """
    stmt = select(AutoRemediationJob).where(AutoRemediationJob.id == job_id)
    job = (await db.execute(stmt)).scalar_one_or_none()
    if not job:
        raise HTTPException(404)

    # Build the payload in the same key order the API has always returned.
    payload = {
        field: getattr(job, field)
        for field in ("id", "server_id", "item_key", "fix_cmd", "status")
    }
    payload["result"] = job.result_message
    for field in ("approved_by", "created_at", "executed_at"):
        payload[field] = getattr(job, field)
    return payload
@router.post("/approve/{job_id}")
async def approve_and_execute(
    job_id: int,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(require_admin_role),
):
    """Approve a pending remediation job and run its fix command immediately.

    Args:
        job_id: Primary key of the job to approve.
        db: Database session dependency.
        user: Admin user dependency; recorded as the approver and in the audit log.

    Returns:
        A dict with ``ok`` (whether the command succeeded), ``job_id``, the
        final ``status`` and the first 200 characters of the command result.

    Raises:
        HTTPException: 404 when the job or its server does not exist;
            400 when the job is not in ``PENDING_APPROVAL``.
    """
    job_stmt = select(AutoRemediationJob).where(AutoRemediationJob.id == job_id)
    job = (await db.execute(job_stmt)).scalar_one_or_none()
    if not job:
        raise HTTPException(404)
    if job.status != "PENDING_APPROVAL":
        raise HTTPException(400, f"현재 상태: {job.status}")

    server_stmt = select(Server).where(Server.id == job.server_id)
    server = (await db.execute(server_stmt)).scalar_one_or_none()
    if not server:
        raise HTTPException(404, "서버 없음")

    # Persist the approval and the EXECUTING marker before the command runs.
    job.approved_by = user.id
    job.status = "EXECUTING"
    await db.commit()

    # NOTE(review): job/server attributes are read after the commit above;
    # presumably the session is configured not to expire them — confirm.
    success, result = await _run_fix(server, job.fix_cmd)
    if success:
        job.status = "SUCCESS"
        outcome = '성공'
    else:
        job.status = "FAILED"
        outcome = '실패'
    job.result_message = result[:500]
    job.executed_at = datetime.utcnow()

    # Audit trail entry for the executed remediation.
    db.add(
        AuditLog(
            user_id=user.id,
            action="AUTO_REMEDIATION",
            detail=f"서버 {server.hostname}: {job.item_key} 교정 {outcome}",
            created_at=datetime.utcnow(),
        )
    )
    await db.commit()

    return {
        "ok": success,
        "job_id": job_id,
        "status": job.status,
        "result": result[:200],
    }
@router.post("/reject/{job_id}")
async def reject_job(
    job_id: int,
    reason: str = "관리자 거부",
    db: AsyncSession = Depends(get_db),
    user: User = Depends(require_admin_role),
):
    """Reject a remediation job that is still waiting for approval.

    Args:
        job_id: Primary key of the job to reject.
        reason: Free-text rejection reason stored as the job's result message.
        db: Database session dependency.
        user: Admin user dependency (authorization only).

    Returns:
        ``{"ok": True}`` once the rejection has been committed.

    Raises:
        HTTPException: 404 when the job does not exist; 400 when the job is
            not in ``PENDING_APPROVAL``.
    """
    row = await db.execute(select(AutoRemediationJob).where(AutoRemediationJob.id == job_id))
    job = row.scalar_one_or_none()
    if not job:
        raise HTTPException(404)
    # Only pending jobs may be rejected; otherwise an executing, finished or
    # rolled-back job would have its status and result overwritten.
    if job.status != "PENDING_APPROVAL":
        raise HTTPException(400, f"현재 상태: {job.status}")
    job.status = "REJECTED"
    # Cap the caller-supplied text at 500 characters, the same bound the
    # approve endpoint applies to result_message.
    job.result_message = reason[:500]
    await db.commit()
    return {"ok": True}
@router.post("/rollback/{job_id}")
async def rollback_job(
    job_id: int,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(require_admin_role),
):
    """Run the rollback command of a successfully executed remediation job.

    Args:
        job_id: Primary key of the job to roll back.
        db: Database session dependency.
        user: Admin user dependency (authorization only).

    Returns:
        A dict with ``ok`` (whether the rollback command succeeded), the new
        ``status`` (``ROLLED_BACK`` or ``ROLLBACK_FAILED``) and the first 200
        characters of the command result.

    Raises:
        HTTPException: 404 when the job or its server does not exist; 400 when
            the job has no rollback command or is not in ``SUCCESS``.
    """
    row = await db.execute(select(AutoRemediationJob).where(AutoRemediationJob.id == job_id))
    job = row.scalar_one_or_none()
    # A missing job is reported as 404, like the other endpoints, rather than
    # being folded into the "no rollback command" error.
    if not job:
        raise HTTPException(404)
    if not job.rollback_cmd:
        raise HTTPException(400, "롤백 명령 없음")
    if job.status != "SUCCESS":
        raise HTTPException(400, "실행 완료된 작업만 롤백 가능")
    srv_row = await db.execute(select(Server).where(Server.id == job.server_id))
    server = srv_row.scalar_one_or_none()
    if not server:
        raise HTTPException(404)
    success, result = await _run_fix(server, job.rollback_cmd)
    job.status = "ROLLED_BACK" if success else "ROLLBACK_FAILED"
    # Prefix marks the message as rollback output; 400 chars keeps the total within 500.
    job.result_message = f"ROLLBACK: {result[:400]}"
    await db.commit()
    return {"ok": success, "status": job.status, "result": result[:200]}