"""
SR intake auto-review engine (SR Auto-Review).

Started in the background as soon as an SR is created:
  1. Look up the related server (CMDB)
  2. Create a tmux session + capture a server status snapshot (paramiko → tmux)
  3. Select a harness (based on the SR type)
  4. Generate an AI review with Ollama
  5. Save to TB_SR_AUTO_REVIEW + SSE broadcast
"""
import asyncio
import json
import logging
import re
import time
from datetime import datetime, timedelta
from typing import List, Optional

import httpx
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, ConfigDict
from sqlalchemy import and_, or_, select
from sqlalchemy.ext.asyncio import AsyncSession

from core.auth import get_current_user
from core.events import broadcast
from database import SessionLocal, get_db
from models import SRAutoReview, SRRequest, Server, User

router = APIRouter(prefix="/api/sr-review", tags=["SR Auto Review"])
log = logging.getLogger(__name__)

OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "llama3"

# SR type → harness mapping
_HARNESS_MAP = {
    "DEPLOY": "deploy-validation",
    "RESTART": "incident-response",
    "LOG": "log-analysis",
    "INQUIRY": "faq-search",
    "OTHER": "general-ops",
}

# Safe, read-only snapshot commands
_SNAPSHOT_CMDS = {
    "uptime": "uptime",
    "disk": "df -h",
    "memory": "free -h",
    "top_procs": "ps aux --sort=-%cpu 2>/dev/null | head -10",
    "services": (
        "systemctl list-units --type=service --state=running --no-pager 2>/dev/null "
        "| head -15 || service --status-all 2>/dev/null | head -15 "
        "|| echo 'service list unavailable'"
    ),
    "recent_log": (
        "tail -n 40 /var/log/messages 2>/dev/null "
        "|| tail -n 40 /var/log/syslog 2>/dev/null "
        "|| journalctl -n 40 --no-pager 2>/dev/null "
        "|| echo 'log unavailable'"
    ),
}

# Strong references to fire-and-forget review tasks. The event loop only keeps
# weak references to tasks, so an unreferenced task may be garbage-collected
# before it finishes.
_background_tasks: set = set()


# ── SSH utilities ─────────────────────────────────────────────────────────────

def _decrypt_pw(enc: str) -> str:
    """Decrypt an encrypted credential field.

    Returns an empty string when decryption (or importing the crypto helper)
    fails; callers treat the empty string as "no usable credential".
    """
    try:
        from core.crypto import decrypt_field
        return decrypt_field(enc)
    except Exception as e:
        # Log only the exception type so no credential material reaches the log.
        log.warning("자격증명 복호화 오류: %s", type(e).__name__)
        return ""


def _collect_snapshot_blocking(connect_kwargs: dict, session_name: str) -> dict:
    """Connect over SSH and run every snapshot command (blocking).

    Meant to run in a worker thread: every paramiko call here blocks.

    Args:
        connect_kwargs: keyword arguments passed to ``SSHClient.connect``.
        session_name: name of the tmux session to create on the remote host.

    Returns:
        A dict with one entry per key of ``_SNAPSHOT_CMDS`` (output truncated
        to 600 characters, or "수집 실패" when that command failed), plus
        ``tmux_session`` when tmux is available and ``error`` when the SSH
        connection itself failed.
    """
    snapshot: dict = {}
    client = None
    try:
        import paramiko

        client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification — confirm this is acceptable for the target network.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(**connect_kwargs)

        # Is tmux installed?
        _, stdout, _ = client.exec_command(
            "which tmux 2>/dev/null && echo HAS_TMUX || echo NO_TMUX"
        )
        has_tmux = "HAS_TMUX" in stdout.read().decode()

        if has_tmux:
            client.exec_command(
                f"tmux new-session -d -s '{session_name}' 2>/dev/null; true"
            )
            time.sleep(0.3)  # give the session a moment to come up
            snapshot["tmux_session"] = session_name

        for key, cmd in _SNAPSHOT_CMDS.items():
            try:
                if has_tmux:
                    # Send the command into the tmux session (stays in its history)
                    safe_cmd = cmd.replace("'", "'\\''")
                    client.exec_command(
                        f"tmux send-keys -t '{session_name}' '{safe_cmd}' Enter"
                    )
                    time.sleep(0.8)  # let the command print before capturing
                    _, out, _ = client.exec_command(
                        f"tmux capture-pane -p -t '{session_name}' | tail -20"
                    )
                else:
                    _, out, _ = client.exec_command(cmd, timeout=10)
                output = out.read().decode(errors="replace").strip()
                snapshot[key] = output[:600]
            except Exception:
                # Best effort: one failing command must not abort the others.
                snapshot[key] = "수집 실패"
    except Exception as e:
        snapshot["error"] = f"SSH 실패: {type(e).__name__}: {e}"
        log.warning("SR 리뷰 스냅샷 오류: %s", e)
    finally:
        # Close the SSH connection on every path; the remote tmux session
        # is not killed and outlives the connection.
        if client is not None:
            client.close()
    return snapshot


async def _capture_server_snapshot(server: Server) -> dict:
    """
    SSH → create a tmux session → collect the snapshot.

    The tmux session is not terminated — the assignee can attach to it later
    with 'tmux attach'.

    The blocking SSH work runs in the default executor so the event loop stays
    responsive. Never raises: failures are reported under the ``error`` key of
    the returned dict.
    """
    session_name = f"sr-{int(time.time())}"
    try:
        # Read the ORM attributes here, on the event-loop thread, so the
        # worker thread only ever touches plain Python values.
        kw: dict = {
            "hostname": server.ip_addr,
            "port": server.port or 22,
            "username": server.ssh_user,
            "timeout": 15,
        }
        if server.ssh_method == "KEY" and server.ssh_key_path:
            kw["key_filename"] = server.ssh_key_path
        else:
            pw = _decrypt_pw(server.os_pw_enc or "")
            if not pw:
                return {"error": "자격증명 복호화 실패"}
            kw["password"] = pw

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, _collect_snapshot_blocking, kw, session_name
        )
    except Exception as e:
        log.warning("SR 리뷰 스냅샷 오류: %s", e)
        return {"error": f"SSH 실패: {type(e).__name__}: {e}"}


# ── Similar SR lookup ─────────────────────────────────────────────────────────

async def _find_similar_srs(sr: SRRequest, db: AsyncSession) -> list:
    """Return up to 5 completed SRs from the last 30 days that share the SR
    type or the institution id with *sr*, newest first.

    Each entry is a dict with ``sr_id``, ``title`` and ``resolution`` (the
    first 100 characters of the other SR's description).
    """
    cutoff = datetime.now() - timedelta(days=30)
    q = (
        select(SRRequest)
        .where(
            and_(
                SRRequest.sr_id != sr.sr_id,
                SRRequest.status == "COMPLETED",
                SRRequest.created_at >= cutoff,
                or_(
                    SRRequest.sr_type == sr.sr_type,
                    SRRequest.inst_id == sr.inst_id,
                ),
            )
        )
        .order_by(SRRequest.created_at.desc())
        .limit(5)
    )
    rows = (await db.execute(q)).scalars().all()
    return [
        {"sr_id": r.sr_id, "title": r.title, "resolution": (r.description or "")[:100]}
        for r in rows
    ]


# ── Ollama AI review generation ───────────────────────────────────────────────

async def _generate_ai_review(sr: SRRequest, snapshot: dict, similar: list) -> dict:
    """Ask Ollama for a JSON review of *sr* and return it as a dict.

    Args:
        sr: the service request under review.
        snapshot: server snapshot; the ``tmux_session`` and ``error`` keys are
            left out of the prompt.
        similar: similar SR dicts; only the first three are put in the prompt.

    Returns:
        The JSON object parsed from the model response, or a fixed fallback
        review when the request fails, the HTTP status is an error, or the
        response contains no non-empty JSON object. Never raises.
    """
    similar_text = "\n".join(
        f"- [{s['sr_id']}] {s['title']}" for s in similar[:3]
    ) or "없음"

    snap_text = "\n".join(
        f"[{k}]\n{v}"
        for k, v in snapshot.items()
        if k not in ("tmux_session", "error")
    ) or "서버 정보 없음"

    prompt = f"""공공기관 IT 인프라 전문 엔지니어로서 SR을 분석하고 JSON만 반환하라.

SR:
- ID: {sr.sr_id}
- 유형: {sr.sr_type}
- 제목: {sr.title}
- 내용: {sr.description or '(없음)'}
- 우선순위: {sr.priority}
- 대상 서버: {sr.target_server or '미지정'}

서버 상태:
{snap_text}

유사 SR:
{similar_text}

JSON 형식으로만 응답 (다른 텍스트 없이):
{{
  "summary": "문제 요약 (1-2문장)",
  "root_cause": "추정 원인",
  "recommended_actions": ["조치1", "조치2", "조치3"],
  "estimated_minutes": 30,
  "risk_level": "LOW",
  "auto_resolvable": false
}}"""

    try:
        async with httpx.AsyncClient(timeout=90) as client:
            resp = await client.post(
                OLLAMA_URL,
                json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
            )
            # Without this, an error body with no "response" key would parse
            # to an empty review and be stored as a successful result.
            resp.raise_for_status()
            text = resp.json().get("response", "{}")
            m = re.search(r"\{[\s\S]*\}", text)
            if m:
                parsed = json.loads(m.group())
                # An empty object carries no review — use the fallback instead.
                if isinstance(parsed, dict) and parsed:
                    return parsed
    except Exception as e:
        log.warning("Ollama SR 리뷰 오류: %s", e)

    return {
        "summary": "자동 리뷰 생성 실패 — 수동 검토 필요",
        "root_cause": "알 수 없음",
        "recommended_actions": ["담당자 직접 확인"],
        "estimated_minutes": 60,
        "risk_level": "MEDIUM",
        "auto_resolvable": False,
    }


# ── Core review execution (background task entry point) ───────────────────────

def _as_minutes(value, default: int = 60) -> int:
    """Coerce a model-supplied duration to a non-negative int, else *default*."""
    try:
        minutes = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return minutes if minutes >= 0 else default


async def _mark_review_failed(sr_id: str, exc: Exception) -> None:
    """Set the review row of *sr_id* to "failed"; never raises."""
    try:
        async with SessionLocal() as db:
            rev = (await db.execute(
                select(SRAutoReview).where(SRAutoReview.sr_id == sr_id)
            )).scalars().first()
            if rev:
                rev.status = "failed"
                rev.summary = f"리뷰 실패: {type(exc).__name__}"
                rev.completed_at = datetime.now()
                await db.commit()
    except Exception:
        log.exception("SR 리뷰 실패 상태 기록 오류 %s", sr_id)


async def run_sr_review(sr_id: str) -> None:
    """
    Called fire-and-forget from tasks.py create_task().

    Opens its own DB sessions rather than taking one from the caller.

    Does nothing when the SR does not exist or a review row for it already
    exists. Otherwise creates a row with status "reviewing" and finishes it as
    "completed" or "failed"; a completion event is broadcast only on success.
    """
    async with SessionLocal() as db:
        try:
            sr = (await db.execute(
                select(SRRequest).where(SRRequest.sr_id == sr_id)
            )).scalars().first()
            if not sr:
                return

            # Duplicate guard
            if (await db.execute(
                select(SRAutoReview).where(SRAutoReview.sr_id == sr_id)
            )).scalars().first():
                return

            harness = _HARNESS_MAP.get(sr.sr_type or "OTHER", "general-ops")

            # Create the initial review record
            review = SRAutoReview(
                sr_id=sr_id,
                harness_name=harness,
                status="reviewing",
                started_at=datetime.now(),
            )
            db.add(review)
            await db.commit()
        except Exception as e:
            log.exception("SR 리뷰 초기화 실패 %s: %s", sr_id, e)
            return

    # Any failure from here on must flip the row to "failed"; otherwise it
    # would stay in "reviewing" indefinitely.
    snapshot: dict = {}
    try:
        async with SessionLocal() as db:
            sr = (await db.execute(
                select(SRRequest).where(SRRequest.sr_id == sr_id)
            )).scalars().first()
            if not sr:
                raise LookupError(f"SR {sr_id} not found")

            # ── Step 1: related server lookup + tmux snapshot
            if sr.target_server:
                srv = (await db.execute(
                    select(Server).where(
                        Server.server_name == sr.target_server,
                        Server.is_active == True,  # noqa: E712 (SQL expression)
                    ).limit(1)
                )).scalars().first()
                if srv:
                    snapshot = await _capture_server_snapshot(srv)

            similar = await _find_similar_srs(sr, db)

            # ── Step 2: Ollama AI review generation
            ai = await _generate_ai_review(sr, snapshot, similar)
    except Exception as e:
        log.exception("SR 리뷰 분석 실패 %s: %s", sr_id, e)
        await _mark_review_failed(sr_id, e)
        return

    # The model output is free-form: normalise the fields that are stored in
    # typed columns or used as list filters.
    risk_level = str(ai.get("risk_level") or "MEDIUM").strip().upper()
    summary = ai.get("summary", "")

    # ── Step 3: persist the result
    try:
        async with SessionLocal() as db:
            rev = (await db.execute(
                select(SRAutoReview).where(SRAutoReview.sr_id == sr_id)
            )).scalars().first()
            if rev:
                rev.status = "completed"
                rev.summary = summary
                rev.root_cause = ai.get("root_cause", "")
                rev.recommended_actions = json.dumps(
                    ai.get("recommended_actions", []), ensure_ascii=False
                )
                rev.estimated_minutes = _as_minutes(ai.get("estimated_minutes", 60))
                rev.risk_level = risk_level
                rev.similar_count = len(similar)
                # Only a real JSON `true` counts; the string "false" is truthy.
                rev.auto_resolvable = ai.get("auto_resolvable", False) is True
                rev.server_snapshot = json.dumps(snapshot, ensure_ascii=False)
                rev.tmux_session = snapshot.get("tmux_session")
                rev.completed_at = datetime.now()
                await db.commit()
    except Exception as e:
        log.exception("SR 리뷰 저장 실패 %s: %s", sr_id, e)
        await _mark_review_failed(sr_id, e)
        return

    # ── Step 4: SSE broadcast
    await broadcast("sr_review_completed", {
        "sr_id": sr_id,
        "risk_level": risk_level,
        "summary": summary,
        "harness": harness,
        "tmux_session": snapshot.get("tmux_session"),
    })


# ── REST endpoints ────────────────────────────────────────────────────────────

class SRReviewOut(BaseModel):
    """Response schema for a review row (populated from ORM attributes)."""

    model_config = ConfigDict(from_attributes=True)

    id: int
    sr_id: str
    harness_name: str
    status: str
    summary: Optional[str] = None
    root_cause: Optional[str] = None
    recommended_actions: Optional[str] = None  # JSON-encoded list
    estimated_minutes: Optional[int] = None
    risk_level: Optional[str] = None
    similar_count: Optional[int] = None
    auto_resolvable: Optional[bool] = None
    tmux_session: Optional[str] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None


@router.get("", response_model=List[SRReviewOut])
async def list_reviews(
    status: Optional[str] = None,
    risk_level: Optional[str] = None,
    skip: int = 0,
    limit: int = 50,
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """List reviews, newest first, optionally filtered by status and risk level."""
    q = select(SRAutoReview).order_by(SRAutoReview.started_at.desc())
    if status:
        q = q.where(SRAutoReview.status == status)
    if risk_level:
        q = q.where(SRAutoReview.risk_level == risk_level)
    q = q.offset(skip).limit(limit)
    return (await db.execute(q)).scalars().all()


@router.get("/{sr_id}", response_model=SRReviewOut)
async def get_review(
    sr_id: str,
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """Return the review of one SR; responds 404 when no review row exists."""
    r = (await db.execute(
        select(SRAutoReview).where(SRAutoReview.sr_id == sr_id)
    )).scalars().first()
    if not r:
        raise HTTPException(404, detail="리뷰 결과 없음 (리뷰 진행 중이거나 미접수 SR)")
    return r


@router.post("/{sr_id}/run", status_code=202)
async def trigger_review(
    sr_id: str,
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """Manual re-run — delete the existing result, then start again."""
    existing = (await db.execute(
        select(SRAutoReview).where(SRAutoReview.sr_id == sr_id)
    )).scalars().first()
    if existing:
        await db.delete(existing)
        await db.commit()

    # Hold a reference until the task finishes so it cannot be
    # garbage-collected while still running.
    task = asyncio.create_task(run_sr_review(sr_id))
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return {"message": f"SR {sr_id} 리뷰 재실행", "sr_id": sr_id}


@router.get("/{sr_id}/tmux")
async def get_tmux_info(
    sr_id: str,
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """Return the tmux session created during the review plus the server snapshot."""
    r = (await db.execute(
        select(SRAutoReview).where(SRAutoReview.sr_id == sr_id)
    )).scalars().first()
    if not r:
        raise HTTPException(404, detail="리뷰 없음")
    return {
        "sr_id": sr_id,
        "tmux_session": r.tmux_session,
        "snapshot": json.loads(r.server_snapshot) if r.server_snapshot else {},
        "attach_hint": f"tmux attach -t {r.tmux_session}" if r.tmux_session else None,
        "risk_level": r.risk_level,
        "status": r.status,
    }