# guardia-itsm/core/auto_rca.py

"""자동 RCA 분석 — Ollama LLM을 활용한 근본 원인 초안 생성."""
from __future__ import annotations

import json
import logging
from datetime import datetime
from typing import Optional

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Prompt template sent to the LLM for both incident and problem RCA drafts.
# It is filled in with ``str.format`` and takes two placeholders:
#   {incident_info}  - multi-line description of the incident/problem
#   {change_history} - multi-line list of changes (or a placeholder text)
# The doubled braces ({{ and }}) are ``str.format`` escapes that render as the
# literal braces of the example JSON object the model is asked to emit.
RCA_PROMPT = """당신은 IT 인프라 전문가입니다. 다음 장애 정보를 분석하여 RCA(근본 원인 분석)를 JSON으로 제공하세요.

장애 정보:
{incident_info}

변경 이력:
{change_history}

다음 필드를 포함하는 JSON만 출력하세요 (한국어):
{{
  "root_cause": "추정 근본 원인",
  "contributing_factors": ["기여 요인 1", "기여 요인 2"],
  "timeline": "발생부터 발견까지 타임라인",
  "prevention": ["재발 방지 조치 1", "재발 방지 조치 2"],
  "confidence": 0.8
}}"""


def _parse_rca_response(content: str) -> dict:
    """Extract the first JSON object found in raw LLM output.

    The body of the first Markdown code fence is tried first (any language
    tag such as ``json``/``JSON`` is skipped automatically because scanning
    starts at the first ``{``); if that yields nothing, the whole response is
    scanned. Text before or after the object is ignored.

    Args:
        content: Raw text returned by the LLM.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If no decodable JSON object is present.
    """
    stripped = content.strip()
    candidates = [stripped]
    if "```" in stripped:
        # Prefer the fenced block, but keep the full text as a fallback for
        # responses where the fence is unbalanced or follows the JSON.
        candidates.insert(0, stripped.split("```")[1])

    decoder = json.JSONDecoder()
    for candidate in candidates:
        start = candidate.find("{")
        while start != -1:
            try:
                # raw_decode parses exactly one value starting at ``start``
                # and tolerates trailing text after it.
                parsed, _ = decoder.raw_decode(candidate, start)
            except json.JSONDecodeError:
                start = candidate.find("{", start + 1)
            else:
                return parsed
    raise ValueError("LLM 응답에서 JSON 객체를 찾을 수 없습니다.")


async def _request_rca(prompt: str) -> dict:
    """Send ``prompt`` to the configured LLM client and decode its JSON reply.

    Any failure (client import, chat call, malformed reply) propagates to the
    caller, which is expected to substitute its own fallback template.
    """
    from core.llm_client import get_llm_client

    client = get_llm_client()
    resp = await client.chat(prompt)
    return _parse_rca_response(resp.content)


def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    Produces the same format as the deprecated ``datetime.utcnow().isoformat()``
    (no UTC offset suffix) so existing consumers keep working.
    """
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


async def analyze_rca(incident_id: int, db) -> dict:
    """Generate an RCA draft for an incident using the LLM.

    Args:
        incident_id: Primary key passed to ``db.get(Incident, ...)``.
        db: Session-like object exposing awaitable ``get`` and ``execute``.

    Returns:
        A dict with keys ``incident_id``, ``rca``, ``auto_generated`` and
        ``generated_at``. When the LLM call or response parsing fails, ``rca``
        is a fallback template with ``confidence`` 0.0 instead of an error.

    Raises:
        ValueError: If no incident exists for ``incident_id``.
    """
    from models import Incident
    from sqlalchemy import select

    incident = await db.get(Incident, incident_id)
    if not incident:
        raise ValueError(f"장애 ID {incident_id}를 찾을 수 없습니다.")

    incident_info = (
        f"장애번호: {incident.incident_id}\n"
        f"제목: {incident.title}\n"
        f"등급: {incident.grade}\n"
        f"설명: {incident.description or '없음'}\n"
        f"영향 서비스: {incident.affected_service or '없음'}\n"
        f"발생시각: {incident.occurred_at}\n"
        f"복구시각: {incident.resolved_at or '미복구'}"
    )

    # Best-effort lookup of the five most recent CI change-log entries.
    # NOTE(review): the query is not filtered by this incident or its time
    # window, so unrelated changes may end up in the prompt - confirm intent.
    try:
        from models import CIChangeLog

        logs = (await db.execute(
            select(CIChangeLog)
            .order_by(CIChangeLog.changed_at.desc())
            .limit(5)
        )).scalars().all()
        change_history = "\n".join(
            f"- [{entry.changed_at}] {entry.change_type}: {entry.summary or ''}"
            for entry in logs
        ) or "최근 변경 이력 없음"
    except Exception as e:
        # Still non-fatal, but no longer silent: record why history is missing.
        logger.warning("CI 변경 이력 조회 실패: %s", e)
        change_history = "변경 이력 조회 불가"

    prompt = RCA_PROMPT.format(
        incident_info=incident_info,
        change_history=change_history,
    )

    try:
        rca = await _request_rca(prompt)
    except Exception as e:
        logger.warning("LLM RCA 분석 실패 — 기본 템플릿 사용: %s", e)
        rca = {
            "root_cause": f"자동 분석 실패 — 수동 분석 필요. ({str(e)[:100]})",
            "contributing_factors": ["분석 데이터 부족"],
            "timeline": incident_info,
            "prevention": ["장애 재발 방지 조치를 수동으로 기록하세요."],
            "confidence": 0.0,
        }

    return {
        "incident_id": incident.incident_id,
        "rca": rca,
        "auto_generated": True,
        "generated_at": _utc_timestamp(),
    }


async def analyze_problem_rca(problem_id: int, db) -> dict:
    """Generate an RCA draft for a problem record using the LLM.

    Args:
        problem_id: Primary key passed to ``db.get(ProblemRecord, ...)``.
        db: Session-like object exposing an awaitable ``get``.

    Returns:
        A dict with keys ``problem_id``, ``rca``, ``auto_generated`` and
        ``generated_at``. When the LLM call or response parsing fails, ``rca``
        is a fallback template with ``confidence`` 0.0 instead of an error.

    Raises:
        ValueError: If no problem record exists for ``problem_id``.
    """
    from models import ProblemRecord

    prb = await db.get(ProblemRecord, problem_id)
    if not prb:
        raise ValueError(f"Problem ID {problem_id}를 찾을 수 없습니다.")

    incident_info = (
        f"문제번호: {prb.problem_id}\n"
        f"제목: {prb.title}\n"
        f"설명: {prb.description or '없음'}\n"
        f"카테고리: {prb.category}\n"
        f"영향 사용자: {prb.affected_users}명\n"
        f"관련 인시던트 수: {prb.incident_count}건\n"
        f"누적 다운타임: {prb.total_downtime_min}분"
    )

    # No change log is queried for problems; a fixed marker fills the slot.
    prompt = RCA_PROMPT.format(
        incident_info=incident_info,
        change_history="문제 레코드 기반 분석",
    )

    try:
        rca = await _request_rca(prompt)
    except Exception as e:
        logger.warning("LLM Problem RCA 분석 실패: %s", e)
        rca = {
            "root_cause": "자동 분석 실패 — 수동 분석 필요.",
            "contributing_factors": [],
            "timeline": incident_info,
            "prevention": [],
            "confidence": 0.0,
        }

    return {
        "problem_id": prb.problem_id,
        "rca": rca,
        "auto_generated": True,
        "generated_at": _utc_timestamp(),
    }