# zioinfo-mail/workspace/guardia-itsm/core/chatbot.py

"""
B-2: 자연어 SR 접수 챗봇 엔진

기능:
  - Ollama LLM 기반 자연어 의도 분류 + 엔티티 추출
  - Ollama 미연결 시 규칙 기반 폴백 (키워드 매칭)
  - 다단계 대화: 정보 수집 → SR 자동 생성
  - 대화 컨텍스트 누적 관리
"""
from __future__ import annotations

import json
import logging
import re
import uuid
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import httpx

logger = logging.getLogger(__name__)

# ── Configuration ────────────────────────────────────────────────────────────

# Endpoint that analyze_with_llm() POSTs to, and the model name used when the
# caller does not pass one.
OLLAMA_URL    = "http://localhost:11434/api/generate"
DEFAULT_MODEL = "llama3"

# ── Rule-based fallback ──────────────────────────────────────────────────────

# Intent -> keyword list. classify_intent_rule() lower-cases both the text and
# each keyword and counts substring hits per intent, so:
#   * dictionary order matters when two intents tie on hit count;
#   * case variants of one keyword ("sr-" / "SR-") are counted separately, so
#     a single mention scores twice;
#   * longer phrases overlap shorter ones in other intents (e.g. "서비스 중단"
#     also contains the SR_CREATE keyword "중단").
_INTENT_KEYWORDS: Dict[str, List[str]] = {
    "SR_CREATE": [
        "오류", "에러", "error", "장애", "느려", "안 돼", "안돼", "문제",
        "접속 안", "접속이 안", "서버", "다운", "중단", "실패", "요청",
        "불가", "이상", "이슈", "고장", "먹통", "응답 없", "timeout",
        "배포 요청", "업데이트", "설치 요청",
    ],
    "INCIDENT_REPORT": [
        "긴급", "즉시", "critical", "전면", "전체 장애", "서비스 중단",
        "대규모", "모든 사용자", "운영 중단",
    ],
    "DEPLOY_REQUEST": [
        "배포", "릴리즈", "deploy", "release", "빌드", "build",
        "소스 반영", "패치", "업그레이드",
    ],
    "SR_QUERY": [
        "조회", "확인", "상태", "어떻게", "얼마나", "진행", "처리",
        "언제", "완료", "sr-", "SR-",
    ],
    "GENERAL_INQUIRY": [
        "문의", "질문", "어떻게 하면", "도움", "help", "방법", "알려",
    ],
}

# Priority level -> keyword list. extract_entities_rule() walks the levels in
# this order and stops at the first level with any substring hit, so CRITICAL
# is checked before HIGH, and so on.
_PRIORITY_KEYWORDS: Dict[str, List[str]] = {
    "CRITICAL": ["긴급", "즉시", "critical", "전면 장애", "모든 사용자", "운영 중단", "지금 당장"],
    "HIGH":     ["빠르게", "빨리", "urgent", "high", "중요", "높음", "오늘 중"],
    "MEDIUM":   ["medium", "보통", "일반", "중간"],
    "LOW":      ["천천히", "여유", "low", "낮음", "나중에"],
}

# SR type -> keyword list, matched the same way (first type with a hit wins).
_SR_TYPE_KEYWORDS: Dict[str, List[str]] = {
    "DEPLOY":  ["배포", "deploy", "릴리즈", "소스 반영", "패치"],
    "RESTART": ["재기동", "restart", "재시작", "기동"],
    "LOG":     ["로그", "log", "로그 확인", "오류 로그"],
    "INCIDENT":["장애", "중단", "다운", "먹통"],
}


def classify_intent_rule(text: str) -> Tuple[str, float]:
    """Classify the intent of *text* by keyword matching.

    Each intent in ``_INTENT_KEYWORDS`` scores one point per keyword that
    occurs (case-insensitively, as a substring) in the text. The intent with
    the highest score wins.

    Args:
        text: Raw user message.

    Returns:
        ``(intent, confidence)``. With no keyword hit at all the result is
        ``("GENERAL_INQUIRY", 0.3)``; otherwise confidence is
        ``min(0.9, 0.4 + 0.15 * hits)`` for the winning intent.
    """
    text_lower = text.lower()
    scores: Dict[str, int] = {}

    for intent, keywords in _INTENT_KEYWORDS.items():
        hits = sum(1 for kw in keywords if kw.lower() in text_lower)
        if hits > 0:
            scores[intent] = hits

    if not scores:
        return "GENERAL_INQUIRY", 0.3

    # INCIDENT_REPORT > SR_CREATE: incident phrases such as "서비스 중단" also
    # contain SR_CREATE keywords ("중단"), so the two frequently tie. max()
    # keeps the first maximal key in dict order, which would be SR_CREATE, so
    # the tie is broken explicitly in favour of the more specific intent.
    # All other ties keep their dictionary-order resolution.
    best = max(scores, key=lambda name: (scores[name], name == "INCIDENT_REPORT"))
    confidence = min(0.9, 0.4 + scores[best] * 0.15)
    return best, confidence


def extract_entities_rule(text: str) -> Dict:
    """Extract SR entities from *text* using keyword tables and regexes.

    The returned dict always contains ``priority`` (default ``"MEDIUM"``),
    ``sr_type`` (default ``"OTHER"``) and ``description`` (the stripped
    original text). ``server`` and ``sr_ref`` are added only when their
    patterns match.
    """
    lowered = text.lower()

    def first_match(table: Dict[str, List[str]], fallback: str) -> str:
        # First label (in table order) with any keyword occurring in the text.
        return next(
            (
                label
                for label, words in table.items()
                if any(word.lower() in lowered for word in words)
            ),
            fallback,
        )

    found: Dict = {
        "priority": first_match(_PRIORITY_KEYWORDS, "MEDIUM"),
        "sr_type": first_match(_SR_TYPE_KEYWORDS, "OTHER"),
    }

    # Token following one of the words 서버/서비스/시스템/앱. The separator is
    # optional and Hangul is allowed in the token, so a particle glued to the
    # keyword (e.g. "서버가") is captured as the name ("가").
    server_hit = re.search(
        r'(?:서버|서비스|시스템|앱)[\s:]*([A-Za-z0-9\-_가-힣]+)', text
    )
    if server_hit is not None:
        found["server"] = server_hit.group(1)

    # Reference to an existing ticket: "SR-" followed by at least four digits.
    ticket_hit = re.search(r'SR-\d{4,}', text, re.IGNORECASE)
    if ticket_hit is not None:
        found["sr_ref"] = ticket_hit.group().upper()

    # The description is the original message, unmodified apart from trimming.
    found["description"] = text.strip()
    return found


# ── Ollama LLM-based NLU ─────────────────────────────────────────────────────

# Prompt sent to the model by analyze_with_llm(). It is filled with
# str.format(message=..., context=...), which is why the literal braces of the
# JSON skeleton are doubled. The skeleton describes the keys that
# process_message() later reads from the model's answer.
_NLU_PROMPT_TEMPLATE = """\
너는 IT 서비스 관리(ITSM) 챗봇이다. 사용자 메시지를 분석하여 JSON만 반환하라.

사용자 메시지: "{message}"

이전 대화 컨텍스트:
{context}

다음 JSON을 반환하라 (다른 텍스트 없이 순수 JSON만):
{{
  "intent": "SR_CREATE | INCIDENT_REPORT | DEPLOY_REQUEST | SR_QUERY | GENERAL_INQUIRY | CLARIFICATION",
  "confidence": 0.0~1.0,
  "entities": {{
    "priority": "CRITICAL | HIGH | MEDIUM | LOW",
    "sr_type": "DEPLOY | RESTART | LOG | INCIDENT | OTHER",
    "description": "문제 설명 (원문 기준 요약)",
    "server": "서버명 또는 null",
    "application": "애플리케이션명 또는 null",
    "symptom": "증상 요약 또는 null"
  }},
  "needs_clarification": true/false,
  "clarification_prompt": "추가 질문 또는 null",
  "reply": "사용자에게 보낼 친절한 한국어 응답 (1-3 문장)"
}}"""


async def analyze_with_llm(
    message: str,
    context: List[Dict],
    model: str = DEFAULT_MODEL,
    timeout: int = 30,
) -> Optional[Dict]:
    """Analyse *message* with the Ollama LLM.

    Args:
        message: Current user message.
        context: Previous messages as dicts with ``role`` and ``content``;
            only the last four are sent, each truncated to 100 characters.
        model: Ollama model name.
        timeout: HTTP timeout in seconds.

    Returns:
        The JSON object found in the model's answer, or ``None`` when the
        request fails, the server does not answer with HTTP 200, or no JSON
        object can be parsed. Callers treat ``None`` as "use the rule-based
        fallback", so this function never raises for transport problems.
    """
    if context:
        # Tolerate history entries with a missing role/content instead of
        # raising KeyError/TypeError before the request is even sent.
        context_str = "\n".join(
            f"{m.get('role', '')}: {str(m.get('content') or '')[:100]}"
            for m in context[-4:]
        )
    else:
        context_str = "없음"

    prompt = _NLU_PROMPT_TEMPLATE.format(
        message=message,
        context=context_str,
    )

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.post(
                OLLAMA_URL,
                json={"model": model, "prompt": prompt, "stream": False},
            )
        if resp.status_code != 200:
            logger.debug("Ollama returned HTTP %s; using rule-based fallback", resp.status_code)
            return None

        payload = resp.json()
        raw = payload.get("response", "") if isinstance(payload, dict) else ""
        if not isinstance(raw, str):
            return None

        # The model may wrap the JSON in extra text; keep the outermost {...}.
        start = raw.find("{")
        end = raw.rfind("}") + 1
        if start >= 0 and end > start:
            parsed = json.loads(raw[start:end])
            if isinstance(parsed, dict):
                return parsed
    except httpx.HTTPError:
        # Base class of ConnectError/TimeoutException and of the other
        # transport failures (read errors, protocol errors, ...), all of which
        # must lead to the fallback rather than to an exception in the caller.
        logger.debug("Ollama 연결 실패 — 규칙 기반 폴백 사용")
    except json.JSONDecodeError as e:
        logger.debug("LLM JSON 파싱 실패: %s", e)
    return None


# ── Reply templates ──────────────────────────────────────────────────────────

# Canned replies keyed by intent, then by situation. Within this file,
# "need_info" and "confirm" are filled with str.format() by _handle_sr_flow(),
# and the two "default" entries are returned verbatim. The "created" entries
# are not referenced in this file (they carry an {sr_id} placeholder).
_REPLY_TEMPLATES = {
    "SR_CREATE": {
        "need_info": (
            "무슨 문제가 발생했는지 파악했습니다. "
            "SR 접수를 위해 몇 가지 정보가 더 필요합니다.\n\n"
            "{question}"
        ),
        "confirm": (
            "다음과 같이 SR을 접수하겠습니다:\n"
            "- 제목: {title}\n"
            "- 우선순위: {priority}\n"
            "- 유형: {sr_type}\n\n"
            "접수를 진행할까요? (네/아니오)"
        ),
        "created": (
            "✅ SR이 접수되었습니다!\n\n"
            "- SR ID: **{sr_id}**\n"
            "- 제목: {title}\n"
            "- 우선순위: {priority}\n\n"
            "담당자가 곧 연락드릴 예정입니다."
        ),
    },
    "INCIDENT_REPORT": {
        "created": (
            "🚨 긴급 인시던트로 접수했습니다!\n\n"
            "- SR ID: **{sr_id}**\n"
            "- 우선순위: CRITICAL\n\n"
            "온콜 엔지니어에게 즉시 알림을 발송했습니다."
        ),
    },
    "GENERAL_INQUIRY": {
        "default": (
            "안녕하세요! GUARDiA ITSM 챗봇입니다. 😊\n\n"
            "다음과 같은 도움을 드릴 수 있습니다:\n"
            "• IT 장애/오류 신고 → SR 자동 접수\n"
            "• 배포 요청\n"
            "• SR 상태 조회 (예: SR-0042 상태가 어떻게 됩니까?)\n\n"
            "어떤 문제가 발생했나요?"
        ),
    },
    "SR_QUERY": {
        # Returned as-is by _handle_sr_query(), i.e. never passed through
        # str.format(), so the "{숫자}" part is shown to the user literally.
        "default": "SR 조회는 'SR-{숫자}' 형식으로 말씀해 주세요. 예: SR-0042 상태 알려줘",
    },
}

# Order in which information is meant to be collected.
# NOTE(review): not referenced in this file; _handle_sr_flow() checks only
# "description" and "priority" — confirm whether "sr_type" should be required.
_REQUIRED_FIELDS = ["description", "priority", "sr_type"]

# Follow-up question asked when the corresponding field is still missing.
_CLARIFICATION_QUESTIONS = {
    "description": "어떤 문제가 발생했나요? 증상을 구체적으로 설명해 주세요.",
    "priority": "긴급도가 어느 정도인가요? (긴급/높음/보통/낮음)",
    "sr_type": "어떤 유형의 요청인가요? (장애신고/배포요청/재기동/로그분석/기타)",
}


def build_sr_title(entities: Dict) -> str:
    """Build an SR title from collected entities.

    The title is ``"<type prefix> [<server>] [<description>]"`` joined by
    single spaces, where the description is cut to its first 40 characters.

    Args:
        entities: Collected entities; reads ``description``, ``sr_type`` and
            ``server``. Missing keys, ``None`` values and non-string values
            (which an LLM answer may contain) are tolerated.

    Returns:
        The title string; at minimum the type prefix (``"[SR]"`` for an
        unknown or missing type).
    """
    # `or` instead of a .get() default: a key that is present but None must
    # behave like a missing key rather than raise on slicing/joining.
    desc = entities.get("description") or ""
    sr_type = entities.get("sr_type") or "OTHER"
    server = entities.get("server") or ""

    prefixes = {
        "DEPLOY": "[배포]",
        "RESTART": "[재기동]",
        "LOG": "[로그분석]",
        "INCIDENT": "[장애]",
        "OTHER": "[SR]",
    }
    # Guard the lookup so an unhashable value (e.g. a list) cannot raise.
    type_prefix = prefixes.get(sr_type, "[SR]") if isinstance(sr_type, str) else "[SR]"

    title_parts = [type_prefix]
    if server:
        title_parts.append(str(server))

    # First 40 characters of the description.
    short_desc = str(desc)[:40].strip()
    if short_desc:
        title_parts.append(short_desc)

    return " ".join(title_parts)


# ── Main conversation entry point ────────────────────────────────────────────

async def process_message(
    message: str,
    session_context: Dict,
    use_llm: bool = True,
    model: str = DEFAULT_MODEL,
) -> Dict:
    """
    Process one user message and decide the next conversational step.

    The message is analysed with the LLM when ``use_llm`` is true; if that
    yields nothing, the rule-based classifier/extractor is used instead.
    Extracted entities are merged into ``session_context["collected"]``
    (the dict is updated in place when present) and the message is then
    dispatched by intent.

    session_context: {
        "history": [...],        # previous messages
        "collected": {...},      # entities collected so far
        "state": "GATHERING|CONFIRMING|DONE",
        "intent": str,           # intent of the previous turn
    }

    Returns: {
        "intent": str,
        "entities": dict,        # snapshot of the collected entities
        "reply": str,
        "needs_clarification": bool,
        "clarification_prompt": str | None,
        "action": "CREATE_SR | NONE",
        "sr_data": dict | None,
        "confidence": float,
    }
    """
    history = session_context.get("history", [])
    collected = session_context.get("collected")
    if collected is None:
        collected = {}
    state = session_context.get("state", "GATHERING")
    prev_intent = session_context.get("intent", "")

    # ── Try the LLM first ────────────────────────────────────────────────────
    llm_result = None
    if use_llm:
        llm_result = await analyze_with_llm(message, history, model=model)

    used_llm = bool(llm_result)
    if used_llm:
        # Model output is untrusted JSON: any field may be null or have the
        # wrong type, so each one is validated before use.
        raw_intent = llm_result.get("intent")
        if isinstance(raw_intent, str) and raw_intent.strip():
            intent = raw_intent.strip().upper()
        else:
            intent = "GENERAL_INQUIRY"

        try:
            confidence = float(llm_result.get("confidence", 0.5))
        except (TypeError, ValueError):
            confidence = 0.5

        entities = llm_result.get("entities")
        if not isinstance(entities, dict):
            entities = {}

        raw_reply = llm_result.get("reply")
        llm_reply = raw_reply if isinstance(raw_reply, str) else ""

        needs_clarif = bool(llm_result.get("needs_clarification", False))

        # The prompt allows the literal "null"; treat it like a real null, the
        # same way entity values are filtered below.
        clarif_prompt = llm_result.get("clarification_prompt")
        if not isinstance(clarif_prompt, str) or clarif_prompt.strip() in ("", "null"):
            clarif_prompt = None
    else:
        # Rule-based fallback
        intent, confidence = classify_intent_rule(message)
        entities = extract_entities_rule(message)
        llm_reply = ""
        needs_clarif = False
        clarif_prompt = None

    # CLARIFICATION: the user is answering a question, keep the previous intent
    if intent == "CLARIFICATION" and prev_intent:
        intent = prev_intent

    # The rule-based extractor always emits a default priority/sr_type and
    # uses the raw message as description. While a confirmation is pending,
    # merging that would replace the collected SR with values derived from
    # the user's "yes"/"no", so already-collected fields are kept.
    keep_existing = (not used_llm) and state == "CONFIRMING"

    # Merge extracted entities (skip empty values and the literal "null")
    for key, value in entities.items():
        if not value or value == "null":
            continue
        if keep_existing and collected.get(key):
            continue
        collected[key] = value

    result = {
        "intent":     intent,
        "entities":   collected.copy(),
        "reply":      "",
        "needs_clarification": False,
        "clarification_prompt": None,
        "action":     "NONE",
        "sr_data":    None,
        "confidence": confidence,
    }

    if intent in ("SR_CREATE", "INCIDENT_REPORT", "DEPLOY_REQUEST"):
        # SR-related intents share one gather → confirm → create flow
        result = await _handle_sr_flow(
            message, intent, collected, state, llm_reply, needs_clarif, clarif_prompt, result
        )
        # The flow may add to `collected` (e.g. force CRITICAL for incidents);
        # refresh the snapshot so it matches what sr_data was built from.
        result["entities"] = collected.copy()
    elif intent == "SR_QUERY":
        result["reply"] = _handle_sr_query(message, collected)
    elif intent == "GENERAL_INQUIRY":
        result["reply"] = llm_reply or _REPLY_TEMPLATES["GENERAL_INQUIRY"]["default"]
    else:
        result["reply"] = llm_reply or "무슨 문제가 발생했나요? 자세히 말씀해 주세요."

    return result


# Words that accept / reject the pending SR in the confirmation step.
_CONFIRM_YES_WORDS = ("네", "예", "yes", "맞아", "확인", "진행", "ok", "okay")
_CONFIRM_NO_WORDS = ("아니", "no", "nope", "취소", "수정", "다시")


def _mentions_any(text: str, keywords) -> bool:
    """Return True if lower-cased *text* mentions any of *keywords*.

    Korean keywords match as substrings. ASCII keywords must not be embedded
    in a longer ASCII word, so "no" does not fire on "node" nor "ok" on
    "token".
    """
    for kw in keywords:
        if kw.isascii():
            pattern = r"(?<![a-z0-9])" + re.escape(kw) + r"(?![a-z0-9])"
            if re.search(pattern, text):
                return True
        elif kw in text:
            return True
    return False


async def _handle_sr_flow(
    message: str,
    intent: str,
    collected: Dict,
    state: str,
    llm_reply: str,
    needs_clarif: bool,
    clarif_prompt: Optional[str],
    result: Dict,
) -> Dict:
    """Drive the SR conversation: gather → confirm → create.

    Args:
        message: Current user message.
        intent: ``SR_CREATE``, ``INCIDENT_REPORT`` or ``DEPLOY_REQUEST``.
        collected: Entities collected so far; updated in place for incidents.
        state: Session state (``GATHERING``, ``CONFIRMING`` or ``DONE``).
        llm_reply: Reply suggested by the LLM, or ``""``.
        needs_clarif: Accepted for interface compatibility; not used here.
        clarif_prompt: Follow-up question suggested by the LLM, if any.
        result: Pre-filled result dict; it is updated and returned.

    Returns:
        ``result`` with ``reply``, ``needs_clarification``,
        ``clarification_prompt``, ``action`` and ``sr_data`` set for this turn.
    """
    # Urgent incidents are filed immediately, without confirmation
    if intent == "INCIDENT_REPORT":
        collected["priority"] = "CRITICAL"
        if not collected.get("description"):
            collected["description"] = message
        # `result["entities"]` was snapshotted before the lines above ran.
        result["entities"] = collected.copy()
        result["action"] = "CREATE_SR"
        result["sr_data"] = _build_sr_data(collected, intent)
        result["reply"] = llm_reply or (
            "🚨 긴급 인시던트로 접수합니다. "
            "담당자와 온콜 팀에 즉시 알림을 발송합니다."
        )
        return result

    # Regular SR — make sure the mandatory fields are present
    missing = [field for field in ("description", "priority") if not collected.get(field)]

    if missing:
        # Ask about the first field that is still missing
        next_q = missing[0]
        question = clarif_prompt or _CLARIFICATION_QUESTIONS.get(next_q, "추가 정보를 알려주세요.")
        result["reply"] = llm_reply or _REPLY_TEMPLATES["SR_CREATE"]["need_info"].format(question=question)
        result["needs_clarification"] = True
        result["clarification_prompt"] = question
        return result

    # Everything collected → ask the user to confirm before creating
    if state == "GATHERING":
        title = build_sr_title(collected)
        result["reply"] = (
            llm_reply or
            _REPLY_TEMPLATES["SR_CREATE"]["confirm"].format(
                title=title,
                priority=collected.get("priority", "MEDIUM"),
                sr_type=collected.get("sr_type", "OTHER"),
            )
        )
        result["needs_clarification"] = True
        result["clarification_prompt"] = "접수를 진행할까요? (네/아니오)"
        return result

    # Interpret the answer to the confirmation question
    normalized = message.lower()
    declined = _mentions_any(normalized, _CONFIRM_NO_WORDS)
    accepted = _mentions_any(normalized, _CONFIRM_YES_WORDS)

    # A refusal is checked first and always wins: an SR must never be created
    # after the user said "no", even while the session is in CONFIRMING.
    if declined:
        result["reply"] = "알겠습니다. 어떤 내용을 수정하시겠나요?"
        result["needs_clarification"] = True
        result["clarification_prompt"] = "수정할 내용을 말씀해 주세요."
        return result

    # Any other answer given during CONFIRMING is still treated as consent.
    if accepted or state == "CONFIRMING":
        result["action"] = "CREATE_SR"
        result["sr_data"] = _build_sr_data(collected, intent)
        result["reply"] = ""  # the router fills in the reply with the real SR ID
        return result

    result["reply"] = llm_reply or "접수를 진행할까요? (네/아니오)"
    result["needs_clarification"] = True
    return result


def _build_sr_data(collected: Dict, intent: str) -> Dict:
    """Assemble the SR creation payload from the collected entities.

    Incident reports are always filed as ``CRITICAL``; any other intent uses
    the collected priority (default ``"MEDIUM"``). The collected SR type is
    translated through a fixed table in which ``INCIDENT`` and every unknown
    value become ``"OTHER"``.
    """
    # Collected SR type -> SR type written into the payload.
    payload_type_for = {
        "DEPLOY": "DEPLOY",
        "RESTART": "RESTART",
        "LOG": "LOG",
        "INCIDENT": "OTHER",
        "OTHER": "OTHER",
    }

    if intent == "INCIDENT_REPORT":
        effective_priority = "CRITICAL"
    else:
        effective_priority = collected.get("priority", "MEDIUM")

    return {
        "title":       build_sr_title(collected),
        "description": collected.get("description", "자연어 챗봇 접수"),
        "priority":    effective_priority,
        "sr_type":     payload_type_for.get(collected.get("sr_type", "OTHER"), "OTHER"),
        "server_name": collected.get("server", ""),
        "source":      "chatbot",
    }


def _handle_sr_query(message: str, collected: Dict) -> str:
    """SR 조회 의도 처리."""
    sr_ref = collected.get("sr_ref")
    if sr_ref:
        return f"SR '{sr_ref}' 조회를 진행합니다. 잠시만 기다려 주세요."
    return _REPLY_TEMPLATES["SR_QUERY"]["default"]


def new_session_key() -> str:
    """Return a fresh session key: the first 24 hex digits of a random UUID4."""
    token = uuid.uuid4()
    # .hex is the dash-less, lower-case form of str(token).
    return token.hex[:24]