# guardia-itsm/core/learning.py

"""
Self-Improving Learning Loop — GUARDiA

4개 피드백 루프:
  1. RecurrenceTracker      — 재발 패턴 자동 감지 + Problem 티켓 격상
  2. SolutionEffectiveness  — KB 솔루션 효과 추적 + 검증된 룬북 승격
  3. AdaptiveThreshold      — 이상 탐지 임계값 자동 보정
  4. LessonMiner            — 패턴 마이닝 → 교훈 자동 생성
"""
from __future__ import annotations

import hashlib
import json
import logging
import re
import statistics
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

from sqlalchemy import select, and_, desc, func
from sqlalchemy.ext.asyncio import AsyncSession

logger = logging.getLogger(__name__)

# ── 한국어/영어 불용어 ─────────────────────────────────────────────────────────

# Tokens discarded by _tokenize(): Korean particles/conjunctions, common
# English function words, and generic service-request vocabulary.
# Single-character entries are also removed by the tokenizer's length filter.
_STOPWORDS = {
    "이", "가", "을", "를", "의", "에", "는", "은", "그", "및", "또한",
    "the", "a", "an", "is", "are", "was", "were", "for", "with", "and",
    "sr", "요청", "작업", "처리", "확인", "수행", "필요", "발생",
}

# (regex, label) pairs used by _extract_tech_keywords(): when the regex is
# found in the lower-cased text, the label is added to the tech-keyword list.
# NOTE(review): `\b` is Unicode-aware for str patterns, so a Korean keyword
# immediately followed by a particle (e.g. "메모리가") presumably does not
# match — confirm whether that is intended.
_TECH_PATTERNS = [
    (r'\b(tomcat|jboss|weblogic|jeus|websphere)\b', "was"),
    (r'\b(nginx|apache|iis|webtob)\b', "web"),
    (r'\b(oracle|mysql|postgresql|mssql|tibero)\b', "db"),
    (r'\b(cpu|memory|메모리|heap|힙)\b', "resource"),
    (r'\b(disk|디스크|storage)\b', "disk"),
    (r'\b(deploy|배포|release|릴리즈)\b', "deploy"),
    (r'\b(restart|재기동|재시작)\b', "restart"),
    (r'\b(error|오류|에러|exception|예외)\b', "error"),
    (r'\b(timeout|타임아웃|연결|connection)\b', "network"),
    (r'\b(ssl|tls|인증서|certificate)\b', "ssl"),
    (r'\b(backup|백업)\b', "backup"),
    (r'\b(장애|incident|down|다운)\b', "incident"),
]


# ── 텍스트 지문 유틸 ───────────────────────────────────────────────────────────

def _tokenize(text: str) -> set:
    """Split *text* into a set of lower-cased tokens.

    The text is split on whitespace and a fixed set of punctuation
    characters; pieces shorter than two characters and stopwords are dropped.
    """
    kept = set()
    for piece in re.split(r'[\s,;:.()[\]{}!?]+', text.lower()):
        if len(piece) < 2 or piece in _STOPWORDS:
            continue
        kept.add(piece)
    return kept


def _extract_tech_keywords(text: str) -> List[str]:
    """Return the sorted, de-duplicated labels whose pattern occurs in *text*.

    Matching is done against the lower-cased text using the (regex, label)
    pairs in ``_TECH_PATTERNS``.
    """
    lowered = text.lower()
    labels = {
        label
        for regex, label in _TECH_PATTERNS
        if re.search(regex, lowered)
    }
    return sorted(labels)


def _jaccard(a: set, b: set) -> float:
    """Return the Jaccard similarity |a ∩ b| / |a ∪ b| of two sets.

    Returns 0.0 when either set is empty.
    """
    if a and b:
        overlap = a & b
        universe = a | b
        return len(overlap) / len(universe)
    return 0.0


def _make_pattern_key(inst_id: Optional[int], sr_type: str, keywords: List[str]) -> str:
    """Build the pipe-separated pattern key: scope, SR type, sorted keywords.

    A falsy *inst_id* (``None`` or ``0``) is rendered as ``"global"``.
    """
    scope = str(inst_id) if inst_id else "global"
    return "|".join([scope, sr_type, *sorted(keywords)])


def _hash_key(key: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of *key*."""
    digest = hashlib.sha256()
    digest.update(key.encode())
    return digest.hexdigest()[:16]


# ═══════════════════════════════════════════════════════════════════════════════
# 1. RecurrenceTracker — 재발 패턴 감지
# ═══════════════════════════════════════════════════════════════════════════════

async def detect_recurrence(
    db: AsyncSession,
    sr_id: str,
    title: str,
    description: str,
    sr_type: str,
    inst_id: Optional[int] = None,
    escalate_threshold: int = 3,
    similarity_threshold: float = 0.35,
) -> Dict:
    """Detect whether a new SR matches an already-recorded recurrence pattern.

    Lookup order:
      1. exact match on the hash of (inst_id, sr_type, tech keywords);
      2. otherwise, among up to 50 patterns of the same ``sr_type`` seen in
         the last 90 days (most frequent first), the first one whose keyword
         signature has Jaccard similarity >= ``similarity_threshold``.

    If a pattern is found, the SR is appended to it and, once the occurrence
    count reaches ``escalate_threshold``, a Problem ticket is created.
    If none is found, a new pattern row is registered.

    Returns:
        {
            "recurrence_found": bool,
            "pattern_id": int | None,
            "occurrence_count": int,
            "escalated": bool,      # True only if a ticket was created by this call
            "problem_id": str | None,
            "similar_sr_ids": list, # last 5 SR ids of the pattern
            "suggested_kb_ids": list,
        }
    """
    from models import RecurrencePattern

    text = f"{title} {description}"
    tech_kw = _extract_tech_keywords(text)
    tokens = _tokenize(text)

    pattern_key = _make_pattern_key(inst_id, sr_type, tech_kw)
    pattern_hash = _hash_key(pattern_key)

    # 1) Exact pattern-hash match.
    existing = (await db.execute(
        select(RecurrencePattern).where(RecurrencePattern.pattern_hash == pattern_hash)
    )).scalars().first()

    # 2) No hash match -> similarity-based fallback over the last 90 days.
    # NOTE(review): the fallback filters by sr_type only, not inst_id, so a
    # pattern recorded for another institution can match — confirm intended.
    if not existing:
        since = datetime.utcnow() - timedelta(days=90)
        candidates = (await db.execute(
            select(RecurrencePattern).where(
                and_(
                    RecurrencePattern.last_seen_at >= since,
                    RecurrencePattern.sr_type == sr_type,
                )
            ).order_by(desc(RecurrencePattern.occurrence_count)).limit(50)
        )).scalars().all()

        for cand in candidates:
            sig_tokens = _tokenize(cand.keyword_signature or "")
            if _jaccard(tokens, sig_tokens) >= similarity_threshold:
                existing = cand
                break

    if existing:
        # Build a NEW list instead of appending to the loaded one: re-assigning
        # the same, in-place-mutated object to a JSON-style column may not be
        # detected as a change by the ORM, so the update could be skipped.
        sr_ids = list(existing.sr_ids or [])
        if sr_id not in sr_ids:
            sr_ids.append(sr_id)
        existing.sr_ids = sr_ids
        existing.occurrence_count = len(sr_ids)
        existing.last_seen_at = datetime.utcnow()
        await db.flush()

        escalated = False
        problem_id = None

        # Escalate to a Problem ticket once the threshold is reached.
        if existing.occurrence_count >= escalate_threshold and not existing.escalated:
            pid = await _escalate_to_problem(db, existing, title, sr_type, inst_id)
            # Only mark the pattern as escalated when a ticket really exists;
            # otherwise a failed creation would never be retried.
            if pid is not None:
                existing.escalated = True
                existing.problem_id = pid
                escalated = True
                problem_id = pid
                await db.flush()

        # Snapshot everything needed for the response BEFORE committing, so no
        # ORM attribute is read after commit (instances may be expired then).
        pattern_id = existing.id
        occurrence_count = existing.occurrence_count
        recent_sr_ids = sr_ids[-5:]
        kb_query_text = existing.keyword_signature or text

        await db.commit()

        # Suggest related KB documents.
        suggested_kbs = await _find_kb_for_pattern(db, kb_query_text)

        return {
            "recurrence_found": True,
            "pattern_id": pattern_id,
            "occurrence_count": occurrence_count,
            "escalated": escalated,
            "problem_id": problem_id,
            "similar_sr_ids": recent_sr_ids,
            "suggested_kb_ids": suggested_kbs,
        }

    # No matching pattern -> register a new one.
    now = datetime.utcnow()
    new_pattern = RecurrencePattern(
        pattern_hash      = pattern_hash,
        pattern_key       = pattern_key[:200],
        sr_type           = sr_type,
        inst_id           = inst_id,
        keyword_signature = " ".join(sorted(tokens)[:30]),
        tech_keywords     = ",".join(tech_kw),
        occurrence_count  = 1,
        first_seen_at     = now,
        last_seen_at      = now,
        sr_ids            = [sr_id],
        escalated         = False,
    )
    db.add(new_pattern)
    await db.commit()
    await db.refresh(new_pattern)

    return {
        "recurrence_found": False,
        "pattern_id": new_pattern.id,
        "occurrence_count": 1,
        "escalated": False,
        "problem_id": None,
        "similar_sr_ids": [],
        "suggested_kb_ids": [],
    }


async def _escalate_to_problem(
    db: AsyncSession,
    pattern: "RecurrencePattern",
    title: str,
    sr_type: str,
    inst_id: Optional[int],
) -> Optional[str]:
    """Create a Problem record for a recurring pattern (best effort).

    The new id is ``PRB-<YYYYMMDD>-<4-digit sequence>``, where the sequence is
    one more than the highest id already stored for today. The record is added
    and flushed but not committed here.

    Returns:
        The new problem id, or ``None`` if anything failed (the failure is
        logged as a warning and not re-raised).
    """
    try:
        from models import ProblemRecord

        prefix = "PRB-" + datetime.utcnow().strftime("%Y%m%d") + "-"
        latest_query = (
            select(ProblemRecord.prb_id)
            .where(ProblemRecord.prb_id.like(f"{prefix}%"))
            .order_by(desc(ProblemRecord.prb_id))
            .limit(1)
        )
        last = (await db.execute(latest_query)).scalar()

        # Continue today's numbering, or start at 1.
        if last:
            seq = int(last.split("-")[-1]) + 1
        else:
            seq = 1
        prb_id = f"{prefix}{seq:04d}"

        sr_count = pattern.occurrence_count
        problem_title = f"[자동감지] 반복 장애: {title[:80]}"
        problem_body = (
            f"학습 엔진이 동일 유형 SR이 {sr_count}회 반복됨을 자동 감지했습니다.\n"
            f"SR 유형: {sr_type}\n"
            f"연관 SR: {', '.join((pattern.sr_ids or [])[-5:])}\n"
            f"기술 키워드: {pattern.tech_keywords or '-'}\n\n"
            f"근본 원인 분석(RCA) 후 영구 조치가 필요합니다."
        )
        db.add(ProblemRecord(
            prb_id=prb_id,
            title=problem_title,
            description=problem_body,
            status="OPEN",
            priority="HIGH",
            inst_id=inst_id,
            created_at=datetime.utcnow(),
            source="learning-engine",
        ))
        await db.flush()
        logger.info("재발 패턴 → Problem 티켓 생성: %s (SR %d회)", prb_id, sr_count)
        return prb_id
    except Exception as e:
        logger.warning("Problem 티켓 자동 생성 실패: %s", e)
        return None


async def _find_kb_for_pattern(
    db: AsyncSession,
    keyword_text: str,
    limit: int = 3,
) -> List[int]:
    """Return ids of KB documents similar to the pattern keywords.

    The 100 most recently created KB documents are scored by Jaccard
    similarity between the tokens of *keyword_text* and the tokens of each
    document's title/symptoms/tags; documents scoring above 0.15 are kept and
    the top *limit* ids are returned, best first.

    This is best effort: any failure is logged and an empty list is returned.
    """
    try:
        from models import KBDocument

        tokens = _tokenize(keyword_text)
        if not tokens:
            # With no tokens every similarity is 0.0, so skip the query.
            return []

        rows = (await db.execute(
            select(KBDocument)
            .order_by(desc(KBDocument.created_at))
            .limit(100)
        )).scalars().all()

        scored = []
        for doc in rows:
            doc_tokens = _tokenize(f"{doc.title} {doc.symptoms or ''} {doc.tags or ''}")
            sim = _jaccard(tokens, doc_tokens)
            if sim > 0.15:
                scored.append((sim, doc.id))

        scored.sort(reverse=True)
        return [doc_id for _, doc_id in scored[:limit]]
    except Exception as e:
        # Suggestions are optional, so do not fail the caller — but leave a
        # trace instead of swallowing the error silently.
        logger.warning("연관 KB 검색 실패: %s", e)
        return []


# ═══════════════════════════════════════════════════════════════════════════════
# 2. SolutionEffectivenessTracker — KB 솔루션 효과 추적
# ═══════════════════════════════════════════════════════════════════════════════

async def record_kb_usage(
    db: AsyncSession,
    sr_id: str,
    kb_doc_id: str,
    kb_id: Optional[int] = None,
) -> Dict:
    """Record that a KB document was used to resolve an SR.

    If a feedback row for the same (sr_id, kb_doc_id) already exists, nothing
    is written and ``recorded`` is False. Otherwise a new row is committed
    with ``resolved=True`` and the current UTC time as ``applied_at``.

    Returns:
        ``{"recorded": bool, "feedback_id": int}`` plus a ``"reason"`` entry
        when the usage was already recorded.
    """
    from models import SolutionFeedback

    # Duplicate guard: one feedback row per (SR, KB document) pair.
    same_pair = and_(
        SolutionFeedback.sr_id == sr_id,
        SolutionFeedback.kb_doc_id == kb_doc_id,
    )
    previous = (await db.execute(
        select(SolutionFeedback).where(same_pair)
    )).scalars().first()
    if previous:
        return {"recorded": False, "reason": "이미 기록됨", "feedback_id": previous.id}

    entry = SolutionFeedback(
        sr_id=sr_id,
        kb_id=kb_id,
        kb_doc_id=kb_doc_id,
        applied_at=datetime.utcnow(),
        resolved=True,
    )
    db.add(entry)
    await db.commit()
    await db.refresh(entry)
    logger.info("KB 사용 기록: SR=%s KB=%s", sr_id, kb_doc_id)
    return {"recorded": True, "feedback_id": entry.id}


async def check_solution_effectiveness(
    db: AsyncSession,
    feedback_id: int,
    recurrence_window_days: int = 7,
) -> Dict:
    """Score a feedback record by checking whether the issue recurred.

    A recurrence is another SR of the same institution and SR type created
    within ``recurrence_window_days`` after the KB was applied. The feedback
    gets ``effectiveness_score`` -1 if one exists, +1 otherwise, and the score
    is accumulated on the KB document. When the KB document's accumulated
    score reaches 10 it is promoted to a LessonLearned.

    Calling this again for an already-checked feedback replaces its previous
    contribution to the KB score instead of adding to it a second time.

    Returns:
        ``{"error": str}`` if the feedback or its SR is missing; otherwise a
        dict with ``feedback_id``, ``sr_id``, ``kb_doc_id``,
        ``effectiveness_score``, ``recurred`` and ``recurred_within_days``.
    """
    from models import SolutionFeedback, SRRequest, KBDocument

    fb = (await db.execute(
        select(SolutionFeedback).where(SolutionFeedback.id == feedback_id)
    )).scalars().first()

    if not fb:
        return {"error": "feedback not found"}

    sr = (await db.execute(
        select(SRRequest).where(SRRequest.sr_id == fb.sr_id)
    )).scalars().first()

    if not sr:
        return {"error": "SR not found"}

    # What this feedback already contributed to the KB score (0 if it has
    # never been checked), so a re-check does not double count.
    previous_score = (fb.effectiveness_score or 0) if fb.checked_at is not None else 0

    # Earliest SR of the same institution/type inside the recurrence window.
    since = fb.applied_at
    until = since + timedelta(days=recurrence_window_days)
    recurred = (await db.execute(
        select(SRRequest).where(
            and_(
                SRRequest.sr_id != fb.sr_id,
                SRRequest.inst_id == sr.inst_id,
                SRRequest.sr_type == sr.sr_type,
                SRRequest.created_at >= since,
                SRRequest.created_at <= until,
            )
        )
        # Deterministic choice: without ordering, the row used to compute
        # recurred_within_days would be arbitrary.
        .order_by(SRRequest.created_at)
        .limit(1)
    )).scalars().first()

    if recurred:
        fb.recurred_within_days = (recurred.created_at - since).days
        fb.effectiveness_score = -1
    else:
        fb.recurred_within_days = None
        fb.effectiveness_score = 1

    fb.checked_at = datetime.utcnow()
    await db.flush()

    # Accumulate the change on the KB document.
    score_delta = fb.effectiveness_score - previous_score
    if fb.kb_doc_id:
        kb_doc = (await db.execute(
            select(KBDocument).where(KBDocument.doc_id == fb.kb_doc_id)
        )).scalars().first()
        if kb_doc:
            current = getattr(kb_doc, "effectiveness_score", 0) or 0
            kb_doc.effectiveness_score = current + score_delta
            await db.flush()

            # Automatic promotion check.
            if kb_doc.effectiveness_score >= 10:
                await _promote_kb_to_lesson(db, kb_doc)

    # Build the response before committing so no ORM attribute is read after
    # commit (instances may be expired at that point).
    result = {
        "feedback_id": feedback_id,
        "sr_id": fb.sr_id,
        "kb_doc_id": fb.kb_doc_id,
        "effectiveness_score": fb.effectiveness_score,
        "recurred": recurred is not None,
        "recurred_within_days": fb.recurred_within_days,
    }

    await db.commit()
    return result


async def _promote_kb_to_lesson(db: AsyncSession, kb_doc) -> Optional[Dict]:
    """Promote a KB document to a verified LessonLearned record.

    Does nothing (returns ``None``) if a lesson promoted from this KB document
    already exists. Otherwise a lesson with id
    ``LESSON-<YYYYMMDD>-<4-digit sequence>`` is added and flushed (not
    committed), and ``{"lesson_id": ..., "kb_id": ...}`` is returned.
    """
    from models import LessonLearned

    # Skip when this KB document was already promoted.
    already_promoted = (await db.execute(
        select(LessonLearned).where(LessonLearned.promoted_from_kb_id == kb_doc.id)
    )).scalars().first()
    if already_promoted:
        return None

    prefix = "LESSON-" + datetime.utcnow().strftime("%Y%m%d") + "-"
    latest_id = (await db.execute(
        select(LessonLearned.lesson_id)
        .where(LessonLearned.lesson_id.like(f"{prefix}%"))
        .order_by(desc(LessonLearned.lesson_id))
        .limit(1)
    )).scalar()

    # Continue today's numbering, or start at 1.
    next_seq = 1
    if latest_id:
        next_seq = int(latest_id.split("-")[-1]) + 1
    lesson_id = f"{prefix}{next_seq:04d}"

    db.add(LessonLearned(
        lesson_id=lesson_id,
        title=f"[검증됨] {kb_doc.title}",
        category=kb_doc.category or "일반",
        problem_pattern=kb_doc.symptoms or "",
        root_cause=kb_doc.cause or "",
        effective_solution=kb_doc.solution or "",
        prevention="",
        confidence_score=kb_doc.effectiveness_score,
        source_kb_ids=[kb_doc.id],
        source_sr_ids=([kb_doc.source_sr_id] if kb_doc.source_sr_id else []),
        promoted_from_kb_id=kb_doc.id,
        is_verified=True,
        created_at=datetime.utcnow(),
    ))
    await db.flush()
    logger.info("KB %d → LessonLearned %s 자동 승격 (효과점수=%d)",
                kb_doc.id, lesson_id, kb_doc.effectiveness_score)
    return {"lesson_id": lesson_id, "kb_id": kb_doc.id}


# ═══════════════════════════════════════════════════════════════════════════════
# 3. AdaptiveThresholdCalibrator — 이상 탐지 임계값 자동 보정
# ═══════════════════════════════════════════════════════════════════════════════

# Calibration bounds: the adapted threshold is clamped to 50%–200% of the base threshold.
_THRESHOLD_MIN_RATIO = 0.5
_THRESHOLD_MAX_RATIO = 2.0
_FALSE_POSITIVE_TRIGGER = 0.25   # raise the threshold when the false-positive rate exceeds 25%
_MISSED_TRIGGER = 3              # lower the threshold when missed detections exceed 3
_ADJUSTMENT_STEP = 0.08          # each adjustment moves the threshold by 8%


async def record_anomaly_outcome(
    db: AsyncSession,
    source: str,
    metric_type: str,
    was_actual_incident: bool,
    base_threshold: Optional[float] = None,
) -> Dict:
    """Record whether an anomaly alert turned out to be a real incident.

    ``was_actual_incident=True`` counts as a true positive, ``False`` as a
    false positive. If no threshold record exists for (source, metric_type)
    one is created; its base threshold is *base_threshold*, or the first
    element of ``DEFAULT_THRESHOLDS[metric_type]``, or 90.0 as a last resort.
    *base_threshold* is ignored when the record already exists.

    Returns:
        ``{"source", "metric_type", "true_positive", "false_positive"}``.
    """
    from models import AdaptiveThreshold
    from core.anomaly import DEFAULT_THRESHOLDS

    rec = (await db.execute(
        select(AdaptiveThreshold).where(
            and_(
                AdaptiveThreshold.source == source,
                AdaptiveThreshold.metric_type == metric_type,
            )
        )
    )).scalars().first()

    if not rec:
        # Fall back to the configured default for this metric type.
        if base_threshold is None:
            info = DEFAULT_THRESHOLDS.get(metric_type)
            base_threshold = info[0] if info else 90.0

        rec = AdaptiveThreshold(
            source             = source,
            metric_type        = metric_type,
            base_threshold     = base_threshold,
            adapted_threshold  = base_threshold,
            true_positive      = 0,
            false_positive     = 0,
            missed_count       = 0,
            adaptation_count   = 0,
            created_at         = datetime.utcnow(),
        )
        db.add(rec)
        await db.flush()

    # `or 0` keeps a NULL counter from raising TypeError on increment.
    if was_actual_incident:
        rec.true_positive = (rec.true_positive or 0) + 1
    else:
        rec.false_positive = (rec.false_positive or 0) + 1

    # Build the response before committing so no ORM attribute is read after
    # commit (instances may be expired at that point).
    result = {"source": source, "metric_type": metric_type,
              "true_positive": rec.true_positive, "false_positive": rec.false_positive}
    await db.commit()
    return result


async def record_missed_detection(
    db: AsyncSession,
    source: str,
    metric_type: str,
) -> Dict:
    """Record a missed detection: a real incident that raised no alert.

    If no threshold record exists for (source, metric_type) one is created,
    using the first element of ``DEFAULT_THRESHOLDS[metric_type]`` (or 90.0)
    as both base and adapted threshold.

    Returns:
        ``{"source", "metric_type", "missed_count"}``.
    """
    from models import AdaptiveThreshold
    from core.anomaly import DEFAULT_THRESHOLDS

    rec = (await db.execute(
        select(AdaptiveThreshold).where(
            and_(
                AdaptiveThreshold.source == source,
                AdaptiveThreshold.metric_type == metric_type,
            )
        )
    )).scalars().first()

    if not rec:
        info = DEFAULT_THRESHOLDS.get(metric_type)
        base = info[0] if info else 90.0
        rec = AdaptiveThreshold(
            source=source, metric_type=metric_type,
            base_threshold=base, adapted_threshold=base,
            true_positive=0, false_positive=0, missed_count=0,
            adaptation_count=0, created_at=datetime.utcnow(),
        )
        db.add(rec)
        await db.flush()

    # `or 0` keeps a NULL counter from raising TypeError on increment.
    rec.missed_count = (rec.missed_count or 0) + 1

    # Read the counter before committing so no ORM attribute is accessed
    # after commit (instances may be expired at that point).
    missed_count = rec.missed_count
    await db.commit()
    return {"source": source, "metric_type": metric_type, "missed_count": missed_count}


async def calibrate_threshold(
    db: AsyncSession,
    source: str,
    metric_type: str,
) -> Dict:
    """Adjust the adaptive threshold from accumulated alert outcomes.

    Rules, applied in this order (a later rule overrides an earlier one):
      * with at least 5 recorded alerts and a false-positive rate above
        ``_FALSE_POSITIVE_TRIGGER``, raise the threshold by ``_ADJUSTMENT_STEP``;
      * with more than ``_MISSED_TRIGGER`` missed detections, lower it by
        ``_ADJUSTMENT_STEP``.
    The result is clamped to the min/max ratio of the base threshold and
    rounded to two decimals. A change smaller than 0.01 is not applied.
    When applied, the alert counters are halved and the missed counter reset.

    Returns:
        ``{"adjusted": bool, "old_threshold": float, "new_threshold": float,
        "reason": str}`` (plus ``source``/``metric_type`` when adjusted);
        only ``adjusted`` and ``reason`` when no record exists.
    """
    from models import AdaptiveThreshold

    lookup = select(AdaptiveThreshold).where(
        and_(
            AdaptiveThreshold.source == source,
            AdaptiveThreshold.metric_type == metric_type,
        )
    )
    rec = (await db.execute(lookup)).scalars().first()
    if not rec:
        return {"adjusted": False, "reason": "기록 없음"}

    observed = rec.true_positive + rec.false_positive
    old_thr = rec.adapted_threshold
    new_thr = old_thr
    reason = "변경 없음"

    # Too many false alarms -> alert only at higher values.
    if observed >= 5:
        fp_rate = rec.false_positive / observed
        if fp_rate > _FALSE_POSITIVE_TRIGGER:
            new_thr = old_thr * (1 + _ADJUSTMENT_STEP)
            reason = f"오탐률 {fp_rate:.0%} > {_FALSE_POSITIVE_TRIGGER:.0%} → 임계값 상향"

    # Too many missed incidents -> become more sensitive (takes precedence).
    if rec.missed_count > _MISSED_TRIGGER:
        new_thr = old_thr * (1 - _ADJUSTMENT_STEP)
        reason = f"누락 탐지 {rec.missed_count}회 > {_MISSED_TRIGGER}회 → 임계값 하향"

    # Keep the value inside the allowed band around the base threshold.
    floor = rec.base_threshold * _THRESHOLD_MIN_RATIO
    ceiling = rec.base_threshold * _THRESHOLD_MAX_RATIO
    new_thr = round(max(floor, min(ceiling, new_thr)), 2)

    if abs(new_thr - old_thr) < 0.01:
        return {"adjusted": False, "old_threshold": old_thr,
                "new_threshold": new_thr, "reason": reason}

    rec.adapted_threshold = new_thr
    rec.last_adapted_at = datetime.utcnow()
    rec.adaptation_count += 1
    # Partially reset the counters so old outcomes weigh less next time.
    rec.false_positive = rec.false_positive // 2
    rec.true_positive = rec.true_positive // 2
    rec.missed_count = 0
    await db.commit()

    logger.info("임계값 보정: %s/%s %.2f → %.2f (%s)", source, metric_type, old_thr, new_thr, reason)
    return {
        "adjusted": True,
        "source": source,
        "metric_type": metric_type,
        "old_threshold": old_thr,
        "new_threshold": new_thr,
        "reason": reason,
    }


async def get_adapted_threshold(
    db: AsyncSession,
    source: str,
    metric_type: str,
) -> Optional[float]:
    """Return the stored adapted threshold for (source, metric_type).

    Returns ``None`` when no record exists, in which case the caller is
    expected to fall back to its default threshold.
    """
    from models import AdaptiveThreshold

    same_series = and_(
        AdaptiveThreshold.source == source,
        AdaptiveThreshold.metric_type == metric_type,
    )
    stmt = select(AdaptiveThreshold.adapted_threshold).where(same_series)
    return (await db.execute(stmt)).scalar()


# ═══════════════════════════════════════════════════════════════════════════════
# 4. LessonMiner — 패턴 마이닝 & 교훈 자동 생성
# ═══════════════════════════════════════════════════════════════════════════════

async def run_lesson_mining(
    db: AsyncSession,
    days_back: int = 30,
    min_occurrences: int = 3,
) -> Dict:
    """Mine recent recurrence patterns and create or refresh lessons.

    For every pattern seen in the last *days_back* days with at least
    *min_occurrences* occurrences, a LessonLearned with id
    ``LESSON-<YYYYMMDD>-PAT<pattern id>`` is created, or the existing one
    (matched by the ``-PAT<pattern id>`` suffix) is updated. Afterwards,
    pending solution feedbacks are checked and adaptive thresholds are
    calibrated in batch, then everything is committed.

    Returns:
        ``{"patterns_analyzed": int, "lessons_created": int,
        "lessons_updated": int, "thresholds_calibrated": int,
        "run_at": ISO-8601 str}``.
    """
    from models import RecurrencePattern, LessonLearned

    since = datetime.utcnow() - timedelta(days=days_back)
    patterns = (await db.execute(
        select(RecurrencePattern).where(
            and_(
                RecurrencePattern.last_seen_at >= since,
                RecurrencePattern.occurrence_count >= min_occurrences,
            )
        ).order_by(desc(RecurrencePattern.occurrence_count))
    )).scalars().all()

    created = 0
    updated = 0

    for pat in patterns:
        sr_ids = pat.sr_ids or []
        if not sr_ids:
            continue

        # Most effective KB used by any SR of this pattern (may be None).
        best_kb = await _find_best_kb_for_pattern_srs(db, sr_ids)

        # Look for a lesson previously generated from this pattern.
        existing_lesson = (await db.execute(
            select(LessonLearned).where(
                LessonLearned.lesson_id.like(f"%-PAT{pat.id}")
            )
        )).scalars().first()

        if existing_lesson:
            existing_lesson.confidence_score = pat.occurrence_count
            existing_lesson.source_sr_ids    = sr_ids[-10:]
            if best_kb:
                # The "solution" key is always present but may hold None, so
                # dict.get's default would never apply; use `or` to avoid
                # overwriting a stored solution with None/empty text.
                existing_lesson.effective_solution = (
                    best_kb.get("solution") or existing_lesson.effective_solution
                )
            existing_lesson.updated_at = datetime.utcnow()
            updated += 1
        else:
            today = datetime.utcnow().strftime("%Y%m%d")
            lesson_id = f"LESSON-{today}-PAT{pat.id}"

            solution_text = ""
            root_cause_text = ""
            if best_kb:
                solution_text   = best_kb.get("solution") or ""
                root_cause_text = best_kb.get("cause") or ""

            lesson = LessonLearned(
                lesson_id          = lesson_id,
                title              = f"[반복패턴] {pat.sr_type} 재발 {pat.occurrence_count}회 — {(pat.tech_keywords or '').replace(',', '/')}",
                category           = _infer_category(pat.tech_keywords or ""),
                problem_pattern    = pat.keyword_signature or "",
                root_cause         = root_cause_text or "반복 패턴 기반 분석 중",
                effective_solution = solution_text or "KB 문서 참조 또는 전문가 조치 필요",
                prevention         = f"동일 유형({pat.sr_type}) SR이 {pat.occurrence_count}회 반복됨. 근본 원인 제거 필요.",
                confidence_score   = pat.occurrence_count,
                source_kb_ids      = ([best_kb["id"]] if best_kb else []),
                source_sr_ids      = sr_ids[-10:],
                is_verified        = False,
                created_at         = datetime.utcnow(),
            )
            db.add(lesson)
            created += 1

    # Captured here because the batch steps below may commit.
    patterns_analyzed = len(patterns)

    # Verify the effect of feedbacks that are still pending.
    await _batch_check_pending_feedbacks(db)

    # Calibrate adaptive thresholds in batch.
    calibration_results = await _batch_calibrate_thresholds(db)

    await db.commit()

    return {
        "patterns_analyzed": patterns_analyzed,
        "lessons_created":   created,
        "lessons_updated":   updated,
        "thresholds_calibrated": calibration_results,
        "run_at": datetime.utcnow().isoformat(),
    }


async def _find_best_kb_for_pattern_srs(
    db: AsyncSession,
    sr_ids: List[str],
) -> Optional[Dict]:
    """Return the KB document behind the best-scoring feedback for these SRs.

    Only feedbacks with a positive effectiveness score and a KB document id
    are considered. Returns a dict with ``id``, ``doc_id``, ``title``,
    ``solution`` and ``cause``, or ``None`` when no such feedback or KB
    document exists.
    """
    from models import SolutionFeedback, KBDocument

    eligible = and_(
        SolutionFeedback.sr_id.in_(sr_ids),
        SolutionFeedback.effectiveness_score > 0,
        SolutionFeedback.kb_doc_id.isnot(None),
    )
    top_feedback = (await db.execute(
        select(SolutionFeedback)
        .where(eligible)
        .order_by(desc(SolutionFeedback.effectiveness_score))
        .limit(1)
    )).scalars().first()

    if not (top_feedback and top_feedback.kb_doc_id):
        return None

    kb = (await db.execute(
        select(KBDocument).where(KBDocument.doc_id == top_feedback.kb_doc_id)
    )).scalars().first()
    if not kb:
        return None

    summary = {}
    summary["id"] = kb.id
    summary["doc_id"] = kb.doc_id
    summary["title"] = kb.title
    summary["solution"] = kb.solution
    summary["cause"] = kb.cause
    return summary


async def _batch_check_pending_feedbacks(db: AsyncSession) -> int:
    """Check up to 50 unverified feedbacks applied at least 7 days ago.

    Feedbacks are processed oldest first. A failure on one feedback is logged
    and does not stop the batch.

    Returns:
        The number of feedbacks that were actually scored (results carrying
        an ``"error"`` entry and raised exceptions are not counted).
    """
    from models import SolutionFeedback

    cutoff = datetime.utcnow() - timedelta(days=7)
    # Select plain ids rather than ORM instances: each check commits, and
    # reading attributes of instances loaded before a commit may trigger
    # implicit IO if they were expired by it.
    pending_ids = (await db.execute(
        select(SolutionFeedback.id).where(
            and_(
                SolutionFeedback.checked_at.is_(None),
                SolutionFeedback.applied_at <= cutoff,
            )
        )
        .order_by(SolutionFeedback.applied_at)
        .limit(50)
    )).scalars().all()

    # NOTE(review): feedbacks whose SR no longer exists are never marked as
    # checked, so they are re-selected on every run — confirm desired handling.
    checked = 0
    for feedback_id in pending_ids:
        try:
            outcome = await check_solution_effectiveness(db, feedback_id)
        except Exception as e:
            logger.warning("피드백 %d 검증 실패: %s", feedback_id, e)
            continue
        if "error" in outcome:
            logger.warning("피드백 %d 검증 실패: %s", feedback_id, outcome["error"])
            continue
        checked += 1

    return checked


async def _batch_calibrate_thresholds(db: AsyncSession) -> int:
    """Run calibrate_threshold() for up to 100 threshold records.

    A record is a candidate when it has at least 5 recorded alerts
    (true + false positives) OR more than ``_MISSED_TRIGGER`` missed
    detections. The second condition matters because a threshold that is too
    high produces few alerts but many misses, and calibrate_threshold()
    lowers the threshold on misses alone.

    Returns:
        The number of records whose threshold was actually adjusted.
    """
    from models import AdaptiveThreshold

    enough_alerts = (
        AdaptiveThreshold.true_positive + AdaptiveThreshold.false_positive
    ) >= 5
    too_many_misses = AdaptiveThreshold.missed_count > _MISSED_TRIGGER

    # Select plain (source, metric_type) pairs rather than ORM instances:
    # calibrate_threshold() commits, and reading attributes of instances
    # loaded before a commit may trigger implicit IO if they were expired.
    series = (await db.execute(
        select(AdaptiveThreshold.source, AdaptiveThreshold.metric_type)
        .where(enough_alerts | too_many_misses)
        .limit(100)
    )).all()

    calibrated = 0
    for source, metric_type in series:
        result = await calibrate_threshold(db, source, metric_type)
        if result.get("adjusted"):
            calibrated += 1

    return calibrated


def _infer_category(tech_keywords: str) -> str:
    """Map a tech-keyword string to a lesson category.

    The first rule whose keyword occurs as a substring of the lower-cased
    input wins; rule order defines priority. Falls back to the general
    category when nothing matches.
    """
    lowered = tech_keywords.lower()
    priority_rules = (
        ("deploy", "배포"),
        ("was", "WAS"),
        ("db", "DB"),
        ("web", "웹서버"),
        ("resource", "서버 운영"),
        ("network", "네트워크"),
        ("ssl", "보안"),
        ("incident", "장애 대응"),
    )
    for needle, category in priority_rules:
        if needle in lowered:
            return category
    return "일반"


# ═══════════════════════════════════════════════════════════════════════════════
# 통계 요약
# ═══════════════════════════════════════════════════════════════════════════════

async def get_learning_stats(db: AsyncSession) -> Dict:
    """Return a summary of the learning engine's current state.

    The result has four sections — ``recurrence`` (pattern totals and the
    five most frequent patterns), ``solution_effectiveness``, ``lessons`` and
    ``adaptive_thresholds`` — plus a ``generated_at`` ISO-8601 timestamp.
    """
    from models import (
        RecurrencePattern, SolutionFeedback, AdaptiveThreshold, LessonLearned, KBDocument
    )

    async def _count(model, *criteria) -> int:
        # COUNT(*) over `model`, optionally filtered; NULL/None becomes 0.
        stmt = select(func.count()).select_from(model)
        if criteria:
            stmt = stmt.where(*criteria)
        return (await db.execute(stmt)).scalar() or 0

    pattern_count = await _count(RecurrencePattern)
    escalated_count = await _count(RecurrencePattern, RecurrencePattern.escalated == True)
    feedback_count = await _count(SolutionFeedback)
    effective_count = await _count(SolutionFeedback, SolutionFeedback.effectiveness_score > 0)
    lesson_count = await _count(LessonLearned)
    verified_lesson_count = await _count(LessonLearned, LessonLearned.is_verified == True)
    threshold_count = await _count(AdaptiveThreshold)
    adapted_count = await _count(AdaptiveThreshold, AdaptiveThreshold.adaptation_count > 0)

    # Five most frequently recurring patterns.
    top_patterns = (await db.execute(
        select(RecurrencePattern)
        .order_by(desc(RecurrencePattern.occurrence_count))
        .limit(5)
    )).scalars().all()

    top_recurring = []
    for p in top_patterns:
        top_recurring.append({
            "id":           p.id,
            "sr_type":      p.sr_type,
            "count":        p.occurrence_count,
            "tech_keywords": p.tech_keywords,
            "last_seen":    p.last_seen_at.isoformat() if p.last_seen_at else None,
        })

    if feedback_count:
        effectiveness_rate = round(effective_count / feedback_count, 3)
    else:
        effectiveness_rate = 0

    return {
        "recurrence": {
            "total_patterns":    pattern_count,
            "escalated":         escalated_count,
            "top_recurring":     top_recurring,
        },
        "solution_effectiveness": {
            "total_feedbacks":   feedback_count,
            "effective":         effective_count,
            "effectiveness_rate": effectiveness_rate,
        },
        "lessons": {
            "total":    lesson_count,
            "verified": verified_lesson_count,
        },
        "adaptive_thresholds": {
            "total_tracked":  threshold_count,
            "adapted":        adapted_count,
        },
        "generated_at": datetime.utcnow().isoformat(),
    }