G-1: 메신저 Webhook Relay + _send_to_room 실제 httpx 호출 구현 G-2: POST /api/tasks/bulk SR 대량작업 엔드포인트 (최대 100건) G-3: 라이선스 만료 알림 스케줄러 (매일 09:00 KST) G-4: 체험판 upgrade_banner 필드 + license.py 배너 로직 G-5: core/auto_rca.py + incidents/problem auto-rca 엔드포인트 G-6: core/deploy_impact.py + vibe impact-analysis 엔드포인트 G-7: core/ticket_classifier.py + SR 생성 시 AI 분류 + ai-suggestion API G-8: VulnPatchRecord 모델 + vuln_scan 패치추적 4개 엔드포인트 G-9: core/jira_sync.py + gateway Jira/Confluence 연동 엔드포인트 G-10: core/push_notify.py + routers/push.py + PushSubscription 모델 G-11: approvals 다중승인 (위임/서명/기한초과/마감연장) G-12: alembic.ini + migrations/ + cicd/migrate_to_postgres.sh 하네스: guardia-orchestrator 확장기능 Phase 반영 봇명령어: /sr /status /license /bulk 슬래시 명령어 추가 설치스크립트: setup/ (Ubuntu, CentOS, RHEL, Windows) --test 옵션 포함 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
906 lines
32 KiB
Python
906 lines
32 KiB
Python
"""
|
|
Self-Improving Learning Loop — GUARDiA
|
|
|
|
4개 피드백 루프:
|
|
1. RecurrenceTracker — 재발 패턴 자동 감지 + Problem 티켓 격상
|
|
2. SolutionEffectiveness — KB 솔루션 효과 추적 + 검증된 룬북 승격
|
|
3. AdaptiveThreshold — 이상 탐지 임계값 자동 보정
|
|
4. LessonMiner — 패턴 마이닝 → 교훈 자동 생성
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import re
|
|
import statistics
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
from sqlalchemy import select, and_, desc, func
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ── Korean / English stopwords ────────────────────────────────────────────────
# Words that carry no signal for pattern matching: common Korean and English
# function words plus generic ticket vocabulary.
_STOPWORDS = set(
    (
        # Korean function words
        "이", "가", "을", "를", "의", "에", "는", "은", "그", "및", "또한",
        # English function words
        "the", "a", "an", "is", "are", "was", "were", "for", "with", "and",
        # Generic service-request vocabulary
        "sr", "요청", "작업", "처리", "확인", "수행", "필요", "발생",
    )
)
|
|
|
|
# (regex, label) pairs: when the regex is found in the lowercased text, the
# label is attached to the text as a technology keyword.
_TECH_PATTERNS = [
    # middleware / servers
    (r"\b(tomcat|jboss|weblogic|jeus|websphere)\b", "was"),
    (r"\b(nginx|apache|iis|webtob)\b", "web"),
    (r"\b(oracle|mysql|postgresql|mssql|tibero)\b", "db"),
    # capacity
    (r"\b(cpu|memory|메모리|heap|힙)\b", "resource"),
    (r"\b(disk|디스크|storage)\b", "disk"),
    # operations
    (r"\b(deploy|배포|release|릴리즈)\b", "deploy"),
    (r"\b(restart|재기동|재시작)\b", "restart"),
    # failures
    (r"\b(error|오류|에러|exception|예외)\b", "error"),
    (r"\b(timeout|타임아웃|연결|connection)\b", "network"),
    (r"\b(ssl|tls|인증서|certificate)\b", "ssl"),
    (r"\b(backup|백업)\b", "backup"),
    (r"\b(장애|incident|down|다운)\b", "incident"),
]
|
|
|
|
|
|
# ── 텍스트 지문 유틸 ───────────────────────────────────────────────────────────
|
|
|
|
def _tokenize(text: str) -> set:
    """Return the set of significant lowercase tokens found in *text*.

    The text is lowercased and split on runs of whitespace and the punctuation
    characters ``, ; : . ( ) [ ] { } ! ?``. Pieces shorter than two characters
    and members of ``_STOPWORDS`` are dropped.
    """
    pieces = re.split(r'[\s,;:.()[\]{}!?]+', text.lower())
    kept = set()
    for piece in pieces:
        if len(piece) < 2 or piece in _STOPWORDS:
            continue
        kept.add(piece)
    return kept
|
|
|
|
|
|
def _extract_tech_keywords(text: str) -> List[str]:
    """Return the sorted, de-duplicated technology labels matched in *text*.

    Every regex in ``_TECH_PATTERNS`` is searched against the lowercased text;
    the label of each pattern that matches is collected.
    """
    lowered = text.lower()
    labels = {
        label
        for pattern, label in _TECH_PATTERNS
        if re.search(pattern, lowered)
    }
    return sorted(labels)
|
|
|
|
|
|
def _jaccard(a: set, b: set) -> float:
    """Return the Jaccard similarity |a ∩ b| / |a ∪ b| of two sets.

    Yields 0.0 when either set is empty.
    """
    if a and b:
        overlap = len(a & b)
        # |a ∪ b| = |a| + |b| - |a ∩ b|
        return overlap / (len(a) + len(b) - overlap)
    return 0.0
|
|
|
|
|
|
def _make_pattern_key(inst_id: Optional[int], sr_type: str, keywords: List[str]) -> str:
    """Build the ``|``-joined pattern key: scope, SR type, then sorted keywords.

    A falsy ``inst_id`` (``None`` or ``0``) is rendered as ``"global"``.
    """
    scope = str(inst_id) if inst_id else "global"
    return "|".join([scope, sr_type, *sorted(keywords)])
|
|
|
|
|
|
def _hash_key(key: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of *key*."""
    digest = hashlib.sha256(key.encode())
    return digest.hexdigest()[:16]
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# 1. RecurrenceTracker — 재발 패턴 감지
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
async def detect_recurrence(
    db: AsyncSession,
    sr_id: str,
    title: str,
    description: str,
    sr_type: str,
    inst_id: Optional[int] = None,
    escalate_threshold: int = 3,
    similarity_threshold: float = 0.35,
) -> Dict:
    """Detect whether a new SR repeats an already known recurrence pattern.

    The SR text (title + description) is reduced to technology labels and a
    token set. An exact ``pattern_hash`` match is tried first; otherwise up to
    50 patterns of the same ``sr_type`` seen within the last 90 days are
    compared by Jaccard similarity and the first one reaching
    ``similarity_threshold`` is used. A matched pattern has the SR appended
    and, once its occurrence count reaches ``escalate_threshold``, a Problem
    ticket is requested. Without a match a new pattern row is stored.

    The session is committed in both branches.

    Args:
        db: Async database session.
        sr_id: Identifier of the SR being registered.
        title: SR title.
        description: SR description.
        sr_type: SR type; part of the pattern key and the fallback filter.
        inst_id: Institution id; falsy values map to the "global" scope.
        escalate_threshold: Occurrence count at which a Problem is created.
        similarity_threshold: Minimum Jaccard similarity for the fallback.

    Returns:
        {
            "recurrence_found": bool,
            "pattern_id": int | None,
            "occurrence_count": int,
            "escalated": bool,
            "problem_id": str | None,
            "similar_sr_ids": list,
            "suggested_kb_ids": list,
        }
    """
    from models import RecurrencePattern

    text = f"{title} {description}"
    tech_kw = _extract_tech_keywords(text)
    tokens = _tokenize(text)

    pattern_key = _make_pattern_key(inst_id, sr_type, tech_kw)
    pattern_hash = _hash_key(pattern_key)

    # Exact fingerprint match first.
    existing = (await db.execute(
        select(RecurrencePattern).where(RecurrencePattern.pattern_hash == pattern_hash)
    )).scalars().first()

    # No hash match: fall back to similarity search over the last 90 days.
    if not existing:
        since = datetime.utcnow() - timedelta(days=90)
        candidates = (await db.execute(
            select(RecurrencePattern).where(
                and_(
                    RecurrencePattern.last_seen_at >= since,
                    RecurrencePattern.sr_type == sr_type,
                )
            ).order_by(desc(RecurrencePattern.occurrence_count)).limit(50)
        )).scalars().all()

        for cand in candidates:
            sig_tokens = _tokenize(cand.keyword_signature or "")
            if _jaccard(tokens, sig_tokens) >= similarity_threshold:
                existing = cand
                break

    if existing:
        # Assign a NEW list object: appending to the loaded list in place and
        # re-assigning the same object may not be detected as a change, so the
        # added SR id could be lost on flush.
        sr_ids = list(existing.sr_ids or [])
        if sr_id not in sr_ids:
            sr_ids.append(sr_id)
        existing.sr_ids = sr_ids
        existing.occurrence_count = len(sr_ids)
        existing.last_seen_at = datetime.utcnow()
        await db.flush()

        escalated = False
        problem_id = None

        # Threshold reached: create a Problem ticket automatically.
        if existing.occurrence_count >= escalate_threshold and not existing.escalated:
            problem_id = await _escalate_to_problem(db, existing, title, sr_type, inst_id)
            # Only flag the pattern once a ticket really exists; a failed
            # attempt (None) stays eligible for a retry on the next occurrence.
            if problem_id is not None:
                existing.escalated = True
                existing.problem_id = problem_id
                escalated = True
                await db.flush()

        # Snapshot everything needed for the response BEFORE committing, so no
        # attribute of a possibly expired instance is touched afterwards.
        pattern_id = existing.id
        occurrence_count = existing.occurrence_count
        kb_query_text = existing.keyword_signature or text
        similar_sr_ids = sr_ids[-5:]

        await db.commit()

        # Suggest related KB documents.
        suggested_kbs = await _find_kb_for_pattern(db, kb_query_text)

        return {
            "recurrence_found": True,
            "pattern_id": pattern_id,
            "occurrence_count": occurrence_count,
            "escalated": escalated,
            "problem_id": problem_id,
            "similar_sr_ids": similar_sr_ids,
            "suggested_kb_ids": suggested_kbs,
        }

    # Register a brand-new pattern.
    new_pattern = RecurrencePattern(
        pattern_hash=pattern_hash,
        pattern_key=pattern_key[:200],
        sr_type=sr_type,
        inst_id=inst_id,
        keyword_signature=" ".join(sorted(tokens)[:30]),
        tech_keywords=",".join(tech_kw),
        occurrence_count=1,
        first_seen_at=datetime.utcnow(),
        last_seen_at=datetime.utcnow(),
        sr_ids=[sr_id],
        escalated=False,
    )
    db.add(new_pattern)
    await db.commit()
    await db.refresh(new_pattern)

    return {
        "recurrence_found": False,
        "pattern_id": new_pattern.id,
        "occurrence_count": 1,
        "escalated": False,
        "problem_id": None,
        "similar_sr_ids": [],
        "suggested_kb_ids": [],
    }
|
|
|
|
|
|
async def _escalate_to_problem(
    db: AsyncSession,
    pattern: "RecurrencePattern",
    title: str,
    sr_type: str,
    inst_id: Optional[int],
) -> Optional[str]:
    """Create a Problem ticket for a recurrence pattern.

    The ticket id has the form ``PRB-YYYYMMDD-NNNN``; the sequence continues
    from the highest id already stored for the current UTC day. The new row is
    flushed but not committed here.

    Returns:
        The new ``prb_id``, or ``None`` when anything fails (the error is
        logged as a warning).
    """
    try:
        from models import ProblemRecord

        day_stamp = datetime.utcnow().strftime("%Y%m%d")
        prefix = f"PRB-{day_stamp}-"

        # Highest id issued today, if any.
        latest_query = (
            select(ProblemRecord.prb_id)
            .where(ProblemRecord.prb_id.like(f"{prefix}%"))
            .order_by(desc(ProblemRecord.prb_id))
            .limit(1)
        )
        last = (await db.execute(latest_query)).scalar()
        if last:
            seq = int(last.split("-")[-1]) + 1
        else:
            seq = 1
        prb_id = f"{prefix}{seq:04d}"

        sr_count = pattern.occurrence_count
        body = (
            f"학습 엔진이 동일 유형 SR이 {sr_count}회 반복됨을 자동 감지했습니다.\n"
            f"SR 유형: {sr_type}\n"
            f"연관 SR: {', '.join((pattern.sr_ids or [])[-5:])}\n"
            f"기술 키워드: {pattern.tech_keywords or '-'}\n\n"
            f"근본 원인 분석(RCA) 후 영구 조치가 필요합니다."
        )
        ticket = ProblemRecord(
            prb_id=prb_id,
            title=f"[자동감지] 반복 장애: {title[:80]}",
            description=body,
            status="OPEN",
            priority="HIGH",
            inst_id=inst_id,
            created_at=datetime.utcnow(),
            source="learning-engine",
        )
        db.add(ticket)
        await db.flush()
        logger.info("재발 패턴 → Problem 티켓 생성: %s (SR %d회)", prb_id, sr_count)
        return prb_id
    except Exception as e:
        # Best effort: escalation failures must not break SR registration.
        logger.warning("Problem 티켓 자동 생성 실패: %s", e)
        return None
|
|
|
|
|
|
async def _find_kb_for_pattern(
    db: AsyncSession,
    keyword_text: str,
    limit: int = 3,
) -> List[int]:
    """Return ids of KB documents whose text resembles *keyword_text*.

    The 100 most recently created KB documents are scored by Jaccard
    similarity between the tokens of ``keyword_text`` and the tokens of the
    document's title, symptoms and tags. Documents scoring above 0.15 are
    ranked best-first and the top ``limit`` ids are returned.

    This is a best-effort lookup: any failure is logged and an empty list is
    returned instead of raising.
    """
    try:
        from models import KBDocument

        tokens = _tokenize(keyword_text)
        if not tokens:
            # An empty token set has similarity 0 with everything, so nothing
            # could pass the cut-off; skip the query altogether.
            return []

        rows = (await db.execute(
            select(KBDocument)
            .order_by(desc(KBDocument.created_at))
            .limit(100)
        )).scalars().all()

        scored = []
        for doc in rows:
            doc_tokens = _tokenize(f"{doc.title} {doc.symptoms or ''} {doc.tags or ''}")
            sim = _jaccard(tokens, doc_tokens)
            if sim > 0.15:
                scored.append((sim, doc.id))

        scored.sort(reverse=True)
        return [doc_id for _, doc_id in scored[:limit]]
    except Exception as exc:
        # Previously swallowed silently; keep the best-effort contract but
        # leave a trace so database/model problems are diagnosable.
        logger.warning("KB 제안 조회 실패: %s", exc)
        return []
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# 2. SolutionEffectivenessTracker — KB 솔루션 효과 추적
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
async def record_kb_usage(
    db: AsyncSession,
    sr_id: str,
    kb_doc_id: str,
    kb_id: Optional[int] = None,
) -> Dict:
    """Record that a KB document was used to resolve an SR.

    At most one feedback row is kept per (``sr_id``, ``kb_doc_id``) pair; when
    one already exists nothing is written.

    Returns:
        ``{"recorded": True, "feedback_id": ...}`` after inserting, or
        ``{"recorded": False, "reason": ..., "feedback_id": ...}`` for a
        duplicate.
    """
    from models import SolutionFeedback

    # Duplicate guard.
    same_pair = and_(
        SolutionFeedback.sr_id == sr_id,
        SolutionFeedback.kb_doc_id == kb_doc_id,
    )
    duplicate = (await db.execute(
        select(SolutionFeedback).where(same_pair)
    )).scalars().first()
    if duplicate:
        return {"recorded": False, "reason": "이미 기록됨", "feedback_id": duplicate.id}

    feedback = SolutionFeedback(
        sr_id=sr_id,
        kb_id=kb_id,
        kb_doc_id=kb_doc_id,
        applied_at=datetime.utcnow(),
        resolved=True,
    )
    db.add(feedback)
    await db.commit()
    await db.refresh(feedback)  # reload so the generated id is available
    logger.info("KB 사용 기록: SR=%s KB=%s", sr_id, kb_doc_id)
    return {"recorded": True, "feedback_id": feedback.id}
|
|
|
|
|
|
async def check_solution_effectiveness(
    db: AsyncSession,
    feedback_id: int,
    recurrence_window_days: int = 7,
) -> Dict:
    """Score a KB feedback by checking whether the same kind of SR came back.

    A recurrence is another SR of the same institution and SR type created
    within ``recurrence_window_days`` after the feedback's ``applied_at``.
    The feedback is scored -1 when one exists and +1 otherwise, and the score
    is accumulated on the referenced KB document. When the KB document's
    accumulated score reaches 10 it is promoted to a LessonLearned.

    Re-checking an already checked feedback only applies the *difference*
    between the old and new score to the KB document, so repeated checks do
    not inflate the total.

    Returns:
        ``{"error": ...}`` when the feedback or its SR is missing, otherwise
        a dict with ``feedback_id``, ``sr_id``, ``kb_doc_id``,
        ``effectiveness_score``, ``recurred`` and ``recurred_within_days``.
    """
    from models import SolutionFeedback, SRRequest, KBDocument

    fb = (await db.execute(
        select(SolutionFeedback).where(SolutionFeedback.id == feedback_id)
    )).scalars().first()
    if not fb:
        return {"error": "feedback not found"}

    sr = (await db.execute(
        select(SRRequest).where(SRRequest.sr_id == fb.sr_id)
    )).scalars().first()
    if not sr:
        return {"error": "SR not found"}

    # Look for a same-institution, same-type SR inside the window. Ordering
    # by creation time makes the pick (and recurred_within_days) the EARLIEST
    # recurrence instead of whatever row the database returns first.
    since = fb.applied_at
    until = since + timedelta(days=recurrence_window_days)
    recurred = (await db.execute(
        select(SRRequest).where(
            and_(
                SRRequest.sr_id != fb.sr_id,
                SRRequest.inst_id == sr.inst_id,
                SRRequest.sr_type == sr.sr_type,
                SRRequest.created_at >= since,
                SRRequest.created_at <= until,
            )
        ).order_by(SRRequest.created_at).limit(1)
    )).scalars().first()

    # Score this feedback already contributed to the KB document (0 if it was
    # never checked before).
    previous_score = (fb.effectiveness_score or 0) if fb.checked_at is not None else 0

    if recurred:
        fb.recurred_within_days = (recurred.created_at - since).days
        fb.effectiveness_score = -1
    else:
        fb.recurred_within_days = None
        fb.effectiveness_score = 1

    fb.checked_at = datetime.utcnow()
    await db.flush()

    # Accumulate on the KB document.
    score_delta = fb.effectiveness_score - previous_score
    if fb.kb_doc_id:
        kb_doc = (await db.execute(
            select(KBDocument).where(KBDocument.doc_id == fb.kb_doc_id)
        )).scalars().first()
        if kb_doc:
            current = getattr(kb_doc, "effectiveness_score", 0) or 0
            kb_doc.effectiveness_score = current + score_delta
            await db.flush()

            # Automatic promotion check.
            if kb_doc.effectiveness_score >= 10:
                await _promote_kb_to_lesson(db, kb_doc)

    # Build the response before committing so that no attribute of a possibly
    # expired instance has to be loaded afterwards.
    result = {
        "feedback_id": feedback_id,
        "sr_id": fb.sr_id,
        "kb_doc_id": fb.kb_doc_id,
        "effectiveness_score": fb.effectiveness_score,
        "recurred": recurred is not None,
        "recurred_within_days": fb.recurred_within_days,
    }

    await db.commit()
    return result
|
|
|
|
|
|
async def _promote_kb_to_lesson(db: AsyncSession, kb_doc) -> Optional[Dict]:
    """Promote a KB document to a verified LessonLearned entry.

    Nothing happens when a lesson promoted from this KB document already
    exists. Otherwise a lesson with id ``LESSON-YYYYMMDD-NNNN`` (sequence
    continued from the highest id of the current UTC day) is added and
    flushed; the commit is left to the caller.

    Returns:
        ``{"lesson_id": ..., "kb_id": ...}`` for a new lesson, ``None`` when
        the document had already been promoted.
    """
    from models import LessonLearned

    # Skip documents that were promoted before.
    already_promoted = (await db.execute(
        select(LessonLearned).where(LessonLearned.promoted_from_kb_id == kb_doc.id)
    )).scalars().first()
    if already_promoted:
        return None

    day_stamp = datetime.utcnow().strftime("%Y%m%d")
    prefix = f"LESSON-{day_stamp}-"
    latest_query = (
        select(LessonLearned.lesson_id)
        .where(LessonLearned.lesson_id.like(f"{prefix}%"))
        .order_by(desc(LessonLearned.lesson_id))
        .limit(1)
    )
    last = (await db.execute(latest_query)).scalar()
    if last:
        seq = int(last.split("-")[-1]) + 1
    else:
        seq = 1
    lesson_id = f"{prefix}{seq:04d}"

    if kb_doc.source_sr_id:
        origin_sr_ids = [kb_doc.source_sr_id]
    else:
        origin_sr_ids = []

    lesson = LessonLearned(
        lesson_id=lesson_id,
        title=f"[검증됨] {kb_doc.title}",
        category=kb_doc.category or "일반",
        problem_pattern=kb_doc.symptoms or "",
        root_cause=kb_doc.cause or "",
        effective_solution=kb_doc.solution or "",
        prevention="",
        confidence_score=kb_doc.effectiveness_score,
        source_kb_ids=[kb_doc.id],
        source_sr_ids=origin_sr_ids,
        promoted_from_kb_id=kb_doc.id,
        is_verified=True,
        created_at=datetime.utcnow(),
    )
    db.add(lesson)
    await db.flush()
    logger.info(
        "KB %d → LessonLearned %s 자동 승격 (효과점수=%d)",
        kb_doc.id, lesson_id, kb_doc.effectiveness_score,
    )
    return {"lesson_id": lesson_id, "kb_id": kb_doc.id}
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# 3. AdaptiveThresholdCalibrator — 이상 탐지 임계값 자동 보정
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
# Calibration bounds: an adapted threshold stays within 50%–200% of its base.
_THRESHOLD_MIN_RATIO = 0.5
_THRESHOLD_MAX_RATIO = 2.0

# Size of a single adjustment (8% of the current threshold).
_ADJUSTMENT_STEP = 0.08

# Raise the threshold when the false-positive rate exceeds 25%.
_FALSE_POSITIVE_TRIGGER = 0.25

# Lower the threshold when more than 3 missed detections were recorded.
_MISSED_TRIGGER = 3
|
|
|
|
|
|
async def record_anomaly_outcome(
    db: AsyncSession,
    source: str,
    metric_type: str,
    was_actual_incident: bool,
    base_threshold: Optional[float] = None,
) -> Dict:
    """Record whether an anomaly alert turned out to be a real incident.

    ``was_actual_incident=True`` counts as a true positive, ``False`` as a
    false positive. The (source, metric_type) tracking row is created on
    first use; its base threshold is ``base_threshold`` when given, else the
    first element of the metric's ``DEFAULT_THRESHOLDS`` entry, else 90.0.

    Returns:
        Dict with ``source``, ``metric_type`` and the updated
        ``true_positive`` / ``false_positive`` counters.
    """
    from models import AdaptiveThreshold
    from core.anomaly import DEFAULT_THRESHOLDS

    rec = (await db.execute(
        select(AdaptiveThreshold).where(
            and_(
                AdaptiveThreshold.source == source,
                AdaptiveThreshold.metric_type == metric_type,
            )
        )
    )).scalars().first()

    if not rec:
        # Fall back to the default threshold for this metric.
        if base_threshold is None:
            info = DEFAULT_THRESHOLDS.get(metric_type)
            base_threshold = info[0] if info else 90.0

        rec = AdaptiveThreshold(
            source=source,
            metric_type=metric_type,
            base_threshold=base_threshold,
            adapted_threshold=base_threshold,
            true_positive=0,
            false_positive=0,
            missed_count=0,
            adaptation_count=0,
            created_at=datetime.utcnow(),
        )
        db.add(rec)
        await db.flush()

    # `or 0` tolerates rows whose counter column is NULL.
    if was_actual_incident:
        rec.true_positive = (rec.true_positive or 0) + 1
    else:
        rec.false_positive = (rec.false_positive or 0) + 1

    # Read the counters before committing: afterwards the instance may be
    # expired and attribute access would need implicit IO.
    true_positive = rec.true_positive or 0
    false_positive = rec.false_positive or 0

    await db.commit()
    return {"source": source, "metric_type": metric_type,
            "true_positive": true_positive, "false_positive": false_positive}
|
|
|
|
|
|
async def record_missed_detection(
    db: AsyncSession,
    source: str,
    metric_type: str,
) -> Dict:
    """Record a real incident that anomaly detection failed to flag.

    The (source, metric_type) tracking row is created on first use, with the
    base threshold taken from the first element of the metric's
    ``DEFAULT_THRESHOLDS`` entry, or 90.0 when the metric has none.

    Returns:
        Dict with ``source``, ``metric_type`` and the updated ``missed_count``.
    """
    from models import AdaptiveThreshold
    from core.anomaly import DEFAULT_THRESHOLDS

    rec = (await db.execute(
        select(AdaptiveThreshold).where(
            and_(
                AdaptiveThreshold.source == source,
                AdaptiveThreshold.metric_type == metric_type,
            )
        )
    )).scalars().first()

    if not rec:
        info = DEFAULT_THRESHOLDS.get(metric_type)
        base = info[0] if info else 90.0
        rec = AdaptiveThreshold(
            source=source, metric_type=metric_type,
            base_threshold=base, adapted_threshold=base,
            true_positive=0, false_positive=0, missed_count=0,
            adaptation_count=0, created_at=datetime.utcnow(),
        )
        db.add(rec)
        await db.flush()

    # `or 0` tolerates rows whose counter column is NULL.
    rec.missed_count = (rec.missed_count or 0) + 1
    # Read the counter before committing: afterwards the instance may be
    # expired and attribute access would need implicit IO.
    missed_count = rec.missed_count

    await db.commit()
    return {"source": source, "metric_type": metric_type, "missed_count": missed_count}
|
|
|
|
|
|
async def calibrate_threshold(
    db: AsyncSession,
    source: str,
    metric_type: str,
) -> Dict:
    """Adjust the adaptive threshold from accumulated alert outcomes.

    With at least 5 recorded outcomes and a false-positive rate above
    ``_FALSE_POSITIVE_TRIGGER`` the threshold is raised by
    ``_ADJUSTMENT_STEP``. When ``missed_count`` exceeds ``_MISSED_TRIGGER``
    it is lowered by the same step instead; this rule is evaluated last and
    therefore wins when both apply. The result is clamped to
    ``[_THRESHOLD_MIN_RATIO, _THRESHOLD_MAX_RATIO]`` times the base threshold
    and rounded to two decimals. After an adjustment the positive counters are
    halved, the missed counter is reset, and the session is committed.

    Returns:
        {"adjusted": bool, "old_threshold": float, "new_threshold": float,
        "reason": str}; ``source`` and ``metric_type`` are included when an
        adjustment was made, and only ``adjusted``/``reason`` when no tracking
        row exists.
    """
    from models import AdaptiveThreshold

    lookup = select(AdaptiveThreshold).where(
        and_(
            AdaptiveThreshold.source == source,
            AdaptiveThreshold.metric_type == metric_type,
        )
    )
    rec = (await db.execute(lookup)).scalars().first()
    if not rec:
        return {"adjusted": False, "reason": "기록 없음"}

    observed = rec.true_positive + rec.false_positive
    previous = rec.adapted_threshold
    candidate = previous
    reason = "변경 없음"

    if observed >= 5:
        fp_rate = rec.false_positive / observed
        if fp_rate > _FALSE_POSITIVE_TRIGGER:
            # Too many false alarms: alert only at higher values.
            candidate = previous * (1 + _ADJUSTMENT_STEP)
            reason = f"오탐률 {fp_rate:.0%} > {_FALSE_POSITIVE_TRIGGER:.0%} → 임계값 상향"

    if rec.missed_count > _MISSED_TRIGGER:
        # Too many misses: become more sensitive.
        candidate = previous * (1 - _ADJUSTMENT_STEP)
        reason = f"누락 탐지 {rec.missed_count}회 > {_MISSED_TRIGGER}회 → 임계값 하향"

    # Keep the value inside the allowed band around the base threshold.
    floor = rec.base_threshold * _THRESHOLD_MIN_RATIO
    ceiling = rec.base_threshold * _THRESHOLD_MAX_RATIO
    candidate = min(ceiling, candidate)
    candidate = max(floor, candidate)
    candidate = round(candidate, 2)

    if abs(candidate - previous) < 0.01:
        return {
            "adjusted": False,
            "old_threshold": previous,
            "new_threshold": candidate,
            "reason": reason,
        }

    rec.adapted_threshold = candidate
    rec.last_adapted_at = datetime.utcnow()
    rec.adaptation_count += 1
    # Partially reset the counters so old outcomes fade out.
    rec.false_positive = rec.false_positive // 2
    rec.true_positive = rec.true_positive // 2
    rec.missed_count = 0
    await db.commit()

    logger.info("임계값 보정: %s/%s %.2f → %.2f (%s)", source, metric_type, previous, candidate, reason)
    return {
        "adjusted": True,
        "source": source,
        "metric_type": metric_type,
        "old_threshold": previous,
        "new_threshold": candidate,
        "reason": reason,
    }
|
|
|
|
|
|
async def get_adapted_threshold(
    db: AsyncSession,
    source: str,
    metric_type: str,
) -> Optional[float]:
    """Return the stored adaptive threshold for (source, metric_type).

    ``None`` is returned when no tracking row exists, leaving the caller to
    use its default.
    """
    from models import AdaptiveThreshold

    same_key = and_(
        AdaptiveThreshold.source == source,
        AdaptiveThreshold.metric_type == metric_type,
    )
    query = select(AdaptiveThreshold.adapted_threshold).where(same_key)
    outcome = await db.execute(query)
    return outcome.scalar()
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# 4. LessonMiner — 패턴 마이닝 & 교훈 자동 생성
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
async def run_lesson_mining(
    db: AsyncSession,
    days_back: int = 30,
    min_occurrences: int = 3,
) -> Dict:
    """Turn recent recurrence patterns into LessonLearned entries.

    Every pattern seen within ``days_back`` days with at least
    ``min_occurrences`` occurrences gets a lesson whose id ends in
    ``-PAT<pattern id>``: an existing lesson is refreshed, otherwise a new
    unverified one is created. Afterwards pending solution feedbacks are
    checked and adaptive thresholds are calibrated in batch, and the session
    is committed.

    Returns:
        {"patterns_analyzed": int, "lessons_created": int,
        "lessons_updated": int, "thresholds_calibrated": int, "run_at": str}
    """
    from models import RecurrencePattern, LessonLearned

    since = datetime.utcnow() - timedelta(days=days_back)
    patterns = (await db.execute(
        select(RecurrencePattern).where(
            and_(
                RecurrencePattern.last_seen_at >= since,
                RecurrencePattern.occurrence_count >= min_occurrences,
            )
        ).order_by(desc(RecurrencePattern.occurrence_count))
    )).scalars().all()

    created = 0
    updated = 0

    for pat in patterns:
        sr_ids = pat.sr_ids or []
        if not sr_ids:
            continue

        # Most effective KB document used by this pattern's SRs, if any.
        best_kb = await _find_best_kb_for_pattern_srs(db, sr_ids)

        # Lesson previously generated for this pattern.
        existing_lesson = (await db.execute(
            select(LessonLearned).where(
                LessonLearned.lesson_id.like(f"%-PAT{pat.id}")
            )
        )).scalars().first()

        if existing_lesson:
            existing_lesson.confidence_score = pat.occurrence_count
            existing_lesson.source_sr_ids = sr_ids[-10:]
            if best_kb:
                # The "solution" key is always present but may hold None, so a
                # .get() default would never apply; `or` keeps the current
                # text instead of wiping it with an empty KB solution.
                existing_lesson.effective_solution = (
                    best_kb.get("solution") or existing_lesson.effective_solution
                )
            existing_lesson.updated_at = datetime.utcnow()
            updated += 1
        else:
            today = datetime.utcnow().strftime("%Y%m%d")
            lesson_id = f"LESSON-{today}-PAT{pat.id}"

            solution_text = ""
            root_cause_text = ""
            if best_kb:
                solution_text = best_kb.get("solution") or ""
                root_cause_text = best_kb.get("cause") or ""

            lesson = LessonLearned(
                lesson_id=lesson_id,
                title=f"[반복패턴] {pat.sr_type} 재발 {pat.occurrence_count}회 — {(pat.tech_keywords or '').replace(',', '/')}",
                category=_infer_category(pat.tech_keywords or ""),
                problem_pattern=pat.keyword_signature or "",
                root_cause=root_cause_text or "반복 패턴 기반 분석 중",
                effective_solution=solution_text or "KB 문서 참조 또는 전문가 조치 필요",
                prevention=f"동일 유형({pat.sr_type}) SR이 {pat.occurrence_count}회 반복됨. 근본 원인 제거 필요.",
                confidence_score=pat.occurrence_count,
                source_kb_ids=([best_kb["id"]] if best_kb else []),
                source_sr_ids=sr_ids[-10:],
                is_verified=False,
                created_at=datetime.utcnow(),
            )
            db.add(lesson)
            created += 1

    # Verify feedbacks whose effectiveness has not been checked yet.
    await _batch_check_pending_feedbacks(db)

    # Calibrate adaptive thresholds in bulk.
    calibration_results = await _batch_calibrate_thresholds(db)

    await db.commit()

    return {
        "patterns_analyzed": len(patterns),
        "lessons_created": created,
        "lessons_updated": updated,
        "thresholds_calibrated": calibration_results,
        "run_at": datetime.utcnow().isoformat(),
    }
|
|
|
|
|
|
async def _find_best_kb_for_pattern_srs(
    db: AsyncSession,
    sr_ids: List[str],
) -> Optional[Dict]:
    """Return the KB document behind the best-scored feedback of the given SRs.

    Only feedbacks with a positive effectiveness score and a KB document
    reference are considered.

    Returns:
        Dict with ``id``, ``doc_id``, ``title``, ``solution`` and ``cause`` of
        the KB document, or ``None`` when no feedback or document qualifies.
    """
    from models import SolutionFeedback, KBDocument

    eligible = and_(
        SolutionFeedback.sr_id.in_(sr_ids),
        SolutionFeedback.effectiveness_score > 0,
        SolutionFeedback.kb_doc_id.isnot(None),
    )
    top_query = (
        select(SolutionFeedback)
        .where(eligible)
        .order_by(desc(SolutionFeedback.effectiveness_score))
        .limit(1)
    )
    top_feedback = (await db.execute(top_query)).scalars().first()
    if not top_feedback or not top_feedback.kb_doc_id:
        return None

    doc = (await db.execute(
        select(KBDocument).where(KBDocument.doc_id == top_feedback.kb_doc_id)
    )).scalars().first()
    if not doc:
        return None

    return {
        "id": doc.id,
        "doc_id": doc.doc_id,
        "title": doc.title,
        "solution": doc.solution,
        "cause": doc.cause,
    }
|
|
|
|
|
|
async def _batch_check_pending_feedbacks(db: AsyncSession) -> int:
    """Check up to 50 feedbacks that were applied 7+ days ago and never checked.

    Only the ids are selected. ``check_solution_effectiveness`` commits the
    session, which may expire loaded ORM instances; iterating plain ids avoids
    touching expired objects in later iterations and in the error handler.

    Returns:
        Number of feedbacks processed without an exception.
    """
    from models import SolutionFeedback

    cutoff = datetime.utcnow() - timedelta(days=7)
    pending_ids = (await db.execute(
        select(SolutionFeedback.id).where(
            and_(
                SolutionFeedback.checked_at.is_(None),
                SolutionFeedback.applied_at <= cutoff,
            )
        ).limit(50)
    )).scalars().all()

    checked = 0
    for pending_id in pending_ids:
        try:
            await check_solution_effectiveness(db, pending_id)
            checked += 1
        except Exception as e:
            # Best effort: one failing feedback must not stop the batch.
            logger.debug("피드백 %d 검증 실패: %s", pending_id, e)

    return checked
|
|
|
|
|
|
async def _batch_calibrate_thresholds(db: AsyncSession) -> int:
    """Calibrate up to 100 adaptive thresholds that have enough evidence.

    A row qualifies when it has at least 5 recorded outcomes or when its
    missed-detection counter exceeds ``_MISSED_TRIGGER``; both are conditions
    ``calibrate_threshold`` acts on. Only the (source, metric_type) keys are
    selected, because ``calibrate_threshold`` commits the session and loaded
    ORM instances may be expired by the next iteration.

    Returns:
        Number of thresholds that were actually adjusted.
    """
    from models import AdaptiveThreshold

    enough_outcomes = (
        (AdaptiveThreshold.true_positive + AdaptiveThreshold.false_positive) >= 5
    )
    too_many_misses = AdaptiveThreshold.missed_count > _MISSED_TRIGGER

    keys = (await db.execute(
        select(AdaptiveThreshold.source, AdaptiveThreshold.metric_type)
        .where(enough_outcomes | too_many_misses)
        .limit(100)
    )).all()

    calibrated = 0
    for source, metric_type in keys:
        result = await calibrate_threshold(db, source, metric_type)
        if result.get("adjusted"):
            calibrated += 1

    return calibrated
|
|
|
|
|
|
def _infer_category(tech_keywords: str) -> str:
    """Map a technology-keyword string to a lesson category.

    The rules are substring checks on the lowercased input, tried in order;
    the first hit decides. ``"일반"`` is returned when none applies.
    """
    lowered = tech_keywords.lower()
    # Order matters: earlier entries take precedence.
    rules = (
        ("deploy", "배포"),
        ("was", "WAS"),
        ("db", "DB"),
        ("web", "웹서버"),
        ("resource", "서버 운영"),
        ("network", "네트워크"),
        ("ssl", "보안"),
        ("incident", "장애 대응"),
    )
    for needle, category in rules:
        if needle in lowered:
            return category
    return "일반"
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# 통계 요약
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
async def get_learning_stats(db: AsyncSession) -> Dict:
    """Summarise the current state of the learning engine.

    Returns:
        Dict with the sections ``recurrence`` (totals plus the five patterns
        with the highest occurrence count), ``solution_effectiveness``,
        ``lessons`` and ``adaptive_thresholds``, and a ``generated_at``
        ISO timestamp.
    """
    from models import (
        RecurrencePattern, SolutionFeedback, AdaptiveThreshold, LessonLearned, KBDocument
    )

    async def _count(model, condition=None) -> int:
        # COUNT(*) over the model's table, optionally filtered.
        stmt = select(func.count()).select_from(model)
        if condition is not None:
            stmt = stmt.where(condition)
        return (await db.execute(stmt)).scalar() or 0

    pattern_count = await _count(RecurrencePattern)
    escalated_count = await _count(RecurrencePattern, RecurrencePattern.escalated == True)
    feedback_count = await _count(SolutionFeedback)
    effective_count = await _count(SolutionFeedback, SolutionFeedback.effectiveness_score > 0)
    lesson_count = await _count(LessonLearned)
    verified_lesson_count = await _count(LessonLearned, LessonLearned.is_verified == True)
    threshold_count = await _count(AdaptiveThreshold)
    adapted_count = await _count(AdaptiveThreshold, AdaptiveThreshold.adaptation_count > 0)

    # Five most frequently recurring patterns.
    top_patterns = (await db.execute(
        select(RecurrencePattern)
        .order_by(desc(RecurrencePattern.occurrence_count))
        .limit(5)
    )).scalars().all()

    top_recurring = []
    for p in top_patterns:
        top_recurring.append({
            "id": p.id,
            "sr_type": p.sr_type,
            "count": p.occurrence_count,
            "tech_keywords": p.tech_keywords,
            "last_seen": p.last_seen_at.isoformat() if p.last_seen_at else None,
        })

    if feedback_count:
        effectiveness_rate = round(effective_count / feedback_count, 3)
    else:
        effectiveness_rate = 0

    return {
        "recurrence": {
            "total_patterns": pattern_count,
            "escalated": escalated_count,
            "top_recurring": top_recurring,
        },
        "solution_effectiveness": {
            "total_feedbacks": feedback_count,
            "effective": effective_count,
            "effectiveness_rate": effectiveness_rate,
        },
        "lessons": {
            "total": lesson_count,
            "verified": verified_lesson_count,
        },
        "adaptive_thresholds": {
            "total_tracked": threshold_count,
            "adapted": adapted_count,
        },
        "generated_at": datetime.utcnow().isoformat(),
    }
|