438 lines
15 KiB
Python
438 lines
15 KiB
Python
"""
|
|
B-5: 멀티 에이전트 협업 오케스트레이션 엔진
|
|
|
|
워크플로우 유형:
|
|
SR_TO_DEPLOY — SR 접수 → 코드 리뷰 → 변경 승인 → 배포
|
|
INCIDENT_RESP — 인시던트 탐지 → 담당자 배정 → RCA → 복구
|
|
CODE_REVIEW — 코드 리뷰 → 취약점 스캔 → 보고서 생성
|
|
MAINTENANCE — 정기 점검 → 변경 관리 → 완료 보고
|
|
CUSTOM — 사용자 정의 단계
|
|
|
|
각 에이전트는 API 호출로 작업을 수행.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import httpx
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ── 에이전트 레지스트리 (내부 API URL) ────────────────────────────────────────
|
|
|
|
_BASE = "http://localhost:8000" # FastAPI 자기 자신
|
|
|
|
AGENT_ACTIONS: Dict[str, Dict[str, str]] = {
|
|
"sr-manager": {
|
|
"create_sr": f"{_BASE}/api/tasks",
|
|
"update_status": f"{_BASE}/api/tasks/{{sr_id}}/status",
|
|
"assign": f"{_BASE}/api/assign/{{sr_id}}",
|
|
},
|
|
"code-reviewer": {
|
|
"quick_scan": f"{_BASE}/api/code-review/quick-scan",
|
|
"full_review": f"{_BASE}/api/code-review",
|
|
"get_result": f"{_BASE}/api/code-review/{{review_id}}",
|
|
},
|
|
"deploy-engineer": {
|
|
"create_session": f"{_BASE}/api/vibe",
|
|
"trigger_build": f"{_BASE}/api/vibe/{{session_id}}/build",
|
|
"request_approval": f"{_BASE}/api/vibe/{{session_id}}/request-approval",
|
|
},
|
|
"sla-guardian": {
|
|
"check_sla": f"{_BASE}/api/sla/check",
|
|
},
|
|
"anomaly-detector": {
|
|
"detect": f"{_BASE}/api/anomaly/detect",
|
|
"get_events": f"{_BASE}/api/anomaly/events",
|
|
},
|
|
"kb-agent": {
|
|
"analyze_sr": f"{_BASE}/api/kb-agent/analyze/{{sr_id}}",
|
|
"run_batch": f"{_BASE}/api/kb-agent/run",
|
|
},
|
|
}
|
|
|
|
|
|
# ── 워크플로우 템플릿 ─────────────────────────────────────────────────────────
|
|
|
|
def _sr_to_deploy_steps() -> List[Dict]:
|
|
"""SR → 코드 리뷰 → 배포 워크플로우 단계."""
|
|
return [
|
|
{
|
|
"order": 1,
|
|
"agent_name": "sr-manager",
|
|
"action": "sr_status_in_progress",
|
|
"description": "SR 상태 IN_PROGRESS 전환",
|
|
},
|
|
{
|
|
"order": 2,
|
|
"agent_name": "code-reviewer",
|
|
"action": "quick_security_scan",
|
|
"description": "빠른 보안 스캔 (즉시 실행)",
|
|
},
|
|
{
|
|
"order": 3,
|
|
"agent_name": "code-reviewer",
|
|
"action": "full_code_review",
|
|
"description": "전체 코드 리뷰 (Ollama LLM)",
|
|
"requires_llm": True,
|
|
},
|
|
{
|
|
"order": 4,
|
|
"agent_name": "deploy-engineer",
|
|
"action": "create_vibe_session",
|
|
"description": "배포 세션 생성",
|
|
},
|
|
{
|
|
"order": 5,
|
|
"agent_name": "deploy-engineer",
|
|
"action": "trigger_build",
|
|
"description": "빌드 트리거",
|
|
},
|
|
{
|
|
"order": 6,
|
|
"agent_name": "sr-manager",
|
|
"action": "sr_status_completed",
|
|
"description": "SR 완료 처리",
|
|
},
|
|
{
|
|
"order": 7,
|
|
"agent_name": "kb-agent",
|
|
"action": "analyze_and_create_kb",
|
|
"description": "KB 자동 업데이트",
|
|
},
|
|
]
|
|
|
|
|
|
def _incident_response_steps() -> List[Dict]:
|
|
"""인시던트 대응 워크플로우 단계."""
|
|
return [
|
|
{
|
|
"order": 1,
|
|
"agent_name": "sr-manager",
|
|
"action": "create_incident_sr",
|
|
"description": "인시던트 SR 생성 (CRITICAL 우선순위)",
|
|
},
|
|
{
|
|
"order": 2,
|
|
"agent_name": "sr-manager",
|
|
"action": "assign_oncall_engineer",
|
|
"description": "온콜 엔지니어 즉시 배정",
|
|
},
|
|
{
|
|
"order": 3,
|
|
"agent_name": "anomaly-detector",
|
|
"action": "gather_metrics",
|
|
"description": "관련 메트릭 수집",
|
|
},
|
|
{
|
|
"order": 4,
|
|
"agent_name": "code-reviewer",
|
|
"action": "quick_security_scan",
|
|
"description": "보안 취약점 즉시 스캔",
|
|
},
|
|
{
|
|
"order": 5,
|
|
"agent_name": "sr-manager",
|
|
"action": "sr_status_completed",
|
|
"description": "인시던트 해결 완료 처리",
|
|
},
|
|
{
|
|
"order": 6,
|
|
"agent_name": "kb-agent",
|
|
"action": "analyze_and_create_kb",
|
|
"description": "인시던트 KB 문서 자동 생성",
|
|
},
|
|
]
|
|
|
|
|
|
def _code_review_steps() -> List[Dict]:
|
|
"""코드 리뷰 워크플로우 단계."""
|
|
return [
|
|
{
|
|
"order": 1,
|
|
"agent_name": "code-reviewer",
|
|
"action": "quick_security_scan",
|
|
"description": "빠른 보안 스캔",
|
|
},
|
|
{
|
|
"order": 2,
|
|
"agent_name": "code-reviewer",
|
|
"action": "full_code_review",
|
|
"description": "전체 코드 리뷰",
|
|
"requires_llm": True,
|
|
},
|
|
{
|
|
"order": 3,
|
|
"agent_name": "kb-agent",
|
|
"action": "analyze_and_create_kb",
|
|
"description": "리뷰 결과 KB 업데이트",
|
|
},
|
|
]
|
|
|
|
|
|
WORKFLOW_TEMPLATES: Dict[str, List[Dict]] = {
|
|
"SR_TO_DEPLOY": _sr_to_deploy_steps(),
|
|
"INCIDENT_RESP": _incident_response_steps(),
|
|
"CODE_REVIEW": _code_review_steps(),
|
|
}
|
|
|
|
|
|
# ── 에이전트 액션 실행 ────────────────────────────────────────────────────────
|
|
|
|
async def _execute_action(
|
|
agent_name: str,
|
|
action: str,
|
|
context: Dict,
|
|
timeout: int = 30,
|
|
) -> Dict:
|
|
"""
|
|
에이전트 액션 실행 (내부 API 호출).
|
|
실패 시 {"success": False, "error": ...} 반환.
|
|
"""
|
|
sr_id = context.get("sr_id", "")
|
|
project_id = context.get("project_id")
|
|
project_dir = context.get("project_dir", "")
|
|
|
|
result = {"success": True, "action": action, "agent": agent_name, "data": {}}
|
|
|
|
try:
|
|
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
|
|
# SR 상태 변경
|
|
if agent_name == "sr-manager" and action.startswith("sr_status_"):
|
|
new_status = "IN_PROGRESS" if "in_progress" in action else "COMPLETED"
|
|
if sr_id:
|
|
resp = await client.patch(
|
|
f"{_BASE}/api/tasks/{sr_id}/status",
|
|
json={"status": new_status, "note": f"워크플로우 자동 처리: {new_status}"},
|
|
)
|
|
result["data"]["status_changed"] = new_status
|
|
else:
|
|
result["data"]["skipped"] = "sr_id 없음"
|
|
|
|
# 빠른 보안 스캔
|
|
elif agent_name == "code-reviewer" and action == "quick_security_scan":
|
|
if project_dir:
|
|
resp = await client.post(
|
|
f"{_BASE}/api/code-review/quick-scan",
|
|
params={"project_dir": project_dir},
|
|
)
|
|
if resp.status_code == 200:
|
|
data = resp.json()
|
|
result["data"]["findings"] = len(data.get("findings", []))
|
|
result["data"]["critical"] = data.get("critical_count", 0)
|
|
else:
|
|
result["data"]["skipped"] = f"scan 실패 {resp.status_code}"
|
|
else:
|
|
result["data"]["skipped"] = "project_dir 없음"
|
|
|
|
# KB 분석
|
|
elif agent_name == "kb-agent" and action == "analyze_and_create_kb":
|
|
if sr_id:
|
|
resp = await client.post(
|
|
f"{_BASE}/api/kb-agent/analyze/{sr_id}",
|
|
params={"use_llm": "false"},
|
|
)
|
|
if resp.status_code == 200:
|
|
data = resp.json()
|
|
result["data"]["kb_created"] = data.get("created", False)
|
|
result["data"]["kb_id"] = data.get("kb_id")
|
|
else:
|
|
result["data"]["skipped"] = "KB 생성 실패"
|
|
else:
|
|
result["data"]["skipped"] = "sr_id 없음"
|
|
|
|
# 기타 액션 — 시뮬레이션 성공
|
|
else:
|
|
result["data"]["simulated"] = True
|
|
result["data"]["action"] = action
|
|
|
|
except (httpx.ConnectError, httpx.TimeoutException) as e:
|
|
# 자기 자신에 대한 API 호출 실패 → 시뮬레이션 모드
|
|
logger.debug("에이전트 액션 API 호출 실패 (%s.%s): %s", agent_name, action, e)
|
|
result["data"]["simulated"] = True
|
|
result["data"]["note"] = "API 미연결 → 시뮬레이션"
|
|
|
|
except Exception as e:
|
|
logger.error("에이전트 액션 오류 (%s.%s): %s", agent_name, action, e)
|
|
result["success"] = False
|
|
result["error"] = str(e)[:100]
|
|
|
|
return result
|
|
|
|
|
|
# ── 워크플로우 실행 엔진 ──────────────────────────────────────────────────────
|
|
|
|
async def execute_workflow(
|
|
instance_id: int,
|
|
steps: List[Dict],
|
|
context: Dict,
|
|
db_session_factory,
|
|
) -> None:
|
|
"""
|
|
워크플로우 단계 순차 실행.
|
|
각 단계 완료 후 DB 업데이트.
|
|
"""
|
|
from models import WorkflowInstance, WorkflowStep, WorkflowStatus
|
|
|
|
async with db_session_factory() as db:
|
|
instance = (await db.execute(
|
|
select(WorkflowInstance).where(WorkflowInstance.id == instance_id)
|
|
)).scalars().first()
|
|
|
|
if not instance:
|
|
logger.error("워크플로우 인스턴스 %d 없음", instance_id)
|
|
return
|
|
|
|
instance.status = WorkflowStatus.RUNNING
|
|
instance.started_at = datetime.utcnow()
|
|
await db.commit()
|
|
|
|
results = []
|
|
current_step = 0
|
|
|
|
for step_def in steps:
|
|
current_step += 1
|
|
step_order = step_def["order"]
|
|
agent_name = step_def["agent_name"]
|
|
action = step_def["action"]
|
|
|
|
async with db_session_factory() as db:
|
|
# 단계 조회
|
|
step_row = (await db.execute(
|
|
select(WorkflowStep).where(
|
|
WorkflowStep.instance_id == instance_id,
|
|
WorkflowStep.step_order == step_order,
|
|
)
|
|
)).scalars().first()
|
|
|
|
if step_row:
|
|
step_row.status = WorkflowStatus.RUNNING
|
|
step_row.started_at = datetime.utcnow()
|
|
await db.commit()
|
|
|
|
# 액션 실행
|
|
start_ts = datetime.utcnow()
|
|
action_result = await _execute_action(agent_name, action, context)
|
|
duration = int((datetime.utcnow() - start_ts).total_seconds())
|
|
|
|
results.append(action_result)
|
|
|
|
# 컨텍스트 업데이트 (다음 단계에 전달)
|
|
if action_result.get("success") and action_result.get("data"):
|
|
context[f"step_{step_order}_result"] = action_result["data"]
|
|
|
|
async with db_session_factory() as db:
|
|
step_row = (await db.execute(
|
|
select(WorkflowStep).where(
|
|
WorkflowStep.instance_id == instance_id,
|
|
WorkflowStep.step_order == step_order,
|
|
)
|
|
)).scalars().first()
|
|
|
|
instance = (await db.execute(
|
|
select(WorkflowInstance).where(WorkflowInstance.id == instance_id)
|
|
)).scalars().first()
|
|
|
|
if step_row:
|
|
step_row.status = "COMPLETED" if action_result.get("success") else "FAILED"
|
|
step_row.output_json = json.dumps(action_result, ensure_ascii=False)
|
|
step_row.completed_at = datetime.utcnow()
|
|
step_row.duration_sec = duration
|
|
if not action_result.get("success"):
|
|
step_row.error_msg = action_result.get("error", "")
|
|
|
|
if instance:
|
|
instance.current_step = current_step
|
|
instance.progress_pct = int(current_step / instance.total_steps * 100)
|
|
|
|
await db.commit()
|
|
|
|
# 단계 실패 시 워크플로우 중단
|
|
if not action_result.get("success"):
|
|
async with db_session_factory() as db:
|
|
inst = (await db.execute(
|
|
select(WorkflowInstance).where(WorkflowInstance.id == instance_id)
|
|
)).scalars().first()
|
|
if inst:
|
|
inst.status = WorkflowStatus.FAILED
|
|
inst.error_msg = f"Step {step_order} ({agent_name}.{action}) 실패"
|
|
inst.completed_at = datetime.utcnow()
|
|
await db.commit()
|
|
return
|
|
|
|
# 짧은 대기 (연속 API 호출 부하 방지)
|
|
await asyncio.sleep(0.1)
|
|
|
|
# 완료
|
|
async with db_session_factory() as db:
|
|
inst = (await db.execute(
|
|
select(WorkflowInstance).where(WorkflowInstance.id == instance_id)
|
|
)).scalars().first()
|
|
if inst:
|
|
inst.status = WorkflowStatus.COMPLETED
|
|
inst.progress_pct = 100
|
|
inst.completed_at = datetime.utcnow()
|
|
inst.result_json = json.dumps(
|
|
{"steps": results, "context": context},
|
|
ensure_ascii=False,
|
|
default=str,
|
|
)
|
|
await db.commit()
|
|
|
|
logger.info("워크플로우 %d 완료: %d 단계", instance_id, len(steps))
|
|
|
|
|
|
async def create_workflow_instance(
|
|
db: AsyncSession,
|
|
workflow_type: str,
|
|
title: str,
|
|
sr_id: Optional[str] = None,
|
|
project_id: Optional[int] = None,
|
|
context: Optional[Dict] = None,
|
|
triggered_by: str = "system",
|
|
) -> "WorkflowInstance":
|
|
"""워크플로우 인스턴스 + 단계 레코드 생성."""
|
|
from models import WorkflowInstance, WorkflowStep
|
|
|
|
steps = WORKFLOW_TEMPLATES.get(workflow_type, [])
|
|
ctx = {
|
|
"sr_id": sr_id or "",
|
|
"project_id": project_id,
|
|
**(context or {}),
|
|
}
|
|
|
|
instance = WorkflowInstance(
|
|
workflow_type = workflow_type,
|
|
status = "PENDING",
|
|
title = title,
|
|
sr_id = sr_id,
|
|
project_id = project_id,
|
|
triggered_by = triggered_by,
|
|
context_json = json.dumps(ctx, ensure_ascii=False, default=str),
|
|
total_steps = len(steps),
|
|
progress_pct = 0,
|
|
)
|
|
db.add(instance)
|
|
await db.flush()
|
|
|
|
for step_def in steps:
|
|
db.add(WorkflowStep(
|
|
instance_id = instance.id,
|
|
step_order = step_def["order"],
|
|
agent_name = step_def["agent_name"],
|
|
action = step_def["action"],
|
|
status = "PENDING",
|
|
input_json = json.dumps({"description": step_def.get("description", "")}, ensure_ascii=False),
|
|
))
|
|
|
|
await db.commit()
|
|
await db.refresh(instance)
|
|
return instance
|