# guardia-itsm/routers/predictive_capacity.py
#
# 446 lines
# 16 KiB
# Python

"""
예측 용량 계획 (Predictive Capacity Planning) API 라우터
엔드포인트:
GET /api/capacity-ai/forecast — 예측 현황 (최근 예측 목록)
POST /api/capacity-ai/forecast — 예측 모델 실행
GET /api/capacity-ai/forecast/{days} — N일 후 용량 예측 (30/60/90)
GET /api/capacity-ai/recommendations — 증설·감축 권고 목록
POST /api/capacity-ai/recommendations/{id}/approve — 권고 승인
POST /api/capacity-ai/recommendations/{id}/reject — 권고 반려
GET /api/capacity-ai/budget-cycle — 예산 사이클 현황
POST /api/capacity-ai/budget-cycle — 예산 사이클 등록
GET /api/capacity-ai/alerts — 용량 임박 경보 (80% 이상 예측)
"""
from __future__ import annotations
import logging
import random
from datetime import datetime
from typing import List, Optional
import httpx
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import select, desc, and_
from sqlalchemy.ext.asyncio import AsyncSession
from core.auth import get_current_user
from database import get_db
from models import (
User,
CapacityForecast, CapacityForecastOut,
CapacityRecommendation, CapacityRecommendationOut,
BudgetCycle, BudgetCycleOut, BudgetCycleCreate,
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Every route declared on this router is served under the /api/capacity-ai prefix.
router = APIRouter(prefix="/api/capacity-ai", tags=["predictive-capacity"])
# ── Per-quarter budget-cycle recommendation messages (public-sector budget cycle) ──
# Keys are quarter numbers 1-4; values are the user-facing (Korean) messages.
_BUDGET_QUARTER_MSG = {
1: "1분기: 예산 집행 초기 — 신규 도입 권고",
2: "2분기: 중간 점검 — 절감 기회 발굴",
3: "3분기: 하반기 대비 — 증설 검토",
4: "4분기: 연말 집행 — 불용 예산 활용 권고",
}
# Imminent-capacity alert rules as (forecast horizon in days, predicted-value threshold in %, urgency label).
# Rules are evaluated in list order by the code below, so earlier entries take precedence.
_ALERT_RULES = [
(30, 80.0, "immediate"), # 80% or more within 30 days → recommend immediately
(60, 90.0, "30days"), # 90% or more within 60 days → needs review
(90, 95.0, "60days"), # 95% or more within 90 days → start planning
]
def get_budget_recommendation(quarter: int) -> str:
    """Return the budget-cycle recommendation message for a quarter.

    Args:
        quarter: Quarter number; the known keys of ``_BUDGET_QUARTER_MSG`` are 1-4.

    Returns:
        The message registered for ``quarter``, or a generic
        "budget planning in progress" message (in Korean) for any other value.
    """
    try:
        return _BUDGET_QUARTER_MSG[quarter]
    except KeyError:
        # Unknown quarter: fall back to the generic message.
        return "예산 계획 수립 중"
def _urgency_from_predicted(days: int, predicted: float) -> Optional[str]:
    """Map a forecast horizon and predicted value to an urgency label.

    The rules in ``_ALERT_RULES`` are scanned in order and the first one whose
    horizon covers ``days`` and whose threshold is met by ``predicted`` wins.

    Args:
        days: Forecast horizon in days.
        predicted: Predicted value, compared against each rule's threshold.

    Returns:
        The urgency label of the first matching rule, or ``None`` when no
        rule matches.
    """
    matching_labels = (
        label
        for max_days, min_value, label in _ALERT_RULES
        if days <= max_days and predicted >= min_value
    )
    return next(matching_labels, None)
def _trend_label(growth_rate: float) -> str:
if growth_rate > 0.5:
return "increasing"
if growth_rate < -0.1:
return "decreasing"
return "stable"
async def _ollama_reason(server_name: str, metric: str, predicted: float, days: int) -> str:
    """Ask an Ollama instance on localhost to explain why a scale-up is needed.

    This is best-effort: a request error, a non-200 status, an unparsable
    body or an empty completion all fall back to a canned Korean message, so
    the caller always receives a non-empty reason. Failures are logged
    instead of being silently discarded.

    Args:
        server_name: Server name interpolated into the prompt.
        metric: Metric label interpolated into the prompt and, upper-cased,
            into the fallback message.
        predicted: Predicted utilisation in percent.
        days: Forecast horizon in days.

    Returns:
        The stripped ``response`` text from Ollama, or the fallback message.
    """
    # The possessive particle and space keep the server name and the metric
    # from running together in the prompt.
    prompt = (
        f"서버 '{server_name}'의 {metric} 사용률이 {days}일 후 {predicted:.1f}%에 "
        f"도달할 것으로 예측됩니다. 공공기관 IT 운영 관점에서 증설이 필요한 이유를 "
        f"한국어로 2문장 이내로 간결하게 설명하세요."
    )
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                "http://localhost:11434/api/generate",
                json={"model": "llama3", "prompt": prompt, "stream": False},
            )
        if resp.status_code == 200:
            # `or ""` guards against an explicit null in the JSON payload.
            text = (resp.json().get("response") or "").strip()
            if text:
                return text
            logger.warning(
                "Ollama returned an empty response for %s/%s; using fallback reason",
                server_name, metric,
            )
        else:
            logger.warning(
                "Ollama returned HTTP %s for %s/%s; using fallback reason",
                resp.status_code, server_name, metric,
            )
    except Exception:
        # Deliberately broad: reason generation must never break the forecast
        # run, but the failure should still be visible in the logs.
        logger.warning(
            "Ollama request failed for %s/%s; using fallback reason",
            server_name, metric, exc_info=True,
        )
    return (
        f"{days}일 내 {metric.upper()} 사용률이 {predicted:.1f}%로 임계치를 초과할 것으로 예측됩니다. "
        f"서비스 안정성 확보를 위해 증설 검토가 필요합니다."
    )
async def run_forecast(days: int, db: AsyncSession, current_user: User) -> dict:
    """Run the simulated forecast model and persist its results.

    Steps:
      1. Load up to 20 server names from the CMDB ``Server`` model; when the
         lookup fails or yields no names, use a fixed list of simulated servers.
      2. For every server and each metric (cpu, memory, disk) draw a random
         current value and daily growth, then project linearly ``days`` ahead
         (capped at 100.0).
      3. Store one ``CapacityForecast`` per server/metric.
      4. When the predicted value is >= 85.0, also store a pending
         ``CapacityRecommendation`` whose reason comes from ``_ollama_reason``.

    Args:
        days: Forecast horizon in days.
        db: Async session used to read servers and persist results; committed
            once at the end.
        current_user: Authenticated user. Not used by the current
            implementation; kept so the signature stays stable.

    Returns:
        A summary dict with ``status``, ``forecast_days``, ``servers_analyzed``,
        ``forecasts_created``, ``recommendations_created`` and ``executed_at``
        (naive UTC timestamp in ISO format).
    """
    # Try the CMDB server list first.
    try:
        from models import Server
        result = await db.execute(select(Server).limit(20))
        servers = result.scalars().all()
        server_names = [s.server_name for s in servers if s.server_name]
    except Exception:
        # Best-effort lookup: log the cause rather than hiding it.
        logger.warning(
            "CMDB server lookup failed; falling back to simulated servers",
            exc_info=True,
        )
        # A failed statement can leave the session's transaction unusable on
        # some backends; roll back so the inserts and commit below can proceed.
        await db.rollback()
        server_names = []

    # No CMDB servers available: use the simulated server list.
    if not server_names:
        server_names = [
            "WEB-SRV-01", "WEB-SRV-02",
            "DB-SRV-01", "DB-SRV-02",
            "APP-SRV-01", "BATCH-SRV-01",
        ]

    metrics = ["cpu", "memory", "disk"]
    # Rough estimated cost per metric, hoisted out of the loop because it is invariant.
    # (The original note gives these as units of 10,000 KRW: CPU 300, memory 150, disk 50.)
    cost_map = {"cpu": 300.0, "memory": 150.0, "disk": 50.0}
    forecasts_created = 0
    recommendations_created = 0

    for server_name in server_names:
        for metric in metrics:
            # Simulated current value (a real deployment would query monitoring).
            current_value = round(random.uniform(30.0, 75.0), 1)
            # Simulated daily growth, in percentage points per day.
            daily_growth = random.uniform(0.3, 1.2)
            # Linear projection `days` ahead, capped at 100%.
            predicted_value = min(current_value + daily_growth * days, 100.0)
            predicted_value = round(predicted_value, 1)
            # Confidence decreases with the horizon, with a floor of 0.5.
            confidence = round(max(0.5, 0.95 - days * 0.003), 2)
            trend = _trend_label(daily_growth)

            db.add(
                CapacityForecast(
                    server_name=server_name,
                    metric=metric,
                    forecast_days=days,
                    current_value=current_value,
                    predicted_value=predicted_value,
                    confidence=confidence,
                    trend=trend,
                )
            )
            forecasts_created += 1

            # Auto-create a recommendation when the prediction reaches 85%.
            if predicted_value >= 85.0:
                urgency = _urgency_from_predicted(days, predicted_value)
                if urgency is None:
                    # No alert rule matched: use the least urgent label.
                    urgency = "60days"
                # Awaited one at a time, so a slow Ollama endpoint slows the whole run.
                reason = await _ollama_reason(server_name, metric, predicted_value, days)
                estimated_cost = cost_map.get(metric, 100.0)
                rec_type = "scale_up" if metric in ("cpu", "memory") else "add_server"
                db.add(
                    CapacityRecommendation(
                        server_name=server_name,
                        rec_type=rec_type,
                        urgency=urgency,
                        reason=reason,
                        estimated_cost=estimated_cost,
                        status="pending",
                    )
                )
                recommendations_created += 1

    await db.commit()
    return {
        "status": "completed",
        "forecast_days": days,
        "servers_analyzed": len(server_names),
        "forecasts_created": forecasts_created,
        "recommendations_created": recommendations_created,
        "executed_at": datetime.utcnow().isoformat(),
    }
# ── 엔드포인트 ───────────────────────────────────────────────────────────────
@router.get("/forecast", response_model=List[CapacityForecastOut])
async def list_forecasts(
    metric: Optional[str] = Query(None, description="cpu|memory|disk"),
    server_name: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """List the most recent forecasts, newest first.

    Args:
        metric: Optional metric filter; lower-cased before comparison.
        server_name: Optional exact-match server name filter.
        limit: Maximum number of rows to return (1-200).
        db: Async database session.
        _u: Authenticated user (only enforces authentication).

    Returns:
        Matching ``CapacityForecast`` rows ordered by ``created_at`` descending.
    """
    stmt = select(CapacityForecast)
    # Each supplied filter narrows the query; omitted filters are ignored.
    if metric:
        stmt = stmt.where(CapacityForecast.metric == metric.lower())
    if server_name:
        stmt = stmt.where(CapacityForecast.server_name == server_name)
    stmt = stmt.order_by(desc(CapacityForecast.created_at)).limit(limit)
    result = await db.execute(stmt)
    return result.scalars().all()
@router.post("/forecast", status_code=201)
async def run_forecast_endpoint(
    days: int = Query(30, description="예측 일수 (30/60/90)", ge=1, le=365),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Run the forecast model and return its summary.

    Args:
        days: Forecast horizon in days; any value from 1 to 365 is accepted.
        db: Async database session.
        current_user: Authenticated user, forwarded to ``run_forecast``.

    Returns:
        The summary dict produced by ``run_forecast``.
    """
    return await run_forecast(days, db, current_user)
@router.get("/forecast/{days}", response_model=List[CapacityForecastOut])
async def get_forecast_by_days(
    days: int,
    metric: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """List stored forecasts for one horizon (30, 60 or 90 days), newest first.

    Args:
        days: Forecast horizon; must be 30, 60 or 90.
        metric: Optional metric filter; lower-cased before comparison.
        limit: Maximum number of rows to return (1-200).
        db: Async database session.
        _u: Authenticated user (only enforces authentication).

    Returns:
        Matching ``CapacityForecast`` rows ordered by ``created_at`` descending.

    Raises:
        HTTPException: 400 when ``days`` is not one of 30, 60, 90.
    """
    if days not in (30, 60, 90):
        raise HTTPException(
            status_code=400,
            detail="forecast_days는 30, 60, 90 중 하나여야 합니다.",
        )
    stmt = select(CapacityForecast).where(CapacityForecast.forecast_days == days)
    if metric:
        stmt = stmt.where(CapacityForecast.metric == metric.lower())
    stmt = stmt.order_by(desc(CapacityForecast.created_at)).limit(limit)
    result = await db.execute(stmt)
    return result.scalars().all()
@router.get("/recommendations", response_model=List[CapacityRecommendationOut])
async def list_recommendations(
    status: Optional[str] = Query(None, description="pending|approved|rejected"),
    urgency: Optional[str] = Query(None),
    limit: int = Query(50, ge=1, le=200),
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """List capacity recommendations, newest first.

    Args:
        status: Optional exact-match status filter.
        urgency: Optional exact-match urgency filter.
        limit: Maximum number of rows to return (1-200).
        db: Async database session.
        _u: Authenticated user (only enforces authentication).

    Returns:
        Matching ``CapacityRecommendation`` rows ordered by ``created_at``
        descending.
    """
    stmt = select(CapacityRecommendation)
    # Apply only the filters the caller actually supplied.
    if status:
        stmt = stmt.where(CapacityRecommendation.status == status)
    if urgency:
        stmt = stmt.where(CapacityRecommendation.urgency == urgency)
    stmt = stmt.order_by(desc(CapacityRecommendation.created_at)).limit(limit)
    result = await db.execute(stmt)
    return result.scalars().all()
@router.post("/recommendations/{rec_id}/approve", response_model=CapacityRecommendationOut)
async def approve_recommendation(
    rec_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Approve a pending recommendation.

    Args:
        rec_id: Primary key of the recommendation.
        db: Async database session.
        current_user: Authenticated user; their username is recorded in
            ``approved_by``.

    Returns:
        The refreshed recommendation with status ``"approved"``.

    Raises:
        HTTPException: 404 when no recommendation has this id; 400 when it is
            no longer in the ``"pending"`` state.
    """
    lookup = select(CapacityRecommendation).where(CapacityRecommendation.id == rec_id)
    result = await db.execute(lookup)
    rec = result.scalars().first()
    if not rec:
        raise HTTPException(status_code=404, detail="권고를 찾을 수 없습니다.")
    # Only pending recommendations may change state.
    if rec.status != "pending":
        raise HTTPException(
            status_code=400,
            detail=f"이미 처리된 권고입니다. (현재 상태: {rec.status})",
        )
    rec.status = "approved"
    rec.approved_by = current_user.username
    await db.commit()
    await db.refresh(rec)
    return rec
@router.post("/recommendations/{rec_id}/reject", response_model=CapacityRecommendationOut)
async def reject_recommendation(
    rec_id: int,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Reject a pending recommendation.

    Args:
        rec_id: Primary key of the recommendation.
        db: Async database session.
        current_user: Authenticated user; their username is recorded in
            ``approved_by`` (the same field is used for rejections).

    Returns:
        The refreshed recommendation with status ``"rejected"``.

    Raises:
        HTTPException: 404 when no recommendation has this id; 400 when it is
            no longer in the ``"pending"`` state.
    """
    lookup = select(CapacityRecommendation).where(CapacityRecommendation.id == rec_id)
    result = await db.execute(lookup)
    rec = result.scalars().first()
    if not rec:
        raise HTTPException(status_code=404, detail="권고를 찾을 수 없습니다.")
    # Only pending recommendations may change state.
    if rec.status != "pending":
        raise HTTPException(
            status_code=400,
            detail=f"이미 처리된 권고입니다. (현재 상태: {rec.status})",
        )
    rec.status = "rejected"
    rec.approved_by = current_user.username
    await db.commit()
    await db.refresh(rec)
    return rec
@router.get("/budget-cycle", response_model=List[BudgetCycleOut])
async def list_budget_cycles(
    year: Optional[int] = Query(None),
    limit: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """List budget cycles, most recent year and quarter first.

    Args:
        year: Optional year filter; a missing (or zero) value applies no filter.
        limit: Maximum number of rows to return (1-100).
        db: Async database session.
        _u: Authenticated user (only enforces authentication).

    Returns:
        ``BudgetCycle`` rows ordered by year, then quarter, both descending.
    """
    stmt = select(BudgetCycle)
    if year:
        stmt = stmt.where(BudgetCycle.year == year)
    newest_first = (desc(BudgetCycle.year), desc(BudgetCycle.quarter))
    stmt = stmt.order_by(*newest_first).limit(limit)
    result = await db.execute(stmt)
    return result.scalars().all()
@router.post("/budget-cycle", response_model=BudgetCycleOut, status_code=201)
async def create_budget_cycle(
    body: BudgetCycleCreate,
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """Register a budget cycle for a year/quarter pair.

    Args:
        body: Payload with year, quarter, the three budget amounts, spent,
            forecast_spend and status.
        db: Async database session.
        _u: Authenticated user (only enforces authentication).

    Returns:
        The newly created, refreshed ``BudgetCycle``.

    Raises:
        HTTPException: 400 when ``quarter`` is not 1-4; 409 when a cycle for
            the same year and quarter already exists.
    """
    if body.quarter not in (1, 2, 3, 4):
        raise HTTPException(400, "quarter는 1~4 사이여야 합니다.")

    # Duplicate check.
    # NOTE(review): this check-then-insert is not atomic; two concurrent
    # requests could both pass it unless the table has a unique constraint
    # on (year, quarter) — confirm against the model.
    existing = (
        await db.execute(
            select(BudgetCycle).where(
                and_(BudgetCycle.year == body.year, BudgetCycle.quarter == body.quarter)
            )
        )
    ).scalars().first()
    if existing:
        # "년 " separates year and quarter so the message reads "2024년 3분기"
        # rather than the fused "20243분기".
        raise HTTPException(
            409, f"{body.year}년 {body.quarter}분기 예산 사이클이 이미 존재합니다."
        )

    cycle = BudgetCycle(
        year=body.year,
        quarter=body.quarter,
        budget_infra=body.budget_infra,
        budget_license=body.budget_license,
        budget_cloud=body.budget_cloud,
        spent=body.spent,
        forecast_spend=body.forecast_spend,
        status=body.status,
    )
    db.add(cycle)
    await db.commit()
    await db.refresh(cycle)
    return cycle
@router.get("/alerts")
async def capacity_alerts(
    db: AsyncSession = Depends(get_db),
    _u: User = Depends(get_current_user),
):
    """Return imminent-capacity alerts derived from stored forecasts.

    For each ``(days, threshold, urgency)`` rule in ``_ALERT_RULES`` the top 30
    forecasts (by predicted value) with ``forecast_days <= days`` and
    ``predicted_value >= threshold`` are collected. A forecast matching
    several rules is reported once, under the first rule that matched it.

    Args:
        db: Async database session.
        _u: Authenticated user (only enforces authentication).

    Returns:
        A dict with ``total_alerts`` (count before truncation), ``alerts``
        (at most 50, sorted by urgency then predicted value descending),
        ``budget_recommendation`` for the current UTC quarter, and ``as_of``
        (naive UTC timestamp in ISO format).
    """
    unique_alerts = []
    seen_ids: set = set()
    for rule_days, threshold, urgency in _ALERT_RULES:
        stmt = (
            select(CapacityForecast)
            .where(
                and_(
                    CapacityForecast.forecast_days <= rule_days,
                    CapacityForecast.predicted_value >= threshold,
                )
            )
            .order_by(desc(CapacityForecast.predicted_value))
            .limit(30)
        )
        rows = (await db.execute(stmt)).scalars().all()
        for row in rows:
            # De-duplicate by forecast id, keeping the first rule that matched.
            if row.id in seen_ids:
                continue
            seen_ids.add(row.id)
            unique_alerts.append({
                "id": row.id,
                "server_name": row.server_name,
                "metric": row.metric,
                "forecast_days": row.forecast_days,
                "current_value": row.current_value,
                "predicted_value": row.predicted_value,
                "confidence": row.confidence,
                "trend": row.trend,
                "urgency": urgency,
                "threshold": threshold,
                "alert_message": (
                    f"{row.server_name} {row.metric.upper()} 사용률이 "
                    f"{row.forecast_days}일 후 {row.predicted_value:.1f}%로 "
                    f"임계치({threshold}%)를 초과할 것으로 예측됩니다."
                ),
                # Guard against rows without a timestamp instead of crashing.
                "created_at": row.created_at.isoformat() if row.created_at else None,
            })

    # Sort by urgency priority (immediate > 30days > 60days), then by
    # predicted value descending.
    urgency_order = {"immediate": 0, "30days": 1, "60days": 2, "90days": 3}
    unique_alerts.sort(key=lambda x: (urgency_order.get(x["urgency"], 9), -x["predicted_value"]))

    # Take a single timestamp so the quarter and `as_of` agree.
    now = datetime.utcnow()
    # Calendar quarter: months 1-3 → 1, 4-6 → 2, 7-9 → 3, 10-12 → 4.
    # (`month // 4 + 1` gives the wrong quarter for July, October and November.)
    quarter = (now.month - 1) // 3 + 1
    return {
        "total_alerts": len(unique_alerts),
        "alerts": unique_alerts[:50],
        "budget_recommendation": get_budget_recommendation(quarter),
        "as_of": now.isoformat(),
    }