# Commit note: directories were moved with `git mv`, which preserves full commit history:
#   - itsm/    -> workspace/guardia-itsm/
#   - manager/ -> workspace/guardia-manager/
#   - app/     -> workspace/guardia-messenger/
#   - manual/  -> workspace/guardia-docs/
#   workspace/zioinfo-web/ unchanged.
# Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
# File info: 191 lines, 5.9 KiB, Python
"""
|
|
Scouter APM integration — GUARDiA ITSM

Scouter is an APM tool specialized for Java WAS (Tomcat/JBoss/JEUS).
Real-time metrics are collected through the Scouter HTTP API and shown on the
GUARDiA dashboard.

Environment variables:
    SCOUTER_HOST      : Scouter server host (default: localhost)
    SCOUTER_HTTP_PORT : Scouter HTTP API port (default: 6180)
    SCOUTER_USER      : Scouter user (default: admin)
    SCOUTER_PASSWORD  : Scouter password (default: admin)

Note: the integration is disabled unless SCOUTER_HOST is set to a non-empty
value; the "localhost" default only affects how the base URL is built.

Scouter HTTP API docs: https://github.com/scouter-project/scouter/wiki/Scouter-HTTP-API
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from typing import Optional
|
|
|
|
import httpx
|
|
|
|
logger = logging.getLogger(__name__)

# Connection settings, each overridable through an environment variable of the
# same name. NOTE(review): the credential defaults are "admin"/"admin" —
# confirm these are overridden in any real deployment.
SCOUTER_HOST = os.environ.get("SCOUTER_HOST", "localhost")
SCOUTER_HTTP_PORT = int(os.environ.get("SCOUTER_HTTP_PORT", "6180"))
SCOUTER_USER = os.environ.get("SCOUTER_USER", "admin")
SCOUTER_PASSWORD = os.environ.get("SCOUTER_PASSWORD", "admin")

# Root URL that every API path in this module is appended to.
_BASE = f"http://{SCOUTER_HOST}:{SCOUTER_HTTP_PORT}/scouter/v1"

# The integration stays disabled unless SCOUTER_HOST is explicitly set
# (an unset or empty variable turns every API call into a no-op).
_ENABLED = bool(os.environ.get("SCOUTER_HOST"))
|
|
|
|
|
|
async def _get(path: str, params: Optional[dict] = None) -> Optional[dict]:
    """Send a GET request to the Scouter HTTP API and return the decoded JSON.

    This helper is best-effort: it never raises. Every failure is logged at
    debug level and reported to the caller as ``None``.

    Args:
        path: API path appended to the module base URL (e.g. ``"/object"``).
        params: Optional query-string parameters.

    Returns:
        The decoded JSON body when the server answers HTTP 200; ``None`` when
        the integration is disabled, the server answers with any other status,
        or the request/decoding fails.
    """
    if not _ENABLED:
        return None
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            r = await client.get(
                f"{_BASE}{path}",
                params=params or {},
                auth=(SCOUTER_USER, SCOUTER_PASSWORD),
            )
            if r.status_code == 200:
                return r.json()
            # Leave a trace for non-200 answers instead of dropping them
            # silently, so auth/URL problems can be diagnosed from the log.
            logger.debug("Scouter API HTTP %s (%s)", r.status_code, path)
    except Exception as e:
        # Deliberately broad: monitoring must never break the caller.
        logger.debug("Scouter API 오류 (%s): %s", path, str(e)[:80])
    return None
|
|
|
|
|
|
async def is_available() -> bool:
    """Report whether the Scouter server answers the version endpoint."""
    version_info = await _get("/info/version")
    if version_info is None:
        return False
    return True
|
|
|
|
|
|
async def get_object_list() -> list:
    """Return the monitored objects (servers/services) known to Scouter.

    An empty list is returned when the API call yields nothing.
    """
    payload = await _get("/object")
    return payload.get("result", []) if payload else []
|
|
|
|
|
|
async def get_server_metrics(obj_hash: int) -> dict:
    """Fetch the real-time metrics of a single monitored object.

    Args:
        obj_hash: Scouter object hash identifying the server.

    Returns:
        An empty dict when the API call yields nothing; otherwise::

            {
                "cpu": float,            # CPU usage (%)
                "heap_used": int,        # heap in use (MB)
                "heap_max": int,         # heap maximum (MB)
                "tps": float,            # transactions per second
                "active_service": int,   # number of active services
                "response_time": float,  # average response time (ms)
                "error_rate": float,     # error rate (%)
            }

        Metrics missing from (or null in) the response fall back to zero.
    """
    result = await _get(f"/object/{obj_hash}/realtime/summary")
    if not result:
        return {}

    # "result" may be present but null (or not an object at all); treat that
    # like an empty payload instead of failing on ``.get``.
    data = result.get("result")
    if not isinstance(data, dict):
        data = {}

    def _metric(key, default):
        """Return data[key], using *default* for a missing or null value."""
        value = data.get(key)
        return default if value is None else value

    # Heap figures are scaled down by 1024 * 1024 to report MB
    # (presumably the raw values are bytes — TODO confirm against the API).
    bytes_per_mb = 1024 * 1024
    return {
        "cpu": _metric("cpuPct", 0.0),
        "heap_used": _metric("heapUsed", 0) // bytes_per_mb,
        "heap_max": _metric("heapMax", 0) // bytes_per_mb,
        "tps": _metric("tps", 0.0),
        "active_service": _metric("activeService", 0),
        "response_time": _metric("elapsedTime", 0.0),
        "error_rate": _metric("errorRate", 0.0),
    }
|
|
|
|
|
|
async def get_all_metrics() -> list:
    """Collect real-time metrics for every monitored object.

    Objects are queried one after another, in the order the object list
    returns them. Entries without an ``objHash`` are skipped.

    Returns:
        ``[{"name": str, "type": str, "host": str, "metrics": {...}}, ...]``;
        an empty list when no objects are available.
    """
    objects = await get_object_list()
    if not objects:
        return []

    results = []
    for obj in objects:
        obj_hash = obj.get("objHash")
        # Test for None explicitly: a hash of 0 is falsy but still identifies
        # an object, so a plain truthiness check would drop it.
        if obj_hash is None:
            continue

        metrics = await get_server_metrics(obj_hash)
        results.append({
            "name": obj.get("objName", "unknown"),
            "type": obj.get("objType", "unknown"),
            "host": obj.get("address", ""),
            "metrics": metrics,
        })

    return results
|
|
|
|
|
|
async def get_active_services(obj_hash: int) -> list:
    """Return the active services (requests currently being processed).

    An empty list is returned when the API call yields nothing.
    """
    endpoint = f"/object/{obj_hash}/activeService"
    payload = await _get(endpoint)
    if payload:
        return payload.get("result", [])
    return []
|
|
|
|
|
|
async def get_xlog_recent(obj_hash: int, limit: int = 20) -> list:
    """Return the most recent transaction X-Log entries for an object.

    ``limit`` is forwarded to the API as the ``limit`` query parameter. An
    empty list is returned when the API call yields nothing.
    """
    query = {"limit": limit}
    payload = await _get(f"/xlog/realtime/{obj_hash}", params=query)
    return payload.get("result", []) if payload else []
|
|
|
|
|
|
async def get_alert_list(obj_hash: Optional[int] = None) -> list:
    """Return Scouter alerts, optionally restricted to one object.

    Args:
        obj_hash: Object hash to filter by. When omitted (``None``), the
            global ``/alert`` endpoint is queried instead.

    Returns:
        The alert list, or an empty list when the API call yields nothing.
    """
    # Compare with None rather than relying on truthiness so that a hash of 0
    # still selects the per-object endpoint.
    path = f"/object/{obj_hash}/alert" if obj_hash is not None else "/alert"
    result = await _get(path)
    if not result:
        return []
    return result.get("result", [])
|
|
|
|
|
|
async def get_summary() -> dict:
    """Summarize the overall monitoring status (for the GUARDiA dashboard).

    Returns:
        A dict that always carries the same keys, whether the integration is
        disabled, no servers were found, or metrics were collected::

            {
                "enabled": bool,
                "total_servers": int,
                "avg_cpu": float,
                "avg_tps": float,
                "avg_response_ms": float,
                "critical_servers": [str],  # CPU > 80% or error rate > 5%
                "servers": [...]
            }
    """
    # Baseline payload shared by the early-return paths, so callers can index
    # every documented key without checking which path produced the result.
    summary = {
        "enabled": False,
        "total_servers": 0,
        "avg_cpu": 0,
        "avg_tps": 0,
        "avg_response_ms": 0,
        "critical_servers": [],
        "servers": [],
    }
    if not _ENABLED:
        return summary

    summary["enabled"] = True
    all_metrics = await get_all_metrics()
    if not all_metrics:
        return summary

    total = len(all_metrics)
    avg_cpu = sum(m["metrics"].get("cpu", 0) for m in all_metrics) / total
    avg_tps = sum(m["metrics"].get("tps", 0) for m in all_metrics) / total
    avg_resp = sum(m["metrics"].get("response_time", 0) for m in all_metrics) / total

    # A server is critical when its CPU exceeds 80% or its error rate exceeds 5%.
    critical = [
        m["name"] for m in all_metrics
        if m["metrics"].get("cpu", 0) > 80
        or m["metrics"].get("error_rate", 0) > 5
    ]

    return {
        "enabled": True,
        "total_servers": total,
        "avg_cpu": round(avg_cpu, 1),
        "avg_tps": round(avg_tps, 2),
        "avg_response_ms": round(avg_resp, 1),
        "critical_servers": critical,
        "servers": all_metrics,
    }
|