guardia-itsm/routers/dependency_map.py
2026-06-02 18:48:18 +09:00

232 lines
7.9 KiB
Python

"""
서비스 의존성 자동 매핑
SSH 경유 netstat/ss 분석으로 서비스 간 upstream/downstream 의존성을 자동 탐지.
엔드포인트:
POST /api/depmap/discover/{server_id} — 단일 서버 의존성 탐지
POST /api/depmap/discover-all — 전체 서버 의존성 탐지
GET /api/depmap/ — 의존성 맵 조회
GET /api/depmap/impact/{ci_id} — 특정 CI 영향 범위 분석
DELETE /api/depmap/{dep_id} — 의존성 수동 삭제
"""
from __future__ import annotations
import logging
import re
from datetime import datetime
import paramiko
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from sqlalchemy import select, or_
from sqlalchemy.ext.asyncio import AsyncSession
from core.auth import get_current_user, require_admin_role
from core.ssh_exec import _decrypt_password as decrypt_password
from database import get_db
from models import User, Server, ServiceDependency
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/depmap", tags=["서비스 의존성 맵"])
async def _ssh_run(server: Server, cmd: str) -> str:
    """Run ``cmd`` on ``server`` over SSH and return its stripped stdout.

    This is best-effort: any failure (password decryption, connection,
    authentication, timeout, ...) is logged as a warning and an empty string
    is returned instead of raising.

    The blocking paramiko calls are executed in the event loop's default
    executor so the loop is not stalled while the connection (10 s timeout)
    and the command (15 s timeout) are in flight.

    Args:
        server: Target host; ``ip_addr``, ``ssh_user`` and ``os_pw_enc`` are read.
        cmd: Shell command line to execute on the remote host.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced)
        and stripped, or ``""`` on any failure.
    """
    import asyncio  # local import: only this function needs it

    def _run_blocking(host, username, password_enc) -> str:
        # Everything in here blocks; it must only run on a worker thread.
        password = decrypt_password(password_enc)
        client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification — confirm this is acceptable for the managed fleet.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            client.connect(host, username=username, password=password, timeout=10)
            _, stdout, _ = client.exec_command(cmd, timeout=15)
            return stdout.read().decode('utf-8', 'replace').strip()
        finally:
            # Always release the transport, including when connect/exec/read fails.
            client.close()

    try:
        # Read the model attributes here, on the calling task, and hand plain
        # values to the worker thread.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, _run_blocking, server.ip_addr, server.ssh_user, server.os_pw_enc
        )
    except Exception as e:
        logger.warning(f"SSH 실패 ({server.ip_addr}): {e}")
        return ""
# A whitespace-delimited "IPv4:port" endpoint token.
_ENDPOINT_RE = re.compile(r'(\d+\.\d+\.\d+\.\d+):(\d+)$')

# Remote port -> service label; any other port is reported as "TCP:<port>".
_PORT_SERVICE_MAP = {
    3306: "MySQL", 5432: "PostgreSQL", 6379: "Redis",
    27017: "MongoDB", 9200: "Elasticsearch",
    80: "HTTP", 443: "HTTPS", 8080: "HTTP-Alt",
    2181: "Zookeeper", 9092: "Kafka", 5672: "RabbitMQ",
}


def _parse_connections(output: str) -> list[dict]:
    """Extract unique remote IPv4 endpoints from raw ``ss``/``netstat`` lines.

    On each line the first two ``IPv4:port`` tokens are taken as the local and
    the remote endpoint, in that order. Lines with fewer than two such tokens
    (headers, non-IPv4 sockets) are ignored, so the parser does not depend on
    which columns the tool prints.
    """
    connections = []
    seen = set()
    for line in output.splitlines():
        endpoints = [m for m in map(_ENDPOINT_RE.match, line.split()) if m]
        if len(endpoints) < 2:
            continue
        remote = endpoints[1]
        remote_ip, remote_port = remote.group(1), int(remote.group(2))
        # Skip loopback (all of 127.0.0.0/8) and the unspecified address.
        if remote_ip.startswith("127.") or remote_ip == "0.0.0.0":
            continue
        key = (remote_ip, remote_port)
        if key in seen:
            continue
        seen.add(key)
        connections.append({
            "remote_ip": remote_ip, "remote_port": remote_port,
            "dependency_type": _PORT_SERVICE_MAP.get(remote_port, f"TCP:{remote_port}"),
            "protocol": "TCP",
        })
    return connections


async def _discover_connections(server: Server) -> list[dict]:
    """Analyse ESTABLISHED TCP connections on ``server`` and derive dependencies.

    Runs ``ss`` on the host over SSH and falls back to ``netstat`` when ``ss``
    exits with an error (e.g. it is not installed). The raw lines are parsed
    in Python rather than with ``awk`` for two reasons: the address columns
    sit at different positions in the two tools' output, and piping ``ss``
    into another command would hide its exit status from ``||``.

    Returns:
        One dict per unique remote ``ip:port`` with the keys ``remote_ip``,
        ``remote_port``, ``dependency_type`` and ``protocol``. The list is
        empty when the SSH command fails or prints nothing usable.
    """
    # NOTE(review): inbound connections (a client talking to a port this host
    # listens on) are ESTABLISHED too and are reported here like outbound
    # ones — confirm whether they should be filtered using the listening ports.
    output = await _ssh_run(
        server,
        "ss -tnp state established 2>/dev/null || "
        "netstat -tnp 2>/dev/null | grep ESTABLISHED"
    )
    return _parse_connections(output)
async def _do_discover(server_id: int, db: AsyncSession):
    """Discover and persist the dependencies of a single server (background task).

    For every remote endpoint found on the server, a ``ServiceDependency`` row
    (upstream = this server, downstream = the remote server) is added, unless
    the remote IP is not a known ``Server`` or the same
    upstream/downstream/port row already exists. All new rows are committed
    together.

    Failures are logged and the session is rolled back instead of letting the
    exception escape: the callers hand the same session to several queued
    tasks, so an escaped exception would leave it unusable for the next task.

    Args:
        server_id: Primary key of the server to inspect; unknown ids are a no-op.
        db: Session used for all lookups and the final commit.
    """
    try:
        srv_row = await db.execute(select(Server).where(Server.id == server_id))
        server = srv_row.scalar_one_or_none()
        if not server:
            return
        connections = await _discover_connections(server)
        new_deps = 0
        for conn in connections:
            # Is the remote IP registered in the CMDB? first() instead of
            # scalar_one_or_none(): several rows sharing an IP must not raise.
            remote_srv_row = await db.execute(
                select(Server)
                .where(Server.ip_addr == conn["remote_ip"])
                .order_by(Server.id)
                .limit(1)
            )
            remote_server = remote_srv_row.scalars().first()
            if not remote_server:
                continue  # servers that are not in the CMDB are skipped
            # Duplicate check; tolerant of already-duplicated rows.
            existing = await db.execute(
                select(ServiceDependency).where(
                    ServiceDependency.upstream_ci_id == server_id,
                    ServiceDependency.downstream_ci_id == remote_server.id,
                    ServiceDependency.port == conn["remote_port"],
                ).limit(1)
            )
            if existing.scalars().first():
                continue
            dep = ServiceDependency(
                upstream_ci_id=server_id,
                downstream_ci_id=remote_server.id,
                dependency_type=conn["dependency_type"],
                port=conn["remote_port"],
                protocol=conn["protocol"],
                discovered_at=datetime.utcnow(),
            )
            db.add(dep)
            new_deps += 1
        await db.commit()
    except Exception:
        logger.exception(f"서버 {server_id} 의존성 탐지 실패")
        try:
            # Return the session to a usable state for the next queued task.
            await db.rollback()
        except Exception:
            logger.exception(f"서버 {server_id} 롤백 실패")
        return
    logger.info(f"서버 {server_id} 의존성 {new_deps}개 발견")
@router.post("/discover/{server_id}")
async def discover_server_deps(
    server_id: int,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """Queue dependency discovery for one server.

    Responds immediately; the discovery itself runs as a background task.

    Raises:
        HTTPException: 404 when no server with ``server_id`` exists, so the
            caller is not told that work was queued for a nonexistent server.
    """
    found = await db.execute(select(Server.id).where(Server.id == server_id))
    if found.scalar_one_or_none() is None:
        raise HTTPException(404)
    # NOTE(review): the request-scoped session is handed to the background
    # task — confirm get_db keeps it open until that task has finished.
    background_tasks.add_task(_do_discover, server_id, db)
    return {"ok": True, "server_id": server_id, "queued": True}
@router.post("/discover-all")
async def discover_all_deps(
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(require_admin_role),
):
    """Queue dependency discovery for up to 100 servers.

    Only the server ids are fetched (the full rows are not needed to queue
    the tasks), ordered by id so that the capped subset is deterministic.

    Returns:
        ``{"ok": True, "queued": <number of queued servers>}``.
    """
    rows = await db.execute(select(Server.id).order_by(Server.id).limit(100))
    server_ids = rows.scalars().all()
    # NOTE(review): every queued task receives this same request-scoped
    # session — confirm get_db keeps it open until the tasks have finished.
    for sid in server_ids:
        background_tasks.add_task(_do_discover, sid, db)
    return {"ok": True, "queued": len(server_ids)}
@router.get("/")
async def get_dependency_map(
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """Return up to 500 dependency edges, each with its upstream server's name and IP.

    Edges are inner-joined to ``Server`` on the upstream id; the downstream
    side is reported by id only.
    """
    stmt = (
        select(
            ServiceDependency,
            Server.hostname.label("up_name"),
            Server.ip_addr.label("up_ip"),
        )
        .join(Server, ServiceDependency.upstream_ci_id == Server.id)
        .limit(500)
    )
    result = await db.execute(stmt)

    payload = []
    for row in result.all():
        edge = row.ServiceDependency
        upstream = {"id": edge.upstream_ci_id, "name": row.up_name, "ip": row.up_ip}
        payload.append({
            "id": edge.id,
            "upstream": upstream,
            "downstream_id": edge.downstream_ci_id,
            "type": edge.dependency_type,
            "port": edge.port,
            "discovered_at": edge.discovered_at,
        })
    return payload
@router.get("/impact/{ci_id}")
async def get_impact_analysis(
    ci_id: int,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """Analyse the impact of a failure of one CI (upstream services = impacted services).

    Returns the servers that depend on ``ci_id``, the servers ``ci_id`` itself
    depends on, and ``blast_radius`` — the number of rows in the first group.
    """

    def _edges(server_side, fixed_side):
        # Dependency rows joined to the server on ``server_side``, restricted
        # to rows whose ``fixed_side`` is the CI under analysis.
        return (
            select(ServiceDependency, Server.hostname, Server.ip_addr)
            .join(Server, server_side == Server.id)
            .where(fixed_side == ci_id)
        )

    # Services that depend on ci_id (ci_id is the downstream end).
    upstream_result = await db.execute(
        _edges(ServiceDependency.upstream_ci_id, ServiceDependency.downstream_ci_id)
    )
    impacted = upstream_result.all()

    # Services that ci_id depends on (ci_id is the upstream end).
    downstream_result = await db.execute(
        _edges(ServiceDependency.downstream_ci_id, ServiceDependency.upstream_ci_id)
    )
    dependencies = downstream_result.all()

    impacted_services = []
    for row in impacted:
        impacted_services.append({
            "server": row.hostname,
            "ip": row.ip_addr,
            "depends_via": row.ServiceDependency.dependency_type,
        })

    depends_on = []
    for row in dependencies:
        depends_on.append({
            "server": row.hostname,
            "ip": row.ip_addr,
            "type": row.ServiceDependency.dependency_type,
        })

    return {
        "ci_id": ci_id,
        "impacted_services": impacted_services,
        "depends_on": depends_on,
        "blast_radius": len(impacted),
    }
@router.delete("/{dep_id}")
async def delete_dependency(
    dep_id: int,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(require_admin_role),
):
    """Manually delete one dependency row.

    Raises:
        HTTPException: 404 when no dependency with ``dep_id`` exists.
    """
    stmt = select(ServiceDependency).where(ServiceDependency.id == dep_id)
    target = (await db.execute(stmt)).scalar_one_or_none()
    if target:
        await db.delete(target)
        await db.commit()
        return {"ok": True}
    raise HTTPException(404)