zioinfo-mail/itsm/scripts/sm/agent/agent_pinpoint_sm.sh
DESKTOP-TKLFCPR\ython e228faabf5 feat(itsm): G-1~G-12 확장 기능 + 하네스/봇/설치스크립트 구현
G-1: 메신저 Webhook Relay + _send_to_room 실제 httpx 호출 구현
G-2: POST /api/tasks/bulk SR 대량작업 엔드포인트 (최대 100건)
G-3: 라이선스 만료 알림 스케줄러 (매일 09:00 KST)
G-4: 체험판 upgrade_banner 필드 + license.py 배너 로직
G-5: core/auto_rca.py + incidents/problem auto-rca 엔드포인트
G-6: core/deploy_impact.py + vibe impact-analysis 엔드포인트
G-7: core/ticket_classifier.py + SR 생성 시 AI 분류 + ai-suggestion API
G-8: VulnPatchRecord 모델 + vuln_scan 패치추적 4개 엔드포인트
G-9: core/jira_sync.py + gateway Jira/Confluence 연동 엔드포인트
G-10: core/push_notify.py + routers/push.py + PushSubscription 모델
G-11: approvals 다중승인 (위임/서명/기한초과/마감연장)
G-12: alembic.ini + migrations/ + cicd/migrate_to_postgres.sh

하네스: guardia-orchestrator 확장기능 Phase 반영
봇명령어: /sr /status /license /bulk 슬래시 명령어 추가
설치스크립트: setup/ (Ubuntu, CentOS, RHEL, Windows) --test 옵션 포함

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 18:18:52 +09:00

129 lines
6.1 KiB
Bash

#!/bin/bash
# ============================================================
# GUARDiA SM | agent_pinpoint_sm.sh
# Target: Pinpoint APM (Collector / Web / Flink / HBase)
# Parameters (environment variables; defaults shown):
#   PINPOINT_HOME=/opt/pinpoint
#   PP_COLLECTOR_PORT=9994   PP_WEB_PORT=8080
#   PP_FLINK_PORT=8081       PP_HBASE_PORT=16000
#   PP_WEB_URL=http://localhost:${PP_WEB_PORT}
# RESULT starts at 0 and is used as the script's exit status.
# ============================================================
set -euo pipefail

# Fall back to the default whenever a parameter is unset or empty.
: "${PINPOINT_HOME:=/opt/pinpoint}"
: "${PP_COLLECTOR_PORT:=9994}"
: "${PP_WEB_PORT:=8080}"
: "${PP_FLINK_PORT:=8081}"
: "${PP_HBASE_PORT:=16000}"
: "${PP_WEB_URL:=http://localhost:${PP_WEB_PORT}}"

# Status tags and the separator used in every section heading.
OK="[OK]"
WARN="[WARN]"
CRIT="[CRIT]"
SEP="─────────────────────────────────────────"

# Overall status: 0 = normal, 1 = warning, 2 = critical.
RESULT=0

# Report banner: short host name and the time the check started.
printf '%s\n' \
  "======================================================" \
  " GUARDiA SM 점검 | Pinpoint APM | $(hostname -s)" \
  " 점검 시각: $(date '+%Y-%m-%d %H:%M:%S %Z')" \
  "======================================================"
# ── 1. Pinpoint Collector ─────────────────────────────────
# Verifies that a Collector process exists, prints the resident memory of the
# first matching PID, and probes the Collector and gRPC listen ports.
# Reads:  OK, WARN, CRIT, SEP, PP_COLLECTOR_PORT
# Writes: COLL_PROC (match count), COLL_PID, RSS_MB, RESULT (2 if no process)
echo; echo "[$SEP] 1. Pinpoint Collector"
# pgrep interprets the pattern as an extended regex, so alternation is a bare
# '|' (a backslash-escaped '\|' only matches a literal pipe character).
# pgrep exits non-zero when nothing matches; '|| true' keeps that from
# aborting the script under set -e.
COLL_PIDS=$(pgrep -f 'pinpoint-collector|PinpointCollector' 2>/dev/null || true)
# Count non-empty lines; grep -c prints 0 but exits 1 when there are none.
COLL_PROC=$(grep -c . <<<"$COLL_PIDS" || true)
if [[ "$COLL_PROC" -gt 0 ]]; then
  COLL_PID=${COLL_PIDS%%$'\n'*}   # first PID in the list
  echo " ${OK} Collector 실행 중 (PID: ${COLL_PID})"
  # VmRSS is reported in kB; show whole MB, or N/A when it cannot be read.
  RSS_MB="N/A"
  RSS_KB=$(awk '/^VmRSS:/{print $2; exit}' "/proc/${COLL_PID}/status" 2>/dev/null || true)
  if [[ "$RSS_KB" =~ ^[0-9]+$ ]]; then
    RSS_MB=$(( RSS_KB / 1024 ))
  fi
  echo " RSS 메모리: ${RSS_MB} MB"
else
  echo " ${CRIT} Pinpoint Collector 프로세스 없음"
  RESULT=2
fi
# Take one snapshot of listening TCP sockets and search it from a variable:
# piping ss straight into 'grep -q' can fail under pipefail when grep exits
# early, which would misreport a listening port as missing.
LISTENERS=$(ss -tln 2>/dev/null || true)
if grep -qF -- ":${PP_COLLECTOR_PORT} " <<<"$LISTENERS"; then
  echo " ${OK} Collector 포트 ${PP_COLLECTOR_PORT} LISTEN"
else
  echo " ${WARN} Collector 포트 ${PP_COLLECTOR_PORT} LISTEN 없음"
fi
# gRPC ports (9991-9993): reported only when listening.
for GRPC_PORT in 9991 9992 9993; do
  if grep -qF -- ":${GRPC_PORT} " <<<"$LISTENERS"; then
    echo " ${OK} gRPC 포트 ${GRPC_PORT} LISTEN"
  fi
done
# ── 2. Pinpoint Web ───────────────────────────────────────
# Verifies that a Pinpoint Web process exists and that the Web UI answers
# over HTTP with a 2xx/3xx status.
# Reads:  OK, WARN, SEP, PP_WEB_URL, RESULT
# Writes: WEB_PROC (match count), HTTP_CODE, RESULT (raised to 1 if no process)
echo; echo "[$SEP] 2. Pinpoint Web"
# pgrep takes an extended regex: alternation is a bare '|', not '\|'.
# '|| true' absorbs pgrep's non-zero exit when nothing matches.
WEB_PIDS=$(pgrep -f 'pinpoint-web|PinpointWeb' 2>/dev/null || true)
# Count non-empty lines; grep -c prints 0 but exits 1 when there are none.
WEB_PROC=$(grep -c . <<<"$WEB_PIDS" || true)
if [[ "$WEB_PROC" -gt 0 ]]; then
  echo " ${OK} Pinpoint Web 실행 중"
else
  echo " ${WARN} Pinpoint Web 프로세스 없음"
  # Raise to warning level without downgrading an earlier critical result.
  if [[ "$RESULT" -lt 1 ]]; then
    RESULT=1
  fi
fi
if command -v curl &>/dev/null; then
  # Assign the fallback outside the substitution: curl still emits its -w
  # output on a failed transfer, so an inner '|| echo ERR' would be appended
  # to it (e.g. "000ERR").
  HTTP_CODE=$(curl -sk -o /dev/null -w '%{http_code}' \
    --max-time 10 "${PP_WEB_URL}" 2>/dev/null) || HTTP_CODE="ERR"
  if [[ "$HTTP_CODE" =~ ^[23] ]]; then
    echo " ${OK} Web UI 응답: ${HTTP_CODE}"
  else
    echo " ${WARN} Web UI 응답: ${HTTP_CODE}"
  fi
fi
# ── 3. HBase connectivity ─────────────────────────────────
# Reports whether HBase processes run on this host and whether the HBase
# Master port is listening. Both findings are informational: they print a
# warning but never change RESULT.
# Reads:  OK, WARN, SEP, PP_HBASE_PORT
# Writes: HBASE_PROC (match count)
echo; echo "[$SEP] 3. HBase 연결"
# pgrep takes an extended regex: alternation is a bare '|', not '\|'.
# '|| true' absorbs pgrep's non-zero exit when nothing matches.
HBASE_PIDS=$(pgrep -f 'hbase|HMaster|HRegionServer' 2>/dev/null || true)
# Count non-empty lines; grep -c prints 0 but exits 1 when there are none.
HBASE_PROC=$(grep -c . <<<"$HBASE_PIDS" || true)
if [[ "$HBASE_PROC" -gt 0 ]]; then
  echo " ${OK} HBase 프로세스 실행 중 (${HBASE_PROC}개)"
else
  echo " ${WARN} HBase 프로세스 없음 (외부 HBase 연결 시 무시)"
fi
# Search a captured snapshot instead of piping ss into 'grep -q', which can
# fail under pipefail when grep exits before ss has finished writing.
HBASE_LISTENERS=$(ss -tln 2>/dev/null || true)
if grep -qF -- ":${PP_HBASE_PORT} " <<<"$HBASE_LISTENERS"; then
  echo " ${OK} HBase Master 포트 ${PP_HBASE_PORT} LISTEN"
else
  echo " ${WARN} HBase 포트 ${PP_HBASE_PORT} 없음 (외부 HBase 사용 시 정상)"
fi
# ── 4. Flink (real-time aggregation) ──────────────────────
# Reports whether Flink processes run on this host and, when curl is
# available, lists the jobs returned by the local Flink REST endpoint.
# Findings here are informational and never change RESULT.
# Reads:  OK, WARN, SEP, PP_FLINK_PORT
# Writes: FLINK_PROC (match count), FLINK_JOBS
echo; echo "[$SEP] 4. Flink Job Manager"
# pgrep takes an extended regex: alternation is a bare '|', not '\|'.
# '|| true' absorbs pgrep's non-zero exit when nothing matches.
FLINK_PIDS=$(pgrep -f 'flink|StandaloneJobManager|TaskManager' 2>/dev/null || true)
# Count non-empty lines; grep -c prints 0 but exits 1 when there are none.
FLINK_PROC=$(grep -c . <<<"$FLINK_PIDS" || true)
if [[ "$FLINK_PROC" -gt 0 ]]; then
  echo " ${OK} Flink 실행 중 (${FLINK_PROC}개)"
else
  echo " ${WARN} Flink 프로세스 없음 (Inspector 기능 비활성화)"
fi
if command -v curl &>/dev/null; then
  # Best effort: an unreachable endpoint, a missing python3 or an unexpected
  # payload all leave FLINK_JOBS empty and the job list is simply not printed.
  # Each output line is the first 8 characters of the job id plus its status.
  FLINK_JOBS=$(
    curl -sk --max-time 5 "http://localhost:${PP_FLINK_PORT}/jobs" 2>/dev/null \
      | python3 -c '
import json
import sys

for job in json.load(sys.stdin).get("jobs", []):
    print(" {}... {}".format(job["id"][:8], job["status"]))
' 2>/dev/null || true
  )
  if [[ -n "$FLINK_JOBS" ]]; then
    echo " Flink 작업:"
    echo "$FLINK_JOBS"
  fi
fi
# ── 5. Agent collection statistics (Web API) ──────────────
# When curl is available and a Pinpoint Web process was counted earlier
# (WEB_PROC), asks the Web API for the application list and prints how many
# entries it contains.
# Reads:  SEP, PP_WEB_URL, WEB_PROC
# Writes: APPS, APP_COUNT
echo; echo "[$SEP] 5. 에이전트 수집 현황"
# Accept WEB_PROC only when it is a positive integer; a missing or malformed
# value skips the query quietly instead of raising a shell arithmetic error.
if command -v curl &>/dev/null && [[ "${WEB_PROC:-}" =~ ^[0-9]*[1-9][0-9]*$ ]]; then
  # On curl failure fall back to an empty JSON list (replacing, not appending
  # to, whatever partial output was received).
  APPS=$(curl -sk --max-time 5 \
    "${PP_WEB_URL}/getApplicationList.pinpoint" 2>/dev/null) || APPS="[]"
  APP_COUNT=$(python3 -c 'import sys, json; print(len(json.load(sys.stdin)))' \
    <<<"$APPS" 2>/dev/null || true)
  # Unparseable payloads or a missing python3 count as zero applications.
  if [[ ! "$APP_COUNT" =~ ^[0-9]+$ ]]; then
    APP_COUNT=0
  fi
  echo " 모니터링 애플리케이션 수: ${APP_COUNT}"
fi
# ── 6. Log errors ─────────────────────────────────────────
# Looks at the first log directory that exists (in the order listed), picks
# its most recently modified *.log file, and reports how many of the last 500
# lines contain ERROR or FATAL, echoing the last five such lines.
# Reads:  SEP, PINPOINT_HOME
# Writes: LOGDIR, LOGFILE, ERR
echo; echo "[$SEP] 6. Pinpoint 로그 오류"
for LOGDIR in "${PINPOINT_HOME}/logs" "${PINPOINT_HOME}/collector/logs" \
  "${PINPOINT_HOME}/web/logs"; do
  [[ -d "$LOGDIR" ]] || continue
  # Select the newest log with a glob and -nt rather than parsing ls output;
  # an unmatched glob stays literal and is rejected by the -f test.
  LOGFILE=""
  for LOG_CANDIDATE in "${LOGDIR}"/*.log; do
    [[ -f "$LOG_CANDIDATE" ]] || continue
    if [[ -z "$LOGFILE" || "$LOG_CANDIDATE" -nt "$LOGFILE" ]]; then
      LOGFILE=$LOG_CANDIDATE
    fi
  done
  if [[ -n "$LOGFILE" && -r "$LOGFILE" ]]; then
    # Read the tail once so the count and the excerpt describe the same lines.
    LOG_TAIL=$(tail -n 500 -- "$LOGFILE" 2>/dev/null || true)
    # grep -c already prints 0 (and exits 1) when nothing matches, so the
    # fallback must not print a second value.
    ERR=$(grep -cE 'ERROR|FATAL' <<<"$LOG_TAIL" || true)
    echo " 최근 오류: ${ERR}건 (${LOGFILE})"
    if [[ "$ERR" -gt 0 ]]; then
      grep -E 'ERROR|FATAL' <<<"$LOG_TAIL" | tail -n 5 | sed 's/^/ /'
    fi
  fi
  # Only the first existing directory is inspected.
  break
done
# ── Summary ───────────────────────────────────────────────
# Prints the verdict line that corresponds to RESULT (0 normal, 1 warning,
# 2 critical), stamps the completion time, and exits with RESULT as status.
echo
printf '%s\n' "======================================================"
if [[ "$RESULT" == 0 ]]; then
  printf '%s\n' " 최종 결과: ${OK} Pinpoint APM 정상"
elif [[ "$RESULT" == 1 ]]; then
  printf '%s\n' " 최종 결과: ${WARN} 주의 항목 있음"
elif [[ "$RESULT" == 2 ]]; then
  printf '%s\n' " 최종 결과: ${CRIT} 즉시 조치 필요"
fi
printf '%s\n' \
  " 점검 완료: $(date '+%Y-%m-%d %H:%M:%S')" \
  "======================================================"
exit $RESULT