zioinfo-mail/workspace/guardia-itsm/scripts/sm/agent/agent_pinpoint_sm.sh
DESKTOP-TKLFCPR\ython cfe2901a55 refactor(structure): consolidate all projects under workspace/
- itsm/    -> workspace/guardia-itsm/
- manager/ -> workspace/guardia-manager/
- app/     -> workspace/guardia-messenger/
- manual/  -> workspace/guardia-docs/

workspace/zioinfo-web/ unchanged.
git mv preserves full commit history.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-31 23:50:56 +09:00

129 lines
6.1 KiB
Bash

#!/bin/bash
# ============================================================
# GUARDiA SM | agent_pinpoint_sm.sh
# Target: Pinpoint APM (Collector / Web / Flink / HBase)
# Parameters (environment variables, shown with their defaults):
#   PINPOINT_HOME=/opt/pinpoint
#   PP_COLLECTOR_PORT=9994   PP_WEB_PORT=8080
#   PP_FLINK_PORT=8081       PP_HBASE_PORT=16000
#   PP_WEB_URL=http://localhost:${PP_WEB_PORT}
# ============================================================
set -euo pipefail

# Apply defaults for every tunable that is unset or empty in the environment.
: "${PINPOINT_HOME:=/opt/pinpoint}"
: "${PP_COLLECTOR_PORT:=9994}"
: "${PP_WEB_PORT:=8080}"
: "${PP_FLINK_PORT:=8081}"
: "${PP_HBASE_PORT:=16000}"
# The web URL default is derived from the (possibly overridden) web port.
: "${PP_WEB_URL:=http://localhost:${PP_WEB_PORT}}"

# Status tags and the section separator used by the report lines below.
OK="[OK]"
WARN="[WARN]"
CRIT="[CRIT]"
SEP="─────────────────────────────────────────"

# Overall exit code of the script; it starts at 0 and later sections raise it.
RESULT=0

# Report banner: short host name and the check start time.
printf '%s\n' "======================================================"
printf '%s\n' " GUARDiA SM 점검 | Pinpoint APM | $(hostname -s)"
printf '%s\n' " 점검 시각: $(date '+%Y-%m-%d %H:%M:%S %Z')"
printf '%s\n' "======================================================"
# ── 1. Pinpoint Collector ─────────────────────────────────
# Reports whether a Collector process is running (with its resident memory)
# and whether the collector port and the gRPC ports 9991-9993 are listening.
# A missing Collector process sets RESULT=2.
echo; echo "[$SEP] 1. Pinpoint Collector"

# pgrep takes an extended regular expression, so alternation is a bare `|`
# (a backslash-escaped pipe would only match a literal "|" character).
# `|| true` absorbs pgrep's "no match" status so that `set -e`/`pipefail`
# neither abort the script nor append a second value to the count.
COLL_PIDS=$(pgrep -f "pinpoint-collector|PinpointCollector" 2>/dev/null || true)
COLL_PROC=0
COLL_PID=""
if [ -n "$COLL_PIDS" ]; then
  COLL_PROC=$(printf '%s\n' "$COLL_PIDS" | wc -l)
  COLL_PID=${COLL_PIDS%%$'\n'*}   # first PID in pgrep's output
fi

if [ "$COLL_PROC" -gt 0 ]; then
  echo " ${OK} Collector 실행 중 (PID: ${COLL_PID})"
  # VmRSS is reported in kB; convert to whole MB. Fall back to N/A when the
  # status file cannot be read or has no VmRSS line.
  RSS_MB=$(awk '/^VmRSS:/ { printf "%d", $2 / 1024; seen = 1 } END { exit !seen }' \
    "/proc/${COLL_PID}/status" 2>/dev/null) || RSS_MB="N/A"
  echo " RSS 메모리: ${RSS_MB} MB"
else
  echo " ${CRIT} Pinpoint Collector 프로세스 없음"
  RESULT=2
fi

# Take one snapshot of the listening TCP sockets and search it from a
# here-string: with `pipefail`, `ss | grep -q` can report failure when grep
# exits early and ss is killed by SIGPIPE, producing a false warning.
COLL_LISTEN=$(ss -tlnp 2>/dev/null || true)
if grep -q ":${PP_COLLECTOR_PORT} " <<<"$COLL_LISTEN"; then
  echo " ${OK} Collector 포트 ${PP_COLLECTOR_PORT} LISTEN"
else
  echo " ${WARN} Collector 포트 ${PP_COLLECTOR_PORT} LISTEN 없음"
fi

# gRPC ports (9991-9993): informational only, nothing is printed when closed.
for GRPC_PORT in 9991 9992 9993; do
  if grep -q ":${GRPC_PORT} " <<<"$COLL_LISTEN"; then
    echo " ${OK} gRPC 포트 ${GRPC_PORT} LISTEN"
  fi
done
# ── 2. Pinpoint Web ───────────────────────────────────────
# Reports whether a Pinpoint Web process is running and, when curl is
# available, the HTTP status returned by PP_WEB_URL. A missing process raises
# RESULT to at least 1 (warning) without lowering a more severe value.
echo; echo "[$SEP] 2. Pinpoint Web"

# pgrep takes an extended regular expression, so alternation is a bare `|`.
# `|| true` absorbs the "no match" status under `set -e`/`pipefail`.
WEB_PIDS=$(pgrep -f "pinpoint-web|PinpointWeb" 2>/dev/null || true)
WEB_PROC=0
if [ -n "$WEB_PIDS" ]; then
  WEB_PROC=$(printf '%s\n' "$WEB_PIDS" | wc -l)
fi

if [ "$WEB_PROC" -gt 0 ]; then
  echo " ${OK} Pinpoint Web 실행 중"
else
  echo " ${WARN} Pinpoint Web 프로세스 없음"
  if [ "${RESULT:-0}" -lt 1 ]; then
    RESULT=1
  fi
fi

if command -v curl &>/dev/null; then
  # On failure curl still writes "000" through -w, so assign the fallback
  # instead of appending to the output (which would give "000ERR").
  HTTP_CODE=$(curl -sk -o /dev/null -w "%{http_code}" \
    --max-time 10 "${PP_WEB_URL}" 2>/dev/null) || HTTP_CODE="ERR"
  case "$HTTP_CODE" in
    [23]*) echo " ${OK} Web UI 응답: ${HTTP_CODE}" ;;
    *)     echo " ${WARN} Web UI 응답: ${HTTP_CODE}" ;;
  esac
fi
# ── 3. HBase connectivity ─────────────────────────────────
# Reports whether any local HBase process is running and whether the HBase
# master port is listening. Both findings are informational: the messages
# note that a missing local HBase is normal when an external one is used,
# and RESULT is not changed here.
echo; echo "[$SEP] 3. HBase 연결"

# pgrep takes an extended regular expression, so alternation is a bare `|`.
# `|| true` absorbs the "no match" status under `set -e`/`pipefail`.
HBASE_PIDS=$(pgrep -f "hbase|HMaster|HRegionServer" 2>/dev/null || true)
HBASE_PROC=0
if [ -n "$HBASE_PIDS" ]; then
  HBASE_PROC=$(printf '%s\n' "$HBASE_PIDS" | wc -l)
fi

if [ "$HBASE_PROC" -gt 0 ]; then
  echo " ${OK} HBase 프로세스 실행 중 (${HBASE_PROC}개)"
else
  echo " ${WARN} HBase 프로세스 없음 (외부 HBase 연결 시 무시)"
fi

# Search a captured snapshot rather than piping ss into `grep -q`: with
# `pipefail`, an early grep exit can make the pipeline report failure.
HBASE_LISTEN=$(ss -tlnp 2>/dev/null || true)
if grep -q ":${PP_HBASE_PORT} " <<<"$HBASE_LISTEN"; then
  echo " ${OK} HBase Master 포트 ${PP_HBASE_PORT} LISTEN"
else
  echo " ${WARN} HBase 포트 ${PP_HBASE_PORT} 없음 (외부 HBase 사용 시 정상)"
fi
# ── 4. Flink (real-time aggregation) ──────────────────────
# Reports whether any Flink process is running and, when curl and python3 are
# both available, lists the jobs returned by the local Flink REST endpoint
# /jobs (first 8 characters of each job id plus its status).
# RESULT is not changed here.
echo; echo "[$SEP] 4. Flink Job Manager"

# pgrep takes an extended regular expression, so alternation is a bare `|`.
# `|| true` absorbs the "no match" status under `set -e`/`pipefail`.
FLINK_PIDS=$(pgrep -f "flink|StandaloneJobManager|TaskManager" 2>/dev/null || true)
FLINK_PROC=0
if [ -n "$FLINK_PIDS" ]; then
  FLINK_PROC=$(printf '%s\n' "$FLINK_PIDS" | wc -l)
fi

if [ "$FLINK_PROC" -gt 0 ]; then
  echo " ${OK} Flink 실행 중 (${FLINK_PROC}개)"
else
  echo " ${WARN} Flink 프로세스 없음 (Inspector 기능 비활성화)"
fi

FLINK_JOBS=""
if command -v curl &>/dev/null && command -v python3 &>/dev/null; then
  # Best effort: an unreachable endpoint or an unparsable response simply
  # leaves the job list empty. The Python program is single-quoted so that
  # no shell escaping is needed inside it.
  FLINK_JOBS=$(curl -sk --max-time 5 \
    "http://localhost:${PP_FLINK_PORT}/jobs" 2>/dev/null \
    | python3 -c '
import json
import sys

for job in json.load(sys.stdin).get("jobs", []):
    print(" {}... {}".format(job["id"][:8], job["status"]))
' 2>/dev/null) || true
fi

if [ -n "$FLINK_JOBS" ]; then
  echo " Flink 작업:"
  echo "$FLINK_JOBS"
fi
# ── 5. Agent collection statistics (Web API) ──────────────
# When curl is available and a Pinpoint Web process was detected, fetches
# ${PP_WEB_URL}/getApplicationList.pinpoint and prints the length of the
# returned JSON value. Prints 0 when the request or the parsing fails.
echo; echo "[$SEP] 5. 에이전트 수집 현황"

# Keep only the leading digits of WEB_PROC so that an unset, empty, or
# multi-line value cannot break the numeric comparison below.
WEB_PROC_COUNT=${WEB_PROC:-0}
WEB_PROC_COUNT=${WEB_PROC_COUNT%%[!0-9]*}

if command -v curl &>/dev/null && [ "${WEB_PROC_COUNT:-0}" -gt 0 ]; then
  # Assign the fallback on failure instead of appending it to partial output.
  APPS=$(curl -sk --max-time 5 \
    "${PP_WEB_URL}/getApplicationList.pinpoint" 2>/dev/null) || APPS="[]"
  APP_COUNT=$(python3 -c \
    'import sys, json; print(len(json.load(sys.stdin)))' \
    <<<"$APPS" 2>/dev/null) || APP_COUNT=0
  echo " 모니터링 애플리케이션 수: ${APP_COUNT}"
fi
# ── 6. Log errors ─────────────────────────────────────────
# Looks at the first existing log directory among the candidates, picks its
# most recently modified *.log file, and reports how many of the last 500
# lines contain ERROR or FATAL, followed by the last 5 such lines.
echo; echo "[$SEP] 6. Pinpoint 로그 오류"
for LOGDIR in "${PINPOINT_HOME}/logs" "${PINPOINT_HOME}/collector/logs" \
  "${PINPOINT_HOME}/web/logs"; do
  [ -d "$LOGDIR" ] || continue

  # Pick the newest regular *.log file with a glob and -nt instead of
  # parsing `ls` output; an unmatched glob is skipped by the -f test.
  LOGFILE=""
  for LOG_CANDIDATE in "${LOGDIR}"/*.log; do
    [ -f "$LOG_CANDIDATE" ] || continue
    if [ -z "$LOGFILE" ] || [ "$LOG_CANDIDATE" -nt "$LOGFILE" ]; then
      LOGFILE=$LOG_CANDIDATE
    fi
  done

  if [ -n "$LOGFILE" ] && [ -r "$LOGFILE" ]; then
    # Read the tail once so the count and the printed lines agree.
    # `|| true` absorbs grep's "no match" status; counting from the captured
    # text avoids `grep -c ... || echo 0`, which yields "0" twice because
    # grep -c prints 0 and also exits non-zero when nothing matches.
    ERR_LINES=$(tail -n 500 "$LOGFILE" 2>/dev/null | grep -E "ERROR|FATAL" || true)
    ERR=0
    if [ -n "$ERR_LINES" ]; then
      ERR=$(printf '%s\n' "$ERR_LINES" | wc -l)
    fi
    echo " 최근 오류: ${ERR}건 (${LOGFILE})"
    if [ "$ERR" -gt 0 ]; then
      printf '%s\n' "$ERR_LINES" | tail -n 5 | sed 's/^/ /'
    fi
  fi

  # Only the first existing directory is inspected, even if it has no logs.
  # NOTE(review): confirm that skipping the remaining directories is intended.
  break
done
# ── Summary ──────────────────────────────────────────────
# Prints the final verdict that corresponds to RESULT (0, 1 or 2), prints the
# completion time, and exits with RESULT as the script's exit status.
FOOTER_RULE="======================================================"
echo
echo "$FOOTER_RULE"
if [[ "$RESULT" == "0" ]]; then
  echo " 최종 결과: ${OK} Pinpoint APM 정상"
elif [[ "$RESULT" == "1" ]]; then
  echo " 최종 결과: ${WARN} 주의 항목 있음"
elif [[ "$RESULT" == "2" ]]; then
  echo " 최종 결과: ${CRIT} 즉시 조치 필요"
fi
echo " 점검 완료: $(date '+%Y-%m-%d %H:%M:%S')"
echo "$FOOTER_RULE"
exit $RESULT