- itsm/ -> workspace/guardia-itsm/ - manager/ -> workspace/guardia-manager/ - app/ -> workspace/guardia-messenger/ - manual/ -> workspace/guardia-docs/ workspace/zioinfo-web/ unchanged. git mv preserves full commit history. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
129 lines
6.1 KiB
Bash
129 lines
6.1 KiB
Bash
#!/bin/bash
# ============================================================
# GUARDiA SM | agent_pinpoint_sm.sh
# Target: Pinpoint APM (Collector / Web / Flink / HBase)
# Parameters (environment variables; the values shown are the
# defaults applied below when a variable is unset or empty):
#   PINPOINT_HOME=/opt/pinpoint
#   PP_COLLECTOR_PORT=9994  PP_WEB_PORT=8080
#   PP_FLINK_PORT=8081      PP_HBASE_PORT=16000
#   PP_WEB_URL=http://localhost:8080
# ============================================================
set -euo pipefail

# Apply the defaults in place; values already present in the environment win.
: "${PINPOINT_HOME:=/opt/pinpoint}"
: "${PP_COLLECTOR_PORT:=9994}"
: "${PP_WEB_PORT:=8080}"
: "${PP_FLINK_PORT:=8081}"
: "${PP_HBASE_PORT:=16000}"
# The URL default is derived from PP_WEB_PORT, so it is resolved after it.
: "${PP_WEB_URL:=http://localhost:${PP_WEB_PORT}}"

# Status tags prefixed to every check line.
OK="[OK]"
WARN="[WARN]"
CRIT="[CRIT]"
# Rule printed inside the brackets of each section title.
SEP="─────────────────────────────────────────"
# Overall status; the script ends with "exit $RESULT" (0, 1 or 2).
RESULT=0

# Report banner: host name and start time of this run.
printf '%s\n' \
  "======================================================" \
  " GUARDiA SM 점검 | Pinpoint APM | $(hostname -s)" \
  " 점검 시각: $(date '+%Y-%m-%d %H:%M:%S %Z')" \
  "======================================================"

# ── 1. Pinpoint Collector ─────────────────────────────────
# Reports whether a Collector process is running (with its PID and resident
# memory), then whether the Collector TCP port and the gRPC ports 9991-9993
# are in LISTEN state. A missing Collector process sets RESULT=2.
echo; echo "[$SEP] 1. Pinpoint Collector"

# pgrep patterns are extended regular expressions, so alternation is a bare
# '|' ('\|' would only match a literal pipe character and never find the
# process). '|| true' absorbs pgrep's "no match" status so that neither
# 'set -e' nor 'pipefail' is triggered and nothing extra is appended.
COLL_PIDS=$(pgrep -f 'pinpoint-collector|PinpointCollector' 2>/dev/null || true)
# One PID per line; grep -c prints 0 by itself when the list is empty.
COLL_PROC=$(grep -c '[0-9]' <<<"$COLL_PIDS" || true)

if [ "${COLL_PROC:-0}" -gt 0 ]; then
  # First PID of the list (same choice as 'head -1').
  COLL_PID=${COLL_PIDS%%$'\n'*}
  echo " ${OK} Collector 실행 중 (PID: ${COLL_PID})"
  # VmRSS value divided by 1024 and reported as MB; empty when the status
  # file cannot be read (e.g. the process exited meanwhile) -> shown as N/A.
  RSS_MB=$(awk '/^VmRSS:/ {printf "%d", $2 / 1024}' \
    "/proc/${COLL_PID}/status" 2>/dev/null || true)
  echo " RSS 메모리: ${RSS_MB:-N/A} MB"
else
  echo " ${CRIT} Pinpoint Collector 프로세스 없음"
  RESULT=2
fi

# Take one snapshot of the listening TCP sockets and search it repeatedly.
# This avoids piping ss into 'grep -q', which under pipefail can turn a
# successful match into a failure when ss is cut off by SIGPIPE.
LISTENERS=$(ss -tlnp 2>/dev/null || true)

if grep -qF -- ":${PP_COLLECTOR_PORT} " <<<"$LISTENERS"; then
  echo " ${OK} Collector 포트 ${PP_COLLECTOR_PORT} LISTEN"
else
  echo " ${WARN} Collector 포트 ${PP_COLLECTOR_PORT} LISTEN 없음"
fi

# gRPC ports (9991-9993): only listening ports are reported; a port that is
# not listening is deliberately silent.
for GRPC_PORT in 9991 9992 9993; do
  if grep -qF -- ":${GRPC_PORT} " <<<"$LISTENERS"; then
    echo " ${OK} gRPC 포트 ${GRPC_PORT} LISTEN"
  fi
done

# ── 2. Pinpoint Web ───────────────────────────────────────
# Reports whether a Pinpoint Web process is running and, when curl is
# available, the HTTP status returned by PP_WEB_URL. A missing process raises
# RESULT to 1 unless it is already higher. WEB_PROC is read again by the
# agent-statistics section.
echo; echo "[$SEP] 2. Pinpoint Web"

# pgrep patterns are extended regular expressions: alternation is a bare '|'
# ('\|' would only match a literal pipe). grep -c always prints exactly one
# number, and '|| true' absorbs the "nothing found" status of the pipeline
# so that pipefail does not cause a second value to be appended.
WEB_PROC=$(pgrep -f 'pinpoint-web|PinpointWeb' 2>/dev/null | grep -c '[0-9]' || true)

if [ "${WEB_PROC:-0}" -gt 0 ]; then
  echo " ${OK} Pinpoint Web 실행 중"
else
  echo " ${WARN} Pinpoint Web 프로세스 없음"
  # Escalate to WARN only; never downgrade an earlier CRIT.
  if [ "${RESULT:-0}" -lt 1 ]; then
    RESULT=1
  fi
fi

if command -v curl &>/dev/null; then
  # On failure curl still writes "000" through -w; replace the whole value
  # with ERR instead of concatenating the two.
  HTTP_CODE=$(curl -sk -o /dev/null -w '%{http_code}' \
    --max-time 10 "${PP_WEB_URL}" 2>/dev/null) || HTTP_CODE="ERR"
  # 2xx and 3xx count as a healthy answer.
  if [[ "$HTTP_CODE" =~ ^[23] ]]; then
    echo " ${OK} Web UI 응답: ${HTTP_CODE}"
  else
    echo " ${WARN} Web UI 응답: ${HTTP_CODE}"
  fi
fi

# ── 3. HBase connectivity ─────────────────────────────────
# Reports how many local HBase processes are running and whether the HBase
# Master port is in LISTEN state. Both findings are informational: RESULT is
# not changed here, and the messages note that an external HBase is a valid
# setup.
echo; echo "[$SEP] 3. HBase 연결"

# pgrep patterns are extended regular expressions: alternation is a bare '|'
# ('\|' would only match a literal pipe). grep -c always prints exactly one
# number, and '|| true' absorbs the "nothing found" status of the pipeline
# so that pipefail does not cause a second value to be appended.
# NOTE(review): the bare word "hbase" also matches any command line that
# merely mentions hbase (e.g. a JVM option) - confirm this is intended.
HBASE_PROC=$(pgrep -f 'hbase|HMaster|HRegionServer' 2>/dev/null | grep -c '[0-9]' || true)

if [ "${HBASE_PROC:-0}" -gt 0 ]; then
  echo " ${OK} HBase 프로세스 실행 중 (${HBASE_PROC}개)"
else
  echo " ${WARN} HBase 프로세스 없음 (외부 HBase 연결 시 무시)"
fi

# Search a captured snapshot instead of piping ss into 'grep -q'; under
# pipefail an early grep exit could otherwise be reported as "not listening".
LISTENERS=$(ss -tlnp 2>/dev/null || true)
if grep -qF -- ":${PP_HBASE_PORT} " <<<"$LISTENERS"; then
  echo " ${OK} HBase Master 포트 ${PP_HBASE_PORT} LISTEN"
else
  echo " ${WARN} HBase 포트 ${PP_HBASE_PORT} 없음 (외부 HBase 사용 시 정상)"
fi

# ── 4. Flink (real-time aggregation) ──────────────────────
# Reports how many Flink processes are running and, when curl is available,
# lists the jobs returned by the /jobs endpoint on PP_FLINK_PORT. Findings are
# informational: RESULT is not changed here.
echo; echo "[$SEP] 4. Flink Job Manager"

# pgrep patterns are extended regular expressions: alternation is a bare '|'
# ('\|' would only match a literal pipe). grep -c always prints exactly one
# number, and '|| true' absorbs the "nothing found" status of the pipeline
# so that pipefail does not cause a second value to be appended.
FLINK_PROC=$(pgrep -f 'flink|StandaloneJobManager|TaskManager' 2>/dev/null | grep -c '[0-9]' || true)

if [ "${FLINK_PROC:-0}" -gt 0 ]; then
  echo " ${OK} Flink 실행 중 (${FLINK_PROC}개)"
else
  echo " ${WARN} Flink 프로세스 없음 (Inspector 기능 비활성화)"
fi

if command -v curl &>/dev/null; then
  # Best effort: an unreachable endpoint, a missing python3 or a non-JSON
  # answer all end up as an empty job list, hence the silenced stderr and the
  # '|| true'. The Python program is single-quoted so it needs no escaping;
  # it prints the first 8 characters of each job id followed by its status.
  FLINK_JOBS=$(curl -sk --max-time 5 \
    "http://localhost:${PP_FLINK_PORT}/jobs" 2>/dev/null \
    | python3 -c '
import json
import sys

for job in json.load(sys.stdin).get("jobs", []):
    print(" {}... {}".format(job["id"][:8], job["status"]))
' 2>/dev/null || true)
  if [ -n "$FLINK_JOBS" ]; then
    echo " Flink 작업:"
    echo "$FLINK_JOBS"
  fi
fi

# ── 5. Agent collection statistics (Web API) ──────────────
# Prints the number of entries returned by getApplicationList.pinpoint under
# PP_WEB_URL. Runs only when curl is installed and WEB_PROC (set by the
# Pinpoint Web section) is greater than zero.
echo
echo "[$SEP] 5. 에이전트 수집 현황"
if command -v curl &>/dev/null; then
  if [ "$WEB_PROC" -gt 0 ]; then
    # A failed request is treated as an empty JSON list.
    APPS=$(curl -sk --max-time 5 "${PP_WEB_URL}/getApplicationList.pinpoint" 2>/dev/null \
      || echo "[]")
    # Length of the decoded JSON value; 0 when python3 or the parse fails.
    APP_COUNT=$(python3 -c "import sys,json; print(len(json.load(sys.stdin)))" \
      <<<"$APPS" 2>/dev/null || echo 0)
    echo " 모니터링 애플리케이션 수: ${APP_COUNT}"
  fi
fi

# ── 6. Log errors ─────────────────────────────────────────
# Looks at the first log directory that exists under PINPOINT_HOME, picks its
# most recently modified *.log file, and reports how many of the last 500
# lines contain ERROR or FATAL, followed by the last five such lines.
# Only the first existing directory is examined (the loop stops there even
# when it holds no readable log). RESULT is not changed here.
echo; echo "[$SEP] 6. Pinpoint 로그 오류"
for LOGDIR in "${PINPOINT_HOME}/logs" "${PINPOINT_HOME}/collector/logs" \
  "${PINPOINT_HOME}/web/logs"; do
  [ -d "$LOGDIR" ] || continue

  # Newest *.log by modification time, chosen with a glob and -nt instead of
  # parsing 'ls' output (safe for unusual file names and for "no match").
  LOGFILE=""
  for CANDIDATE in "${LOGDIR}"/*.log; do
    [ -f "$CANDIDATE" ] || continue
    if [ -z "$LOGFILE" ] || [ "$CANDIDATE" -nt "$LOGFILE" ]; then
      LOGFILE=$CANDIDATE
    fi
  done

  if [ -n "$LOGFILE" ] && [ -r "$LOGFILE" ]; then
    # Read the tail once so the count and the excerpt describe the same lines.
    RECENT=$(tail -n 500 -- "$LOGFILE" 2>/dev/null || true)
    # grep -c already prints 0 when nothing matches (while exiting 1), so the
    # status is absorbed with '|| true' rather than echoing a second 0.
    ERR=$(grep -cE "ERROR|FATAL" <<<"$RECENT" || true)
    echo " 최근 오류: ${ERR}건 (${LOGFILE})"
    if [ "${ERR:-0}" -gt 0 ]; then
      grep -E "ERROR|FATAL" <<<"$RECENT" | tail -n 5 | sed 's/^/ /'
    fi
  fi
  break
done

# ── Summary ───────────────────────────────────────────────
# Prints the final verdict that corresponds to RESULT (0, 1 or 2) and exits
# with RESULT as the script's status. Any other value prints no verdict line.
VERDICT=""
if [[ $RESULT == 0 ]]; then
  VERDICT="${OK} Pinpoint APM 정상"
elif [[ $RESULT == 1 ]]; then
  VERDICT="${WARN} 주의 항목 있음"
elif [[ $RESULT == 2 ]]; then
  VERDICT="${CRIT} 즉시 조치 필요"
fi

printf '\n%s\n' "======================================================"
if [[ -n $VERDICT ]]; then
  printf '%s\n' " 최종 결과: ${VERDICT}"
fi
printf '%s\n' \
  " 점검 완료: $(date '+%Y-%m-%d %H:%M:%S')" \
  "======================================================"
exit $RESULT