#!/bin/bash
# ============================================================
# GUARDiA SM | agent_pinpoint_sm.sh
# Target: Pinpoint APM (Collector / Web / Flink / HBase)
# Parameters (environment variables, defaults shown):
#   PINPOINT_HOME=/opt/pinpoint
#   PP_COLLECTOR_PORT=9994   PP_WEB_PORT=8080
#   PP_FLINK_PORT=8081       PP_HBASE_PORT=16000
#   PP_WEB_URL=http://localhost:8080
# Exit status:
#   0 - no finding that affects the result
#   1 - Pinpoint Web process missing
#   2 - Pinpoint Collector process missing
# ============================================================
set -euo pipefail

PINPOINT_HOME=${PINPOINT_HOME:-/opt/pinpoint}
PP_COLLECTOR_PORT=${PP_COLLECTOR_PORT:-9994}
PP_WEB_PORT=${PP_WEB_PORT:-8080}
PP_FLINK_PORT=${PP_FLINK_PORT:-8081}
PP_HBASE_PORT=${PP_HBASE_PORT:-16000}
PP_WEB_URL=${PP_WEB_URL:-"http://localhost:${PP_WEB_PORT}"}

readonly OK="[OK]"
readonly WARN="[WARN]"
readonly CRIT="[CRIT]"
readonly SEP="─────────────────────────────────────────"
RESULT=0

#######################################
# Print the PIDs (one per line) of processes whose full command line
# matches an extended regular expression.
# Arguments: $1 - ERE pattern (use `a|b` for alternation, not `a\|b`)
# Outputs:   matching PIDs on stdout; nothing when there is no match
# Returns:   always 0, so "no match" never trips `set -e` / pipefail
#######################################
find_pids() {
  pgrep -f "$1" 2>/dev/null || true
}

# Listening TCP sockets, captured once. Matching against a variable
# avoids `ss | grep -q` failing under pipefail when grep exits early.
LISTEN_TABLE=$(ss -tlnp 2>/dev/null || true)

#######################################
# Test whether a TCP port appears in the captured listen table.
# Globals:   LISTEN_TABLE (read)
# Arguments: $1 - port number
# Returns:   0 if ":<port> " occurs in the table, non-zero otherwise
#######################################
port_listening() {
  grep -qF -- ":$1 " <<<"$LISTEN_TABLE"
}

echo "======================================================"
echo " GUARDiA SM 점검 | Pinpoint APM | $(hostname -s)"
echo " 점검 시각: $(date '+%Y-%m-%d %H:%M:%S %Z')"
echo "======================================================"

# ── 1. Pinpoint Collector ─────────────────────────────────
echo
echo "[$SEP] 1. Pinpoint Collector"
# The PID list is read once so the count and the reported PID agree.
mapfile -t COLL_PIDS < <(find_pids 'pinpoint-collector|PinpointCollector')
COLL_PROC=${#COLL_PIDS[@]}
if (( COLL_PROC > 0 )); then
  COLL_PID=${COLL_PIDS[0]}
  echo " ${OK} Collector 실행 중 (PID: ${COLL_PID})"
  # VmRSS is reported in kB; convert to MB. Fall back to N/A when the
  # status file cannot be read or carries no VmRSS line.
  RSS_MB=$(awk '/VmRSS/ { printf "%d", $2 / 1024; found = 1 }
                END { exit !found }' "/proc/${COLL_PID}/status" 2>/dev/null) \
    || RSS_MB="N/A"
  echo " RSS 메모리: ${RSS_MB} MB"
else
  echo " ${CRIT} Pinpoint Collector 프로세스 없음"
  RESULT=2
fi

if port_listening "$PP_COLLECTOR_PORT"; then
  echo " ${OK} Collector 포트 ${PP_COLLECTOR_PORT} LISTEN"
else
  echo " ${WARN} Collector 포트 ${PP_COLLECTOR_PORT} LISTEN 없음"
fi

# gRPC ports (9991-9993): reported only when they are listening.
for GRPC_PORT in 9991 9992 9993; do
  if port_listening "$GRPC_PORT"; then
    echo " ${OK} gRPC 포트 ${GRPC_PORT} LISTEN"
  fi
done

# ── 2. Pinpoint Web ───────────────────────────────────────
echo
echo "[$SEP] 2. Pinpoint Web"
mapfile -t WEB_PIDS < <(find_pids 'pinpoint-web|PinpointWeb')
WEB_PROC=${#WEB_PIDS[@]}
if (( WEB_PROC > 0 )); then
  echo " ${OK} Pinpoint Web 실행 중"
else
  echo " ${WARN} Pinpoint Web 프로세스 없음"
  # Raise to warning, but never lower an existing critical result.
  if (( RESULT < 1 )); then
    RESULT=1
  fi
fi

if command -v curl &>/dev/null; then
  # On failure curl still prints "000"; replace it so the report shows
  # a plain "ERR" instead of "000ERR".
  HTTP_CODE=$(curl -sk -o /dev/null -w "%{http_code}" \
    --max-time 10 "${PP_WEB_URL}" 2>/dev/null) || HTTP_CODE="ERR"
  if [[ "$HTTP_CODE" =~ ^[23] ]]; then
    echo " ${OK} Web UI 응답: ${HTTP_CODE}"
  else
    echo " ${WARN} Web UI 응답: ${HTTP_CODE}"
  fi
fi

# ── 3. HBase connectivity ─────────────────────────────────
echo
echo "[$SEP] 3. HBase 연결"
mapfile -t HBASE_PIDS < <(find_pids 'hbase|HMaster|HRegionServer')
HBASE_PROC=${#HBASE_PIDS[@]}
if (( HBASE_PROC > 0 )); then
  echo " ${OK} HBase 프로세스 실행 중 (${HBASE_PROC}개)"
else
  echo " ${WARN} HBase 프로세스 없음 (외부 HBase 연결 시 무시)"
fi

if port_listening "$PP_HBASE_PORT"; then
  echo " ${OK} HBase Master 포트 ${PP_HBASE_PORT} LISTEN"
else
  echo " ${WARN} HBase 포트 ${PP_HBASE_PORT} 없음 (외부 HBase 사용 시 정상)"
fi

# ── 4. Flink (real-time aggregation) ──────────────────────
echo
echo "[$SEP] 4. Flink Job Manager"
mapfile -t FLINK_PIDS < <(find_pids 'flink|StandaloneJobManager|TaskManager')
FLINK_PROC=${#FLINK_PIDS[@]}
if (( FLINK_PROC > 0 )); then
  echo " ${OK} Flink 실행 중 (${FLINK_PROC}개)"
else
  echo " ${WARN} Flink 프로세스 없음 (Inspector 기능 비활성화)"
fi

if command -v curl &>/dev/null; then
  # Best effort: any curl/python3/JSON failure just yields no job list.
  FLINK_JOBS=$(curl -sk --max-time 5 \
    "http://localhost:${PP_FLINK_PORT}/jobs" 2>/dev/null \
    | python3 -c '
import json
import sys

for job in json.load(sys.stdin).get("jobs", []):
    print(" {}... {}".format(job["id"][:8], job["status"]))
' 2>/dev/null) || FLINK_JOBS=""
  if [[ -n "$FLINK_JOBS" ]]; then
    echo " Flink 작업:"
    echo "$FLINK_JOBS"
  fi
fi

# ── 5. Agent collection statistics (Web API) ──────────────
echo
echo "[$SEP] 5. 에이전트 수집 현황"
if command -v curl &>/dev/null && (( WEB_PROC > 0 )); then
  APPS=$(curl -sk --max-time 5 \
    "${PP_WEB_URL}/getApplicationList.pinpoint" 2>/dev/null) || APPS="[]"
  # Count the elements of the returned JSON value; 0 when unparsable.
  APP_COUNT=$(python3 -c \
    'import sys, json; print(len(json.load(sys.stdin)))' \
    <<<"$APPS" 2>/dev/null) || APP_COUNT=0
  echo " 모니터링 애플리케이션 수: ${APP_COUNT}"
fi

# ── 6. Log errors ─────────────────────────────────────────
echo
echo "[$SEP] 6. Pinpoint 로그 오류"
# Only the first log directory that exists is inspected.
for LOGDIR in "${PINPOINT_HOME}/logs" "${PINPOINT_HOME}/collector/logs" \
  "${PINPOINT_HOME}/web/logs"; do
  [[ -d "$LOGDIR" ]] || continue

  # Pick the most recently modified *.log without parsing `ls` output.
  LOGFILE=""
  for CANDIDATE in "${LOGDIR}"/*.log; do
    [[ -e "$CANDIDATE" ]] || continue # glob matched nothing
    if [[ -z "$LOGFILE" || "$CANDIDATE" -nt "$LOGFILE" ]]; then
      LOGFILE=$CANDIDATE
    fi
  done

  if [[ -n "$LOGFILE" && -r "$LOGFILE" ]]; then
    # Read the tail once so the count and the sample lines are consistent.
    mapfile -t ERR_LINES < <(tail -n 500 "$LOGFILE" | grep -E "ERROR|FATAL" || true)
    ERR=${#ERR_LINES[@]}
    echo " 최근 오류: ${ERR}건 (${LOGFILE})"
    if (( ERR > 0 )); then
      printf '%s\n' "${ERR_LINES[@]}" | tail -n 5 | sed 's/^/ /'
    fi
  fi
  break
done

# ── Summary ───────────────────────────────────────────────
echo
echo "======================================================"
case "$RESULT" in
  0) echo " 최종 결과: ${OK} Pinpoint APM 정상" ;;
  1) echo " 최종 결과: ${WARN} 주의 항목 있음" ;;
  2) echo " 최종 결과: ${CRIT} 즉시 조치 필요" ;;
esac
echo " 점검 완료: $(date '+%Y-%m-%d %H:%M:%S')"
echo "======================================================"
exit "$RESULT"