""" 자연어 쿼리 엔진 (Text-to-SQL) — Ollama 기반 운영자가 자연어로 ITSM 데이터를 조회한다. Ollama가 SQL을 생성하고 ITSM DB에서 결과를 반환. SELECT만 허용 — DML/DDL 절대 차단. 엔드포인트: POST /api/nlquery/ask — 자연어 → SQL → 결과 POST /api/nlquery/validate — SQL 안전성 검증 (실행 없음) GET /api/nlquery/schema — DB 스키마 컨텍스트 조회 GET /api/nlquery/examples — 예시 질의 목록 """ from __future__ import annotations import json import logging import re from datetime import datetime import httpx from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel, Field from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession from core.auth import get_current_user from database import get_db from models import User, QueryHistory logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/nlquery", tags=["자연어 쿼리"]) OLLAMA_URL = "http://localhost:11434" CHAT_MODEL = "llama3" MAX_ROWS = 500 QUERY_TIMEOUT = 5.0 # 민감 컬럼 — SQL 결과에서 자동 마스킹 SENSITIVE_COLS = {"os_pw_enc", "ssh_user", "ip_addr", "password", "secret", "token"} # DML/DDL 금지 키워드 FORBIDDEN_KEYWORDS = [ "INSERT", "UPDATE", "DELETE", "DROP", "TRUNCATE", "ALTER", "CREATE", "REPLACE", "GRANT", "REVOKE", "EXEC", "EXECUTE", "CALL", "MERGE", "UPSERT", ] DB_SCHEMA_CONTEXT = """ GUARDiA ITSM PostgreSQL 스키마 (주요 테이블): tb_sr_request: id, title, description, status(OPEN/IN_PROGRESS/PENDING/DONE), priority(LOW/MEDIUM/HIGH), category, assignee_id, created_at, updated_at tb_user: id, name, email, role(ADMIN/ENGINEER/PM/CUSTOMER), tenant_id, is_active, created_at tb_server_info: id, hostname, ip_addr, os_type, cpu_cores, memory_mb, inst_id, is_active, created_at tb_audit_log: id, user_id, action, detail, created_at tb_incident: id, title, severity(P1/P2/P3/P4), status, rca_summary, created_at, resolved_at tb_kpi_definition: id, name, display_name, unit, direction, target, period tb_kpi_value: id, kpi_id, value, calculated_at tb_jira_sync_mapping: id, sr_id, jira_issue_key, synced_at 조인 예시: - SR + 담당자: JOIN tb_user u ON sr.assignee_id = u.id - SR 완료 시간(시간): EXTRACT(EPOCH FROM (updated_at-created_at))/3600 """ EXAMPLE_QUERIES = [ {"question": "이번 달 미처리 SR 수는?", "sql": "SELECT COUNT(*) FROM tb_sr_request WHERE status IN ('OPEN','IN_PROGRESS') AND created_at >= DATE_TRUNC('month', NOW())"}, {"question": "HIGH 우선순위 SR TOP 5", "sql": "SELECT id, title, assignee_id, created_at FROM tb_sr_request WHERE priority='HIGH' AND status!='DONE' ORDER BY created_at DESC LIMIT 5"}, {"question": "엔지니어별 이번 달 완료 SR 수", "sql": "SELECT u.name, COUNT(*) as cnt FROM tb_sr_request sr JOIN tb_user u ON sr.assignee_id=u.id WHERE sr.status='DONE' AND sr.updated_at>=DATE_TRUNC('month',NOW()) GROUP BY u.name ORDER BY cnt DESC"}, {"question": "평균 SR 처리 시간 (시간)", "sql": "SELECT ROUND(AVG(EXTRACT(EPOCH FROM (updated_at-created_at))/3600)::numeric,1) as avg_hours FROM tb_sr_request WHERE status='DONE'"}, {"question": "가장 많이 발생한 SR 카테고리 TOP 5", "sql": "SELECT category, COUNT(*) as cnt FROM tb_sr_request GROUP BY category ORDER BY cnt DESC LIMIT 5"}, ] class NLQueryRequest(BaseModel): question: str = Field(..., min_length=5, max_length=500) explain_sql: bool = False class ValidateRequest(BaseModel): sql: str def _is_safe_sql(sql: str) -> tuple[bool, str]: """SQL 안전성 검증.""" sql_upper = sql.upper().strip() # SELECT로 시작해야 함 if not re.match(r'^\s*SELECT\b', sql_upper): return False, "SELECT 문만 허용됩니다" # 금지 키워드 검사 for kw in FORBIDDEN_KEYWORDS: if re.search(r'\b' + kw + r'\b', sql_upper): return False, f"금지된 SQL 키워드: {kw}" # 세미콜론 다중 구문 방지 if sql.count(';') > 1: return False, "다중 SQL 구문 금지" return True, "OK" def _mask_sensitive(rows: list[dict]) -> list[dict]: """결과에서 민감 컬럼 마스킹.""" masked = [] for row in rows: new_row = {} for k, v in row.items(): if k.lower() in SENSITIVE_COLS: new_row[k] = "***" else: new_row[k] = v masked.append(new_row) return masked async def _generate_sql(question: str) -> tuple[str, str]: """Ollama로 SQL 생성 → (sql, explanation).""" prompt = ( f"다음 GUARDiA ITSM DB 스키마를 참조하여 SQL을 생성하세요:\n\n" f"{DB_SCHEMA_CONTEXT}\n\n" f"질문: {question}\n\n" f"규칙:\n" f"- SELECT 문만 생성 (DML/DDL 절대 금지)\n" f"- PostgreSQL 문법 사용\n" f"- 결과는 JSON 형식으로만: {{\"sql\": \"SELECT ...\", \"explanation\": \"한국어 설명\"}}\n" f"- 테이블명 앞에 스키마 prefix 불필요\n" f"- LIMIT은 최대 {MAX_ROWS}으로 제한" ) try: async with httpx.AsyncClient(timeout=30) as client: r = await client.post(f"{OLLAMA_URL}/api/generate", json={ "model": CHAT_MODEL, "prompt": prompt, "stream": False, }) if r.status_code == 200: response_text = r.json().get("response", "") # JSON 추출 match = re.search(r'\{[^{}]*"sql"[^{}]*\}', response_text, re.DOTALL) if match: data = json.loads(match.group()) return data.get("sql", ""), data.get("explanation", "") # JSON 없으면 코드블록에서 추출 match = re.search(r'```(?:sql)?\s*(SELECT[^`]+)```', response_text, re.IGNORECASE | re.DOTALL) if match: return match.group(1).strip(), "Ollama 생성 SQL" except Exception as e: logger.warning(f"Ollama 호출 실패: {e}") return "", "SQL 생성 실패" @router.post("/ask") async def nl_query( req: NLQueryRequest, db: AsyncSession = Depends(get_db), user: User = Depends(get_current_user), ): """자연어 질문 → SQL 생성 → 실행 → 결과 반환.""" sql, explanation = await _generate_sql(req.question) if not sql: return {"question": req.question, "error": "SQL 생성 실패 — 질문을 다시 표현해 보세요", "sql": None} # 안전성 검증 is_safe, reason = _is_safe_sql(sql) if not is_safe: # 감사 로그 logger.warning(f"위험 SQL 차단: user={user.email}, reason={reason}, sql={sql[:100]}") return {"question": req.question, "error": f"보안 차단: {reason}", "sql": sql} # LIMIT 강제 추가 if "LIMIT" not in sql.upper(): sql = sql.rstrip(';') + f" LIMIT {MAX_ROWS}" # SQL 실행 try: result = await db.execute(text(sql)) rows = result.fetchall() columns = list(result.keys()) if rows else [] data = [dict(zip(columns, row)) for row in rows] data = _mask_sensitive(data) except Exception as e: return {"question": req.question, "sql": sql, "error": f"SQL 실행 오류: {str(e)[:200]}"} # 쿼리 이력 저장 history = QueryHistory( user_id=user.id, question=req.question, generated_sql=sql, row_count=len(data), executed_at=datetime.utcnow(), ) db.add(history) await db.commit() response = { "question": req.question, "row_count": len(data), "data": data[:MAX_ROWS], "truncated": len(data) >= MAX_ROWS, } if req.explain_sql: response["sql"] = sql response["explanation"] = explanation return response @router.post("/validate") async def validate_sql(req: ValidateRequest): """SQL 안전성 검증 (실행 없음).""" is_safe, reason = _is_safe_sql(req.sql) return {"safe": is_safe, "reason": reason, "sql": req.sql} @router.get("/schema") async def get_schema(_: User = Depends(get_current_user)): return {"schema": DB_SCHEMA_CONTEXT, "sensitive_columns": list(SENSITIVE_COLS)} @router.get("/examples") async def get_examples(_: User = Depends(get_current_user)): return {"examples": EXAMPLE_QUERIES}