"""GUARDiA Data AI v2 — Gen6 vector DB, RAG v2, LoRA API, embeddings,
semantic search, and AI pipeline management.

All state (collections, vectors, LoRA jobs, pipelines, embedding cache) is
held in module-level dictionaries, so it is per-process and lost on restart.
Embeddings and LLM answers are requested from the Ollama HTTP API at
``OLLAMA``; when that call fails, the helpers fall back to placeholder
values instead of raising.
"""
import json
import logging
import os
import random
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

import httpx
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/data-ai", tags=["Data AI v2"])
_OPEN = os.environ.get("GUARDIA_NETWORK_MODE") == "open"
OLLAMA = "http://localhost:11434"

_vector_store: Dict[str, Dict] = {}  # collection -> {id -> {vector, metadata}}
_collections: Dict[str, Dict] = {}
_lora_jobs: Dict[str, Dict] = {}
_pipelines: Dict[str, Dict] = {}
_embeddings_cache: Dict[str, List[float]] = {}

# Upper bound on cached embeddings so the cache cannot grow without limit.
_EMBEDDING_CACHE_MAX = 1024


def _utc_now_iso() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    Produces the same text as ``datetime.utcnow().isoformat()`` (no UTC
    offset suffix) without calling the deprecated ``utcnow``.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


class CollectionCreate(BaseModel):
    """Request body for creating a vector collection."""

    name: str
    dimension: int = 768
    metric: str = "cosine"
    description: str = ""


class VectorInsert(BaseModel):
    """Request body for inserting one text into a collection."""

    collection: str
    id: Optional[str] = None  # generated as a UUID4 when omitted
    text: str
    metadata: Dict[str, Any] = {}


class VectorSearch(BaseModel):
    """Request body for a vector search."""

    collection: str
    query: str
    top_k: int = 5
    filter: Dict[str, Any] = {}  # accepted but not applied by vector_search


class RAGQuery(BaseModel):
    """Request body for a RAG query."""

    query: str
    collection: str = "guardia-kb"
    top_k: int = 3
    model: str = "llama3"
    include_sources: bool = True


class LoRAJobCreate(BaseModel):
    """Request body for registering a LoRA fine-tuning job."""

    base_model: str = "llama3"
    dataset_path: str
    epochs: int = 3
    learning_rate: float = 0.0001
    description: str = ""


class PipelineCreate(BaseModel):
    """Request body for registering an AI pipeline."""

    name: str
    steps: List[Dict[str, Any]]
    trigger: str = "manual"


class EmbeddingRequest(BaseModel):
    """Request body for the embeddings endpoint."""

    texts: List[str]
    model: str = "nomic-embed-text"


# ── Collection management ────────────────────────────────────────────────
@router.post("/collections")
async def create_collection(col: CollectionCreate):
    """Register a collection and return its record.

    Re-creating an existing name replaces the record and empties its
    vector store (``doc_count`` restarts at 0).
    """
    _collections[col.name] = {
        **col.model_dump(),
        "created_at": _utc_now_iso(),
        "doc_count": 0,
    }
    _vector_store[col.name] = {}
    return _collections[col.name]


@router.get("/collections")
async def list_collections():
    """List registered collections.

    When nothing has been registered yet, two hard-coded sample entries are
    returned instead of an empty list.
    """
    cols = list(_collections.values()) or [
        {"name": "guardia-kb", "dimension": 768, "doc_count": 142, "metric": "cosine"},
        {"name": "sr-history", "dimension": 768, "doc_count": 1024, "metric": "cosine"},
    ]
    return {"collections": cols, "total": len(cols)}


@router.get("/collections/{name}")
async def get_collection(name: str):
    """Return the collection record, or a default stub for an unknown name."""
    return _collections.get(name, {"name": name, "dimension": 768, "doc_count": 0})


@router.delete("/collections/{name}")
async def delete_collection(name: str):
    """Remove a collection and its vectors; unknown names are ignored."""
    _collections.pop(name, None)
    _vector_store.pop(name, None)
    return {"deleted": name}


# ── Vector insert / search ───────────────────────────────────────────────
@router.post("/vectors/insert")
async def insert_vector(req: VectorInsert):
    """Embed ``req.text`` and store it under ``req.id`` (or a new UUID4).

    Only a five-component preview of the embedding plus a ``"..."`` marker is
    stored. ``doc_count`` of a registered collection is incremented only
    when the id is new, so overwriting an entry does not inflate the count.
    """
    vid = req.id or str(uuid.uuid4())
    # Generate the embedding (Ollama nomic-embed-text).
    embedding = await _get_embedding(req.text)
    # Look the store up after the await: the collection may have been
    # deleted while the embedding request was in flight.
    store = _vector_store.setdefault(req.collection, {})
    is_new = vid not in store
    store[vid] = {
        "id": vid,
        "text": req.text,
        "vector": embedding[:5] + ["..."],
        "metadata": req.metadata,
        "inserted_at": _utc_now_iso(),
    }
    if is_new and req.collection in _collections:
        _collections[req.collection]["doc_count"] += 1
    return {"id": vid, "collection": req.collection, "inserted": True}


@router.post("/vectors/batch-insert")
async def batch_insert(collection: str, items: List[Dict[str, Any]]):
    """Acknowledge up to 100 items with freshly generated ids.

    NOTE(review): nothing is written to ``_vector_store`` here; the response
    only reports generated ids. Confirm whether real insertion is intended.
    """
    results = [
        {"id": str(uuid.uuid4()), "status": "inserted"}
        for _ in items[:100]  # max 100 per batch
    ]
    return {"collection": collection, "inserted": len(results), "results": results}


@router.post("/vectors/search")
async def vector_search(req: VectorSearch):
    """Semantic vector search.

    Returns the first ``top_k`` stored entries of the collection. The score
    is derived from the result position only (0.95, 0.90, ...); neither the
    query text nor ``req.filter`` influences the result.
    """
    store = _vector_store.get(req.collection, {})
    # A negative top_k would otherwise slice from the end of the list.
    limit = max(req.top_k, 0)
    hits = list(store.values())[:limit]
    return {
        "query": req.query,
        "collection": req.collection,
        "results": [
            {
                "id": hit["id"],
                "text": hit.get("text", "")[:200],
                "score": round(0.95 - rank * 0.05, 3),
                # Entries written by older code paths may lack metadata.
                "metadata": hit.get("metadata", {}),
            }
            for rank, hit in enumerate(hits)
        ],
        "total_results": len(hits),
    }


@router.delete("/vectors/{collection}/{vid}")
async def delete_vector(collection: str, vid: str):
    """Delete one vector; unknown collections or ids are ignored."""
    removed = _vector_store.get(collection, {}).pop(vid, None)
    if removed is not None and collection in _collections:
        record = _collections[collection]
        record["doc_count"] = max(0, record["doc_count"] - 1)
    return {"deleted": vid, "collection": collection}


# ── RAG v2 ───────────────────────────────────────────────────────────────
@router.post("/rag/query")
async def rag_query(req: RAGQuery):
    """RAG v2 — vector search followed by LLM answer generation."""
    # 1) Vector search
    search_result = await vector_search(
        VectorSearch(collection=req.collection, query=req.query, top_k=req.top_k)
    )
    sources = search_result.get("results", [])
    # 2) Assemble the context from numbered source snippets
    context = "\n".join(
        f"[{i+1}] {s['text'][:300]}" for i, s in enumerate(sources)
    )
    prompt = (f"다음 문서를 참고하여 질문에 답하라.\n\n문서:\n{context}\n\n질문: {req.query}\n\n답변:")
    # 3) Call the LLM
    answer = await _call_llm(req.model, prompt)
    return {
        "query": req.query,
        "answer": answer,
        "model": req.model,
        "sources": sources if req.include_sources else [],
        "collection": req.collection,
        "ts": _utc_now_iso(),
    }


@router.post("/rag/index")
async def index_documents(collection: str, documents: List[str]):
    """Store up to 50 documents (each truncated to 500 characters).

    ``indexed`` in the response is the number of documents actually stored,
    and a registered collection's ``doc_count`` is increased by that amount.
    No embedding is computed for these entries.
    """
    batch = documents[:50]
    store = _vector_store.setdefault(collection, {})
    for doc in batch:
        vid = str(uuid.uuid4())
        store[vid] = {
            "id": vid,
            "text": doc[:500],
            # vector_search and rag_query read this key on every entry.
            "metadata": {},
            "inserted_at": _utc_now_iso(),
        }
    if collection in _collections:
        _collections[collection]["doc_count"] += len(batch)
    return {"collection": collection, "indexed": len(batch), "ts": _utc_now_iso()}


@router.get("/rag/collections")
async def rag_collections():
    """Return a hard-coded list of RAG collections."""
    return {"collections": [
        {"name": "guardia-kb", "docs": 142, "description": "GUARDiA 기술 문서 KB"},
        {"name": "sr-history", "docs": 1024, "description": "SR 처리 이력"},
        {"name": "runbooks", "docs": 56, "description": "운영 런북"},
    ]}


# ── LoRA fine-tuning API ─────────────────────────────────────────────────
def _require_lora_job(jid: str) -> Dict:
    """Return the stored LoRA job, raising HTTP 404 for an unknown id."""
    job = _lora_jobs.get(jid)
    if job is None:
        raise HTTPException(404)
    return job


@router.post("/lora/jobs")
async def create_lora_job(job: LoRAJobCreate):
    """Register a LoRA job in the ``queued`` state and return its record."""
    jid = f"LORA-{uuid.uuid4().hex[:8].upper()}"
    _lora_jobs[jid] = {
        **job.model_dump(),
        "id": jid,
        "status": "queued",
        "progress": 0,
        "created_at": _utc_now_iso(),
    }
    return _lora_jobs[jid]


@router.get("/lora/jobs")
async def list_lora_jobs():
    """List every registered LoRA job."""
    return {"jobs": list(_lora_jobs.values()), "total": len(_lora_jobs)}


@router.get("/lora/jobs/{jid}")
async def get_lora_job(jid: str):
    """Return one LoRA job; HTTP 404 when the id is unknown."""
    return _require_lora_job(jid)


@router.post("/lora/jobs/{jid}/start")
async def start_lora(jid: str):
    """Mark a job as ``training`` and stamp ``started_at``; 404 if unknown."""
    job = _require_lora_job(jid)
    job["status"] = "training"
    job["started_at"] = _utc_now_iso()
    return job


@router.post("/lora/jobs/{jid}/cancel")
async def cancel_lora(jid: str):
    """Mark a job as ``cancelled``; HTTP 404 when the id is unknown."""
    job = _require_lora_job(jid)
    job["status"] = "cancelled"
    return job


@router.get("/lora/models")
async def list_lora_models():
    """Return a hard-coded list of LoRA models."""
    return {"models": [
        {"id": "guardia-lora-v1", "base": "llama3", "trained_on": "sr-history",
         "accuracy": 0.89, "deployed": True},
    ]}


# ── Embeddings ───────────────────────────────────────────────────────────
@router.post("/embeddings")
async def create_embeddings(req: EmbeddingRequest):
    """Embed up to 50 texts.

    Each returned vector keeps its first five components and has the rest
    replaced by ``0.0``; ``dimension`` is the full embedding length.

    NOTE(review): ``req.model`` is only echoed in the response; the embedding
    itself always comes from ``_get_embedding`` (nomic-embed-text).
    """
    results = []
    for text in req.texts[:50]:
        emb = await _get_embedding(text)
        results.append({
            "text": text[:100],
            "embedding": emb[:5] + [0.0] * (len(emb) - 5),
            "dimension": len(emb),
        })
    return {"model": req.model, "embeddings": results, "count": len(results)}


@router.get("/embeddings/models")
async def embedding_models():
    """Return a hard-coded list of embedding models."""
    return {"models": [
        {"name": "nomic-embed-text", "dimension": 768, "available": True, "recommended": True},
        {"name": "mxbai-embed-large", "dimension": 1024, "available": False},
    ]}


# ── AI pipelines ─────────────────────────────────────────────────────────
@router.post("/pipelines")
async def create_pipeline(pipe: PipelineCreate):
    """Register a pipeline in the ``ready`` state and return its record."""
    pid = f"PIPE-{uuid.uuid4().hex[:8].upper()}"
    _pipelines[pid] = {
        **pipe.model_dump(),
        "id": pid,
        "status": "ready",
        "created_at": _utc_now_iso(),
    }
    return _pipelines[pid]


@router.get("/pipelines")
async def list_pipelines():
    """List every registered pipeline."""
    return {"pipelines": list(_pipelines.values())}


@router.post("/pipelines/{pid}/run")
async def run_pipeline(pid: str, inputs: Optional[Dict[str, Any]] = None):
    """Report a completed run for a registered pipeline; 404 if unknown.

    The pipeline steps are not executed here; the response is a fixed
    ``completed`` result echoing ``inputs`` (``{}`` when no body is sent).
    """
    if _pipelines.get(pid) is None:
        raise HTTPException(404)
    return {
        "run_id": str(uuid.uuid4()),
        "pipeline": pid,
        # None default avoids a shared mutable default argument.
        "inputs": inputs if inputs is not None else {},
        "status": "completed",
        "output": {"processed": True},
        "ts": _utc_now_iso(),
    }


# ── Helpers ──────────────────────────────────────────────────────────────
def _cache_embedding(text: str, emb: List[float]) -> None:
    """Store an embedding, evicting the oldest entry once the cache is full."""
    if text not in _embeddings_cache and len(_embeddings_cache) >= _EMBEDDING_CACHE_MAX:
        # Dicts preserve insertion order, so the first key is the oldest.
        _embeddings_cache.pop(next(iter(_embeddings_cache)))
    _embeddings_cache[text] = emb


async def _get_embedding(text: str) -> List[float]:
    """Return an embedding for ``text`` from Ollama, with a random fallback.

    Results are cached by the full text, so two texts that merely share a
    prefix never receive each other's embedding. When the request fails or
    returns a non-200 status, a random 768-component vector is returned
    (and not cached) so callers keep working without Ollama.
    """
    cached = _embeddings_cache.get(text)
    if cached is not None:
        return cached
    try:
        async with httpx.AsyncClient(timeout=30.0) as c:
            r = await c.post(
                f"{OLLAMA}/api/embeddings",
                json={"model": "nomic-embed-text", "prompt": text},
            )
            if r.status_code == 200:
                emb = r.json().get("embedding", [0.0] * 768)
                _cache_embedding(text, emb)
                return emb
            logger.warning("Ollama embeddings returned HTTP %s", r.status_code)
    except Exception:
        # Deliberately best-effort: log and fall through to the placeholder.
        logger.warning("Ollama embeddings request failed", exc_info=True)
    return [round(random.uniform(-1, 1), 4) for _ in range(768)]


async def _call_llm(model: str, prompt: str) -> str:
    """Return the Ollama completion for ``prompt``.

    On any failure or non-200 status, a placeholder string containing the
    first 100 characters of the prompt is returned instead of raising.
    """
    try:
        async with httpx.AsyncClient(timeout=60.0) as c:
            r = await c.post(
                f"{OLLAMA}/api/generate",
                json={"model": model, "prompt": prompt, "stream": False},
            )
            if r.status_code == 200:
                return r.json().get("response", "")
            logger.warning("Ollama generate returned HTTP %s", r.status_code)
    except Exception:
        # Deliberately best-effort: log and fall through to the placeholder.
        logger.warning("Ollama generate request failed", exc_info=True)
    return f"[Ollama 불가] 쿼리: {prompt[:100]}"


# NOTE(review): combined with the router prefix this resolves to
# /api/data-ai/data-ai/health — confirm the duplicated segment is intended.
@router.get("/data-ai/health")
async def health():
    """Report in-memory counts of collections, vectors, jobs and pipelines."""
    return {
        "status": "healthy",
        "collections": len(_collections),
        "vectors_total": sum(len(v) for v in _vector_store.values()),
        "lora_jobs": len(_lora_jobs),
        "pipelines": len(_pipelines),
    }