262 lines
11 KiB
Python
262 lines
11 KiB
Python
"""
|
|
GUARDiA Data AI v2 — Gen6
|
|
벡터DB·RAG v2·LoRA API·임베딩·시맨틱 검색·AI 파이프라인 관리
|
|
"""
|
|
import hashlib
import json
import logging
import os
import random
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
|
|
|
|
# Router on which every endpoint of this module is registered.
router = APIRouter(tags=["Data AI v2"], prefix="/api/data-ai")

# True only when GUARDIA_NETWORK_MODE is set to exactly "open".
_OPEN = os.getenv("GUARDIA_NETWORK_MODE") == "open"
# Base URL of the Ollama HTTP API called by the helper functions.
OLLAMA = "http://localhost:11434"

# Module-level in-memory state; these are plain dicts that live in the process.
_vector_store: Dict[str, Dict] = dict()  # collection -> {id -> {vector, metadata}}
_collections: Dict[str, Dict] = dict()  # collection name -> collection record
_lora_jobs: Dict[str, Dict] = dict()  # job id -> job record
_pipelines: Dict[str, Dict] = dict()  # pipeline id -> pipeline record
_embeddings_cache: Dict[str, List[float]] = dict()  # cache key -> embedding
|
|
|
|
class CollectionCreate(BaseModel):
    """Request body for registering a vector collection."""

    # Collection name.
    name: str
    # Declared embedding dimension.
    dimension: int = 768
    # Name of the distance metric; stored as given.
    metric: str = "cosine"
    # Free-form description.
    description: str = ""
|
|
|
|
class VectorInsert(BaseModel):
    """Request body for inserting a single text into a collection."""

    # Target collection name.
    collection: str
    # Optional caller-supplied identifier.
    id: Optional[str] = None
    # Text to embed and store.
    text: str
    # Arbitrary metadata stored alongside the text.
    metadata: Dict[str, Any] = {}
|
|
|
|
class VectorSearch(BaseModel):
    """Request body for searching a collection."""

    # Collection to search.
    collection: str
    # Query text.
    query: str
    # Maximum number of results to return.
    top_k: int = 5
    # Optional filter mapping.
    filter: Dict[str, Any] = {}
|
|
|
|
class RAGQuery(BaseModel):
    """Request body for a retrieval-augmented generation query."""

    # Question to answer.
    query: str
    # Collection used for retrieval.
    collection: str = "guardia-kb"
    # Number of retrieved documents placed in the prompt.
    top_k: int = 3
    # Name of the LLM passed to the generation call.
    model: str = "llama3"
    # Whether retrieved sources are returned with the answer.
    include_sources: bool = True
|
|
|
|
class LoRAJobCreate(BaseModel):
    """Request body for registering a LoRA fine-tuning job."""

    # Name of the base model.
    base_model: str = "llama3"
    # Path of the training dataset (required).
    dataset_path: str
    # Number of epochs.
    epochs: int = 3
    # Learning rate.
    learning_rate: float = 0.0001
    # Free-form description.
    description: str = ""
|
|
|
|
class PipelineCreate(BaseModel):
    """Request body for registering an AI pipeline."""

    # Pipeline name.
    name: str
    # Step definitions, stored as given.
    steps: List[Dict[str, Any]]
    # Trigger name.
    trigger: str = "manual"
|
|
|
|
class EmbeddingRequest(BaseModel):
    """Request body for generating embeddings for a list of texts."""

    # Texts to embed.
    texts: List[str]
    # Embedding model name.
    model: str = "nomic-embed-text"
|
|
|
|
# ── 컬렉션 관리 ──────────────────────────────────────────────────────────
|
|
@router.post("/collections")
async def create_collection(col: CollectionCreate):
    """Register (or re-register) a collection and return its record.

    Vectors already stored under ``col.name`` are kept instead of being
    wiped, and ``doc_count`` is set to the number of vectors actually stored.
    Such vectors can exist because the insert endpoints create the
    per-collection store on demand, or because the collection is POSTed twice.

    Args:
        col: Collection definition from the request body.

    Returns:
        The stored collection record (model fields plus ``created_at`` and
        ``doc_count``).
    """
    # setdefault keeps an existing store; the original assignment of a fresh
    # dict silently discarded every vector already inserted.
    store = _vector_store.setdefault(col.name, {})
    _collections[col.name] = {
        **col.model_dump(),
        "created_at": datetime.utcnow().isoformat(),
        "doc_count": len(store),
    }
    return _collections[col.name]
|
|
|
|
@router.get("/collections")
async def list_collections():
    """List registered collections.

    When no collection is registered, two hard-coded sample records are
    returned instead of an empty list.
    """
    registered = list(_collections.values())
    if not registered:
        # Nothing registered: fall back to the built-in sample records.
        registered = [
            {"name": "guardia-kb", "dimension": 768, "doc_count": 142, "metric": "cosine"},
            {"name": "sr-history", "dimension": 768, "doc_count": 1024, "metric": "cosine"},
        ]
    return {"collections": registered, "total": len(registered)}
|
|
|
|
@router.get("/collections/{name}")
async def get_collection(name: str):
    """Return the record of collection ``name``.

    Unknown names do not produce an error; a default record with dimension
    768 and ``doc_count`` 0 is returned for them.
    """
    if name in _collections:
        return _collections[name]
    return {"name": name, "dimension": 768, "doc_count": 0}
|
|
|
|
@router.delete("/collections/{name}")
async def delete_collection(name: str):
    """Remove collection ``name`` and its stored vectors.

    Deleting a name that does not exist is not an error.
    """
    # Drop the collection record first, then its vector store.
    for registry in (_collections, _vector_store):
        registry.pop(name, None)
    return {"deleted": name}
|
|
|
|
# ── 벡터 삽입 / 검색 ──────────────────────────────────────────────────────
|
|
@router.post("/vectors/insert")
async def insert_vector(req: VectorInsert):
    """Embed ``req.text`` and store it in ``req.collection``.

    The per-collection store is created on demand. Only the first five
    embedding components followed by a ``"..."`` marker are kept in the
    stored entry, not the full vector.

    ``doc_count`` of a registered collection is incremented only when the id
    is new; overwriting an existing id no longer inflates the counter.

    Args:
        req: Insert request; a random UUID is used when ``req.id`` is empty.

    Returns:
        ``{"id", "collection", "inserted"}`` for the stored entry.
    """
    vid = req.id or str(uuid.uuid4())
    # Embedding generation (Ollama nomic-embed-text via _get_embedding).
    embedding = await _get_embedding(req.text)
    # Resolve the store after the await so a concurrent collection delete
    # cannot leave us writing into a missing key.
    store = _vector_store.setdefault(req.collection, {})
    is_new = vid not in store
    store[vid] = {
        "id": vid,
        "text": req.text,
        "vector": embedding[:5] + ["..."],
        "metadata": req.metadata,
        "inserted_at": datetime.utcnow().isoformat(),
    }
    if is_new and req.collection in _collections:
        _collections[req.collection]["doc_count"] += 1
    return {"id": vid, "collection": req.collection, "inserted": True}
|
|
|
|
@router.post("/vectors/batch-insert")
async def batch_insert(collection: str, items: List[Dict[str, Any]]):
    """Insert up to 100 items into ``collection`` in one call.

    Previously this endpoint generated ids and reported them as inserted
    without storing anything. Items are now persisted the same way the
    single-insert endpoint stores them.

    NOTE(review): the item schema is not declared anywhere in this module;
    this assumes items mirror ``VectorInsert`` (``text``, optional ``id``,
    optional ``metadata``) — confirm against callers.

    Args:
        collection: Target collection name.
        items: Items to insert; only the first 100 are processed.

    Returns:
        ``{"collection", "inserted", "results"}`` where each result carries
        the item id and a ``status`` of ``"inserted"`` or ``"skipped"``
        (items without a non-empty string ``text``).
    """
    results = []
    inserted = 0
    for item in items[:100]:  # max 100 per batch
        text = item.get("text")
        if not isinstance(text, str) or not text:
            # Nothing to embed or store for this item.
            results.append({"id": item.get("id"), "status": "skipped"})
            continue
        vid = str(item.get("id") or uuid.uuid4())
        metadata = item.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}
        embedding = await _get_embedding(text)
        # Resolve the store after the await (see insert_vector's layout).
        store = _vector_store.setdefault(collection, {})
        is_new = vid not in store
        store[vid] = {
            "id": vid,
            "text": text,
            "vector": embedding[:5] + ["..."],
            "metadata": metadata,
            "inserted_at": datetime.utcnow().isoformat(),
        }
        if is_new and collection in _collections:
            record = _collections[collection]
            record["doc_count"] = record.get("doc_count", 0) + 1
        inserted += 1
        results.append({"id": vid, "status": "inserted"})
    return {"collection": collection, "inserted": inserted, "results": results}
|
|
|
|
@router.post("/vectors/search")
async def vector_search(req: VectorSearch):
    """Return up to ``req.top_k`` entries of a collection.

    No vector similarity is computed here: entries are taken in store order
    and ``score`` is a rank-derived placeholder (0.95 minus 0.05 per
    position).

    Changes from the earlier behaviour:
      * entries without a ``metadata`` key (as written by the RAG index
        endpoint) no longer raise ``KeyError``;
      * ``req.filter`` is applied as an exact match on metadata keys; the
        default empty filter keeps every entry;
      * a negative ``top_k`` yields no results instead of slicing from the
        end of the list.

    Args:
        req: Search request.

    Returns:
        ``{"query", "collection", "results", "total_results"}``; each result
        has ``id``, ``text`` (first 200 characters), ``score``, ``metadata``.
    """
    store = _vector_store.get(req.collection, {})
    wanted = req.filter or {}

    def _matches(entry: Dict[str, Any]) -> bool:
        # Every filter key must be present in the metadata with an equal value.
        meta = entry.get("metadata") or {}
        return all(key in meta and meta[key] == value for key, value in wanted.items())

    limit = max(req.top_k, 0)
    hits = [entry for entry in store.values() if _matches(entry)][:limit]
    return {
        "query": req.query,
        "collection": req.collection,
        "results": [
            {
                "id": hit["id"],
                "text": hit["text"][:200],
                "score": round(0.95 - rank * 0.05, 3),
                "metadata": hit.get("metadata", {}),
            }
            for rank, hit in enumerate(hits)
        ],
        "total_results": len(hits),
    }
|
|
|
|
@router.delete("/vectors/{collection}/{vid}")
async def delete_vector(collection: str, vid: str):
    """Remove vector ``vid`` from ``collection``.

    Deleting an unknown id or collection is not an error. When an entry is
    actually removed and the collection is registered, its ``doc_count`` is
    decremented (never below zero) so it stays in step with inserts.

    Returns:
        ``{"deleted", "collection"}`` echoing the request.
    """
    store = _vector_store.get(collection, {})
    removed = store.pop(vid, None)
    if removed is not None and collection in _collections:
        record = _collections[collection]
        record["doc_count"] = max(record.get("doc_count", 0) - 1, 0)
    return {"deleted": vid, "collection": collection}
|
|
|
|
# ── RAG v2 ────────────────────────────────────────────────────────────────
|
|
@router.post("/rag/query")
async def rag_query(req: RAGQuery):
    """RAG v2: search the collection, then ask the LLM to answer.

    Returns:
        The query, the generated answer, the model name, the retrieved
        sources (empty when ``include_sources`` is false), the collection
        and a timestamp.
    """
    # 1) Retrieval through the search endpoint function.
    search_request = VectorSearch(
        collection=req.collection,
        query=req.query,
        top_k=req.top_k,
    )
    search_result = await vector_search(search_request)
    sources = search_result.get("results", [])

    # 2) Build the context: one numbered line per source, 300 chars at most.
    numbered = []
    for position, source in enumerate(sources, start=1):
        numbered.append(f"[{position}] {source['text'][:300]}")
    context = "\n".join(numbered)
    prompt = f"다음 문서를 참고하여 질문에 답하라.\n\n문서:\n{context}\n\n질문: {req.query}\n\n답변:"

    # 3) Generation.
    answer = await _call_llm(req.model, prompt)

    returned_sources = sources if req.include_sources else []
    return {
        "query": req.query,
        "answer": answer,
        "model": req.model,
        "sources": returned_sources,
        "collection": req.collection,
        "ts": datetime.utcnow().isoformat(),
    }
|
|
|
|
@router.post("/rag/index")
async def index_documents(collection: str, documents: List[str]):
    """Store up to 50 documents (first 500 characters each) in ``collection``.

    No embedding is generated here; entries hold the text only.

    Fixes over the earlier behaviour:
      * ``indexed`` reports the number of documents actually stored rather
        than the length of the request, which over-reported past 50;
      * each entry carries an empty ``metadata`` dict so it has the same
        keys the search endpoint reads;
      * ``doc_count`` of a registered collection is updated.

    Args:
        collection: Target collection name; its store is created on demand.
        documents: Raw document texts.

    Returns:
        ``{"collection", "indexed", "ts"}``.
    """
    batch = documents[:50]
    if batch:
        store = _vector_store.setdefault(collection, {})
        for doc in batch:
            vid = str(uuid.uuid4())
            store[vid] = {
                "id": vid,
                "text": doc[:500],
                "metadata": {},
                "inserted_at": datetime.utcnow().isoformat(),
            }
        if collection in _collections:
            record = _collections[collection]
            record["doc_count"] = record.get("doc_count", 0) + len(batch)
    return {
        "collection": collection,
        "indexed": len(batch),
        "ts": datetime.utcnow().isoformat(),
    }
|
|
|
|
@router.get("/rag/collections")
async def rag_collections():
    """Return a fixed list of RAG collections.

    The list is hard-coded; it does not read the in-memory stores.
    """
    catalogue = [
        {"name": "guardia-kb", "docs": 142, "description": "GUARDiA 기술 문서 KB"},
        {"name": "sr-history", "docs": 1024, "description": "SR 처리 이력"},
        {"name": "runbooks", "docs": 56, "description": "운영 런북"},
    ]
    return {"collections": catalogue}
|
|
|
|
# ── LoRA 파인튜닝 API ─────────────────────────────────────────────────────
|
|
@router.post("/lora/jobs")
async def create_lora_job(job: LoRAJobCreate):
    """Register a LoRA job in the ``queued`` state and return its record.

    The id has the form ``LORA-`` followed by eight upper-case hex digits.
    """
    jid = "LORA-" + uuid.uuid4().hex[:8].upper()
    record = job.model_dump()
    record.update(
        {
            "id": jid,
            "status": "queued",
            "progress": 0,
            "created_at": datetime.utcnow().isoformat(),
        }
    )
    _lora_jobs[jid] = record
    return record
|
|
|
|
@router.get("/lora/jobs")
async def list_lora_jobs():
    """Return every registered LoRA job together with the job count."""
    jobs = list(_lora_jobs.values())
    return {"jobs": jobs, "total": len(_lora_jobs)}
|
|
|
|
@router.get("/lora/jobs/{jid}")
async def get_lora_job(jid: str):
    """Return the LoRA job ``jid``.

    Raises:
        HTTPException: 404 when no job is stored under ``jid``.
    """
    job = _lora_jobs.get(jid)
    if job:
        return job
    raise HTTPException(404)
|
|
|
|
@router.post("/lora/jobs/{jid}/start")
async def start_lora(jid: str):
    """Mark LoRA job ``jid`` as ``training`` and stamp ``started_at``.

    Only the stored record is updated; the job's current status is not
    checked first.

    Raises:
        HTTPException: 404 when no job is stored under ``jid``.
    """
    job = _lora_jobs.get(jid)
    if not job:
        raise HTTPException(404)
    job.update(status="training", started_at=datetime.utcnow().isoformat())
    return job
|
|
|
|
@router.post("/lora/jobs/{jid}/cancel")
async def cancel_lora(jid: str):
    """Mark LoRA job ``jid`` as ``cancelled`` and return its record.

    Raises:
        HTTPException: 404 when no job is stored under ``jid``.
    """
    job = _lora_jobs.get(jid)
    if not job:
        raise HTTPException(404)
    job.update(status="cancelled")
    return job
|
|
|
|
@router.get("/lora/models")
async def list_lora_models():
    """Return a fixed, hard-coded list of LoRA models."""
    guardia_v1 = {
        "id": "guardia-lora-v1",
        "base": "llama3",
        "trained_on": "sr-history",
        "accuracy": 0.89,
        "deployed": True,
    }
    return {"models": [guardia_v1]}
|
|
|
|
# ── 임베딩 ────────────────────────────────────────────────────────────────
|
|
@router.post("/embeddings")
async def create_embeddings(req: EmbeddingRequest):
    """Generate embeddings for up to 50 texts.

    Each result holds the first 100 characters of the text, the embedding
    with every component after the fifth replaced by ``0.0``, and the
    embedding length. ``req.model`` is echoed in the response; it is not
    passed to the embedding helper.
    """
    limited_texts = req.texts[:50]
    payload = []
    for text in limited_texts:
        vector = await _get_embedding(text)
        # Keep the five leading components, zero-fill the remainder.
        zero_tail = [0.0] * (len(vector) - 5)
        payload.append(
            {
                "text": text[:100],
                "embedding": vector[:5] + zero_tail,
                "dimension": len(vector),
            }
        )
    return {"model": req.model, "embeddings": payload, "count": len(payload)}
|
|
|
|
@router.get("/embeddings/models")
async def embedding_models():
    """Return a fixed, hard-coded list of embedding models."""
    known_models = [
        {"name": "nomic-embed-text", "dimension": 768, "available": True, "recommended": True},
        {"name": "mxbai-embed-large", "dimension": 1024, "available": False},
    ]
    return {"models": known_models}
|
|
|
|
# ── AI 파이프라인 ─────────────────────────────────────────────────────────
|
|
@router.post("/pipelines")
async def create_pipeline(pipe: PipelineCreate):
    """Register a pipeline in the ``ready`` state and return its record.

    The id has the form ``PIPE-`` followed by eight upper-case hex digits.
    """
    pid = "PIPE-" + uuid.uuid4().hex[:8].upper()
    record = pipe.model_dump()
    record.update(
        {
            "id": pid,
            "status": "ready",
            "created_at": datetime.utcnow().isoformat(),
        }
    )
    _pipelines[pid] = record
    return record
|
|
|
|
@router.get("/pipelines")
async def list_pipelines():
    """Return every registered pipeline record."""
    registered = list(_pipelines.values())
    return {"pipelines": registered}
|
|
|
|
@router.post("/pipelines/{pid}/run")
async def run_pipeline(pid: str, inputs: Dict[str, Any] = {}):
    """Return a completed-run record for pipeline ``pid``.

    The pipeline steps are not executed here: the response always carries
    status ``completed`` and a fixed output, and echoes ``inputs``.

    Raises:
        HTTPException: 404 when no pipeline is stored under ``pid``.
    """
    if not _pipelines.get(pid):
        raise HTTPException(404)
    return {
        "run_id": str(uuid.uuid4()),
        "pipeline": pid,
        "inputs": inputs,
        "status": "completed",
        "output": {"processed": True},
        "ts": datetime.utcnow().isoformat(),
    }
|
|
|
|
# ── 헬퍼 ──────────────────────────────────────────────────────────────────
|
|
async def _get_embedding(text: str) -> List[float]:
    """Return an embedding for ``text``, using the cache when possible.

    The Ollama ``/api/embeddings`` endpoint is called with the
    ``nomic-embed-text`` model. Successful responses are cached under a
    SHA-256 digest of the full text. The earlier key was the first 100
    characters, so different texts sharing that prefix received each other's
    embedding.

    This is best-effort: on a non-200 response or any exception the failure
    is logged and a random 768-component vector is returned. That fallback
    is not cached, so repeated calls for the same text may differ.

    Args:
        text: Text to embed.

    Returns:
        The embedding as a list of floats.
    """
    # "surrogatepass" keeps hashing from failing on lone surrogates.
    cache_key = hashlib.sha256(text.encode("utf-8", "surrogatepass")).hexdigest()
    cached = _embeddings_cache.get(cache_key)
    if cached:
        return cached
    log = logging.getLogger(__name__)
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{OLLAMA}/api/embeddings",
                json={"model": "nomic-embed-text", "prompt": text},
            )
            if response.status_code == 200:
                # A 200 body without "embedding" yields a 768-zero vector.
                emb = response.json().get("embedding", [0.0] * 768)
                _embeddings_cache[cache_key] = emb
                return emb
            log.warning(
                "Embedding request returned HTTP %s; using random fallback",
                response.status_code,
            )
    except Exception:
        # Deliberately broad: the caller must always receive a vector.
        log.warning("Embedding request failed; using random fallback", exc_info=True)
    return [round(random.uniform(-1, 1), 4) for _ in range(768)]
|
|
|
|
async def _call_llm(model: str, prompt: str) -> str:
    """Ask the Ollama ``/api/generate`` endpoint for a non-streamed completion.

    This is best-effort: on a non-200 response or any exception the failure
    is logged (it was previously swallowed silently) and a placeholder string
    containing the first 100 characters of the prompt is returned.

    Args:
        model: Model name forwarded to Ollama.
        prompt: Full prompt text.

    Returns:
        The ``response`` field of the reply (empty string when absent), or
        the placeholder string on failure.
    """
    log = logging.getLogger(__name__)
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{OLLAMA}/api/generate",
                json={"model": model, "prompt": prompt, "stream": False},
            )
            if reply.status_code == 200:
                return reply.json().get("response", "")
            log.warning(
                "LLM request returned HTTP %s; using placeholder answer",
                reply.status_code,
            )
    except Exception:
        # Deliberately broad: the caller must always receive a string.
        log.warning("LLM request failed; using placeholder answer", exc_info=True)
    return f"[Ollama 불가] 쿼리: {prompt[:100]}"
|
|
|
|
# The router prefix is already "/api/data-ai", so the legacy route below
# resolves to "/api/data-ai/data-ai/health". "/health" is added so the check
# is also reachable at "/api/data-ai/health"; the legacy path is kept for
# existing callers.
@router.get("/health")
@router.get("/data-ai/health")
async def health():
    """Report module health together with counts of the in-memory state.

    Returns:
        A fixed ``status`` of ``"healthy"``, the number of registered
        collections, the total number of stored vectors across all
        collections, and the number of LoRA jobs and pipelines.
    """
    vectors_total = sum(len(entries) for entries in _vector_store.values())
    return {
        "status": "healthy",
        "collections": len(_collections),
        "vectors_total": vectors_total,
        "lora_jobs": len(_lora_jobs),
        "pipelines": len(_pipelines),
    }
|