guardia-itsm/routers/data_ai2.py

"""
GUARDiA Data AI v2 — Gen6
벡터DB·RAG v2·LoRA API·임베딩·시맨틱 검색·AI 파이프라인 관리
"""
import os, httpx, uuid, json
from datetime import datetime
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel

# Router for the Data AI v2 endpoints; routes registered on it get the
# "/api/data-ai" prefix and the "Data AI v2" tag.
router = APIRouter(prefix="/api/data-ai", tags=["Data AI v2"])

# True only when the GUARDIA_NETWORK_MODE environment variable equals "open".
# NOTE(review): not referenced in the visible part of this file — confirm usage.
_OPEN = os.environ.get("GUARDIA_NETWORK_MODE") == "open"
# Base URL used by the helpers below for the embedding and generation calls.
OLLAMA = "http://localhost:11434"

# Module-level dictionaries holding all state used by the handlers below.
_vector_store: Dict[str, Dict] = {}   # collection → {id → {vector, metadata}}
_collections: Dict[str, Dict] = {}    # collection name → collection record
_lora_jobs: Dict[str, Dict] = {}      # job id → LoRA job record
_pipelines: Dict[str, Dict] = {}      # pipeline id → pipeline record
_embeddings_cache: Dict[str, List[float]] = {}  # filled by _get_embedding

class CollectionCreate(BaseModel):
    """Request body for creating a vector collection."""

    name: str
    dimension: int = 768
    metric: str = "cosine"
    description: str = ""

class VectorInsert(BaseModel):
    """Request body for inserting one text entry into a collection."""

    collection: str
    # When omitted, the insert handler generates a UUID.
    id: Optional[str] = None
    text: str
    metadata: Dict[str, Any] = {}

class VectorSearch(BaseModel):
    """Request body for a search against one collection."""

    collection: str
    query: str
    top_k: int = 5
    filter: Dict[str, Any] = {}

class RAGQuery(BaseModel):
    """Request body for a retrieval-augmented generation query."""

    query: str
    collection: str = "guardia-kb"
    top_k: int = 3
    model: str = "llama3"
    # When False, the response carries an empty ``sources`` list.
    include_sources: bool = True

class LoRAJobCreate(BaseModel):
    """Request body for registering a LoRA fine-tuning job."""

    base_model: str = "llama3"
    dataset_path: str
    epochs: int = 3
    learning_rate: float = 0.0001
    description: str = ""

class PipelineCreate(BaseModel):
    """Request body for defining an AI pipeline."""

    name: str
    steps: List[Dict[str, Any]]
    trigger: str = "manual"

class EmbeddingRequest(BaseModel):
    """Request body for embedding a list of texts."""

    texts: List[str]
    model: str = "nomic-embed-text"

# ── 컬렉션 관리 ──────────────────────────────────────────────────────────
@router.post("/collections")
async def create_collection(col: CollectionCreate):
    """Register a collection and give it an empty vector bucket.

    Posting a name that already exists replaces its record and clears the
    vectors stored under it.

    Returns:
        The stored record: the request fields plus ``created_at`` (UTC,
        ISO-8601) and ``doc_count`` (0).
    """
    record = col.model_dump()
    record["created_at"] = datetime.utcnow().isoformat()
    record["doc_count"] = 0
    _collections[col.name] = record
    _vector_store[col.name] = {}
    return record

@router.get("/collections")
async def list_collections():
    """List registered collections, or two sample entries when none exist.

    Returns:
        ``{"collections": [...], "total": <number of entries listed>}``.
    """
    listed = list(_collections.values())
    if not listed:
        # Nothing registered yet: fall back to the hard-coded sample entries.
        listed = [
            {"name": "guardia-kb", "dimension": 768, "doc_count": 142, "metric": "cosine"},
            {"name": "sr-history", "dimension": 768, "doc_count": 1024, "metric": "cosine"},
        ]
    return {"collections": listed, "total": len(listed)}

@router.get("/collections/{name}")
async def get_collection(name: str):
    """Return the record for ``name``.

    An unregistered name does not produce an error; a placeholder record with
    dimension 768 and ``doc_count`` 0 is returned instead.
    """
    if name in _collections:
        return _collections[name]
    return {"name": name, "dimension": 768, "doc_count": 0}

@router.delete("/collections/{name}")
async def delete_collection(name: str):
    """Drop a collection record and its vectors; unknown names are ignored.

    Returns:
        ``{"deleted": name}`` whether or not the collection existed.
    """
    for registry in (_collections, _vector_store):
        registry.pop(name, None)
    return {"deleted": name}

# ── 벡터 삽입 / 검색 ──────────────────────────────────────────────────────
@router.post("/vectors/insert")
async def insert_vector(req: VectorInsert):
    """Embed ``req.text`` and store it in ``req.collection``.

    The collection's bucket is created on demand. An entry whose id already
    exists is overwritten, and the collection's ``doc_count`` only grows when
    the id is new.

    Returns:
        ``{"id": ..., "collection": ..., "inserted": True}``.
    """
    vid = req.id or str(uuid.uuid4())
    # Generate the embedding (via _get_embedding) before touching the store.
    embedding = await _get_embedding(req.text)
    # Resolve the bucket only after the await so that a collection deleted in
    # the meantime is recreated instead of raising KeyError.
    bucket = _vector_store.setdefault(req.collection, {})
    is_new = vid not in bucket
    bucket[vid] = {
        "id": vid,
        "text": req.text,
        # Only a five-value preview of the embedding is kept, followed by a marker.
        "vector": embedding[:5] + ["..."],
        "metadata": req.metadata,
        "inserted_at": datetime.utcnow().isoformat(),
    }
    # Overwriting an existing id must not inflate the document count.
    if is_new and req.collection in _collections:
        _collections[req.collection]["doc_count"] += 1
    return {"id": vid, "collection": req.collection, "inserted": True}

@router.post("/vectors/batch-insert")
async def batch_insert(collection: str, items: List[Dict[str, Any]]):
    """Acknowledge up to 100 items, assigning each a fresh UUID.

    NOTE(review): the items are not written to the vector store; only the
    acknowledgement list is produced.

    Returns:
        ``{"collection": ..., "inserted": <count>, "results": [...]}``.
    """
    # At most 100 items are acknowledged per batch.
    acknowledged = [
        {"id": str(uuid.uuid4()), "status": "inserted"} for _ in items[:100]
    ]
    return {
        "collection": collection,
        "inserted": len(acknowledged),
        "results": acknowledged,
    }

@router.post("/vectors/search")
async def vector_search(req: VectorSearch):
    """Semantic vector search (placeholder ranking).

    Returns the first ``top_k`` entries of the collection in insertion order.
    Scores are synthetic: they start at 0.95 and drop by 0.05 per rank. The
    query text and ``req.filter`` are not used to select or rank entries.

    Returns:
        ``{"query", "collection", "results", "total_results"}``; each result
        carries ``id``, ``text`` (first 200 characters), ``score`` and
        ``metadata``. An unknown collection yields no results.
    """
    store = _vector_store.get(req.collection, {})
    # A negative top_k would slice from the end; treat it as "no results".
    limit = max(req.top_k, 0)
    hits = list(store.values())[:limit]
    return {
        "query": req.query,
        "collection": req.collection,
        "results": [
            {
                "id": hit["id"],
                "text": hit["text"][:200],
                "score": round(0.95 - rank * 0.05, 3),
                # Not every stored entry has a metadata key; default to {}.
                "metadata": hit.get("metadata", {}),
            }
            for rank, hit in enumerate(hits)
        ],
        "total_results": len(hits),
    }

@router.delete("/vectors/{collection}/{vid}")
async def delete_vector(collection: str, vid: str):
    """Remove one vector and keep the collection's ``doc_count`` in step.

    Unknown collections or ids are ignored.

    Returns:
        ``{"deleted": vid, "collection": collection}`` in every case.
    """
    store = _vector_store.get(collection, {})
    removed = store.pop(vid, None)
    # Only adjust the counter when something was actually removed, and never
    # let it drop below zero.
    if removed is not None and collection in _collections:
        record = _collections[collection]
        record["doc_count"] = max(record.get("doc_count", 0) - 1, 0)
    return {"deleted": vid, "collection": collection}

# ── RAG v2 ────────────────────────────────────────────────────────────────
@router.post("/rag/query")
async def rag_query(req: RAGQuery):
    """RAG v2: vector search followed by LLM answer generation.

    Returns:
        ``{"query", "answer", "model", "sources", "collection", "ts"}``;
        ``sources`` is empty when ``req.include_sources`` is False.
    """
    # 1) Vector search, reusing the search handler directly.
    search_request = VectorSearch(
        collection=req.collection, query=req.query, top_k=req.top_k
    )
    search_result = await vector_search(search_request)
    sources = search_result.get("results", [])

    # 2) Assemble the context: one numbered line per source, 300 chars max.
    numbered = [
        f"[{position}] {source['text'][:300]}"
        for position, source in enumerate(sources, start=1)
    ]
    context = "\n".join(numbered)
    prompt = (f"다음 문서를 참고하여 질문에 답하라.\n\n문서:\n{context}\n\n질문: {req.query}\n\n답변:")

    # 3) Call the LLM.
    answer = await _call_llm(req.model, prompt)
    returned_sources = sources if req.include_sources else []
    return {
        "query": req.query,
        "answer": answer,
        "model": req.model,
        "sources": returned_sources,
        "collection": req.collection,
        "ts": datetime.utcnow().isoformat(),
    }

@router.post("/rag/index")
async def index_documents(collection: str, documents: List[str]):
    """Store up to 50 documents (first 500 characters each) in ``collection``.

    Each stored entry gets an empty ``metadata`` dict so that it has the same
    shape as an entry written by the insert endpoint. If the collection is
    registered, its ``doc_count`` grows by the number of documents stored.

    Returns:
        ``{"collection", "indexed", "ts"}`` where ``indexed`` is the number
        of documents actually stored, not the number submitted.
    """
    # Only the first 50 documents of a request are indexed.
    batch = documents[:50]
    if batch:
        bucket = _vector_store.setdefault(collection, {})
        for doc in batch:
            vid = str(uuid.uuid4())
            bucket[vid] = {
                "id": vid,
                "text": doc[:500],
                "metadata": {},
                "inserted_at": datetime.utcnow().isoformat(),
            }
        if collection in _collections:
            _collections[collection]["doc_count"] += len(batch)
    return {
        "collection": collection,
        "indexed": len(batch),
        "ts": datetime.utcnow().isoformat(),
    }

@router.get("/rag/collections")
async def rag_collections():
    """Return a fixed list of RAG collections (hard-coded, not read from state)."""
    catalog = [
        {"name": "guardia-kb", "docs": 142, "description": "GUARDiA 기술 문서 KB"},
        {"name": "sr-history", "docs": 1024, "description": "SR 처리 이력"},
        {"name": "runbooks", "docs": 56, "description": "운영 런북"},
    ]
    return {"collections": catalog}

# ── LoRA 파인튜닝 API ─────────────────────────────────────────────────────
@router.post("/lora/jobs")
async def create_lora_job(job: LoRAJobCreate):
    """Register a LoRA job in the ``queued`` state.

    The job id is ``LORA-`` followed by eight upper-case hex characters.

    Returns:
        The stored job record: the request fields plus ``id``, ``status``,
        ``progress`` (0) and ``created_at`` (UTC, ISO-8601).
    """
    jid = "LORA-" + uuid.uuid4().hex[:8].upper()
    record = job.model_dump()
    record.update(
        id=jid,
        status="queued",
        progress=0,
        created_at=datetime.utcnow().isoformat(),
    )
    _lora_jobs[jid] = record
    return record

@router.get("/lora/jobs")
async def list_lora_jobs():
    """Return every registered LoRA job together with the job count."""
    jobs = list(_lora_jobs.values())
    return {"jobs": jobs, "total": len(_lora_jobs)}

@router.get("/lora/jobs/{jid}")
async def get_lora_job(jid: str):
    """Return one LoRA job record.

    Raises:
        HTTPException: 404 when no job is stored under ``jid``.
    """
    job = _lora_jobs.get(jid)
    if job:
        return job
    raise HTTPException(404)

@router.post("/lora/jobs/{jid}/start")
async def start_lora(jid: str):
    """Mark a job as ``training`` and stamp ``started_at`` (UTC, ISO-8601).

    The current status is not checked before the transition.

    Raises:
        HTTPException: 404 when no job is stored under ``jid``.
    """
    job = _lora_jobs.get(jid)
    if not job:
        raise HTTPException(404)
    job.update(status="training", started_at=datetime.utcnow().isoformat())
    return job

@router.post("/lora/jobs/{jid}/cancel")
async def cancel_lora(jid: str):
    """Mark a job as ``cancelled`` and return its record.

    Raises:
        HTTPException: 404 when no job is stored under ``jid``.
    """
    job = _lora_jobs.get(jid)
    if not job:
        raise HTTPException(404)
    job.update(status="cancelled")
    return job

@router.get("/lora/models")
async def list_lora_models():
    """Return a fixed list of LoRA models (hard-coded, not derived from jobs)."""
    deployed_model = {
        "id": "guardia-lora-v1",
        "base": "llama3",
        "trained_on": "sr-history",
        "accuracy": 0.89,
        "deployed": True,
    }
    return {"models": [deployed_model]}

# ── 임베딩 ────────────────────────────────────────────────────────────────
@router.post("/embeddings")
async def create_embeddings(req: EmbeddingRequest):
    """Embed up to the first 50 texts of the request.

    Each returned vector keeps its first five components; the remaining
    positions are filled with 0.0. ``req.model`` is echoed in the response.

    Returns:
        ``{"model", "embeddings", "count"}``; each embedding entry carries
        ``text`` (first 100 characters), ``embedding`` and ``dimension``.
    """
    embedded = []
    for text in req.texts[:50]:
        vector = await _get_embedding(text)
        size = len(vector)
        zero_padding = [0.0] * (size - 5)
        embedded.append(
            {
                "text": text[:100],
                "embedding": vector[:5] + zero_padding,
                "dimension": size,
            }
        )
    return {"model": req.model, "embeddings": embedded, "count": len(embedded)}

@router.get("/embeddings/models")
async def embedding_models():
    """Return a fixed list of embedding models (hard-coded)."""
    catalog = [
        {"name": "nomic-embed-text", "dimension": 768, "available": True, "recommended": True},
        {"name": "mxbai-embed-large", "dimension": 1024, "available": False},
    ]
    return {"models": catalog}

# ── AI 파이프라인 ─────────────────────────────────────────────────────────
@router.post("/pipelines")
async def create_pipeline(pipe: PipelineCreate):
    """Register a pipeline in the ``ready`` state.

    The pipeline id is ``PIPE-`` followed by eight upper-case hex characters.

    Returns:
        The stored record: the request fields plus ``id``, ``status`` and
        ``created_at`` (UTC, ISO-8601).
    """
    pid = "PIPE-" + uuid.uuid4().hex[:8].upper()
    record = pipe.model_dump()
    record.update(
        id=pid,
        status="ready",
        created_at=datetime.utcnow().isoformat(),
    )
    _pipelines[pid] = record
    return record

@router.get("/pipelines")
async def list_pipelines():
    """Return every registered pipeline record."""
    registered = list(_pipelines.values())
    return {"pipelines": registered}

@router.post("/pipelines/{pid}/run")
async def run_pipeline(pid: str, inputs: Dict[str, Any] = {}):
    """Report a completed run for a registered pipeline.

    No pipeline step is executed here: the response always has status
    ``completed`` and a fixed output, and echoes ``inputs`` back.

    Raises:
        HTTPException: 404 when no pipeline is stored under ``pid``.
    """
    if not _pipelines.get(pid):
        raise HTTPException(404)
    run_id = str(uuid.uuid4())
    return {
        "run_id": run_id,
        "pipeline": pid,
        "inputs": inputs,
        "status": "completed",
        "output": {"processed": True},
        "ts": datetime.utcnow().isoformat(),
    }

# ── 헬퍼 ──────────────────────────────────────────────────────────────────
async def _get_embedding(text: str) -> List[float]:
    """Return an embedding for ``text``, with a random vector as fallback.

    Successful responses are cached under the full text, so two texts that
    merely share a prefix no longer receive the same vector. When the request
    fails or returns a non-200 status, a random 768-value vector is returned
    and is not cached.

    Args:
        text: The text to embed.

    Returns:
        The ``embedding`` field of the service response (768 zeros if that
        field is missing), or 768 random values in [-1, 1] on failure.
    """
    import logging
    import random

    cached = _embeddings_cache.get(text)
    if cached:
        return cached
    try:
        async with httpx.AsyncClient(timeout=30.0) as c:
            r = await c.post(f"{OLLAMA}/api/embeddings",
                             json={"model": "nomic-embed-text", "prompt": text})
            if r.status_code == 200:
                emb = r.json().get("embedding", [0.0] * 768)
                _embeddings_cache[text] = emb
                return emb
    except Exception as exc:
        # Best-effort by design: record the failure, then fall back below.
        logging.getLogger(__name__).warning(
            "Embedding request failed, using random fallback: %s", exc
        )
    return [round(random.uniform(-1, 1), 4) for _ in range(768)]

async def _call_llm(model: str, prompt: str) -> str:
    """Ask the generation endpoint for a non-streamed completion.

    Args:
        model: Model name forwarded in the request payload.
        prompt: Full prompt text.

    Returns:
        The ``response`` field of a 200 reply (empty string if the field is
        missing). On any exception or non-200 status, a placeholder string
        containing the first 100 characters of the prompt.
    """
    payload = {"model": model, "prompt": prompt, "stream": False}
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(f"{OLLAMA}/api/generate", json=payload)
            if reply.status_code == 200:
                return reply.json().get("response", "")
    except Exception:
        # Deliberately best-effort: every failure falls through to the placeholder.
        pass
    return f"[Ollama 불가] 쿼리: {prompt[:100]}"

# The router prefix already ends in "/data-ai", so the original path resolves
# to ".../data-ai/data-ai/health". Keep that path for existing callers and also
# expose the handler at ".../data-ai/health".
# NOTE(review): confirm the doubled path segment was unintentional.
@router.get("/health")
@router.get("/data-ai/health")
async def health():
    """Report a static ``healthy`` status plus sizes of the in-memory stores.

    Returns:
        ``{"status", "collections", "vectors_total", "lora_jobs",
        "pipelines"}`` where ``vectors_total`` sums the entries of every
        collection bucket.
    """
    vectors_total = sum(len(bucket) for bucket in _vector_store.values())
    return {
        "status": "healthy",
        "collections": len(_collections),
        "vectors_total": vectors_total,
        "lora_jobs": len(_lora_jobs),
        "pipelines": len(_pipelines),
    }