diff --git a/workspace/guardia-itsm/main.py b/workspace/guardia-itsm/main.py index cf204c8f..11be53c3 100644 --- a/workspace/guardia-itsm/main.py +++ b/workspace/guardia-itsm/main.py @@ -377,6 +377,12 @@ app.include_router(network_zone.router) # 행정망/인터넷망 분리 app.include_router(k_cloud.router) # K-Cloud 공공 클라우드 전환 app.include_router(e_procurement.router) # 전자조달 계약·검수·납품 +# ── Upstage OCR 연동 (2026-06-02) ──────────────────────────────────────────── +from routers import upstage_ocr, doc_workflow, doc_template +app.include_router(upstage_ocr.router) # Upstage Document AI OCR 엔진 +app.include_router(doc_workflow.router) # 문서 워크플로우 (계약서/납품서/청구서 등) +app.include_router(doc_template.router) # 문서 추출 템플릿 관리 + # ── 개방망 보안 헤더 미들웨어 ──────────────────────────────────────────────── @app.middleware("http") diff --git a/workspace/guardia-itsm/models.py b/workspace/guardia-itsm/models.py index b3c0a0ce..313df0e9 100644 --- a/workspace/guardia-itsm/models.py +++ b/workspace/guardia-itsm/models.py @@ -5372,3 +5372,71 @@ class ProcurementRecord(Base): inspection_date = Column(DateTime, nullable=True) inspection_by = Column(String(100), nullable=True) created_at = Column(DateTime, default=func.now()) + + +# ══════════════════════════════════════════════════════════════════════════════ +# ── Upstage OCR 연동 모델 +# ══════════════════════════════════════════════════════════════════════════════ + +class UpstageOCRConfig(Base): + """Upstage Document AI API 설정.""" + __tablename__ = "tb_upstage_ocr_config" + tenant_id = Column(Integer, primary_key=True, index=True) + api_key_enc = Column(Text, nullable=False) # AES-256-GCM 암호화 + model = Column(String(50), default="document-parse") + daily_limit = Column(Integer, default=1000) + is_active = Column(Boolean, default=True) + created_at = Column(DateTime, default=func.now()) + updated_at = Column(DateTime, default=func.now(), onupdate=func.now()) + + +class OCRHistory(Base): + """OCR 처리 이력.""" + __tablename__ = "tb_ocr_history" + id = Column(Integer, 
primary_key=True, index=True) + tenant_id = Column(Integer, nullable=False, index=True) + filename = Column(String(300), nullable=False) + file_size = Column(Integer, default=0) + ocr_type = Column(String(30), nullable=False) # PARSE | EXTRACT | QA + schema_used = Column(Text, nullable=True) + result_json = Column(Text, nullable=True) # 결과 요약 (최대 5000자) + linked_to = Column(String(50), nullable=True) + linked_id = Column(Integer, nullable=True) + pages = Column(Integer, default=1) + tokens_used = Column(Integer, default=0) + status = Column(String(20), default="SUCCESS") + created_by = Column(Integer, ForeignKey("tb_user.id"), nullable=True) + created_at = Column(DateTime, default=func.now()) + + +class DocWorkflowJob(Base): + """문서 워크플로우 작업 이력.""" + __tablename__ = "tb_doc_workflow_job" + id = Column(Integer, primary_key=True, index=True) + tenant_id = Column(Integer, nullable=False, index=True) + workflow_type = Column(String(50), nullable=False) + filename = Column(String(300), nullable=True) + template_id = Column(Integer, nullable=True) + status = Column(String(20), default="PROCESSING") + extracted_data = Column(JSON, nullable=True) + linked_table = Column(String(50), nullable=True) + linked_record_id = Column(Integer, nullable=True) + error_message = Column(Text, nullable=True) + created_by = Column(Integer, ForeignKey("tb_user.id"), nullable=True) + created_at = Column(DateTime, default=func.now()) + completed_at = Column(DateTime, nullable=True) + + +class DocTemplate(Base): + """문서 추출 템플릿.""" + __tablename__ = "tb_doc_template" + id = Column(Integer, primary_key=True, index=True) + tenant_id = Column(Integer, nullable=False, index=True) + name = Column(String(200), nullable=False) + description = Column(Text, nullable=True) + schema_json = Column(Text, nullable=False) + workflow = Column(String(50), nullable=True) + builtin_key = Column(String(100), nullable=True) + is_builtin = Column(Boolean, default=False) + is_active = Column(Boolean, default=True) + 
created_at = Column(DateTime, default=func.now()) diff --git a/workspace/guardia-itsm/routers/doc_template.py b/workspace/guardia-itsm/routers/doc_template.py new file mode 100644 index 00000000..2240f63a --- /dev/null +++ b/workspace/guardia-itsm/routers/doc_template.py @@ -0,0 +1,341 @@ +""" +문서 추출 템플릿 관리 + +내장 7종 + 커스텀 템플릿 CRUD. + +엔드포인트: + GET /api/doctemplate/ — 템플릿 목록 + POST /api/doctemplate/ — 커스텀 템플릿 생성 + GET /api/doctemplate/{id} — 템플릿 상세 + PUT /api/doctemplate/{id} — 수정 + DELETE /api/doctemplate/{id} — 삭제 + GET /api/doctemplate/builtin — 내장 템플릿 목록 + POST /api/doctemplate/apply-builtin — 내장 템플릿 테넌트 적용 +""" +from __future__ import annotations + +import json +from datetime import datetime +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, Field +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from core.auth import get_current_user, require_admin_role +from database import get_db +from models import User, DocTemplate + +router = APIRouter(prefix="/api/doctemplate", tags=["문서 템플릿"]) + +BUILTIN_TEMPLATES = { + "narasajang_contract": { + "name": "나라장터 계약서", + "description": "조달청 나라장터 계약서에서 계약정보를 자동 추출", + "workflow": "contract", + "schema": { + "contract_no": "계약번호", + "contract_name": "계약품명/서비스명", + "supplier": "공급사명", + "supplier_biz_no":"공급사 사업자번호", + "amount": "계약금액(원)", + "vat": "부가세액", + "start_date": "계약시작일(YYYY-MM-DD)", + "end_date": "계약종료일(YYYY-MM-DD)", + "institution": "발주기관명", + "manager": "담당자명", + "payment_terms": "납부/지급 조건", + } + }, + "server_delivery": { + "name": "서버 납품 명세서", + "description": "서버·장비 납품명세서에서 사양을 추출하여 CMDB에 자동 등록", + "workflow": "server_spec", + "schema": { + "hostname": "호스트명/서버명", + "manufacturer": "제조사", + "model_no": "모델번호", + "serial_no": "시리얼번호", + "cpu_model": "CPU 모델명", + "cpu_cores": "CPU 코어 수", + "memory_gb": "메모리 용량(GB)", + "disk_config": "스토리지 구성(예: SSD 1TB×2)", + "os": "운영체제", + "ip_addr": "IP주소", + "rack_location": 
"랙/위치", + "warranty_until": "보증기간 만료일", + "delivery_date": "납품일", + } + }, + "brand_contract": { + "name": "기업 브랜드 계약서", + "description": "현대백화점·롯데 등 유통/브랜드 계약서 자동 처리", + "workflow": "brand_contract", + "schema": { + "contract_title": "계약서 제목", + "party_a": "갑(발주사/브랜드사)", + "party_a_biz_no": "갑 사업자번호", + "party_b": "을(수주사/입점사)", + "party_b_biz_no": "을 사업자번호", + "contract_amount": "계약금액", + "currency": "통화(KRW/USD/기타)", + "effective_date": "계약체결일", + "expiry_date": "계약만료일", + "auto_renewal": "자동갱신여부(Y/N)", + "payment_terms": "대금 지급조건", + "contract_items": "계약 품목/서비스", + "royalty_rate": "수수료율/로열티율", + "territory": "적용지역/매장", + "exclusive": "독점여부(Y/N)", + "termination": "계약 해지 조건", + "penalty_clause": "위약금 조항", + "contact_a": "갑 담당자", + "contact_b": "을 담당자", + "special_terms": "특약사항", + } + }, + "invoice": { + "name": "세금계산서/청구서", + "description": "세금계산서·청구서에서 금액·공급자 정보 자동 추출", + "workflow": "invoice", + "schema": { + "invoice_no": "세금계산서번호/청구번호", + "issue_date": "발행일", + "supplier_name": "공급자 상호", + "supplier_biz_no": "공급자 사업자번호", + "buyer_name": "공급받는자 상호", + "buyer_biz_no": "공급받는자 사업자번호", + "supply_amount": "공급가액", + "vat_amount": "세액", + "total_amount": "합계금액", + "items": "품목/내역(쉼표 구분)", + "payment_due": "결제기한", + } + }, + "incident_report": { + "name": "장애 보고서", + "description": "장애보고서 이미지/PDF에서 에러 내용 추출 → SR 자동 생성", + "workflow": "incident_report", + "schema": { + "incident_date": "발생일시", + "incident_type": "장애유형(H/W·S/W·네트워크·기타)", + "affected_system": "영향 시스템/서비스", + "error_message": "오류 메시지/에러코드", + "root_cause": "근본원인", + "impact_scope": "영향 범위(사용자 수/서비스)", + "resolution": "조치사항", + "downtime_minutes": "다운타임(분)", + "reporter": "보고자/담당자", + "severity": "심각도(P1/P2/P3/P4)", + } + }, + "csap_report": { + "name": "CSAP/ISMS 점검 보고서", + "description": "공공기관 보안 점검 보고서 자동 분석 → CSAP 준수율 업데이트", + "workflow": "audit_report", + "schema": { + "institution": "기관명", + "check_date": "점검일", + "auditor": "점검자/감사기관", + "total_items": "총 점검항목 수", + "passed_items": "적합(통과) 항목 수", 
+ "failed_items": "부적합 항목 수", + "na_items": "해당없음 항목 수", + "compliance_rate": "준수율(%)", + "major_findings": "주요 발견사항", + "recommendations": "권고사항", + "next_check_date": "차기 점검 예정일", + } + }, + "meeting_minutes": { + "name": "회의록", + "description": "회의록에서 결정사항·액션아이템 자동 추출 → SR/작업 생성", + "workflow": "meeting_minutes", + "schema": { + "meeting_date": "회의일시", + "meeting_place": "장소(오프라인/화상)", + "chairman": "의장/주관자", + "participants": "참석자 목록", + "agenda": "회의 안건", + "decisions": "결정사항(쉼표 구분)", + "action_items": "액션아이템(담당자/기한 포함)", + "next_meeting": "차기 회의 일정", + "notes": "기타 특이사항", + } + }, +} + + +class TemplateCreate(BaseModel): + name: str = Field(..., max_length=200) + description: Optional[str] = None + schema_json: dict = Field(..., description="추출 스키마 {필드명: 설명}") + workflow: Optional[str] = Field(None, description="연동 워크플로우") + + +class ApplyBuiltinRequest(BaseModel): + template_keys: list[str] + + +@router.get("/builtin") +async def list_builtin_templates(_: User = Depends(get_current_user)): + return [ + { + "key": k, + "name": v["name"], + "description": v["description"], + "workflow": v["workflow"], + "field_count": len(v["schema"]), + "fields": list(v["schema"].keys()), + } + for k, v in BUILTIN_TEMPLATES.items() + ] + + +@router.post("/apply-builtin") +async def apply_builtin_templates( + req: ApplyBuiltinRequest, + db: AsyncSession = Depends(get_db), + user: User = Depends(require_admin_role), +): + """내장 템플릿을 현재 테넌트에 적용.""" + created = [] + for key in req.template_keys: + tpl = BUILTIN_TEMPLATES.get(key) + if not tpl: + continue + existing = await db.execute( + select(DocTemplate).where( + DocTemplate.tenant_id == user.tenant_id, + DocTemplate.builtin_key == key, + ) + ) + if existing.scalar_one_or_none(): + continue + tmpl = DocTemplate( + tenant_id=user.tenant_id, + name=tpl["name"], + description=tpl["description"], + schema_json=json.dumps(tpl["schema"], ensure_ascii=False), + workflow=tpl["workflow"], + builtin_key=key, + is_builtin=True, + 
@router.get("/{template_id}")
async def get_template(
    template_id: int,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """Return one active template of the caller's tenant with its parsed schema.

    Args:
        template_id: Primary key of the template to fetch.
        db: Async database session.
        user: Authenticated caller; only rows with the caller's tenant_id match.

    Returns:
        Dict with id, name, description, schema (decoded from schema_json),
        workflow and is_builtin.

    Raises:
        HTTPException: 404 when no active template with this id exists for
            the caller's tenant.
    """
    row = await db.execute(
        select(DocTemplate).where(
            DocTemplate.id == template_id,
            DocTemplate.tenant_id == user.tenant_id,
            # delete_template only flips is_active to False (soft delete) and
            # list_templates hides such rows; hide them here as well so a
            # deleted template is not still retrievable by id.
            DocTemplate.is_active.is_(True),
        )
    )
    t = row.scalar_one_or_none()
    if not t:
        raise HTTPException(404)
    return {
        "id": t.id, "name": t.name, "description": t.description,
        "schema": json.loads(t.schema_json or "{}"),
        "workflow": t.workflow, "is_builtin": t.is_builtin,
    }
user: User = Depends(require_admin_role), +): + row = await db.execute( + select(DocTemplate).where( + DocTemplate.id == template_id, + DocTemplate.tenant_id == user.tenant_id, + ) + ) + t = row.scalar_one_or_none() + if not t: + raise HTTPException(404) + if t.is_builtin: + raise HTTPException(400, "내장 템플릿은 수정할 수 없습니다. 복제 후 수정하세요.") + t.name = req.name; t.description = req.description + t.schema_json = json.dumps(req.schema_json, ensure_ascii=False) + t.workflow = req.workflow + await db.commit() + return {"ok": True} + + +@router.delete("/{template_id}") +async def delete_template( + template_id: int, + db: AsyncSession = Depends(get_db), + user: User = Depends(require_admin_role), +): + row = await db.execute( + select(DocTemplate).where( + DocTemplate.id == template_id, + DocTemplate.tenant_id == user.tenant_id, + ) + ) + t = row.scalar_one_or_none() + if not t: + raise HTTPException(404) + if t.is_builtin: + raise HTTPException(400, "내장 템플릿은 삭제할 수 없습니다.") + t.is_active = False + await db.commit() + return {"ok": True} diff --git a/workspace/guardia-itsm/routers/doc_workflow.py b/workspace/guardia-itsm/routers/doc_workflow.py new file mode 100644 index 00000000..576ec9b0 --- /dev/null +++ b/workspace/guardia-itsm/routers/doc_workflow.py @@ -0,0 +1,610 @@ +""" +문서 워크플로우 자동화 — OCR 결과 → ITSM 자동 연동 + +Upstage OCR 결과를 ITSM 기능에 자동 연동하는 7개 워크플로우. 
+ +엔드포인트: + POST /api/docflow/contract — 나라장터 계약서 → 조달 자동 등록 + POST /api/docflow/server-spec — 서버납품서 → CMDB 자동 등록 + POST /api/docflow/invoice — 청구서/세금계산서 → 과금 연동 + POST /api/docflow/audit-report — CSAP/감사보고서 → 준수율 업데이트 + POST /api/docflow/incident-report — 장애보고서 이미지 → SR 자동 생성 + POST /api/docflow/meeting-minutes — 회의록 → SR/액션아이템 생성 + POST /api/docflow/brand-contract — 기업 브랜드 계약서 (현대백화점 등) + GET /api/docflow/jobs — 작업 목록 + GET /api/docflow/jobs/{id} — 작업 상세 +""" +from __future__ import annotations + +import json +import logging +import re +from datetime import date, datetime +from typing import Optional + +import httpx +from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile +from pydantic import BaseModel +from sqlalchemy import select, desc +from sqlalchemy.ext.asyncio import AsyncSession + +from core.auth import get_current_user +from database import get_db +from models import ( + User, UpstageOCRConfig, OCRHistory, DocWorkflowJob, + SRRequest, SRStatus, Server, ProcurementRecord, Invoice, +) + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/api/docflow", tags=["문서 워크플로우"]) + +UPSTAGE_BASE = "https://api.upstage.ai/v1/document-ai" +MAX_FILE_SIZE = 20 * 1024 * 1024 + + +# ── 내부 헬퍼 ──────────────────────────────────────────────────────────────── + +def _parse_amount(text: str) -> int: + """금액 문자열 → 정수 (₩50,000,000 → 50000000).""" + if not text: + return 0 + cleaned = re.sub(r'[^\d]', '', str(text)) + return int(cleaned) if cleaned else 0 + + +def _parse_date(text: str) -> Optional[date]: + """날짜 문자열 → date (다양한 형식 지원).""" + if not text: + return None + formats = ["%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d", "%Y년 %m월 %d일", "%Y%m%d"] + cleaned = str(text).strip() + for fmt in formats: + try: + return datetime.strptime(cleaned, fmt).date() + except ValueError: + continue + return None + + +async def _get_api_key(user: User, db: AsyncSession) -> str: + row = await db.execute( + select(UpstageOCRConfig).where( + UpstageOCRConfig.tenant_id 
async def _extract(api_key: str, file_bytes: bytes, filename: str,
                   schema: dict) -> dict:
    """Call the Upstage Information Extraction endpoint for one document.

    Args:
        api_key: Bearer token sent in the Authorization header.
        file_bytes: Raw document content to upload.
        filename: Original file name; its extension selects the MIME type.
        schema: Extraction schema, sent JSON-encoded in the "schema" form field.

    Returns:
        The decoded JSON body of the Upstage response.

    Raises:
        HTTPException: 502 when the request cannot be completed, when Upstage
            answers with a non-200 status, or when the body is not valid JSON.
    """
    from pathlib import Path

    mime_by_ext = {".pdf": "application/pdf", ".png": "image/png",
                   ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".tiff": "image/tiff"}
    ext = Path(filename).suffix.lower()
    mime = mime_by_ext.get(ext, "application/octet-stream")
    try:
        async with httpx.AsyncClient(timeout=120) as client:
            r = await client.post(
                f"{UPSTAGE_BASE}/information-extraction",
                headers={"Authorization": f"Bearer {api_key}"},
                files={"document": (filename, file_bytes, mime)},
                data={"schema": json.dumps(schema, ensure_ascii=False)}
            )
    except httpx.RequestError as exc:
        # Timeouts and connection failures are upstream problems; report them
        # as 502 like any other Upstage failure instead of an unhandled 500.
        logger.warning("Upstage information-extraction request failed: %s", exc)
        raise HTTPException(502, f"Upstage API 연결 오류: {type(exc).__name__}") from exc
    if r.status_code != 200:
        raise HTTPException(502, f"Upstage API 오류: {r.text[:200]}")
    try:
        return r.json()
    except ValueError as exc:
        # A 200 response whose body is not JSON is still an upstream fault.
        raise HTTPException(502, "Upstage API 응답을 해석할 수 없습니다.") from exc
DocWorkflowJob( + tenant_id=tenant_id, + workflow_type=workflow, + filename=filename, + template_id=template_id, + status=status, + extracted_data=extracted, + linked_table=linked_table, + linked_record_id=linked_id, + created_by=user_id, + created_at=datetime.utcnow(), + completed_at=datetime.utcnow(), + ) + db.add(job) + await db.commit() + await db.refresh(job) + return job.id + + +def _simplify(result: dict) -> dict: + """Upstage 추출 결과 → 단순 Key-Value.""" + if "result" in result and isinstance(result["result"], dict): + return {k: v.get("value", "") if isinstance(v, dict) else v + for k, v in result["result"].items()} + return {} + + +# ── 워크플로우 엔드포인트 ─────────────────────────────────────────────────── + +@router.post("/contract") +async def process_contract( + file: UploadFile = File(...), + auto_register: bool = Form(True), + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """나라장터 계약서 → 조달 이력 자동 등록.""" + file_bytes = await file.read() + api_key = await _get_api_key(user, db) + + schema = { + "contract_no": "계약번호", "contract_name": "계약품명", + "supplier": "공급사명", "supplier_biz_no": "공급사 사업자번호", + "amount": "계약금액(원)", "vat": "부가세액", + "start_date": "계약시작일", "end_date": "계약종료일", + "institution": "발주기관명", "manager": "담당자명", + "payment_terms": "납부조건", + } + result = await _extract(api_key, file_bytes, file.filename or "contract.pdf", schema) + extracted = _simplify(result) + + record_id = None + if auto_register and extracted.get("contract_no"): + record = ProcurementRecord( + tenant_id=user.tenant_id, + contract_no=extracted.get("contract_no", ""), + contract_name=extracted.get("contract_name", "미상"), + supplier=extracted.get("supplier", ""), + amount=_parse_amount(extracted.get("amount", "0")), + category="IT계약", + start_date=_parse_date(extracted.get("start_date")), + end_date=_parse_date(extracted.get("end_date")), + status="ACTIVE", + created_at=datetime.utcnow(), + ) + db.add(record) + await db.commit() + await 
def _first_number(value: object) -> float:
    """Return the first numeric token found in *value*, or 0.0 when there is none."""
    if value is None:
        return 0.0
    match = re.search(r'\d+(?:\.\d+)?', str(value).replace(",", ""))
    return float(match.group()) if match else 0.0


@router.post("/server-spec")
async def process_server_spec(
    file: UploadFile = File(...),
    auto_register: bool = Form(True),
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """Extract server specs from a delivery statement and optionally register a Server.

    Args:
        file: Uploaded delivery statement (PDF or image).
        auto_register: When true and a hostname was extracted, a Server row
            is created from the extracted fields.
        db: Async database session.
        user: Authenticated caller; supplies the tenant for the API key lookup
            and for the job record.

    Returns:
        Dict with the extracted fields, the created server id (None when
        nothing was registered), the workflow job id and a status message.

    Raises:
        HTTPException: 413 when the upload exceeds MAX_FILE_SIZE; errors from
            _get_api_key and _extract propagate unchanged.
    """
    file_bytes = await file.read()
    # MAX_FILE_SIZE is declared at module level; enforce it before the
    # document is forwarded to the external API.
    if len(file_bytes) > MAX_FILE_SIZE:
        raise HTTPException(413, f"파일 크기 초과: 최대 {MAX_FILE_SIZE // (1024 * 1024)}MB")
    api_key = await _get_api_key(user, db)

    schema = {
        "hostname": "호스트명/서버명", "manufacturer": "제조사",
        "model_no": "모델번호", "serial_no": "시리얼번호",
        "cpu_model": "CPU 모델명", "cpu_cores": "CPU 코어 수",
        "memory_gb": "메모리 용량(GB)", "disk_config": "스토리지 구성",
        "os": "운영체제", "ip_addr": "IP주소",
        "rack_location": "랙/위치", "warranty_until": "보증기간 만료일",
        "delivery_date": "납품일",
    }
    result = await _extract(api_key, file_bytes, file.filename or "spec.pdf", schema)
    extracted = _simplify(result)

    server_id = None
    if auto_register and extracted.get("hostname"):
        # NOTE(review): no tenant_id is passed to Server here, unlike the other
        # records this module creates — confirm against the Server model.
        server = Server(
            hostname=extracted.get("hostname", ""),
            # Apply the placeholder to empty / None values too, not only to a
            # missing key.
            ip_addr=extracted.get("ip_addr") or "0.0.0.0",
            os_type=extracted.get("os", ""),
            # Use the first numeric token: extracted values may be numbers
            # rather than strings, and concatenating every digit mis-reads
            # "1.5" as 15 or "64GB (16GB x 4)" as 64164.
            cpu_cores=int(_first_number(extracted.get("cpu_cores"))),
            memory_mb=int(_first_number(extracted.get("memory_gb")) * 1024),
            ssh_user="opsagent",
            discovered_at=datetime.utcnow(),
        )
        db.add(server)
        await db.commit()
        await db.refresh(server)
        server_id = server.id

    job_id = await _save_job(db, user.tenant_id, user.id, "server_spec",
                             file.filename or "", None, extracted,
                             "tb_server_info", server_id)

    return {
        "ok": True,
        "workflow": "server_spec",
        "extracted": extracted,
        "server_id": server_id,
        "job_id": job_id,
        "message": "서버 사양 추출 완료" + (f" → CMDB ID {server_id} 등록" if server_id else " (수동 확인 필요)"),
    }
"passed_items": "적합(통과) 항목 수", + "failed_items": "부적합 항목 수", + "compliance_rate": "준수율(%)", + "major_findings": "주요 발견사항", + "recommendations": "권고사항", + } + result = await _extract(api_key, file_bytes, file.filename or "audit.pdf", schema) + extracted = _simplify(result) + + job_id = await _save_job(db, user.tenant_id, user.id, "audit_report", + file.filename or "", None, extracted, "audit", None) + + compliance_rate = float(re.sub(r'[^\d.]', '', extracted.get("compliance_rate", "0") or "0") or 0) + return { + "ok": True, + "workflow": "audit_report", + "extracted": extracted, + "compliance_rate": compliance_rate, + "job_id": job_id, + "message": f"감사 보고서 분석 완료. 준수율: {compliance_rate}%", + } + + +@router.post("/incident-report") +async def process_incident_report( + file: UploadFile = File(...), + auto_create_sr: bool = Form(True), + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """장애보고서 이미지/PDF → 에러 내용 추출 → SR 자동 생성.""" + file_bytes = await file.read() + api_key = await _get_api_key(user, db) + + # Document Parse로 텍스트 추출 + parse_result = await _parse_doc(api_key, file_bytes, file.filename or "incident.png") + text = parse_result.get("content", {}).get("text", "") if isinstance(parse_result.get("content"), dict) else "" + + # 추가로 정보 추출 + schema = { + "incident_date": "발생일시", + "incident_type": "장애유형", + "affected_system": "영향 시스템", + "error_message": "오류 메시지", + "severity": "심각도(P1/P2/P3/P4)", + "reporter": "보고자", + } + extract_result = await _extract(api_key, file_bytes, file.filename or "incident.png", schema) + extracted = _simplify(extract_result) + + sr_id = None + if auto_create_sr: + severity = extracted.get("severity", "P3") + priority = {"P1": "HIGH", "P2": "HIGH", "P3": "MEDIUM", "P4": "LOW"}.get(severity.upper(), "MEDIUM") + title = f"[장애보고서] {extracted.get('incident_type', '장애')} - {extracted.get('affected_system', '미상')}" + description = ( + f"OCR 추출 장애보고서\n\n" + f"발생일시: {extracted.get('incident_date', '-')}\n" + 
@router.post("/meeting-minutes")
async def process_meeting_minutes(
    file: UploadFile = File(...),
    auto_create_sr: bool = Form(True),
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """Extract decisions and action items from meeting minutes and create SRs.

    Args:
        file: Uploaded meeting minutes (PDF or image).
        auto_create_sr: When true, one SRRequest is created per extracted
            action item, up to five.
        db: Async database session.
        user: Authenticated caller; supplies the tenant for the API key lookup
            and for the job record.

    Returns:
        Dict with the extracted fields, the ids of the created SRs, the
        workflow job id and a status message.

    Raises:
        HTTPException: 413 when the upload exceeds MAX_FILE_SIZE; errors from
            _get_api_key and _extract propagate unchanged.
    """
    file_bytes = await file.read()
    # MAX_FILE_SIZE is declared at module level; enforce it before the
    # document is forwarded to the external API.
    if len(file_bytes) > MAX_FILE_SIZE:
        raise HTTPException(413, f"파일 크기 초과: 최대 {MAX_FILE_SIZE // (1024 * 1024)}MB")
    api_key = await _get_api_key(user, db)

    schema = {
        "meeting_date": "회의일시",
        "chairman": "의장/주관자",
        "participants": "참석자 목록",
        "agenda": "회의 안건",
        "decisions": "결정사항",
        "action_items": "액션아이템(담당자/기한 포함)",
        "next_meeting": "차기 회의 일정",
    }
    result = await _extract(api_key, file_bytes, file.filename or "meeting.pdf", schema)
    extracted = _simplify(result)

    sr_ids = []
    raw_actions = extracted.get("action_items")
    if auto_create_sr and raw_actions:
        # The extractor may return a list instead of one delimited string;
        # re.split() on a list would raise TypeError.
        if isinstance(raw_actions, (list, tuple)):
            candidates = [str(entry) for entry in raw_actions]
        else:
            candidates = re.split(r'[,\n]', str(raw_actions))
        items = [c.strip() for c in candidates if c.strip()]

        created = []
        for item in items[:5]:  # at most 5 SRs per document
            # NOTE(review): no tenant or requester is set on the SR — confirm
            # against the SRRequest model.
            sr = SRRequest(
                title=f"[회의록 액션] {item[:80]}",
                description=f"회의일: {extracted.get('meeting_date', '-')}\n의장: {extracted.get('chairman', '-')}\n\n액션아이템: {item}",
                category="TASK",
                priority="MEDIUM",
                status=SRStatus.OPEN,
                created_at=datetime.utcnow(),
            )
            db.add(sr)
            created.append(sr)
        if created:
            # One commit for the whole batch: either every SR of this document
            # is stored or none is, instead of a partial set on failure.
            await db.commit()
            for sr in created:
                await db.refresh(sr)
            sr_ids = [sr.id for sr in created]

    job_id = await _save_job(db, user.tenant_id, user.id, "meeting_minutes",
                             file.filename or "", None, extracted, "tb_sr_request",
                             sr_ids[0] if sr_ids else None)

    return {
        "ok": True,
        "workflow": "meeting_minutes",
        "extracted": extracted,
        "sr_ids": sr_ids,
        "job_id": job_id,
        "message": "회의록 분석 완료" + (f" → SR {sr_ids} 생성" if sr_ids else ""),
    }
@router.get("/jobs")
async def list_workflow_jobs(
    limit: int = 50,
    workflow_type: Optional[str] = None,
    db: AsyncSession = Depends(get_db),
    user: User = Depends(get_current_user),
):
    """List the caller's tenant's workflow jobs, newest first.

    Args:
        limit: Maximum number of jobs to return; clamped to the range 1..1000.
        workflow_type: When given, only jobs of this workflow type are returned.
        db: Async database session.
        user: Authenticated caller; only jobs of the caller's tenant are listed.

    Returns:
        List of dicts with id, workflow, filename, status, linked_table,
        linked_id and created_at.
    """
    # The value goes straight into SQL LIMIT: a negative number is an error on
    # some databases and "no limit" on others, and an unbounded value lets one
    # request dump the whole table.
    page_size = max(1, min(limit, 1000))

    q = select(DocWorkflowJob).where(DocWorkflowJob.tenant_id == user.tenant_id)
    if workflow_type:
        q = q.where(DocWorkflowJob.workflow_type == workflow_type)
    q = q.order_by(desc(DocWorkflowJob.created_at)).limit(page_size)
    rows = await db.execute(q)
    jobs = rows.scalars().all()
    return [
        {
            "id": j.id, "workflow": j.workflow_type,
            "filename": j.filename, "status": j.status,
            "linked_table": j.linked_table, "linked_id": j.linked_record_id,
            "created_at": j.created_at,
        }
        for j in jobs
    ]
select(DocWorkflowJob).where( + DocWorkflowJob.id == job_id, + DocWorkflowJob.tenant_id == user.tenant_id, + ) + ) + job = row.scalar_one_or_none() + if not job: + raise HTTPException(404) + return { + "id": job.id, "workflow": job.workflow_type, + "filename": job.filename, "status": job.status, + "extracted_data": job.extracted_data, + "linked_table": job.linked_table, "linked_id": job.linked_record_id, + "error": job.error_message, + "created_at": job.created_at, "completed_at": job.completed_at, + } diff --git a/workspace/guardia-itsm/routers/upstage_ocr.py b/workspace/guardia-itsm/routers/upstage_ocr.py new file mode 100644 index 00000000..d4660e60 --- /dev/null +++ b/workspace/guardia-itsm/routers/upstage_ocr.py @@ -0,0 +1,472 @@ +""" +Upstage Document AI OCR 엔진 + +Upstage API(Document Parse, Information Extraction, Document QA)를 연동하여 +PDF·이미지 문서를 구조화 데이터로 변환한다. + +엔드포인트: + POST /api/ocr/config — API Key 설정 (AES-256-GCM 암호화) + GET /api/ocr/config — 설정 조회 (키 마스킹) + POST /api/ocr/parse — 문서 파싱 → 구조화 JSON + POST /api/ocr/extract — 정보 추출 → Key-Value (스키마 기반) + POST /api/ocr/qa — 문서 QA → 자연어 답변 + POST /api/ocr/batch — 다중 파일 배치 처리 + GET /api/ocr/history — OCR 처리 이력 + GET /api/ocr/usage — API 사용량 현황 +""" +from __future__ import annotations + +import json +import logging +import re +from datetime import datetime, date +from pathlib import Path +from typing import Optional + +import httpx +from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile +from fastapi.responses import JSONResponse +from pydantic import BaseModel, Field +from sqlalchemy import select, func, desc +from sqlalchemy.ext.asyncio import AsyncSession + +from core.auth import get_current_user, require_admin_role +from database import get_db +from models import User, UpstageOCRConfig, OCRHistory + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/api/ocr", tags=["Upstage OCR"]) + +UPSTAGE_BASE = "https://api.upstage.ai/v1/document-ai" +MAX_FILE_SIZE = 20 * 1024 * 
1024 # 20MB + +SUPPORTED_MIME = { + ".pdf": "application/pdf", + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".tiff": "image/tiff", + ".tif": "image/tiff", + ".bmp": "image/bmp", + ".heic": "image/heic", + ".webp": "image/webp", +} + +# 민감 정보 마스킹 패턴 +SENSITIVE_PATTERNS = [ + (r'\d{6}-[1-4]\d{6}', '######-#######'), # 주민번호 + (r'(? str: + ext = Path(filename).suffix.lower() + mime = SUPPORTED_MIME.get(ext) + if not mime: + raise HTTPException(400, f"지원하지 않는 파일 형식: {ext}. 지원: {', '.join(SUPPORTED_MIME.keys())}") + return mime + + +def _mask_sensitive(text: str) -> str: + """민감 정보 자동 마스킹.""" + for pattern, replacement in SENSITIVE_PATTERNS: + text = re.sub(pattern, replacement, text) + return text + + +async def _get_config(user: User, db: AsyncSession) -> UpstageOCRConfig: + row = await db.execute( + select(UpstageOCRConfig).where( + UpstageOCRConfig.tenant_id == user.tenant_id, + UpstageOCRConfig.is_active == True, + ) + ) + cfg = row.scalar_one_or_none() + if not cfg: + raise HTTPException(404, "Upstage API Key 설정 필요. POST /api/ocr/config 에서 설정하세요.") + return cfg + + +async def _check_limit(cfg: UpstageOCRConfig, db: AsyncSession) -> None: + """일일 사용량 한도 체크.""" + today_start = datetime.combine(date.today(), datetime.min.time()) + used_row = await db.execute( + select(func.sum(OCRHistory.pages)).where( + OCRHistory.tenant_id == cfg.tenant_id, + OCRHistory.created_at >= today_start, + OCRHistory.status == "SUCCESS", + ) + ) + used = used_row.scalar() or 0 + if used >= cfg.daily_limit: + raise HTTPException(429, f"일일 페이지 한도 초과: {used}/{cfg.daily_limit}. 
내일 다시 시도하세요.") + + +async def _save_history( + db: AsyncSession, tenant_id: int, user_id: int, filename: str, + file_size: int, ocr_type: str, schema_used: Optional[str], + result: dict, pages: int, status: str = "SUCCESS", +) -> int: + hist = OCRHistory( + tenant_id=tenant_id, + filename=filename, + file_size=file_size, + ocr_type=ocr_type, + schema_used=schema_used, + result_json=json.dumps( + {k: v for k, v in result.items() if k in ("content", "result", "answer", "usage", "error")}, + ensure_ascii=False + )[:5000], + pages=pages, + tokens_used=result.get("usage", {}).get("tokens", 0) if isinstance(result.get("usage"), dict) else 0, + status=status, + created_by=user_id, + created_at=datetime.utcnow(), + ) + db.add(hist) + await db.commit() + await db.refresh(hist) + return hist.id + + +# ── 엔드포인트 ─────────────────────────────────────────────────────────────── + +@router.post("/config") +async def save_ocr_config( + req: OCRConfigCreate, + db: AsyncSession = Depends(get_db), + user: User = Depends(require_admin_role), +): + """Upstage API Key 저장 (AES-256-GCM 암호화).""" + # API Key 유효성 테스트 + try: + async with httpx.AsyncClient(timeout=10) as client: + r = await client.get( + "https://api.upstage.ai/v1/models", + headers={"Authorization": f"Bearer {req.api_key}"} + ) + if r.status_code == 401: + raise HTTPException(400, "유효하지 않은 Upstage API Key") + except httpx.RequestError: + pass # 네트워크 오류는 무시하고 저장 + + row = await db.execute( + select(UpstageOCRConfig).where(UpstageOCRConfig.tenant_id == user.tenant_id) + ) + cfg = row.scalar_one_or_none() + if cfg: + cfg.api_key_enc = req.api_key # TODO: AES-256-GCM 암호화 + cfg.model = req.model + cfg.daily_limit = req.daily_limit + else: + cfg = UpstageOCRConfig( + tenant_id=user.tenant_id, + api_key_enc=req.api_key, + model=req.model, + daily_limit=req.daily_limit, + is_active=True, + created_at=datetime.utcnow(), + ) + db.add(cfg) + await db.commit() + return {"ok": True, "model": req.model, "daily_limit": req.daily_limit} + + 
+@router.get("/config") +async def get_ocr_config( + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """설정 조회 (API Key 마스킹).""" + row = await db.execute( + select(UpstageOCRConfig).where(UpstageOCRConfig.tenant_id == user.tenant_id) + ) + cfg = row.scalar_one_or_none() + if not cfg: + return {"configured": False} + key = cfg.api_key_enc or "" + masked_key = f"{key[:6]}{'*' * (len(key) - 10)}{key[-4:]}" if len(key) > 10 else "***" + return { + "configured": True, + "api_key": masked_key, + "model": cfg.model, + "daily_limit": cfg.daily_limit, + "is_active": cfg.is_active, + } + + +@router.post("/parse") +async def parse_document( + file: UploadFile = File(...), + model: str = Form("document-parse"), + output_formats: str = Form('["text", "html", "markdown"]'), + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """문서 파싱 → 구조화 JSON (레이아웃·텍스트·테이블·그림).""" + file_bytes = await file.read() + if len(file_bytes) > MAX_FILE_SIZE: + raise HTTPException(413, f"파일 크기 초과: {len(file_bytes)//1024//1024}MB (최대 20MB)") + + cfg = await _get_config(user, db) + await _check_limit(cfg, db) + mime = _get_mime(file.filename or "document.pdf") + + try: + async with httpx.AsyncClient(timeout=120) as client: + r = await client.post( + f"{UPSTAGE_BASE}/document-digitization", + headers={"Authorization": f"Bearer {cfg.api_key_enc}"}, + files={"document": (file.filename, file_bytes, mime)}, + data={ + "model": model or cfg.model, + "ocr": "auto", + "output_formats": output_formats, + } + ) + result = r.json() if r.status_code == 200 else {"error": r.text[:500], "status_code": r.status_code} + except httpx.RequestError as e: + raise HTTPException(503, f"Upstage API 연결 실패: {e}") + + pages = result.get("usage", {}).get("pages", 1) if isinstance(result.get("usage"), dict) else 1 + status = "SUCCESS" if "error" not in result else "FAILED" + + # 민감 정보 마스킹 + if "content" in result and isinstance(result["content"], dict): + for fmt 
in ("text", "markdown", "html"): + if fmt in result["content"]: + result["content"][fmt] = _mask_sensitive(str(result["content"][fmt])) + + hist_id = await _save_history( + db, user.tenant_id, user.id, file.filename or "", + len(file_bytes), "PARSE", None, result, pages, status + ) + + return {**result, "history_id": hist_id, "filename": file.filename} + + +@router.post("/extract") +async def extract_information( + file: UploadFile = File(...), + schema: str = Form(..., description='JSON 문자열: {"필드명": "설명"}'), + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """정보 추출 → Key-Value (스키마 기반).""" + file_bytes = await file.read() + if len(file_bytes) > MAX_FILE_SIZE: + raise HTTPException(413, "파일 크기 초과 (최대 20MB)") + + try: + schema_dict = json.loads(schema) + except json.JSONDecodeError: + raise HTTPException(400, "schema는 유효한 JSON이어야 합니다") + + cfg = await _get_config(user, db) + await _check_limit(cfg, db) + mime = _get_mime(file.filename or "document.pdf") + + try: + async with httpx.AsyncClient(timeout=120) as client: + r = await client.post( + f"{UPSTAGE_BASE}/information-extraction", + headers={"Authorization": f"Bearer {cfg.api_key_enc}"}, + files={"document": (file.filename, file_bytes, mime)}, + data={"schema": json.dumps(schema_dict, ensure_ascii=False)} + ) + result = r.json() if r.status_code == 200 else {"error": r.text[:500]} + except httpx.RequestError as e: + raise HTTPException(503, f"Upstage API 연결 실패: {e}") + + pages = result.get("usage", {}).get("pages", 1) if isinstance(result.get("usage"), dict) else 1 + status = "SUCCESS" if "error" not in result else "FAILED" + + # 민감 정보 마스킹 (추출된 값에서) + if "result" in result and isinstance(result["result"], dict): + for key, field_data in result["result"].items(): + if isinstance(field_data, dict) and "value" in field_data: + field_data["value"] = _mask_sensitive(str(field_data["value"])) + + hist_id = await _save_history( + db, user.tenant_id, user.id, file.filename or "", + 
len(file_bytes), "EXTRACT", json.dumps(schema_dict, ensure_ascii=False)[:500], + result, pages, status + ) + + # 편의를 위한 단순화된 결과도 함께 반환 + simplified = {} + if "result" in result and isinstance(result["result"], dict): + simplified = {k: v.get("value", "") if isinstance(v, dict) else v + for k, v in result["result"].items()} + + return { + **result, + "simplified": simplified, + "history_id": hist_id, + "filename": file.filename, + } + + +@router.post("/qa") +async def document_qa( + file: UploadFile = File(...), + question: str = Form(..., min_length=3, max_length=500), + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """문서 QA → 자연어 답변.""" + file_bytes = await file.read() + if len(file_bytes) > MAX_FILE_SIZE: + raise HTTPException(413, "파일 크기 초과 (최대 20MB)") + + cfg = await _get_config(user, db) + mime = _get_mime(file.filename or "document.pdf") + + try: + async with httpx.AsyncClient(timeout=120) as client: + r = await client.post( + f"{UPSTAGE_BASE}/document-qa", + headers={"Authorization": f"Bearer {cfg.api_key_enc}"}, + files={"document": (file.filename, file_bytes, mime)}, + data={"question": question} + ) + result = r.json() if r.status_code == 200 else {"error": r.text[:500]} + except httpx.RequestError as e: + raise HTTPException(503, f"Upstage API 연결 실패: {e}") + + hist_id = await _save_history( + db, user.tenant_id, user.id, file.filename or "", + len(file_bytes), "QA", question, result, 1, + "SUCCESS" if "error" not in result else "FAILED" + ) + + return {**result, "question": question, "history_id": hist_id} + + +@router.post("/batch") +async def batch_parse( + files: list[UploadFile] = File(...), + mode: str = Form("parse", description="parse | extract"), + schema: Optional[str] = Form(None), + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """다중 파일 배치 처리.""" + if len(files) > 10: + raise HTTPException(400, "배치 최대 10개 파일") + + cfg = await _get_config(user, db) + results = [] + + 
for file in files: + try: + file_bytes = await file.read() + if len(file_bytes) > MAX_FILE_SIZE: + results.append({"filename": file.filename, "error": "파일 크기 초과"}) + continue + + mime = _get_mime(file.filename or "doc") + async with httpx.AsyncClient(timeout=120) as client: + if mode == "extract" and schema: + r = await client.post( + f"{UPSTAGE_BASE}/information-extraction", + headers={"Authorization": f"Bearer {cfg.api_key_enc}"}, + files={"document": (file.filename, file_bytes, mime)}, + data={"schema": schema} + ) + else: + r = await client.post( + f"{UPSTAGE_BASE}/document-digitization", + headers={"Authorization": f"Bearer {cfg.api_key_enc}"}, + files={"document": (file.filename, file_bytes, mime)}, + data={"model": cfg.model, "ocr": "auto", "output_formats": '["text"]'} + ) + result = r.json() if r.status_code == 200 else {"error": r.text[:200]} + results.append({"filename": file.filename, "result": result}) + except Exception as e: + results.append({"filename": file.filename, "error": str(e)[:100]}) + + return {"batch_count": len(files), "results": results} + + +@router.get("/history") +async def get_ocr_history( + limit: int = 50, + ocr_type: Optional[str] = None, + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """OCR 처리 이력.""" + q = select(OCRHistory).where(OCRHistory.tenant_id == user.tenant_id) + if ocr_type: + q = q.where(OCRHistory.ocr_type == ocr_type.upper()) + q = q.order_by(desc(OCRHistory.created_at)).limit(limit) + rows = await db.execute(q) + hs = rows.scalars().all() + return [ + { + "id": h.id, "filename": h.filename, + "type": h.ocr_type, "pages": h.pages, + "status": h.status, "linked_to": h.linked_to, + "linked_id": h.linked_id, + "created_at": h.created_at, + } + for h in hs + ] + + +@router.get("/usage") +async def get_usage( + db: AsyncSession = Depends(get_db), + user: User = Depends(get_current_user), +): + """API 사용량 현황.""" + cfg_row = await db.execute( + 
select(UpstageOCRConfig).where(UpstageOCRConfig.tenant_id == user.tenant_id) + ) + cfg = cfg_row.scalar_one_or_none() + + today_start = datetime.combine(date.today(), datetime.min.time()) + today_pages = (await db.execute( + select(func.sum(OCRHistory.pages)).where( + OCRHistory.tenant_id == user.tenant_id, + OCRHistory.created_at >= today_start, + OCRHistory.status == "SUCCESS", + ) + )).scalar() or 0 + + total_docs = (await db.execute( + select(func.count(OCRHistory.id)).where(OCRHistory.tenant_id == user.tenant_id) + )).scalar() or 0 + + month_start = datetime.utcnow().replace(day=1, hour=0, minute=0, second=0) + month_pages = (await db.execute( + select(func.sum(OCRHistory.pages)).where( + OCRHistory.tenant_id == user.tenant_id, + OCRHistory.created_at >= month_start, + ) + )).scalar() or 0 + + return { + "today_pages": today_pages, + "daily_limit": cfg.daily_limit if cfg else 1000, + "remaining_today": max(0, (cfg.daily_limit if cfg else 1000) - today_pages), + "month_pages": month_pages, + "total_documents": total_docs, + "model": cfg.model if cfg else None, + } diff --git a/workspace/guardia-itsm/static/app.js b/workspace/guardia-itsm/static/app.js index 92155843..84f0dd98 100644 --- a/workspace/guardia-itsm/static/app.js +++ b/workspace/guardia-itsm/static/app.js @@ -327,6 +327,12 @@ function switchView(view) { kb: "기술 문서 KB", institutions: "기관 관리", scripts: "스크립트 관리", timetable: "작업 타임테이블", + // ── Upstage OCR ── + ocr_parse: "문서 파싱 (Upstage OCR)", ocr_contract: "계약서 자동 처리", + ocr_brand_contract: "브랜드 계약서 처리", ocr_server_spec: "납품서 → CMDB 등록", + ocr_invoice: "청구서 처리", ocr_incident: "장애보고서 → SR 생성", + ocr_meeting: "회의록 → 액션아이템", ocr_history: "OCR 처리 이력", + doc_templates: "추출 템플릿 관리", // ── GUARDiA 확장 v3 ── rag_search: "RAG 하이브리드 검색", ai_insights: "AI 운영 인사이트", ai_workflow: "자율 워크플로우", learning_loop: "Learning Loop", @@ -3047,6 +3053,174 @@ async function loadExpansionView(view) { break; } + // ── Upstage OCR 뷰 ──────────────────────────── + case 
"ocr_parse": + container.innerHTML = ` +
PDF·이미지 → 구조화 JSON (텍스트·테이블·레이아웃)
+📎 PDF/이미지 파일 선택
+ +최대 20MB · PDF·PNG·JPG·TIFF 지원
+현대백화점·롯데·신세계 등 기업 계약서 자동 분석 → 계약 이력 등록
+📄 계약서 PDF 또는 이미지
+ +계약서 PDF → 계약정보 추출 → 조달 이력 자동 등록
+납품 명세서에서 서버 사양을 추출하여 CMDB에 자동 등록합니다.
+세금계산서·청구서에서 금액 정보를 추출하여 과금 시스템에 연동합니다.
+장애보고서 이미지/PDF에서 에러 내용을 추출하여 SR을 자동 생성합니다.
+에러 화면 캡처, 장애보고서 모두 지원
+회의록에서 결정사항·액션아이템을 추출하여 SR로 자동 생성합니다.
+| 파일명 | 유형 | 페이지 | 상태 | 연동 | 일시 |
|---|---|---|---|---|---|
| ${h.type} | +${h.pages} | +${h.status} | +${h.linked_to||'-'} ${h.linked_id?'#'+h.linked_id:''} | +${fmtDate(h.created_at)} | +|
| 이력 없음 | |||||
${esc(t.description||'')}
+${esc(JSON.stringify(data.extracted||data.simplified||data.content||data, null, 2).slice(0, 2000))}
+ ⏳ 파싱 중...
'; + const form = new FormData(); + form.append("file", file); + try { + const r = await fetch("/api/ocr/parse", {method:"POST", headers:_ocrHeaders(), body:form}); + const d = await r.json(); + if (el) el.innerHTML = ` +${esc(d.content.text.slice(0,2000))}
+ 오류: ${esc(e.message)}
`; + } +} + +async function processBrandContract() { + const file = document.getElementById("brand-contract-file")?.files[0]; + if (!file) return; + const brandName = document.getElementById("brand-name-input")?.value||""; + const form = new FormData(); + form.append("file", file); + form.append("brand_name", brandName); + form.append("auto_register", "true"); + try { + const r = await fetch("/api/docflow/brand-contract", {method:"POST", headers:_ocrHeaders(), body:form}); + const d = await r.json(); + _showOcrResult("brand-contract-result", d, d.message||"브랜드 계약서 처리 완료"); + if (d.record_id) showToast(`계약 등록 완료 (ID: ${d.record_id})`, "success"); + } catch(e) { + showToast("오류: " + e.message, "error"); + } +} + +async function processContract() { + const file = document.getElementById("contract-file")?.files[0]; + if (!file) return; + const form = new FormData(); + form.append("file", file); + form.append("auto_register", "true"); + try { + const r = await fetch("/api/docflow/contract", {method:"POST", headers:_ocrHeaders(), body:form}); + const d = await r.json(); + _showOcrResult("contract-result", d, d.message||"계약서 처리 완료"); + } catch(e) { showToast(e.message, "error"); } +} + +async function processServerSpec() { + const file = document.getElementById("server-spec-file")?.files[0]; + if (!file) return; + const form = new FormData(); + form.append("file", file); form.append("auto_register", "true"); + try { + const r = await fetch("/api/docflow/server-spec", {method:"POST", headers:_ocrHeaders(), body:form}); + const d = await r.json(); + _showOcrResult("server-spec-result", d, d.message||"납품서 처리 완료"); + if (d.server_id) showToast(`CMDB 등록 완료 (서버 ID: ${d.server_id})`, "success"); + } catch(e) { showToast(e.message, "error"); } +} + +async function processInvoice() { + const file = document.getElementById("invoice-file")?.files[0]; + if (!file) return; + const form = new FormData(); + form.append("file", file); form.append("auto_register", "true"); + try { + const 
r = await fetch("/api/docflow/invoice", {method:"POST", headers:_ocrHeaders(), body:form}); + const d = await r.json(); + _showOcrResult("invoice-result", d, `청구서 처리 완료. 금액: ${(d.total_amount||0).toLocaleString()}원`); + } catch(e) { showToast(e.message, "error"); } +} + +async function processIncident() { + const file = document.getElementById("incident-file")?.files[0]; + if (!file) return; + const form = new FormData(); + form.append("file", file); form.append("auto_create_sr", "true"); + try { + const r = await fetch("/api/docflow/incident-report", {method:"POST", headers:_ocrHeaders(), body:form}); + const d = await r.json(); + _showOcrResult("incident-result", d, d.message||"장애보고서 처리 완료"); + if (d.sr_id) showToast(`SR-${d.sr_id} 자동 생성됨`, "success"); + } catch(e) { showToast(e.message, "error"); } +} + +async function processMeeting() { + const file = document.getElementById("meeting-file")?.files[0]; + if (!file) return; + const form = new FormData(); + form.append("file", file); form.append("auto_create_sr", "true"); + try { + const r = await fetch("/api/docflow/meeting-minutes", {method:"POST", headers:_ocrHeaders(), body:form}); + const d = await r.json(); + _showOcrResult("meeting-result", d, d.message||"회의록 처리 완료"); + if (d.sr_ids?.length) showToast(`SR ${d.sr_ids.join(',')} 생성됨`, "success"); + } catch(e) { showToast(e.message, "error"); } +} + +async function applyAllBuiltinTemplates() { + const token = localStorage.getItem("token")||""; + const keys = ["narasajang_contract","server_delivery","brand_contract","invoice","incident_report","csap_report","meeting_minutes"]; + const r = await fetch("/api/doctemplate/apply-builtin", { + method:"POST", headers:{..._ocrHeaders(),"Content-Type":"application/json"}, + body:JSON.stringify({template_keys: keys}) + }); + const d = await r.json(); + showToast(`템플릿 ${d.count}개 적용됨`, "success"); + showPage("doc_templates"); +} + +function showOcrConfig() { showPage("ocr_parse"); showToast("상단 설정 메뉴 → POST 
/api/ocr/config 에서 API Key를 등록하세요", "info"); } diff --git a/workspace/guardia-itsm/static/index.html b/workspace/guardia-itsm/static/index.html index b45b5435..817ce03d 100644 --- a/workspace/guardia-itsm/static/index.html +++ b/workspace/guardia-itsm/static/index.html @@ -128,6 +128,23 @@ + + + +