422 lines
16 KiB
Python
422 lines
16 KiB
Python
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import logging
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from typing import Any, Optional
|
|
|
|
import httpx
|
|
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
|
from pydantic import BaseModel, Field
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from core.auth import get_current_user, require_admin_role
|
|
from database import get_db
|
|
from models import (
|
|
User, AuditLog,
|
|
DataLineage, DataRetentionPolicy, PIIScanResult,
|
|
)
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Every endpoint declared in this file is mounted under the /api/dg prefix.
router = APIRouter(prefix="/api/dg", tags=["Data Governance"])

# HTTP endpoint that _ollama() posts prompts to (local host, port 11434).
_OLLAMA_URL = "http://localhost:11434/api/generate"
|
|
|
|
# ── PII detection regexes (Republic of Korea conventions) ─────────────────────
# Insertion order matters: _mask_text() applies the patterns in this order
# when no explicit type list is supplied.
PII_PATTERNS: dict[str, re.Pattern] = {
    label: re.compile(expression)
    for label, expression in (
        ("SSN", r"\b\d{6}-[1-4]\d{6}\b"),
        ("PHONE", r"\b01[016789][-.\s]?\d{3,4}[-.\s]?\d{4}\b"),
        ("CARD", r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b"),
        ("ACCOUNT", r"\b\d{2,6}-\d{2,6}-\d{2,7}\b"),
        ("EMAIL", r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
        ("IP", r"\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.){3}(?:25[0-5]|2[0-4]\d|1?\d?\d)\b"),
    )
}

# Per-match weight used by _risk_level() when scoring a scan result.
PII_RISK_WEIGHT: dict[str, int] = {
    "SSN": 10,
    "CARD": 9,
    "ACCOUNT": 7,
    "PHONE": 5,
    "EMAIL": 4,
    "IP": 2,
}
|
|
|
|
|
|
def _tenant(user: User) -> str:
|
|
return user.inst_code or str(user.id)
|
|
|
|
|
|
def _detect_pii(text: str) -> dict[str, list[str]]:
    """Run every pattern in ``PII_PATTERNS`` against *text*.

    Returns a mapping of pattern label to the distinct matches, in order of
    first appearance. Labels with no match are omitted.
    """
    hits: dict[str, list[str]] = {}
    for label, regex in PII_PATTERNS.items():
        # dict.fromkeys de-duplicates while keeping first-seen order.
        distinct = list(dict.fromkeys(regex.findall(text)))
        if distinct:
            hits[label] = distinct
    return hits
|
|
|
|
|
|
def _risk_level(found: dict[str, list[str]]) -> str:
    """Classify a detection result as ``HIGH``, ``MEDIUM``, ``LOW`` or ``NONE``.

    The score is the sum of ``weight * number_of_matches`` over all detected
    types (unknown types weigh 1). Any SSN or CARD hit is ``HIGH`` regardless
    of the score.
    """
    if not found:
        return "NONE"

    score = 0
    for label, values in found.items():
        score += PII_RISK_WEIGHT.get(label, 1) * len(values)

    always_high = "SSN" in found or "CARD" in found
    if always_high or score >= 10:
        return "HIGH"
    if score >= 4:
        return "MEDIUM"
    return "LOW"
|
|
|
|
|
|
def _mask_value(value: str, method: str) -> str:
|
|
if method == "hash":
|
|
return "sha256:" + hashlib.sha256(value.encode()).hexdigest()[:16]
|
|
if len(value) <= 2:
|
|
return "*" * len(value)
|
|
return value[0] + "*" * (len(value) - 2) + value[-1]
|
|
|
|
|
|
def _mask_text(text: str, method: str, pii_types: Optional[list[str]]) -> tuple[str, dict[str, int]]:
    """Mask every PII match in *text* and count the replacements per type.

    Args:
        text: Input text to mask.
        method: Passed through to ``_mask_value`` for each match.
        pii_types: Labels to mask; ``None`` or an empty list means every
            label in ``PII_PATTERNS``. Labels with no pattern are skipped.

    Returns:
        ``(masked_text, counts)`` where ``counts`` only contains labels that
        had at least one replacement.
    """
    selected = pii_types if pii_types else list(PII_PATTERNS)
    tally: dict[str, int] = {}
    current = text
    for label in selected:
        regex = PII_PATTERNS.get(label)
        if regex is None:
            continue
        # Patterns run sequentially, each over the output of the previous one.
        current, replaced = regex.subn(
            lambda match: _mask_value(match.group(0), method), current
        )
        if replaced:
            tally[label] = replaced
    return current, tally
|
|
|
|
|
|
async def _ollama(prompt: str) -> Optional[str]:
    """Send *prompt* to the endpoint at ``_OLLAMA_URL`` and return its reply.

    This is a best-effort helper: any failure (connection error, timeout,
    non-2xx status, malformed body) is logged and ``None`` is returned so the
    calling endpoint can proceed without AI output.

    Args:
        prompt: Prompt text posted as the ``prompt`` field.

    Returns:
        The ``response`` field of the JSON reply, or ``None`` on any failure
        or when the reply is not a JSON object.
    """
    payload = {"model": "llama3", "prompt": prompt, "stream": False}
    try:
        async with httpx.AsyncClient(timeout=15) as client:
            reply = await client.post(_OLLAMA_URL, json=payload)
            # Treat error statuses as failures instead of parsing error bodies.
            reply.raise_for_status()
            data = reply.json()
    except Exception:
        # Deliberately broad: the AI call must never break the request.
        # The prompt is not logged because it may carry user-supplied text.
        logger.warning("AI request to %s failed", _OLLAMA_URL, exc_info=True)
        return None

    if not isinstance(data, dict):
        return None
    return data.get("response")
|
|
|
|
|
|
async def _audit(db: AsyncSession, user: User, action: str, detail: str, severity: str = "INFO") -> None:
    """Add a ``DATA_GOVERNANCE`` audit row to the session and flush it.

    The write is best-effort: a failure is logged and swallowed so that it
    does not raise into the calling endpoint. The caller is responsible for
    committing.

    Args:
        db: Session the audit row is added to.
        user: Acting user; ``user.username`` is stored as the actor.
        action: Action code stored on the row.
        detail: Free-form detail stored on the row.
        severity: Severity label stored on the row.
    """
    try:
        entry = AuditLog(
            actor=user.username,
            action=action,
            detail=detail,
            entity_type="DATA_GOVERNANCE",
            severity=severity,
        )
        db.add(entry)
        await db.flush()
    except Exception:
        # Still best-effort, but leave a trace instead of failing silently.
        logger.exception("Failed to write audit log entry (action=%s)", action)
|
|
|
|
|
|
# ── Pydantic 스키마 ────────────────────────────────────────────────────────────
|
|
|
|
class ScanIn(BaseModel):
    # Request body for POST /scan.

    # Text that is searched for PII.
    text: str
    # Label stored with the scan result (scan_pii truncates it to 500 chars).
    target: str = "inline-text"
    # When true, scan_pii additionally asks the AI helper for a hint.
    use_ai: bool = False
|
|
|
|
class MaskIn(BaseModel):
    # Request body for POST /mask.

    # Text whose PII matches are replaced.
    text: str
    # mask_pii accepts only "redact" or "hash" and rejects anything else.
    method: str = "redact"
    # Labels to mask; None means every label in PII_PATTERNS.
    pii_types: Optional[list[str]] = None
|
|
|
|
class LineageIn(BaseModel):
    # Request body for POST /lineage; create_lineage copies every field onto
    # a new DataLineage row.

    source_system: str
    source_table: str = ""
    target_system: str
    target_table: str = ""
    transformation: str = ""
    # Flags whether this flow involves PII; get_lineage aggregates it.
    pii_involved: bool = False
|
|
|
|
class RetentionPolicyIn(BaseModel):
    # Request body for POST /retention-policy.

    table_name: str
    # Required and strictly positive.
    retention_days: int = Field(..., gt=0)
    # create_retention_policy accepts only DELETE, ANONYMIZE or ARCHIVE.
    action: str = "DELETE"
    # NOTE(review): accepted here but not read by create_retention_policy in
    # this file — confirm whether it should be persisted.
    legal_basis: str = ""
|
|
|
|
class EnforceIn(BaseModel):
    # Request body for POST /retention-enforce.

    # Restrict the run to one policy; None selects every active policy of the tenant.
    policy_id: Optional[int] = None
    # When true (the default), enforce_retention does not update last_enforced.
    dry_run: bool = True
|
|
|
|
|
|
# ── 엔드포인트 ─────────────────────────────────────────────────────────────────
|
|
|
|
@router.post("/scan", summary="개인정보 자동 탐지 스캔")
async def scan_pii(
    body: ScanIn,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Scan the submitted text for PII, persist a PIIScanResult row plus an
    # audit entry, and return the detected types with masked samples.
    # Only redacted samples are returned; raw matches never leave this function.
    tid = _tenant(current_user)
    found = _detect_pii(body.text)

    ai_hint = None
    if body.use_ai:
        # Only the first 1500 characters are sent to the AI helper.
        ai_hint = await _ollama(
            f"다음 텍스트에 개인정보가 더 있으면 유형만 콤마로 나열하라(없으면 NONE):\n{body.text[:1500]}"
        )

    pii_types = list(found.keys())
    match_count = sum(len(values) for values in found.values())
    risk = _risk_level(found)

    rec = PIIScanResult(
        tenant_id=tid,
        scan_target=body.target[:500],
        pii_types_found=list(pii_types),
        match_count=match_count,
        risk_level=risk,
    )
    db.add(rec)
    await db.flush()
    # Read the primary key right after flush: once the session commits, ORM
    # attributes may be expired, and reloading them on an AsyncSession would
    # require implicit I/O that is not permitted outside an awaited call.
    scan_id = rec.id

    await _audit(
        db,
        current_user,
        "PII_SCAN",
        f"target={body.target} types={pii_types} risk={risk}",
        severity="WARN" if risk == "HIGH" else "INFO",
    )
    await db.commit()

    # At most three redacted samples per type.
    samples = {
        ptype: [_mask_value(value, "redact") for value in values[:3]]
        for ptype, values in found.items()
    }
    return {
        "scan_id": scan_id,
        "target": body.target,
        "pii_types_found": pii_types,
        "match_count": match_count,
        "risk_level": risk,
        "masked_samples": samples,
        "ai_hint": ai_hint,
    }
|
|
|
|
|
|
@router.post("/mask", summary="개인정보 마스킹 처리")
async def mask_pii(
    body: MaskIn,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Mask PII in the submitted text and record the per-type replacement
    # counts (not the text itself) in the audit log.
    allowed_methods = ("redact", "hash")
    if body.method not in allowed_methods:
        raise HTTPException(400, "method는 redact 또는 hash여야 합니다")

    masked_text, counts = _mask_text(body.text, body.method, body.pii_types)
    await _audit(db, current_user, "PII_MASK", f"method={body.method} masked={counts}")
    await db.commit()

    return {
        "method": body.method,
        "masked_text": masked_text,
        "masked_counts": counts,
        "total_masked": sum(counts.values()),
    }
|
|
|
|
|
|
@router.get("/audit-log", summary="개인정보처리방침 준수 감사 로그 조회")
async def dg_audit_log(
    limit: int = Query(50, ge=1, le=500),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Return the newest DATA_GOVERNANCE audit entries, highest id first.
    # NOTE(review): the query filters on entity_type only, not on the caller's
    # tenant — confirm whether cross-tenant visibility is intended.
    stmt = (
        select(AuditLog)
        .where(AuditLog.entity_type == "DATA_GOVERNANCE")
        .order_by(AuditLog.id.desc())
        .limit(limit)
    )
    entries = (await db.execute(stmt)).scalars().all()

    items = []
    for entry in entries:
        created = entry.created_at.isoformat() if entry.created_at else None
        items.append({
            "id": entry.id,
            "actor": entry.actor,
            "action": entry.action,
            "detail": entry.detail,
            "severity": entry.severity,
            "created_at": created,
        })
    return {"total": len(entries), "items": items}
|
|
|
|
|
|
@router.get("/lineage/{table}", summary="데이터 계보 추적")
async def get_lineage(
    table: str,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Return the lineage edges of the caller's tenant that point into
    # ("upstream") or out of ("downstream") the given table.
    tid = _tenant(current_user)

    upstream_stmt = select(DataLineage).where(
        DataLineage.tenant_id == tid, DataLineage.target_table == table
    )
    upstream = (await db.execute(upstream_stmt)).scalars().all()

    downstream_stmt = select(DataLineage).where(
        DataLineage.tenant_id == tid, DataLineage.source_table == table
    )
    downstream = (await db.execute(downstream_stmt)).scalars().all()

    def _as_dict(edge: DataLineage) -> dict[str, Any]:
        created = edge.created_at.isoformat() if edge.created_at else None
        return {
            "id": edge.id,
            "source_system": edge.source_system,
            "source_table": edge.source_table,
            "target_system": edge.target_system,
            "target_table": edge.target_table,
            "transformation": edge.transformation,
            "pii_involved": edge.pii_involved,
            "created_at": created,
        }

    # True as soon as any edge in either direction is flagged as involving PII.
    pii_in_flow = any(edge.pii_involved for edge in upstream) or any(
        edge.pii_involved for edge in downstream
    )
    return {
        "table": table,
        "upstream": [_as_dict(edge) for edge in upstream],
        "downstream": [_as_dict(edge) for edge in downstream],
        "pii_in_flow": pii_in_flow,
    }
|
|
|
|
|
|
@router.post("/lineage", status_code=201, summary="데이터 계보 등록")
async def create_lineage(
    body: LineageIn,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Register one lineage edge for the caller's tenant and audit the action.
    tid = _tenant(current_user)
    rec = DataLineage(
        tenant_id=tid,
        source_system=body.source_system,
        source_table=body.source_table,
        target_system=body.target_system,
        target_table=body.target_table,
        transformation=body.transformation,
        pii_involved=body.pii_involved,
    )
    db.add(rec)
    await db.flush()
    # Read the primary key right after flush: once the session commits, ORM
    # attributes may be expired, and reloading them on an AsyncSession would
    # require implicit I/O that is not permitted outside an awaited call.
    lineage_id = rec.id

    await _audit(
        db,
        current_user,
        "LINEAGE_REGISTER",
        f"{body.source_system}.{body.source_table} -> {body.target_system}.{body.target_table}",
    )
    await db.commit()

    # The remaining fields are echoed from the request for the same reason;
    # they are exactly the values that were written to the row above.
    return {
        "id": lineage_id,
        "source_system": body.source_system,
        "target_system": body.target_system,
        "pii_involved": body.pii_involved,
    }
|
|
|
|
|
|
@router.get("/compliance-check", summary="공공데이터법·개인정보보호법 준수 자동 감사")
async def compliance_check(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Build a four-item compliance checklist for the caller's tenant from
    # three row counts and report the share of passed items as a percentage.
    from sqlalchemy import func as sqlfunc

    tid = _tenant(current_user)

    async def _count(model, *criteria) -> int:
        # COUNT(*) over `model` restricted by `criteria`; a NULL result maps to 0.
        stmt = select(sqlfunc.count()).select_from(model).where(*criteria)
        return (await db.execute(stmt)).scalar() or 0

    high_scans = await _count(
        PIIScanResult,
        PIIScanResult.tenant_id == tid,
        PIIScanResult.risk_level == "HIGH",
    )
    total_scans = await _count(PIIScanResult, PIIScanResult.tenant_id == tid)
    active_policies = await _count(
        DataRetentionPolicy,
        DataRetentionPolicy.tenant_id == tid,
        DataRetentionPolicy.is_active == True,
    )

    checklist = [
        {
            "law": "개인정보보호법",
            "item": "개인정보 탐지·스캔 체계 운영",
            "passed": total_scans > 0,
            "evidence": f"누적 스캔 {total_scans}건",
        },
        {
            "law": "개인정보보호법",
            "item": "보존 기간 정책 수립 및 자동 파기",
            "passed": active_policies > 0,
            "evidence": f"활성 보존정책 {active_policies}건",
        },
        {
            "law": "개인정보보호법",
            "item": "고위험 PII 미해소 잔존 여부",
            "passed": high_scans == 0,
            "evidence": f"HIGH 위험 스캔 {high_scans}건",
        },
        {
            # This item is hard-coded as passed; it is not derived from a query.
            "law": "전자정부법",
            "item": "데이터 거버넌스 감사 로그 기록",
            "passed": True,
            "evidence": "TB_AUDIT_LOG 불변 기록 운영",
        },
    ]

    total = len(checklist)
    passed = len([entry for entry in checklist if entry["passed"]])
    return {
        "compliance_rate": round(passed / total * 100, 1),
        "passed": passed,
        "total": total,
        "checklist": checklist,
        "audited_at": datetime.utcnow().isoformat(),
    }
|
|
|
|
|
|
@router.get("/retention-policy", summary="데이터 보존 기간 정책 목록")
async def list_retention_policies(
    active_only: bool = True,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # List the caller's tenant's retention policies, newest id first,
    # optionally restricted to active ones.
    criteria = [DataRetentionPolicy.tenant_id == _tenant(current_user)]
    if active_only:
        criteria.append(DataRetentionPolicy.is_active == True)

    stmt = (
        select(DataRetentionPolicy)
        .where(*criteria)
        .order_by(DataRetentionPolicy.id.desc())
    )
    policies = (await db.execute(stmt)).scalars().all()

    items = []
    for policy in policies:
        enforced = policy.last_enforced.isoformat() if policy.last_enforced else None
        items.append({
            "id": policy.id,
            "table_name": policy.table_name,
            "retention_days": policy.retention_days,
            "action": policy.action,
            "is_active": policy.is_active,
            "last_enforced": enforced,
        })
    return {"total": len(policies), "items": items}
|
|
|
|
|
|
@router.post("/retention-policy", status_code=201, summary="보존 기간 정책 등록")
async def create_retention_policy(
    body: RetentionPolicyIn,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Create an active retention policy for the caller's tenant and audit it.
    # Responds 400 when `action` is not one of the three supported values.
    if body.action not in ("DELETE", "ANONYMIZE", "ARCHIVE"):
        raise HTTPException(400, "action은 DELETE/ANONYMIZE/ARCHIVE 중 하나여야 합니다")

    tid = _tenant(current_user)
    # NOTE(review): body.legal_basis is accepted by the schema but not stored
    # here — confirm whether the model has a column for it.
    rec = DataRetentionPolicy(
        tenant_id=tid,
        table_name=body.table_name,
        retention_days=body.retention_days,
        action=body.action,
        is_active=True,
    )
    db.add(rec)
    await db.flush()
    # Read the primary key right after flush: once the session commits, ORM
    # attributes may be expired, and reloading them on an AsyncSession would
    # require implicit I/O that is not permitted outside an awaited call.
    policy_id = rec.id

    await _audit(
        db,
        current_user,
        "RETENTION_POLICY_CREATE",
        f"table={body.table_name} days={body.retention_days} action={body.action}",
    )
    await db.commit()

    # The remaining fields are echoed from the values written to the row above.
    return {
        "id": policy_id,
        "table_name": body.table_name,
        "retention_days": body.retention_days,
        "action": body.action,
        "is_active": True,
    }
|
|
|
|
|
|
@router.post("/retention-enforce", summary="만료 데이터 자동 삭제/익명화 실행")
async def enforce_retention(
    body: EnforceIn,
    db: AsyncSession = Depends(get_db),
    current_user=Depends(require_admin_role),
):
    # Compute the cutoff date for each active retention policy of the caller's
    # tenant. This function does not delete or anonymize any rows itself; when
    # dry_run is false it only stamps last_enforced on the processed policies.
    # Responds 404 when no active policy matches.
    tid = _tenant(current_user)

    stmt = select(DataRetentionPolicy).where(
        DataRetentionPolicy.tenant_id == tid,
        DataRetentionPolicy.is_active == True,
    )
    if body.policy_id is not None:
        stmt = stmt.where(DataRetentionPolicy.id == body.policy_id)

    policies = (await db.execute(stmt)).scalars().all()
    if not policies:
        raise HTTPException(404, "실행할 활성 보존 정책이 없습니다")

    # One timestamp for the whole run so every cutoff and last_enforced value
    # refers to the same instant.
    now = datetime.utcnow()
    results = []
    for policy in policies:
        cutoff = now - timedelta(days=policy.retention_days)
        if not body.dry_run:
            policy.last_enforced = now
        results.append({
            "policy_id": policy.id,
            "table_name": policy.table_name,
            "action": policy.action,
            "cutoff_date": cutoff.isoformat(),
            "dry_run": body.dry_run,
        })

    await db.commit()
    return {
        "dry_run": body.dry_run,
        "policies_processed": len(results),
        "results": results,
        "note": "실제 데이터 파기는 관리자 승인 후에만 수행됩니다",
    }
|
|
|
|
|
|
@router.get("/pii-report", summary="PII 탐지 현황 보고서")
async def pii_report(
    use_ai: bool = True,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    # Aggregate the tenant's 500 most recent scan results by risk level and
    # PII type, and attach a summary: AI-generated when requested and
    # available, otherwise a fixed fallback sentence.
    tid = _tenant(current_user)
    stmt = (
        select(PIIScanResult)
        .where(PIIScanResult.tenant_id == tid)
        .order_by(PIIScanResult.id.desc())
        .limit(500)
    )
    scans = (await db.execute(stmt)).scalars().all()

    # Plain dicts on purpose: `stats` is interpolated into the AI prompt below.
    risk_tally: dict[str, int] = {}
    type_tally: dict[str, int] = {}
    for scan in scans:
        risk_tally.setdefault(scan.risk_level, 0)
        risk_tally[scan.risk_level] += 1
        for ptype in scan.pii_types_found or []:
            type_tally.setdefault(ptype, 0)
            type_tally[ptype] += 1
    total_matches = sum(scan.match_count or 0 for scan in scans)

    stats = {
        "total_scans": len(scans),
        "total_pii_matches": total_matches,
        "by_risk_level": risk_tally,
        "by_pii_type": type_tally,
    }

    ai_summary = None
    if use_ai:
        ai_summary = await _ollama(f"다음 PII 탐지 현황을 3문장 한국어로 요약하고 개선 권고를 1개 제시하라:\n{stats}")
    if not ai_summary:
        # Fallback when AI is disabled, unavailable, or returned nothing.
        if risk_tally.get("HIGH", 0):
            advice = "고위험 항목 즉시 마스킹·파기 권고."
        else:
            advice = "현재 고위험 항목 없음. 정기 스캔 유지 권고."
        ai_summary = f"누적 {len(scans)}건 스캔, 총 PII {total_matches}건 탐지. " + advice

    return {
        "report": stats,
        "ai_summary": ai_summary,
        "generated_at": datetime.utcnow().isoformat(),
    }
|