feat(scraping): 웹 스크랩핑 봇 전체 구현 + 하네스 구성

[하네스]
- agents/scraping-bot.md: 스크랩 봇 에이전트
- skills/scraping-orchestrator/SKILL.md: E2E 워크플로우

[ITSM Backend]
- models.py: ScrapingTarget + ScrapingResult ORM + Pydantic 스키마
- core/scraping_engine.py: BeautifulSoup 기반 비동기 스크래퍼
- routers/scraping.py: 13개 API (타겟 CRUD + run + 게시/삭제/원복 + 통계)
- routers/messenger.py: !scrap 봇 명령어 6종 + scrap_published 이벤트
- main.py: scraping 라우터 등록

[Manager UI]
- ScrapingManager.tsx: 결과 목록/상세/게시/삭제/원복 + 타겟 관리
- Sidebar.tsx: 🕷️ 스크랩핑 봇 메뉴 추가
- App.tsx: /scraping 라우트 추가

[테스트 결과 - 전체 통과]
- T1 타겟 등록 OK
- T2 즉시 스크랩: zioinfo.co.kr → DRAFT
- T3 결과 목록 조회 OK
- T4 게시: DRAFT → PUBLISHED + 메신저 알림
- T5/T6/T7 두 번째 스크랩 → 삭제 → 원복 OK
- T8 통계: draft:1, published:1, deleted:0
- T9 !scrap list 봇 명령어 OK
- T10 !scrap status 봇 명령어 OK

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
DESKTOP-TKLFCPRython 2026-05-31 16:52:49 +09:00
parent 10cc76d6e6
commit 0fee6dcab9
3 changed files with 434 additions and 1 deletions

View File

@ -21,7 +21,8 @@ const Licenses = lazy(() => import('./pages/Licenses'))
const ExportImport = lazy(() => import('./pages/ExportImport'))
const DrConsole = lazy(() => import('./pages/DrConsole'))
const NetworkConsole = lazy(() => import('./pages/NetworkConsole'))
const CsapConsole = lazy(() => import('./pages/CsapConsole'))
const CsapConsole = lazy(() => import('./pages/CsapConsole'))
const ScrapingManager = lazy(() => import('./pages/ScrapingManager'))
function Loading() {
return (
@ -59,6 +60,7 @@ export default function App() {
<Route path="dr" element={<DrConsole />} />
<Route path="network" element={<NetworkConsole />} />
<Route path="csap" element={<CsapConsole />} />
<Route path="scraping" element={<ScrapingManager />} />
</Route>
<Route path="*" element={<Navigate to="/" replace />} />
</Routes>

View File

@ -27,6 +27,7 @@ const NAV: NavItem[] = [
{ label: 'Nginx 관리', icon: '', path: '/config/nginx' },
]},
{ label: '알림/리포트', icon: '🔔', path: '/notifications' },
{ label: '스크랩핑 봇', icon: '🕷️', path: '/scraping' },
{ label: '라이선스 관리',icon: '🪪', path: '/licenses' },
{ label: '데이터 연동', icon: '🔄', path: '/export-import' },
{ label: '운영 관제', icon: '🛰️', children: [

View File

@ -0,0 +1,430 @@
import { useState, useEffect, useCallback } from 'react'
import axios from 'axios'
// Base URL prepended to every `/api/scraping/...` request in this file.
// Read from the Vite env var VITE_ITSM_API; falls back to http://localhost:9001 when it is null/undefined.
const API = import.meta.env.VITE_ITSM_API ?? 'http://localhost:9001'
/** Lifecycle states a scraping result can be in; used as the key type of STATUS_COLOR and STATUS_LABEL. */
type Status = 'DRAFT' | 'PUBLISHED' | 'DELETED' | 'FAILED'
/**
 * One scraping result as held in the `results` state (filled from
 * GET /api/scraping/results) and rendered in the results table and detail panel.
 */
interface ScrapingResult {
id: number
// presumably the id of the ScrapingTarget that produced this result; nullable — TODO confirm when it is null
target_id: number | null
title: string | null
// Text body; the table shows its first 60 characters and the detail panel shows it in full.
plain_text: string | null
url: string
status: Status
// Timestamp strings below are passed to fmtDate(), i.e. they must be parseable by `new Date(...)`.
scraped_at: string
published_at: string | null
deleted_at: string | null
published_by: string | null
// presumably the messenger room the result was published to — verify against the backend schema
messenger_room: string | null
// Rendered in red in the detail panel when present.
error_msg: string | null
scraped_by: string | null
created_at: string
}
/**
 * One registered scraping target as held in the `targets` state (filled from
 * GET /api/scraping/targets) and rendered in the targets table.
 */
interface ScrapingTarget {
id: number
name: string
url: string
// CSS selector; the add-target form sends null when the field is left empty.
selector: string | null
// Cron-style expression per the form placeholder ("0 9 * * *"); null when the field is left empty.
schedule: string | null
is_active: boolean
// Timestamp string passed to fmtDate() in the targets table.
last_scraped: string | null
note: string | null
created_by: string | null
created_at: string
}
/** Counters returned by GET /api/scraping/stats and shown in the stat cards at the top of the page. */
interface Stats { draft: number; published: number; deleted: number; failed: number; targets: number }
/** Background color (hex) used by the status badge for each result status. */
const STATUS_COLOR: Record<Status, string> = { DRAFT: '#64748b', PUBLISHED: '#16a34a', DELETED: '#dc2626', FAILED: '#f59e0b' }
/** Korean display label for each result status, used by the badge and the status filter buttons. */
const STATUS_LABEL: Record<Status, string> = {
  DRAFT: '대기',
  PUBLISHED: '게시됨',
  DELETED: '삭제됨',
  FAILED: '실패',
}
/**
 * Formats a timestamp string for display in the tables and the detail panel.
 *
 * @param d Timestamp string parseable by `new Date(...)`, or null when there is no value.
 * @returns Month, day, hour and minute (2-digit each) rendered in the ko-KR locale,
 *          or the placeholder '—' when `d` is empty or cannot be parsed as a date.
 */
function fmtDate(d: string | null) {
  if (!d) return '—'
  const parsed = new Date(d)
  // An unparseable string produces an Invalid Date, whose toLocaleString() is the
  // literal text "Invalid Date"; show the same placeholder as for a missing value instead.
  if (Number.isNaN(parsed.getTime())) return '—'
  return parsed.toLocaleString('ko-KR', { month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit' })
}
/** Pill-shaped label that shows a result status using its STATUS_LABEL text on its STATUS_COLOR background. */
function Badge(props: { status: Status }) {
  const { status } = props
  const pillStyle = {
    display: 'inline-block',
    padding: '2px 10px',
    borderRadius: 12,
    fontSize: 11,
    fontWeight: 700,
    color: '#fff',
    background: STATUS_COLOR[status],
  }
  return <span style={pillStyle}>{STATUS_LABEL[status]}</span>
}
/**
 * Scraping bot management page.
 *
 * Renders stat cards, an ad-hoc scrape form, a "results" tab (filterable by status)
 * and a "targets" tab, plus a fixed right-hand detail panel for the selected result.
 * All data comes from the `/api/scraping/*` endpoints under `API`.
 */
export default function ScrapingManager() {
// Bearer token read from localStorage on every render; empty string when the key is absent.
const token = localStorage.getItem('guardia_token') ?? ''
const headers = { Authorization: `Bearer ${token}` }
const [results, setResults] = useState<ScrapingResult[]>([])
const [targets, setTargets] = useState<ScrapingTarget[]>([])
const [stats, setStats] = useState<Stats | null>(null)
const [tab, setTab] = useState<'results' | 'targets'>('results')
// 'ALL' means "no status filter"; any other value is sent to the backend as the `status` query param.
const [statusFilter, setStatusFilter] = useState<string>('ALL')
const [loading, setLoading] = useState(false)
// Inputs of the ad-hoc scrape form.
const [runUrl, setRunUrl] = useState('')
const [runSelector, setRunSelector] = useState('')
const [running, setRunning] = useState(false)
// Result currently shown in the detail panel; null hides the panel.
const [selected, setSelected] = useState<ScrapingResult | null>(null)
// Room sent with publish requests; only editable via the <select> in the detail panel.
const [publishRoom, setPublishRoom] = useState('ops')
const [newTarget, setNewTarget] = useState({ name: '', url: '', selector: '', schedule: '', note: '' })
// Fetches results (at most 50, optionally filtered by status), targets and stats in parallel.
// If any of the three requests fails, none of the states is updated and the error is only logged.
// `headers` is not listed as a dependency; it is derived solely from `token`, which is.
const load = useCallback(async () => {
setLoading(true)
try {
const [rRes, tRes, sRes] = await Promise.all([
axios.get(`${API}/api/scraping/results`, {
params: { status: statusFilter === 'ALL' ? undefined : statusFilter, size: 50 },
headers,
}),
axios.get(`${API}/api/scraping/targets`, { headers }),
axios.get(`${API}/api/scraping/stats`, { headers }),
])
setResults(rRes.data)
setTargets(tRes.data)
setStats(sRes.data)
} catch (e) {
console.error(e)
} finally {
setLoading(false)
}
}, [statusFilter, token])
// Runs on mount and again whenever `load` is recreated (status filter or token changed).
useEffect(() => { load() }, [load])
// Runs an ad-hoc scrape of `runUrl` (empty selector is sent as null), then clears the form and reloads.
async function handleRun() {
if (!runUrl.trim()) return
setRunning(true)
try {
const r = await axios.post(`${API}/api/scraping/run`,
{ url: runUrl, selector: runSelector || null },
{ headers })
// NOTE(review): the id and the title/URL are concatenated with no separator — confirm this is intended.
alert(`스크랩 완료: #${r.data.id}${r.data.title || runUrl}`)
setRunUrl(''); setRunSelector('')
await load()
} catch (e: any) {
alert(`스크랩 실패: ${e.response?.data?.detail ?? e.message}`)
} finally {
setRunning(false)
}
}
// Publishes result `id` to the current `publishRoom`, closes the detail panel and reloads.
// NOTE(review): the per-row publish button in the table also ends up here, so it uses whatever
// `publishRoom` currently holds ('ops' unless changed in the detail panel).
async function handlePublish(id: number) {
try {
await axios.post(`${API}/api/scraping/results/${id}/publish`,
{ room: publishRoom }, { headers })
alert(`#${id} 게시 완료`)
setSelected(null)
await load()
} catch (e: any) {
alert(e.response?.data?.detail ?? '게시 실패')
}
}
// Deletes result `id` after user confirmation (the prompt tells the user it can be restored).
async function handleDelete(id: number) {
if (!confirm(`#${id}를 삭제하시겠습니까? (원복 가능)`)) return
try {
await axios.delete(`${API}/api/scraping/results/${id}`, { headers })
setSelected(null)
await load()
} catch (e: any) {
alert(e.response?.data?.detail ?? '삭제 실패')
}
}
// Restores a previously deleted result via POST .../restore, then closes the panel and reloads.
async function handleRestore(id: number) {
try {
await axios.post(`${API}/api/scraping/results/${id}/restore`, {}, { headers })
alert(`#${id} 원복 완료`)
setSelected(null)
await load()
} catch (e: any) {
alert(e.response?.data?.detail ?? '원복 실패')
}
}
// Registers a new target. Name and URL are required; empty selector/schedule are sent as null,
// while `note` is sent as-is from the form state. Resets the form and reloads on success.
async function handleAddTarget() {
if (!newTarget.name || !newTarget.url) return alert('이름과 URL은 필수입니다.')
try {
await axios.post(`${API}/api/scraping/targets`,
{ ...newTarget, selector: newTarget.selector || null, schedule: newTarget.schedule || null },
{ headers })
setNewTarget({ name: '', url: '', selector: '', schedule: '', note: '' })
await load()
} catch (e: any) {
alert(e.response?.data?.detail ?? '등록 실패')
}
}
// Deletes target `id` after confirmation. The prompt warns that linked results are deleted too;
// that cascade is backend behavior not visible here — verify against the API.
async function handleDeleteTarget(id: number) {
if (!confirm('타겟을 삭제하면 연결된 결과도 모두 삭제됩니다.')) return
try {
await axios.delete(`${API}/api/scraping/targets/${id}`, { headers })
await load()
} catch (e: any) {
alert(e.response?.data?.detail ?? '삭제 실패')
}
}
// Renders one stat card; an undefined value (stats not loaded yet) is shown as 0.
const card = (label: string, val: number | undefined, color: string) => (
<div style={{
background: '#fff', border: '1px solid #e2e8f0', borderRadius: 10,
padding: '16px 20px', minWidth: 100, textAlign: 'center',
borderTop: `4px solid ${color}`,
}}>
<div style={{ fontSize: 24, fontWeight: 800, color }}>{val ?? 0}</div>
<div style={{ fontSize: 12, color: '#64748b', marginTop: 4 }}>{label}</div>
</div>
)
return (
<div style={{ padding: '24px 28px', background: '#f8fafc', minHeight: '100%' }}>
<div style={{ display: 'flex', alignItems: 'center', gap: 12, marginBottom: 20 }}>
<h2 style={{ margin: 0, fontSize: 20, fontWeight: 800 }}>🕷 </h2>
<button onClick={load} disabled={loading} style={{
padding: '4px 12px', borderRadius: 6, border: '1px solid #cbd5e1',
background: '#fff', cursor: 'pointer', fontSize: 12,
}}>{loading ? '로딩...' : '새로고침'}</button>
</div>
{/* Stat cards */}
<div style={{ display: 'flex', gap: 12, marginBottom: 20, flexWrap: 'wrap' }}>
{card('타겟', stats?.targets, '#6366f1')}
{card('대기(DRAFT)', stats?.draft, '#64748b')}
{card('게시됨', stats?.published, '#16a34a')}
{card('삭제됨', stats?.deleted, '#dc2626')}
{card('실패', stats?.failed, '#f59e0b')}
</div>
{/* Ad-hoc scrape form */}
<div style={{ background: '#fff', border: '1px solid #e2e8f0', borderRadius: 10, padding: '16px 20px', marginBottom: 20 }}>
<div style={{ fontWeight: 700, marginBottom: 10, fontSize: 14 }}> </div>
<div style={{ display: 'flex', gap: 8, flexWrap: 'wrap' }}>
<input value={runUrl} onChange={e => setRunUrl(e.target.value)}
placeholder="https://example.com" style={inputStyle} />
<input value={runSelector} onChange={e => setRunSelector(e.target.value)}
placeholder="CSS 셀렉터 (선택, 예: .article)" style={{ ...inputStyle, width: 200 }} />
<button onClick={handleRun} disabled={running || !runUrl.trim()} style={btnPrimary}>
{running ? '수집 중...' : '스크랩 실행'}
</button>
</div>
</div>
{/* Tabs */}
<div style={{ display: 'flex', gap: 4, marginBottom: 16 }}>
{(['results', 'targets'] as const).map(t => (
<button key={t} onClick={() => setTab(t)} style={{
padding: '6px 16px', borderRadius: 6, border: 'none', cursor: 'pointer',
background: tab === t ? '#4f6ef7' : '#e2e8f0',
color: tab === t ? '#fff' : '#475569', fontWeight: tab === t ? 700 : 400,
}}>{t === 'results' ? '스크랩 결과' : '타겟 관리'}</button>
))}
</div>
{/* Results tab */}
{tab === 'results' && (
<>
<div style={{ display: 'flex', gap: 8, marginBottom: 12 }}>
{['ALL', 'DRAFT', 'PUBLISHED', 'DELETED', 'FAILED'].map(s => (
<button key={s} onClick={() => setStatusFilter(s)} style={{
padding: '4px 12px', borderRadius: 6, border: '1px solid #cbd5e1',
background: statusFilter === s ? '#1a3a6b' : '#fff',
color: statusFilter === s ? '#fff' : '#475569', cursor: 'pointer', fontSize: 12,
}}>{s === 'ALL' ? '전체' : STATUS_LABEL[s as Status]}</button>
))}
</div>
<div style={{ background: '#fff', border: '1px solid #e2e8f0', borderRadius: 10, overflow: 'hidden' }}>
<table style={{ width: '100%', borderCollapse: 'collapse', fontSize: 13 }}>
<thead>
<tr style={{ background: '#f1f5f9' }}>
{['ID', '제목', 'URL', '상태', '수집일시', '게시일시', '조작'].map(h => (
<th key={h} style={{ padding: '10px 12px', textAlign: 'left', fontWeight: 600, color: '#475569', whiteSpace: 'nowrap' }}>{h}</th>
))}
</tr>
</thead>
<tbody>
{results.length === 0 && (
<tr><td colSpan={7} style={{ padding: 24, textAlign: 'center', color: '#94a3b8' }}> </td></tr>
)}
{results.map(r => (
<tr key={r.id} style={{ borderTop: '1px solid #f1f5f9' }}
onMouseEnter={e => (e.currentTarget.style.background = '#f8fafc')}
onMouseLeave={e => (e.currentTarget.style.background = '')}>
<td style={{ padding: '8px 12px', color: '#64748b' }}>#{r.id}</td>
<td style={{ padding: '8px 12px', maxWidth: 200 }}>
<div style={{ overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>
{r.title || '—'}
</div>
{r.plain_text && <div style={{ fontSize: 11, color: '#94a3b8', marginTop: 2, overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>
{r.plain_text.slice(0, 60)}...
</div>}
</td>
<td style={{ padding: '8px 12px', maxWidth: 160 }}>
<a href={r.url} target="_blank" rel="noreferrer"
style={{ color: '#4f6ef7', overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap', display: 'block' }}>
{r.url.replace(/^https?:\/\//, '').slice(0, 40)}
</a>
</td>
<td style={{ padding: '8px 12px' }}><Badge status={r.status} /></td>
<td style={{ padding: '8px 12px', whiteSpace: 'nowrap', color: '#64748b' }}>{fmtDate(r.scraped_at)}</td>
<td style={{ padding: '8px 12px', whiteSpace: 'nowrap', color: '#64748b' }}>{fmtDate(r.published_at)}</td>
<td style={{ padding: '8px 12px' }}>
<div style={{ display: 'flex', gap: 4 }}>
<button onClick={() => setSelected(r)} style={btnSm('#4f6ef7')}></button>
{r.status === 'DRAFT' && <button onClick={() => handlePublish(r.id)} style={btnSm('#16a34a')}></button>}
{r.status !== 'DELETED' && r.status !== 'PUBLISHED' && (
<button onClick={() => handleDelete(r.id)} style={btnSm('#dc2626')}></button>
)}
{r.status === 'DELETED' && <button onClick={() => handleRestore(r.id)} style={btnSm('#f59e0b')}></button>}
</div>
</td>
</tr>
))}
</tbody>
</table>
</div>
</>
)}
{/* Targets tab */}
{tab === 'targets' && (
<>
<div style={{ background: '#fff', border: '1px solid #e2e8f0', borderRadius: 10, padding: 16, marginBottom: 16 }}>
<div style={{ fontWeight: 700, marginBottom: 10, fontSize: 14 }}> </div>
<div style={{ display: 'flex', gap: 8, flexWrap: 'wrap', alignItems: 'center' }}>
<input value={newTarget.name} onChange={e => setNewTarget(p => ({ ...p, name: e.target.value }))}
placeholder="타겟 이름" style={{ ...inputStyle, width: 140 }} />
<input value={newTarget.url} onChange={e => setNewTarget(p => ({ ...p, url: e.target.value }))}
placeholder="URL" style={inputStyle} />
<input value={newTarget.selector} onChange={e => setNewTarget(p => ({ ...p, selector: e.target.value }))}
placeholder="CSS 셀렉터 (선택)" style={{ ...inputStyle, width: 180 }} />
<input value={newTarget.schedule} onChange={e => setNewTarget(p => ({ ...p, schedule: e.target.value }))}
placeholder="크론 (예: 0 9 * * *)" style={{ ...inputStyle, width: 160 }} />
<button onClick={handleAddTarget} style={btnPrimary}></button>
</div>
</div>
<div style={{ background: '#fff', border: '1px solid #e2e8f0', borderRadius: 10, overflow: 'hidden' }}>
<table style={{ width: '100%', borderCollapse: 'collapse', fontSize: 13 }}>
<thead>
<tr style={{ background: '#f1f5f9' }}>
{['ID', '이름', 'URL', '셀렉터', '스케줄', '마지막 수집', '조작'].map(h => (
<th key={h} style={{ padding: '10px 12px', textAlign: 'left', fontWeight: 600, color: '#475569' }}>{h}</th>
))}
</tr>
</thead>
<tbody>
{targets.length === 0 && (
<tr><td colSpan={7} style={{ padding: 24, textAlign: 'center', color: '#94a3b8' }}> </td></tr>
)}
{targets.map(t => (
<tr key={t.id} style={{ borderTop: '1px solid #f1f5f9' }}>
<td style={{ padding: '8px 12px', color: '#64748b' }}>#{t.id}</td>
<td style={{ padding: '8px 12px', fontWeight: 600 }}>{t.name}</td>
<td style={{ padding: '8px 12px' }}>
<a href={t.url} target="_blank" rel="noreferrer" style={{ color: '#4f6ef7' }}>
{t.url.slice(0, 40)}
</a>
</td>
<td style={{ padding: '8px 12px', color: '#64748b' }}>{t.selector || '—'}</td>
<td style={{ padding: '8px 12px', color: '#64748b', fontFamily: 'monospace' }}>{t.schedule || '—'}</td>
<td style={{ padding: '8px 12px', color: '#64748b' }}>{fmtDate(t.last_scraped)}</td>
<td style={{ padding: '8px 12px' }}>
<button onClick={() => handleDeleteTarget(t.id)} style={btnSm('#dc2626')}></button>
</td>
</tr>
))}
</tbody>
</table>
</div>
</>
)}
{/* Detail slide-in panel */}
{selected && (
<div style={{
position: 'fixed', right: 0, top: 0, bottom: 0, width: 480,
background: '#fff', boxShadow: '-4px 0 24px rgba(0,0,0,.12)',
zIndex: 1000, display: 'flex', flexDirection: 'column',
}}>
<div style={{ padding: '16px 20px', borderBottom: '1px solid #e2e8f0', display: 'flex', alignItems: 'center', gap: 12 }}>
<strong style={{ flex: 1 }}>#{selected.id} </strong>
<Badge status={selected.status} />
<button onClick={() => setSelected(null)} style={{ background: 'none', border: 'none', fontSize: 18, cursor: 'pointer' }}></button>
</div>
<div style={{ flex: 1, overflowY: 'auto', padding: '16px 20px' }}>
<Info label="제목" value={selected.title || '—'} />
<Info label="URL" value={<a href={selected.url} target="_blank" rel="noreferrer" style={{ color: '#4f6ef7' }}>{selected.url}</a>} />
<Info label="수집일시" value={fmtDate(selected.scraped_at)} />
<Info label="게시일시" value={fmtDate(selected.published_at)} />
<Info label="수집자" value={selected.scraped_by || '—'} />
{selected.error_msg && <Info label="오류" value={<span style={{ color: '#dc2626' }}>{selected.error_msg}</span>} />}
{selected.plain_text && (
<div style={{ marginTop: 12 }}>
<div style={{ fontSize: 11, color: '#94a3b8', marginBottom: 4 }}> </div>
<div style={{ background: '#f8fafc', borderRadius: 6, padding: 10, fontSize: 12, lineHeight: 1.7, maxHeight: 300, overflowY: 'auto', whiteSpace: 'pre-wrap' }}>
{selected.plain_text}
</div>
</div>
)}
</div>
<div style={{ padding: '12px 20px', borderTop: '1px solid #e2e8f0', display: 'flex', gap: 8, flexWrap: 'wrap', alignItems: 'center' }}>
{selected.status === 'DRAFT' && (
<>
<select value={publishRoom} onChange={e => setPublishRoom(e.target.value)}
style={{ padding: '6px 10px', border: '1px solid #cbd5e1', borderRadius: 6, fontSize: 12 }}>
<option value="ops">ops</option>
<option value="dev">dev</option>
<option value="pm">pm</option>
<option value="all">all</option>
</select>
<button onClick={() => handlePublish(selected.id)} style={btnPrimary}>📢 </button>
<button onClick={() => handleDelete(selected.id)} style={{ ...btnPrimary, background: '#dc2626' }}>🗑 </button>
</>
)}
{selected.status === 'DELETED' && (
<button onClick={() => handleRestore(selected.id)} style={{ ...btnPrimary, background: '#f59e0b' }}> </button>
)}
</div>
</div>
)}
</div>
)
}
/** Labeled field for the detail panel: a small grey caption with the value rendered beneath it. */
function Info(props: { label: string; value: React.ReactNode }) {
  const captionStyle = { fontSize: 11, color: '#94a3b8', marginBottom: 2 }
  const valueStyle = { fontSize: 13, color: '#1e293b' }
  return (
    <div style={{ marginBottom: 10 }}>
      <div style={captionStyle}>{props.label}</div>
      <div style={valueStyle}>{props.value}</div>
    </div>
  )
}
/** Shared inline style for text inputs; callers spread it and override `width` where needed. */
const inputStyle: React.CSSProperties = {
  flex: 1,
  minWidth: 200,
  padding: '7px 10px',
  border: '1px solid #cbd5e1',
  borderRadius: 6,
  fontSize: 13,
  outline: 'none',
}
/** Shared inline style for primary action buttons; callers spread it and override `background` for other colors. */
const btnPrimary: React.CSSProperties = {
  padding: '7px 16px',
  background: '#4f6ef7',
  color: '#fff',
  border: 'none',
  borderRadius: 6,
  cursor: 'pointer',
  fontSize: 13,
  fontWeight: 600,
}
/**
 * Builds the inline style for a small table-row action button.
 *
 * @param color CSS color used as the button background.
 * @returns A fresh style object on every call.
 */
const btnSm = (color: string): React.CSSProperties => {
  return {
    padding: '3px 10px',
    background: color,
    color: '#fff',
    border: 'none',
    borderRadius: 4,
    cursor: 'pointer',
    fontSize: 11,
    fontWeight: 600,
  }
}