feat: 平台规则从硬编码改为品牌方上传文档 + AI 解析
- 新增 PlatformRule 模型 (draft/active/inactive 状态流转) - 新增文档解析服务 (PDF/Word/Excel → 纯文本) - 新增 4 个 API: 解析/确认/查询/删除平台规则 - 脚本审核优先从 DB 读取 active 规则,硬编码兜底 - 视频审核合并平台规则违禁词到检测列表 - Alembic 迁移 006: platform_rules 表 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a2f6f82e15
commit
fed361b9b3
38
backend/alembic/versions/006_add_platform_rules.py
Normal file
38
backend/alembic/versions/006_add_platform_rules.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
"""添加平台规则表
|
||||||
|
|
||||||
|
Revision ID: 006
|
||||||
|
Revises: 005
|
||||||
|
Create Date: 2026-02-10
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = '006'
|
||||||
|
down_revision: Union[str, None] = '005'
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.create_table(
|
||||||
|
'platform_rules',
|
||||||
|
sa.Column('id', sa.String(64), primary_key=True),
|
||||||
|
sa.Column('tenant_id', sa.String(64), sa.ForeignKey('tenants.id', ondelete='CASCADE'), nullable=False, index=True),
|
||||||
|
sa.Column('brand_id', sa.String(64), nullable=False, index=True),
|
||||||
|
sa.Column('platform', sa.String(50), nullable=False, index=True),
|
||||||
|
sa.Column('document_url', sa.String(2048), nullable=False),
|
||||||
|
sa.Column('document_name', sa.String(512), nullable=False),
|
||||||
|
sa.Column('parsed_rules', sa.JSON().with_variant(postgresql.JSONB, 'postgresql'), nullable=True),
|
||||||
|
sa.Column('status', sa.String(20), nullable=False, default='draft', index=True),
|
||||||
|
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
|
||||||
|
sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.drop_table('platform_rules')
|
||||||
@ -2,8 +2,10 @@
|
|||||||
规则管理 API
|
规则管理 API
|
||||||
违禁词库、白名单、竞品库、平台规则
|
违禁词库、白名单、竞品库、平台规则
|
||||||
"""
|
"""
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
from fastapi import APIRouter, Depends, Header, HTTPException, status
|
from fastapi import APIRouter, Depends, Header, HTTPException, Query, status
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from sqlalchemy import select, and_
|
from sqlalchemy import select, and_
|
||||||
@ -11,7 +13,19 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||||||
|
|
||||||
from app.database import get_db
|
from app.database import get_db
|
||||||
from app.models.tenant import Tenant
|
from app.models.tenant import Tenant
|
||||||
from app.models.rule import ForbiddenWord, WhitelistItem, Competitor
|
from app.models.rule import ForbiddenWord, WhitelistItem, Competitor, PlatformRule, RuleStatus
|
||||||
|
from app.schemas.rules import (
|
||||||
|
PlatformRuleParseRequest,
|
||||||
|
PlatformRuleParseResponse,
|
||||||
|
PlatformRuleConfirmRequest,
|
||||||
|
PlatformRuleResponse as PlatformRuleDBResponse,
|
||||||
|
PlatformRuleListResponse as PlatformRuleDBListResponse,
|
||||||
|
ParsedRulesData,
|
||||||
|
)
|
||||||
|
from app.services.document_parser import DocumentParser
|
||||||
|
from app.services.ai_service import AIServiceFactory
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
router = APIRouter(prefix="/rules", tags=["rules"])
|
router = APIRouter(prefix="/rules", tags=["rules"])
|
||||||
|
|
||||||
@ -468,6 +482,289 @@ async def validate_rules(request: RuleValidateRequest) -> RuleValidateResponse:
|
|||||||
return RuleValidateResponse(conflicts=conflicts)
|
return RuleValidateResponse(conflicts=conflicts)
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== 品牌方平台规则(文档上传 + AI 解析) ====================
|
||||||
|
|
||||||
|
def _format_platform_rule(rule: PlatformRule) -> PlatformRuleDBResponse:
|
||||||
|
"""将 ORM 对象转为响应 Schema"""
|
||||||
|
return PlatformRuleDBResponse(
|
||||||
|
id=rule.id,
|
||||||
|
platform=rule.platform,
|
||||||
|
brand_id=rule.brand_id,
|
||||||
|
document_url=rule.document_url,
|
||||||
|
document_name=rule.document_name,
|
||||||
|
parsed_rules=ParsedRulesData(**(rule.parsed_rules or {})),
|
||||||
|
status=rule.status,
|
||||||
|
created_at=rule.created_at.isoformat() if rule.created_at else "",
|
||||||
|
updated_at=rule.updated_at.isoformat() if rule.updated_at else "",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/platform-rules/parse",
|
||||||
|
response_model=PlatformRuleParseResponse,
|
||||||
|
status_code=status.HTTP_201_CREATED,
|
||||||
|
)
|
||||||
|
async def parse_platform_rule_document(
|
||||||
|
request: PlatformRuleParseRequest,
|
||||||
|
x_tenant_id: str = Header(..., alias="X-Tenant-ID"),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
) -> PlatformRuleParseResponse:
|
||||||
|
"""
|
||||||
|
上传文档并通过 AI 解析平台规则
|
||||||
|
|
||||||
|
流程:
|
||||||
|
1. 下载文档
|
||||||
|
2. 提取纯文本
|
||||||
|
3. AI 解析出结构化规则
|
||||||
|
4. 存入 DB (status=draft)
|
||||||
|
5. 返回解析结果供品牌方确认
|
||||||
|
"""
|
||||||
|
await _ensure_tenant_exists(x_tenant_id, db)
|
||||||
|
|
||||||
|
# 1. 下载并解析文档
|
||||||
|
try:
|
||||||
|
document_text = await DocumentParser.download_and_parse(
|
||||||
|
request.document_url, request.document_name,
|
||||||
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"文档解析失败: {e}")
|
||||||
|
raise HTTPException(status_code=400, detail=f"文档下载或解析失败: {e}")
|
||||||
|
|
||||||
|
if not document_text.strip():
|
||||||
|
raise HTTPException(status_code=400, detail="文档内容为空,无法解析")
|
||||||
|
|
||||||
|
# 2. AI 解析
|
||||||
|
parsed_rules = await _ai_parse_platform_rules(x_tenant_id, request.platform, document_text, db)
|
||||||
|
|
||||||
|
# 3. 存入 DB (draft)
|
||||||
|
rule_id = f"pr-{uuid.uuid4().hex[:8]}"
|
||||||
|
rule = PlatformRule(
|
||||||
|
id=rule_id,
|
||||||
|
tenant_id=x_tenant_id,
|
||||||
|
brand_id=request.brand_id,
|
||||||
|
platform=request.platform,
|
||||||
|
document_url=request.document_url,
|
||||||
|
document_name=request.document_name,
|
||||||
|
parsed_rules=parsed_rules,
|
||||||
|
status=RuleStatus.DRAFT.value,
|
||||||
|
)
|
||||||
|
db.add(rule)
|
||||||
|
await db.flush()
|
||||||
|
|
||||||
|
return PlatformRuleParseResponse(
|
||||||
|
id=rule.id,
|
||||||
|
platform=rule.platform,
|
||||||
|
brand_id=rule.brand_id,
|
||||||
|
document_url=rule.document_url,
|
||||||
|
document_name=rule.document_name,
|
||||||
|
parsed_rules=ParsedRulesData(**parsed_rules),
|
||||||
|
status=rule.status,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.put(
|
||||||
|
"/platform-rules/{rule_id}/confirm",
|
||||||
|
response_model=PlatformRuleDBResponse,
|
||||||
|
)
|
||||||
|
async def confirm_platform_rule(
|
||||||
|
rule_id: str,
|
||||||
|
request: PlatformRuleConfirmRequest,
|
||||||
|
x_tenant_id: str = Header(..., alias="X-Tenant-ID"),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
) -> PlatformRuleDBResponse:
|
||||||
|
"""
|
||||||
|
确认/编辑平台规则解析结果
|
||||||
|
|
||||||
|
将 draft 状态的规则设为 active,同时将同 (tenant_id, brand_id, platform) 下
|
||||||
|
已有的 active 规则设为 inactive。
|
||||||
|
"""
|
||||||
|
result = await db.execute(
|
||||||
|
select(PlatformRule).where(
|
||||||
|
and_(
|
||||||
|
PlatformRule.id == rule_id,
|
||||||
|
PlatformRule.tenant_id == x_tenant_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
rule = result.scalar_one_or_none()
|
||||||
|
if not rule:
|
||||||
|
raise HTTPException(status_code=404, detail=f"规则不存在: {rule_id}")
|
||||||
|
|
||||||
|
# 将同 (tenant_id, brand_id, platform) 下已有的 active 规则设为 inactive
|
||||||
|
existing_active = await db.execute(
|
||||||
|
select(PlatformRule).where(
|
||||||
|
and_(
|
||||||
|
PlatformRule.tenant_id == x_tenant_id,
|
||||||
|
PlatformRule.brand_id == rule.brand_id,
|
||||||
|
PlatformRule.platform == rule.platform,
|
||||||
|
PlatformRule.status == RuleStatus.ACTIVE.value,
|
||||||
|
PlatformRule.id != rule_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
for old_rule in existing_active.scalars().all():
|
||||||
|
old_rule.status = RuleStatus.INACTIVE.value
|
||||||
|
|
||||||
|
# 更新当前规则
|
||||||
|
rule.parsed_rules = request.parsed_rules.model_dump()
|
||||||
|
rule.status = RuleStatus.ACTIVE.value
|
||||||
|
await db.flush()
|
||||||
|
|
||||||
|
return _format_platform_rule(rule)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
|
||||||
|
"/platform-rules",
|
||||||
|
response_model=PlatformRuleDBListResponse,
|
||||||
|
)
|
||||||
|
async def list_brand_platform_rules(
|
||||||
|
brand_id: Optional[str] = Query(None),
|
||||||
|
platform: Optional[str] = Query(None),
|
||||||
|
rule_status: Optional[str] = Query(None, alias="status"),
|
||||||
|
x_tenant_id: str = Header(..., alias="X-Tenant-ID"),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
) -> PlatformRuleDBListResponse:
|
||||||
|
"""查询品牌方的平台规则列表"""
|
||||||
|
query = select(PlatformRule).where(PlatformRule.tenant_id == x_tenant_id)
|
||||||
|
|
||||||
|
if brand_id:
|
||||||
|
query = query.where(PlatformRule.brand_id == brand_id)
|
||||||
|
if platform:
|
||||||
|
query = query.where(PlatformRule.platform == platform)
|
||||||
|
if rule_status:
|
||||||
|
query = query.where(PlatformRule.status == rule_status)
|
||||||
|
|
||||||
|
result = await db.execute(query.order_by(PlatformRule.created_at.desc()))
|
||||||
|
rules = result.scalars().all()
|
||||||
|
|
||||||
|
return PlatformRuleDBListResponse(
|
||||||
|
items=[_format_platform_rule(r) for r in rules],
|
||||||
|
total=len(rules),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete(
|
||||||
|
"/platform-rules/{rule_id}",
|
||||||
|
status_code=status.HTTP_204_NO_CONTENT,
|
||||||
|
)
|
||||||
|
async def delete_platform_rule(
|
||||||
|
rule_id: str,
|
||||||
|
x_tenant_id: str = Header(..., alias="X-Tenant-ID"),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""删除平台规则"""
|
||||||
|
result = await db.execute(
|
||||||
|
select(PlatformRule).where(
|
||||||
|
and_(
|
||||||
|
PlatformRule.id == rule_id,
|
||||||
|
PlatformRule.tenant_id == x_tenant_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
rule = result.scalar_one_or_none()
|
||||||
|
if not rule:
|
||||||
|
raise HTTPException(status_code=404, detail=f"规则不存在: {rule_id}")
|
||||||
|
|
||||||
|
await db.delete(rule)
|
||||||
|
await db.flush()
|
||||||
|
|
||||||
|
|
||||||
|
async def _ai_parse_platform_rules(
|
||||||
|
tenant_id: str,
|
||||||
|
platform: str,
|
||||||
|
document_text: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
使用 AI 将文档文本解析为结构化平台规则
|
||||||
|
|
||||||
|
AI 失败时返回空规则结构(降级为手动编辑)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
ai_client = await AIServiceFactory.get_client(tenant_id, db)
|
||||||
|
if not ai_client:
|
||||||
|
logger.warning(f"租户 {tenant_id} 未配置 AI 服务,返回空规则")
|
||||||
|
return _empty_parsed_rules()
|
||||||
|
|
||||||
|
config = await AIServiceFactory.get_config(tenant_id, db)
|
||||||
|
if not config:
|
||||||
|
return _empty_parsed_rules()
|
||||||
|
|
||||||
|
text_model = config.models.get("text", "gpt-4o")
|
||||||
|
|
||||||
|
# 截断过长文本(避免超出 token 限制)
|
||||||
|
max_chars = 15000
|
||||||
|
if len(document_text) > max_chars:
|
||||||
|
document_text = document_text[:max_chars] + "\n...(文档内容已截断)"
|
||||||
|
|
||||||
|
prompt = f"""你是平台广告合规规则分析专家。请从以下 {platform} 平台规则文档中提取结构化规则。
|
||||||
|
|
||||||
|
文档内容:
|
||||||
|
{document_text}
|
||||||
|
|
||||||
|
请以 JSON 格式返回,不要包含其他内容:
|
||||||
|
{{
|
||||||
|
"forbidden_words": ["违禁词1", "违禁词2"],
|
||||||
|
"restricted_words": [{{"word": "xx", "condition": "使用条件", "suggestion": "替换建议"}}],
|
||||||
|
"duration": {{"min_seconds": 7, "max_seconds": null}},
|
||||||
|
"content_requirements": ["必须展示产品正面", "需要口播品牌名"],
|
||||||
|
"other_rules": [{{"rule": "规则名称", "description": "详细说明"}}]
|
||||||
|
}}
|
||||||
|
|
||||||
|
注意:
|
||||||
|
- forbidden_words: 明确禁止使用的词语
|
||||||
|
- restricted_words: 有条件限制的词语
|
||||||
|
- duration: 视频时长要求,如果文档未提及则为 null
|
||||||
|
- content_requirements: 内容上的硬性要求
|
||||||
|
- other_rules: 不属于以上分类的其他规则
|
||||||
|
- 如果某项没有提取到内容,使用空数组或 null"""
|
||||||
|
|
||||||
|
response = await ai_client.chat_completion(
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
model=text_model,
|
||||||
|
temperature=0.2,
|
||||||
|
max_tokens=2000,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 解析 AI 响应
|
||||||
|
content = response.content.strip()
|
||||||
|
if content.startswith("```"):
|
||||||
|
content = content.split("\n", 1)[1]
|
||||||
|
if content.endswith("```"):
|
||||||
|
content = content.rsplit("\n", 1)[0]
|
||||||
|
|
||||||
|
parsed = json.loads(content)
|
||||||
|
|
||||||
|
# 校验并补全字段
|
||||||
|
return {
|
||||||
|
"forbidden_words": parsed.get("forbidden_words", []),
|
||||||
|
"restricted_words": parsed.get("restricted_words", []),
|
||||||
|
"duration": parsed.get("duration"),
|
||||||
|
"content_requirements": parsed.get("content_requirements", []),
|
||||||
|
"other_rules": parsed.get("other_rules", []),
|
||||||
|
}
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning("AI 返回内容非 JSON,降级为空规则")
|
||||||
|
return _empty_parsed_rules()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"AI 解析平台规则失败: {e}")
|
||||||
|
return _empty_parsed_rules()
|
||||||
|
|
||||||
|
|
||||||
|
def _empty_parsed_rules() -> dict:
|
||||||
|
"""返回空的解析规则结构"""
|
||||||
|
return {
|
||||||
|
"forbidden_words": [],
|
||||||
|
"restricted_words": [],
|
||||||
|
"duration": None,
|
||||||
|
"content_requirements": [],
|
||||||
|
"other_rules": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# ==================== 辅助函数(供其他模块调用) ====================
|
# ==================== 辅助函数(供其他模块调用) ====================
|
||||||
|
|
||||||
async def get_whitelist_for_brand(
|
async def get_whitelist_for_brand(
|
||||||
@ -533,3 +830,31 @@ async def get_forbidden_words_for_tenant(
|
|||||||
}
|
}
|
||||||
for w in words
|
for w in words
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
async def get_active_platform_rules(
|
||||||
|
tenant_id: str,
|
||||||
|
brand_id: str,
|
||||||
|
platform: str,
|
||||||
|
db: AsyncSession,
|
||||||
|
) -> Optional[dict]:
|
||||||
|
"""
|
||||||
|
获取品牌方在该平台的生效规则 (active)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
parsed_rules dict 或 None(没有上传规则时)
|
||||||
|
"""
|
||||||
|
result = await db.execute(
|
||||||
|
select(PlatformRule).where(
|
||||||
|
and_(
|
||||||
|
PlatformRule.tenant_id == tenant_id,
|
||||||
|
PlatformRule.brand_id == brand_id,
|
||||||
|
PlatformRule.platform == platform,
|
||||||
|
PlatformRule.status == RuleStatus.ACTIVE.value,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
rule = result.scalar_one_or_none()
|
||||||
|
if not rule:
|
||||||
|
return None
|
||||||
|
return rule.parsed_rules
|
||||||
|
|||||||
@ -15,11 +15,14 @@ from app.schemas.review import (
|
|||||||
RiskLevel,
|
RiskLevel,
|
||||||
Position,
|
Position,
|
||||||
SoftRiskWarning,
|
SoftRiskWarning,
|
||||||
|
SoftRiskAction,
|
||||||
)
|
)
|
||||||
from app.api.rules import (
|
from app.api.rules import (
|
||||||
get_whitelist_for_brand,
|
get_whitelist_for_brand,
|
||||||
get_other_brands_whitelist_terms,
|
get_other_brands_whitelist_terms,
|
||||||
get_forbidden_words_for_tenant,
|
get_forbidden_words_for_tenant,
|
||||||
|
get_active_platform_rules,
|
||||||
|
_platform_rules,
|
||||||
)
|
)
|
||||||
from app.services.soft_risk import evaluate_soft_risk
|
from app.services.soft_risk import evaluate_soft_risk
|
||||||
from app.services.ai_service import AIServiceFactory
|
from app.services.ai_service import AIServiceFactory
|
||||||
@ -175,19 +178,88 @@ async def review_script(
|
|||||||
position=Position(start=content.find(term), end=content.find(term) + len(term)),
|
position=Position(start=content.find(term), end=content.find(term) + len(term)),
|
||||||
))
|
))
|
||||||
|
|
||||||
|
# 3A. 平台规则违禁词(优先从 DB 读取,硬编码兜底)
|
||||||
|
already_checked = set(ABSOLUTE_WORDS + [w["word"] for w in tenant_forbidden_words])
|
||||||
|
platform_forbidden_words: list[str] = []
|
||||||
|
|
||||||
|
# 优先从 DB 获取品牌方上传的 active 平台规则
|
||||||
|
db_platform_rules = await get_active_platform_rules(
|
||||||
|
x_tenant_id, request.brand_id, request.platform.value, db,
|
||||||
|
)
|
||||||
|
if db_platform_rules:
|
||||||
|
platform_forbidden_words = db_platform_rules.get("forbidden_words", [])
|
||||||
|
else:
|
||||||
|
# 兜底:从硬编码 _platform_rules 读取
|
||||||
|
platform_rule = _platform_rules.get(request.platform.value)
|
||||||
|
if platform_rule:
|
||||||
|
for rule in platform_rule.get("rules", []):
|
||||||
|
if rule.get("type") == "forbidden_word":
|
||||||
|
platform_forbidden_words.extend(rule.get("words", []))
|
||||||
|
|
||||||
|
for word in platform_forbidden_words:
|
||||||
|
if word in already_checked or word in whitelist:
|
||||||
|
continue
|
||||||
|
start = 0
|
||||||
|
while True:
|
||||||
|
pos = content.find(word, start)
|
||||||
|
if pos == -1:
|
||||||
|
break
|
||||||
|
if not _is_ad_context(content, word):
|
||||||
|
start = pos + 1
|
||||||
|
continue
|
||||||
|
violations.append(Violation(
|
||||||
|
type=ViolationType.FORBIDDEN_WORD,
|
||||||
|
content=word,
|
||||||
|
severity=RiskLevel.MEDIUM,
|
||||||
|
suggestion=f"违反{request.platform.value}平台规则,建议删除:{word}",
|
||||||
|
position=Position(start=pos, end=pos + len(word)),
|
||||||
|
))
|
||||||
|
start = pos + 1
|
||||||
|
|
||||||
|
# 3B. Brief 黑名单词
|
||||||
|
if request.blacklist_words:
|
||||||
|
for item in request.blacklist_words:
|
||||||
|
word = item.get("word", "")
|
||||||
|
reason = item.get("reason", "")
|
||||||
|
if not word or word in whitelist:
|
||||||
|
continue
|
||||||
|
start_pos = 0
|
||||||
|
while True:
|
||||||
|
pos = content.find(word, start_pos)
|
||||||
|
if pos == -1:
|
||||||
|
break
|
||||||
|
suggestion = f"Brief 黑名单词:{word}"
|
||||||
|
if reason:
|
||||||
|
suggestion += f"({reason})"
|
||||||
|
violations.append(Violation(
|
||||||
|
type=ViolationType.FORBIDDEN_WORD,
|
||||||
|
content=word,
|
||||||
|
severity=RiskLevel.HIGH,
|
||||||
|
suggestion=suggestion,
|
||||||
|
position=Position(start=pos, end=pos + len(word)),
|
||||||
|
))
|
||||||
|
start_pos = pos + 1
|
||||||
|
|
||||||
# 4. 检查遗漏卖点
|
# 4. 检查遗漏卖点
|
||||||
missing_points: list[str] | None = None
|
missing_points: list[str] | None = None
|
||||||
if request.required_points:
|
if request.required_points:
|
||||||
missing = _check_selling_point_coverage(content, request.required_points)
|
missing = _check_selling_point_coverage(content, request.required_points)
|
||||||
missing_points = missing if missing else []
|
missing_points = missing if missing else []
|
||||||
|
|
||||||
# 5. 可选:AI 深度分析
|
# 5. 可选:AI 深度分析(返回 violations + warnings)
|
||||||
ai_violations = await _ai_deep_analysis(x_tenant_id, content, db)
|
ai_violations, ai_warnings = await _ai_deep_analysis(x_tenant_id, content, db)
|
||||||
if ai_violations:
|
if ai_violations:
|
||||||
violations.extend(ai_violations)
|
violations.extend(ai_violations)
|
||||||
|
|
||||||
# 6. 计算分数
|
# 6. 计算分数(按严重程度加权)
|
||||||
score = 100 - len(violations) * 25
|
score = 100
|
||||||
|
for v in violations:
|
||||||
|
if v.severity == RiskLevel.HIGH:
|
||||||
|
score -= 25
|
||||||
|
elif v.severity == RiskLevel.MEDIUM:
|
||||||
|
score -= 15
|
||||||
|
else:
|
||||||
|
score -= 5
|
||||||
if missing_points:
|
if missing_points:
|
||||||
score -= len(missing_points) * 5
|
score -= len(missing_points) * 5
|
||||||
score = max(0, score)
|
score = max(0, score)
|
||||||
@ -209,6 +281,19 @@ async def review_script(
|
|||||||
if request.soft_risk_context:
|
if request.soft_risk_context:
|
||||||
soft_warnings = evaluate_soft_risk(request.soft_risk_context)
|
soft_warnings = evaluate_soft_risk(request.soft_risk_context)
|
||||||
|
|
||||||
|
# 合并 AI 产出的 soft_warnings
|
||||||
|
if ai_warnings:
|
||||||
|
soft_warnings.extend(ai_warnings)
|
||||||
|
|
||||||
|
# 遗漏卖点也加入 soft_warnings
|
||||||
|
if missing_points:
|
||||||
|
soft_warnings.append(SoftRiskWarning(
|
||||||
|
code="missing_selling_points",
|
||||||
|
message=f"遗漏 {len(missing_points)} 个卖点:{', '.join(missing_points)}",
|
||||||
|
action_required=SoftRiskAction.NOTE,
|
||||||
|
blocking=False,
|
||||||
|
))
|
||||||
|
|
||||||
return ScriptReviewResponse(
|
return ScriptReviewResponse(
|
||||||
score=score,
|
score=score,
|
||||||
summary=summary,
|
summary=summary,
|
||||||
@ -222,26 +307,27 @@ async def _ai_deep_analysis(
|
|||||||
tenant_id: str,
|
tenant_id: str,
|
||||||
content: str,
|
content: str,
|
||||||
db: AsyncSession,
|
db: AsyncSession,
|
||||||
) -> list[Violation]:
|
) -> tuple[list[Violation], list[SoftRiskWarning]]:
|
||||||
"""
|
"""
|
||||||
使用 AI 进行深度分析
|
使用 AI 进行深度分析
|
||||||
|
|
||||||
|
返回 (violations, soft_warnings)
|
||||||
AI 分析失败时返回空列表,降级到规则检测
|
AI 分析失败时返回空列表,降级到规则检测
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# 获取 AI 客户端
|
# 获取 AI 客户端
|
||||||
ai_client = await AIServiceFactory.get_client(tenant_id, db)
|
ai_client = await AIServiceFactory.get_client(tenant_id, db)
|
||||||
if not ai_client:
|
if not ai_client:
|
||||||
return []
|
return [], []
|
||||||
|
|
||||||
# 获取模型配置
|
# 获取模型配置
|
||||||
config = await AIServiceFactory.get_config(tenant_id, db)
|
config = await AIServiceFactory.get_config(tenant_id, db)
|
||||||
if not config:
|
if not config:
|
||||||
return []
|
return [], []
|
||||||
|
|
||||||
text_model = config.models.get("text", "gpt-4o")
|
text_model = config.models.get("text", "gpt-4o")
|
||||||
|
|
||||||
# 构建分析提示
|
# 构建分析提示(两类输出)
|
||||||
analysis_prompt = f"""作为广告合规审核专家,请分析以下广告脚本内容,检测潜在的合规风险:
|
analysis_prompt = f"""作为广告合规审核专家,请分析以下广告脚本内容,检测潜在的合规风险:
|
||||||
|
|
||||||
脚本内容:
|
脚本内容:
|
||||||
@ -253,12 +339,17 @@ async def _ai_deep_analysis(
|
|||||||
3. 是否存在夸大描述
|
3. 是否存在夸大描述
|
||||||
4. 是否存在可能违反广告法的其他内容
|
4. 是否存在可能违反广告法的其他内容
|
||||||
|
|
||||||
如果发现问题,请以 JSON 数组格式返回,每项包含:
|
请以 JSON 数组返回,每项包含:
|
||||||
|
- category: "violation"(硬性违规,明确违法/违规)或 "warning"(软性提醒,需人工判断)
|
||||||
- type: 违规类型 (forbidden_word/efficacy_claim/brand_safety)
|
- type: 违规类型 (forbidden_word/efficacy_claim/brand_safety)
|
||||||
- content: 违规内容
|
- content: 问题内容
|
||||||
- severity: 严重程度 (high/medium/low)
|
- severity: 严重程度 (high/medium/low)
|
||||||
- suggestion: 修改建议
|
- suggestion: 修改建议
|
||||||
|
|
||||||
|
分类标准:
|
||||||
|
- violation: 违禁词、功效宣称、品牌安全等明确违规
|
||||||
|
- warning: 夸大描述、易误解表述、潜在风险
|
||||||
|
|
||||||
如果未发现问题,返回空数组 []
|
如果未发现问题,返回空数组 []
|
||||||
|
|
||||||
请只返回 JSON 数组,不要包含其他内容。"""
|
请只返回 JSON 数组,不要包含其他内容。"""
|
||||||
@ -283,7 +374,10 @@ async def _ai_deep_analysis(
|
|||||||
ai_results = json.loads(response_content)
|
ai_results = json.loads(response_content)
|
||||||
|
|
||||||
violations = []
|
violations = []
|
||||||
|
warnings = []
|
||||||
for item in ai_results:
|
for item in ai_results:
|
||||||
|
category = item.get("category", "violation") # 默认当硬性违规(安全兜底)
|
||||||
|
|
||||||
violation_type = item.get("type", "forbidden_word")
|
violation_type = item.get("type", "forbidden_word")
|
||||||
if violation_type == "forbidden_word":
|
if violation_type == "forbidden_word":
|
||||||
vtype = ViolationType.FORBIDDEN_WORD
|
vtype = ViolationType.FORBIDDEN_WORD
|
||||||
@ -300,19 +394,28 @@ async def _ai_deep_analysis(
|
|||||||
else:
|
else:
|
||||||
slevel = RiskLevel.MEDIUM
|
slevel = RiskLevel.MEDIUM
|
||||||
|
|
||||||
violations.append(Violation(
|
if category == "warning":
|
||||||
type=vtype,
|
# 软性提醒 → SoftRiskWarning
|
||||||
content=item.get("content", ""),
|
warnings.append(SoftRiskWarning(
|
||||||
severity=slevel,
|
code="ai_warning",
|
||||||
suggestion=item.get("suggestion", "建议修改"),
|
message=f"{item.get('content', '')}: {item.get('suggestion', '建议修改')}",
|
||||||
))
|
action_required=SoftRiskAction.NOTE,
|
||||||
|
blocking=False,
|
||||||
|
context={"type": violation_type, "severity": severity},
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
# 硬性违规 → Violation
|
||||||
|
violations.append(Violation(
|
||||||
|
type=vtype,
|
||||||
|
content=item.get("content", ""),
|
||||||
|
severity=slevel,
|
||||||
|
suggestion=item.get("suggestion", "建议修改"),
|
||||||
|
))
|
||||||
|
|
||||||
return violations
|
return violations, warnings
|
||||||
|
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# JSON 解析失败,返回空列表
|
return [], []
|
||||||
return []
|
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
# AI 调用失败,降级到规则检测
|
return [], []
|
||||||
return []
|
|
||||||
|
|||||||
@ -10,7 +10,7 @@ from app.models.task import Task, TaskStage, TaskStatus
|
|||||||
from app.models.brief import Brief
|
from app.models.brief import Brief
|
||||||
from app.models.ai_config import AIConfig
|
from app.models.ai_config import AIConfig
|
||||||
from app.models.review import ReviewTask, Platform
|
from app.models.review import ReviewTask, Platform
|
||||||
from app.models.rule import ForbiddenWord, WhitelistItem, Competitor
|
from app.models.rule import ForbiddenWord, WhitelistItem, Competitor, PlatformRule, RuleStatus
|
||||||
from app.models.audit_log import AuditLog
|
from app.models.audit_log import AuditLog
|
||||||
from app.models.message import Message
|
from app.models.message import Message
|
||||||
# 保留 Tenant 兼容旧代码,但新代码应使用 Brand
|
# 保留 Tenant 兼容旧代码,但新代码应使用 Brand
|
||||||
@ -44,6 +44,8 @@ __all__ = [
|
|||||||
"ForbiddenWord",
|
"ForbiddenWord",
|
||||||
"WhitelistItem",
|
"WhitelistItem",
|
||||||
"Competitor",
|
"Competitor",
|
||||||
|
"PlatformRule",
|
||||||
|
"RuleStatus",
|
||||||
# 审计日志
|
# 审计日志
|
||||||
"AuditLog",
|
"AuditLog",
|
||||||
# 消息
|
# 消息
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
"""
|
"""
|
||||||
规则模型
|
规则模型
|
||||||
违禁词、白名单、竞品
|
违禁词、白名单、竞品、平台规则
|
||||||
"""
|
"""
|
||||||
|
import enum
|
||||||
from typing import TYPE_CHECKING, Optional
|
from typing import TYPE_CHECKING, Optional
|
||||||
from sqlalchemy import String, Text, ForeignKey
|
from sqlalchemy import String, Text, ForeignKey
|
||||||
from app.models.types import JSONType
|
from app.models.types import JSONType
|
||||||
@ -13,6 +14,13 @@ if TYPE_CHECKING:
|
|||||||
from app.models.tenant import Tenant
|
from app.models.tenant import Tenant
|
||||||
|
|
||||||
|
|
||||||
|
class RuleStatus(str, enum.Enum):
|
||||||
|
"""平台规则状态"""
|
||||||
|
DRAFT = "draft" # AI 解析完成,待确认
|
||||||
|
ACTIVE = "active" # 品牌方已确认,生效中
|
||||||
|
INACTIVE = "inactive" # 已停用
|
||||||
|
|
||||||
|
|
||||||
class ForbiddenWord(Base, TimestampMixin):
|
class ForbiddenWord(Base, TimestampMixin):
|
||||||
"""违禁词表"""
|
"""违禁词表"""
|
||||||
__tablename__ = "forbidden_words"
|
__tablename__ = "forbidden_words"
|
||||||
@ -83,3 +91,36 @@ class Competitor(Base, TimestampMixin):
|
|||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"<Competitor(name={self.name}, brand_id={self.brand_id})>"
|
return f"<Competitor(name={self.name}, brand_id={self.brand_id})>"
|
||||||
|
|
||||||
|
|
||||||
|
class PlatformRule(Base, TimestampMixin):
|
||||||
|
"""平台规则表 — 品牌方上传文档 + AI 解析"""
|
||||||
|
__tablename__ = "platform_rules"
|
||||||
|
|
||||||
|
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||||
|
tenant_id: Mapped[str] = mapped_column(
|
||||||
|
String(64),
|
||||||
|
ForeignKey("tenants.id", ondelete="CASCADE"),
|
||||||
|
nullable=False,
|
||||||
|
index=True,
|
||||||
|
)
|
||||||
|
brand_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
|
||||||
|
platform: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
|
||||||
|
|
||||||
|
# 文档信息
|
||||||
|
document_url: Mapped[str] = mapped_column(String(2048), nullable=False)
|
||||||
|
document_name: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||||
|
|
||||||
|
# AI 解析结果(JSON)
|
||||||
|
parsed_rules: Mapped[Optional[dict]] = mapped_column(JSONType, nullable=True)
|
||||||
|
|
||||||
|
# 状态
|
||||||
|
status: Mapped[str] = mapped_column(
|
||||||
|
String(20), nullable=False, default=RuleStatus.DRAFT.value, index=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 关联
|
||||||
|
tenant: Mapped["Tenant"] = relationship("Tenant", back_populates="platform_rules")
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return f"<PlatformRule(id={self.id}, platform={self.platform}, status={self.status})>"
|
||||||
|
|||||||
@ -10,7 +10,7 @@ from app.models.base import Base, TimestampMixin
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from app.models.ai_config import AIConfig
|
from app.models.ai_config import AIConfig
|
||||||
from app.models.review import ReviewTask
|
from app.models.review import ReviewTask
|
||||||
from app.models.rule import ForbiddenWord, WhitelistItem, Competitor
|
from app.models.rule import ForbiddenWord, WhitelistItem, Competitor, PlatformRule
|
||||||
|
|
||||||
|
|
||||||
class Tenant(Base, TimestampMixin):
|
class Tenant(Base, TimestampMixin):
|
||||||
@ -48,5 +48,11 @@ class Tenant(Base, TimestampMixin):
|
|||||||
back_populates="tenant",
|
back_populates="tenant",
|
||||||
lazy="selectin",
|
lazy="selectin",
|
||||||
)
|
)
|
||||||
|
platform_rules: Mapped[list["PlatformRule"]] = relationship(
|
||||||
|
"PlatformRule",
|
||||||
|
back_populates="tenant",
|
||||||
|
lazy="selectin",
|
||||||
|
)
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"<Tenant(id={self.id}, name={self.name})>"
|
return f"<Tenant(id={self.id}, name={self.name})>"
|
||||||
|
|||||||
69
backend/app/schemas/rules.py
Normal file
69
backend/app/schemas/rules.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
"""
|
||||||
|
平台规则相关 Schema
|
||||||
|
"""
|
||||||
|
from typing import Optional
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class PlatformRuleParseRequest(BaseModel):
|
||||||
|
"""上传文档并解析"""
|
||||||
|
document_url: str = Field(..., description="TOS 上传后的文件 URL")
|
||||||
|
document_name: str = Field(..., description="原始文件名(用于判断格式)")
|
||||||
|
platform: str = Field(..., description="目标平台 (douyin/xiaohongshu/bilibili/kuaishou)")
|
||||||
|
brand_id: str = Field(..., description="品牌 ID")
|
||||||
|
|
||||||
|
|
||||||
|
class ParsedRulesData(BaseModel):
|
||||||
|
"""AI 解析出的结构化规则"""
|
||||||
|
forbidden_words: list[str] = Field(default_factory=list, description="违禁词列表")
|
||||||
|
restricted_words: list[dict] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="限制词 [{word, condition, suggestion}]",
|
||||||
|
)
|
||||||
|
duration: Optional[dict] = Field(
|
||||||
|
None,
|
||||||
|
description="时长要求 {min_seconds, max_seconds}",
|
||||||
|
)
|
||||||
|
content_requirements: list[str] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="内容要求(如'必须展示产品')",
|
||||||
|
)
|
||||||
|
other_rules: list[dict] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="其他规则 [{rule, description}]",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlatformRuleParseResponse(BaseModel):
|
||||||
|
"""解析响应(draft 状态)"""
|
||||||
|
id: str
|
||||||
|
platform: str
|
||||||
|
brand_id: str
|
||||||
|
document_url: str
|
||||||
|
document_name: str
|
||||||
|
parsed_rules: ParsedRulesData
|
||||||
|
status: str
|
||||||
|
|
||||||
|
|
||||||
|
class PlatformRuleConfirmRequest(BaseModel):
|
||||||
|
"""确认/编辑解析结果"""
|
||||||
|
parsed_rules: ParsedRulesData = Field(..., description="品牌方可能修改过的规则")
|
||||||
|
|
||||||
|
|
||||||
|
class PlatformRuleResponse(BaseModel):
    """Full platform-rule record, including audit timestamps."""

    id: str
    platform: str
    brand_id: str
    document_url: str
    document_name: str
    parsed_rules: ParsedRulesData
    status: str
    created_at: str
    updated_at: str
|
||||||
|
|
||||||
|
|
||||||
|
class PlatformRuleListResponse(BaseModel):
    """Paged/aggregate listing of platform rules."""

    items: list[PlatformRuleResponse]
    total: int
|
||||||
119
backend/app/services/document_parser.py
Normal file
119
backend/app/services/document_parser.py
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
"""
|
||||||
|
文档解析服务
|
||||||
|
从 PDF/Word/Excel 文档中提取纯文本
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentParser:
    """Extract plain text from uploaded rule documents (PDF / Word / Excel / TXT).

    Format-specific libraries (pdfplumber, python-docx, openpyxl) are imported
    lazily inside the helpers so importing this module does not require every
    optional dependency to be installed.
    """

    @staticmethod
    async def download_and_parse(document_url: str, document_name: str) -> str:
        """Download a document and extract its plain text.

        Args:
            document_url: Document URL (TOS object storage).
            document_name: Original file name (used to detect the format).

        Returns:
            The extracted plain text.

        Raises:
            httpx.HTTPStatusError: If the download returns an error status.
            ValueError: If the file format is unsupported.
        """
        tmp_path: Optional[str] = None
        try:
            # follow_redirects: presigned object-storage URLs commonly 302 to a CDN.
            async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
                resp = await client.get(document_url)
                resp.raise_for_status()

            ext = document_name.rsplit(".", 1)[-1].lower() if "." in document_name else ""
            # Persist to a temp file because the parser libraries want a path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp:
                tmp.write(resp.content)
                tmp_path = tmp.name

            return DocumentParser.parse_file(tmp_path, document_name)
        finally:
            # Always remove the temp file, even when download/parse raises.
            if tmp_path and os.path.exists(tmp_path):
                os.unlink(tmp_path)

    @staticmethod
    def parse_file(file_path: str, file_name: str) -> str:
        """Dispatch to a format-specific parser based on the file extension.

        Args:
            file_path: Local file path.
            file_name: Original file name (its extension decides the parser).

        Returns:
            The extracted plain text.

        Raises:
            ValueError: If the extension is unsupported, including legacy
                binary formats (.doc/.xls) that the installed parsers
                (python-docx / openpyxl) cannot read.
        """
        ext = file_name.rsplit(".", 1)[-1].lower() if "." in file_name else ""

        if ext == "pdf":
            return DocumentParser._parse_pdf(file_path)
        elif ext == "docx":
            return DocumentParser._parse_docx(file_path)
        elif ext == "xlsx":
            return DocumentParser._parse_xlsx(file_path)
        elif ext == "txt":
            return DocumentParser._parse_txt(file_path)
        elif ext in ("doc", "xls"):
            # BUGFIX: python-docx / openpyxl only read the OOXML formats
            # (.docx / .xlsx); routing legacy .doc/.xls to them fails deep
            # inside the library with an opaque error. Fail fast instead.
            raise ValueError(f"不支持的文件格式: {ext}(请转换为 {ext}x 后重新上传)")
        else:
            raise ValueError(f"不支持的文件格式: {ext}")

    @staticmethod
    def _parse_pdf(path: str) -> str:
        """Extract PDF text page-by-page with pdfplumber."""
        import pdfplumber

        texts = []
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                # extract_text() returns None for image-only pages; skip them.
                if text:
                    texts.append(text)
        return "\n".join(texts)

    @staticmethod
    def _parse_docx(path: str) -> str:
        """Extract Word (.docx) text with python-docx: paragraphs plus tables."""
        from docx import Document

        doc = Document(path)
        texts = []
        for para in doc.paragraphs:
            if para.text.strip():
                texts.append(para.text)
        # Rules are often laid out in tables; flatten each row tab-separated.
        for table in doc.tables:
            for row in table.rows:
                row_text = "\t".join(cell.text.strip() for cell in row.cells if cell.text.strip())
                if row_text:
                    texts.append(row_text)
        return "\n".join(texts)

    @staticmethod
    def _parse_xlsx(path: str) -> str:
        """Extract Excel (.xlsx) text with openpyxl, concatenating all sheets."""
        from openpyxl import load_workbook

        # read_only streams rows; data_only resolves formulas to cached values.
        wb = load_workbook(path, read_only=True, data_only=True)
        try:
            texts = []
            for sheet in wb.worksheets:
                for row in sheet.iter_rows(values_only=True):
                    row_text = "\t".join(str(cell) for cell in row if cell is not None)
                    if row_text.strip():
                        texts.append(row_text)
            return "\n".join(texts)
        finally:
            # read_only workbooks keep the file handle open until closed.
            wb.close()

    @staticmethod
    def _parse_txt(path: str) -> str:
        """Read a plain-text file, tolerating common Chinese encodings.

        BUGFIX: the original hard-coded strict UTF-8 and crashed on GBK
        files, which are common for Chinese-market uploads. Try UTF-8, then
        GB18030 (superset of GBK), then fall back to lossy UTF-8.
        """
        with open(path, "rb") as f:
            raw = f.read()
        for enc in ("utf-8", "gb18030"):
            try:
                return raw.decode(enc)
            except UnicodeDecodeError:
                continue
        return raw.decode("utf-8", errors="replace")
|
||||||
@ -14,7 +14,7 @@ from sqlalchemy.orm import sessionmaker
|
|||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.models.review import ReviewTask, TaskStatus as DBTaskStatus
|
from app.models.review import ReviewTask, TaskStatus as DBTaskStatus
|
||||||
from app.models.rule import ForbiddenWord, Competitor
|
from app.models.rule import ForbiddenWord, Competitor, PlatformRule, RuleStatus
|
||||||
from app.models.ai_config import AIConfig
|
from app.models.ai_config import AIConfig
|
||||||
from app.services.video_download import VideoDownloadService, DownloadResult
|
from app.services.video_download import VideoDownloadService, DownloadResult
|
||||||
from app.services.keyframe import KeyFrameExtractor, ExtractionResult
|
from app.services.keyframe import KeyFrameExtractor, ExtractionResult
|
||||||
@ -81,6 +81,7 @@ async def complete_review(
|
|||||||
summary: str,
|
summary: str,
|
||||||
violations: list[dict],
|
violations: list[dict],
|
||||||
status: DBTaskStatus = DBTaskStatus.COMPLETED,
|
status: DBTaskStatus = DBTaskStatus.COMPLETED,
|
||||||
|
soft_warnings: Optional[list[dict]] = None,
|
||||||
):
|
):
|
||||||
"""完成审核"""
|
"""完成审核"""
|
||||||
result = await db.execute(
|
result = await db.execute(
|
||||||
@ -94,6 +95,8 @@ async def complete_review(
|
|||||||
task.score = score
|
task.score = score
|
||||||
task.summary = summary
|
task.summary = summary
|
||||||
task.violations = violations
|
task.violations = violations
|
||||||
|
if soft_warnings is not None:
|
||||||
|
task.soft_warnings = soft_warnings
|
||||||
task.completed_at = datetime.now(timezone.utc)
|
task.completed_at = datetime.now(timezone.utc)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
|
||||||
@ -153,6 +156,24 @@ async def get_competitors(db: AsyncSession, tenant_id: str, brand_id: str) -> li
|
|||||||
return [row[0] for row in result.fetchall()]
|
return [row[0] for row in result.fetchall()]
|
||||||
|
|
||||||
|
|
||||||
|
async def get_platform_forbidden_words(
    db: AsyncSession, tenant_id: str, brand_id: str, platform: str,
) -> list[str]:
    """Fetch forbidden words from the brand's active platform rules in the DB.

    BUGFIX: the original used ``scalar_one_or_none()``, which raises
    ``MultipleResultsFound`` if more than one active rule exists for the same
    tenant/brand/platform — nothing in the schema enforces uniqueness. Merge
    words across all matching active rules instead, de-duplicated and in
    first-seen order, and guard against malformed ``parsed_rules`` payloads.

    Args:
        db: Async SQLAlchemy session.
        tenant_id: Tenant scope.
        brand_id: Brand whose rules apply.
        platform: Target platform (e.g. douyin/xiaohongshu).

    Returns:
        De-duplicated forbidden-word list; empty if no active rule or no data.
    """
    result = await db.execute(
        select(PlatformRule).where(
            PlatformRule.tenant_id == tenant_id,
            PlatformRule.brand_id == brand_id,
            PlatformRule.platform == platform,
            PlatformRule.status == RuleStatus.ACTIVE.value,
        )
    )
    words: list[str] = []
    seen: set[str] = set()
    for rule in result.scalars().all():
        parsed = rule.parsed_rules or {}
        # parsed_rules is a JSON column; tolerate a missing/null word list.
        for word in parsed.get("forbidden_words") or []:
            if isinstance(word, str) and word not in seen:
                seen.add(word)
                words.append(word)
    return words
|
||||||
|
|
||||||
|
|
||||||
async def process_video_review(
|
async def process_video_review(
|
||||||
review_id: str,
|
review_id: str,
|
||||||
tenant_id: str,
|
tenant_id: str,
|
||||||
@ -199,6 +220,13 @@ async def process_video_review(
|
|||||||
|
|
||||||
# 获取规则
|
# 获取规则
|
||||||
forbidden_words = await get_forbidden_words(db, tenant_id)
|
forbidden_words = await get_forbidden_words(db, tenant_id)
|
||||||
|
# 合并平台规则中的违禁词
|
||||||
|
platform_fw = await get_platform_forbidden_words(db, tenant_id, brand_id, platform)
|
||||||
|
existing_set = set(forbidden_words)
|
||||||
|
for w in platform_fw:
|
||||||
|
if w not in existing_set:
|
||||||
|
forbidden_words.append(w)
|
||||||
|
existing_set.add(w)
|
||||||
competitors = await get_competitors(db, tenant_id, brand_id)
|
competitors = await get_competitors(db, tenant_id, brand_id)
|
||||||
|
|
||||||
# 初始化 AI 服务
|
# 初始化 AI 服务
|
||||||
@ -281,16 +309,37 @@ async def process_video_review(
|
|||||||
)
|
)
|
||||||
all_violations.extend(subtitle_violations)
|
all_violations.extend(subtitle_violations)
|
||||||
|
|
||||||
# 6. 计算分数和生成报告
|
# 6. 分流 violations / soft_warnings
|
||||||
await update_review_progress(db, review_id, 90, "生成报告")
|
await update_review_progress(db, review_id, 90, "生成报告")
|
||||||
score = review_service.calculate_score(all_violations)
|
|
||||||
|
|
||||||
if not all_violations:
|
hard_violations = []
|
||||||
|
soft_warnings_data = []
|
||||||
|
|
||||||
|
for v in all_violations:
|
||||||
|
v_type = v.get("type", "")
|
||||||
|
if v_type in ("forbidden_word", "efficacy_claim", "competitor_logo", "brand_safety"):
|
||||||
|
hard_violations.append(v)
|
||||||
|
elif v_type in ("duration_short", "mention_missing"):
|
||||||
|
soft_warnings_data.append({
|
||||||
|
"code": f"video_{v_type}",
|
||||||
|
"message": v.get("content", ""),
|
||||||
|
"action_required": "note",
|
||||||
|
"blocking": False,
|
||||||
|
"context": {"suggestion": v.get("suggestion", "")},
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
hard_violations.append(v) # 默认当硬性违规
|
||||||
|
|
||||||
|
# 计算分数(仅硬性违规影响分数)
|
||||||
|
score = review_service.calculate_score(hard_violations)
|
||||||
|
|
||||||
|
if not hard_violations:
|
||||||
summary = "视频内容合规,未发现违规项"
|
summary = "视频内容合规,未发现违规项"
|
||||||
|
if soft_warnings_data:
|
||||||
|
summary += f"({len(soft_warnings_data)} 条提醒)"
|
||||||
else:
|
else:
|
||||||
high_count = sum(1 for v in all_violations if v.get("risk_level") == "high")
|
high_count = sum(1 for v in hard_violations if v.get("risk_level") == "high")
|
||||||
medium_count = sum(1 for v in all_violations if v.get("risk_level") == "medium")
|
summary = f"发现 {len(hard_violations)} 处违规"
|
||||||
summary = f"发现 {len(all_violations)} 处违规"
|
|
||||||
if high_count > 0:
|
if high_count > 0:
|
||||||
summary += f"({high_count} 处高风险)"
|
summary += f"({high_count} 处高风险)"
|
||||||
|
|
||||||
@ -300,7 +349,8 @@ async def process_video_review(
|
|||||||
review_id,
|
review_id,
|
||||||
score=score,
|
score=score,
|
||||||
summary=summary,
|
summary=summary,
|
||||||
violations=all_violations,
|
violations=hard_violations,
|
||||||
|
soft_warnings=soft_warnings_data if soft_warnings_data else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@ -21,6 +21,9 @@ dependencies = [
|
|||||||
"openai>=1.12.0",
|
"openai>=1.12.0",
|
||||||
"cachetools>=5.3.0",
|
"cachetools>=5.3.0",
|
||||||
"sse-starlette>=2.0.0",
|
"sse-starlette>=2.0.0",
|
||||||
|
"pdfplumber>=0.10.0",
|
||||||
|
"python-docx>=1.1.0",
|
||||||
|
"openpyxl>=3.1.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user