- 新增 PlatformRule 模型 (draft/active/inactive 状态流转) - 新增文档解析服务 (PDF/Word/Excel → 纯文本) - 新增 4 个 API: 解析/确认/查询/删除平台规则 - 脚本审核优先从 DB 读取 active 规则,硬编码兜底 - 视频审核合并平台规则违禁词到检测列表 - Alembic 迁移 006: platform_rules 表 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
422 lines
14 KiB
Python
422 lines
14 KiB
Python
"""
|
||
脚本预审 API
|
||
"""
|
||
import re
|
||
from typing import Optional
|
||
from fastapi import APIRouter, Depends, Header
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
|
||
from app.database import get_db
|
||
from app.schemas.review import (
|
||
ScriptReviewRequest,
|
||
ScriptReviewResponse,
|
||
Violation,
|
||
ViolationType,
|
||
RiskLevel,
|
||
Position,
|
||
SoftRiskWarning,
|
||
SoftRiskAction,
|
||
)
|
||
from app.api.rules import (
|
||
get_whitelist_for_brand,
|
||
get_other_brands_whitelist_terms,
|
||
get_forbidden_words_for_tenant,
|
||
get_active_platform_rules,
|
||
_platform_rules,
|
||
)
|
||
from app.services.soft_risk import evaluate_soft_risk
|
||
from app.services.ai_service import AIServiceFactory
|
||
|
||
router = APIRouter(prefix="/scripts", tags=["scripts"])
|
||
|
||
# 内置违禁词库(广告极限词)
|
||
ABSOLUTE_WORDS = ["最好", "第一", "最佳", "绝对", "100%"]
|
||
|
||
# 功效词库(医疗/功效宣称)
|
||
EFFICACY_WORDS = ["根治", "治愈", "治疗", "药效", "疗效", "特效"]
|
||
|
||
# 广告语境关键词(用于判断是否为广告场景)
|
||
AD_CONTEXT_KEYWORDS = ["产品", "购买", "销量", "品质", "推荐", "价格", "优惠", "促销"]
|
||
|
||
|
||
def _is_ad_context(content: str, word: str) -> bool:
|
||
"""
|
||
判断是否为广告语境
|
||
|
||
规则:
|
||
- 如果内容中包含广告关键词,认为是广告语境
|
||
- 如果违禁词出现在明显的非广告句式中,不是广告语境
|
||
"""
|
||
# 非广告语境模式
|
||
non_ad_patterns = [
|
||
r"他是第一[个名位]", # 他是第一个/名
|
||
r"[是为]第一[个名位]", # 是第一个
|
||
r"最开心|最高兴|最难忘", # 情感表达
|
||
r"第一[次个].*[到来抵达]", # 第一次到达
|
||
]
|
||
|
||
for pattern in non_ad_patterns:
|
||
if re.search(pattern, content):
|
||
return False
|
||
|
||
# 检查是否包含广告关键词
|
||
return any(kw in content for kw in AD_CONTEXT_KEYWORDS)
|
||
|
||
|
||
def _check_selling_point_coverage(content: str, required_points: list[str]) -> list[str]:
|
||
"""
|
||
检查卖点覆盖情况
|
||
|
||
使用语义匹配而非精确匹配
|
||
"""
|
||
missing = []
|
||
|
||
# 卖点关键词映射
|
||
point_keywords = {
|
||
"品牌名称": ["品牌", "牌子", "品牌A", "品牌B"],
|
||
"使用方法": ["使用", "用法", "早晚", "每天", "一次", "涂抹", "喷洒"],
|
||
"功效说明": ["功效", "效果", "水润", "美白", "保湿", "滋润", "改善"],
|
||
}
|
||
|
||
for point in required_points:
|
||
# 精确匹配
|
||
if point in content:
|
||
continue
|
||
|
||
# 关键词匹配
|
||
keywords = point_keywords.get(point, [])
|
||
if any(kw in content for kw in keywords):
|
||
continue
|
||
|
||
missing.append(point)
|
||
|
||
return missing
|
||
|
||
|
||
def _scan_word_occurrences(
    content: str,
    word: str,
    *,
    vtype: ViolationType,
    severity: RiskLevel,
    suggestion: str,
    ad_context_only: bool = False,
) -> list[Violation]:
    """Collect one Violation per occurrence of *word* in *content*.

    When *ad_context_only* is true, the word is skipped entirely outside an
    advertising context. `_is_ad_context` does not depend on the occurrence
    position, so the check is hoisted out of the scan loop (the original
    re-evaluated it on every occurrence with the same result).
    """
    if ad_context_only and not _is_ad_context(content, word):
        return []
    found: list[Violation] = []
    start = 0
    while True:
        pos = content.find(word, start)
        if pos == -1:
            return found
        found.append(Violation(
            type=vtype,
            content=word,
            severity=severity,
            suggestion=suggestion,
            position=Position(start=pos, end=pos + len(word)),
        ))
        # Overlapping matches are intentionally allowed (advance by 1).
        start = pos + 1


@router.post("/review", response_model=ScriptReviewResponse)
async def review_script(
    request: ScriptReviewRequest,
    x_tenant_id: str = Header(..., alias="X-Tenant-ID"),
    db: AsyncSession = Depends(get_db),
) -> ScriptReviewResponse:
    """Script pre-review endpoint.

    - detects forbidden words (context-aware for advertising scenarios)
    - detects efficacy-claim words
    - checks required selling-point coverage
    - applies the brand whitelist
    - optional AI deep analysis
    - returns a compliance score plus fix suggestions
    """
    violations: list[Violation] = []
    content = request.content

    # Brand whitelist: whitelisted terms are never flagged.
    whitelist = await get_whitelist_for_brand(x_tenant_id, request.brand_id, db)

    # Tenant-defined forbidden words (list of {"word": ...} dicts).
    tenant_forbidden_words = await get_forbidden_words_for_tenant(x_tenant_id, db)

    # 1. Forbidden-word detection (superlative ad terms), context-aware.
    all_forbidden_words = ABSOLUTE_WORDS + [w["word"] for w in tenant_forbidden_words]
    for word in all_forbidden_words:
        if word in whitelist:
            continue
        violations.extend(_scan_word_occurrences(
            content, word,
            vtype=ViolationType.FORBIDDEN_WORD,
            severity=RiskLevel.HIGH,
            suggestion=f"建议删除或替换违禁词:{word}",
            ad_context_only=True,
        ))

    # 2. Efficacy-claim detection (always flagged — no context gating).
    for word in EFFICACY_WORDS:
        if word in whitelist:
            continue
        violations.extend(_scan_word_occurrences(
            content, word,
            vtype=ViolationType.EFFICACY_CLAIM,
            severity=RiskLevel.HIGH,
            suggestion=f"功效宣称词违反广告法,建议删除:{word}",
        ))

    # 3. Terms owned by other brands (brand-safety risk). Only the first
    #    occurrence is reported, matching existing behavior; the position is
    #    now computed with a single find() instead of two scans.
    other_brand_terms = await get_other_brands_whitelist_terms(x_tenant_id, request.brand_id, db)
    for term, _owner_brand in other_brand_terms:
        pos = content.find(term)
        if pos != -1:
            violations.append(Violation(
                type=ViolationType.BRAND_SAFETY,
                content=term,
                severity=RiskLevel.MEDIUM,
                suggestion=f"使用了其他品牌的专属词汇:{term}",
                position=Position(start=pos, end=pos + len(term)),
            ))

    # 3A. Platform-rule forbidden words (DB-backed rules first, hard-coded
    #     fallback). Words already covered by sections above are skipped.
    already_checked = set(all_forbidden_words)
    platform_forbidden_words: list[str] = []

    # Prefer the brand's uploaded *active* platform rules from the DB.
    db_platform_rules = await get_active_platform_rules(
        x_tenant_id, request.brand_id, request.platform.value, db,
    )
    if db_platform_rules:
        platform_forbidden_words = db_platform_rules.get("forbidden_words", [])
    else:
        # Fallback: hard-coded _platform_rules table.
        platform_rule = _platform_rules.get(request.platform.value)
        if platform_rule:
            for rule in platform_rule.get("rules", []):
                if rule.get("type") == "forbidden_word":
                    platform_forbidden_words.extend(rule.get("words", []))

    for word in platform_forbidden_words:
        if word in already_checked or word in whitelist:
            continue
        violations.extend(_scan_word_occurrences(
            content, word,
            vtype=ViolationType.FORBIDDEN_WORD,
            severity=RiskLevel.MEDIUM,
            suggestion=f"违反{request.platform.value}平台规则,建议删除:{word}",
            ad_context_only=True,
        ))

    # 3B. Brief blacklist words (per-request, with optional reason).
    if request.blacklist_words:
        for item in request.blacklist_words:
            word = item.get("word", "")
            reason = item.get("reason", "")
            if not word or word in whitelist:
                continue
            suggestion = f"Brief 黑名单词:{word}"
            if reason:
                suggestion += f"({reason})"
            violations.extend(_scan_word_occurrences(
                content, word,
                vtype=ViolationType.FORBIDDEN_WORD,
                severity=RiskLevel.HIGH,
                suggestion=suggestion,
            ))

    # 4. Required selling-point coverage (None when no points requested).
    missing_points: list[str] | None = None
    if request.required_points:
        missing_points = _check_selling_point_coverage(content, request.required_points)

    # 5. Optional AI deep analysis (returns violations + soft warnings;
    #    degrades to empty lists on any failure).
    ai_violations, ai_warnings = await _ai_deep_analysis(x_tenant_id, content, db)
    if ai_violations:
        violations.extend(ai_violations)

    # 6. Score, weighted by severity; floored at 0.
    score = 100
    for v in violations:
        if v.severity == RiskLevel.HIGH:
            score -= 25
        elif v.severity == RiskLevel.MEDIUM:
            score -= 15
        else:
            score -= 5
    if missing_points:
        score -= len(missing_points) * 5
    score = max(0, score)

    # 7. Human-readable summary.
    parts = []
    if violations:
        parts.append(f"发现 {len(violations)} 处违规")
    if missing_points:
        parts.append(f"遗漏 {len(missing_points)} 个卖点")
    summary = ",".join(parts) if parts else "脚本内容合规,未发现问题"

    # 8. Soft-risk evaluation (non-blocking warnings).
    soft_warnings: list[SoftRiskWarning] = []
    if request.soft_risk_context:
        soft_warnings = evaluate_soft_risk(request.soft_risk_context)

    # Merge AI-produced soft warnings.
    if ai_warnings:
        soft_warnings.extend(ai_warnings)

    # Missing selling points are also surfaced as a soft warning.
    if missing_points:
        soft_warnings.append(SoftRiskWarning(
            code="missing_selling_points",
            message=f"遗漏 {len(missing_points)} 个卖点:{', '.join(missing_points)}",
            action_required=SoftRiskAction.NOTE,
            blocking=False,
        ))

    return ScriptReviewResponse(
        score=score,
        summary=summary,
        violations=violations,
        missing_points=missing_points,
        soft_warnings=soft_warnings,
    )
|
||
|
||
|
||
async def _ai_deep_analysis(
    tenant_id: str,
    content: str,
    db: AsyncSession,
) -> tuple[list[Violation], list[SoftRiskWarning]]:
    """Run AI-based deep compliance analysis of a script.

    Returns a ``(violations, soft_warnings)`` pair parsed from the model's
    JSON answer. On any failure (no client/config for the tenant, request
    error, unparseable response) it returns two empty lists so the caller
    degrades gracefully to rule-based detection only.
    """
    try:
        # Obtain the tenant-scoped AI client; no client means AI analysis
        # is not configured for this tenant.
        ai_client = await AIServiceFactory.get_client(tenant_id, db)
        if not ai_client:
            return [], []

        # Obtain the model configuration (which model names to use).
        config = await AIServiceFactory.get_config(tenant_id, db)
        if not config:
            return [], []

        text_model = config.models.get("text", "gpt-4o")

        # Build the analysis prompt. The model is asked for a JSON array with
        # two categories of findings: "violation" (hard) and "warning" (soft).
        analysis_prompt = f"""作为广告合规审核专家,请分析以下广告脚本内容,检测潜在的合规风险:

脚本内容:
{content}

请检查以下方面:
1. 是否存在隐性的虚假宣传(如暗示疗效但不直接说明)
2. 是否存在容易引起误解的表述
3. 是否存在夸大描述
4. 是否存在可能违反广告法的其他内容

请以 JSON 数组返回,每项包含:
- category: "violation"(硬性违规,明确违法/违规)或 "warning"(软性提醒,需人工判断)
- type: 违规类型 (forbidden_word/efficacy_claim/brand_safety)
- content: 问题内容
- severity: 严重程度 (high/medium/low)
- suggestion: 修改建议

分类标准:
- violation: 违禁词、功效宣称、品牌安全等明确违规
- warning: 夸大描述、易误解表述、潜在风险

如果未发现问题,返回空数组 []

请只返回 JSON 数组,不要包含其他内容。"""

        response = await ai_client.chat_completion(
            messages=[{"role": "user", "content": analysis_prompt}],
            model=text_model,
            temperature=0.3,
            max_tokens=1000,
        )

        # Parse the AI response as JSON.
        import json
        try:
            # Strip a possible markdown code fence around the JSON payload.
            # NOTE(review): a single-line fenced response would raise
            # IndexError here and fall through to the outer handler.
            response_content = response.content.strip()
            if response_content.startswith("```"):
                response_content = response_content.split("\n", 1)[1]
            if response_content.endswith("```"):
                response_content = response_content.rsplit("\n", 1)[0]

            ai_results = json.loads(response_content)

            violations: list[Violation] = []
            warnings: list[SoftRiskWarning] = []
            for item in ai_results:
                category = item.get("category", "violation")  # default to hard violation (safe fallback)

                # Map the free-form type string onto the ViolationType enum;
                # anything unrecognized is treated as a brand-safety issue.
                violation_type = item.get("type", "forbidden_word")
                if violation_type == "forbidden_word":
                    vtype = ViolationType.FORBIDDEN_WORD
                elif violation_type == "efficacy_claim":
                    vtype = ViolationType.EFFICACY_CLAIM
                else:
                    vtype = ViolationType.BRAND_SAFETY

                # Map severity; unrecognized values default to MEDIUM.
                severity = item.get("severity", "medium")
                if severity == "high":
                    slevel = RiskLevel.HIGH
                elif severity == "low":
                    slevel = RiskLevel.LOW
                else:
                    slevel = RiskLevel.MEDIUM

                if category == "warning":
                    # Soft reminder -> SoftRiskWarning (non-blocking note).
                    warnings.append(SoftRiskWarning(
                        code="ai_warning",
                        message=f"{item.get('content', '')}: {item.get('suggestion', '建议修改')}",
                        action_required=SoftRiskAction.NOTE,
                        blocking=False,
                        context={"type": violation_type, "severity": severity},
                    ))
                else:
                    # Hard finding -> Violation (no position: the AI does not
                    # report character offsets).
                    violations.append(Violation(
                        type=vtype,
                        content=item.get("content", ""),
                        severity=slevel,
                        suggestion=item.get("suggestion", "建议修改"),
                    ))

            return violations, warnings

        except json.JSONDecodeError:
            # Model did not return valid JSON — degrade silently.
            return [], []

    except Exception:
        # Deliberate catch-all: AI analysis is best-effort; any failure
        # degrades to rule-based detection in the caller.
        return [], []
|