Your Name fed361b9b3 feat: 平台规则从硬编码改为品牌方上传文档 + AI 解析
- 新增 PlatformRule 模型 (draft/active/inactive 状态流转)
- 新增文档解析服务 (PDF/Word/Excel → 纯文本)
- 新增 4 个 API: 解析/确认/查询/删除平台规则
- 脚本审核优先从 DB 读取 active 规则,硬编码兜底
- 视频审核合并平台规则违禁词到检测列表
- Alembic 迁移 006: platform_rules 表

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 13:23:11 +08:00

422 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
脚本预审 API
"""
import re
from typing import Optional
from fastapi import APIRouter, Depends, Header
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.schemas.review import (
ScriptReviewRequest,
ScriptReviewResponse,
Violation,
ViolationType,
RiskLevel,
Position,
SoftRiskWarning,
SoftRiskAction,
)
from app.api.rules import (
get_whitelist_for_brand,
get_other_brands_whitelist_terms,
get_forbidden_words_for_tenant,
get_active_platform_rules,
_platform_rules,
)
from app.services.soft_risk import evaluate_soft_risk
from app.services.ai_service import AIServiceFactory
# Router collecting the script pre-review endpoints under the /scripts prefix.
router = APIRouter(prefix="/scripts", tags=["scripts"])
# Built-in forbidden-word list (absolute / superlative advertising terms).
ABSOLUTE_WORDS = ["最好", "第一", "最佳", "绝对", "100%"]
# Efficacy-word list (medical / efficacy claims).
EFFICACY_WORDS = ["根治", "治愈", "治疗", "药效", "疗效", "特效"]
# Ad-context keywords (used to decide whether a text is an advertising scenario).
AD_CONTEXT_KEYWORDS = ["产品", "购买", "销量", "品质", "推荐", "价格", "优惠", "促销"]
def _is_ad_context(content: str, word: str) -> bool:
"""
判断是否为广告语境
规则:
- 如果内容中包含广告关键词,认为是广告语境
- 如果违禁词出现在明显的非广告句式中,不是广告语境
"""
# 非广告语境模式
non_ad_patterns = [
r"他是第一[个名位]", # 他是第一个/名
r"[是为]第一[个名位]", # 是第一个
r"最开心|最高兴|最难忘", # 情感表达
r"第一[次个].*[到来抵达]", # 第一次到达
]
for pattern in non_ad_patterns:
if re.search(pattern, content):
return False
# 检查是否包含广告关键词
return any(kw in content for kw in AD_CONTEXT_KEYWORDS)
def _check_selling_point_coverage(content: str, required_points: list[str]) -> list[str]:
"""
检查卖点覆盖情况
使用语义匹配而非精确匹配
"""
missing = []
# 卖点关键词映射
point_keywords = {
"品牌名称": ["品牌", "牌子", "品牌A", "品牌B"],
"使用方法": ["使用", "用法", "早晚", "每天", "一次", "涂抹", "喷洒"],
"功效说明": ["功效", "效果", "水润", "美白", "保湿", "滋润", "改善"],
}
for point in required_points:
# 精确匹配
if point in content:
continue
# 关键词匹配
keywords = point_keywords.get(point, [])
if any(kw in content for kw in keywords):
continue
missing.append(point)
return missing
def _find_occurrences(content: str, word: str) -> list[int]:
    """Return the start offset of every (possibly overlapping) match of ``word``.

    An empty ``word`` yields no matches; ``str.find`` would otherwise report a
    hit at every offset of ``content``.
    """
    if not word:
        return []
    offsets: list[int] = []
    start = 0
    while (pos := content.find(word, start)) != -1:
        offsets.append(pos)
        # Advance by one character so overlapping matches are also reported.
        start = pos + 1
    return offsets


def _violations_at(
    word: str,
    offsets: list[int],
    violation_type: ViolationType,
    severity: RiskLevel,
    suggestion: str,
) -> list[Violation]:
    """Build one ``Violation`` for ``word`` at each of the given start offsets."""
    return [
        Violation(
            type=violation_type,
            content=word,
            severity=severity,
            suggestion=suggestion,
            position=Position(start=pos, end=pos + len(word)),
        )
        for pos in offsets
    ]


@router.post("/review", response_model=ScriptReviewResponse)
async def review_script(
    request: ScriptReviewRequest,
    x_tenant_id: str = Header(..., alias="X-Tenant-ID"),
    db: AsyncSession = Depends(get_db),
) -> ScriptReviewResponse:
    """Pre-review an advertising script.

    The review runs these steps in order and accumulates violations:

    1. Forbidden words (built-in absolute terms plus tenant-defined words),
       skipped when the text is not an advertising context.
    2. Efficacy-claim words.
    3. Terms that belong to other brands' whitelists (brand-safety risk).
       Only the first occurrence of each term is reported.
    4. Platform-rule forbidden words: active rules from the DB when present,
       otherwise the hard-coded ``_platform_rules`` fallback.
    5. Brief blacklist words supplied with the request.
    6. Required selling-point coverage.
    7. AI deep analysis (contributes violations and soft warnings).

    Whitelisted words are skipped in every word-based step.

    Args:
        request: Script content plus brand, platform and optional brief data.
        x_tenant_id: Tenant identifier from the ``X-Tenant-ID`` header.
        db: Async database session.

    Returns:
        The compliance score (0-100), a summary, all violations, the missing
        selling points (``None`` when none were required) and soft warnings.
    """
    violations: list[Violation] = []
    content = request.content

    # Brand whitelist and tenant-defined forbidden words.
    whitelist = await get_whitelist_for_brand(x_tenant_id, request.brand_id, db)
    tenant_forbidden_words = await get_forbidden_words_for_tenant(x_tenant_id, db)
    tenant_words = [w["word"] for w in tenant_forbidden_words]

    # 1. Forbidden words (absolute advertising terms + tenant words).
    for word in ABSOLUTE_WORDS + tenant_words:
        if word in whitelist:
            continue
        offsets = _find_occurrences(content, word)
        # The ad-context decision does not depend on the match position, so
        # it is evaluated once per word and only when the word is present.
        if offsets and _is_ad_context(content, word):
            violations.extend(_violations_at(
                word,
                offsets,
                ViolationType.FORBIDDEN_WORD,
                RiskLevel.HIGH,
                f"建议删除或替换违禁词:{word}",
            ))

    # 2. Efficacy-claim words (no ad-context exemption).
    for word in EFFICACY_WORDS:
        if word in whitelist:
            continue
        violations.extend(_violations_at(
            word,
            _find_occurrences(content, word),
            ViolationType.EFFICACY_CLAIM,
            RiskLevel.HIGH,
            f"功效宣称词违反广告法,建议删除:{word}",
        ))

    # 3. Terms owned by other brands (brand-safety risk); first hit only.
    other_brand_terms = await get_other_brands_whitelist_terms(x_tenant_id, request.brand_id, db)
    for term, _owner_brand in other_brand_terms:
        first_pos = content.find(term)
        if first_pos == -1:
            continue
        violations.append(Violation(
            type=ViolationType.BRAND_SAFETY,
            content=term,
            severity=RiskLevel.MEDIUM,
            suggestion=f"使用了其他品牌的专属词汇:{term}",
            position=Position(start=first_pos, end=first_pos + len(term)),
        ))

    # 3A. Platform-rule forbidden words (DB first, hard-coded fallback).
    # Words already handled in step 1 are not reported a second time.
    already_checked = set(ABSOLUTE_WORDS + tenant_words)
    platform_forbidden_words: list[str] = []
    db_platform_rules = await get_active_platform_rules(
        x_tenant_id, request.brand_id, request.platform.value, db,
    )
    if db_platform_rules:
        platform_forbidden_words = db_platform_rules.get("forbidden_words", []) or []
    else:
        platform_rule = _platform_rules.get(request.platform.value)
        if platform_rule:
            for rule in platform_rule.get("rules", []):
                if rule.get("type") == "forbidden_word":
                    platform_forbidden_words.extend(rule.get("words", []))

    for word in platform_forbidden_words:
        if word in already_checked or word in whitelist:
            continue
        offsets = _find_occurrences(content, word)
        if offsets and _is_ad_context(content, word):
            violations.extend(_violations_at(
                word,
                offsets,
                ViolationType.FORBIDDEN_WORD,
                RiskLevel.MEDIUM,
                f"违反{request.platform.value}平台规则,建议删除:{word}",
            ))

    # 3B. Brief blacklist words (always reported, no ad-context exemption).
    for item in request.blacklist_words or []:
        word = item.get("word", "")
        reason = item.get("reason", "")
        if not word or word in whitelist:
            continue
        suggestion = f"Brief 黑名单词:{word}"
        if reason:
            # Delimit the reason so it does not run into the word itself.
            suggestion += f"({reason})"
        violations.extend(_violations_at(
            word,
            _find_occurrences(content, word),
            ViolationType.FORBIDDEN_WORD,
            RiskLevel.HIGH,
            suggestion,
        ))

    # 4. Missing selling points; stays None when nothing was required.
    missing_points: list[str] | None = None
    if request.required_points:
        missing_points = _check_selling_point_coverage(content, request.required_points)

    # 5. AI deep analysis. Always invoked; the helper returns empty lists
    #    when no AI client/config is available or the analysis fails.
    ai_violations, ai_warnings = await _ai_deep_analysis(x_tenant_id, content, db)
    if ai_violations:
        violations.extend(ai_violations)

    # 6. Score, weighted by severity and floored at zero.
    score = 100
    for v in violations:
        if v.severity == RiskLevel.HIGH:
            score -= 25
        elif v.severity == RiskLevel.MEDIUM:
            score -= 15
        else:
            score -= 5
    if missing_points:
        score -= len(missing_points) * 5
    score = max(0, score)

    # 7. Summary.
    parts = []
    if violations:
        parts.append(f"发现 {len(violations)} 处违规")
    if missing_points:
        parts.append(f"遗漏 {len(missing_points)} 个卖点")
    if parts:
        # Separate the clauses so they do not run together.
        summary = ",".join(parts)
    else:
        summary = "脚本内容合规,未发现问题"

    # 8. Soft risk evaluation, merged with AI warnings and missing points.
    soft_warnings: list[SoftRiskWarning] = []
    if request.soft_risk_context:
        soft_warnings = evaluate_soft_risk(request.soft_risk_context)
    if ai_warnings:
        soft_warnings.extend(ai_warnings)
    if missing_points:
        soft_warnings.append(SoftRiskWarning(
            code="missing_selling_points",
            message=f"遗漏 {len(missing_points)} 个卖点:{', '.join(missing_points)}",
            action_required=SoftRiskAction.NOTE,
            blocking=False,
        ))

    return ScriptReviewResponse(
        score=score,
        summary=summary,
        violations=violations,
        missing_points=missing_points,
        soft_warnings=soft_warnings,
    )
def _strip_markdown_fence(text: str) -> str:
    """Remove a surrounding Markdown ``` code fence from ``text``, if any.

    Handles an opening fence with a language tag (e.g. ```json), a closing
    fence on its own line or directly after the payload, and a reply that is
    fenced on a single line.
    """
    text = text.strip()
    if text.startswith("```"):
        # Drop the whole opening fence line (it may carry a language tag).
        _fence_line, newline, remainder = text.partition("\n")
        text = remainder if newline else text[3:]
    text = text.rstrip()
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


async def _ai_deep_analysis(
    tenant_id: str,
    content: str,
    db: AsyncSession,
) -> tuple[list[Violation], list[SoftRiskWarning]]:
    """Run an AI-based deep compliance analysis of the script.

    The model is asked for a JSON array of findings. Each finding whose
    ``category`` is ``"warning"`` becomes a ``SoftRiskWarning``; every other
    finding (including those without a category) becomes a ``Violation``.

    This is best-effort: when no AI client or config is available, when the
    reply is not a JSON array, or when any other error occurs, the failure is
    logged and ``([], [])`` is returned so the caller falls back to the
    rule-based results.

    Args:
        tenant_id: Tenant whose AI client and model configuration are used.
        content: Script text to analyse.
        db: Async database session passed through to ``AIServiceFactory``.

    Returns:
        A ``(violations, soft_warnings)`` tuple.
    """
    import json
    import logging

    logger = logging.getLogger(__name__)

    try:
        # AI client and model configuration for this tenant.
        ai_client = await AIServiceFactory.get_client(tenant_id, db)
        if not ai_client:
            return [], []
        config = await AIServiceFactory.get_config(tenant_id, db)
        if not config:
            return [], []
        text_model = config.models.get("text", "gpt-4o")

        # NOTE(review): the script text is interpolated verbatim into the
        # prompt, so the model reply is treated as untrusted and validated
        # below before use.
        analysis_prompt = f"""作为广告合规审核专家,请分析以下广告脚本内容,检测潜在的合规风险:
脚本内容:
{content}
请检查以下方面:
1. 是否存在隐性的虚假宣传(如暗示疗效但不直接说明)
2. 是否存在容易引起误解的表述
3. 是否存在夸大描述
4. 是否存在可能违反广告法的其他内容
请以 JSON 数组返回,每项包含:
- category: "violation"(硬性违规,明确违法/违规)或 "warning"(软性提醒,需人工判断)
- type: 违规类型 (forbidden_word/efficacy_claim/brand_safety)
- content: 问题内容
- severity: 严重程度 (high/medium/low)
- suggestion: 修改建议
分类标准:
- violation: 违禁词、功效宣称、品牌安全等明确违规
- warning: 夸大描述、易误解表述、潜在风险
如果未发现问题,返回空数组 []
请只返回 JSON 数组,不要包含其他内容。"""

        response = await ai_client.chat_completion(
            messages=[{"role": "user", "content": analysis_prompt}],
            model=text_model,
            temperature=0.3,
            max_tokens=1000,
        )

        # Parse the reply, tolerating an optional Markdown code fence.
        try:
            ai_results = json.loads(_strip_markdown_fence(response.content))
        except json.JSONDecodeError:
            logger.warning("AI deep analysis returned invalid JSON; ignoring AI findings")
            return [], []

        if not isinstance(ai_results, list):
            logger.warning(
                "AI deep analysis returned %s instead of a JSON array; ignoring AI findings",
                type(ai_results).__name__,
            )
            return [], []

        violations: list[Violation] = []
        warnings: list[SoftRiskWarning] = []
        for item in ai_results:
            # Skip malformed entries instead of discarding every finding.
            if not isinstance(item, dict):
                continue

            # A missing category is treated as a hard violation (safe default).
            category = item.get("category", "violation")
            violation_type = item.get("type", "forbidden_word")
            severity = item.get("severity", "medium")
            # Null or missing fields fall back to the defaults.
            problem = item.get("content") or ""
            advice = item.get("suggestion") or "建议修改"

            if category == "warning":
                # Soft reminder -> SoftRiskWarning.
                warnings.append(SoftRiskWarning(
                    code="ai_warning",
                    message=f"{problem}: {advice}",
                    action_required=SoftRiskAction.NOTE,
                    blocking=False,
                    context={"type": violation_type, "severity": severity},
                ))
                continue

            # Hard violation -> Violation. Unknown types map to brand safety,
            # unknown severities to medium.
            if violation_type == "forbidden_word":
                vtype = ViolationType.FORBIDDEN_WORD
            elif violation_type == "efficacy_claim":
                vtype = ViolationType.EFFICACY_CLAIM
            else:
                vtype = ViolationType.BRAND_SAFETY

            if severity == "high":
                slevel = RiskLevel.HIGH
            elif severity == "low":
                slevel = RiskLevel.LOW
            else:
                slevel = RiskLevel.MEDIUM

            violations.append(Violation(
                type=vtype,
                content=problem,
                severity=slevel,
                suggestion=advice,
            ))
        return violations, warnings
    except Exception:
        # Deliberate best-effort boundary: never let AI problems fail the
        # review, but leave a trace for diagnosis.
        logger.warning(
            "AI deep analysis failed; falling back to rule-based results",
            exc_info=True,
        )
        return [], []