""" 脚本预审 API """ import re from typing import Optional from fastapi import APIRouter, Depends, Header from sqlalchemy.ext.asyncio import AsyncSession from app.database import get_db from app.schemas.review import ( ScriptReviewRequest, ScriptReviewResponse, Violation, ViolationType, RiskLevel, Position, SoftRiskWarning, SoftRiskAction, ) from app.api.rules import ( get_whitelist_for_brand, get_other_brands_whitelist_terms, get_forbidden_words_for_tenant, get_active_platform_rules, _platform_rules, ) from app.services.soft_risk import evaluate_soft_risk from app.services.ai_service import AIServiceFactory router = APIRouter(prefix="/scripts", tags=["scripts"]) # 内置违禁词库(广告极限词) ABSOLUTE_WORDS = ["最好", "第一", "最佳", "绝对", "100%"] # 功效词库(医疗/功效宣称) EFFICACY_WORDS = ["根治", "治愈", "治疗", "药效", "疗效", "特效"] # 广告语境关键词(用于判断是否为广告场景) AD_CONTEXT_KEYWORDS = ["产品", "购买", "销量", "品质", "推荐", "价格", "优惠", "促销"] def _is_ad_context(content: str, word: str) -> bool: """ 判断是否为广告语境 规则: - 如果内容中包含广告关键词,认为是广告语境 - 如果违禁词出现在明显的非广告句式中,不是广告语境 """ # 非广告语境模式 non_ad_patterns = [ r"他是第一[个名位]", # 他是第一个/名 r"[是为]第一[个名位]", # 是第一个 r"最开心|最高兴|最难忘", # 情感表达 r"第一[次个].*[到来抵达]", # 第一次到达 ] for pattern in non_ad_patterns: if re.search(pattern, content): return False # 检查是否包含广告关键词 return any(kw in content for kw in AD_CONTEXT_KEYWORDS) def _check_selling_point_coverage(content: str, required_points: list[str]) -> list[str]: """ 检查卖点覆盖情况 使用语义匹配而非精确匹配 """ missing = [] # 卖点关键词映射 point_keywords = { "品牌名称": ["品牌", "牌子", "品牌A", "品牌B"], "使用方法": ["使用", "用法", "早晚", "每天", "一次", "涂抹", "喷洒"], "功效说明": ["功效", "效果", "水润", "美白", "保湿", "滋润", "改善"], } for point in required_points: # 精确匹配 if point in content: continue # 关键词匹配 keywords = point_keywords.get(point, []) if any(kw in content for kw in keywords): continue missing.append(point) return missing @router.post("/review", response_model=ScriptReviewResponse) async def review_script( request: ScriptReviewRequest, x_tenant_id: str = Header(..., alias="X-Tenant-ID"), db: AsyncSession = Depends(get_db), ) -> ScriptReviewResponse: """ 脚本预审 - 检测违禁词(支持语境感知) - 检测功效词 - 检查必要卖点 - 应用白名单 - 可选 AI 深度分析 - 返回合规分数和修改建议 """ violations = [] content = request.content # 获取品牌白名单 whitelist = await get_whitelist_for_brand(x_tenant_id, request.brand_id, db) # 获取租户自定义违禁词 tenant_forbidden_words = await get_forbidden_words_for_tenant(x_tenant_id, db) # 1. 违禁词检测(广告极限词) all_forbidden_words = ABSOLUTE_WORDS + [w["word"] for w in tenant_forbidden_words] for word in all_forbidden_words: # 白名单跳过 if word in whitelist: continue start = 0 while True: pos = content.find(word, start) if pos == -1: break # 语境感知:非广告语境跳过 if not _is_ad_context(content, word): start = pos + 1 continue violations.append(Violation( type=ViolationType.FORBIDDEN_WORD, content=word, severity=RiskLevel.HIGH, suggestion=f"建议删除或替换违禁词:{word}", position=Position(start=pos, end=pos + len(word)), )) start = pos + 1 # 2. 功效词检测 for word in EFFICACY_WORDS: if word in whitelist: continue start = 0 while True: pos = content.find(word, start) if pos == -1: break violations.append(Violation( type=ViolationType.EFFICACY_CLAIM, content=word, severity=RiskLevel.HIGH, suggestion=f"功效宣称词违反广告法,建议删除:{word}", position=Position(start=pos, end=pos + len(word)), )) start = pos + 1 # 3. 检测其他品牌专属词(品牌安全风险) other_brand_terms = await get_other_brands_whitelist_terms(x_tenant_id, request.brand_id, db) for term, owner_brand in other_brand_terms: if term in content: violations.append(Violation( type=ViolationType.BRAND_SAFETY, content=term, severity=RiskLevel.MEDIUM, suggestion=f"使用了其他品牌的专属词汇:{term}", position=Position(start=content.find(term), end=content.find(term) + len(term)), )) # 3A. 平台规则违禁词(优先从 DB 读取,硬编码兜底) already_checked = set(ABSOLUTE_WORDS + [w["word"] for w in tenant_forbidden_words]) platform_forbidden_words: list[str] = [] # 优先从 DB 获取品牌方上传的 active 平台规则 db_platform_rules = await get_active_platform_rules( x_tenant_id, request.brand_id, request.platform.value, db, ) if db_platform_rules: platform_forbidden_words = db_platform_rules.get("forbidden_words", []) else: # 兜底:从硬编码 _platform_rules 读取 platform_rule = _platform_rules.get(request.platform.value) if platform_rule: for rule in platform_rule.get("rules", []): if rule.get("type") == "forbidden_word": platform_forbidden_words.extend(rule.get("words", [])) for word in platform_forbidden_words: if word in already_checked or word in whitelist: continue start = 0 while True: pos = content.find(word, start) if pos == -1: break if not _is_ad_context(content, word): start = pos + 1 continue violations.append(Violation( type=ViolationType.FORBIDDEN_WORD, content=word, severity=RiskLevel.MEDIUM, suggestion=f"违反{request.platform.value}平台规则,建议删除:{word}", position=Position(start=pos, end=pos + len(word)), )) start = pos + 1 # 3B. Brief 黑名单词 if request.blacklist_words: for item in request.blacklist_words: word = item.get("word", "") reason = item.get("reason", "") if not word or word in whitelist: continue start_pos = 0 while True: pos = content.find(word, start_pos) if pos == -1: break suggestion = f"Brief 黑名单词:{word}" if reason: suggestion += f"({reason})" violations.append(Violation( type=ViolationType.FORBIDDEN_WORD, content=word, severity=RiskLevel.HIGH, suggestion=suggestion, position=Position(start=pos, end=pos + len(word)), )) start_pos = pos + 1 # 4. 检查遗漏卖点 missing_points: list[str] | None = None if request.required_points: missing = _check_selling_point_coverage(content, request.required_points) missing_points = missing if missing else [] # 5. 可选:AI 深度分析(返回 violations + warnings) ai_violations, ai_warnings = await _ai_deep_analysis(x_tenant_id, content, db) if ai_violations: violations.extend(ai_violations) # 6. 计算分数(按严重程度加权) score = 100 for v in violations: if v.severity == RiskLevel.HIGH: score -= 25 elif v.severity == RiskLevel.MEDIUM: score -= 15 else: score -= 5 if missing_points: score -= len(missing_points) * 5 score = max(0, score) # 7. 生成摘要 parts = [] if violations: parts.append(f"发现 {len(violations)} 处违规") if missing_points: parts.append(f"遗漏 {len(missing_points)} 个卖点") if not parts: summary = "脚本内容合规,未发现问题" else: summary = ",".join(parts) # 8. 软性风控评估 soft_warnings: list[SoftRiskWarning] = [] if request.soft_risk_context: soft_warnings = evaluate_soft_risk(request.soft_risk_context) # 合并 AI 产出的 soft_warnings if ai_warnings: soft_warnings.extend(ai_warnings) # 遗漏卖点也加入 soft_warnings if missing_points: soft_warnings.append(SoftRiskWarning( code="missing_selling_points", message=f"遗漏 {len(missing_points)} 个卖点:{', '.join(missing_points)}", action_required=SoftRiskAction.NOTE, blocking=False, )) return ScriptReviewResponse( score=score, summary=summary, violations=violations, missing_points=missing_points, soft_warnings=soft_warnings, ) async def _ai_deep_analysis( tenant_id: str, content: str, db: AsyncSession, ) -> tuple[list[Violation], list[SoftRiskWarning]]: """ 使用 AI 进行深度分析 返回 (violations, soft_warnings) AI 分析失败时返回空列表,降级到规则检测 """ try: # 获取 AI 客户端 ai_client = await AIServiceFactory.get_client(tenant_id, db) if not ai_client: return [], [] # 获取模型配置 config = await AIServiceFactory.get_config(tenant_id, db) if not config: return [], [] text_model = config.models.get("text", "gpt-4o") # 构建分析提示(两类输出) analysis_prompt = f"""作为广告合规审核专家,请分析以下广告脚本内容,检测潜在的合规风险: 脚本内容: {content} 请检查以下方面: 1. 是否存在隐性的虚假宣传(如暗示疗效但不直接说明) 2. 是否存在容易引起误解的表述 3. 是否存在夸大描述 4. 是否存在可能违反广告法的其他内容 请以 JSON 数组返回,每项包含: - category: "violation"(硬性违规,明确违法/违规)或 "warning"(软性提醒,需人工判断) - type: 违规类型 (forbidden_word/efficacy_claim/brand_safety) - content: 问题内容 - severity: 严重程度 (high/medium/low) - suggestion: 修改建议 分类标准: - violation: 违禁词、功效宣称、品牌安全等明确违规 - warning: 夸大描述、易误解表述、潜在风险 如果未发现问题,返回空数组 [] 请只返回 JSON 数组,不要包含其他内容。""" response = await ai_client.chat_completion( messages=[{"role": "user", "content": analysis_prompt}], model=text_model, temperature=0.3, max_tokens=1000, ) # 解析 AI 响应 import json try: # 清理响应内容(移除可能的 markdown 标记) response_content = response.content.strip() if response_content.startswith("```"): response_content = response_content.split("\n", 1)[1] if response_content.endswith("```"): response_content = response_content.rsplit("\n", 1)[0] ai_results = json.loads(response_content) violations = [] warnings = [] for item in ai_results: category = item.get("category", "violation") # 默认当硬性违规(安全兜底) violation_type = item.get("type", "forbidden_word") if violation_type == "forbidden_word": vtype = ViolationType.FORBIDDEN_WORD elif violation_type == "efficacy_claim": vtype = ViolationType.EFFICACY_CLAIM else: vtype = ViolationType.BRAND_SAFETY severity = item.get("severity", "medium") if severity == "high": slevel = RiskLevel.HIGH elif severity == "low": slevel = RiskLevel.LOW else: slevel = RiskLevel.MEDIUM if category == "warning": # 软性提醒 → SoftRiskWarning warnings.append(SoftRiskWarning( code="ai_warning", message=f"{item.get('content', '')}: {item.get('suggestion', '建议修改')}", action_required=SoftRiskAction.NOTE, blocking=False, context={"type": violation_type, "severity": severity}, )) else: # 硬性违规 → Violation violations.append(Violation( type=vtype, content=item.get("content", ""), severity=slevel, suggestion=item.get("suggestion", "建议修改"), )) return violations, warnings except json.JSONDecodeError: return [], [] except Exception: return [], []