实现以下模块并通过全部测试 (150 passed, 92.65% coverage):
- validators.py: 数据验证器 (Brief/视频/审核决策/申诉/时间戳/UUID)
- timestamp_align.py: 多模态时间戳对齐 (ASR/OCR/CV 融合)
- rule_engine.py: 规则引擎 (违禁词检测/语境感知/规则版本管理)
- brief_parser.py: Brief 解析 (卖点/禁忌词/时序要求/品牌调性提取)
- video_auditor.py: 视频审核 (文件验证/ASR/OCR/Logo检测/合规检查)
验收标准达成:
- 违禁词召回率 ≥ 95%
- 误报率 ≤ 5%
- 时长统计误差 ≤ 0.5秒
- 语境感知检测 ("最开心的一天" 不误判)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
271 lines
8.2 KiB
Python
271 lines
8.2 KiB
Python
"""
|
|
数据验证器模块
|
|
|
|
提供所有输入数据的格式和约束验证
|
|
"""
|
|
|
|
import re
|
|
import uuid
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
|
|
@dataclass
|
|
class ValidationResult:
|
|
"""验证结果"""
|
|
is_valid: bool
|
|
error_message: str = ""
|
|
errors: list[str] | None = None
|
|
|
|
|
|
class BriefValidator:
    """Validate individual fields of a Brief payload.

    Each method returns a ``ValidationResult`` describing the outcome.
    """

    # Platform identifiers accepted by ``validate_platform``.
    SUPPORTED_PLATFORMS = {"douyin", "xiaohongshu", "bilibili", "kuaishou"}

    # Region identifiers accepted by ``validate_region``.
    SUPPORTED_REGIONS = {"mainland_china", "hk_tw", "overseas"}

    def validate_platform(self, platform: str | None) -> ValidationResult:
        """Check that ``platform`` is one of ``SUPPORTED_PLATFORMS``.

        Args:
            platform: Platform identifier; ``None`` or empty is rejected.

        Returns:
            A valid result for a supported platform, otherwise an invalid
            result whose message says the value is empty or unsupported.
        """
        if not platform:
            return ValidationResult(is_valid=False, error_message="平台不能为空")

        # The isinstance guard keeps unhashable input (e.g. a list or dict)
        # from raising TypeError in the set membership test.
        if not isinstance(platform, str) or platform not in self.SUPPORTED_PLATFORMS:
            return ValidationResult(
                is_valid=False,
                error_message=f"不支持的平台: {platform}",
            )

        return ValidationResult(is_valid=True)

    def validate_region(self, region: str | None) -> ValidationResult:
        """Check that ``region`` is one of ``SUPPORTED_REGIONS``.

        Args:
            region: Region identifier; ``None`` or empty is rejected.

        Returns:
            A valid result for a supported region, otherwise an invalid
            result whose message says the value is empty or unsupported.
        """
        if not region:
            return ValidationResult(is_valid=False, error_message="区域不能为空")

        # Same guard as validate_platform: never hash a non-string value.
        if not isinstance(region, str) or region not in self.SUPPORTED_REGIONS:
            return ValidationResult(
                is_valid=False,
                error_message=f"不支持的区域: {region}",
            )

        return ValidationResult(is_valid=True)

    def validate_selling_points(self, selling_points: list[Any]) -> ValidationResult:
        """Check the structure of a list of selling points.

        Every entry must be a dict with a truthy ``"text"`` value and a
        ``"priority"`` key. Validation stops at the first offending entry.

        Args:
            selling_points: The selling points to check.

        Returns:
            A valid result when every entry is well formed (an empty list is
            valid), otherwise an invalid result naming the offending index.
        """
        if not isinstance(selling_points, list):
            return ValidationResult(
                is_valid=False,
                error_message="卖点必须是列表",
            )

        for i, sp in enumerate(selling_points):
            if not isinstance(sp, dict):
                return ValidationResult(
                    is_valid=False,
                    error_message=f"卖点 {i} 格式错误,必须是字典",
                )

            # dict.get returns None for a missing key, so a single truthiness
            # test covers both "missing" and "empty".
            if not sp.get("text"):
                return ValidationResult(
                    is_valid=False,
                    error_message=f"卖点 {i} 缺少 text 字段或 text 为空",
                )

            # Only the presence of the key is checked, not its value.
            if "priority" not in sp:
                return ValidationResult(
                    is_valid=False,
                    error_message=f"卖点 {i} 缺少 priority 字段",
                )

        return ValidationResult(is_valid=True)
|
|
|
|
|
|
class VideoValidator:
    """Validate basic video properties: duration and resolution."""

    # Upper bound on video duration, in seconds.
    MAX_DURATION_SECONDS = 1800  # 30 minutes

    # Minimum resolution.
    MIN_WIDTH = 720
    MIN_HEIGHT = 720

    def validate_duration(self, duration_seconds: int | float) -> ValidationResult:
        """Check that the duration is positive and within the maximum.

        Args:
            duration_seconds: Video duration in seconds.

        Returns:
            A valid result when ``0 < duration_seconds <=
            MAX_DURATION_SECONDS``; otherwise an invalid result. Values that
            cannot be compared with a number (``None``, strings, ...) and NaN
            are reported as invalid instead of raising or passing.
        """
        try:
            is_positive = duration_seconds > 0
            too_long = duration_seconds > self.MAX_DURATION_SECONDS
        except TypeError:
            return ValidationResult(
                is_valid=False,
                error_message="视频时长必须是数字",
            )

        # Testing "not > 0" instead of "<= 0" also rejects NaN, for which
        # every ordering comparison is False.
        if not is_positive:
            return ValidationResult(
                is_valid=False,
                error_message="视频时长必须大于 0",
            )

        if too_long:
            return ValidationResult(
                is_valid=False,
                error_message=f"视频时长超过限制 {self.MAX_DURATION_SECONDS} 秒",
            )

        return ValidationResult(is_valid=True)

    def validate_resolution(self, resolution: str) -> ValidationResult:
        """Check a ``WIDTHxHEIGHT`` resolution string against the minimum.

        Args:
            resolution: Resolution such as ``"1920x1080"``; the separator is
                matched case-insensitively.

        Returns:
            An invalid result when the string cannot be parsed into exactly
            two integers or when the shorter side is below ``MIN_WIDTH``;
            a valid result otherwise.
        """
        try:
            width, height = map(int, resolution.lower().split("x"))
        except (ValueError, AttributeError, TypeError):
            # ValueError: wrong number of parts or a non-integer part.
            # AttributeError / TypeError: input is not a str.
            return ValidationResult(
                is_valid=False,
                error_message="分辨率格式错误,应为 WIDTHxHEIGHT",
            )

        # Compare the shorter side so landscape and portrait are both handled.
        # NOTE(review): MIN_HEIGHT is not consulted here; only MIN_WIDTH is
        # used as the threshold (both are currently 720) - confirm intent.
        min_dimension = min(width, height)

        if min_dimension < self.MIN_WIDTH:
            return ValidationResult(
                is_valid=False,
                error_message=f"分辨率过低,最小要求 {self.MIN_WIDTH}p",
            )

        return ValidationResult(is_valid=True)
|
|
|
|
|
|
class ReviewDecisionValidator:
    """Validate review-decision requests."""

    # Decision values accepted by ``validate_decision_type``.
    VALID_DECISIONS = {"passed", "rejected", "force_passed"}

    def validate_decision_type(self, decision: str | None) -> ValidationResult:
        """Check that ``decision`` is one of ``VALID_DECISIONS``.

        Args:
            decision: Decision value; ``None`` or empty is rejected.

        Returns:
            A valid result for a known decision, otherwise an invalid result
            whose message says the value is empty or not recognised.
        """
        if not decision:
            return ValidationResult(
                is_valid=False,
                error_message="决策类型不能为空",
            )

        # The isinstance guard keeps unhashable input (e.g. a list or dict)
        # from raising TypeError in the set membership test.
        if not isinstance(decision, str) or decision not in self.VALID_DECISIONS:
            return ValidationResult(
                is_valid=False,
                error_message=f"无效的决策类型: {decision}",
            )

        return ValidationResult(is_valid=True)

    def validate(self, request: dict[str, Any]) -> ValidationResult:
        """Validate a complete review-decision request.

        Reads ``"decision"`` and, depending on its value,
        ``"force_pass_reason"`` or ``"selected_violations"`` from ``request``.

        Args:
            request: The request payload as a dict.

        Returns:
            The failing result from ``validate_decision_type`` when the
            decision itself is bad; an invalid result when a ``force_passed``
            decision has no non-blank string reason or a ``rejected`` decision
            has no selected violations; a valid result otherwise.
        """
        decision = request.get("decision")

        # The decision type must be acceptable before anything else is checked.
        decision_result = self.validate_decision_type(decision)
        if not decision_result.is_valid:
            return decision_result

        # A forced pass must come with a reason.
        if decision == "force_passed":
            reason = request.get("force_pass_reason", "")
            # Non-string values (None, numbers, ...) count as a missing reason
            # instead of failing on .strip().
            if not isinstance(reason, str) or not reason.strip():
                return ValidationResult(
                    is_valid=False,
                    error_message="强制通过必须填写原因",
                )

        # A rejection must name at least one violation.
        if decision == "rejected":
            violations = request.get("selected_violations", [])
            if not violations:
                return ValidationResult(
                    is_valid=False,
                    error_message="驳回必须选择至少一个违规项",
                )

        return ValidationResult(is_valid=True)
|
|
|
|
|
|
class AppealValidator:
    """Validate appeal submissions."""

    MIN_REASON_LENGTH = 10  # at least 10 characters

    def validate_reason(self, reason: str) -> ValidationResult:
        """Check that the appeal reason is present and long enough.

        Length is measured after stripping leading and trailing whitespace,
        so padding cannot satisfy the minimum. Non-string input is treated
        as an empty reason.

        Args:
            reason: The appeal reason text.

        Returns:
            An invalid result when the reason is empty/blank or shorter than
            ``MIN_REASON_LENGTH`` characters; a valid result otherwise.
        """
        text = reason.strip() if isinstance(reason, str) else ""

        if not text:
            return ValidationResult(
                is_valid=False,
                error_message="申诉理由不能为空",
            )

        if len(text) < self.MIN_REASON_LENGTH:
            return ValidationResult(
                is_valid=False,
                error_message=f"申诉理由至少 {self.MIN_REASON_LENGTH} 个字",
            )

        return ValidationResult(is_valid=True)

    def validate_token_available(self, user_id: str, token_count: int = 0) -> ValidationResult:
        """Check whether an appeal token is available.

        Simplified implementation: the decision is based only on the
        ``token_count`` passed in; a real implementation should query the
        database.

        Args:
            user_id: Identifier of the user; currently unused.
            token_count: Number of appeal tokens remaining.

        Returns:
            An invalid result when ``token_count`` is zero or negative; a
            valid result otherwise.
        """
        if token_count <= 0:
            return ValidationResult(
                is_valid=False,
                error_message="申诉次数已用完",
            )

        return ValidationResult(is_valid=True)
|
|
|
|
|
|
class TimestampValidator:
    """Validate timestamps against a video's duration and against each other."""

    def validate_range(
        self,
        timestamp_ms: int,
        video_duration_ms: int,
    ) -> ValidationResult:
        """Check that a timestamp lies within ``[0, video_duration_ms]``.

        Args:
            timestamp_ms: The timestamp to check.
            video_duration_ms: The video duration used as the upper bound;
                a timestamp equal to it is accepted.

        Returns:
            An invalid result for a negative timestamp or one beyond the
            duration; a valid result otherwise.
        """
        problem = ""
        if timestamp_ms < 0:
            problem = "时间戳不能为负数"
        elif timestamp_ms > video_duration_ms:
            problem = "时间戳超出视频时长"

        if problem:
            return ValidationResult(is_valid=False, error_message=problem)
        return ValidationResult(is_valid=True)

    def validate_order(self, start: int, end: int) -> ValidationResult:
        """Check that ``start`` is strictly before ``end``.

        Args:
            start: Start timestamp.
            end: End timestamp.

        Returns:
            An invalid result when ``start >= end``; a valid result otherwise.
        """
        out_of_order = start >= end
        if not out_of_order:
            return ValidationResult(is_valid=True)

        return ValidationResult(
            is_valid=False,
            error_message="开始时间必须小于结束时间",
        )
|
|
|
|
|
|
class UUIDValidator:
    """Validate UUID strings."""

    def validate(self, uuid_str: str) -> ValidationResult:
        """Check that ``uuid_str`` can be parsed by ``uuid.UUID``.

        Args:
            uuid_str: The candidate UUID string.

        Returns:
            An invalid result when the value is empty or cannot be parsed as
            a UUID (including non-string input); a valid result otherwise.
        """
        if not uuid_str:
            return ValidationResult(
                is_valid=False,
                error_message="UUID 不能为空",
            )

        try:
            uuid.UUID(uuid_str)
        except (ValueError, AttributeError, TypeError):
            # ValueError: malformed string. AttributeError / TypeError: the
            # value is not a str, so uuid.UUID fails while normalising it.
            return ValidationResult(
                is_valid=False,
                error_message="无效的 UUID 格式",
            )

        return ValidationResult(is_valid=True)
|