videos1.0/backend/app/utils/validators.py
Your Name e77af7f8f0 feat: 实现 TDD 绿色阶段核心模块
实现以下模块并通过全部测试 (150 passed, 92.65% coverage):

- validators.py: 数据验证器 (Brief/视频/审核决策/申诉/时间戳/UUID)
- timestamp_align.py: 多模态时间戳对齐 (ASR/OCR/CV 融合)
- rule_engine.py: 规则引擎 (违禁词检测/语境感知/规则版本管理)
- brief_parser.py: Brief 解析 (卖点/禁忌词/时序要求/品牌调性提取)
- video_auditor.py: 视频审核 (文件验证/ASR/OCR/Logo检测/合规检查)

验收标准达成:
- 违禁词召回率 ≥ 95%
- 误报率 ≤ 5%
- 时长统计误差 ≤ 0.5秒
- 语境感知检测 ("最开心的一天" 不误判)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:41:37 +08:00

271 lines
8.2 KiB
Python

"""
数据验证器模块
提供所有输入数据的格式和约束验证
"""
import re
import uuid
from dataclasses import dataclass
from typing import Any
@dataclass
class ValidationResult:
"""验证结果"""
is_valid: bool
error_message: str = ""
errors: list[str] | None = None
class BriefValidator:
    """Validate Brief fields: platform, region and selling points.

    Each method returns a ``ValidationResult`` describing the first problem
    found, or a valid result when no problem is found.
    """

    # Platform identifiers accepted by ``validate_platform``.
    SUPPORTED_PLATFORMS = {"douyin", "xiaohongshu", "bilibili", "kuaishou"}

    # Region identifiers accepted by ``validate_region``.
    SUPPORTED_REGIONS = {"mainland_china", "hk_tw", "overseas"}

    def validate_platform(self, platform: str | None) -> ValidationResult:
        """Check that ``platform`` is one of ``SUPPORTED_PLATFORMS``.

        Args:
            platform: Platform identifier to check.

        Returns:
            An invalid result when ``platform`` is falsy (None, empty
            string, ...), is not a string, or is not in the supported set;
            a valid result otherwise.
        """
        if not platform:
            return ValidationResult(is_valid=False, error_message="平台不能为空")
        # Non-string input (e.g. a list decoded from JSON) may be unhashable,
        # and a set membership test would raise TypeError for it; report it
        # as unsupported instead of crashing.
        if not isinstance(platform, str) or platform not in self.SUPPORTED_PLATFORMS:
            return ValidationResult(
                is_valid=False,
                error_message=f"不支持的平台: {platform}",
            )
        return ValidationResult(is_valid=True)

    def validate_region(self, region: str | None) -> ValidationResult:
        """Check that ``region`` is one of ``SUPPORTED_REGIONS``.

        Args:
            region: Region identifier to check.

        Returns:
            An invalid result when ``region`` is falsy, is not a string, or
            is not in the supported set; a valid result otherwise.
        """
        if not region:
            return ValidationResult(is_valid=False, error_message="区域不能为空")
        # Same guard as validate_platform: never let an unhashable value
        # reach the set membership test.
        if not isinstance(region, str) or region not in self.SUPPORTED_REGIONS:
            return ValidationResult(
                is_valid=False,
                error_message=f"不支持的区域: {region}",
            )
        return ValidationResult(is_valid=True)

    def validate_selling_points(self, selling_points: list[Any]) -> ValidationResult:
        """Check the structure of a list of selling points.

        Every item must be a dict with a truthy ``"text"`` value and a
        ``"priority"`` key. Only the presence of ``"priority"`` is checked,
        not its value. An empty list is accepted.

        Args:
            selling_points: The selling points to check.

        Returns:
            An invalid result for the first item that breaks a rule (or when
            ``selling_points`` is not a list); a valid result otherwise.
        """
        if not isinstance(selling_points, list):
            return ValidationResult(
                is_valid=False,
                error_message="卖点必须是列表",
            )

        for i, sp in enumerate(selling_points):
            if not isinstance(sp, dict):
                return ValidationResult(
                    is_valid=False,
                    error_message=f"卖点 {i} 格式错误,必须是字典",
                )
            # dict.get returns None for a missing key, so a single truthiness
            # test covers both "missing" and "empty".
            if not sp.get("text"):
                return ValidationResult(
                    is_valid=False,
                    error_message=f"卖点 {i} 缺少 text 字段或 text 为空",
                )
            if "priority" not in sp:
                return ValidationResult(
                    is_valid=False,
                    error_message=f"卖点 {i} 缺少 priority 字段",
                )

        return ValidationResult(is_valid=True)
class VideoValidator:
    """Validate basic video properties: duration and resolution."""

    # Upper bound on video duration, in seconds (30 minutes).
    MAX_DURATION_SECONDS = 1800

    # Minimum resolution.
    # NOTE(review): MIN_HEIGHT is not read by the methods of this class;
    # validate_resolution compares the smaller dimension against MIN_WIDTH.
    MIN_WIDTH = 720
    MIN_HEIGHT = 720

    def validate_duration(self, duration_seconds: int) -> ValidationResult:
        """Check that the duration is positive and within the maximum.

        Args:
            duration_seconds: Video duration in seconds.

        Returns:
            An invalid result when the duration is zero or negative, or when
            it exceeds ``MAX_DURATION_SECONDS``; a valid result otherwise.
        """
        if duration_seconds <= 0:
            failure = "视频时长必须大于 0"
        elif duration_seconds > self.MAX_DURATION_SECONDS:
            failure = f"视频时长超过限制 {self.MAX_DURATION_SECONDS}"
        else:
            return ValidationResult(is_valid=True)
        return ValidationResult(is_valid=False, error_message=failure)

    def validate_resolution(self, resolution: str) -> ValidationResult:
        """Check a ``WIDTHxHEIGHT`` resolution string.

        The string is lower-cased and split on ``"x"``; both parts must
        parse as integers. The smaller of the two numbers is then compared
        against ``MIN_WIDTH``, so landscape and portrait videos are treated
        alike.

        Args:
            resolution: Resolution text such as ``"1920x1080"``.

        Returns:
            An invalid result when the text cannot be parsed or the smaller
            dimension is below ``MIN_WIDTH``; a valid result otherwise.
        """
        try:
            width_text, height_text = resolution.lower().split("x")
            width, height = int(width_text), int(height_text)
        except (ValueError, AttributeError):
            # ValueError: wrong number of parts or a non-integer part.
            # AttributeError: the argument has no ``lower`` method.
            return ValidationResult(
                is_valid=False,
                error_message="分辨率格式错误,应为 WIDTHxHEIGHT",
            )

        # Use the shorter side so orientation does not matter.
        shorter_side = height if height < width else width
        if shorter_side >= self.MIN_WIDTH:
            return ValidationResult(is_valid=True)
        return ValidationResult(
            is_valid=False,
            error_message=f"分辨率过低,最小要求 {self.MIN_WIDTH}p",
        )
class ReviewDecisionValidator:
    """Validate review-decision requests."""

    # Decision values accepted by ``validate_decision_type``.
    VALID_DECISIONS = {"passed", "rejected", "force_passed"}

    def validate_decision_type(self, decision: str | None) -> ValidationResult:
        """Check that ``decision`` is one of ``VALID_DECISIONS``.

        Args:
            decision: Decision value to check.

        Returns:
            An invalid result when ``decision`` is falsy, is not a string,
            or is not a known decision; a valid result otherwise.
        """
        if not decision:
            return ValidationResult(
                is_valid=False,
                error_message="决策类型不能为空",
            )
        # Non-string input may be unhashable (e.g. a list), which would make
        # the set membership test raise TypeError; reject it explicitly.
        if not isinstance(decision, str) or decision not in self.VALID_DECISIONS:
            return ValidationResult(
                is_valid=False,
                error_message=f"无效的决策类型: {decision}",
            )
        return ValidationResult(is_valid=True)

    def validate(self, request: dict[str, Any]) -> ValidationResult:
        """Validate a complete review-decision request.

        Rules, applied in order:
          1. ``request["decision"]`` must pass ``validate_decision_type``.
          2. A ``"force_passed"`` decision needs a non-blank string in
             ``request["force_pass_reason"]``.
          3. A ``"rejected"`` decision needs a non-empty
             ``request["selected_violations"]``.

        Args:
            request: Mapping holding the decision and its supporting fields.

        Returns:
            The first failing check as an invalid result, or a valid result
            when every applicable rule passes.
        """
        decision = request.get("decision")

        type_check = self.validate_decision_type(decision)
        if not type_check.is_valid:
            return type_check

        # A forced pass must come with a reason.
        if decision == "force_passed":
            reason = request.get("force_pass_reason", "")
            # A non-string reason has no usable text (and no ``strip``), so
            # treat it the same as a missing or blank reason.
            if not isinstance(reason, str) or not reason.strip():
                return ValidationResult(
                    is_valid=False,
                    error_message="强制通过必须填写原因",
                )

        # A rejection must name at least one violation.
        if decision == "rejected":
            if not request.get("selected_violations", []):
                return ValidationResult(
                    is_valid=False,
                    error_message="驳回必须选择至少一个违规项",
                )

        return ValidationResult(is_valid=True)
class AppealValidator:
    """Validate appeal submissions."""

    # Minimum number of characters required in an appeal reason.
    MIN_REASON_LENGTH = 10

    def validate_reason(self, reason: str) -> ValidationResult:
        """Check that the appeal reason is present and long enough.

        Length is measured after stripping leading and trailing whitespace,
        so padding cannot be used to reach the minimum and a whitespace-only
        reason counts as empty.

        Args:
            reason: The appeal reason text.

        Returns:
            An invalid result when ``reason`` is not a string, is blank, or
            has fewer than ``MIN_REASON_LENGTH`` characters after stripping;
            a valid result otherwise.
        """
        # Non-string input has no text content; report it as empty instead
        # of letting ``len``/``strip`` raise.
        content = reason.strip() if isinstance(reason, str) else ""
        if not content:
            return ValidationResult(
                is_valid=False,
                error_message="申诉理由不能为空",
            )
        if len(content) < self.MIN_REASON_LENGTH:
            return ValidationResult(
                is_valid=False,
                error_message=f"申诉理由至少 {self.MIN_REASON_LENGTH} 个字",
            )
        return ValidationResult(is_valid=True)

    def validate_token_available(self, user_id: str, token_count: int = 0) -> ValidationResult:
        """Check whether any appeal tokens remain.

        Simplified implementation: the decision is based only on the
        ``token_count`` argument. ``user_id`` is accepted but not used here;
        the original note says a real implementation should query the
        database.

        Args:
            user_id: Identifier of the user (currently unused).
            token_count: Number of appeal tokens remaining.

        Returns:
            An invalid result when ``token_count`` is zero or negative; a
            valid result otherwise.
        """
        if token_count <= 0:
            return ValidationResult(
                is_valid=False,
                error_message="申诉次数已用完",
            )
        return ValidationResult(is_valid=True)
class TimestampValidator:
    """Validate timestamps against a video's duration and against each other."""

    def validate_range(
        self,
        timestamp_ms: int,
        video_duration_ms: int
    ) -> ValidationResult:
        """Check that a timestamp lies within the video.

        A timestamp equal to ``video_duration_ms`` is accepted.

        Args:
            timestamp_ms: Timestamp to check.
            video_duration_ms: Upper bound for the timestamp.

        Returns:
            An invalid result when the timestamp is negative or greater
            than ``video_duration_ms``; a valid result otherwise.
        """
        if timestamp_ms < 0:
            problem = "时间戳不能为负数"
        elif timestamp_ms > video_duration_ms:
            problem = "时间戳超出视频时长"
        else:
            return ValidationResult(is_valid=True)
        return ValidationResult(is_valid=False, error_message=problem)

    def validate_order(self, start: int, end: int) -> ValidationResult:
        """Check that ``start`` is strictly less than ``end``.

        Equal values are rejected.

        Args:
            start: Start timestamp.
            end: End timestamp.

        Returns:
            An invalid result when ``start >= end``; a valid result
            otherwise.
        """
        out_of_order = start >= end
        return (
            ValidationResult(
                is_valid=False,
                error_message="开始时间必须小于结束时间",
            )
            if out_of_order
            else ValidationResult(is_valid=True)
        )
class UUIDValidator:
    """Validate UUID strings."""

    def validate(self, uuid_str: str) -> ValidationResult:
        """Check that ``uuid_str`` parses as a UUID.

        Parsing is delegated to ``uuid.UUID``, so every string form that
        constructor accepts is treated as valid.

        Args:
            uuid_str: Text to check.

        Returns:
            An invalid result when ``uuid_str`` is falsy, is not a string,
            or cannot be parsed by ``uuid.UUID``; a valid result otherwise.
        """
        if not uuid_str:
            return ValidationResult(
                is_valid=False,
                error_message="UUID 不能为空",
            )

        invalid_format = ValidationResult(
            is_valid=False,
            error_message="无效的 UUID 格式",
        )

        # uuid.UUID() raises AttributeError/TypeError (not ValueError) when
        # handed a non-string such as an int or bytes, so reject those here
        # rather than letting the exception escape the validator.
        if not isinstance(uuid_str, str):
            return invalid_format

        try:
            uuid.UUID(uuid_str)
        except ValueError:
            return invalid_format
        return ValidationResult(is_valid=True)