videos1.0/backend/tests/conftest.py
Your Name e77af7f8f0 feat: 实现 TDD 绿色阶段核心模块
实现以下模块并通过全部测试 (150 passed, 92.65% coverage):

- validators.py: 数据验证器 (Brief/视频/审核决策/申诉/时间戳/UUID)
- timestamp_align.py: 多模态时间戳对齐 (ASR/OCR/CV 融合)
- rule_engine.py: 规则引擎 (违禁词检测/语境感知/规则版本管理)
- brief_parser.py: Brief 解析 (卖点/禁忌词/时序要求/品牌调性提取)
- video_auditor.py: 视频审核 (文件验证/ASR/OCR/Logo检测/合规检查)

验收标准达成:
- 违禁词召回率 ≥ 95%
- 误报率 ≤ 5%
- 时长统计误差 ≤ 0.5秒
- 语境感知检测 ("最开心的一天" 不误判)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:41:37 +08:00

279 lines
9.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
SmartAudit 测试全局配置
本文件定义所有测试共享的 fixtures 和配置。
遵循 TDD 原则:先写测试,后写实现。
"""
import pytest
from typing import Any
from pathlib import Path
# ============================================================================
# 路径配置
# ============================================================================
@pytest.fixture
def fixtures_path() -> Path:
    """Return the test-data directory: the ``fixtures`` folder next to this file."""
    return Path(__file__).parent / "fixtures"
@pytest.fixture
def sample_brief_pdf(fixtures_path: Path) -> Path:
    """Return the path of the sample Brief PDF under ``fixtures_path``.

    Args:
        fixtures_path: Root directory of the test data.

    Returns:
        ``fixtures_path / "briefs" / "sample_brief.pdf"``; the file's existence
        is not checked here.
    """
    return fixtures_path / "briefs" / "sample_brief.pdf"
@pytest.fixture
def sample_video_path(fixtures_path: Path) -> Path:
    """Return the path of the sample video under ``fixtures_path``.

    Args:
        fixtures_path: Root directory of the test data.

    Returns:
        ``fixtures_path / "videos" / "sample_video.mp4"``; the file's existence
        is not checked here.
    """
    return fixtures_path / "videos" / "sample_video.mp4"
# ============================================================================
# Brief 规则 Fixtures
# ============================================================================
@pytest.fixture
def sample_brief_rules() -> dict[str, Any]:
    """Return a standard sample of Brief rules.

    The dictionary carries selling points, forbidden words, brand tone,
    timing requirements, and the target platform and region.
    """
    return {
        "selling_points": [
            {"text": "24小时持妆", "priority": "high"},
            {"text": "天然成分", "priority": "medium"},
            {"text": "敏感肌适用", "priority": "medium"},
        ],
        "forbidden_words": [
            # This entry was an empty string; an empty string is a substring of
            # every text, so it could never act as a meaningful forbidden word.
            {"word": "最", "reason": "广告法极限词", "severity": "hard"},
            {"word": "第一", "reason": "广告法极限词", "severity": "hard"},
            {"word": "药用", "reason": "化妆品禁用", "severity": "hard"},
            {"word": "治疗", "reason": "化妆品禁用", "severity": "hard"},
            {"word": "绝对", "reason": "广告法极限词", "severity": "hard"},
            {"word": "领导者", "reason": "广告法极限词", "severity": "hard"},
            {"word": "史上", "reason": "广告法极限词", "severity": "hard"},
        ],
        "brand_tone": {
            "style": "年轻活力",
            "description": "面向 18-35 岁女性用户",
        },
        "timing_requirements": [
            {"type": "product_visible", "min_duration_seconds": 5},
            {"type": "brand_mention", "min_frequency": 3},
        ],
        "platform": "douyin",
        "region": "mainland_china",
    }
@pytest.fixture
def sample_platform_rules() -> dict[str, Any]:
    """Return sample platform rules for Douyin.

    The dictionary carries the platform name, a rule-set version, the
    platform-level forbidden words, and free-text content rules.
    """
    return {
        "platform": "douyin",
        "version": "2026.01",
        "forbidden_words": [
            # This entry was an empty string; an empty string is a substring of
            # every text, so it could never act as a meaningful forbidden word.
            {"word": "最", "category": "ad_law"},
            {"word": "第一", "category": "ad_law"},
            {"word": "国家级", "category": "ad_law"},
            {"word": "绝对", "category": "ad_law"},
        ],
        "content_rules": [
            {"rule": "不得含有虚假宣传", "category": "compliance"},
            {"rule": "不得使用竞品 Logo", "category": "brand_safety"},
        ],
    }
# ============================================================================
# 视频审核 Fixtures
# ============================================================================
@pytest.fixture
def sample_asr_result() -> dict[str, Any]:
    """Return a sample ASR (speech recognition) result.

    ``text`` is the full transcript; ``segments`` lists the recognised pieces
    with start/end times in milliseconds and a confidence score each.
    """
    return {
        "text": "大家好这款产品真的非常好用24小时持妆效果特别棒",
        "segments": [
            {"word": "大家好", "start_ms": 0, "end_ms": 800, "confidence": 0.98},
            {"word": "这款产品", "start_ms": 850, "end_ms": 1500, "confidence": 0.97},
            {"word": "真的非常好用", "start_ms": 1550, "end_ms": 2800, "confidence": 0.96},
            {"word": "24小时持妆", "start_ms": 2900, "end_ms": 4000, "confidence": 0.99},
            {"word": "效果特别棒", "start_ms": 4100, "end_ms": 5200, "confidence": 0.95},
        ],
    }
@pytest.fixture
def sample_ocr_result() -> dict[str, Any]:
    """Return a sample OCR (subtitle recognition) result.

    Each entry in ``frames`` has a timestamp in milliseconds, the recognised
    text, a confidence score, and a four-number bounding box.
    """
    return {
        "frames": [
            {"timestamp_ms": 1000, "text": "产品名称", "confidence": 0.98, "bbox": [100, 450, 300, 480]},
            {"timestamp_ms": 3000, "text": "24小时持妆", "confidence": 0.97, "bbox": [150, 450, 350, 480]},
            {"timestamp_ms": 5000, "text": "立即购买", "confidence": 0.96, "bbox": [200, 500, 400, 530]},
        ],
    }
@pytest.fixture
def sample_cv_result() -> dict[str, Any]:
    """Return a sample CV (visual detection) result.

    ``detections`` holds one product detection and one competitor-logo
    detection. Each gives its frame range, the frame rate, the matching
    millisecond range, a confidence score, and a bounding box.
    """
    return {
        "detections": [
            {
                "object_type": "product",
                "start_frame": 30,
                "end_frame": 180,
                "fps": 30,
                "start_ms": 1000,  # 30 / 30 * 1000 = 1000 ms
                "end_ms": 6000,  # 180 / 30 * 1000 = 6000 ms (5 s duration)
                "confidence": 0.95,
                "bbox": [200, 100, 400, 350],
            },
            {
                "object_type": "competitor_logo",
                "start_frame": 200,
                "end_frame": 230,
                "fps": 30,
                "start_ms": 6667,  # 200 / 30 * 1000, rounded
                "end_ms": 7667,  # 230 / 30 * 1000, rounded
                "confidence": 0.88,
                "bbox": [50, 50, 100, 100],
                "logo_id": "competitor_001",
            },
        ],
    }
# ============================================================================
# 违禁词测试数据
# ============================================================================
@pytest.fixture
def prohibited_word_test_cases() -> list[dict[str, Any]]:
    """Return the test-case set for prohibited-word detection.

    Each case has the input ``text``, its ``context`` label, the ``expected``
    list of prohibited words, and a ``should_detect`` flag.
    """
    return [
        # Advertisement context: the words should be detected.
        {"text": "这是全网销量第一的产品", "context": "advertisement", "expected": ["第一"], "should_detect": True},
        {"text": "我们是行业领导者", "context": "advertisement", "expected": ["领导者"], "should_detect": True},
        # "最" restored here: the expected entry was an empty string, which
        # cannot be a meaningful detection result.
        {"text": "史上最低价促销", "context": "advertisement", "expected": ["最", "史上"], "should_detect": True},
        {"text": "绝对有效,药用级别", "context": "advertisement", "expected": ["绝对", "药用"], "should_detect": True},
        # Everyday context: nothing should be detected (context awareness).
        {"text": "今天是我最开心的一天", "context": "daily", "expected": [], "should_detect": False},
        {"text": "这是我第一次来这里", "context": "daily", "expected": [], "should_detect": False},
        {"text": "我们家排行第一", "context": "daily", "expected": [], "should_detect": False},
        # Edge cases: empty input, and ad text with no prohibited word.
        {"text": "", "context": "advertisement", "expected": [], "should_detect": False},
        {"text": "这是一个普通的产品介绍", "context": "advertisement", "expected": [], "should_detect": False},
        # Several prohibited words in one text ("最" restored here as well).
        {"text": "全网销量第一,史上最低价", "context": "advertisement", "expected": ["第一", "最", "史上"], "should_detect": True},
    ]
@pytest.fixture
def context_understanding_test_cases() -> list[dict[str, Any]]:
    """Return the test-case set for context understanding.

    Each case has a ``text``, the ``expected_context`` label, and a
    ``should_flag`` boolean. In this data set ``should_flag`` is true exactly
    for the cases labelled ``"advertisement"``.
    """
    return [
        {"text": "这款产品是最好的选择", "expected_context": "advertisement", "should_flag": True},
        {"text": "最近天气真好", "expected_context": "daily", "should_flag": False},
        {"text": "今天心情最棒了", "expected_context": "daily", "should_flag": False},
        {"text": "我们的产品效果最显著", "expected_context": "advertisement", "should_flag": True},
        {"text": "这是我见过最美的风景", "expected_context": "daily", "should_flag": False},
    ]
# ============================================================================
# 时间戳对齐测试数据
# ============================================================================
@pytest.fixture
def multimodal_alignment_test_cases() -> list[dict[str, Any]]:
    """Return test cases for multimodal timestamp alignment.

    Each case gives one ASR, one OCR and one CV timestamp, a tolerance in
    milliseconds, and the expected outcome. ``expected_merged`` is ``True``,
    ``False`` or the string ``"partial"``; ``expected_timestamp`` is the
    expected merged timestamp, or ``None`` when nothing merges.
    """
    return [
        # All three timestamps are identical.
        {
            "asr_ts": 1000,
            "ocr_ts": 1000,
            "cv_ts": 1000,
            "tolerance_ms": 500,
            "expected_merged": True,
            "expected_timestamp": 1000,
        },
        # All three timestamps fall within the tolerance.
        {
            "asr_ts": 1000,
            "ocr_ts": 1200,
            "cv_ts": 1100,
            "tolerance_ms": 500,
            "expected_merged": True,
            "expected_timestamp": 1100,  # median of the three timestamps
        },
        # The timestamps are further apart than the tolerance.
        {
            "asr_ts": 1000,
            "ocr_ts": 2000,
            "cv_ts": 3000,
            "tolerance_ms": 500,
            "expected_merged": False,
            "expected_timestamp": None,
        },
        # Partial alignment.
        {
            "asr_ts": 1000,
            "ocr_ts": 1300,
            "cv_ts": 5000,
            "tolerance_ms": 500,
            "expected_merged": "partial",  # ASR and OCR align; CV stands alone
            "expected_timestamp": 1150,  # midpoint of the ASR and OCR timestamps
        },
    ]
# ============================================================================
# API 测试数据
# ============================================================================
@pytest.fixture
def valid_brief_upload_request() -> dict[str, Any]:
    """Return a valid Brief upload request payload (task id, platform, region)."""
    return {
        "task_id": "task_001",
        "platform": "douyin",
        "region": "mainland_china",
    }
@pytest.fixture
def valid_video_submit_request() -> dict[str, Any]:
    """Return a valid video submission request payload (task, video and brief ids)."""
    return {
        "task_id": "task_001",
        "video_id": "video_001",
        "brief_id": "brief_001",
    }
@pytest.fixture
def valid_review_decision_request() -> dict[str, Any]:
    """Return a valid review-decision request: a ``"passed"`` decision with no violations selected."""
    return {
        "report_id": "report_001",
        "decision": "passed",
        "selected_violations": [],
    }
@pytest.fixture
def force_pass_decision_request() -> dict[str, Any]:
    """Return a force-pass review request.

    The payload selects one violation and includes ``force_pass_reason``,
    the reason supplied for the forced pass.
    """
    return {
        "report_id": "report_001",
        "decision": "force_passed",
        "selected_violations": ["violation_001"],
        "force_pass_reason": "达人玩的新梗,品牌方认可",
    }