# videos1.0/backend/tests/conftest.py

"""
SmartAudit 测试全局配置

本文件定义所有测试共享的 fixtures 和配置。
遵循 TDD 原则：先写测试，后写实现。
"""

import pytest
from typing import Any
from pathlib import Path

# ============================================================================
# 路径配置
# ============================================================================

@pytest.fixture
def fixtures_path() -> Path:
    """Return the test-data directory.

    The directory is named ``fixtures`` and sits next to this module.
    The path is only constructed; its existence is not checked.
    """
    tests_dir = Path(__file__).parent
    return tests_dir.joinpath("fixtures")


@pytest.fixture
def sample_brief_pdf(fixtures_path: Path) -> Path:
    """Return the path of the sample Brief PDF.

    Resolves to ``briefs/sample_brief.pdf`` below ``fixtures_path``.
    The path is only constructed; its existence is not checked.
    """
    briefs_dir = fixtures_path / "briefs"
    return briefs_dir / "sample_brief.pdf"


@pytest.fixture
def sample_video_path(fixtures_path: Path) -> Path:
    """Return the path of the sample video file.

    Resolves to ``videos/sample_video.mp4`` below ``fixtures_path``.
    The path is only constructed; its existence is not checked.
    """
    return fixtures_path.joinpath("videos", "sample_video.mp4")


# ============================================================================
# Brief 规则 Fixtures
# ============================================================================

@pytest.fixture
def sample_brief_rules() -> dict[str, Any]:
    """Return a standard sample of Brief rules.

    The dict contains, in order: ``selling_points`` (text + priority),
    ``forbidden_words`` (word + reason + severity, all ``"hard"``),
    ``brand_tone``, ``timing_requirements``, ``platform`` and ``region``.
    A fresh structure is built on every call.
    """
    # (text, priority) pairs for the selling points.
    point_rows = (
        ("24小时持妆", "high"),
        ("天然成分", "medium"),
        ("敏感肌适用", "medium"),
    )
    # (word, reason) pairs; every entry shares the same severity.
    banned_rows = (
        ("最", "广告法极限词"),
        ("第一", "广告法极限词"),
        ("药用", "化妆品禁用"),
        ("治疗", "化妆品禁用"),
        ("绝对", "广告法极限词"),
        ("领导者", "广告法极限词"),
        ("史上", "广告法极限词"),
    )

    selling_points = [
        {"text": text, "priority": priority} for text, priority in point_rows
    ]
    forbidden_words = [
        {"word": word, "reason": reason, "severity": "hard"}
        for word, reason in banned_rows
    ]
    brand_tone = dict(style="年轻活力", description="面向 18-35 岁女性用户")
    timing_requirements = [
        dict(type="product_visible", min_duration_seconds=5),
        dict(type="brand_mention", min_frequency=3),
    ]

    return dict(
        selling_points=selling_points,
        forbidden_words=forbidden_words,
        brand_tone=brand_tone,
        timing_requirements=timing_requirements,
        platform="douyin",
        region="mainland_china",
    )


@pytest.fixture
def sample_platform_rules() -> dict[str, Any]:
    """Return a sample rule set for the Douyin platform.

    The dict contains ``platform``, ``version``, ``forbidden_words``
    (each tagged with category ``"ad_law"``) and ``content_rules``
    (rule text + category). A fresh structure is built on every call.
    """
    ad_law_words = ("最", "第一", "国家级", "绝对")
    # (rule text, category) pairs.
    rule_rows = (
        ("不得含有虚假宣传", "compliance"),
        ("不得使用竞品 Logo", "brand_safety"),
    )

    rules: dict[str, Any] = {"platform": "douyin", "version": "2026.01"}
    rules["forbidden_words"] = [
        {"word": word, "category": "ad_law"} for word in ad_law_words
    ]
    rules["content_rules"] = [
        {"rule": rule, "category": category} for rule, category in rule_rows
    ]
    return rules


# ============================================================================
# 视频审核 Fixtures
# ============================================================================

@pytest.fixture
def sample_asr_result() -> dict[str, Any]:
    """Return a sample ASR (speech recognition) result.

    ``text`` holds the full transcript; ``segments`` holds five entries,
    each with ``word``, ``start_ms``, ``end_ms`` and ``confidence``.
    """
    transcript = "大家好，这款产品真的非常好用，24小时持妆效果特别棒"
    # (word, start_ms, end_ms, confidence)
    timed_words = (
        ("大家好", 0, 800, 0.98),
        ("这款产品", 850, 1500, 0.97),
        ("真的非常好用", 1550, 2800, 0.96),
        ("24小时持妆", 2900, 4000, 0.99),
        ("效果特别棒", 4100, 5200, 0.95),
    )
    segments = [
        {"word": word, "start_ms": begin, "end_ms": finish, "confidence": score}
        for word, begin, finish, score in timed_words
    ]
    return {"text": transcript, "segments": segments}


@pytest.fixture
def sample_ocr_result() -> dict[str, Any]:
    """Return a sample OCR (subtitle recognition) result.

    ``frames`` holds three entries, each with ``timestamp_ms``, ``text``,
    ``confidence`` and a four-integer ``bbox`` list.
    """
    # (timestamp_ms, text, confidence, bbox)
    recognized = (
        (1000, "产品名称", 0.98, [100, 450, 300, 480]),
        (3000, "24小时持妆", 0.97, [150, 450, 350, 480]),
        (5000, "立即购买", 0.96, [200, 500, 400, 530]),
    )
    frames = []
    for stamp, caption, score, box in recognized:
        frames.append(
            {"timestamp_ms": stamp, "text": caption, "confidence": score, "bbox": box}
        )
    return {"frames": frames}


@pytest.fixture
def sample_cv_result() -> dict[str, Any]:
    """Return a sample CV (visual detection) result.

    ``detections`` holds two entries: a ``product`` detection and a
    ``competitor_logo`` detection. Both carry frame indices, ``fps``,
    millisecond timestamps, ``confidence`` and ``bbox``; the logo entry
    additionally carries ``logo_id``.
    """
    product_detection = dict(
        object_type="product",
        start_frame=30,
        end_frame=180,
        fps=30,
        start_ms=1000,  # 30 / 30 * 1000 = 1000 ms
        end_ms=6000,  # 180 / 30 * 1000 = 6000 ms (5 s visible)
        confidence=0.95,
        bbox=[200, 100, 400, 350],
    )
    logo_detection = dict(
        object_type="competitor_logo",
        start_frame=200,
        end_frame=230,
        fps=30,
        start_ms=6667,  # 200 / 30 * 1000, rounded
        end_ms=7667,  # 230 / 30 * 1000, rounded
        confidence=0.88,
        bbox=[50, 50, 100, 100],
        logo_id="competitor_001",
    )
    return {"detections": [product_detection, logo_detection]}


# ============================================================================
# 违禁词测试数据
# ============================================================================

@pytest.fixture
def prohibited_word_test_cases() -> list[dict[str, Any]]:
    """Return the test-case set for prohibited-word detection.

    Each case is a dict with ``text``, ``context``, ``expected`` (list of
    words expected to be reported) and ``should_detect``.
    """
    columns = ("text", "context", "expected", "should_detect")
    rows = (
        # Advertisement context: words should be detected.
        ("这是全网销量第一的产品", "advertisement", ["第一"], True),
        ("我们是行业领导者", "advertisement", ["领导者"], True),
        ("史上最低价促销", "advertisement", ["最", "史上"], True),
        ("绝对有效，药用级别", "advertisement", ["绝对", "药用"], True),
        # Everyday context: nothing should be detected (context-aware).
        ("今天是我最开心的一天", "daily", [], False),
        ("这是我第一次来这里", "daily", [], False),
        ("我们家排行第一", "daily", [], False),
        # Boundary cases: empty text and text without prohibited words.
        ("", "advertisement", [], False),
        ("这是一个普通的产品介绍", "advertisement", [], False),
        # Several prohibited words combined in one text.
        ("全网销量第一，史上最低价", "advertisement", ["第一", "最", "史上"], True),
    )
    return [dict(zip(columns, row)) for row in rows]


@pytest.fixture
def context_understanding_test_cases() -> list[dict[str, Any]]:
    """Return the test-case set for context understanding.

    Each case is a dict with ``text``, ``expected_context`` and
    ``should_flag``.
    """
    # (text, expected_context, should_flag)
    samples = (
        ("这款产品是最好的选择", "advertisement", True),
        ("最近天气真好", "daily", False),
        ("今天心情最棒了", "daily", False),
        ("我们的产品效果最显著", "advertisement", True),
        ("这是我见过最美的风景", "daily", False),
    )
    return [
        {"text": sentence, "expected_context": context, "should_flag": flagged}
        for sentence, context, flagged in samples
    ]


# ============================================================================
# 时间戳对齐测试数据
# ============================================================================

@pytest.fixture
def multimodal_alignment_test_cases() -> list[dict[str, Any]]:
    """Return test cases for multimodal timestamp alignment.

    Each case gives ASR, OCR and CV timestamps, a ``tolerance_ms`` of 500,
    the expected merge outcome (``True``, ``False`` or ``"partial"``) and
    the expected merged timestamp (``None`` when nothing merges).
    """
    # All three timestamps are identical.
    fully_aligned = dict(
        asr_ts=1000,
        ocr_ts=1000,
        cv_ts=1000,
        tolerance_ms=500,
        expected_merged=True,
        expected_timestamp=1000,
    )
    # Timestamps differ but stay within the tolerance.
    within_tolerance = dict(
        asr_ts=1000,
        ocr_ts=1200,
        cv_ts=1100,
        tolerance_ms=500,
        expected_merged=True,
        expected_timestamp=1100,  # median of the three timestamps
    )
    # Timestamps are further apart than the tolerance.
    out_of_tolerance = dict(
        asr_ts=1000,
        ocr_ts=2000,
        cv_ts=3000,
        tolerance_ms=500,
        expected_merged=False,
        expected_timestamp=None,
    )
    # ASR and OCR align with each other; CV stays separate.
    partially_aligned = dict(
        asr_ts=1000,
        ocr_ts=1300,
        cv_ts=5000,
        tolerance_ms=500,
        expected_merged="partial",
        expected_timestamp=1150,
    )
    return [fully_aligned, within_tolerance, out_of_tolerance, partially_aligned]


# ============================================================================
# API 测试数据
# ============================================================================

@pytest.fixture
def valid_brief_upload_request() -> dict[str, Any]:
    """Return a valid Brief upload request payload.

    Contains ``task_id``, ``platform`` and ``region``.
    """
    return dict(
        task_id="task_001",
        platform="douyin",
        region="mainland_china",
    )


@pytest.fixture
def valid_video_submit_request() -> dict[str, Any]:
    """Return a valid video submission request payload.

    Contains ``task_id``, ``video_id`` and ``brief_id``.
    """
    fields = ("task_id", "video_id", "brief_id")
    identifiers = ("task_001", "video_001", "brief_001")
    return dict(zip(fields, identifiers))


@pytest.fixture
def valid_review_decision_request() -> dict[str, Any]:
    """Return a valid review decision request payload.

    The decision is ``"passed"`` and no violations are selected.
    """
    request: dict[str, Any] = {}
    request["report_id"] = "report_001"
    request["decision"] = "passed"
    request["selected_violations"] = []
    return request


@pytest.fixture
def force_pass_decision_request() -> dict[str, Any]:
    """Return a force-pass decision request payload.

    The decision is ``"force_passed"`` with one selected violation, and
    the payload includes the ``force_pass_reason`` field.
    """
    return dict(
        report_id="report_001",
        decision="force_passed",
        selected_violations=["violation_001"],
        force_pass_reason="达人玩的新梗，品牌方认可",
    )