# videos1.0/backend/tests/unit/test_video_auditor.py

"""
视频审核模块单元测试

TDD 测试用例 - 基于 FeatureSummary.md (F-10~F-18) 的验收标准

验收标准：
- 100MB 视频审核 ≤ 5 分钟
- 竞品 Logo F1 ≥ 0.85
- ASR 字错率 ≤ 10%
- OCR 准确率 ≥ 95%
"""

import pytest
from typing import Any

from app.services.video_auditor import (
    VideoFileValidator,
    ASRService,
    OCRService,
    LogoDetector,
    BriefComplianceChecker,
    VideoAuditor,
    ProcessingStatus,
)


class TestVideoUpload:
    """
    Video upload validation tests.

    Acceptance criteria (FeatureSummary.md F-10):
    - Videos of at most 100MB are supported
    - MP4/MOV formats are supported
    - Resumable upload is supported
    """

    @pytest.mark.unit
    @pytest.mark.parametrize(
        ("file_size_mb", "expected_valid"),
        [
            (50, True),
            (100, True),
            (101, False),
            (200, False),
        ],
    )
    def test_file_size_validation(self, file_size_mb: int, expected_valid: bool) -> None:
        """Check the size validator against the 100MB upper limit.

        Sizes are given in MB and converted to bytes before being passed to
        ``VideoFileValidator.validate_size``. Rejected sizes must also carry
        an error message that mentions "100MB".
        """
        bytes_per_mb = 1024 * 1024
        outcome = VideoFileValidator().validate_size(file_size_mb * bytes_per_mb)

        assert outcome.is_valid == expected_valid
        if expected_valid:
            return

        # Only rejections are required to explain the limit.
        assert "100MB" in outcome.error_message

    @pytest.mark.unit
    @pytest.mark.parametrize(
        ("file_format", "mime_type", "expected_valid"),
        [
            ("mp4", "video/mp4", True),
            ("mov", "video/quicktime", True),
            ("avi", "video/x-msvideo", False),
            ("mkv", "video/x-matroska", False),
            ("pdf", "application/pdf", False),
        ],
    )
    def test_file_format_validation(
        self,
        file_format: str,
        mime_type: str,
        expected_valid: bool,
    ) -> None:
        """Check the format validator: only MP4 and MOV are expected to pass."""
        outcome = VideoFileValidator().validate_format(file_format, mime_type)

        assert outcome.is_valid == expected_valid


class TestASRAccuracy:
    """
    ASR (speech recognition) tests.

    Acceptance criteria (DevelopmentPlan.md):
    - Character error rate (WER) <= 10%
    """

    @pytest.mark.unit
    def test_asr_output_format(self) -> None:
        """Check the shape of the transcription result.

        The result must expose "text" and "segments"; every segment must
        carry a word, start/end timestamps and a confidence, and must not
        end before it starts.
        """
        transcript = ASRService().transcribe("test_audio.wav")

        for top_level_key in ("text", "segments"):
            assert top_level_key in transcript

        segment_keys = ("word", "start_ms", "end_ms", "confidence")
        for seg in transcript["segments"]:
            for key in segment_keys:
                assert key in seg
            assert seg["end_ms"] >= seg["start_ms"]

    @pytest.mark.unit
    def test_asr_word_error_rate_calculation(self) -> None:
        """Check the WER calculation on three representative string pairs."""
        service = ASRService()

        # Identical strings: no errors at all.
        assert service.calculate_wer("测试文本", "测试文本") == 0.0

        # Strings sharing no characters: every position is an error.
        assert service.calculate_wer("完全不同", "测试文本") == 1.0

        # One of four characters differs: strictly between the extremes.
        partial_wer = service.calculate_wer("测试文字", "测试文本")
        assert 0 < partial_wer < 1

    @pytest.mark.unit
    def test_asr_timestamp_accuracy(self) -> None:
        """Check that segment timestamps never go backwards.

        Each segment must start at or after the end of the previous one;
        the first segment is compared against 0.
        """
        transcript = ASRService().transcribe("test_audio.wav")

        last_end_ms = 0
        for seg in transcript["segments"]:
            assert seg["start_ms"] >= last_end_ms
            last_end_ms = seg["end_ms"]


class TestOCRAccuracy:
    """
    OCR subtitle recognition tests.

    Acceptance criteria (DevelopmentPlan.md):
    - Accuracy >= 95% (including complex backgrounds)
    """

    @pytest.mark.unit
    def test_ocr_output_format(self) -> None:
        """Check the shape of the OCR extraction result.

        The result must expose "frames", and every frame must carry a
        timestamp, the recognized text, a confidence and a bounding box.
        """
        extraction = OCRService().extract_text("video_frame.jpg")

        assert "frames" in extraction

        frame_keys = ("timestamp_ms", "text", "confidence", "bbox")
        for frame in extraction["frames"]:
            for key in frame_keys:
                assert key in frame

    @pytest.mark.unit
    def test_ocr_confidence_range(self) -> None:
        """Check that every frame's confidence lies within [0, 1]."""
        extraction = OCRService().extract_text("video_frame.jpg")

        assert all(0 <= frame["confidence"] <= 1 for frame in extraction["frames"])


class TestLogoDetection:
    """
    Competitor logo detection tests.

    Acceptance criteria (FeatureSummary.md F-12):
    - F1 >= 0.85 (including scenes with 30% occlusion)
    """

    @pytest.mark.unit
    def test_logo_detection_output_format(self) -> None:
        """Check the shape of the logo detection result.

        The result must expose "detections". An empty list is acceptable;
        any detection that is present must carry a logo id, a confidence in
        [0, 1] and a bounding box.
        """
        outcome = LogoDetector().detect("video_frame.jpg")

        assert "detections" in outcome

        detection_keys = ("logo_id", "confidence", "bbox")
        for hit in outcome["detections"]:
            for key in detection_keys:
                assert key in hit
            assert 0 <= hit["confidence"] <= 1

    @pytest.mark.unit
    def test_add_new_logo(self) -> None:
        """Check that a logo can be registered on a fresh detector."""
        logo_detector = LogoDetector()

        # A freshly constructed detector knows no logos.
        assert len(logo_detector.known_logos) == 0

        logo_detector.add_logo("new_competitor_logo.png", brand="New Competitor")

        # Exactly one entry is registered, and it records the given brand.
        assert len(logo_detector.known_logos) == 1
        registered_id = next(iter(logo_detector.known_logos.keys()))
        assert logo_detector.known_logos[registered_id]["brand"] == "New Competitor"


class TestAuditPipeline:
    """
    Audit pipeline tests.

    NOTE(review): the original description calls these integration tests,
    but they are tagged with the ``unit`` marker; confirm which is intended.
    """

    @pytest.mark.unit
    def test_audit_report_structure(self) -> None:
        """Check that the audit report contains every required field."""
        audit_report = VideoAuditor().audit("test_video.mp4")

        expected_fields = (
            "report_id",
            "video_id",
            "processing_status",
            "asr_results",
            "ocr_results",
            "cv_results",
            "violations",
            "brief_compliance",
        )
        # Collect every absent field so a failure reports all of them at once.
        absent = [name for name in expected_fields if name not in audit_report]
        assert absent == []

    @pytest.mark.unit
    def test_audit_processing_status(self) -> None:
        """Check that the audit report's status is the COMPLETED value."""
        audit_report = VideoAuditor().audit("test_video.mp4")
        completed = ProcessingStatus.COMPLETED.value

        assert audit_report["processing_status"] == completed


class TestBriefCompliance:
    """
    Brief compliance check tests.

    Acceptance criteria (FeatureSummary.md F-45):
    - Duration measurement error <= 0.5 seconds
    - Frequency counting accuracy >= 95%
    """

    @pytest.mark.unit
    def test_selling_point_coverage(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check selling-point detection across ASR and OCR text.

        Two selling points appear in the supplied content, so coverage is
        expected to be at least 0.66 and both must be reported as detected.
        NOTE(review): the "2 of 3" expectation presumes the fixture defines
        three selling points; the fixture is not visible here.
        """
        content = {
            "asr_text": "24小时持妆效果非常好，使用天然成分",
            "ocr_text": "24小时持妆",
        }
        selling_points = sample_brief_rules["selling_points"]

        outcome = BriefComplianceChecker().check_selling_points(content, selling_points)

        # Expect 2/3 of the selling points to be covered.
        assert outcome["coverage_rate"] >= 0.66
        for expected_point in ("24小时持妆", "天然成分"):
            assert expected_point in outcome["detected"]

    @pytest.mark.unit
    def test_duration_requirement_check(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check the on-screen duration requirement for the product.

        A single product detection spanning 0-6000 ms must pass and be
        reported as 6.0 seconds.
        """
        # One product detection lasting 6 seconds.
        detections = [
            {"object_type": "product", "start_ms": 0, "end_ms": 6000},
        ]
        # Requirement (per the original note): product in frame > 5 seconds.
        timing_rules = sample_brief_rules["timing_requirements"]

        outcome = BriefComplianceChecker().check_duration(detections, timing_rules)

        product_visible = outcome["product_visible"]
        assert product_visible["status"] == "passed"
        assert product_visible["detected_seconds"] == 6.0

    @pytest.mark.unit
    def test_frequency_requirement_check(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check the brand-mention frequency requirement.

        The brand keyword occurs once in each of three ASR segments, so the
        check must pass with a detected count of 3.
        """
        spoken_lines = ("品牌名产品", "这个品牌名很好", "推荐品牌名")
        segments = [{"text": line} for line in spoken_lines]
        # Requirement (per the original note): brand mentioned >= 3 times.
        timing_rules = sample_brief_rules["timing_requirements"]

        outcome = BriefComplianceChecker().check_frequency(
            segments,
            timing_rules,
            brand_keyword="品牌名",
        )

        brand_mention = outcome["brand_mention"]
        assert brand_mention["status"] == "passed"
        assert brand_mention["detected_count"] == 3