""" 视频审核模块单元测试 TDD 测试用例 - 基于 FeatureSummary.md (F-10~F-18) 的验收标准 验收标准: - 100MB 视频审核 ≤ 5 分钟 - 竞品 Logo F1 ≥ 0.85 - ASR 字错率 ≤ 10% - OCR 准确率 ≥ 95% """ import pytest from typing import Any from app.services.video_auditor import ( VideoFileValidator, ASRService, OCRService, LogoDetector, BriefComplianceChecker, VideoAuditor, ProcessingStatus, ) class TestVideoUpload: """ 视频上传测试 验收标准 (FeatureSummary.md F-10): - 支持 ≤ 100MB 视频 - 支持 MP4/MOV 格式 - 支持断点续传 """ @pytest.mark.unit @pytest.mark.parametrize("file_size_mb,expected_valid", [ (50, True), (100, True), (101, False), (200, False), ]) def test_file_size_validation(self, file_size_mb: int, expected_valid: bool) -> None: """测试文件大小验证 - 最大 100MB""" file_size_bytes = file_size_mb * 1024 * 1024 validator = VideoFileValidator() result = validator.validate_size(file_size_bytes) assert result.is_valid == expected_valid if not expected_valid: assert "100MB" in result.error_message @pytest.mark.unit @pytest.mark.parametrize("file_format,mime_type,expected_valid", [ ("mp4", "video/mp4", True), ("mov", "video/quicktime", True), ("avi", "video/x-msvideo", False), ("mkv", "video/x-matroska", False), ("pdf", "application/pdf", False), ]) def test_file_format_validation( self, file_format: str, mime_type: str, expected_valid: bool, ) -> None: """测试文件格式验证 - 仅支持 MP4/MOV""" validator = VideoFileValidator() result = validator.validate_format(file_format, mime_type) assert result.is_valid == expected_valid class TestASRAccuracy: """ ASR 语音识别测试 验收标准 (DevelopmentPlan.md): - 字错率 (WER) ≤ 10% """ @pytest.mark.unit def test_asr_output_format(self) -> None: """测试 ASR 输出格式""" asr = ASRService() result = asr.transcribe("test_audio.wav") assert "text" in result assert "segments" in result for segment in result["segments"]: assert "word" in segment assert "start_ms" in segment assert "end_ms" in segment assert "confidence" in segment assert segment["end_ms"] >= segment["start_ms"] @pytest.mark.unit def test_asr_word_error_rate_calculation(self) -> None: """测试 WER 计算""" asr = ASRService() # 完全匹配 wer = asr.calculate_wer("测试文本", "测试文本") assert wer == 0.0 # 完全不同 wer = asr.calculate_wer("完全不同", "测试文本") assert wer == 1.0 # 部分匹配 wer = asr.calculate_wer("测试文字", "测试文本") assert 0 < wer < 1 @pytest.mark.unit def test_asr_timestamp_accuracy(self) -> None: """测试 ASR 时间戳准确性""" asr = ASRService() result = asr.transcribe("test_audio.wav") # 时间戳应递增 prev_end = 0 for segment in result["segments"]: assert segment["start_ms"] >= prev_end prev_end = segment["end_ms"] class TestOCRAccuracy: """ OCR 字幕识别测试 验收标准 (DevelopmentPlan.md): - 准确率 ≥ 95%(含复杂背景) """ @pytest.mark.unit def test_ocr_output_format(self) -> None: """测试 OCR 输出格式""" ocr = OCRService() result = ocr.extract_text("video_frame.jpg") assert "frames" in result for frame in result["frames"]: assert "timestamp_ms" in frame assert "text" in frame assert "confidence" in frame assert "bbox" in frame @pytest.mark.unit def test_ocr_confidence_range(self) -> None: """测试 OCR 置信度范围""" ocr = OCRService() result = ocr.extract_text("video_frame.jpg") for frame in result["frames"]: assert 0 <= frame["confidence"] <= 1 class TestLogoDetection: """ 竞品 Logo 检测测试 验收标准 (FeatureSummary.md F-12): - F1 ≥ 0.85(含遮挡 30% 场景) """ @pytest.mark.unit def test_logo_detection_output_format(self) -> None: """测试 Logo 检测输出格式""" detector = LogoDetector() result = detector.detect("video_frame.jpg") assert "detections" in result # 如果有检测结果,验证格式 for detection in result["detections"]: assert "logo_id" in detection assert "confidence" in detection assert "bbox" in detection assert 0 <= detection["confidence"] <= 1 @pytest.mark.unit def test_add_new_logo(self) -> None: """测试添加新 Logo""" detector = LogoDetector() # 初始为空 assert len(detector.known_logos) == 0 # 添加 Logo detector.add_logo("new_competitor_logo.png", brand="New Competitor") # 验证添加成功 assert len(detector.known_logos) == 1 logo_id = list(detector.known_logos.keys())[0] assert detector.known_logos[logo_id]["brand"] == "New Competitor" class TestAuditPipeline: """ 审核流水线集成测试 """ @pytest.mark.unit def test_audit_report_structure(self) -> None: """测试审核报告结构""" auditor = VideoAuditor() report = auditor.audit("test_video.mp4") # 验证报告必需字段 required_fields = [ "report_id", "video_id", "processing_status", "asr_results", "ocr_results", "cv_results", "violations", "brief_compliance" ] for field in required_fields: assert field in report @pytest.mark.unit def test_audit_processing_status(self) -> None: """测试审核处理状态""" auditor = VideoAuditor() report = auditor.audit("test_video.mp4") assert report["processing_status"] == ProcessingStatus.COMPLETED.value class TestBriefCompliance: """ Brief 合规检查测试 验收标准 (FeatureSummary.md F-45): - 时长统计误差 ≤ 0.5秒 - 频次统计准确率 ≥ 95% """ @pytest.mark.unit def test_selling_point_coverage( self, sample_brief_rules: dict[str, Any], ) -> None: """测试卖点覆盖检测""" video_content = { "asr_text": "24小时持妆效果非常好,使用天然成分", "ocr_text": "24小时持妆", } checker = BriefComplianceChecker() result = checker.check_selling_points( video_content, sample_brief_rules["selling_points"] ) # 应检测到 2/3 卖点覆盖 assert result["coverage_rate"] >= 0.66 assert "24小时持妆" in result["detected"] assert "天然成分" in result["detected"] @pytest.mark.unit def test_duration_requirement_check( self, sample_brief_rules: dict[str, Any], ) -> None: """测试时长要求检查""" cv_detections = [ {"object_type": "product", "start_ms": 0, "end_ms": 6000}, # 6秒 ] # 要求: 产品同框 > 5秒 checker = BriefComplianceChecker() result = checker.check_duration( cv_detections, sample_brief_rules["timing_requirements"] ) assert result["product_visible"]["status"] == "passed" assert result["product_visible"]["detected_seconds"] == 6.0 @pytest.mark.unit def test_frequency_requirement_check( self, sample_brief_rules: dict[str, Any], ) -> None: """测试频次要求检查""" asr_segments = [ {"text": "品牌名产品"}, {"text": "这个品牌名很好"}, {"text": "推荐品牌名"}, ] # 要求: 品牌名提及 ≥ 3次 checker = BriefComplianceChecker() result = checker.check_frequency( asr_segments, sample_brief_rules["timing_requirements"], brand_keyword="品牌名" ) assert result["brand_mention"]["status"] == "passed" assert result["brand_mention"]["detected_count"] == 3