videos1.0/backend/tests/unit/test_video_auditor.py
Your Name 040aada160 feat: 添加全面的 TDD 测试套件框架
基于项目需求文档(PRD.md, FeatureSummary.md, DevelopmentPlan.md,
UIDesign.md, User_Role_Interfaces.md)编写的 TDD 测试用例。

后端测试 (Python/pytest):
- 单元测试: rule_engine, brief_parser, timestamp_alignment,
  video_auditor, validators
- 集成测试: API Brief, Video, Review 端点
- AI 模块测试: ASR, OCR, Logo 检测服务
- 全局 fixtures 和 pytest 配置

前端测试 (TypeScript/Vitest):
- 工具函数测试: utils.test.ts
- 组件测试: Button, VideoPlayer, ViolationList
- Hooks 测试: useVideoAudit, useVideoPlayer, useAppeal
- MSW mock handlers 配置

E2E 测试 (Playwright):
- 认证流程测试
- 视频上传流程测试
- 视频审核流程测试
- 申诉流程测试

所有测试当前使用 pytest.skip() / it.skip() 作为占位符,
遵循 TDD 红灯阶段 - 等待实现代码后运行。

验收标准覆盖:
- ASR WER ≤ 10%
- OCR 准确率 ≥ 95%
- Logo F1 ≥ 0.85
- 时间戳误差 ≤ 0.5s
- 频次统计准确率 ≥ 95%

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:22:24 +08:00

412 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
视频审核模块单元测试
TDD 测试用例 - 基于 FeatureSummary.md (F-10~F-18) 的验收标准
验收标准:
- 100MB 视频审核 ≤ 5 分钟
- 竞品 Logo F1 ≥ 0.85
- ASR 字错率 ≤ 10%
- OCR 准确率 ≥ 95%
"""
import pytest
from typing import Any
# 导入待实现的模块(TDD 红灯阶段)
# from app.services.video_auditor import VideoAuditor, AuditReport
class TestVideoUpload:
    """
    Placeholder tests for video upload validation.

    Acceptance criteria (FeatureSummary.md F-10):
    - videos of at most 100MB are supported
    - MP4 / MOV formats are supported
    - resumable upload is supported

    Every test here currently ends in ``pytest.skip``; the intended
    assertions are kept as commented sketches until the implementation exists.
    """

    @pytest.mark.unit
    @pytest.mark.parametrize(
        "file_size_mb,expected_valid",
        [(50, True), (100, True), (101, False), (200, False)],
    )
    def test_file_size_validation(self, file_size_mb: int, expected_valid: bool) -> None:
        """Size limit check: the cases expect 100MB to pass and 101MB to fail."""
        # Only consumed by the commented sketch below for now.
        size_in_bytes = file_size_mb * 1024 ** 2

        # TODO: implement file size validation, then enable:
        # validator = VideoFileValidator()
        # result = validator.validate_size(size_in_bytes)
        #
        # assert result.is_valid == expected_valid
        # if not expected_valid:
        #     assert "100MB" in result.error_message
        pytest.skip("待实现:文件大小验证")

    @pytest.mark.unit
    @pytest.mark.parametrize(
        "file_format,mime_type,expected_valid",
        [
            ("mp4", "video/mp4", True),
            ("mov", "video/quicktime", True),
            ("avi", "video/x-msvideo", False),
            ("mkv", "video/x-matroska", False),
            ("pdf", "application/pdf", False),
        ],
    )
    def test_file_format_validation(
        self,
        file_format: str,
        mime_type: str,
        expected_valid: bool,
    ) -> None:
        """Format check: the cases expect only MP4 and MOV to be accepted."""
        # TODO: implement format validation, then enable:
        # validator = VideoFileValidator()
        # result = validator.validate_format(file_format, mime_type)
        #
        # assert result.is_valid == expected_valid
        pytest.skip("待实现:文件格式验证")
class TestASRAccuracy:
    """
    Placeholder tests for ASR (speech recognition).

    Acceptance criterion (DevelopmentPlan.md):
    - error rate (WER) <= 10%

    Every test here currently ends in ``pytest.skip``; the intended
    assertions are kept as commented sketches.
    """

    @pytest.mark.unit
    def test_asr_output_format(self) -> None:
        """Shape of the ASR result: overall text plus per-segment timing fields."""
        # TODO: implement the ASR service, then enable:
        # asr = ASRService()
        # result = asr.transcribe("test_audio.wav")
        #
        # assert "text" in result
        # assert "segments" in result
        # for segment in result["segments"]:
        #     assert "word" in segment
        #     assert "start_ms" in segment
        #     assert "end_ms" in segment
        #     assert "confidence" in segment
        #     assert segment["end_ms"] >= segment["start_ms"]
        pytest.skip("待实现ASR 输出格式")

    @pytest.mark.unit
    def test_asr_word_error_rate(self) -> None:
        """
        Aggregate ASR error rate over a labelled test set.

        Acceptance criterion: WER <= 10%.
        """
        # TODO: validate against a labelled test set, then enable:
        # NOTE(review): the sketch weights each sample by
        # len(ground_truth.split()); whitespace splitting may not yield one
        # token per character for unsegmented Chinese text - confirm the
        # intended unit before enabling.
        # asr = ASRService()
        # test_set = load_asr_test_set()  # labelled data set
        #
        # total_errors = 0
        # total_words = 0
        #
        # for sample in test_set:
        #     result = asr.transcribe(sample["audio_path"])
        #     wer = calculate_wer(result["text"], sample["ground_truth"])
        #     total_errors += wer * len(sample["ground_truth"].split())
        #     total_words += len(sample["ground_truth"].split())
        #
        # overall_wer = total_errors / total_words
        # assert overall_wer <= 0.10, f"WER {overall_wer:.2%} 超过阈值 10%"
        pytest.skip("待实现ASR 字错率测试")

    @pytest.mark.unit
    def test_asr_timestamp_accuracy(self) -> None:
        """Segment timestamps: the sketch checks each start is not before the previous end."""
        # TODO: implement timestamp validation, then enable:
        # asr = ASRService()
        # result = asr.transcribe("test_audio.wav")
        #
        # # timestamps should be increasing
        # prev_end = 0
        # for segment in result["segments"]:
        #     assert segment["start_ms"] >= prev_end
        #     prev_end = segment["end_ms"]
        pytest.skip("待实现ASR 时间戳准确性")
class TestOCRAccuracy:
    """
    Placeholder tests for OCR subtitle recognition.

    Acceptance criterion (DevelopmentPlan.md):
    - accuracy >= 95% (including complex backgrounds)

    Every test here currently ends in ``pytest.skip``; the intended
    assertions are kept as commented sketches.
    """

    @pytest.mark.unit
    def test_ocr_output_format(self) -> None:
        """Shape of the OCR result: a list of frames with text, confidence and bbox."""
        # TODO: implement the OCR service, then enable:
        # ocr = OCRService()
        # result = ocr.extract_text("video_frame.jpg")
        #
        # assert "frames" in result
        # for frame in result["frames"]:
        #     assert "timestamp_ms" in frame
        #     assert "text" in frame
        #     assert "confidence" in frame
        #     assert "bbox" in frame
        pytest.skip("待实现OCR 输出格式")

    @pytest.mark.unit
    def test_ocr_accuracy_rate(self) -> None:
        """
        Exact-match OCR accuracy over a labelled test set.

        Acceptance criterion: accuracy >= 95%.
        """
        # TODO: validate against a labelled test set, then enable:
        # NOTE(review): this sketch reads result["text"], whereas the
        # output-format sketch in this class only asserts "text" inside each
        # entry of result["frames"] - reconcile before enabling.
        # ocr = OCRService()
        # test_set = load_ocr_test_set()
        #
        # correct = 0
        # for sample in test_set:
        #     result = ocr.extract_text(sample["image_path"])
        #     if result["text"] == sample["ground_truth"]:
        #         correct += 1
        #
        # accuracy = correct / len(test_set)
        # assert accuracy >= 0.95, f"准确率 {accuracy:.2%} 低于阈值 95%"
        pytest.skip("待实现OCR 准确率测试")

    @pytest.mark.unit
    def test_ocr_complex_background(self) -> None:
        """OCR on images with simple, complex and gradient backgrounds."""
        # TODO: cover complex backgrounds, then enable:
        # ocr = OCRService()
        #
        # # backgrounds of differing complexity
        # test_cases = [
        #     {"image": "simple_bg.jpg", "text": "测试文字"},
        #     {"image": "complex_bg.jpg", "text": "复杂背景"},
        #     {"image": "gradient_bg.jpg", "text": "渐变背景"},
        # ]
        #
        # for case in test_cases:
        #     result = ocr.extract_text(case["image"])
        #     assert result["text"] == case["text"]
        pytest.skip("待实现:复杂背景 OCR")
class TestLogoDetection:
    """
    Placeholder tests for competitor logo detection.

    Acceptance criterion (FeatureSummary.md F-12):
    - F1 >= 0.85 (including scenes with 30% occlusion)

    Every test here currently ends in ``pytest.skip``; the intended
    assertions are kept as commented sketches.
    """

    @pytest.mark.unit
    def test_logo_detection_output_format(self) -> None:
        """Shape of the detector result: detections with id, confidence and bbox."""
        # TODO: implement the logo detection service, then enable:
        # detector = LogoDetector()
        # result = detector.detect("video_frame.jpg")
        #
        # assert "detections" in result
        # for detection in result["detections"]:
        #     assert "logo_id" in detection
        #     assert "confidence" in detection
        #     assert "bbox" in detection
        #     assert detection["confidence"] >= 0 and detection["confidence"] <= 1
        pytest.skip("待实现Logo 检测输出格式")

    @pytest.mark.unit
    def test_logo_detection_f1_score(self) -> None:
        """
        F1 score of logo detection over a labelled test set.

        Acceptance criterion: F1 >= 0.85.
        """
        # TODO: validate against a labelled test set, then enable:
        # detector = LogoDetector()
        # test_set = load_logo_test_set()  # >= 200 images
        #
        # predictions = []
        # ground_truths = []
        #
        # for sample in test_set:
        #     result = detector.detect(sample["image_path"])
        #     predictions.append(result["detections"])
        #     ground_truths.append(sample["ground_truth_logos"])
        #
        # f1 = calculate_f1(predictions, ground_truths)
        # assert f1 >= 0.85, f"F1 {f1:.2f} 低于阈值 0.85"
        pytest.skip("待实现Logo F1 测试")

    @pytest.mark.unit
    def test_logo_detection_with_occlusion(self) -> None:
        """
        Logo detection on a partially occluded logo.

        Acceptance criterion: still detectable at 30% occlusion.
        """
        # TODO: cover the occlusion scenario, then enable:
        # detector = LogoDetector()
        #
        # # logo image with 30% occlusion
        # result = detector.detect("logo_30_percent_occluded.jpg")
        #
        # assert len(result["detections"]) > 0
        # assert result["detections"][0]["confidence"] >= 0.7
        pytest.skip("待实现:遮挡场景 Logo 检测")

    @pytest.mark.unit
    def test_new_logo_instant_effect(self) -> None:
        """A newly uploaded logo should be detectable immediately."""
        # TODO: cover adding a logo at runtime, then enable:
        # NOTE(review): this sketch reads d["brand"], while the output-format
        # sketch in this class asserts "logo_id" / "confidence" / "bbox" only -
        # confirm whether "brand" is part of a detection.
        # detector = LogoDetector()
        #
        # # upload the new logo
        # detector.add_logo("new_competitor_logo.png", brand="New Competitor")
        #
        # # detect right away
        # result = detector.detect("frame_with_new_logo.jpg")
        # assert any(d["brand"] == "New Competitor" for d in result["detections"])
        pytest.skip("待实现Logo 动态添加")
class TestAuditPipeline:
    """
    Placeholder tests for the audit pipeline as a whole.

    Every test here currently ends in ``pytest.skip``; the intended
    assertions are kept as commented sketches.
    """

    @pytest.mark.unit
    def test_audit_processing_time(self) -> None:
        """
        Wall-clock time of a full audit.

        Acceptance criterion: a 100MB video is processed within 5 minutes.
        """
        # TODO: implement the processing-time test, then enable:
        # NOTE(review): marked ``unit`` although the sketch audits a 100MB
        # file with a 300 second budget - confirm the marker is intended.
        # import time
        #
        # auditor = VideoAuditor()
        # start_time = time.time()
        #
        # result = auditor.audit("100mb_test_video.mp4")
        #
        # processing_time = time.time() - start_time
        # assert processing_time <= 300, f"处理时间 {processing_time:.1f}s 超过 5 分钟"
        pytest.skip("待实现:处理时间测试")

    @pytest.mark.unit
    def test_audit_report_structure(self) -> None:
        """The audit report should carry every required top-level field."""
        # TODO: implement report structure validation, then enable:
        # auditor = VideoAuditor()
        # report = auditor.audit("test_video.mp4")
        #
        # # fields every report must contain
        # required_fields = [
        #     "report_id", "video_id", "processing_status",
        #     "asr_results", "ocr_results", "cv_results",
        #     "violations", "brief_compliance"
        # ]
        # for field in required_fields:
        #     assert field in report
        pytest.skip("待实现:报告结构验证")

    @pytest.mark.unit
    def test_violation_with_evidence(self) -> None:
        """Each reported violation should come with evidence (url and start timestamp)."""
        # TODO: implement evidence validation, then enable:
        # auditor = VideoAuditor()
        # report = auditor.audit("video_with_violation.mp4")
        #
        # for violation in report["violations"]:
        #     assert "evidence" in violation
        #     assert violation["evidence"]["url"] is not None
        #     assert violation["evidence"]["timestamp_start"] is not None
        pytest.skip("待实现:违规证据")
class TestBriefCompliance:
    """
    Placeholder tests for Brief compliance checks.

    Acceptance criteria (FeatureSummary.md F-45):
    - duration measurement error <= 0.5 seconds
    - frequency counting accuracy >= 95%

    Each test requests ``sample_brief_rules``, which is not defined in this
    file and is therefore expected to be supplied as a pytest fixture from
    elsewhere. Every test currently ends in ``pytest.skip``; the intended
    assertions are kept as commented sketches.
    """

    @pytest.mark.unit
    def test_selling_point_coverage(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Selling points mentioned in speech or on screen should be detected."""
        # Only consumed by the commented sketch below for now.
        recognized_text = {
            "asr_text": "24小时持妆效果非常好使用天然成分",
            "ocr_text": "24小时持妆",
        }

        # TODO: implement selling-point coverage detection, then enable:
        # checker = BriefComplianceChecker()
        # result = checker.check_selling_points(
        #     recognized_text,
        #     sample_brief_rules["selling_points"]
        # )
        #
        # # 2 of 3 selling points should be covered
        # assert result["coverage_rate"] >= 0.66
        # assert "24小时持妆" in result["detected"]
        # assert "天然成分" in result["detected"]
        pytest.skip("待实现:卖点覆盖检测")

    @pytest.mark.unit
    def test_duration_requirement_check(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """A product visible for 6 seconds should satisfy the duration requirement."""
        # One detection spanning 0-6000 ms, i.e. 6 seconds.
        product_span = {"object_type": "product", "start_ms": 0, "end_ms": 6000}
        product_detections = [product_span]

        # Requirement: product in frame for > 5 seconds.
        # TODO: implement the duration check, then enable:
        # checker = BriefComplianceChecker()
        # result = checker.check_duration(
        #     product_detections,
        #     sample_brief_rules["timing_requirements"]
        # )
        #
        # assert result["product_visible"]["status"] == "passed"
        # assert result["product_visible"]["detected_seconds"] == 6.0
        pytest.skip("待实现:时长要求检查")

    @pytest.mark.unit
    def test_frequency_requirement_check(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Three brand mentions in speech should satisfy the frequency requirement."""
        # Three transcript segments, each containing the brand keyword once.
        utterances = ("品牌名产品", "这个品牌名很好", "推荐品牌名")
        spoken_segments = [{"text": utterance} for utterance in utterances]

        # Requirement: brand name mentioned >= 3 times.
        # TODO: implement the frequency check, then enable:
        # checker = BriefComplianceChecker()
        # result = checker.check_frequency(
        #     spoken_segments,
        #     sample_brief_rules["timing_requirements"],
        #     brand_keyword="品牌名"
        # )
        #
        # assert result["brand_mention"]["status"] == "passed"
        # assert result["brand_mention"]["detected_count"] == 3
        pytest.skip("待实现:频次要求检查")