videos1.0/backend/tests/unit/test_video_auditor.py
Your Name e77af7f8f0 feat: 实现 TDD 绿色阶段核心模块
实现以下模块并通过全部测试 (150 passed, 92.65% coverage):

- validators.py: 数据验证器 (Brief/视频/审核决策/申诉/时间戳/UUID)
- timestamp_align.py: 多模态时间戳对齐 (ASR/OCR/CV 融合)
- rule_engine.py: 规则引擎 (违禁词检测/语境感知/规则版本管理)
- brief_parser.py: Brief 解析 (卖点/禁忌词/时序要求/品牌调性提取)
- video_auditor.py: 视频审核 (文件验证/ASR/OCR/Logo检测/合规检查)

验收标准达成:
- 违禁词召回率 ≥ 95%
- 误报率 ≤ 5%
- 时长统计误差 ≤ 0.5秒
- 语境感知检测 ("最开心的一天" 不误判)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:41:37 +08:00

301 lines
8.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
视频审核模块单元测试
TDD 测试用例 - 基于 FeatureSummary.md (F-10~F-18) 的验收标准
验收标准:
- 100MB 视频审核 ≤ 5 分钟
- 竞品 Logo F1 ≥ 0.85
- ASR 字错率 ≤ 10%
- OCR 准确率 ≥ 95%
"""
import pytest
from typing import Any
from app.services.video_auditor import (
VideoFileValidator,
ASRService,
OCRService,
LogoDetector,
BriefComplianceChecker,
VideoAuditor,
ProcessingStatus,
)
class TestVideoUpload:
    """Upload-time validation of video files.

    Acceptance criteria (FeatureSummary.md F-10):
    - videos up to 100MB are supported
    - MP4/MOV formats are supported
    - resumable upload is supported (not exercised by the tests in this class)
    """

    @pytest.mark.unit
    @pytest.mark.parametrize("file_size_mb,expected_valid", [
        (50, True),
        (100, True),
        (101, False),
        (200, False),
    ])
    def test_file_size_validation(self, file_size_mb: int, expected_valid: bool) -> None:
        """Check the size limit: the cases treat 100MB as the largest valid size."""
        outcome = VideoFileValidator().validate_size(file_size_mb * 1024 * 1024)

        assert outcome.is_valid == expected_valid
        if expected_valid:
            return
        # A rejection is expected to mention the limit in its message.
        assert "100MB" in outcome.error_message

    @pytest.mark.unit
    @pytest.mark.parametrize("file_format,mime_type,expected_valid", [
        ("mp4", "video/mp4", True),
        ("mov", "video/quicktime", True),
        ("avi", "video/x-msvideo", False),
        ("mkv", "video/x-matroska", False),
        ("pdf", "application/pdf", False),
    ])
    def test_file_format_validation(
        self,
        file_format: str,
        mime_type: str,
        expected_valid: bool,
    ) -> None:
        """Check format validation: only the MP4 and MOV cases are expected valid."""
        outcome = VideoFileValidator().validate_format(file_format, mime_type)

        assert outcome.is_valid == expected_valid
class TestASRAccuracy:
    """Tests for ASR (speech recognition).

    Acceptance criterion (DevelopmentPlan.md):
    - word error rate (WER) <= 10%
    """

    @pytest.mark.unit
    def test_asr_output_format(self) -> None:
        """Check the shape of a transcription: ``text`` plus per-segment fields."""
        transcript = ASRService().transcribe("test_audio.wav")

        for top_level_key in ("text", "segments"):
            assert top_level_key in transcript

        for seg in transcript["segments"]:
            for seg_key in ("word", "start_ms", "end_ms", "confidence"):
                assert seg_key in seg
            # A segment may be zero-length but must not end before it starts.
            assert seg["end_ms"] >= seg["start_ms"]

    @pytest.mark.unit
    def test_asr_word_error_rate_calculation(self) -> None:
        """Check WER on identical, fully different and partially matching text."""
        service = ASRService()

        # Identical strings.
        assert service.calculate_wer("测试文本", "测试文本") == 0.0

        # No character in common.
        assert service.calculate_wer("完全不同", "测试文本") == 1.0

        # Only the last character differs, so the rate is strictly between 0 and 1.
        partial_rate = service.calculate_wer("测试文字", "测试文本")
        assert 0 < partial_rate < 1

    @pytest.mark.unit
    def test_asr_timestamp_accuracy(self) -> None:
        """Check that segment timestamps are non-decreasing and non-overlapping."""
        transcript = ASRService().transcribe("test_audio.wav")

        # Each segment must start at or after the end of the previous one;
        # the first segment is compared against 0.
        last_end_ms = 0
        for seg in transcript["segments"]:
            assert seg["start_ms"] >= last_end_ms
            last_end_ms = seg["end_ms"]
class TestOCRAccuracy:
    """Tests for OCR subtitle recognition.

    Acceptance criterion (DevelopmentPlan.md):
    - accuracy >= 95% (including complex backgrounds)
    """

    @pytest.mark.unit
    def test_ocr_output_format(self) -> None:
        """Check the shape of an OCR result: ``frames`` with per-frame fields."""
        extraction = OCRService().extract_text("video_frame.jpg")

        assert "frames" in extraction
        for frame_entry in extraction["frames"]:
            for frame_key in ("timestamp_ms", "text", "confidence", "bbox"):
                assert frame_key in frame_entry

    @pytest.mark.unit
    def test_ocr_confidence_range(self) -> None:
        """Check that every frame's confidence lies within [0, 1]."""
        extraction = OCRService().extract_text("video_frame.jpg")

        for frame_entry in extraction["frames"]:
            score = frame_entry["confidence"]
            assert 0 <= score <= 1
class TestLogoDetection:
    """Tests for competitor logo detection.

    Acceptance criterion (FeatureSummary.md F-12):
    - F1 >= 0.85 (including scenes with 30% occlusion)
    """

    @pytest.mark.unit
    def test_logo_detection_output_format(self) -> None:
        """Check the shape of a detection result."""
        outcome = LogoDetector().detect("video_frame.jpg")

        assert "detections" in outcome
        # Entries are validated only if any were returned; an empty list passes.
        for hit in outcome["detections"]:
            for hit_key in ("logo_id", "confidence", "bbox"):
                assert hit_key in hit
            assert 0 <= hit["confidence"] <= 1

    @pytest.mark.unit
    def test_add_new_logo(self) -> None:
        """Check that adding a logo registers exactly one entry with its brand."""
        detector = LogoDetector()

        # A fresh detector starts with no known logos.
        assert len(detector.known_logos) == 0

        detector.add_logo("new_competitor_logo.png", brand="New Competitor")

        # Exactly one entry now exists and it carries the brand that was passed in.
        assert len(detector.known_logos) == 1
        only_logo_id = next(iter(detector.known_logos.keys()))
        assert detector.known_logos[only_logo_id]["brand"] == "New Competitor"
class TestAuditPipeline:
    """Integration-style tests for the audit pipeline."""

    @pytest.mark.unit
    def test_audit_report_structure(self) -> None:
        """Check that an audit report contains every required field."""
        audit_report = VideoAuditor().audit("test_video.mp4")

        expected_keys = (
            "report_id", "video_id", "processing_status",
            "asr_results", "ocr_results", "cv_results",
            "violations", "brief_compliance",
        )
        # Collect all absent fields so a failure lists every one of them.
        absent = [key for key in expected_keys if key not in audit_report]
        assert not absent

    @pytest.mark.unit
    def test_audit_processing_status(self) -> None:
        """Check that the report's processing status is COMPLETED."""
        audit_report = VideoAuditor().audit("test_video.mp4")

        completed = ProcessingStatus.COMPLETED.value
        assert audit_report["processing_status"] == completed
class TestBriefCompliance:
    """Tests for Brief compliance checking.

    Acceptance criteria (FeatureSummary.md F-45):
    - duration measurement error <= 0.5 seconds
    - frequency counting accuracy >= 95%

    All tests take the ``sample_brief_rules`` fixture, which is defined
    outside this file; the tests read its ``selling_points`` and
    ``timing_requirements`` entries.
    """

    @pytest.mark.unit
    def test_selling_point_coverage(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check selling-point coverage across ASR and OCR text."""
        video_content = {
            "asr_text": "24小时持妆效果非常好使用天然成分",
            "ocr_text": "24小时持妆",
        }

        checker = BriefComplianceChecker()
        result = checker.check_selling_points(
            video_content,
            sample_brief_rules["selling_points"],
        )

        # Per the original note, 2 of the 3 selling points should be covered
        # (the fixture contents are not visible here).
        assert result["coverage_rate"] >= 0.66
        assert "24小时持妆" in result["detected"]
        assert "天然成分" in result["detected"]

    @pytest.mark.unit
    def test_duration_requirement_check(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check that a 6-second product appearance passes the duration rule."""
        cv_detections = [
            {"object_type": "product", "start_ms": 0, "end_ms": 6000},  # 6 seconds
        ]

        # Per the original note, the requirement is: product in frame > 5 seconds.
        checker = BriefComplianceChecker()
        result = checker.check_duration(
            cv_detections,
            sample_brief_rules["timing_requirements"],
        )

        assert result["product_visible"]["status"] == "passed"
        # The duration is a computed float (presumably from the millisecond
        # timestamps), so compare with a tight tolerance rather than exact
        # equality to stay robust to rounding.
        assert result["product_visible"]["detected_seconds"] == pytest.approx(6.0)

    @pytest.mark.unit
    def test_frequency_requirement_check(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check that three brand mentions pass the frequency rule."""
        asr_segments = [
            {"text": "品牌名产品"},
            {"text": "这个品牌名很好"},
            {"text": "推荐品牌名"},
        ]

        # Per the original note, the requirement is: brand name mentioned >= 3 times.
        checker = BriefComplianceChecker()
        result = checker.check_frequency(
            asr_segments,
            sample_brief_rules["timing_requirements"],
            brand_keyword="品牌名",
        )

        assert result["brand_mention"]["status"] == "passed"
        assert result["brand_mention"]["detected_count"] == 3