实现以下模块并通过全部测试 (150 passed, 92.65% coverage):
- validators.py: 数据验证器 (Brief/视频/审核决策/申诉/时间戳/UUID)
- timestamp_align.py: 多模态时间戳对齐 (ASR/OCR/CV 融合)
- rule_engine.py: 规则引擎 (违禁词检测/语境感知/规则版本管理)
- brief_parser.py: Brief 解析 (卖点/禁忌词/时序要求/品牌调性提取)
- video_auditor.py: 视频审核 (文件验证/ASR/OCR/Logo检测/合规检查)
验收标准达成:
- 违禁词召回率 ≥ 95%
- 误报率 ≤ 5%
- 时长统计误差 ≤ 0.5秒
- 语境感知检测 ("最开心的一天" 不误判)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
301 lines
8.4 KiB
Python
"""
Unit tests for the video audit module.

TDD test cases based on the acceptance criteria in FeatureSummary.md (F-10~F-18).

Acceptance criteria:
- Auditing a 100MB video takes ≤ 5 minutes
- Competitor logo F1 ≥ 0.85
- ASR error rate ≤ 10%
- OCR accuracy ≥ 95%
"""
|
||
|
||
import pytest
|
||
from typing import Any
|
||
|
||
from app.services.video_auditor import (
|
||
VideoFileValidator,
|
||
ASRService,
|
||
OCRService,
|
||
LogoDetector,
|
||
BriefComplianceChecker,
|
||
VideoAuditor,
|
||
ProcessingStatus,
|
||
)
|
||
|
||
|
||
class TestVideoUpload:
    """Video upload validation tests.

    Acceptance criteria (FeatureSummary.md F-10):
    - Videos of up to 100MB are supported
    - MP4/MOV formats are supported
    - Resumable upload is supported
    """

    @pytest.mark.unit
    @pytest.mark.parametrize(
        "file_size_mb,expected_valid",
        [
            (50, True),
            (100, True),
            (101, False),
            (200, False),
        ],
    )
    def test_file_size_validation(self, file_size_mb: int, expected_valid: bool) -> None:
        """Check the size validator against sizes on both sides of 100MB."""
        outcome = VideoFileValidator().validate_size(file_size_mb * 1024 * 1024)

        assert outcome.is_valid == expected_valid
        if expected_valid:
            return
        # A rejection is expected to mention the limit in its message.
        assert "100MB" in outcome.error_message

    @pytest.mark.unit
    @pytest.mark.parametrize(
        "file_format,mime_type,expected_valid",
        [
            ("mp4", "video/mp4", True),
            ("mov", "video/quicktime", True),
            ("avi", "video/x-msvideo", False),
            ("mkv", "video/x-matroska", False),
            ("pdf", "application/pdf", False),
        ],
    )
    def test_file_format_validation(
        self,
        file_format: str,
        mime_type: str,
        expected_valid: bool,
    ) -> None:
        """Check the format validator: only the MP4 and MOV cases are expected valid."""
        verdict = VideoFileValidator().validate_format(file_format, mime_type)

        assert verdict.is_valid == expected_valid
|
||
|
||
|
||
class TestASRAccuracy:
    """ASR speech-recognition tests.

    Acceptance criterion (DevelopmentPlan.md):
    - Error rate (WER) ≤ 10%
    """

    @pytest.mark.unit
    def test_asr_output_format(self) -> None:
        """Check the keys present in a transcription result and in each segment."""
        transcript = ASRService().transcribe("test_audio.wav")

        for top_level_key in ("text", "segments"):
            assert top_level_key in transcript

        for piece in transcript["segments"]:
            for segment_key in ("word", "start_ms", "end_ms", "confidence"):
                assert segment_key in piece
            # A segment must not end before it starts.
            assert piece["end_ms"] >= piece["start_ms"]

    @pytest.mark.unit
    def test_asr_word_error_rate_calculation(self) -> None:
        """Check calculate_wer on identical, fully different and partly different text."""
        service = ASRService()
        baseline = "测试文本"

        # Identical strings
        assert service.calculate_wer("测试文本", baseline) == 0.0

        # No character in common
        assert service.calculate_wer("完全不同", baseline) == 1.0

        # Only the last character differs
        partial_rate = service.calculate_wer("测试文字", baseline)
        assert 0 < partial_rate < 1

    @pytest.mark.unit
    def test_asr_timestamp_accuracy(self) -> None:
        """Check that segments do not start before the previous segment ended."""
        segments = ASRService().transcribe("test_audio.wav")["segments"]

        # Each start is compared with the previous end; the first one with 0.
        earliest_start_ms = 0
        for piece in segments:
            assert piece["start_ms"] >= earliest_start_ms
            earliest_start_ms = piece["end_ms"]
|
||
|
||
|
||
class TestOCRAccuracy:
    """OCR subtitle-recognition tests.

    Acceptance criterion (DevelopmentPlan.md):
    - Accuracy ≥ 95% (including complex backgrounds)
    """

    @pytest.mark.unit
    def test_ocr_output_format(self) -> None:
        """Check the keys present in an extraction result and in each frame entry."""
        extraction = OCRService().extract_text("video_frame.jpg")

        assert "frames" in extraction
        for entry in extraction["frames"]:
            for frame_key in ("timestamp_ms", "text", "confidence", "bbox"):
                assert frame_key in entry

    @pytest.mark.unit
    def test_ocr_confidence_range(self) -> None:
        """Check that every frame's confidence lies within [0, 1]."""
        frames = OCRService().extract_text("video_frame.jpg")["frames"]

        for entry in frames:
            score = entry["confidence"]
            assert 0 <= score <= 1
|
||
|
||
|
||
class TestLogoDetection:
    """Competitor logo detection tests.

    Acceptance criterion (FeatureSummary.md F-12):
    - F1 ≥ 0.85 (including scenes with 30% occlusion)
    """

    @pytest.mark.unit
    def test_logo_detection_output_format(self) -> None:
        """Check the keys present in a detection result and in each detection."""
        outcome = LogoDetector().detect("video_frame.jpg")

        assert "detections" in outcome
        # The per-detection checks only run when something was detected.
        for hit in outcome["detections"]:
            for detection_key in ("logo_id", "confidence", "bbox"):
                assert detection_key in hit
            assert 0 <= hit["confidence"] <= 1

    @pytest.mark.unit
    def test_add_new_logo(self) -> None:
        """Check that add_logo registers one entry carrying the given brand."""
        detector = LogoDetector()

        # A fresh detector starts with no known logos.
        assert len(detector.known_logos) == 0

        detector.add_logo("new_competitor_logo.png", brand="New Competitor")

        # Exactly one logo is registered, under whatever id was assigned.
        assert len(detector.known_logos) == 1
        first_id = next(iter(detector.known_logos.keys()))
        assert detector.known_logos[first_id]["brand"] == "New Competitor"
|
||
|
||
|
||
class TestAuditPipeline:
    """Integration tests for the audit pipeline."""

    @pytest.mark.unit
    def test_audit_report_structure(self) -> None:
        """Check that an audit report contains every required field."""
        report = VideoAuditor().audit("test_video.mp4")

        # Fields the report must always contain.
        mandatory = (
            "report_id",
            "video_id",
            "processing_status",
            "asr_results",
            "ocr_results",
            "cv_results",
            "violations",
            "brief_compliance",
        )
        for name in mandatory:
            assert name in report

    @pytest.mark.unit
    def test_audit_processing_status(self) -> None:
        """Check that the audit report's status equals the COMPLETED status value."""
        report = VideoAuditor().audit("test_video.mp4")
        expected_status = ProcessingStatus.COMPLETED.value

        assert report["processing_status"] == expected_status
|
||
|
||
|
||
class TestBriefCompliance:
    """Brief compliance check tests.

    Acceptance criteria (FeatureSummary.md F-45):
    - Duration measurement error ≤ 0.5 seconds
    - Frequency counting accuracy ≥ 95%
    """

    @pytest.mark.unit
    def test_selling_point_coverage(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check selling-point detection across ASR and OCR text."""
        video_content = {
            "asr_text": "24小时持妆效果非常好,使用天然成分",
            "ocr_text": "24小时持妆",
        }

        result = BriefComplianceChecker().check_selling_points(
            video_content,
            sample_brief_rules["selling_points"],
        )

        # Expected coverage is 2 of 3 selling points; the count of three comes
        # from the fixture, which is defined outside this module.
        assert result["coverage_rate"] >= 0.66
        for selling_point in ("24小时持妆", "天然成分"):
            assert selling_point in result["detected"]

    @pytest.mark.unit
    def test_duration_requirement_check(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check the duration check for a product detected for six seconds."""
        # A single product detection spanning 0 ms to 6000 ms (6 seconds).
        product_span = {"object_type": "product", "start_ms": 0, "end_ms": 6000}

        # Requirement per the fixture (not shown here): product on screen > 5 seconds.
        result = BriefComplianceChecker().check_duration(
            [product_span],
            sample_brief_rules["timing_requirements"],
        )

        visibility = result["product_visible"]
        assert visibility["status"] == "passed"
        assert visibility["detected_seconds"] == 6.0

    @pytest.mark.unit
    def test_frequency_requirement_check(
        self,
        sample_brief_rules: dict[str, Any],
    ) -> None:
        """Check the frequency check for a brand keyword mentioned in three segments."""
        spoken_lines = ("品牌名产品", "这个品牌名很好", "推荐品牌名")
        asr_segments = [{"text": line} for line in spoken_lines]

        # Requirement per the fixture (not shown here): brand name mentioned ≥ 3 times.
        result = BriefComplianceChecker().check_frequency(
            asr_segments,
            sample_brief_rules["timing_requirements"],
            brand_keyword="品牌名",
        )

        mention = result["brand_mention"]
        assert mention["status"] == "passed"
        assert mention["detected_count"] == 3
|