基于项目需求文档(PRD.md, FeatureSummary.md, DevelopmentPlan.md, UIDesign.md, User_Role_Interfaces.md)编写的 TDD 测试用例。 后端测试 (Python/pytest): - 单元测试: rule_engine, brief_parser, timestamp_alignment, video_auditor, validators - 集成测试: API Brief, Video, Review 端点 - AI 模块测试: ASR, OCR, Logo 检测服务 - 全局 fixtures 和 pytest 配置 前端测试 (TypeScript/Vitest): - 工具函数测试: utils.test.ts - 组件测试: Button, VideoPlayer, ViolationList - Hooks 测试: useVideoAudit, useVideoPlayer, useAppeal - MSW mock handlers 配置 E2E 测试 (Playwright): - 认证流程测试 - 视频上传流程测试 - 视频审核流程测试 - 申诉流程测试 所有测试当前使用 pytest.skip() / it.skip() 作为占位符, 遵循 TDD 红灯阶段 - 等待实现代码后运行。 验收标准覆盖: - ASR WER ≤ 10% - OCR 准确率 ≥ 95% - Logo F1 ≥ 0.85 - 时间戳误差 ≤ 0.5s - 频次统计准确率 ≥ 95% Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
279 lines
9.4 KiB
Python
279 lines
9.4 KiB
Python
"""
|
||
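Unit tests for the rule engine.

TDD test cases based on the acceptance criteria in FeatureSummary.md
(F-03, F-04, F-05-A, F-06).

Acceptance criteria:
- Prohibited-word recall ≥ 95%
- Prohibited-word false-positive rate ≤ 5%
- Context-understanding false-positive rate ≤ 5%
- Rule-conflict messages are clear and traceable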
|
||
"""
|
||
|
||
import pytest
|
||
from typing import Any
|
||
|
||
# 导入待实现的模块(TDD 红灯阶段 - 模块尚未实现)
|
||
# from app.services.rule_engine import RuleEngine, ProhibitedWordDetector, RuleConflictDetector
|
||
|
||
|
||
class TestProhibitedWordDetector:
    """Tests for the prohibited-word detector.

    Acceptance criteria (FeatureSummary.md):
    - Recall ≥ 95%
    - False-positive rate ≤ 5%

    Each test below is still a placeholder that skips itself; the intended
    assertions are kept as commented-out sketches until the detector exists.
    """

    # Every test in this class carries the ``unit`` marker.
    pytestmark = pytest.mark.unit

    @pytest.mark.parametrize(
        ("text", "context", "expected_violations", "should_detect"),
        [
            # Advertising context - should be detected
            ("这是全网销量第一的产品", "advertisement", ["第一"], True),
            ("我们是行业领导者", "advertisement", ["领导者"], True),
            ("史上最低价促销", "advertisement", ["最", "史上"], True),
            ("绝对有效果", "advertisement", ["绝对"], True),
            # Everyday context - should NOT be detected (context awareness)
            ("今天是我最开心的一天", "daily", [], False),
            ("这是我第一次来这里", "daily", [], False),
            ("我最喜欢吃苹果", "daily", [], False),
            # Edge cases
            ("", "advertisement", [], False),
            ("普通的产品介绍,没有违禁词", "advertisement", [], False),
        ],
    )
    def test_detect_prohibited_words(
        self,
        text: str,
        context: str,
        expected_violations: list[str],
        should_detect: bool,
    ) -> None:
        """Check the accuracy of prohibited-word detection per case."""
        # TODO: implement ProhibitedWordDetector, then enable:
        # detector = ProhibitedWordDetector()
        # result = detector.detect(text, context=context)
        #
        # if should_detect:
        #     assert len(result.violations) > 0
        #     for word in expected_violations:
        #         assert any(word in v.content for v in result.violations)
        # else:
        #     assert len(result.violations) == 0
        pytest.skip("待实现:ProhibitedWordDetector")

    def test_recall_rate_above_threshold(
        self,
        prohibited_word_test_cases: list[dict[str, Any]],
    ) -> None:
        """Verify that recall is ≥ 95%.

        Recall = number of correct detections / total number of cases that
        should be detected.
        """
        # TODO: validate recall against the full test set, then enable:
        # detector = ProhibitedWordDetector()
        # positive_cases = [c for c in prohibited_word_test_cases if c["should_detect"]]
        #
        # true_positives = 0
        # for case in positive_cases:
        #     result = detector.detect(case["text"], context=case["context"])
        #     if result.violations:
        #         true_positives += 1
        #
        # recall = true_positives / len(positive_cases)
        # assert recall >= 0.95, f"召回率 {recall:.2%} 低于阈值 95%"
        pytest.skip("待实现:召回率测试")

    def test_false_positive_rate_below_threshold(
        self,
        prohibited_word_test_cases: list[dict[str, Any]],
    ) -> None:
        """Verify that the false-positive rate is ≤ 5%.

        False-positive rate = number of wrong detections / total number of
        cases that should not be detected.
        """
        # TODO: validate the false-positive rate against the full test set,
        # then enable:
        # detector = ProhibitedWordDetector()
        # negative_cases = [c for c in prohibited_word_test_cases if not c["should_detect"]]
        #
        # false_positives = 0
        # for case in negative_cases:
        #     result = detector.detect(case["text"], context=case["context"])
        #     if result.violations:
        #         false_positives += 1
        #
        # fpr = false_positives / len(negative_cases)
        # assert fpr <= 0.05, f"误报率 {fpr:.2%} 超过阈值 5%"
        pytest.skip("待实现:误报率测试")
|
||
|
||
|
||
class TestContextUnderstanding:
    """Tests for context understanding.

    Acceptance criteria (DevelopmentPlan.md, chapter 8):
    - False-positive rate ≤ 5% when telling advertising superlatives apart
      from non-advertising context
    - "最开心的一天" ("the happiest day") must not be judged a violation

    Each test below is still a placeholder that skips itself.
    """

    # Every test in this class carries the ``unit`` marker.
    pytestmark = pytest.mark.unit

    @pytest.mark.parametrize(
        ("text", "expected_context", "should_flag"),
        [
            ("这款产品是最好的选择", "advertisement", True),
            ("最近天气真好", "daily", False),
            ("今天心情最棒了", "daily", False),
            ("我们的产品效果最显著", "advertisement", True),
            ("这是我见过最美的风景", "daily", False),
            ("全网销量第一,值得信赖", "advertisement", True),
            ("我第一次尝试这个运动", "daily", False),
        ],
    )
    def test_context_classification(
        self,
        text: str,
        expected_context: str,
        should_flag: bool,
    ) -> None:
        """Check the accuracy of context classification per case."""
        # TODO: implement the context classifier, then enable:
        # classifier = ContextClassifier()
        # result = classifier.classify(text)
        #
        # assert result.context == expected_context
        # if should_flag:
        #     assert result.is_advertisement_context
        # else:
        #     assert not result.is_advertisement_context
        pytest.skip("待实现:ContextClassifier")

    def test_happy_day_not_flagged(self) -> None:
        """Key case: "最开心的一天" must not be misjudged as a violation.

        This test case is explicitly required by DevelopmentPlan.md.
        """
        sentence = "今天是我最开心的一天"

        # TODO: implement the detector, then enable:
        # detector = ProhibitedWordDetector()
        # result = detector.detect(sentence, context="auto")  # auto-detect the context
        #
        # assert len(result.violations) == 0, "「最开心的一天」被误判为违规"
        pytest.skip("待实现:语境感知检测")
|
||
|
||
|
||
class TestRuleConflictDetector:
    """Tests for rule-conflict detection.

    Acceptance criteria (FeatureSummary.md F-03):
    - Rule-conflict messages are clear and traceable

    Each test below is still a placeholder that skips itself.
    """

    # Every test in this class carries the ``unit`` marker.
    pytestmark = pytest.mark.unit

    def test_detect_brief_platform_conflict(
        self,
        sample_brief_rules: dict[str, Any],
        sample_platform_rules: dict[str, Any],
    ) -> None:
        """Check that a Brief rule clashing with a platform rule is reported."""
        # Build the conflict scenario: the Brief allows "最佳效果" while the
        # platform forbids "最". The fixture dict is copied, not mutated.
        conflicting_brief = dict(sample_brief_rules, allowed_words=["最佳效果"])

        # TODO: implement the conflict detector, then enable:
        # detector = RuleConflictDetector()
        # conflicts = detector.detect(conflicting_brief, sample_platform_rules)
        #
        # assert len(conflicts) > 0
        # assert any("最" in c.conflicting_term for c in conflicts)
        # assert all(c.resolution_suggestion is not None for c in conflicts)
        pytest.skip("待实现:RuleConflictDetector")

    def test_no_conflict_when_compatible(
        self,
        sample_brief_rules: dict[str, Any],
        sample_platform_rules: dict[str, Any],
    ) -> None:
        """Check that compatible rule sets produce no conflicts."""
        # TODO: implement the conflict detector, then enable:
        # detector = RuleConflictDetector()
        # conflicts = detector.detect(sample_brief_rules, sample_platform_rules)
        #
        # # The standard Brief rules should be compatible with the platform rules
        # assert len(conflicts) == 0
        pytest.skip("待实现:规则兼容性测试")
|
||
|
||
|
||
class TestRuleVersioning:
    """Tests for rule version management.

    Acceptance criteria (FeatureSummary.md F-06):
    - The rule change history is traceable
    - Rolling back to a historical version is supported

    Each test below is still a placeholder that skips itself.
    """

    # Every test in this class carries the ``unit`` marker.
    pytestmark = pytest.mark.unit

    def test_rule_version_tracking(self) -> None:
        """Check that rule versions are tracked across create and update."""
        # TODO: implement rule version management, then enable:
        # rule_manager = RuleVersionManager()
        #
        # # Create the rule
        # rule_v1 = rule_manager.create_rule({"word": "最", "severity": "hard"})
        # assert rule_v1.version == "v1.0.0"
        #
        # # Update the rule
        # rule_v2 = rule_manager.update_rule(rule_v1.id, {"severity": "soft"})
        # assert rule_v2.version == "v1.1.0"
        #
        # # Inspect the history
        # history = rule_manager.get_history(rule_v1.id)
        # assert len(history) == 2
        pytest.skip("待实现:RuleVersionManager")

    def test_rule_rollback(self) -> None:
        """Check that a rule can be rolled back to an earlier version."""
        # TODO: implement rule rollback, then enable:
        # rule_manager = RuleVersionManager()
        #
        # rule_v1 = rule_manager.create_rule({"word": "最", "severity": "hard"})
        # rule_v2 = rule_manager.update_rule(rule_v1.id, {"severity": "soft"})
        #
        # # Roll back to v1
        # rolled_back = rule_manager.rollback(rule_v1.id, "v1.0.0")
        # assert rolled_back.severity == "hard"
        pytest.skip("待实现:规则回滚")
|
||
|
||
|
||
class TestPlatformRuleSync:
    """Tests for platform rule synchronisation.

    Acceptance criteria (PRD.md):
    - Rules are updated within ≤ 1 working day after a platform rule change

    The test below is still a placeholder that skips itself.
    """

    # Every test in this class carries the ``unit`` marker.
    pytestmark = pytest.mark.unit

    def test_platform_rule_update_notification(self) -> None:
        """Check that a platform rule update is applied and notified."""
        # TODO: implement platform rule synchronisation, then enable:
        # sync_service = PlatformRuleSyncService()
        #
        # # Simulate a Douyin rule update
        # new_rules = {"forbidden_words": [{"word": "新违禁词", "category": "ad_law"}]}
        # result = sync_service.sync_platform_rules("douyin", new_rules)
        #
        # assert result.updated
        # assert result.notification_sent
        pytest.skip("待实现:PlatformRuleSyncService")
|