videos1.0/backend/tests/unit/test_brief_parser.py
Your Name 040aada160 feat: 添加全面的 TDD 测试套件框架
基于项目需求文档(PRD.md, FeatureSummary.md, DevelopmentPlan.md,
UIDesign.md, User_Role_Interfaces.md)编写的 TDD 测试用例。

后端测试 (Python/pytest):
- 单元测试: rule_engine, brief_parser, timestamp_alignment,
  video_auditor, validators
- 集成测试: API Brief, Video, Review 端点
- AI 模块测试: ASR, OCR, Logo 检测服务
- 全局 fixtures 和 pytest 配置

前端测试 (TypeScript/Vitest):
- 工具函数测试: utils.test.ts
- 组件测试: Button, VideoPlayer, ViolationList
- Hooks 测试: useVideoAudit, useVideoPlayer, useAppeal
- MSW mock handlers 配置

E2E 测试 (Playwright):
- 认证流程测试
- 视频上传流程测试
- 视频审核流程测试
- 申诉流程测试

所有测试当前使用 pytest.skip() / it.skip() 作为占位符,
遵循 TDD 红灯阶段 - 等待实现代码后运行。

验收标准覆盖:
- ASR WER ≤ 10%
- OCR 准确率 ≥ 95%
- Logo F1 ≥ 0.85
- 时间戳误差 ≤ 0.5s
- 频次统计准确率 ≥ 95%

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:22:24 +08:00

340 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Brief 解析模块单元测试
TDD 测试用例 - 基于 FeatureSummary.md (F-01, F-02) 的验收标准
验收标准:
- 图文混排解析准确率 > 90%
- 支持 PDF/Word/Excel/PPT/图片格式
- 支持飞书/Notion 在线文档链接
"""
import pytest
from typing import Any
from pathlib import Path
# 导入待实现的模块(TDD 红灯阶段)
# from app.services.brief_parser import BriefParser, BriefParsingResult
@pytest.mark.unit
class TestBriefParser:
    """Unit tests for extracting structured data from a Brief.

    Acceptance criterion (FeatureSummary.md F-01): parsing accuracy > 90%.

    Every test is a TDD placeholder: the intended assertions are kept as
    comments and the test calls ``pytest.skip`` until ``BriefParser`` is
    implemented. The ``unit`` marker is applied once on the class and so
    covers every test method.
    """

    def test_extract_selling_points(self) -> None:
        """Selling points listed in the brief should be extracted."""
        # Fixture for the commented-out assertions below (unused until then).
        brief_content = (
            "\n"
            "产品核心卖点:\n"
            "1. 24小时持妆\n"
            "2. 天然成分\n"
            "3. 敏感肌适用\n"
        )
        # TODO: implement BriefParser
        # parser = BriefParser()
        # result = parser.extract_selling_points(brief_content)
        #
        # assert len(result.selling_points) >= 3
        # assert "24小时持妆" in [sp.text for sp in result.selling_points]
        # assert "天然成分" in [sp.text for sp in result.selling_points]
        # assert "敏感肌适用" in [sp.text for sp in result.selling_points]
        pytest.skip("待实现:BriefParser.extract_selling_points")

    def test_extract_forbidden_words(self) -> None:
        """Forbidden words listed in the brief should be extracted."""
        # Fixture for the commented-out assertions below (unused until then).
        brief_content = (
            "\n"
            "禁止使用的词汇:\n"
            "- 药用\n"
            "- 治疗\n"
            "- 根治\n"
            "- 最有效\n"
        )
        # TODO: implement BriefParser
        # parser = BriefParser()
        # result = parser.extract_forbidden_words(brief_content)
        #
        # expected = {"药用", "治疗", "根治", "最有效"}
        # assert set(w.word for w in result.forbidden_words) == expected
        pytest.skip("待实现:BriefParser.extract_forbidden_words")

    def test_extract_timing_requirements(self) -> None:
        """Duration/frequency requirements should be extracted."""
        # Fixture for the commented-out assertions below (unused until then).
        brief_content = (
            "\n"
            "拍摄要求:\n"
            "- 产品同框时长 > 5秒\n"
            "- 品牌名提及次数 ≥ 3次\n"
            "- 产品使用演示 ≥ 10秒\n"
        )
        # TODO: implement BriefParser
        # parser = BriefParser()
        # result = parser.extract_timing_requirements(brief_content)
        #
        # assert len(result.timing_requirements) >= 3
        #
        # product_visible = next(
        #     (t for t in result.timing_requirements if t.type == "product_visible"),
        #     None
        # )
        # assert product_visible is not None
        # assert product_visible.min_duration_seconds == 5
        #
        # brand_mention = next(
        #     (t for t in result.timing_requirements if t.type == "brand_mention"),
        #     None
        # )
        # assert brand_mention is not None
        # assert brand_mention.min_frequency == 3
        pytest.skip("待实现:BriefParser.extract_timing_requirements")

    def test_extract_brand_tone(self) -> None:
        """Brand tone (style keywords) should be extracted."""
        # Fixture for the commented-out assertions below (unused until then).
        # Each bullet uses the same "key:value" shape.
        brief_content = (
            "\n"
            "品牌调性:\n"
            "- 风格:年轻活力、专业可信\n"
            "- 目标人群:18-35岁女性\n"
            "- 表达方式:亲和、不做作\n"
        )
        # TODO: implement BriefParser
        # parser = BriefParser()
        # result = parser.extract_brand_tone(brief_content)
        #
        # assert result.brand_tone is not None
        # assert "年轻活力" in result.brand_tone.style
        # assert "专业可信" in result.brand_tone.style
        pytest.skip("待实现:BriefParser.extract_brand_tone")

    def test_full_brief_parsing_accuracy(self) -> None:
        """A complete brief should be parsed with the required accuracy.

        Acceptance criterion: accuracy > 90%.
        """
        # Fixture for the commented-out assertions below (unused until then).
        brief_content = (
            "\n"
            "# 品牌 Brief - XX美妆产品\n"
            "## 产品卖点\n"
            "1. 24小时持妆效果\n"
            "2. 添加天然植物成分\n"
            "3. 通过敏感肌测试\n"
            "## 禁用词汇\n"
            "- 药用、治疗、根治\n"
            "- 最好、第一、绝对\n"
            "## 拍摄要求\n"
            "- 产品正面展示 ≥ 5秒\n"
            "- 品牌名提及 ≥ 3次\n"
            "## 品牌调性\n"
            "年轻、时尚、专业\n"
        )
        # TODO: implement BriefParser
        # parser = BriefParser()
        # result = parser.parse(brief_content)
        #
        # # Check that every section was parsed
        # assert len(result.selling_points) >= 3
        # assert len(result.forbidden_words) >= 4
        # assert len(result.timing_requirements) >= 2
        # assert result.brand_tone is not None
        #
        # # Check the accuracy
        # NOTE(review): the criterion says "> 90%" but the draft assertion
        # uses ">= 0.90" — confirm which bound is intended.
        # assert result.accuracy_rate >= 0.90
        pytest.skip("待实现:BriefParser.parse")
@pytest.mark.unit
class TestBriefFileFormats:
    """Unit tests for the file formats a Brief may be uploaded in.

    Acceptance criterion (FeatureSummary.md F-01): PDF, Word, Excel, PPT
    and image files are supported.

    Both tests are TDD placeholders that call ``pytest.skip`` until the
    validator exists. The ``unit`` marker is applied once on the class
    and so covers every test method.
    """

    @pytest.mark.parametrize("file_format,mime_type", [
        ("pdf", "application/pdf"),
        ("docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
        ("xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
        ("pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"),
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
    ])
    def test_supported_file_formats(self, file_format: str, mime_type: str) -> None:
        """Each supported extension should be accepted and map to its MIME type.

        Args:
            file_format: File extension without the leading dot.
            mime_type: MIME type expected for ``file_format``.
        """
        # TODO: implement file-format validation
        # validator = BriefFileValidator()
        # assert validator.is_supported(file_format)
        # assert validator.get_mime_type(file_format) == mime_type
        pytest.skip("待实现:BriefFileValidator")

    @pytest.mark.parametrize("file_format", [
        "exe", "zip", "rar", "mp4", "mp3",
    ])
    def test_unsupported_file_formats(self, file_format: str) -> None:
        """Extensions outside the supported set should be rejected.

        Args:
            file_format: File extension without the leading dot.
        """
        # TODO: implement file-format validation
        # validator = BriefFileValidator()
        # assert not validator.is_supported(file_format)
        pytest.skip("待实现:不支持的格式验证")
@pytest.mark.unit
class TestOnlineDocumentImport:
    """Unit tests for importing a Brief from an online document link.

    Acceptance criteria (FeatureSummary.md F-02):
    - Feishu and Notion share links are supported.
    - Only authorized share links are supported.

    Both tests are TDD placeholders that call ``pytest.skip`` until the
    validator/importer exist. The ``unit`` marker is applied once on the
    class and so covers every test method.
    """

    @pytest.mark.parametrize("url,expected_valid", [
        # Feishu documents
        ("https://docs.feishu.cn/docs/abc123", True),
        ("https://abc.feishu.cn/docx/xyz789", True),
        # Notion documents
        ("https://www.notion.so/workspace/page-abc123", True),
        ("https://notion.so/page-xyz789", True),
        # Unsupported links
        ("https://google.com/doc/123", False),
        ("https://docs.google.com/document/d/123", False),  # Google Docs is not supported yet
        ("https://example.com/brief.pdf", False),
    ])
    def test_online_document_url_validation(self, url: str, expected_valid: bool) -> None:
        """URL validation should accept Feishu/Notion links and reject others.

        Args:
            url: Candidate document link.
            expected_valid: Whether the validator is expected to accept ``url``.
        """
        # TODO: implement the URL validator
        # validator = OnlineDocumentValidator()
        # assert validator.is_valid(url) == expected_valid
        pytest.skip("待实现:OnlineDocumentValidator")

    def test_unauthorized_link_returns_error(self) -> None:
        """A link without access permission should yield an explicit error."""
        # Fixture for the commented-out assertions below (unused until then).
        unauthorized_url = "https://docs.feishu.cn/docs/restricted-doc"
        # TODO: implement online document import
        # importer = OnlineDocumentImporter()
        # result = importer.import_document(unauthorized_url)
        #
        # assert result.status == "failed"
        # assert result.error_code == "ACCESS_DENIED"
        # assert "权限" in result.error_message or "access" in result.error_message.lower()
        pytest.skip("待实现:OnlineDocumentImporter")
@pytest.mark.unit
class TestBriefParsingEdgeCases:
    """Unit tests for edge cases of Brief parsing.

    Every test is a TDD placeholder: the intended assertions are kept as
    comments and the test calls ``pytest.skip``. The ``unit`` marker is
    applied once on the class and so covers every test method.
    """

    def test_encrypted_pdf_handling(self) -> None:
        """An encrypted PDF should fail and suggest manual input as fallback."""
        # TODO: implement encrypted-PDF detection
        # parser = BriefParser()
        # result = parser.parse_file("encrypted.pdf")
        #
        # assert result.status == "failed"
        # assert result.error_code == "ENCRYPTED_FILE"
        # assert "手动输入" in result.fallback_suggestion
        pytest.skip("待实现:加密 PDF 处理")

    def test_empty_brief_handling(self) -> None:
        """An empty Brief should be reported as a failure."""
        # TODO: implement empty-content handling
        # parser = BriefParser()
        # result = parser.parse("")
        #
        # assert result.status == "failed"
        # assert result.error_code == "EMPTY_CONTENT"
        pytest.skip("待实现:空 Brief 处理")

    def test_non_chinese_brief_handling(self) -> None:
        """A Brief written in a language other than Chinese should be handled."""
        # Fixture for the commented-out assertions below (unused until then).
        brief_in_english = (
            "\n"
            "Product Features:\n"
            "1. 24-hour long-lasting\n"
            "2. Natural ingredients\n"
        )
        # TODO: implement language detection
        # parser = BriefParser()
        # result = parser.parse(brief_in_english)
        #
        # # English should be processed, with the language reported
        # assert result.detected_language == "en"
        pytest.skip("待实现:多语言 Brief 处理")

    def test_image_brief_with_text_extraction(self) -> None:
        """Text should be extracted (OCR) from a Brief supplied as an image."""
        # TODO: implement OCR for image Briefs
        # parser = BriefParser()
        # result = parser.parse_image("brief_screenshot.png")
        #
        # assert result.status == "success"
        # assert len(result.extracted_text) > 0
        pytest.skip("待实现:图片 Brief OCR")
@pytest.mark.unit
class TestBriefParsingOutput:
    """Unit tests for the output format of Brief parsing.

    Both tests are TDD placeholders: the intended assertions are kept as
    comments and the test calls ``pytest.skip``. The ``unit`` marker is
    applied once on the class and so covers every test method.
    """

    def test_output_json_structure(self) -> None:
        """The JSON output should contain the required fields with list types."""
        # Fixture for the commented-out assertions below (unused until then).
        sample_brief = "测试 Brief 内容"
        # TODO: implement BriefParser
        # parser = BriefParser()
        # result = parser.parse(sample_brief)
        # output = result.to_json()
        #
        # # Required fields
        # assert "selling_points" in output
        # assert "forbidden_words" in output
        # assert "brand_tone" in output
        # assert "timing_requirements" in output
        # assert "platform" in output
        # assert "region" in output
        #
        # # Field types
        # assert isinstance(output["selling_points"], list)
        # assert isinstance(output["forbidden_words"], list)
        pytest.skip("待实现:输出 JSON 结构验证")

    def test_selling_point_structure(self) -> None:
        """Each parsed selling point should expose the expected attributes."""
        # TODO: implement selling-point structure validation
        # expected_fields = ["text", "priority", "evidence_snippet"]
        #
        # parser = BriefParser()
        # result = parser.parse("卖点测试")
        #
        # for sp in result.selling_points:
        #     for field in expected_fields:
        #         assert hasattr(sp, field)
        pytest.skip("待实现:卖点结构验证")