基于项目需求文档(PRD.md, FeatureSummary.md, DevelopmentPlan.md, UIDesign.md, User_Role_Interfaces.md)编写的 TDD 测试用例。

后端测试 (Python/pytest):
- 单元测试: rule_engine, brief_parser, timestamp_alignment, video_auditor, validators
- 集成测试: API Brief, Video, Review 端点
- AI 模块测试: ASR, OCR, Logo 检测服务
- 全局 fixtures 和 pytest 配置

前端测试 (TypeScript/Vitest):
- 工具函数测试: utils.test.ts
- 组件测试: Button, VideoPlayer, ViolationList
- Hooks 测试: useVideoAudit, useVideoPlayer, useAppeal
- MSW mock handlers 配置

E2E 测试 (Playwright):
- 认证流程测试
- 视频上传流程测试
- 视频审核流程测试
- 申诉流程测试

所有测试当前使用 pytest.skip() / it.skip() 作为占位符, 遵循 TDD 红灯阶段 - 等待实现代码后运行。

验收标准覆盖:
- ASR WER ≤ 10%
- OCR 准确率 ≥ 95%
- Logo F1 ≥ 0.85
- 时间戳误差 ≤ 0.5s
- 频次统计准确率 ≥ 95%

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
308 lines
10 KiB
Python
308 lines
10 KiB
Python
"""
OCR service unit tests.

TDD test cases based on the acceptance criteria in DevelopmentPlan.md.

Acceptance criteria:
- Accuracy ≥ 95% (including complex backgrounds)
"""
|
||
|
||
import pytest
|
||
from typing import Any
|
||
|
||
# 导入待实现的模块(TDD 红灯阶段)
|
||
# from app.services.ai.ocr import OCRService, OCRResult, OCRDetection
|
||
|
||
|
||
@pytest.mark.ai
@pytest.mark.unit
class TestOCRService:
    """Basic OCR service tests: initialization, extraction, output shape.

    Every test here is a TDD red-phase placeholder: it only calls
    ``pytest.skip`` with a "to be implemented" reason. The commented-out
    lines sketch the assertions to enable once the OCR service exists.

    The ``ai`` and ``unit`` marks are declared once on the class; pytest
    applies class-level marks to each test method.
    """

    def test_ocr_service_initialization(self) -> None:
        """Placeholder: the OCR service should initialize and be ready."""
        # TODO: implement the OCR service
        # service = OCRService()
        # assert service.is_ready()
        # assert service.model_name is not None
        pytest.skip("待实现:OCR 服务初始化")

    def test_ocr_extract_text_from_image(self) -> None:
        """Placeholder: text should be extracted from a sample image."""
        # TODO: implement text extraction
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/text_sample.jpg")
        #
        # assert result.status == "success"
        # assert len(result.detections) > 0
        pytest.skip("待实现:图片文字提取")

    def test_ocr_output_format(self) -> None:
        """Placeholder: the OCR result should expose the expected fields."""
        # TODO: implement the OCR service
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/text_sample.jpg")
        #
        # # Verify the result structure
        # assert hasattr(result, "detections")
        # assert hasattr(result, "full_text")
        #
        # # Verify the structure of each detection
        # for detection in result.detections:
        #     assert hasattr(detection, "text")
        #     assert hasattr(detection, "confidence")
        #     assert hasattr(detection, "bbox")
        #     assert len(detection.bbox) == 4  # [x1, y1, x2, y2]
        pytest.skip("待实现:OCR 输出格式")
|
||
|
||
|
||
@pytest.mark.ai
@pytest.mark.unit
class TestOCRAccuracy:
    """OCR accuracy tests against the acceptance thresholds.

    Both tests are TDD red-phase placeholders that skip immediately; the
    commented-out lines sketch the intended measurement. The ``ai`` and
    ``unit`` marks are declared once on the class; pytest applies
    class-level marks to each test method.
    """

    def test_ocr_accuracy_threshold(self) -> None:
        """Placeholder: overall OCR accuracy must meet the threshold.

        Acceptance criterion: accuracy >= 95%.
        """
        # TODO: validate against a labeled test set
        # service = OCRService()
        # test_cases = load_ocr_labeled_dataset()
        #
        # correct = 0
        # for case in test_cases:
        #     result = service.extract_text(case["image_path"])
        #     if normalize_text(result.full_text) == normalize_text(case["ground_truth"]):
        #         correct += 1
        #
        # accuracy = correct / len(test_cases)
        # assert accuracy >= 0.95, f"准确率 {accuracy:.2%} 低于阈值 95%"
        pytest.skip("待实现:OCR 准确率测试")

    # NOTE(review): the 0.90 thresholds for "complex_image" and "video_frame"
    # are below the 95% acceptance criterion stated in
    # test_ocr_accuracy_threshold -- confirm the relaxation is intended.
    @pytest.mark.parametrize("background_type,expected_accuracy", [
        ("simple_white", 0.99),   # plain white background
        ("solid_color", 0.98),    # solid-color background
        ("gradient", 0.95),       # gradient background
        ("complex_image", 0.90),  # complex image background
        ("video_frame", 0.90),    # video frame
    ])
    def test_ocr_accuracy_by_background(
        self,
        background_type: str,
        expected_accuracy: float,
    ) -> None:
        """Placeholder: OCR accuracy per background type.

        Args:
            background_type: Name of the background category under test;
                it is interpolated into the skip reason.
            expected_accuracy: Minimum accuracy for that category; only
                referenced by the commented-out assertion for now.
        """
        # TODO: implement the per-background-type test
        # service = OCRService()
        # test_cases = load_ocr_test_set_by_background(background_type)
        #
        # accuracy = calculate_ocr_accuracy(service, test_cases)
        # assert accuracy >= expected_accuracy
        pytest.skip(f"待实现:{background_type} OCR 准确率测试")
|
||
|
||
|
||
@pytest.mark.ai
@pytest.mark.unit
class TestOCRChinese:
    """Chinese-language OCR tests (simplified, traditional, mixed).

    All tests are TDD red-phase placeholders that skip immediately; the
    commented-out lines sketch the intended assertions. The ``ai`` and
    ``unit`` marks are declared once on the class; pytest applies
    class-level marks to each test method.
    """

    def test_simplified_chinese_recognition(self) -> None:
        """Placeholder: simplified Chinese text should be recognized."""
        # TODO: implement the simplified Chinese test
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/simplified_chinese.jpg")
        #
        # assert "测试" in result.full_text
        pytest.skip("待实现:简体中文识别")

    def test_traditional_chinese_recognition(self) -> None:
        """Placeholder: traditional Chinese text should be recognized."""
        # TODO: implement the traditional Chinese test
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/traditional_chinese.jpg")
        #
        # assert result.status == "success"
        pytest.skip("待实现:繁体中文识别")

    def test_mixed_chinese_english(self) -> None:
        """Placeholder: mixed Chinese/English text should be recognized."""
        # TODO: implement the mixed Chinese/English test
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/mixed_cn_en.jpg")
        #
        # # Both Chinese and English should be recognized together
        # assert result.status == "success"
        pytest.skip("待实现:中英混合识别")
|
||
|
||
|
||
@pytest.mark.ai
@pytest.mark.unit
class TestOCRVideoFrame:
    """OCR tests on video frames: subtitles, watermarks, frame batches.

    All tests are TDD red-phase placeholders that skip immediately; the
    commented-out lines sketch the intended assertions. The ``ai`` and
    ``unit`` marks are declared once on the class; pytest applies
    class-level marks to each test method.
    """

    def test_ocr_video_subtitle(self) -> None:
        """Placeholder: subtitle text in a video frame should be detected."""
        # TODO: implement subtitle recognition
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/video_subtitle.jpg")
        #
        # assert len(result.detections) > 0
        # # Subtitles usually sit in the lower part of the frame
        # # NOTE(review): the 0.6 comparison presumes normalized bbox
        # # coordinates and that the subtitle is the first detection --
        # # confirm both against the OCRService contract.
        # subtitle_detection = result.detections[0]
        # assert subtitle_detection.bbox[1] > 0.6  # y coordinate in the lower part
        pytest.skip("待实现:视频字幕识别")

    def test_ocr_watermark_detection(self) -> None:
        """Placeholder: watermark text should be picked up by OCR."""
        # TODO: implement watermark recognition
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/with_watermark.jpg")
        #
        # # Watermark text should be detected
        # # NOTE(review): the fallback `or len(...) > 0` lets the assertion
        # # pass without any watermark flag -- confirm that is intended.
        # watermark_found = any(
        #     d.is_watermark for d in result.detections
        # )
        # assert watermark_found or len(result.detections) > 0
        pytest.skip("待实现:水印文字识别")

    def test_ocr_batch_video_frames(self) -> None:
        """Placeholder: a batch of video frames should all be processed."""
        # TODO: implement batch processing
        # service = OCRService()
        # frame_paths = [
        #     f"tests/fixtures/images/frame_{i}.jpg"
        #     for i in range(10)
        # ]
        #
        # results = service.batch_extract(frame_paths)
        #
        # assert len(results) == 10
        # assert all(r.status == "success" for r in results)
        pytest.skip("待实现:批量视频帧 OCR")
|
||
|
||
|
||
@pytest.mark.ai
@pytest.mark.unit
class TestOCRSpecialCases:
    """OCR tests for unusual inputs: rotated, vertical, artistic, empty, blurry.

    All tests are TDD red-phase placeholders that skip immediately; the
    commented-out lines sketch the intended assertions. The ``ai`` and
    ``unit`` marks are declared once on the class; pytest applies
    class-level marks to each test method.
    """

    def test_rotated_text(self) -> None:
        """Placeholder: rotated text should be recognized."""
        # TODO: implement the rotated-text test
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/rotated_text.jpg")
        #
        # assert result.status == "success"
        # assert len(result.detections) > 0
        pytest.skip("待实现:旋转文字识别")

    def test_vertical_text(self) -> None:
        """Placeholder: vertically laid-out text should be recognized."""
        # TODO: implement the vertical-text test
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/vertical_text.jpg")
        #
        # assert result.status == "success"
        pytest.skip("待实现:竖排文字识别")

    def test_artistic_font(self) -> None:
        """Placeholder: text in artistic fonts should be recognized."""
        # TODO: implement the artistic-font test
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/artistic_font.jpg")
        #
        # # Accuracy on artistic fonts may be lower, but recognition
        # # should still succeed
        # assert result.status == "success"
        pytest.skip("待实现:艺术字体识别")

    def test_no_text_image(self) -> None:
        """Placeholder: an image without text should yield an empty result."""
        # TODO: implement the no-text test
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/no_text.jpg")
        #
        # assert result.status == "success"
        # assert len(result.detections) == 0
        # assert result.full_text == ""
        pytest.skip("待实现:无文字图片处理")

    def test_blurry_text(self) -> None:
        """Placeholder: blurry text should fail or come back low-confidence."""
        # TODO: implement the blurry-text test
        # service = OCRService()
        # result = service.extract_text("tests/fixtures/images/blurry_text.jpg")
        #
        # # Blurry text may fail to be recognized or have low confidence
        # # NOTE(review): asserting confidence < 0.9 would fail a model that
        # # reads the blurry sample well -- confirm this is the intended check.
        # if result.status == "success" and len(result.detections) > 0:
        #     avg_confidence = (
        #         sum(d.confidence for d in result.detections)
        #         / len(result.detections)
        #     )
        #     assert avg_confidence < 0.9  # confidence should be low
        pytest.skip("待实现:模糊文字识别")
|
||
|
||
|
||
@pytest.mark.ai
@pytest.mark.performance
class TestOCRPerformance:
    """OCR processing-speed tests for single images and frame batches.

    Both tests are TDD red-phase placeholders that skip immediately; the
    commented-out lines sketch the intended timing checks. The ``ai`` and
    ``performance`` marks are declared once on the class; pytest applies
    class-level marks to each test method.
    """

    def test_ocr_processing_speed(self) -> None:
        """Placeholder: a single image should be processed within 1 second."""
        # TODO: implement the performance test
        # import time
        #
        # service = OCRService()
        #
        # # Standard 1080p image
        # start_time = time.time()
        # result = service.extract_text("tests/fixtures/images/1080p_sample.jpg")
        # processing_time = time.time() - start_time
        #
        # # A single image should be processed in under 1 second
        # assert processing_time < 1.0, \
        #     f"处理时间 {processing_time:.2f}s 超过阈值 1s"
        pytest.skip("待实现:OCR 处理速度测试")

    def test_ocr_batch_processing_speed(self) -> None:
        """Placeholder: a 30-frame batch should be processed within 5 seconds."""
        # TODO: implement the batch performance test
        # import time
        #
        # service = OCRService()
        # frame_paths = [
        #     f"tests/fixtures/images/frame_{i}.jpg"
        #     for i in range(30)  # 30 frames = 1 second of video @ 30fps
        # ]
        #
        # start_time = time.time()
        # results = service.batch_extract(frame_paths)
        # processing_time = time.time() - start_time
        #
        # # 30 frames should finish within 5 seconds
        # assert processing_time < 5.0
        pytest.skip("待实现:批量 OCR 处理速度测试")
|