zfc f123f68be3 feat(video-analysis): 完成视频分析模块迭代任务
Bug 修复:
- T-019: 修复品牌API响应解析,正确解析 data[0].brand_name
- T-020: 添加品牌API Bearer Token认证

视频分析功能:
- T-021: SessionID池服务,从内部API获取Cookie列表
- T-022: SessionID自动重试,失效时自动切换重试
- T-023: 巨量云图API封装,支持超时和错误处理
- T-024: 视频分析数据接口 GET /api/v1/videos/{item_id}/analysis
- T-025: 数据库A3指标更新
- T-026: 视频分析前端页面,展示6大类25+指标

测试覆盖率:
- brand_api.py: 100%
- session_pool.py: 100%
- yuntu_api.py: 100%
- video_analysis.py: 99%

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-28 17:51:35 +08:00

229 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
巨量云图API封装 (T-023)
封装GetContentMaterialAnalysisInfo接口调用获取视频分析数据。
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any
import httpx
from app.config import settings
from app.services.session_pool import session_pool, get_session_with_retry
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Base URL of the Juliang Yuntu API
YUNTU_BASE_URL = "https://yuntu.oceanengine.com"
# Trigger-point ID list (fixed values)
TRIGGER_POINT_IDS = ["610000", "610300", "610301"]
class YuntuAPIError(Exception):
    """Error raised when a Juliang Yuntu API call fails.

    Attributes:
        message: Human-readable description; also used as the exception text.
        status_code: Status code supplied by the raiser (0 when not given).
        response_data: Optional payload supplied by the raiser for diagnostics.
    """

    def __init__(self, message: str, status_code: int = 0, response_data: Any = None):
        # Initialise the base Exception with the message so str(exc) == message.
        super().__init__(message)
        self.message, self.status_code, self.response_data = (
            message,
            status_code,
            response_data,
        )
class SessionInvalidError(YuntuAPIError):
    """Raised when the SessionID used for a request is no longer valid."""
async def call_yuntu_api(
    item_id: str,
    publish_time: datetime,
    industry_id: str,
    session_id: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Call the Juliang Yuntu GetContentMaterialAnalysisInfo endpoint.

    Args:
        item_id: Video ID, sent as ``object_id``.
        publish_time: Publish time; the query window is
            [publish_time, publish_time + 30 days], formatted as YYYY-MM-DD.
        industry_id: Industry ID; an empty value sends an empty list.
        session_id: Optional sessionid; when omitted, one is obtained via
            ``get_session_with_retry()``.

    Returns:
        Dict: The decoded JSON response (whose ``code`` field is 0).

    Raises:
        SessionInvalidError: The endpoint answered 401 or 403.
        YuntuAPIError: No session available, timeout, transport error,
            non-200 status, undecodable/non-object JSON body, or a non-zero
            business ``code``.
    """
    # Fall back to the session pool when the caller did not supply a sessionid.
    if session_id is None:
        session_id = await get_session_with_retry()
        if session_id is None:
            raise YuntuAPIError("Failed to get valid session")

    # Build the request parameters: end_date = start_date + 30 days.
    start_date = publish_time.strftime("%Y-%m-%d")
    end_date = (publish_time + timedelta(days=30)).strftime("%Y-%m-%d")
    request_data = {
        "is_my_video": "0",
        "object_id": item_id,
        "object_type": 2,
        "start_date": start_date,
        "end_date": end_date,
        "assist_type": 3,
        "assist_video_type": 3,
        "industry_id_list": [industry_id] if industry_id else [],
        "trigger_point_id_list": TRIGGER_POINT_IDS,
    }

    # The session is passed to the API as a cookie.
    headers = {
        "Content-Type": "application/json",
        "Cookie": f"sessionid={session_id}",
    }
    url = (
        f"{YUNTU_BASE_URL}/yuntu_common/api/content/trigger_analysis/"
        "GetContentMaterialAnalysisInfo"
    )

    # Only the network call lives in the try body; TimeoutException must be
    # handled before RequestError because it is a subclass of it.
    try:
        async with httpx.AsyncClient(timeout=settings.YUNTU_API_TIMEOUT) as client:
            response = await client.post(url, json=request_data, headers=headers)
    except httpx.TimeoutException as exc:
        logger.error("Yuntu API timeout for item_id: %s", item_id)
        raise YuntuAPIError("API request timeout") from exc
    except httpx.RequestError as exc:
        logger.error("Yuntu API request error: %s", exc)
        raise YuntuAPIError(f"API request error: {exc}") from exc

    # 401/403 is treated as an invalid SessionID.
    if response.status_code in (401, 403):
        # Log only a prefix of the sessionid.
        logger.warning("Session invalid: %s...", session_id[:8])
        raise SessionInvalidError(
            f"Session invalid: {response.status_code}",
            status_code=response.status_code,
        )

    if response.status_code != 200:
        raise YuntuAPIError(
            f"API returned {response.status_code}",
            status_code=response.status_code,
            response_data=response.text,
        )

    # A 200 response may still carry a non-JSON body; surface that as a
    # YuntuAPIError instead of leaking a bare ValueError to callers.
    try:
        data = response.json()
    except ValueError as exc:
        logger.error("Yuntu API returned invalid JSON for item_id: %s", item_id)
        raise YuntuAPIError(
            "API returned invalid JSON",
            status_code=response.status_code,
            response_data=response.text,
        ) from exc

    # The business-code check below needs a JSON object.
    if not isinstance(data, dict):
        raise YuntuAPIError(
            "API returned unexpected JSON payload",
            status_code=response.status_code,
            response_data=data,
        )

    # Check the business error code.
    if data.get("code") != 0:
        error_msg = data.get("message", "Unknown error")
        raise YuntuAPIError(
            f"API business error: {error_msg}",
            status_code=response.status_code,
            response_data=data,
        )

    return data
async def get_video_analysis(
    item_id: str,
    publish_time: datetime,
    industry_id: str,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """
    Fetch video analysis data, retrying with a new SessionID when one is invalid (T-022).

    Args:
        item_id: Video ID.
        publish_time: Publish time.
        industry_id: Industry ID.
        max_retries: Maximum number of attempts.

    Returns:
        Dict: The video analysis data returned by ``call_yuntu_api``.

    Raises:
        YuntuAPIError: Raised once attempts are exhausted or a
            non-session error occurs.
    """
    failure = None
    for attempt_no in range(1, max_retries + 1):
        # Take a sessionid from the pool for this attempt.
        sid = await get_session_with_retry()
        if sid is None:
            failure = YuntuAPIError("Failed to get valid session")
            continue

        try:
            return await call_yuntu_api(
                item_id=item_id,
                publish_time=publish_time,
                industry_id=industry_id,
                session_id=sid,
            )
        except SessionInvalidError:
            # Invalid SessionID: drop it from the pool and try the next one.
            session_pool.remove(sid)
            logger.info(
                f"Session invalid, retrying... attempt {attempt_no}/{max_retries}"
            )
            failure = SessionInvalidError("All sessions invalid")
        except YuntuAPIError as err:
            failure = err
            logger.error(f"Yuntu API error on attempt {attempt_no}: {err.message}")
            # Not a SessionID problem, so stop retrying.
            break

    if failure is None:
        failure = YuntuAPIError("Unknown error after retries")
    raise failure
def parse_analysis_response(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Parse a Juliang Yuntu API response and extract the key metrics.

    Args:
        data: Raw API response; metrics are read from its ``"data"`` object.

    Returns:
        Dict: One entry per known metric key, in a fixed order. A metric that
        is absent from the response defaults to 0. A missing or null
        ``"data"`` object yields all zeros.
    """
    # The API may send `"data": null`; `.get("data", {})` would return None in
    # that case and crash on the next lookup, so normalise to an empty dict.
    result_data = (data or {}).get("data") or {}

    metric_keys = (
        # Reach metrics
        "total_show_cnt",  # total impressions
        "natural_show_cnt",  # organic impressions
        "ad_show_cnt",  # boosted impressions
        "total_play_cnt",  # total plays
        "natural_play_cnt",  # organic plays
        "ad_play_cnt",  # boosted plays
        "effective_play_cnt",  # effective plays
        # A3 metrics
        "a3_increase_cnt",  # new A3
        "ad_a3_increase_cnt",  # boosted new A3
        "natural_a3_increase_cnt",  # organic new A3
        # Search metrics
        "after_view_search_uv",  # users who searched after viewing
        "after_view_search_pv",  # searches after viewing
        "brand_search_uv",  # brand search users
        "product_search_uv",  # product search users
        "return_search_cnt",  # return-search count
        # Cost metrics
        "cost",  # total cost
        "natural_cost",  # organic cost
        "ad_cost",  # boosted cost
    )
    return {key: result_data.get(key, 0) for key in metric_keys}