""" 巨量云图API封装 (T-023, T-027) 封装GetContentMaterialAnalysisInfo接口调用,获取视频分析数据。 T-027 修复: 1. 日期格式: YYYYMMDD (不是 YYYY-MM-DD) 2. Cookie 头: 直接使用 auth_token 完整值 3. industry_id: 字符串格式 ["12"] 4. A3 指标: API 返回字符串,需转为整数 """ import logging from datetime import datetime, timedelta from typing import Dict, Optional, Any, Union import httpx from app.config import settings from app.services.session_pool import ( session_pool, get_random_config, ) logger = logging.getLogger(__name__) # 巨量云图API基础URL YUNTU_BASE_URL = "https://yuntu.oceanengine.com" # 触发点ID列表(固定值) TRIGGER_POINT_IDS = ["610000", "610300", "610301"] class YuntuAPIError(Exception): """巨量云图API错误""" def __init__(self, message: str, status_code: int = 0, response_data: Any = None): self.message = message self.status_code = status_code self.response_data = response_data super().__init__(self.message) class SessionInvalidError(YuntuAPIError): """SessionID失效错误""" pass def _safe_int(value: Any, default: int = 0) -> int: """安全转换为整数,处理字符串类型的数字""" if value is None: return default if isinstance(value, int): return value if isinstance(value, str): try: return int(value) except ValueError: return default return default async def call_yuntu_api( item_id: str, publish_time: Union[datetime, None], industry_id: str, aadvid: str, auth_token: str, ) -> Dict[str, Any]: """ 调用巨量云图GetContentMaterialAnalysisInfo接口。 Args: item_id: 视频ID publish_time: 发布时间 industry_id: 行业ID(字符串格式) aadvid: 广告主ID(URL参数) auth_token: Cookie完整值(如 "sessionid=xxx") Returns: Dict: API响应数据 Raises: SessionInvalidError: SessionID失效时抛出 YuntuAPIError: API调用失败时抛出 """ # 处理 publish_time if publish_time is None: publish_time = datetime.now() # T-027: 日期格式必须为 YYYYMMDD start_date = publish_time.strftime("%Y%m%d") end_date = (publish_time + timedelta(days=30)).strftime("%Y%m%d") # T-027: industry_id_list 为字符串数组 industry_id_list = [str(industry_id)] if industry_id else [] request_data = { "is_my_video": "0", "object_id": item_id, "object_type": 2, "start_date": start_date, "end_date": end_date, "assist_type": 3, "assist_video_type": 3, "industry_id_list": industry_id_list, "trigger_point_id_list": TRIGGER_POINT_IDS, } # T-027: Cookie 直接使用 auth_token 完整值 headers = { "Content-Type": "application/json", "Cookie": auth_token, } # URL 带 aadvid 参数 url = f"{YUNTU_BASE_URL}/yuntu_common/api/content/trigger_analysis/GetContentMaterialAnalysisInfo?aadvid={aadvid}" try: async with httpx.AsyncClient(timeout=settings.YUNTU_API_TIMEOUT) as client: response = await client.post( url, json=request_data, headers=headers, ) # 检查SessionID是否失效 if response.status_code in (401, 403): logger.warning(f"Session invalid: {auth_token[:20]}...") raise SessionInvalidError( f"Session invalid: {response.status_code}", status_code=response.status_code, ) if response.status_code != 200: raise YuntuAPIError( f"API returned {response.status_code}", status_code=response.status_code, response_data=response.text, ) data = response.json() # 检查业务错误 status = data.get("status", data.get("code", 0)) if status != 0: error_msg = data.get("msg", data.get("message", "Unknown error")) raise YuntuAPIError( f"API business error: {error_msg}", status_code=response.status_code, response_data=data, ) return data except httpx.TimeoutException: logger.error(f"Yuntu API timeout for item_id: {item_id}") raise YuntuAPIError("API request timeout") except httpx.RequestError as e: logger.error(f"Yuntu API request error: {e}") raise YuntuAPIError(f"API request error: {e}") async def get_video_analysis( item_id: str, publish_time: datetime, industry_id: str, max_retries: int = 3, ) -> Dict[str, Any]: """ 获取视频分析数据(随机选取配置)。 T-027: 改为随机选取任意一组 aadvid/auth_token,不按 brand_id 匹配。 Args: item_id: 视频ID publish_time: 发布时间 industry_id: 行业ID(来自数据库中的视频) max_retries: 最大重试次数 Returns: Dict: 视频分析数据 Raises: YuntuAPIError: API调用失败时抛出 """ last_error = None for attempt in range(max_retries): # T-027: 随机选取任意一组配置 config = await get_random_config() if config is None: last_error = YuntuAPIError("No config available in session pool") logger.warning(f"No config available, attempt {attempt + 1}/{max_retries}") continue logger.info( f"Using random config: aadvid={config['aadvid']}, attempt {attempt + 1}" ) try: result = await call_yuntu_api( item_id=item_id, publish_time=publish_time, industry_id=industry_id, # T-027: 使用数据库中视频的 industry_id aadvid=config["aadvid"], auth_token=config["auth_token"], ) return result except SessionInvalidError: # SessionID失效,从池中移除 session_pool.remove_by_auth_token(config["auth_token"]) logger.info( f"Session invalid, attempt {attempt + 1}/{max_retries}" ) last_error = SessionInvalidError("Session invalid after retries") continue except YuntuAPIError as e: last_error = e logger.error(f"Yuntu API error on attempt {attempt + 1}: {e.message}") # 非 session 错误不重试 break raise last_error or YuntuAPIError("Unknown error after retries") def parse_analysis_response(data: Dict[str, Any]) -> Dict[str, Any]: """ 解析巨量云图API响应,提取关键指标。 T-027: A3 指标在 API 响应中是字符串类型,需要转为整数。 Args: data: API原始响应数据 Returns: Dict: 结构化的分析数据 """ result_data = data.get("data", {}) or {} return { # 触达指标 "total_show_cnt": _safe_int(result_data.get("total_show_cnt")), "natural_show_cnt": _safe_int(result_data.get("natural_show_cnt")), "ad_show_cnt": _safe_int(result_data.get("ad_show_cnt")), "total_play_cnt": _safe_int(result_data.get("total_play_cnt")), "natural_play_cnt": _safe_int(result_data.get("natural_play_cnt")), "ad_play_cnt": _safe_int(result_data.get("ad_play_cnt")), "effective_play_cnt": _safe_int(result_data.get("effective_play_cnt")), # A3指标 - T-027: 转为整数 "a3_increase_cnt": _safe_int(result_data.get("a3_increase_cnt")), "ad_a3_increase_cnt": _safe_int(result_data.get("ad_a3_increase_cnt")), "natural_a3_increase_cnt": _safe_int(result_data.get("natural_a3_increase_cnt")), # 搜索指标 "after_view_search_uv": _safe_int(result_data.get("after_view_search_uv")), "after_view_search_pv": _safe_int(result_data.get("after_view_search_pv")), "brand_search_uv": _safe_int(result_data.get("brand_search_uv")), "product_search_uv": _safe_int(result_data.get("product_search_uv")), "return_search_cnt": _safe_int(result_data.get("return_search_cnt")), # 费用指标 "cost": _safe_int(result_data.get("cost")), "natural_cost": _safe_int(result_data.get("natural_cost")), "ad_cost": _safe_int(result_data.get("ad_cost")), }