""" 练习 04: 解析 aweme_list。 这一题不连接浏览器,只练 JSON 结构解析。 目标: 1. 看懂 `aweme_list` 的层级 2. 提取 title / video_id / video_url 3. 返回一个列表,列表里每项都是字典 预期输出格式: [ { "title": "...", "video_id": "...", "video_url": "..." } ] 运行: ./.venv/bin/python learning/drissionpage_basics/04_parse_aweme.py """ from __future__ import annotations from typing import Any SAMPLE_PAYLOAD: dict[str, Any] = { "aweme_list": [ { "aweme_id": "7500000000000000001", "desc": "第一个示例视频", "video": { "play_addr": { "url_list": [ "https://example.com/play/first", "https://v11-weba.douyinvod.com/example/first.mp4", ] } }, }, { "aweme_id": "7500000000000000002", "desc": "第二个示例视频", "video": { "play_addr": { "url_list": [ "https://v26-web.douyinvod.com/example/second.mp4", ] } }, }, ] } def choose_video_url(url_list: list[str]) -> str: for url in url_list: if "douyinvod.com" in url: return url if url_list: return url_list[0] raise ValueError("url_list 为空,无法选择视频地址。") def parse_aweme_items(body: dict[str, Any]) -> list[dict[str, str]]: aweme_list = body.get("aweme_list") if not isinstance(aweme_list, list): raise ValueError("body 里缺少 aweme_list。") items: list[dict[str, str]] = [] for aweme in aweme_list: if not isinstance(aweme, dict): continue video_id = str(aweme.get("aweme_id") or "").strip() if not video_id: continue title = str(aweme.get("desc") or "").strip() or "untitled" video = aweme.get("video") or {} play_addr = video.get("play_addr") or {} url_list = play_addr.get("url_list") or [] if not isinstance(url_list, list) or not url_list: continue items.append( { "title": title, "video_id": video_id, "video_url": choose_video_url([str(url) for url in url_list]), } ) return items def main() -> None: items = parse_aweme_items(SAMPLE_PAYLOAD) print(items) if __name__ == "__main__": main()