wangshaoqing 321bbff1c1 添加学习教程目录(learning/)及对应单元测试
- 新增 DrissionPage 基础教程(01-05)
- 新增 Playwright 基础教程(01-05)
- 新增网络基础教程(01-05)
- 新增 test_learning_examples.py 单元测试
- 更新 .gitignore 忽略 learning/*/output/ 目录
2026-05-06 16:39:55 +08:00

105 lines
2.5 KiB
Python

"""
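Exercise 04: parse aweme_list.

This exercise does not connect to a browser; it only practices parsing a
JSON structure.

Goals:
1. Understand the nesting of `aweme_list`
2. Extract title / video_id / video_url
3. Return a list in which every item is a dict

Expected output format:
[
    {
        "title": "...",
        "video_id": "...",
        "video_url": "..."
    }
]

Run:
    ./.venv/bin/python learning/drissionpage_basics/04_parse_aweme.py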
"""
from __future__ import annotations
from typing import Any
# Hand-written sample payload used by main() as input for parse_aweme_items().
# Each entry nests its candidate URLs under video -> play_addr -> url_list.
SAMPLE_PAYLOAD: dict[str, Any] = {
    "aweme_list": [
        {
            "aweme_id": "7500000000000000001",
            "desc": "第一个示例视频",
            "video": {
                "play_addr": {
                    # Two candidates: the first is not on douyinvod.com, the
                    # second is, so URL selection has a real choice to make.
                    "url_list": [
                        "https://example.com/play/first",
                        "https://v11-weba.douyinvod.com/example/first.mp4",
                    ]
                }
            },
        },
        {
            "aweme_id": "7500000000000000002",
            "desc": "第二个示例视频",
            "video": {
                "play_addr": {
                    # Single candidate, already on douyinvod.com.
                    "url_list": [
                        "https://v26-web.douyinvod.com/example/second.mp4",
                    ]
                }
            },
        },
    ]
}
def choose_video_url(url_list: list[str]) -> str:
    """Pick the video address to use from a list of candidate URLs.

    The first candidate containing "douyinvod.com" wins. When no candidate
    contains it, the first entry of the list is returned instead.

    Args:
        url_list: Candidate URLs in their original order.

    Returns:
        The selected URL.

    Raises:
        ValueError: If ``url_list`` is empty.
    """
    # Scan before checking for emptiness so evaluation order matches a plain
    # loop-then-fallback implementation.
    preferred = next(
        (candidate for candidate in url_list if "douyinvod.com" in candidate),
        None,
    )
    if preferred is not None:
        return preferred
    if not url_list:
        raise ValueError("url_list 为空,无法选择视频地址。")
    return url_list[0]
def parse_aweme_items(body: dict[str, Any]) -> list[dict[str, str]]:
    """Extract title, video id and video URL from each usable ``aweme_list`` entry.

    Entries that cannot yield a complete record are skipped instead of
    failing the whole parse: non-dict entries, entries without an
    ``aweme_id``, entries whose ``video`` or ``play_addr`` is not a dict, and
    entries whose ``url_list`` holds no non-blank string.

    Args:
        body: Decoded JSON payload expected to hold a list under the
            "aweme_list" key.

    Returns:
        One dict per usable entry with the keys "title", "video_id" and
        "video_url". A missing or blank ``desc`` becomes the title "untitled".

    Raises:
        ValueError: If "aweme_list" is missing from ``body`` or is not a list.
    """
    aweme_list = body.get("aweme_list")
    if not isinstance(aweme_list, list):
        raise ValueError("body 里缺少 aweme_list。")

    items: list[dict[str, str]] = []
    for aweme in aweme_list:
        if not isinstance(aweme, dict):
            continue

        video_id = str(aweme.get("aweme_id") or "").strip()
        if not video_id:
            continue

        title = str(aweme.get("desc") or "").strip() or "untitled"

        # Descend video -> play_addr -> url_list one level at a time. A level
        # with an unexpected type (e.g. a string where a dict should be) means
        # this entry is malformed, so skip it like the other malformed cases
        # rather than letting `.get` raise AttributeError for the whole batch.
        video = aweme.get("video")
        if not isinstance(video, dict):
            continue
        play_addr = video.get("play_addr")
        if not isinstance(play_addr, dict):
            continue
        url_list = play_addr.get("url_list")
        if not isinstance(url_list, list):
            continue

        # Keep only real, non-blank strings: stringifying None or other
        # non-string elements would produce bogus addresses such as "None".
        urls = [url for url in url_list if isinstance(url, str) and url.strip()]
        if not urls:
            continue

        items.append(
            {
                "title": title,
                "video_id": video_id,
                "video_url": choose_video_url(urls),
            }
        )
    return items
def main() -> None:
    """Parse the bundled sample payload and print the extracted records."""
    print(parse_aweme_items(SAMPLE_PAYLOAD))
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()