- 新增 DrissionPage 基础教程(01-05) - 新增 Playwright 基础教程(01-05) - 新增网络基础教程(01-05) - 新增 test_learning_examples.py 单元测试 - 更新 .gitignore 忽略 learning/*/output/ 目录
105 lines
2.5 KiB
Python
105 lines
2.5 KiB
Python
"""
|
|
练习 04: 解析 aweme_list。
|
|
|
|
这一题不连接浏览器,只练 JSON 结构解析。
|
|
|
|
目标:
|
|
1. 看懂 `aweme_list` 的层级
|
|
2. 提取 title / video_id / video_url
|
|
3. 返回一个列表,列表里每项都是字典
|
|
|
|
预期输出格式:
|
|
[
|
|
{
|
|
"title": "...",
|
|
"video_id": "...",
|
|
"video_url": "..."
|
|
}
|
|
]
|
|
|
|
运行:
|
|
./.venv/bin/python learning/drissionpage_basics/04_parse_aweme.py
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
# Offline sample payload used by main(): a dict whose "aweme_list" holds two
# entries, each with "aweme_id", "desc" and video -> play_addr -> url_list.
# The first entry lists a non-douyinvod URL ahead of a douyinvod.com one, so
# it exercises the URL preference in choose_video_url(); the second entry has
# a single douyinvod.com URL.
SAMPLE_PAYLOAD: dict[str, Any] = {
    "aweme_list": [
        {
            "aweme_id": "7500000000000000001",
            "desc": "第一个示例视频",
            "video": {
                "play_addr": {
                    "url_list": [
                        "https://example.com/play/first",
                        "https://v11-weba.douyinvod.com/example/first.mp4",
                    ]
                }
            },
        },
        {
            "aweme_id": "7500000000000000002",
            "desc": "第二个示例视频",
            "video": {
                "play_addr": {
                    "url_list": [
                        "https://v26-web.douyinvod.com/example/second.mp4",
                    ]
                }
            },
        },
    ]
}
|
|
|
|
|
|
def choose_video_url(url_list: list[str]) -> str:
    """Pick the URL to use for a video from its candidate list.

    The first candidate containing ``"douyinvod.com"`` wins; when no
    candidate matches, the first entry of the list is returned instead.

    Args:
        url_list: Candidate URLs in the order they should be considered.

    Returns:
        The selected URL.

    Raises:
        ValueError: If ``url_list`` is empty.
    """
    # Scan first, exactly like a plain loop would, then fall back.
    preferred = next(
        (candidate for candidate in url_list if "douyinvod.com" in candidate),
        None,
    )
    if preferred is not None:
        return preferred
    if not url_list:
        raise ValueError("url_list 为空,无法选择视频地址。")
    return url_list[0]
|
|
|
|
|
|
def parse_aweme_items(body: dict[str, Any]) -> list[dict[str, str]]:
    """Extract title, video id and video URL from each usable ``aweme_list`` entry.

    Malformed entries are skipped instead of aborting the whole parse. An
    entry is dropped when it is not a dict, has no ``aweme_id``, when
    ``video`` / ``play_addr`` is not a dict, or when ``url_list`` is not a
    list containing at least one non-blank string.

    Args:
        body: Decoded payload; must hold a list under the ``"aweme_list"`` key.

    Returns:
        One dict per usable entry with the keys ``"title"``, ``"video_id"``
        and ``"video_url"``. A missing or blank ``desc`` yields the title
        ``"untitled"``.

    Raises:
        ValueError: If ``body`` has no list under ``"aweme_list"``.
    """
    aweme_list = body.get("aweme_list")
    if not isinstance(aweme_list, list):
        raise ValueError("body 里缺少 aweme_list。")

    items: list[dict[str, str]] = []
    for aweme in aweme_list:
        if not isinstance(aweme, dict):
            continue

        video_id = str(aweme.get("aweme_id") or "").strip()
        if not video_id:
            continue

        # Type-check each nesting level: a truthy non-dict value (e.g. a
        # string) would otherwise raise AttributeError on .get() and abort
        # the whole parse, while every other malformed shape is skipped.
        video = aweme.get("video")
        if not isinstance(video, dict):
            continue
        play_addr = video.get("play_addr")
        if not isinstance(play_addr, dict):
            continue
        url_list = play_addr.get("url_list")
        if not isinstance(url_list, list):
            continue

        # Keep only real, non-blank strings so that None or other junk is
        # never stringified into a bogus URL such as "None".
        urls = [url for url in url_list if isinstance(url, str) and url.strip()]
        if not urls:
            continue

        title = str(aweme.get("desc") or "").strip() or "untitled"
        items.append(
            {
                "title": title,
                "video_id": video_id,
                "video_url": choose_video_url(urls),
            }
        )

    return items
|
|
|
|
|
|
def main() -> None:
    """Parse the bundled sample payload and print the extracted items."""
    print(parse_aweme_items(SAMPLE_PAYLOAD))
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, so importing the
# module (e.g. from tests) has no side effects.
if __name__ == "__main__":
    main()
|