wangshaoqing 321bbff1c1 添加学习教程目录(learning/)及对应单元测试
- 新增 DrissionPage 基础教程(01-05)
- 新增 Playwright 基础教程(01-05)
- 新增网络基础教程(01-05)
- 新增 test_learning_examples.py 单元测试
- 更新 .gitignore 忽略 learning/*/output/ 目录
2026-05-06 16:39:55 +08:00

88 lines
2.7 KiB
Python

"""
练习 05: 用 Playwright 监听到的接口数据配合 requests 下载视频。
目标:
1. 用 Playwright 找到目标接口响应
2. 从 JSON 里提取第一个可下载视频
3. 用 `requests` 把 mp4 写到本地
4. 输出最终文件路径
建议:
- 浏览器负责“拿到页面里的接口数据”
- `requests` 负责“把真实 mp4 下载下来”
- 先拿第一条视频练通,不要一开始就做批量下载
运行:
./.venv/bin/python learning/playwright_basics/05_download_video.py
"""
from __future__ import annotations
import sys
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from Douyin import build_headers
from Douyin import DEFAULT_USER_URL
from Douyin import download_video
from Douyin import parse_aweme_items
from Douyin import sanitize_filename
# Default directory for the downloaded mp4. This is a relative path, so it
# resolves against the current working directory, not against PROJECT_ROOT.
OUTPUT_DIR = Path("learning/playwright_basics/output")
# Browser profile directory handed to Playwright's persistent context as
# ``user_data_dir``.
USER_DATA_DIR = PROJECT_ROOT / ".playwright-douyin-profile"
# Substring matched against response URLs to pick out the target API response.
LISTEN_TARGET = "web/aweme/post/"
def build_output_path(title: str, video_id: str, output_dir: Path = OUTPUT_DIR) -> Path:
    """Return the mp4 path for one video inside *output_dir*.

    The file name has the form ``<sanitized title>-<video_id>.mp4``. The title
    is passed through ``sanitize_filename`` with ``"playwright-video"`` as the
    fallback value.

    Args:
        title: Raw video title used for the first part of the file name.
        video_id: Identifier appended after the sanitized title.
        output_dir: Directory the returned path is placed in.

    Returns:
        ``output_dir`` joined with the generated file name.
    """
    file_name = "{}-{}.mp4".format(
        sanitize_filename(title, fallback="playwright-video"),
        video_id,
    )
    return output_dir / file_name
def extract_first_item_from_payload(payload) -> dict[str, str]:
    """Return the first item that ``parse_aweme_items`` extracts from *payload*.

    Args:
        payload: Decoded response body, forwarded to ``parse_aweme_items``
            unchanged.

    Returns:
        The first parsed item.

    Raises:
        RuntimeError: If ``parse_aweme_items`` returns nothing usable.
    """
    parsed_items = parse_aweme_items(payload)
    if parsed_items:
        return parsed_items[0]
    raise RuntimeError("当前接口里没有可下载视频,请先确认页面已加载出作品。")
def main() -> None:
    """Capture the target API response with Playwright and download its first video.

    Steps:
        1. Launch a headed Chromium persistent context on ``USER_DATA_DIR`` and
           open ``DEFAULT_USER_URL``.
        2. Reload the page and wait up to 20 seconds for a response whose URL
           contains ``LISTEN_TARGET``.
        3. Decode that response as JSON and take its first parsed item.
        4. Pass the item's ``video_url`` to ``download_video`` and print the
           output path.

    The browser context is closed whether or not the steps above succeed.

    Raises:
        RuntimeError: If the captured payload contains no items.
        Exception: Errors from Playwright (for example when no matching
            response arrives in time) or from ``download_video`` propagate
            unchanged after the context has been closed.
    """
    # Imported here rather than at module level, so importing this module does
    # not need requests or Playwright to be installed.
    import requests
    from playwright.sync_api import sync_playwright

    with sync_playwright() as playwright:
        context = playwright.chromium.launch_persistent_context(
            user_data_dir=str(USER_DATA_DIR),
            headless=False,
        )
        try:
            # A persistent context may already have a page open; reuse it
            # instead of opening a second one.
            page = context.pages[0] if context.pages else context.new_page()
            page.goto(DEFAULT_USER_URL)
            print("[INFO] 页面已打开。请在必要时滚动一下,等待作品接口出现。")

            # Start listening before the reload so the response triggered by
            # the reload itself cannot be missed.
            with page.expect_response(
                lambda response: LISTEN_TARGET in response.url,
                timeout=20000,
            ) as response_info:
                page.reload()

            payload = response_info.value.json()
            first_item = extract_first_item_from_payload(payload)
            output_path = build_output_path(
                title=first_item["title"],
                video_id=first_item["video_id"],
                output_dir=OUTPUT_DIR,
            )
            # Whether download_video creates missing directories is not visible
            # from here, so make sure the target directory exists (idempotent).
            output_path.parent.mkdir(parents=True, exist_ok=True)

            download_video(
                requests_module=requests,
                headers=build_headers(page.url),
                video_url=first_item["video_url"],
                output_path=output_path,
            )
            print(f"[OK] 已下载到: {output_path}")
        finally:
            # Close the context on failures too (timeout, empty payload,
            # download error) so the persistent profile is released cleanly.
            context.close()
# Run the exercise only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()