""" 练习 05: 用 Playwright 监听到的接口数据配合 requests 下载视频。 目标: 1. 用 Playwright 找到目标接口响应 2. 从 JSON 里提取第一个可下载视频 3. 用 `requests` 把 mp4 写到本地 4. 输出最终文件路径 建议: - 浏览器负责“拿到页面里的接口数据” - `requests` 负责“把真实 mp4 下载下来” - 先拿第一条视频练通,不要一开始就做批量下载 运行: ./.venv/bin/python learning/playwright_basics/05_download_video.py """ from __future__ import annotations import sys from pathlib import Path PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) from Douyin import build_headers from Douyin import DEFAULT_USER_URL from Douyin import download_video from Douyin import parse_aweme_items from Douyin import sanitize_filename OUTPUT_DIR = Path("learning/playwright_basics/output") USER_DATA_DIR = PROJECT_ROOT / ".playwright-douyin-profile" LISTEN_TARGET = "web/aweme/post/" def build_output_path(title: str, video_id: str, output_dir: Path = OUTPUT_DIR) -> Path: safe_title = sanitize_filename(title, fallback="playwright-video") return output_dir / f"{safe_title}-{video_id}.mp4" def extract_first_item_from_payload(payload) -> dict[str, str]: items = parse_aweme_items(payload) if not items: raise RuntimeError("当前接口里没有可下载视频,请先确认页面已加载出作品。") return items[0] def main() -> None: import requests from playwright.sync_api import sync_playwright with sync_playwright() as playwright: context = playwright.chromium.launch_persistent_context( user_data_dir=str(USER_DATA_DIR), headless=False, ) page = context.pages[0] if context.pages else context.new_page() page.goto(DEFAULT_USER_URL) print("[INFO] 页面已打开。请在必要时滚动一下,等待作品接口出现。") with page.expect_response(lambda response: LISTEN_TARGET in response.url, timeout=20000) as response_info: page.reload() response = response_info.value payload = response.json() first_item = extract_first_item_from_payload(payload) output_path = build_output_path( title=first_item["title"], video_id=first_item["video_id"], output_dir=OUTPUT_DIR, ) headers = build_headers(page.url) download_video( requests_module=requests, headers=headers, video_url=first_item["video_url"], output_path=output_path, ) print(f"[OK] 已下载到: {output_path}") context.close() if __name__ == "__main__": main()