wangshaoqing 321bbff1c1 添加学习教程目录(learning/)及对应单元测试
- 新增 DrissionPage 基础教程(01-05)
- 新增 Playwright 基础教程(01-05)
- 新增网络基础教程(01-05)
- 新增 test_learning_examples.py 单元测试
- 更新 .gitignore 忽略 learning/*/output/ 目录
2026-05-06 16:39:55 +08:00

104 lines
3.0 KiB
Python

"""
练习 05: 用 requests 下载一个 mp4 到本地。
目标:
1. 自己导入 requests
2. 发起 GET 请求
3. 把响应内容写入本地文件
4. 手动确认文件确实存在
建议:
- 先从你在 03 / 04 里拿到的真实 mp4 链接开始
- 不要一开始就封装复杂函数
- 第一版先写死 URL 和输出文件名
运行:
./.venv/bin/python learning/drissionpage_basics/05_download_video.py
"""
from __future__ import annotations
import sys
from pathlib import Path
# Three ``.parent`` hops up from this file's resolved location. Presumably this
# is the repository root that contains the ``Douyin`` module -- confirm against
# the actual project layout.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
# Put that directory at the front of sys.path (only if it is not already
# listed) before the ``Douyin`` import that follows; presumably needed so the
# import resolves when this file is launched directly as a script.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from Douyin import (
LISTEN_TARGET,
build_headers,
download_video,
ensure_browser_debug_port_ready,
extract_aweme_payload,
parse_aweme_items,
sanitize_filename,
)
# Default port attach_to_browser() combines with 127.0.0.1 to form the address
# of the already-running browser.
DEFAULT_BROWSER_PORT = 9223
# Default directory for downloaded files. The path is relative, so it resolves
# against the current working directory of the process.
OUTPUT_DIR = Path("learning/drissionpage_basics/output")
# Default timeout handed to page.listen.wait() in download_first_real_video();
# presumably seconds, going by the name.
TIMEOUT_SECONDS = 20
def build_output_path(title: str, video_id: str, output_dir: Path = OUTPUT_DIR) -> Path:
    """Return the ``.mp4`` destination path for one video.

    The file name has the form ``<sanitized title>-<video_id>.mp4``. The title
    is passed through ``sanitize_filename`` with ``"practice-video"`` as its
    fallback; ``video_id`` is inserted unchanged. Nothing is created on disk.

    Args:
        title: Raw video title used for the first part of the file name.
        video_id: Identifier appended after the sanitized title.
        output_dir: Directory the generated file name is joined onto.

    Returns:
        ``output_dir`` joined with the generated file name.
    """
    file_stem = sanitize_filename(title, fallback="practice-video")
    file_name = f"{file_stem}-{video_id}.mp4"
    return output_dir / file_name
def attach_to_browser(browser_port: int = DEFAULT_BROWSER_PORT):
    """Attach to a browser reachable on a local debugging port.

    ``ensure_browser_debug_port_ready`` is called with the port first; then a
    ``ChromiumPage`` is built from options that point at
    ``127.0.0.1:<browser_port>`` with ``existing_only(True)`` set (per the
    DrissionPage docs this should prevent a new browser from being launched --
    confirm against the installed version).

    Args:
        browser_port: Debugging port of the already-running browser.

    Returns:
        The ``ChromiumPage`` created from those options.
    """
    # Imported lazily so merely importing this module does not need DrissionPage.
    from DrissionPage import ChromiumOptions, ChromiumPage

    ensure_browser_debug_port_ready(browser_port)

    browser_options = ChromiumOptions()
    browser_options = browser_options.set_address(f"127.0.0.1:{browser_port}")
    browser_options = browser_options.existing_only(True)
    return ChromiumPage(browser_options)
def download_first_real_video(
    page,
    requests_module,
    output_dir: Path = OUTPUT_DIR,
    timeout: int = TIMEOUT_SECONDS,
) -> Path:
    """Reload the current page, capture the works API packet and download the first video.

    Steps: start listening for ``LISTEN_TARGET``, reload the page's current
    URL, wait for one packet, parse it into items with
    ``extract_aweme_payload`` / ``parse_aweme_items``, then hand the first
    item's ``video_url`` to ``download_video``.

    The listener is stopped once the packet has been parsed (or as soon as
    capturing fails), so it does not keep collecting traffic during the
    download or after an error.

    Args:
        page: Browser page object exposing ``url``, ``get()`` and ``listen``.
        requests_module: Object forwarded to ``download_video`` as its
            ``requests_module`` argument (``main`` passes the ``requests`` module).
        output_dir: Directory the output file path is built under.
        timeout: Timeout passed to ``page.listen.wait()``.

    Returns:
        The path the video was downloaded to.

    Raises:
        RuntimeError: If no usable packet was captured, or no items were parsed.
    """
    current_url = page.url
    page.listen.start(LISTEN_TARGET)
    try:
        # Reloading the same URL makes the page issue its requests again while
        # the listener is active.
        page.get(current_url)
        packet = page.listen.wait(timeout=timeout)
        if not packet or not hasattr(packet, "response"):
            raise RuntimeError("当前页面没有监听到作品接口,请先切到博主主页并滚动页面后重试。")
        # Extract inside the try block so the listener is still alive in case
        # the response object reads its data lazily.
        payload = extract_aweme_payload(packet.response)
    finally:
        # Always release the listener that was started above. Looked up with
        # getattr so page objects whose listener has no stop() keep working.
        stop_listening = getattr(page.listen, "stop", None)
        if callable(stop_listening):
            stop_listening()

    items = parse_aweme_items(payload)
    if not items:
        raise RuntimeError("当前页面没有解析到可下载视频,请先确认页面已加载出作品。")

    first_item = items[0]
    output_path = build_output_path(
        title=first_item["title"],
        video_id=first_item["video_id"],
        output_dir=output_dir,
    )
    # Headers are built from the page URL as it is after the reload.
    headers = build_headers(page.url)
    download_video(
        requests_module=requests_module,
        headers=headers,
        video_url=first_item["video_url"],
        output_path=output_path,
    )
    return output_path
def main() -> None:
    """Attach to the running browser, download the first video and print progress.

    Prints the attached page's title and URL, a hint about listening, and
    finally the path returned by ``download_first_real_video``.
    """
    # Imported here so requests is only needed when the script is actually run.
    import requests

    browser_page = attach_to_browser()
    print(f"[INFO] 当前页面: {browser_page.title}")
    print(f"[INFO] 当前 URL: {browser_page.url}")
    print("[INFO] 正在监听当前页面的作品接口,必要时请在浏览器中轻微滚动一下页面。")

    saved_path = download_first_real_video(
        page=browser_page,
        requests_module=requests,
    )
    print(f"[OK] 已下载到: {saved_path}")
# Run the exercise only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()