- 新增 DrissionPage 基础教程(01-05) - 新增 Playwright 基础教程(01-05) - 新增网络基础教程(01-05) - 新增 test_learning_examples.py 单元测试 - 更新 .gitignore 忽略 learning/*/output/ 目录
104 lines
3.0 KiB
Python
104 lines
3.0 KiB
Python
"""
练习 05: 用 requests 下载一个 mp4 到本地。

目标:
1. 自己导入 requests
2. 发起 GET 请求
3. 把响应内容写入本地文件
4. 手动确认文件确实存在

建议:
- 先从你在 03 / 04 里拿到的真实 mp4 链接开始
- 不要一开始就封装复杂函数
- 第一版先写死 URL 和输出文件名

运行:
./.venv/bin/python learning/drissionpage_basics/05_download_video.py
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Three directory levels above this file. It is put at the front of sys.path so
# the `from Douyin import ...` statement that follows can be resolved when this
# file is executed directly as a script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
_project_root_entry = str(PROJECT_ROOT)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)
|
|
|
|
from Douyin import (
|
|
LISTEN_TARGET,
|
|
build_headers,
|
|
download_video,
|
|
ensure_browser_debug_port_ready,
|
|
extract_aweme_payload,
|
|
parse_aweme_items,
|
|
sanitize_filename,
|
|
)
|
|
|
|
# Default debug port used by attach_to_browser to reach the running browser.
DEFAULT_BROWSER_PORT: int = 9223
# Default download directory. The path is relative, so it resolves against the
# current working directory of the process.
OUTPUT_DIR: Path = Path("learning/drissionpage_basics/output")
# Default `timeout` handed to page.listen.wait in download_first_real_video.
TIMEOUT_SECONDS: int = 20
|
|
|
|
|
|
def build_output_path(title: str, video_id: str, output_dir: Path = OUTPUT_DIR) -> Path:
    """Build the destination ``.mp4`` path for one video.

    The title is passed through ``sanitize_filename`` (called with
    ``fallback="practice-video"``) and combined with the video id into a file
    name of the form ``<sanitized title>-<video_id>.mp4``.

    Args:
        title: Raw video title.
        video_id: Identifier appended to the file name.
        output_dir: Directory the resulting path is placed under.

    Returns:
        The joined path. Nothing is created on disk by this function.
    """
    file_stem = sanitize_filename(title, fallback="practice-video")
    file_name = f"{file_stem}-{video_id}.mp4"
    return output_dir / file_name
|
|
|
|
|
|
def attach_to_browser(browser_port: int = DEFAULT_BROWSER_PORT):
    """Connect to a browser through its debug port on 127.0.0.1.

    ``ensure_browser_debug_port_ready`` is called first with the same port.

    Args:
        browser_port: Local debug port to connect to.

    Returns:
        The ``ChromiumPage`` built from options that point at
        ``127.0.0.1:<browser_port>`` with ``existing_only(True)`` set.
    """
    # Imported inside the function, so DrissionPage is only required once this
    # function is actually called.
    from DrissionPage import ChromiumOptions, ChromiumPage

    ensure_browser_debug_port_ready(browser_port)

    debug_address = f"127.0.0.1:{browser_port}"
    # Each setter's return value is carried forward, exactly as in a call chain.
    addressed_options = ChromiumOptions().set_address(debug_address)
    attach_only_options = addressed_options.existing_only(True)
    return ChromiumPage(attach_only_options)
|
|
|
|
|
|
def download_first_real_video(
    page,
    requests_module,
    output_dir: Path = OUTPUT_DIR,
    timeout: int = TIMEOUT_SECONDS,
) -> Path:
    """Reload the current page, capture one listened packet and download its first video.

    Steps: start listening for ``LISTEN_TARGET``, reload the page's current URL,
    wait for one packet, parse it into items, then download the first item.

    Args:
        page: Browser page object. It must expose ``url``, ``get()`` and a
            ``listen`` object with ``start()`` and ``wait()``; ``listen.stop()``
            is called when available.
        requests_module: Forwarded unchanged to ``download_video``.
        output_dir: Directory the video file path is built under.
        timeout: Passed to ``page.listen.wait``. It is not forwarded to the
            download itself.

    Returns:
        The path handed to ``download_video`` as ``output_path``.

    Raises:
        RuntimeError: If no packet with a ``response`` attribute is captured
            before the timeout, or if the payload yields no items.
    """
    current_url = page.url
    page.listen.start(LISTEN_TARGET)
    try:
        # Reloading the same URL makes the page issue its requests again while
        # the listener is active.
        page.get(current_url)
        packet = page.listen.wait(timeout=timeout)
    finally:
        # Always shut the listener down, including on timeout or error, so it
        # does not keep capturing traffic on the attached browser tab. The
        # lookup is guarded so page stand-ins without `stop` keep working.
        stop_listening = getattr(page.listen, "stop", None)
        if callable(stop_listening):
            stop_listening()

    if not packet or not hasattr(packet, "response"):
        raise RuntimeError("当前页面没有监听到作品接口,请先切到博主主页并滚动页面后重试。")

    payload = extract_aweme_payload(packet.response)
    items = parse_aweme_items(payload)
    if not items:
        raise RuntimeError("当前页面没有解析到可下载视频,请先确认页面已加载出作品。")

    first_item = items[0]
    output_path = build_output_path(
        title=first_item["title"],
        video_id=first_item["video_id"],
        output_dir=output_dir,
    )
    # Headers are built from the page URL as it is after the reload.
    headers = build_headers(page.url)
    download_video(
        requests_module=requests_module,
        headers=headers,
        video_url=first_item["video_url"],
        output_path=output_path,
    )
    return output_path
|
|
|
|
|
|
def main() -> None:
    """Attach to the browser, report the current page and download its first video.

    Prints the attached page's title and URL, a hint about scrolling, and
    finally the path returned by ``download_first_real_video``.
    """
    # Imported here, so `requests` is only needed when the script is run.
    import requests

    browser_page = attach_to_browser()
    print(f"[INFO] 当前页面: {browser_page.title}")
    print(f"[INFO] 当前 URL: {browser_page.url}")
    print("[INFO] 正在监听当前页面的作品接口,必要时请在浏览器中轻微滚动一下页面。")

    saved_path = download_first_real_video(
        page=browser_page,
        requests_module=requests,
    )
    print(f"[OK] 已下载到: {saved_path}")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|