From ec1ff6322c76fa64791d62d8d7c85509b5670d79 Mon Sep 17 00:00:00 2001
From: wangshaoqing
Date: Wed, 6 May 2026 16:59:50 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=8E=A8=E8=8D=90=E6=B5=81?=
 =?UTF-8?q?=E6=8A=93=E5=8F=96=E8=AE=BE=E8=AE=A1=E6=96=87=E6=A1=A3=EF=BC=9A?=
 =?UTF-8?q?=E6=98=8E=E7=A1=AE=E6=8E=A5=E5=8F=A3=E8=B7=AF=E5=BE=84=E3=80=81?=
 =?UTF-8?q?=E9=A1=B5=E9=9D=A2=E6=89=93=E5=BC=80=E6=96=B9=E5=BC=8F=E3=80=81?=
 =?UTF-8?q?=E5=8E=BB=E9=87=8D=E6=9C=BA=E5=88=B6=E5=92=8C=E5=8F=82=E6=95=B0?=
 =?UTF-8?q?=E4=BA=A4=E4=BA=92=E8=A7=84=E5=88=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...-05-06-douyin-recommendation-crawling-design.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/superpowers/specs/2026-05-06-douyin-recommendation-crawling-design.md b/docs/superpowers/specs/2026-05-06-douyin-recommendation-crawling-design.md
index 44bcdb0..3a5ac01 100644
--- a/docs/superpowers/specs/2026-05-06-douyin-recommendation-crawling-design.md
+++ b/docs/superpowers/specs/2026-05-06-douyin-recommendation-crawling-design.md
@@ -61,12 +61,12 @@ RECOMMENDATION_URL_PATTERN = re.compile(
 - `browser_port`: 浏览器调试端口(默认9223)
 
 **行为:**
-1. 打开或切换到推荐流页面
-2. 启动监听,目标接口:`web/aweme/post/` 或推荐流专用接口
+1. 通过 `page.get("https://www.douyin.com/")` 打开推荐流页面(复用现有页面打开逻辑,不切换标签页)
+2. 启动监听,目标接口:`web/aweme/post/`(推荐流与博主页共用此接口)
 3. 循环直到收集够 `max_videos` 条或无法继续加载:
    - 等待接口响应
    - 解析视频列表,提取:标题、视频ID、视频URL、博主信息
-   - 过滤已下载(去重)
+   - 过滤已下载(按 `video_id` 去重,使用 `seen_ids: set[str]` 集合)
    - 下载视频
    - 向下滚动加载更多
 4. 返回实际下载数量
@@ -110,11 +110,11 @@ RECOMMENDATION_URL_PATTERN = re.compile(
 # 零参数,自动识别当前页面
 ./.venv/bin/python Douyin.py
 
-# 显式指定推荐流(可选)
-./.venv/bin/python Douyin.py "https://www.douyin.com/"
-
-# 自定义抓取数量
+# 自定义抓取数量(仅对推荐流有效)
 ./.venv/bin/python Douyin.py --max-videos 30
+
+# 显式传入URL时,--max-videos 不适用(博主页和单视频页忽略此参数)
+./.venv/bin/python Douyin.py "https://www.douyin.com/user/xxx"
 ```
 
 ### 7. 错误处理