Use XHS video tab channel

This commit is contained in:
wangshaoqing 2026-05-27 17:57:02 +08:00
parent a43168d5a9
commit b13f170ce8
3 changed files with 7 additions and 5 deletions

View File

@@ -74,6 +74,7 @@ pip install requests DrissionPage
./.venv/bin/python XHS.py --max-videos 20 --max-runtime 600
# 长任务队列模式:适合下载大量视频,可中断后继续
+# video-channel 对应网页顶部“视频”频道,通常比关键词搜索更适合大量下载
./.venv/bin/python XHS.py \
--source video-channel \
--target-videos 1000 \

9
XHS.py
View File

@@ -17,7 +17,7 @@ from urllib.parse import urljoin
from dataclasses import replace
DEFAULT_EXPLORE_URL = "https://www.xiaohongshu.com/explore"
-DEFAULT_VIDEO_CHANNEL_URL = "https://www.xiaohongshu.com/explore?channel_id=video"
+DEFAULT_VIDEO_CHANNEL_URL = "https://www.xiaohongshu.com/explore?channel_id=homefeed.video_v3"
DEFAULT_BROWSER_PORT = 9223
DEFAULT_OUTPUT_DIR = Path("video")
LISTEN_TARGET = "/api/sns/web/v1/feed"
@@ -874,14 +874,15 @@ def run_queue_download(
else:
page.get(source_url)
human_pause(human_settings)
-note_limit = max(50, target_videos * 3 if source == "search" else target_videos * 2)
-browse_rounds = 8 if source == "search" else 2
+video_card_source = source in {"search", "video-channel"}
+note_limit = max(50, target_videos * 3 if video_card_source else target_videos * 2)
+browse_rounds = 8 if video_card_source else 2
note_urls = collect_note_urls_with_browse(
page,
limit=note_limit,
human_settings=human_settings,
rounds=browse_rounds,
-video_only=source == "search",
+video_only=video_card_source,
)
records = merge_note_urls_into_queue(records, note_urls, source=source)
save_queue(queue_file, records)

View File

@@ -296,7 +296,7 @@ class XhsModuleTests(unittest.TestCase):
self.assertEqual(module.build_source_url("explore"), module.DEFAULT_EXPLORE_URL)
self.assertEqual(
module.build_source_url("video-channel"),
-"https://www.xiaohongshu.com/explore?channel_id=video",
+"https://www.xiaohongshu.com/explore?channel_id=homefeed.video_v3",
)
def test_build_source_url_supports_encoded_search_keyword(self) -> None: